xref: /illumos-gate/usr/src/uts/common/fs/specfs/specsubr.c (revision 88e55da9244bc48e3b3ad957a29e4be71309adcd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2012 by Delphix. All rights reserved.
27  * Copyright 2017 Joyent, Inc.
28  */
29 
30 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
31 /*	  All Rights Reserved  	*/
32 
33 /*
34  * University Copyright- Copyright (c) 1982, 1986, 1988
35  * The Regents of the University of California
36  * All Rights Reserved
37  *
38  * University Acknowledgment- Portions of this document are derived from
39  * software developed by the University of California, Berkeley, and its
40  * contributors.
41  */
42 
43 
44 #include <sys/types.h>
45 #include <sys/t_lock.h>
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/buf.h>
49 #include <sys/conf.h>
50 #include <sys/cred.h>
51 #include <sys/kmem.h>
52 #include <sys/sysmacros.h>
53 #include <sys/vfs.h>
54 #include <sys/vfs_opreg.h>
55 #include <sys/vnode.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/fifonode.h>
58 #include <sys/debug.h>
59 #include <sys/errno.h>
60 #include <sys/time.h>
61 #include <sys/file.h>
62 #include <sys/open.h>
63 #include <sys/user.h>
64 #include <sys/termios.h>
65 #include <sys/stream.h>
66 #include <sys/strsubr.h>
67 #include <sys/autoconf.h>
68 #include <sys/esunddi.h>
69 #include <sys/flock.h>
70 #include <sys/modctl.h>
71 
/* vfs installed as v_vfsp on snodes that do not shadow a real vnode */
struct vfs spec_vfs;
/* device number built in specinit(); default s_fsid for new snodes */
static dev_t specdev;
/* kmem cache from which the snodes in this file are allocated */
struct kmem_cache *snode_cache;
/* NOTE(review): presumably gates the FENDBG debug output -- confirm */
int spec_debug = 0;

/* snode table helpers; each asserts that stable_lock is held */
static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
static void sinsert(struct snode *);
80 
81 struct vnode *
82 specvp_devfs(
83 	struct vnode	*realvp,
84 	dev_t		dev,
85 	vtype_t		vtyp,
86 	struct cred	*cr,
87 	dev_info_t	*dip)
88 {
89 	struct vnode	*vp;
90 
91 	ASSERT(realvp && dip);
92 	vp = specvp(realvp, dev, vtyp, cr);
93 	ASSERT(vp);
94 
95 	/* associate a dip hold with the common snode's s_dip pointer */
96 	spec_assoc_vp_with_devi(vp, dip);
97 	return (vp);
98 }
99 
100 /*
101  * Return a shadow special vnode for the given dev.
102  * If no snode exists for this dev create one and put it
103  * in a table hashed by <dev, realvp>.  If the snode for
104  * this dev is already in the table return it (ref count is
105  * incremented by sfind).  The snode will be flushed from the
106  * table when spec_inactive calls sdelete.
107  *
108  * The fsid is inherited from the real vnode so that clones
109  * can be found.
110  *
111  */
112 struct vnode *
113 specvp(
114 	struct vnode	*vp,
115 	dev_t		dev,
116 	vtype_t		type,
117 	struct cred	*cr)
118 {
119 	struct snode *sp;
120 	struct snode *nsp;
121 	struct snode *csp;
122 	struct vnode *svp;
123 	struct vattr va;
124 	int	rc;
125 	int	used_csp = 0;		/* Did we use pre-allocated csp */
126 
127 	if (vp == NULL)
128 		return (NULL);
129 	if (vp->v_type == VFIFO)
130 		return (fifovp(vp, cr));
131 
132 	ASSERT(vp->v_type == type);
133 	ASSERT(vp->v_rdev == dev);
134 
135 	/*
136 	 * Pre-allocate snodes before holding any locks in case we block
137 	 */
138 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
139 	csp = kmem_cache_alloc(snode_cache, KM_SLEEP);
140 
141 	/*
142 	 * Get the time attributes outside of the stable lock since
143 	 * this operation may block. Unfortunately, it may not have
144 	 * been required if the snode is in the cache.
145 	 */
146 	va.va_mask = AT_FSID | AT_TIMES;
147 	rc = VOP_GETATTR(vp, &va, 0, cr, NULL);	/* XXX may block! */
148 
149 	mutex_enter(&stable_lock);
150 	if ((sp = sfind(dev, type, vp)) == NULL) {
151 		struct vnode *cvp;
152 
153 		sp = nsp;	/* Use pre-allocated snode */
154 		svp = STOV(sp);
155 
156 		sp->s_realvp	= vp;
157 		VN_HOLD(vp);
158 		sp->s_commonvp	= NULL;
159 		sp->s_dev	= dev;
160 		sp->s_dip	= NULL;
161 		sp->s_nextr	= NULL;
162 		sp->s_list	= NULL;
163 		sp->s_plcy	= NULL;
164 		sp->s_size	= 0;
165 		sp->s_flag	= 0;
166 		if (rc == 0) {
167 			/*
168 			 * Set times in snode to those in the vnode.
169 			 */
170 			sp->s_fsid = va.va_fsid;
171 			sp->s_atime = va.va_atime.tv_sec;
172 			sp->s_mtime = va.va_mtime.tv_sec;
173 			sp->s_ctime = va.va_ctime.tv_sec;
174 		} else {
175 			sp->s_fsid = specdev;
176 			sp->s_atime = 0;
177 			sp->s_mtime = 0;
178 			sp->s_ctime = 0;
179 		}
180 		sp->s_count	= 0;
181 		sp->s_mapcnt	= 0;
182 
183 		vn_reinit(svp);
184 		svp->v_flag	= (vp->v_flag & VROOT);
185 		svp->v_vfsp	= vp->v_vfsp;
186 		VFS_HOLD(svp->v_vfsp);
187 		svp->v_type	= type;
188 		svp->v_rdev	= dev;
189 		(void) vn_copypath(vp, svp);
190 		if (type == VBLK || type == VCHR) {
191 			cvp = get_cvp(dev, type, csp, &used_csp);
192 			svp->v_stream = cvp->v_stream;
193 
194 			sp->s_commonvp = cvp;
195 		}
196 		vn_exists(svp);
197 		sinsert(sp);
198 		mutex_exit(&stable_lock);
199 		if (used_csp == 0) {
200 			/* Didn't use pre-allocated snode so free it */
201 			kmem_cache_free(snode_cache, csp);
202 		}
203 	} else {
204 		mutex_exit(&stable_lock);
205 		/* free unused snode memory */
206 		kmem_cache_free(snode_cache, nsp);
207 		kmem_cache_free(snode_cache, csp);
208 	}
209 	return (STOV(sp));
210 }
211 
212 /*
213  * Return a special vnode for the given dev; no vnode is supplied
214  * for it to shadow.  Always create a new snode and put it in the
215  * table hashed by <dev, NULL>.  The snode will be flushed from the
216  * table when spec_inactive() calls sdelete().  The association of
217  * this node with a attached instance of hardware is not made until
218  * spec_open time.
219  *
220  * N.B. Assumes caller takes on responsibility of making sure no one
221  * else is creating a snode for (dev, type) at this time.
222  */
223 struct vnode *
224 makespecvp(dev_t dev, vtype_t type)
225 {
226 	struct snode *sp;
227 	struct vnode *svp, *cvp;
228 	time_t now;
229 
230 	sp = kmem_cache_alloc(snode_cache, KM_SLEEP);
231 	svp = STOV(sp);
232 	cvp = commonvp(dev, type);
233 	now = gethrestime_sec();
234 
235 	sp->s_realvp	= NULL;
236 	sp->s_commonvp	= cvp;
237 	sp->s_dev	= dev;
238 	sp->s_dip	= NULL;
239 	sp->s_nextr	= NULL;
240 	sp->s_list	= NULL;
241 	sp->s_plcy	= NULL;
242 	sp->s_size	= 0;
243 	sp->s_flag	= 0;
244 	sp->s_fsid	= specdev;
245 	sp->s_atime	= now;
246 	sp->s_mtime	= now;
247 	sp->s_ctime	= now;
248 	sp->s_count	= 0;
249 	sp->s_mapcnt	= 0;
250 
251 	vn_reinit(svp);
252 	svp->v_vfsp	= &spec_vfs;
253 	svp->v_stream	= cvp->v_stream;
254 	svp->v_type	= type;
255 	svp->v_rdev	= dev;
256 
257 	vn_exists(svp);
258 	mutex_enter(&stable_lock);
259 	sinsert(sp);
260 	mutex_exit(&stable_lock);
261 
262 	return (svp);
263 }
264 
265 
266 /*
267  * This function is called from spec_assoc_vp_with_devi(). That function
268  * associates a "new" dip with a common snode, releasing (any) old dip
269  * in the process. This function (spec_assoc_fence()) looks at the "new dip"
270  * and determines whether the snode should be fenced of or not. As the table
271  * below indicates, the value of old-dip is a don't care for all cases.
272  *
273  * old-dip	new-dip		common-snode
274  * =========================================
275  * Don't care	NULL		unfence
276  * Don't care	retired		fence
277  * Don't care	not-retired	unfence
278  *
279  * Since old-dip value is a "don't care", it is not passed into this function.
280  */
281 static void
282 spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
283 {
284 	int		fence;
285 	struct snode	*csp;
286 
287 	ASSERT(vp);
288 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
289 
290 	fence = 0;
291 	if (ndip != NULL) {
292 		mutex_enter(&DEVI(ndip)->devi_lock);
293 		if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
294 			fence = 1;
295 		mutex_exit(&DEVI(ndip)->devi_lock);
296 	}
297 
298 	csp = VTOCS(vp);
299 	ASSERT(csp);
300 
301 	/* SFENCED flag only set on common snode */
302 	mutex_enter(&csp->s_lock);
303 	if (fence)
304 		csp->s_flag |= SFENCED;
305 	else
306 		csp->s_flag &= ~SFENCED;
307 	mutex_exit(&csp->s_lock);
308 
309 	FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
310 	    fence ? "" : "un", (void *)csp, (void *)ndip));
311 }
312 
313 /*
314  * Associate the common snode with a devinfo node.  This is called from:
315  *
316  *   1) specvp_devfs to associate a specfs node with the dip attached
317  *	by devfs.
318  *
319  *   2) spec_open after path reconstruction and attach.
320  *
321  *   3) From dacf processing to associate a makespecvp node with
322  *	the dip that dacf postattach processing is being performed on.
323  *	This association is made prior to open to avoid recursion issues.
324  *
325  *   4) From ddi_assoc_queue_with_devi to change vnode association as part of
326  *	DL_ATTACH/DL_DETACH processing (SDIPSET already set).  The call
327  *	from ddi_assoc_queue_with_devi may specify a NULL dip.
328  *
329  * We put an extra hold on the devinfo node passed in as we establish it as
330  * the new s_dip pointer.  Any hold associated with the prior s_dip pointer
331  * is released. The new hold will stay active until another call to
332  * spec_assoc_vp_with_devi or until the common snode is destroyed by
333  * spec_inactive after the last VN_RELE of the common node. This devinfo hold
334  * transfers across a clone open except in the clone_dev case, where the clone
335  * driver is no longer required after open.
336  *
337  * When SDIPSET is set and s_dip is NULL, the vnode has an association with
338  * the driver even though there is currently no association with a specific
339  * hardware instance.
340  */
341 void
342 spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
343 {
344 	struct snode	*csp;
345 	dev_info_t	*olddip;
346 
347 	ASSERT(vp);
348 
349 	/*
350 	 * Don't establish a NULL association for a vnode associated with the
351 	 * clone driver.  The qassociate(, -1) call from a streams driver's
352 	 * open implementation to indicate support for qassociate has the
353 	 * side-effect of this type of spec_assoc_vp_with_devi call. This
354 	 * call should not change the the association of the pre-clone
355 	 * vnode associated with the clone driver, the post-clone newdev
356 	 * association will be established later by spec_clone().
357 	 */
358 	if ((dip == NULL) && (getmajor(vp->v_rdev) == clone_major))
359 		return;
360 
361 	/* hold the new */
362 	if (dip)
363 		e_ddi_hold_devi(dip);
364 
365 	csp = VTOS(VTOS(vp)->s_commonvp);
366 	mutex_enter(&csp->s_lock);
367 	olddip = csp->s_dip;
368 	csp->s_dip = dip;
369 	csp->s_flag |= SDIPSET;
370 
371 	/* If association changes then invalidate cached size */
372 	if (olddip != dip)
373 		csp->s_flag &= ~SSIZEVALID;
374 	mutex_exit(&csp->s_lock);
375 
376 	spec_assoc_fence(dip, vp);
377 
378 	/* release the old */
379 	if (olddip)
380 		ddi_release_devi(olddip);
381 }
382 
383 /*
384  * Return the held dip associated with the specified snode.
385  */
386 dev_info_t *
387 spec_hold_devi_by_vp(struct vnode *vp)
388 {
389 	struct snode	*csp;
390 	dev_info_t	*dip;
391 
392 	ASSERT(vn_matchops(vp, spec_getvnodeops()));
393 
394 	csp = VTOS(VTOS(vp)->s_commonvp);
395 	dip = csp->s_dip;
396 	if (dip)
397 		e_ddi_hold_devi(dip);
398 	return (dip);
399 }
400 
401 /*
402  * Find a special vnode that refers to the given device
403  * of the given type.  Never return a "common" vnode.
404  * Return NULL if a special vnode does not exist.
405  * HOLD the vnode before returning it.
406  */
407 struct vnode *
408 specfind(dev_t dev, vtype_t type)
409 {
410 	struct snode *st;
411 	struct vnode *nvp;
412 
413 	mutex_enter(&stable_lock);
414 	st = stable[STABLEHASH(dev)];
415 	while (st != NULL) {
416 		if (st->s_dev == dev) {
417 			nvp = STOV(st);
418 			if (nvp->v_type == type && st->s_commonvp != nvp) {
419 				VN_HOLD(nvp);
420 				/* validate vnode is visible in the zone */
421 				if (nvp->v_path != NULL &&
422 				    ZONE_PATH_VISIBLE(nvp->v_path, curzone)) {
423 					mutex_exit(&stable_lock);
424 					return (nvp);
425 				}
426 				VN_RELE(nvp);
427 			}
428 		}
429 		st = st->s_next;
430 	}
431 	mutex_exit(&stable_lock);
432 	return (NULL);
433 }
434 
435 /*
436  * Loop through the snode cache looking for snodes referencing dip.
437  *
438  * This function determines if a devinfo node is "BUSY" from the perspective
439  * of having an active vnode associated with the device, which represents a
440  * dependency on the device's services.  This function is needed because a
441  * devinfo node can have a non-zero devi_ref and still NOT be "BUSY" when,
442  * for instance, the framework is manipulating the node (has an open
443  * ndi_hold_devi).
444  *
445  * Returns:
446  *	DEVI_REFERENCED		- if dip is referenced
447  *	DEVI_NOT_REFERENCED	- if dip is not referenced
448  */
449 int
450 devi_stillreferenced(dev_info_t *dip)
451 {
452 	struct snode	*sp;
453 	int		i;
454 
455 	/* if no hold then there can't be an snode with s_dip == dip */
456 	if (e_ddi_devi_holdcnt(dip) == 0)
457 		return (DEVI_NOT_REFERENCED);
458 
459 	mutex_enter(&stable_lock);
460 	for (i = 0; i < STABLESIZE; i++) {
461 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
462 			if (sp->s_dip == dip) {
463 				mutex_exit(&stable_lock);
464 				return (DEVI_REFERENCED);
465 			}
466 		}
467 	}
468 	mutex_exit(&stable_lock);
469 	return (DEVI_NOT_REFERENCED);
470 }
471 
472 /*
473  * Given an snode, returns the open count and the dip
474  * associated with that snode
475  * Assumes the caller holds the appropriate locks
476  * to prevent snode and/or dip from going away.
477  * Returns:
478  *	-1	No associated dip
479  *	>= 0	Number of opens.
480  */
481 int
482 spec_devi_open_count(struct snode *sp, dev_info_t **dipp)
483 {
484 	dev_info_t *dip;
485 	uint_t count;
486 	struct vnode *vp;
487 
488 	ASSERT(sp);
489 	ASSERT(dipp);
490 
491 	vp = STOV(sp);
492 
493 	*dipp = NULL;
494 
495 	/*
496 	 * We are only interested in common snodes. Only common snodes
497 	 * get their s_count fields bumped up on opens.
498 	 */
499 	if (sp->s_commonvp != vp || (dip = sp->s_dip) == NULL)
500 		return (-1);
501 
502 	mutex_enter(&sp->s_lock);
503 	count = sp->s_count + sp->s_mapcnt;
504 	if (sp->s_flag & SLOCKED)
505 		count++;
506 	mutex_exit(&sp->s_lock);
507 
508 	*dipp = dip;
509 
510 	return (count);
511 }
512 
513 /*
514  * Given a device vnode, return the common
515  * vnode associated with it.
516  */
517 struct vnode *
518 common_specvp(struct vnode *vp)
519 {
520 	struct snode *sp;
521 
522 	if ((vp->v_type != VBLK) && (vp->v_type != VCHR) ||
523 	    !vn_matchops(vp, spec_getvnodeops()))
524 		return (vp);
525 	sp = VTOS(vp);
526 	return (sp->s_commonvp);
527 }
528 
529 /*
530  * Returns a special vnode for the given dev.  The vnode is the
531  * one which is "common" to all the snodes which represent the
532  * same device.
533  * Similar to commonvp() but doesn't acquire the stable_lock, and
534  * may use a pre-allocated snode provided by caller.
535  */
536 static struct vnode *
537 get_cvp(
538 	dev_t		dev,
539 	vtype_t		type,
540 	struct snode	*nsp,		/* pre-allocated snode */
541 	int		*used_nsp)	/* flag indicating if we use nsp */
542 {
543 	struct snode *sp;
544 	struct vnode *svp;
545 
546 	ASSERT(MUTEX_HELD(&stable_lock));
547 	if ((sp = sfind(dev, type, NULL)) == NULL) {
548 		sp = nsp;		/* Use pre-allocated snode */
549 		*used_nsp = 1;		/* return value */
550 		svp = STOV(sp);
551 
552 		sp->s_realvp	= NULL;
553 		sp->s_commonvp	= svp;		/* points to itself */
554 		sp->s_dev	= dev;
555 		sp->s_dip	= NULL;
556 		sp->s_nextr	= NULL;
557 		sp->s_list	= NULL;
558 		sp->s_plcy	= NULL;
559 		sp->s_size	= UNKNOWN_SIZE;
560 		sp->s_flag	= 0;
561 		sp->s_fsid	= specdev;
562 		sp->s_atime	= 0;
563 		sp->s_mtime	= 0;
564 		sp->s_ctime	= 0;
565 		sp->s_count	= 0;
566 		sp->s_mapcnt	= 0;
567 
568 		vn_reinit(svp);
569 		svp->v_vfsp	= &spec_vfs;
570 		svp->v_type	= type;
571 		svp->v_rdev	= dev;
572 		vn_exists(svp);
573 		sinsert(sp);
574 	} else
575 		*used_nsp = 0;
576 	return (STOV(sp));
577 }
578 
579 /*
580  * Returns a special vnode for the given dev.  The vnode is the
581  * one which is "common" to all the snodes which represent the
582  * same device.  For use ONLY by SPECFS.
583  */
584 struct vnode *
585 commonvp(dev_t dev, vtype_t type)
586 {
587 	struct snode *sp, *nsp;
588 	struct vnode *svp;
589 
590 	/* Pre-allocate snode in case we might block */
591 	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
592 
593 	mutex_enter(&stable_lock);
594 	if ((sp = sfind(dev, type, NULL)) == NULL) {
595 		sp = nsp;		/* Use pre-alloced snode */
596 		svp = STOV(sp);
597 
598 		sp->s_realvp	= NULL;
599 		sp->s_commonvp	= svp;		/* points to itself */
600 		sp->s_dev	= dev;
601 		sp->s_dip	= NULL;
602 		sp->s_nextr	= NULL;
603 		sp->s_list	= NULL;
604 		sp->s_plcy	= NULL;
605 		sp->s_size	= UNKNOWN_SIZE;
606 		sp->s_flag	= 0;
607 		sp->s_fsid	= specdev;
608 		sp->s_atime	= 0;
609 		sp->s_mtime	= 0;
610 		sp->s_ctime	= 0;
611 		sp->s_count	= 0;
612 		sp->s_mapcnt	= 0;
613 
614 		vn_reinit(svp);
615 		svp->v_vfsp	= &spec_vfs;
616 		svp->v_type	= type;
617 		svp->v_rdev	= dev;
618 		vn_exists(svp);
619 		sinsert(sp);
620 		mutex_exit(&stable_lock);
621 	} else {
622 		mutex_exit(&stable_lock);
623 		/* Didn't need the pre-allocated snode */
624 		kmem_cache_free(snode_cache, nsp);
625 	}
626 	return (STOV(sp));
627 }
628 
/*
 * Snode lookup stuff.
 * These routines maintain a table of snodes hashed by dev so
 * that the snode for a dev can be found if it already exists.
 */
/* hash buckets, indexed by STABLEHASH(dev) and chained through s_next */
struct snode *stable[STABLESIZE];
/* number of buckets in stable[] */
int		stablesz = STABLESIZE;
/* held by the routines in this file while walking or changing stable[] */
kmutex_t	stable_lock;
637 
638 /*
639  * Put a snode in the table.
640  */
641 static void
642 sinsert(struct snode *sp)
643 {
644 	ASSERT(MUTEX_HELD(&stable_lock));
645 	sp->s_next = stable[STABLEHASH(sp->s_dev)];
646 	stable[STABLEHASH(sp->s_dev)] = sp;
647 }
648 
649 /*
650  * Remove an snode from the hash table.
651  * The realvp is not released here because spec_inactive() still
652  * needs it to do a spec_fsync().
653  */
654 void
655 sdelete(struct snode *sp)
656 {
657 	struct snode *st;
658 	struct snode *stprev = NULL;
659 
660 	ASSERT(MUTEX_HELD(&stable_lock));
661 	st = stable[STABLEHASH(sp->s_dev)];
662 	while (st != NULL) {
663 		if (st == sp) {
664 			if (stprev == NULL)
665 				stable[STABLEHASH(sp->s_dev)] = st->s_next;
666 			else
667 				stprev->s_next = st->s_next;
668 			break;
669 		}
670 		stprev = st;
671 		st = st->s_next;
672 	}
673 }
674 
675 /*
676  * Lookup an snode by <dev, type, vp>.
677  * ONLY looks for snodes with non-NULL s_realvp members and
678  * common snodes (with s_commonvp pointing to its vnode).
679  *
680  * If vp is NULL, only return commonvp. Otherwise return
681  * shadow vp with both shadow and common vp's VN_HELD.
682  */
683 static struct snode *
684 sfind(
685 	dev_t	dev,
686 	vtype_t	type,
687 	struct vnode *vp)
688 {
689 	struct snode *st;
690 	struct vnode *svp;
691 
692 	ASSERT(MUTEX_HELD(&stable_lock));
693 	st = stable[STABLEHASH(dev)];
694 	while (st != NULL) {
695 		svp = STOV(st);
696 		if (st->s_dev == dev && svp->v_type == type &&
697 		    VN_CMP(st->s_realvp, vp) &&
698 		    (vp != NULL || st->s_commonvp == svp) &&
699 		    (vp == NULL || st->s_realvp->v_vfsp == vp->v_vfsp)) {
700 			VN_HOLD(svp);
701 			return (st);
702 		}
703 		st = st->s_next;
704 	}
705 	return (NULL);
706 }
707 
708 /*
709  * Mark the accessed, updated, or changed times in an snode
710  * with the current time.
711  */
712 void
713 smark(struct snode *sp, int flag)
714 {
715 	time_t	now = gethrestime_sec();
716 
717 	/* check for change to avoid unnecessary locking */
718 	ASSERT((flag & ~(SACC|SUPD|SCHG)) == 0);
719 	if (((flag & sp->s_flag) != flag) ||
720 	    ((flag & SACC) && (sp->s_atime != now)) ||
721 	    ((flag & SUPD) && (sp->s_mtime != now)) ||
722 	    ((flag & SCHG) && (sp->s_ctime != now))) {
723 		/* lock and update */
724 		mutex_enter(&sp->s_lock);
725 		sp->s_flag |= flag;
726 		if (flag & SACC)
727 			sp->s_atime = now;
728 		if (flag & SUPD)
729 			sp->s_mtime = now;
730 		if (flag & SCHG)
731 			sp->s_ctime = now;
732 		mutex_exit(&sp->s_lock);
733 	}
734 }
735 
736 /*
737  * Return the maximum file offset permitted for this device.
738  * -1 means unrestricted.  SLOFFSET is associated with D_64BIT.
739  *
740  * On a 32-bit kernel this will limit:
741  *   o	D_64BIT devices to SPEC_MAXOFFSET_T.
742  *   o	non-D_64BIT character drivers to a 32-bit offset (MAXOFF_T).
743  */
744 offset_t
745 spec_maxoffset(struct vnode *vp)
746 {
747 	struct snode *sp = VTOS(vp);
748 	struct snode *csp = VTOS(sp->s_commonvp);
749 
750 	if (vp->v_stream)
751 		return ((offset_t)-1);
752 	else if (csp->s_flag & SANYOFFSET)	/* D_U64BIT */
753 		return ((offset_t)-1);
754 #ifdef _ILP32
755 	if (csp->s_flag & SLOFFSET)		/* D_64BIT */
756 		return (SPEC_MAXOFFSET_T);
757 #endif	/* _ILP32 */
758 	return (MAXOFF_T);
759 }
760 
761 /*ARGSUSED*/
762 static int
763 snode_constructor(void *buf, void *cdrarg, int kmflags)
764 {
765 	struct snode *sp = buf;
766 	struct vnode *vp;
767 
768 	vp = sp->s_vnode = vn_alloc(kmflags);
769 	if (vp == NULL) {
770 		return (-1);
771 	}
772 	vn_setops(vp, spec_getvnodeops());
773 	vp->v_data = sp;
774 
775 	mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
776 	cv_init(&sp->s_cv, NULL, CV_DEFAULT, NULL);
777 	return (0);
778 }
779 
780 /*ARGSUSED1*/
781 static void
782 snode_destructor(void *buf, void *cdrarg)
783 {
784 	struct snode *sp = buf;
785 	struct vnode *vp = STOV(sp);
786 
787 	mutex_destroy(&sp->s_lock);
788 	cv_destroy(&sp->s_cv);
789 
790 	vn_free(vp);
791 }
792 
793 
794 int
795 specinit(int fstype, char *name)
796 {
797 	static const fs_operation_def_t spec_vfsops_template[] = {
798 		VFSNAME_SYNC, { .vfs_sync = spec_sync },
799 		NULL, NULL
800 	};
801 	extern struct vnodeops *spec_vnodeops;
802 	extern const fs_operation_def_t spec_vnodeops_template[];
803 	struct vfsops *spec_vfsops;
804 	int error;
805 	dev_t dev;
806 
807 	/*
808 	 * Associate vfs and vnode operations.
809 	 */
810 	error = vfs_setfsops(fstype, spec_vfsops_template, &spec_vfsops);
811 	if (error != 0) {
812 		cmn_err(CE_WARN, "specinit: bad vfs ops template");
813 		return (error);
814 	}
815 
816 	error = vn_make_ops(name, spec_vnodeops_template, &spec_vnodeops);
817 	if (error != 0) {
818 		(void) vfs_freevfsops_by_type(fstype);
819 		cmn_err(CE_WARN, "specinit: bad vnode ops template");
820 		return (error);
821 	}
822 
823 	mutex_init(&stable_lock, NULL, MUTEX_DEFAULT, NULL);
824 	mutex_init(&spec_syncbusy, NULL, MUTEX_DEFAULT, NULL);
825 
826 	/*
827 	 * Create snode cache
828 	 */
829 	snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode),
830 	    0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0);
831 
832 	/*
833 	 * Associate vfs operations with spec_vfs
834 	 */
835 	VFS_INIT(&spec_vfs, spec_vfsops, (caddr_t)NULL);
836 	if ((dev = getudev()) == -1)
837 		dev = 0;
838 	specdev = makedevice(dev, 0);
839 	return (0);
840 }
841 
842 int
843 device_close(struct vnode *vp, int flag, struct cred *cr)
844 {
845 	struct snode *sp = VTOS(vp);
846 	enum vtype type = vp->v_type;
847 	struct vnode *cvp;
848 	dev_t dev;
849 	int error;
850 
851 	dev = sp->s_dev;
852 	cvp = sp->s_commonvp;
853 
854 	switch (type) {
855 
856 	case VCHR:
857 		if (vp->v_stream) {
858 			if (cvp->v_stream != NULL)
859 				error = strclose(cvp, flag, cr);
860 			vp->v_stream = NULL;
861 		} else
862 			error = dev_close(dev, flag, OTYP_CHR, cr);
863 		break;
864 
865 	case VBLK:
866 		/*
867 		 * On last close a block device we must
868 		 * invalidate any in-core blocks so that we
869 		 * can, for example, change floppy disks.
870 		 */
871 		(void) spec_putpage(cvp, (offset_t)0,
872 		    (size_t)0, B_INVAL|B_FORCE, cr, NULL);
873 		bflush(dev);
874 		binval(dev);
875 		error = dev_close(dev, flag, OTYP_BLK, cr);
876 		break;
877 	default:
878 		panic("device_close: not a device");
879 		/*NOTREACHED*/
880 	}
881 
882 	return (error);
883 }
884 
885 struct vnode *
886 makectty(vnode_t *ovp)
887 {
888 	vnode_t *vp;
889 
890 	if (vp = makespecvp(ovp->v_rdev, VCHR)) {
891 		struct snode *sp;
892 		struct snode *csp;
893 		struct vnode *cvp;
894 
895 		sp = VTOS(vp);
896 		cvp = sp->s_commonvp;
897 		csp = VTOS(cvp);
898 		mutex_enter(&csp->s_lock);
899 		csp->s_count++;
900 		mutex_exit(&csp->s_lock);
901 	}
902 
903 	return (vp);
904 }
905 
906 void
907 spec_snode_walk(int (*callback)(struct snode *sp, void *arg), void *arg)
908 {
909 	struct snode	*sp;
910 	int		i;
911 
912 	ASSERT(callback);
913 
914 	mutex_enter(&stable_lock);
915 	for (i = 0; i < STABLESIZE; i++) {
916 		for (sp = stable[i]; sp; sp = sp->s_next) {
917 			if (callback(sp, arg) != DDI_WALK_CONTINUE)
918 				goto out;
919 		}
920 	}
921 out:
922 	mutex_exit(&stable_lock);
923 }
924 
925 int
926 spec_is_clone(vnode_t *vp)
927 {
928 	struct snode *sp;
929 
930 	if (vn_matchops(vp, spec_getvnodeops())) {
931 		sp = VTOS(vp);
932 		return ((sp->s_flag & SCLONE) ? 1 : 0);
933 	}
934 
935 	return (0);
936 }
937 
938 int
939 spec_is_selfclone(vnode_t *vp)
940 {
941 	struct snode *sp;
942 
943 	if (vn_matchops(vp, spec_getvnodeops())) {
944 		sp = VTOS(vp);
945 		return ((sp->s_flag & SSELFCLONE) ? 1 : 0);
946 	}
947 
948 	return (0);
949 }
950 
951 /*
952  * We may be invoked with a NULL vp in which case we fence off
953  * all snodes associated with dip
954  */
955 int
956 spec_fence_snode(dev_info_t *dip, struct vnode *vp)
957 {
958 	struct snode	*sp;
959 	struct snode	*csp;
960 	int		retired;
961 	int		i;
962 	char		*path;
963 	int		emitted;
964 
965 	ASSERT(dip);
966 
967 	retired = 0;
968 	mutex_enter(&DEVI(dip)->devi_lock);
969 	if (DEVI(dip)->devi_flags & DEVI_RETIRED)
970 		retired = 1;
971 	mutex_exit(&DEVI(dip)->devi_lock);
972 
973 	if (!retired)
974 		return (0);
975 
976 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
977 	(void) ddi_pathname(dip, path);
978 
979 
980 	if (vp != NULL) {
981 		ASSERT(vn_matchops(vp, spec_getvnodeops()));
982 		csp = VTOCS(vp);
983 		ASSERT(csp);
984 		mutex_enter(&csp->s_lock);
985 		csp->s_flag |= SFENCED;
986 		mutex_exit(&csp->s_lock);
987 		FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
988 		    (void *)csp, path));
989 		kmem_free(path, MAXPATHLEN);
990 		return (0);
991 	}
992 
993 	emitted = 0;
994 	mutex_enter(&stable_lock);
995 	for (i = 0; i < STABLESIZE; i++) {
996 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
997 			ASSERT(sp->s_commonvp);
998 			csp = VTOS(sp->s_commonvp);
999 			if (csp->s_dip == dip) {
1000 				/* fence off the common snode */
1001 				mutex_enter(&csp->s_lock);
1002 				csp->s_flag |= SFENCED;
1003 				mutex_exit(&csp->s_lock);
1004 				if (!emitted) {
1005 					FENDBG((CE_NOTE, "fenced 1 of N"));
1006 					emitted++;
1007 				}
1008 			}
1009 		}
1010 	}
1011 	mutex_exit(&stable_lock);
1012 
1013 	FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
1014 	kmem_free(path, MAXPATHLEN);
1015 
1016 	return (0);
1017 }
1018 
1019 
1020 int
1021 spec_unfence_snode(dev_info_t *dip)
1022 {
1023 	struct snode	*sp;
1024 	struct snode	*csp;
1025 	int		i;
1026 	char		*path;
1027 	int		emitted;
1028 
1029 	ASSERT(dip);
1030 
1031 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1032 	(void) ddi_pathname(dip, path);
1033 
1034 	emitted = 0;
1035 	mutex_enter(&stable_lock);
1036 	for (i = 0; i < STABLESIZE; i++) {
1037 		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
1038 			ASSERT(sp->s_commonvp);
1039 			csp = VTOS(sp->s_commonvp);
1040 			ASSERT(csp);
1041 			if (csp->s_dip == dip) {
1042 				/* unfence the common snode */
1043 				mutex_enter(&csp->s_lock);
1044 				csp->s_flag &= ~SFENCED;
1045 				mutex_exit(&csp->s_lock);
1046 				if (!emitted) {
1047 					FENDBG((CE_NOTE, "unfenced 1 of N"));
1048 					emitted++;
1049 				}
1050 			}
1051 		}
1052 	}
1053 	mutex_exit(&stable_lock);
1054 
1055 	FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
1056 	kmem_free(path, MAXPATHLEN);
1057 
1058 	return (0);
1059 }
1060 
1061 void
1062 spec_size_invalidate(dev_t dev, vtype_t type)
1063 {
1064 
1065 	struct snode *csp;
1066 
1067 	mutex_enter(&stable_lock);
1068 	if ((csp = sfind(dev, type, NULL)) != NULL) {
1069 		mutex_enter(&csp->s_lock);
1070 		csp->s_flag &= ~SSIZEVALID;
1071 		VN_RELE_ASYNC(STOV(csp), system_taskq);
1072 		mutex_exit(&csp->s_lock);
1073 	}
1074 	mutex_exit(&stable_lock);
1075 }
1076