xref: /titanic_51/usr/src/uts/common/fs/specfs/specvnops.c (revision 06e6833ac1f55fa31b2fc68fa6af8abfc2974d0b)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a5652762Spraks  * Common Development and Distribution License (the "License").
6a5652762Spraks  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22349dcea3SGarrett D'Amore  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
24*06e6833aSJosef 'Jeff' Sipek  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
287c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate /*
317c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
327c478bd9Sstevel@tonic-gate  * The Regents of the University of California
337c478bd9Sstevel@tonic-gate  * All Rights Reserved
347c478bd9Sstevel@tonic-gate  *
357c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
367c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
377c478bd9Sstevel@tonic-gate  * contributors.
387c478bd9Sstevel@tonic-gate  */
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate #include <sys/types.h>
417c478bd9Sstevel@tonic-gate #include <sys/thread.h>
427c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
437c478bd9Sstevel@tonic-gate #include <sys/param.h>
447c478bd9Sstevel@tonic-gate #include <sys/systm.h>
457c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
467c478bd9Sstevel@tonic-gate #include <sys/buf.h>
477c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
487c478bd9Sstevel@tonic-gate #include <sys/conf.h>
497c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
507c478bd9Sstevel@tonic-gate #include <sys/debug.h>
51feb08c6bSbillm #include <sys/dkio.h>
527c478bd9Sstevel@tonic-gate #include <sys/errno.h>
537c478bd9Sstevel@tonic-gate #include <sys/time.h>
547c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
557c478bd9Sstevel@tonic-gate #include <sys/flock.h>
567c478bd9Sstevel@tonic-gate #include <sys/file.h>
577c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
587c478bd9Sstevel@tonic-gate #include <sys/mman.h>
597c478bd9Sstevel@tonic-gate #include <sys/open.h>
607c478bd9Sstevel@tonic-gate #include <sys/swap.h>
617c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
627c478bd9Sstevel@tonic-gate #include <sys/uio.h>
637c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
64aa59c4cbSrsb #include <sys/vfs_opreg.h>
657c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
667c478bd9Sstevel@tonic-gate #include <sys/stat.h>
677c478bd9Sstevel@tonic-gate #include <sys/poll.h>
687c478bd9Sstevel@tonic-gate #include <sys/stream.h>
697c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
707c478bd9Sstevel@tonic-gate #include <sys/policy.h>
717c478bd9Sstevel@tonic-gate #include <sys/devpolicy.h>
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate #include <sys/proc.h>
747c478bd9Sstevel@tonic-gate #include <sys/user.h>
757c478bd9Sstevel@tonic-gate #include <sys/session.h>
767c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
777c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
787c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
797c478bd9Sstevel@tonic-gate 
807c478bd9Sstevel@tonic-gate #include <sys/fs/snode.h>
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate #include <vm/seg.h>
837c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
847c478bd9Sstevel@tonic-gate #include <vm/page.h>
857c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
867c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h>
877c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
887c478bd9Sstevel@tonic-gate 
897c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate #include <sys/esunddi.h>
927c478bd9Sstevel@tonic-gate #include <sys/autoconf.h>
937c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
9425e8c5aaSvikram #include <sys/contract/device_impl.h>
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate 
97da6c28aaSamw static int spec_open(struct vnode **, int, struct cred *, caller_context_t *);
98da6c28aaSamw static int spec_close(struct vnode *, int, int, offset_t, struct cred *,
99da6c28aaSamw 	caller_context_t *);
1007c478bd9Sstevel@tonic-gate static int spec_read(struct vnode *, struct uio *, int, struct cred *,
101da6c28aaSamw 	caller_context_t *);
1027c478bd9Sstevel@tonic-gate static int spec_write(struct vnode *, struct uio *, int, struct cred *,
103da6c28aaSamw 	caller_context_t *);
104da6c28aaSamw static int spec_ioctl(struct vnode *, int, intptr_t, int, struct cred *, int *,
105da6c28aaSamw 	caller_context_t *);
106da6c28aaSamw static int spec_getattr(struct vnode *, struct vattr *, int, struct cred *,
107da6c28aaSamw 	caller_context_t *);
1087c478bd9Sstevel@tonic-gate static int spec_setattr(struct vnode *, struct vattr *, int, struct cred *,
1097c478bd9Sstevel@tonic-gate 	caller_context_t *);
110da6c28aaSamw static int spec_access(struct vnode *, int, int, struct cred *,
111da6c28aaSamw 	caller_context_t *);
112da6c28aaSamw static int spec_create(struct vnode *, char *, vattr_t *, enum vcexcl, int,
113da6c28aaSamw 	struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
114da6c28aaSamw static int spec_fsync(struct vnode *, int, struct cred *, caller_context_t *);
115da6c28aaSamw static void spec_inactive(struct vnode *, struct cred *, caller_context_t *);
116da6c28aaSamw static int spec_fid(struct vnode *, struct fid *, caller_context_t *);
117da6c28aaSamw static int spec_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
1187c478bd9Sstevel@tonic-gate static int spec_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
119da6c28aaSamw 	struct flk_callback *, struct cred *, caller_context_t *);
120da6c28aaSamw static int spec_realvp(struct vnode *, struct vnode **, caller_context_t *);
1217c478bd9Sstevel@tonic-gate 
1227c478bd9Sstevel@tonic-gate static int spec_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t **,
123da6c28aaSamw 	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
124da6c28aaSamw 	caller_context_t *);
1257c478bd9Sstevel@tonic-gate static int spec_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
1267c478bd9Sstevel@tonic-gate 	struct cred *);
1277c478bd9Sstevel@tonic-gate static struct buf *spec_startio(struct vnode *, page_t *, u_offset_t, size_t,
1287c478bd9Sstevel@tonic-gate 	int);
1297c478bd9Sstevel@tonic-gate static int spec_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
1307c478bd9Sstevel@tonic-gate 	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
1317c478bd9Sstevel@tonic-gate static int spec_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
132da6c28aaSamw 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
1337c478bd9Sstevel@tonic-gate static int spec_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
134da6c28aaSamw 	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
1357c478bd9Sstevel@tonic-gate static int spec_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
136da6c28aaSamw 	uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
1377c478bd9Sstevel@tonic-gate 
138da6c28aaSamw static int spec_poll(struct vnode *, short, int, short *, struct pollhead **,
139da6c28aaSamw 	caller_context_t *);
140d7334e51Srm15945 static int spec_dump(struct vnode *, caddr_t, offset_t, offset_t,
141d7334e51Srm15945     caller_context_t *);
1427c478bd9Sstevel@tonic-gate static int spec_pageio(struct vnode *, page_t *, u_offset_t, size_t, int,
143da6c28aaSamw     cred_t *, caller_context_t *);
1447c478bd9Sstevel@tonic-gate 
145da6c28aaSamw static int spec_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
146da6c28aaSamw 	caller_context_t *);
147da6c28aaSamw static int spec_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
148da6c28aaSamw 	caller_context_t *);
149da6c28aaSamw static int spec_pathconf(struct	vnode *, int, ulong_t *, struct cred *,
150da6c28aaSamw 	caller_context_t *);
1517c478bd9Sstevel@tonic-gate 
/*
 * Open-reference count manipulation on a common snode.  SN_HOLD/SN_RELE
 * adjust s_count under s_lock; s_count tracks open references so specfs
 * can detect the last close of a device.
 *
 * Wrapped in do { } while (0) so each macro expands to exactly one
 * statement and stays safe inside unbraced if/else bodies (the original
 * bare { } form left a dangling empty statement after the caller's ';').
 */
#define	SN_HOLD(csp)	do { \
	mutex_enter(&csp->s_lock); \
	csp->s_count++; \
	mutex_exit(&csp->s_lock); \
} while (0)

/*
 * Drop an open reference.  The ASSERT allows s_count to reach zero only
 * once the associated stream (if any) has been torn down.
 */
#define	SN_RELE(csp)	do { \
	mutex_enter(&csp->s_lock); \
	csp->s_count--; \
	ASSERT((csp->s_count > 0) || (csp->s_vnode->v_stream == NULL)); \
	mutex_exit(&csp->s_lock); \
} while (0)
1647c478bd9Sstevel@tonic-gate 
16525e8c5aaSvikram #define	S_ISFENCED(sp)	((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
16625e8c5aaSvikram 
1677c478bd9Sstevel@tonic-gate struct vnodeops *spec_vnodeops;
1687c478bd9Sstevel@tonic-gate 
16925e8c5aaSvikram /*
17025e8c5aaSvikram  * *PLEASE NOTE*: If you add new entry points to specfs, do
17125e8c5aaSvikram  * not forget to add support for fencing. A fenced snode
17225e8c5aaSvikram  * is indicated by the SFENCED flag in the common snode.
17325e8c5aaSvikram  * If a snode is fenced, determine if your entry point is
17425e8c5aaSvikram  * a configuration operation (Example: open), a detection
17525e8c5aaSvikram  * operation (Example: gettattr), an I/O operation (Example: ioctl())
17625e8c5aaSvikram  * or an unconfiguration operation (Example: close). If it is
17725e8c5aaSvikram  * a configuration or detection operation, fail the operation
17825e8c5aaSvikram  * for a fenced snode with an ENXIO or EIO as appropriate. If
17925e8c5aaSvikram  * it is any other operation, let it through.
18025e8c5aaSvikram  */
18125e8c5aaSvikram 
/*
 * Template mapping each vnode operation to its specfs implementation;
 * used to construct the spec_vnodeops vector.  The list is terminated
 * by the NULL/NULL sentinel entry.
 */
const fs_operation_def_t spec_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = spec_open },
	VOPNAME_CLOSE,		{ .vop_close = spec_close },
	VOPNAME_READ,		{ .vop_read = spec_read },
	VOPNAME_WRITE,		{ .vop_write = spec_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = spec_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = spec_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = spec_setattr },
	VOPNAME_ACCESS,		{ .vop_access = spec_access },
	VOPNAME_CREATE,		{ .vop_create = spec_create },
	VOPNAME_FSYNC,		{ .vop_fsync = spec_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = spec_inactive },
	VOPNAME_FID,		{ .vop_fid = spec_fid },
	VOPNAME_SEEK,		{ .vop_seek = spec_seek },
	VOPNAME_PATHCONF,	{ .vop_pathconf = spec_pathconf },
	VOPNAME_FRLOCK,		{ .vop_frlock = spec_frlock },
	VOPNAME_REALVP,		{ .vop_realvp = spec_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = spec_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = spec_putpage },
	VOPNAME_MAP,		{ .vop_map = spec_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = spec_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = spec_delmap },
	VOPNAME_POLL,		{ .vop_poll = spec_poll },
	VOPNAME_DUMP,		{ .vop_dump = spec_dump },
	VOPNAME_PAGEIO,		{ .vop_pageio = spec_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = spec_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = spec_getsecattr },
	NULL,			NULL
};
2117c478bd9Sstevel@tonic-gate 
/*
 * Return the address of the specfs vnodeops vector (spec_vnodeops),
 * built from spec_vnodeops_template above.
 */
struct vnodeops *
spec_getvnodeops(void)
{
	return (spec_vnodeops);
}
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate extern vnode_t *rconsvp;
2227c478bd9Sstevel@tonic-gate 
/*
 * Acquire the serial lock on the common snode.  These are convenience
 * wrappers around spec_lockcsp(csp, intr, setlock, hold):
 *
 *	LOCK_CSP	 - uninterruptible wait, set SLOCKED, no SN_HOLD
 *	LOCKHOLD_CSP_SIG - signal-interruptible, set SLOCKED, and SN_HOLD
 *	SYNCHOLD_CSP_SIG - caller-chosen interruptibility, do not set
 *			   SLOCKED, but do SN_HOLD (synchronize only)
 */
#define	LOCK_CSP(csp)			(void) spec_lockcsp(csp, 0, 1, 0)
#define	LOCKHOLD_CSP_SIG(csp)		spec_lockcsp(csp, 1, 1, 1)
#define	SYNCHOLD_CSP_SIG(csp, intr)	spec_lockcsp(csp, intr, 0, 1)
2297c478bd9Sstevel@tonic-gate 
/* return values for spec_lockcsp() */
typedef enum {
	LOOP,		/* interrupted, no close in progress: caller retries */
	INTR,		/* interrupted while another thread holds SCLOSING */
	SUCCESS		/* lock and/or hold acquired as requested */
} slock_ret_t;
2357f9b0c87Scg13442 
2367c478bd9Sstevel@tonic-gate /*
2377f9b0c87Scg13442  * Synchronize with active SLOCKED snode, optionally checking for a signal and
238e099bf07Scth  * optionally returning with SLOCKED set and SN_HOLD done.  The 'intr'
239e099bf07Scth  * argument determines if the thread is interruptible by a signal while
2407f9b0c87Scg13442  * waiting, the function returns INTR if interrupted while there is another
2417f9b0c87Scg13442  * thread closing this snonde and LOOP if interrupted otherwise.
2427f9b0c87Scg13442  * When SUCCESS is returned the 'hold' argument determines if the open
2437f9b0c87Scg13442  * count (SN_HOLD) has been incremented and the 'setlock' argument
2447f9b0c87Scg13442  * determines if the function returns with SLOCKED set.
2457c478bd9Sstevel@tonic-gate  */
static slock_ret_t
spec_lockcsp(struct snode *csp, int intr, int setlock, int hold)
{
	slock_ret_t ret = SUCCESS;

	mutex_enter(&csp->s_lock);
	/* wait for any thread currently holding the serial lock */
	while (csp->s_flag & SLOCKED) {
		csp->s_flag |= SWANT;	/* ask the holder to cv_broadcast us */
		if (intr) {
			if (!cv_wait_sig(&csp->s_cv, &csp->s_lock)) {
				/*
				 * Interrupted by a signal: report INTR if
				 * the wait was against an in-progress close
				 * (SCLOSING), LOOP for any other holder.
				 */
				if (csp->s_flag & SCLOSING)
					ret = INTR;
				else
					ret = LOOP;
				mutex_exit(&csp->s_lock);
				return (ret);		/* interrupted */
			}
		} else {
			/* uninterruptible wait */
			cv_wait(&csp->s_cv, &csp->s_lock);
		}
	}
	if (setlock)
		csp->s_flag |= SLOCKED;	/* return holding the serial lock */
	if (hold)
		csp->s_count++;		/* one more open reference : SN_HOLD */
	mutex_exit(&csp->s_lock);
	return (ret);			/* serialized/locked */
}
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate /*
2757c478bd9Sstevel@tonic-gate  * Unlock the serial lock on the common snode
2767c478bd9Sstevel@tonic-gate  */
/*
 * Release the serial lock on the common snode.  UNLOCK_CSP_LOCK_HELD
 * requires that the caller already holds s_lock (asserted); UNLOCK_CSP
 * acquires and drops s_lock itself.  Any waiters (SWANT) are woken
 * before the SWANT|SLOCKED bits are cleared.
 *
 * Wrapped in do { } while (0): the previous bare multi-statement form
 * (including an unbraced 'if') would silently misbehave if the macro
 * were ever used inside an unbraced conditional.
 */
#define	UNLOCK_CSP_LOCK_HELD(csp)	do {		\
	ASSERT(mutex_owned(&csp->s_lock));		\
	if (csp->s_flag & SWANT)			\
		cv_broadcast(&csp->s_cv);		\
	csp->s_flag &= ~(SWANT|SLOCKED);		\
} while (0)

#define	UNLOCK_CSP(csp)			do {		\
	mutex_enter(&csp->s_lock);			\
	UNLOCK_CSP_LOCK_HELD(csp);			\
	mutex_exit(&csp->s_lock);			\
} while (0)
2877c478bd9Sstevel@tonic-gate 
/*
 * Compute/return the size of the device: use the cached s_size when
 * SSIZEVALID is set, otherwise have spec_size() (re)compute and cache it.
 * NOTE: evaluates 'csp' more than once; pass only side-effect-free
 * expressions.
 */
#define	SPEC_SIZE(csp)	\
	(((csp)->s_flag & SSIZEVALID) ? (csp)->s_size : spec_size(csp))
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate /*
2957c478bd9Sstevel@tonic-gate  * Compute and return the size.  If the size in the common snode is valid then
2967c478bd9Sstevel@tonic-gate  * return it.  If not valid then get the size from the driver and set size in
2977c478bd9Sstevel@tonic-gate  * the common snode.  If the device has not been attached then we don't ask for
2987c478bd9Sstevel@tonic-gate  * an update from the driver- for non-streams SSIZEVALID stays unset until the
2997c478bd9Sstevel@tonic-gate  * device is attached. A stat of a mknod outside /devices (non-devfs) may
3007c478bd9Sstevel@tonic-gate  * report UNKNOWN_SIZE because the device may not be attached yet (SDIPSET not
3017c478bd9Sstevel@tonic-gate  * established in mknod until open time). An stat in /devices will report the
3027c478bd9Sstevel@tonic-gate  * size correctly.  Specfs should always call SPEC_SIZE instead of referring
3037c478bd9Sstevel@tonic-gate  * directly to s_size to initialize/retrieve the size of a device.
3047c478bd9Sstevel@tonic-gate  *
3057c478bd9Sstevel@tonic-gate  * XXX There is an inconsistency between block and raw - "unknown" is
3067c478bd9Sstevel@tonic-gate  * UNKNOWN_SIZE for VBLK and 0 for VCHR(raw).
3077c478bd9Sstevel@tonic-gate  */
static u_offset_t
spec_size(struct snode *csp)
{
	struct vnode	*cvp = STOV(csp);
	u_offset_t	size;
	int		plen;
	uint32_t	size32;
	dev_t		dev;
	dev_info_t	*devi;
	major_t		maj;
	uint_t		blksize;
	int		blkshift;

	ASSERT((csp)->s_commonvp == cvp);	/* must be common node */

	/* return cached value */
	mutex_enter(&csp->s_lock);
	if (csp->s_flag & SSIZEVALID) {
		mutex_exit(&csp->s_lock);
		return (csp->s_size);
	}

	/* VOP_GETATTR of mknod has not had devcnt restriction applied */
	dev = cvp->v_rdev;
	maj = getmajor(dev);
	if (maj >= devcnt) {
		/* return non-cached UNKNOWN_SIZE */
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
	}

	/* establish cached zero size for streams */
	if (STREAMSTAB(maj)) {
		csp->s_size = 0;
		csp->s_flag |= SSIZEVALID;
		mutex_exit(&csp->s_lock);
		return (0);
	}

	/*
	 * Return non-cached UNKNOWN_SIZE if not open.
	 *
	 * NB: This check is bogus, calling prop_op(9E) should be gated by
	 * attach, not open. Not having this check however opens up a new
	 * context under which a driver's prop_op(9E) could be called. Calling
	 * prop_op(9E) in this new context has been shown to expose latent
	 * driver bugs (insufficient NULL pointer checks that lead to panic).
	 * We are keeping this open check for now to avoid these panics.
	 */
	if (csp->s_count == 0) {
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
	}

	/* Return non-cached UNKNOWN_SIZE if not attached. */
	if (((csp->s_flag & SDIPSET) == 0) || (csp->s_dip == NULL) ||
	    !i_ddi_devi_attached(csp->s_dip)) {
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
	}

	devi = csp->s_dip;

	/*
	 * Established cached size obtained from the attached driver. Since we
	 * know the devinfo node, for efficiency we use cdev_prop_op directly
	 * instead of [cb]dev_[Ss]size.
	 */
	if (cvp->v_type == VCHR) {
		/*
		 * Character device: try the 64-bit "Size" property first,
		 * then fall back to the 32-bit "size" property.
		 */
		size = 0;
		plen = sizeof (size);
		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
		    DDI_PROP_CONSUMER_TYPED, "Size", (caddr_t)&size,
		    &plen) != DDI_PROP_SUCCESS) {
			plen = sizeof (size32);
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "size", (caddr_t)&size32, &plen) ==
			    DDI_PROP_SUCCESS)
				size = size32;
		}
	} else {
		/*
		 * Block device: size is reported in blocks; try the 64-bit
		 * "Nblocks" property first, then the 32-bit "nblocks".
		 */
		size = UNKNOWN_SIZE;
		plen = sizeof (size);
		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
		    DDI_PROP_CONSUMER_TYPED, "Nblocks", (caddr_t)&size,
		    &plen) != DDI_PROP_SUCCESS) {
			plen = sizeof (size32);
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "nblocks", (caddr_t)&size32, &plen) ==
			    DDI_PROP_SUCCESS)
				size = size32;
		}

		if (size != UNKNOWN_SIZE) {
			blksize = DEV_BSIZE;		/* default */
			plen = sizeof (blksize);

			/* try to get dev_t specific "blksize" */
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "blksize", (caddr_t)&blksize, &plen) !=
			    DDI_PROP_SUCCESS) {
				/*
				 * Try for dev_info node "device-blksize".
				 * If this fails then blksize will still be
				 * DEV_BSIZE default value.
				 */
				(void) cdev_prop_op(DDI_DEV_T_ANY, devi,
				    PROP_LEN_AND_VAL_BUF,
				    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
				    "device-blksize", (caddr_t)&blksize, &plen);
			}

			/* blksize must be a power of two */
			ASSERT(BIT_ONLYONESET(blksize));
			blkshift = highbit(blksize) - 1;

			/* convert from block size to byte size */
			if (size < (MAXOFFSET_T >> blkshift))
				size = size << blkshift;
			else
				size = UNKNOWN_SIZE;	/* would overflow */
		}
	}

	/* cache the computed size for subsequent SPEC_SIZE calls */
	csp->s_size = size;
	csp->s_flag |= SSIZEVALID;

	mutex_exit(&csp->s_lock);
	return (size);
}
4437c478bd9Sstevel@tonic-gate 
/*
 * This function deals with vnode substitution in the case of device
 * cloning: replace the snode/vnode in *vpp (for the original dev) with a
 * freshly made one for newdev, carrying over fsid, dip association, and
 * any stream state.  Returns 0 on success, ENXIO on an unsupported
 * cross-driver clone.
 */
static int
spec_clone(struct vnode **vpp, dev_t newdev, int vtype, struct stdata *stp)
{
	dev_t		dev = (*vpp)->v_rdev;
	major_t		maj = getmajor(dev);
	major_t		newmaj = getmajor(newdev);
	int		sysclone = (maj == clone_major);
	int		qassociate_used = 0;
	struct snode	*oldsp, *oldcsp;
	struct snode	*newsp, *newcsp;
	struct vnode	*newvp, *newcvp;
	dev_info_t	*dip;
	queue_t		*dq;

	ASSERT(dev != newdev);

	/*
	 * Check for cloning across different drivers.
	 * We only support this under the system provided clone driver
	 */
	if ((maj != newmaj) && !sysclone) {
		cmn_err(CE_NOTE,
		    "unsupported clone open maj = %u, newmaj = %u",
		    maj, newmaj);
		return (ENXIO);
	}

	/* old */
	oldsp = VTOS(*vpp);
	oldcsp = VTOS(oldsp->s_commonvp);

	/* new */
	newvp = makespecvp(newdev, vtype);
	ASSERT(newvp != NULL);
	newsp = VTOS(newvp);
	newcvp = newsp->s_commonvp;
	newcsp = VTOS(newcvp);

	/*
	 * Clones inherit fsid, realvp, and dip.
	 * XXX realvp inherit is not occurring, does fstat of clone work?
	 */
	newsp->s_fsid = oldsp->s_fsid;
	if (sysclone) {
		newsp->s_flag |= SCLONE;
		dip = NULL;
	} else {
		newsp->s_flag |= SSELFCLONE;
		dip = oldcsp->s_dip;
	}

	/*
	 * If we cloned to an opened newdev that already has called
	 * spec_assoc_vp_with_devi (SDIPSET set) then the association is
	 * already established.
	 */
	if (!(newcsp->s_flag & SDIPSET)) {
		/*
		 * Establish s_dip association for newdev.
		 *
		 * If we trusted the getinfo(9E) DDI_INFO_DEVT2INSTANCE
		 * implementation of all cloning drivers  (SCLONE and SELFCLONE)
		 * we would always use e_ddi_hold_devi_by_dev().  We know that
		 * many drivers have had (still have?) problems with
		 * DDI_INFO_DEVT2INSTANCE, so we try to minimize reliance by
		 * detecting drivers that use QASSOCIATE (by looking down the
		 * stream) and setting their s_dip association to NULL.
		 */
		qassociate_used = 0;
		if (stp) {
			/* scan the write-side queues for _QASSOCIATED */
			for (dq = stp->sd_wrq; dq; dq = dq->q_next) {
				if (_RD(dq)->q_flag & _QASSOCIATED) {
					qassociate_used = 1;
					dip = NULL;
					break;
				}
			}
		}

		if (dip || qassociate_used) {
			spec_assoc_vp_with_devi(newvp, dip);
		} else {
			/* derive association from newdev */
			dip = e_ddi_hold_devi_by_dev(newdev, 0);
			spec_assoc_vp_with_devi(newvp, dip);
			if (dip)
				ddi_release_devi(dip);
		}
	}

	/* take an open reference on the new common snode */
	SN_HOLD(newcsp);

	/* deal with stream stuff */
	if (stp != NULL) {
		LOCK_CSP(newcsp);	/* synchronize stream open/close */
		mutex_enter(&newcsp->s_lock);
		newcvp->v_stream = newvp->v_stream = stp;
		stp->sd_vnode = newcvp;
		stp->sd_strtab = STREAMSTAB(newmaj);
		mutex_exit(&newcsp->s_lock);
		UNLOCK_CSP(newcsp);
	}

	/* substitute the vnode: drop old references, hand back newvp */
	SN_RELE(oldcsp);
	VN_RELE(*vpp);
	*vpp = newvp;

	return (0);
}
5587c478bd9Sstevel@tonic-gate 
/*
 * spec_open: the specfs VOP_OPEN entry point for special files (VCHR/VBLK).
 *
 *	vpp	in/out: vnode being opened.  If the driver clones to a
 *		different dev_t, spec_clone() substitutes a new vnode and
 *		*vpp is updated to point at it.
 *	flag	open flags; FCREAT is stripped defensively.  FKLYR marks a
 *		kernel-layered open, which skips device contract creation.
 *	cr	credentials for the secpolicy check and the driver open.
 *	cc	caller context, passed through to spec_close() on the
 *		error-unwind paths.
 *
 * Returns 0 on success, otherwise an errno (ENXIO, EINTR, EIO, or
 * whatever the driver's open(9E) returned).
 *
 * The open is performed against the common snode (csp) so that all
 * [s,v]nodes referring to the same underlying device share open state.
 * Character devices with a STREAMS table go through stropen() at
 * streams_open; an ENOSTR return from a dual-personality device falls
 * back to the ordinary cb_open path at not_streams.
 */
5597c478bd9Sstevel@tonic-gate static int
560da6c28aaSamw spec_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *cc)
5617c478bd9Sstevel@tonic-gate {
5627c478bd9Sstevel@tonic-gate 	major_t maj;
5637c478bd9Sstevel@tonic-gate 	dev_t dev, newdev;
5647c478bd9Sstevel@tonic-gate 	struct vnode *vp, *cvp;
5657c478bd9Sstevel@tonic-gate 	struct snode *sp, *csp;
5667c478bd9Sstevel@tonic-gate 	struct stdata *stp;
5677c478bd9Sstevel@tonic-gate 	dev_info_t *dip;
5687c478bd9Sstevel@tonic-gate 	int error, type;
56925e8c5aaSvikram 	contract_t *ct = NULL;
570e099bf07Scth 	int open_returns_eintr;
5717f9b0c87Scg13442 	slock_ret_t spec_locksp_ret;
5727f9b0c87Scg13442 
5737c478bd9Sstevel@tonic-gate 
5747c478bd9Sstevel@tonic-gate 	flag &= ~FCREAT;		/* paranoia */
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate 	vp = *vpp;
5777c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
5787c478bd9Sstevel@tonic-gate 	ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
5797c478bd9Sstevel@tonic-gate 	if ((vp->v_type != VCHR) && (vp->v_type != VBLK))
5807c478bd9Sstevel@tonic-gate 		return (ENXIO);
5817c478bd9Sstevel@tonic-gate 
5827c478bd9Sstevel@tonic-gate 	/*
5837c478bd9Sstevel@tonic-gate 	 * If the VFS_NODEVICES bit was set for the mount,
5847c478bd9Sstevel@tonic-gate 	 * do not allow opens of special devices.
5857c478bd9Sstevel@tonic-gate 	 */
5867c478bd9Sstevel@tonic-gate 	if (sp->s_realvp && (sp->s_realvp->v_vfsp->vfs_flag & VFS_NODEVICES))
5877c478bd9Sstevel@tonic-gate 		return (ENXIO);
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	newdev = dev = vp->v_rdev;
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 	/*
5927c478bd9Sstevel@tonic-gate 	 * If we are opening a node that has not had spec_assoc_vp_with_devi
5937c478bd9Sstevel@tonic-gate 	 * called against it (mknod outside /devices or a non-dacf makespecvp
5947c478bd9Sstevel@tonic-gate 	 * node) then SDIPSET will not be set. In this case we call an
5957c478bd9Sstevel@tonic-gate 	 * interface which will reconstruct the path and lookup (drive attach)
5967c478bd9Sstevel@tonic-gate 	 * through devfs (e_ddi_hold_devi_by_dev -> e_ddi_hold_devi_by_path ->
5977c478bd9Sstevel@tonic-gate 	 * devfs_lookupname).  For support of broken drivers that don't call
5987c478bd9Sstevel@tonic-gate 	 * ddi_create_minor_node for all minor nodes in their instance space,
5997c478bd9Sstevel@tonic-gate 	 * we call interfaces that operates at the directory/devinfo
6007c478bd9Sstevel@tonic-gate 	 * (major/instance) level instead of to the leaf/minor node level.
6017c478bd9Sstevel@tonic-gate 	 * After finding and attaching the dip we associate it with the
6027c478bd9Sstevel@tonic-gate 	 * common specfs vnode (s_dip), which sets SDIPSET.  A DL_DETACH_REQ
6037c478bd9Sstevel@tonic-gate 	 * to style-2 stream driver may set s_dip to NULL with SDIPSET set.
6047c478bd9Sstevel@tonic-gate 	 *
6057c478bd9Sstevel@tonic-gate 	 * NOTE: Although e_ddi_hold_devi_by_dev takes a dev_t argument, its
6067c478bd9Sstevel@tonic-gate 	 * implementation operates at the major/instance level since it only
6077c478bd9Sstevel@tonic-gate 	 * need to return a dip.
6087c478bd9Sstevel@tonic-gate 	 */
6097c478bd9Sstevel@tonic-gate 	cvp = sp->s_commonvp;
6107c478bd9Sstevel@tonic-gate 	csp = VTOS(cvp);
6117c478bd9Sstevel@tonic-gate 	if (!(csp->s_flag & SDIPSET)) {
6127c478bd9Sstevel@tonic-gate 		/* try to attach, return error if we fail */
6137c478bd9Sstevel@tonic-gate 		if ((dip = e_ddi_hold_devi_by_dev(dev, 0)) == NULL)
6147c478bd9Sstevel@tonic-gate 			return (ENXIO);
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate 		/* associate dip with the common snode s_dip */
6177c478bd9Sstevel@tonic-gate 		spec_assoc_vp_with_devi(vp, dip);
6187c478bd9Sstevel@tonic-gate 		ddi_release_devi(dip);	/* from e_ddi_hold_devi_by_dev */
6197c478bd9Sstevel@tonic-gate 	}
6207c478bd9Sstevel@tonic-gate 
62125e8c5aaSvikram 	/* check if device fenced off */
62225e8c5aaSvikram 	if (S_ISFENCED(sp))
62325e8c5aaSvikram 		return (ENXIO);
62425e8c5aaSvikram 
6257c478bd9Sstevel@tonic-gate #ifdef  DEBUG
6267c478bd9Sstevel@tonic-gate 	/* verify attach/open exclusion guarantee */
6277c478bd9Sstevel@tonic-gate 	dip = csp->s_dip;
628737d277aScth 	ASSERT((dip == NULL) || i_ddi_devi_attached(dip));
6297c478bd9Sstevel@tonic-gate #endif  /* DEBUG */
6307c478bd9Sstevel@tonic-gate 
631853de45fSdh155122 	if ((error = secpolicy_spec_open(cr, vp, flag)) != 0)
6327c478bd9Sstevel@tonic-gate 		return (error);
6337c478bd9Sstevel@tonic-gate 
6344f7df455Scth 	/* Verify existence of open(9E) implementation. */
6357c478bd9Sstevel@tonic-gate 	maj = getmajor(dev);
6364f7df455Scth 	if ((maj >= devcnt) ||
6374f7df455Scth 	    (devopsp[maj]->devo_cb_ops == NULL) ||
6384f7df455Scth 	    (devopsp[maj]->devo_cb_ops->cb_open == NULL))
6394f7df455Scth 		return (ENXIO);
6404f7df455Scth 
641349dcea3SGarrett D'Amore 	/*
642349dcea3SGarrett D'Amore 	 * split STREAMS vs. non-STREAMS
643349dcea3SGarrett D'Amore 	 *
644349dcea3SGarrett D'Amore 	 * If the device is a dual-personality device, then we might want
645349dcea3SGarrett D'Amore 	 * to allow for a regular OTYP_BLK open.  If however it's strictly
646349dcea3SGarrett D'Amore 	 * a pure STREAMS device, the cb_open entry point will be
647349dcea3SGarrett D'Amore 	 * nodev() which returns ENXIO.  This does make this failure path
648349dcea3SGarrett D'Amore 	 * somewhat longer, but such attempts to use OTYP_BLK with STREAMS
649349dcea3SGarrett D'Amore 	 * devices should be exceedingly rare.  (Most of the time they will
650349dcea3SGarrett D'Amore 	 * be due to programmer error.)
651349dcea3SGarrett D'Amore 	 */
652349dcea3SGarrett D'Amore 	if ((vp->v_type == VCHR) && (STREAMSTAB(maj)))
6537c478bd9Sstevel@tonic-gate 		goto streams_open;
6547c478bd9Sstevel@tonic-gate 
655349dcea3SGarrett D'Amore not_streams:
656e099bf07Scth 	/*
657e099bf07Scth 	 * Wait for in progress last close to complete. This guarantees
658e099bf07Scth 	 * to the driver writer that we will never be in the drivers
659e099bf07Scth 	 * open and close on the same (dev_t, otype) at the same time.
660e099bf07Scth 	 * Open count already incremented (SN_HOLD) on non-zero return.
661e099bf07Scth 	 * The wait is interruptible by a signal if the driver sets the
662e099bf07Scth 	 * D_OPEN_RETURNS_EINTR cb_ops(9S) cb_flag or sets the
663e099bf07Scth 	 * ddi-open-returns-eintr(9P) property in its driver.conf.
664e099bf07Scth 	 */
665e099bf07Scth 	if ((devopsp[maj]->devo_cb_ops->cb_flag & D_OPEN_RETURNS_EINTR) ||
666e099bf07Scth 	    (devnamesp[maj].dn_flags & DN_OPEN_RETURNS_EINTR))
667e099bf07Scth 		open_returns_eintr = 1;
668e099bf07Scth 	else
669e099bf07Scth 		open_returns_eintr = 0;
6707f9b0c87Scg13442 	while ((spec_locksp_ret = SYNCHOLD_CSP_SIG(csp, open_returns_eintr)) !=
6717f9b0c87Scg13442 	    SUCCESS) {
6727f9b0c87Scg13442 		if (spec_locksp_ret == INTR)
673e099bf07Scth 			return (EINTR);
674e099bf07Scth 	}
675fbe27353Sedp 
6767c478bd9Sstevel@tonic-gate 	/* non streams open */
6777c478bd9Sstevel@tonic-gate 	type = (vp->v_type == VBLK ? OTYP_BLK : OTYP_CHR);
6787c478bd9Sstevel@tonic-gate 	error = dev_open(&newdev, flag, type, cr);
6797c478bd9Sstevel@tonic-gate 
6807c478bd9Sstevel@tonic-gate 	/* deal with clone case */
6817c478bd9Sstevel@tonic-gate 	if (error == 0 && dev != newdev) {
6827c478bd9Sstevel@tonic-gate 		error = spec_clone(vpp, newdev, vp->v_type, NULL);
6837c478bd9Sstevel@tonic-gate 		/*
6847c478bd9Sstevel@tonic-gate 		 * bail on clone failure, further processing
6857c478bd9Sstevel@tonic-gate 		 * results in undefined behaviors.
6867c478bd9Sstevel@tonic-gate 		 */
6877c478bd9Sstevel@tonic-gate 		if (error != 0)
6887c478bd9Sstevel@tonic-gate 			return (error);
		/* *vpp was replaced by spec_clone(); refresh sp/csp to match */
6897c478bd9Sstevel@tonic-gate 		sp = VTOS(*vpp);
6907c478bd9Sstevel@tonic-gate 		csp = VTOS(sp->s_commonvp);
6917c478bd9Sstevel@tonic-gate 	}
6927c478bd9Sstevel@tonic-gate 
69325e8c5aaSvikram 	/*
69425e8c5aaSvikram 	 * create contracts only for userland opens
69525e8c5aaSvikram 	 * Successful open and cloning is done at this point.
69625e8c5aaSvikram 	 */
69725e8c5aaSvikram 	if (error == 0 && !(flag & FKLYR)) {
69825e8c5aaSvikram 		int spec_type;
69925e8c5aaSvikram 		spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
70025e8c5aaSvikram 		if (contract_device_open(newdev, spec_type, NULL) != 0) {
70125e8c5aaSvikram 			error = EIO;
70225e8c5aaSvikram 		}
70325e8c5aaSvikram 	}
70425e8c5aaSvikram 
7057c478bd9Sstevel@tonic-gate 	if (error == 0) {
7067c478bd9Sstevel@tonic-gate 		sp->s_size = SPEC_SIZE(csp);
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 		if ((csp->s_flag & SNEEDCLOSE) == 0) {
7097c478bd9Sstevel@tonic-gate 			int nmaj = getmajor(newdev);
7107c478bd9Sstevel@tonic-gate 			mutex_enter(&csp->s_lock);
7117c478bd9Sstevel@tonic-gate 			/* successful open needs a close later */
7127c478bd9Sstevel@tonic-gate 			csp->s_flag |= SNEEDCLOSE;
7137c478bd9Sstevel@tonic-gate 
7147c478bd9Sstevel@tonic-gate 			/*
7157c478bd9Sstevel@tonic-gate 			 * Invalidate possible cached "unknown" size
7167c478bd9Sstevel@tonic-gate 			 * established by a VOP_GETATTR while open was in
7177c478bd9Sstevel@tonic-gate 			 * progress, and the driver might fail prop_op(9E).
7187c478bd9Sstevel@tonic-gate 			 */
7197c478bd9Sstevel@tonic-gate 			if (((cvp->v_type == VCHR) && (csp->s_size == 0)) ||
7207c478bd9Sstevel@tonic-gate 			    ((cvp->v_type == VBLK) &&
7217c478bd9Sstevel@tonic-gate 			    (csp->s_size == UNKNOWN_SIZE)))
7227c478bd9Sstevel@tonic-gate 				csp->s_flag &= ~SSIZEVALID;
7237c478bd9Sstevel@tonic-gate 
			/* propagate driver offset capabilities to the snode */
7247c478bd9Sstevel@tonic-gate 			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_64BIT)
7257c478bd9Sstevel@tonic-gate 				csp->s_flag |= SLOFFSET;
7267c478bd9Sstevel@tonic-gate 			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_U64BIT)
7277c478bd9Sstevel@tonic-gate 				csp->s_flag |= SLOFFSET | SANYOFFSET;
7287c478bd9Sstevel@tonic-gate 			mutex_exit(&csp->s_lock);
7297c478bd9Sstevel@tonic-gate 		}
7307c478bd9Sstevel@tonic-gate 		return (0);
7317c478bd9Sstevel@tonic-gate 	}
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate 	/*
7347c478bd9Sstevel@tonic-gate 	 * Open failed. If we missed a close operation because
7357c478bd9Sstevel@tonic-gate 	 * we were trying to get the device open and it is the
7367c478bd9Sstevel@tonic-gate 	 * last in progress open that is failing then call close.
7377c478bd9Sstevel@tonic-gate 	 *
7387c478bd9Sstevel@tonic-gate 	 * NOTE: Only non-streams open has this race condition.
7397c478bd9Sstevel@tonic-gate 	 */
7407c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock);
7417c478bd9Sstevel@tonic-gate 	csp->s_count--;			/* decrement open count : SN_RELE */
7427c478bd9Sstevel@tonic-gate 	if ((csp->s_count == 0) &&	/* no outstanding open */
7437c478bd9Sstevel@tonic-gate 	    (csp->s_mapcnt == 0) &&	/* no mapping */
7447c478bd9Sstevel@tonic-gate 	    (csp->s_flag & SNEEDCLOSE)) { /* need a close */
7457c478bd9Sstevel@tonic-gate 		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);
7467c478bd9Sstevel@tonic-gate 
7477c478bd9Sstevel@tonic-gate 		/* See comment in spec_close() */
7487c478bd9Sstevel@tonic-gate 		if (csp->s_flag & (SCLONE | SSELFCLONE))
7497c478bd9Sstevel@tonic-gate 			csp->s_flag &= ~SDIPSET;
7507c478bd9Sstevel@tonic-gate 
751e099bf07Scth 		csp->s_flag |= SCLOSING;
7527c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
753e099bf07Scth 
7547c478bd9Sstevel@tonic-gate 		ASSERT(*vpp != NULL);
7557c478bd9Sstevel@tonic-gate 		(void) device_close(*vpp, flag, cr);
756e099bf07Scth 
757e099bf07Scth 		mutex_enter(&csp->s_lock);
758e099bf07Scth 		csp->s_flag &= ~SCLOSING;
759e099bf07Scth 		mutex_exit(&csp->s_lock);
7607c478bd9Sstevel@tonic-gate 	} else {
7617c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
7627c478bd9Sstevel@tonic-gate 	}
7637c478bd9Sstevel@tonic-gate 	return (error);
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate streams_open:
7667c478bd9Sstevel@tonic-gate 	/*
767e099bf07Scth 	 * Lock common snode to prevent any new clone opens on this
768e099bf07Scth 	 * stream while one is in progress. This is necessary since
769e099bf07Scth 	 * the stream currently associated with the clone device will
770e099bf07Scth 	 * not be part of it after the clone open completes. Unfortunately
771e099bf07Scth 	 * we don't know in advance if this is a clone
772e099bf07Scth 	 * device so we have to lock all opens.
7737c478bd9Sstevel@tonic-gate 	 *
774e099bf07Scth 	 * If we fail, it's because of an interrupt - EINTR return is an
775e099bf07Scth 	 * expected aspect of opening a stream so we don't need to check
776e099bf07Scth 	 * D_OPEN_RETURNS_EINTR. Open count already incremented (SN_HOLD)
777e099bf07Scth 	 * on non-zero return.
7787c478bd9Sstevel@tonic-gate 	 */
7797f9b0c87Scg13442 	if (LOCKHOLD_CSP_SIG(csp) != SUCCESS)
7807c478bd9Sstevel@tonic-gate 		return (EINTR);
781fbe27353Sedp 
7827c478bd9Sstevel@tonic-gate 	error = stropen(cvp, &newdev, flag, cr);
7837c478bd9Sstevel@tonic-gate 	stp = cvp->v_stream;
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 	/* deal with the clone case */
7867c478bd9Sstevel@tonic-gate 	if ((error == 0) && (dev != newdev)) {
7877c478bd9Sstevel@tonic-gate 		vp->v_stream = cvp->v_stream = NULL;
7887c478bd9Sstevel@tonic-gate 		UNLOCK_CSP(csp);
7897c478bd9Sstevel@tonic-gate 		error = spec_clone(vpp, newdev, vp->v_type, stp);
7907c478bd9Sstevel@tonic-gate 		/*
7917c478bd9Sstevel@tonic-gate 		 * bail on clone failure, further processing
7927c478bd9Sstevel@tonic-gate 		 * results in undefined behaviors.
7937c478bd9Sstevel@tonic-gate 		 */
7947c478bd9Sstevel@tonic-gate 		if (error != 0)
7957c478bd9Sstevel@tonic-gate 			return (error);
		/* *vpp was replaced by spec_clone(); refresh sp/csp to match */
7967c478bd9Sstevel@tonic-gate 		sp = VTOS(*vpp);
7977c478bd9Sstevel@tonic-gate 		csp = VTOS(sp->s_commonvp);
7987c478bd9Sstevel@tonic-gate 	} else if (error == 0) {
7997c478bd9Sstevel@tonic-gate 		vp->v_stream = stp;
8007c478bd9Sstevel@tonic-gate 		UNLOCK_CSP(csp);
8017c478bd9Sstevel@tonic-gate 	}
8027c478bd9Sstevel@tonic-gate 
80325e8c5aaSvikram 	/*
80425e8c5aaSvikram 	 * create contracts only for userland opens
80525e8c5aaSvikram 	 * Successful open and cloning is done at this point.
80625e8c5aaSvikram 	 */
80725e8c5aaSvikram 	if (error == 0 && !(flag & FKLYR)) {
80825e8c5aaSvikram 		/* STREAM is of type S_IFCHR */
80925e8c5aaSvikram 		if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
81025e8c5aaSvikram 			UNLOCK_CSP(csp);
811da6c28aaSamw 			(void) spec_close(vp, flag, 1, 0, cr, cc);
81225e8c5aaSvikram 			return (EIO);
81325e8c5aaSvikram 		}
81425e8c5aaSvikram 	}
81525e8c5aaSvikram 
8167c478bd9Sstevel@tonic-gate 	if (error == 0) {
8177c478bd9Sstevel@tonic-gate 		/* STREAMS devices don't have a size */
8187c478bd9Sstevel@tonic-gate 		sp->s_size = csp->s_size = 0;
8197c478bd9Sstevel@tonic-gate 
8209acbbeafSnn35248 		if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))
8217c478bd9Sstevel@tonic-gate 			return (0);
8229acbbeafSnn35248 
8239acbbeafSnn35248 		/* try to allocate it as a controlling terminal */
8249acbbeafSnn35248 		if (strctty(stp) != EINTR)
8259acbbeafSnn35248 			return (0);
8269acbbeafSnn35248 
8279acbbeafSnn35248 		/* strctty() was interrupted by a signal */
82825e8c5aaSvikram 		if (ct) {
82925e8c5aaSvikram 			/* we only create contracts for userland opens */
83025e8c5aaSvikram 			ASSERT(ttoproc(curthread));
83125e8c5aaSvikram 			(void) contract_abandon(ct, ttoproc(curthread), 0);
83225e8c5aaSvikram 		}
833da6c28aaSamw 		(void) spec_close(vp, flag, 1, 0, cr, cc);
8349acbbeafSnn35248 		return (EINTR);
8357c478bd9Sstevel@tonic-gate 	}
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate 	/*
8387c478bd9Sstevel@tonic-gate 	 * Deal with stropen failure.
8397c478bd9Sstevel@tonic-gate 	 *
8407c478bd9Sstevel@tonic-gate 	 * sd_flag in the stream head cannot change since the
8417c478bd9Sstevel@tonic-gate 	 * common snode is locked before the call to stropen().
8427c478bd9Sstevel@tonic-gate 	 */
8437c478bd9Sstevel@tonic-gate 	if ((stp != NULL) && (stp->sd_flag & STREOPENFAIL)) {
8447c478bd9Sstevel@tonic-gate 		/*
8457c478bd9Sstevel@tonic-gate 		 * Open failed part way through.
8467c478bd9Sstevel@tonic-gate 		 */
8477c478bd9Sstevel@tonic-gate 		mutex_enter(&stp->sd_lock);
8487c478bd9Sstevel@tonic-gate 		stp->sd_flag &= ~STREOPENFAIL;
8497c478bd9Sstevel@tonic-gate 		mutex_exit(&stp->sd_lock);
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate 		UNLOCK_CSP(csp);
852da6c28aaSamw 		(void) spec_close(vp, flag, 1, 0, cr, cc);
8537c478bd9Sstevel@tonic-gate 	} else {
8547c478bd9Sstevel@tonic-gate 		UNLOCK_CSP(csp);
8557c478bd9Sstevel@tonic-gate 		SN_RELE(csp);
8567c478bd9Sstevel@tonic-gate 	}
8577c478bd9Sstevel@tonic-gate 
858349dcea3SGarrett D'Amore 	/*
859349dcea3SGarrett D'Amore 	 * Resolution for STREAMS vs. regular character device: If the
860349dcea3SGarrett D'Amore 	 * STREAMS open(9e) returns ENOSTR, then try an ordinary device
861349dcea3SGarrett D'Amore 	 * open instead.
862349dcea3SGarrett D'Amore 	 */
863349dcea3SGarrett D'Amore 	if (error == ENOSTR) {
864349dcea3SGarrett D'Amore 		goto not_streams;
865349dcea3SGarrett D'Amore 	}
8667c478bd9Sstevel@tonic-gate 	return (error);
8677c478bd9Sstevel@tonic-gate }
8687c478bd9Sstevel@tonic-gate 
/*
 * spec_close: the specfs VOP_CLOSE entry point for special files.
 *
 * Per-descriptor cleanup (file locks, share reservations, STREAMS
 * bookkeeping) is performed on every userland close; the driver's
 * close routine is invoked via device_close() only when the last open
 * reference (s_count) and the last mapping (s_mapcnt) on the common
 * snode are gone.  Closes are serialized against other closes and
 * against clone opens by holding the common-snode lock (LOCK_CSP).
 *
 *	count > 1 means this is not the last close of the file
 *	descriptor reference, so only the cleanup above is done.
 */
8697c478bd9Sstevel@tonic-gate /*ARGSUSED2*/
8707c478bd9Sstevel@tonic-gate static int
8717c478bd9Sstevel@tonic-gate spec_close(
8727c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
8737c478bd9Sstevel@tonic-gate 	int		flag,
8747c478bd9Sstevel@tonic-gate 	int		count,
8757c478bd9Sstevel@tonic-gate 	offset_t	offset,
876da6c28aaSamw 	struct cred	*cr,
877da6c28aaSamw 	caller_context_t *ct)
8787c478bd9Sstevel@tonic-gate {
8797c478bd9Sstevel@tonic-gate 	struct vnode *cvp;
8807c478bd9Sstevel@tonic-gate 	struct snode *sp, *csp;
8817c478bd9Sstevel@tonic-gate 	enum vtype type;
8827c478bd9Sstevel@tonic-gate 	dev_t dev;
8837c478bd9Sstevel@tonic-gate 	int error = 0;
8847c478bd9Sstevel@tonic-gate 	int sysclone;
8857c478bd9Sstevel@tonic-gate 
8867c478bd9Sstevel@tonic-gate 	if (!(flag & FKLYR)) {
8877c478bd9Sstevel@tonic-gate 		/* this only applies to closes of devices from userland */
8887c478bd9Sstevel@tonic-gate 		cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
8897c478bd9Sstevel@tonic-gate 		cleanshares(vp, ttoproc(curthread)->p_pid);
8907c478bd9Sstevel@tonic-gate 		if (vp->v_stream)
8917c478bd9Sstevel@tonic-gate 			strclean(vp);
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 	if (count > 1)
8947c478bd9Sstevel@tonic-gate 		return (0);
8957c478bd9Sstevel@tonic-gate 
89625e8c5aaSvikram 	/* we allow close to succeed even if device is fenced off */
8977c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
8987c478bd9Sstevel@tonic-gate 	cvp = sp->s_commonvp;
8997c478bd9Sstevel@tonic-gate 
9007c478bd9Sstevel@tonic-gate 	dev = sp->s_dev;
9017c478bd9Sstevel@tonic-gate 	type = vp->v_type;
9027c478bd9Sstevel@tonic-gate 
9037c478bd9Sstevel@tonic-gate 	ASSERT(type == VCHR || type == VBLK);
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	/*
9067c478bd9Sstevel@tonic-gate 	 * Prevent close/close and close/open races by serializing closes
9077c478bd9Sstevel@tonic-gate 	 * on this common snode. Clone opens are held up until after
9087c478bd9Sstevel@tonic-gate 	 * we have closed this device so the streams linkage is maintained
9097c478bd9Sstevel@tonic-gate 	 */
9107c478bd9Sstevel@tonic-gate 	csp = VTOS(cvp);
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	LOCK_CSP(csp);
9137c478bd9Sstevel@tonic-gate 	mutex_enter(&csp->s_lock);
9147c478bd9Sstevel@tonic-gate 
9157c478bd9Sstevel@tonic-gate 	csp->s_count--;			/* one fewer open reference : SN_RELE */
9167c478bd9Sstevel@tonic-gate 	sysclone = sp->s_flag & SCLONE;
9177c478bd9Sstevel@tonic-gate 
9187c478bd9Sstevel@tonic-gate 	/*
9197c478bd9Sstevel@tonic-gate 	 * Invalidate size on each close.
9207c478bd9Sstevel@tonic-gate 	 *
9217c478bd9Sstevel@tonic-gate 	 * XXX We do this on each close because we don't have interfaces that
9227c478bd9Sstevel@tonic-gate 	 * allow a driver to invalidate the size.  Since clearing this on
9237c478bd9Sstevel@tonic-gate 	 * each close causes property overhead, we skip /dev/null and
9247c478bd9Sstevel@tonic-gate 	 * /dev/zero (mm_major) to avoid degrading kenbus performance.
9257c478bd9Sstevel@tonic-gate 	 */
9267c478bd9Sstevel@tonic-gate 	if (getmajor(dev) != mm_major)
9277c478bd9Sstevel@tonic-gate 		csp->s_flag &= ~SSIZEVALID;
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	/*
9307c478bd9Sstevel@tonic-gate 	 * Only call the close routine when the last open reference through
9317c478bd9Sstevel@tonic-gate 	 * any [s, v]node goes away.  This can be checked by looking at
9327c478bd9Sstevel@tonic-gate 	 * s_count on the common vnode.
9337c478bd9Sstevel@tonic-gate 	 */
9347c478bd9Sstevel@tonic-gate 	if ((csp->s_count == 0) && (csp->s_mapcnt == 0)) {
9357c478bd9Sstevel@tonic-gate 		/* we don't need a close */
9367c478bd9Sstevel@tonic-gate 		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate 		/*
9397c478bd9Sstevel@tonic-gate 		 * A cloning driver may open-clone to the same dev_t that we
9407c478bd9Sstevel@tonic-gate 		 * are closing before spec_inactive destroys the common snode.
9417c478bd9Sstevel@tonic-gate 		 * If this occurs the s_dip association needs to be reevaluated.
9427c478bd9Sstevel@tonic-gate 		 * We clear SDIPSET to force reevaluation in this case.  When
9437c478bd9Sstevel@tonic-gate 		 * reevaluation occurs (by spec_clone after open), if the
9447c478bd9Sstevel@tonic-gate 		 * devinfo association has changed then the old association
9457c478bd9Sstevel@tonic-gate 		 * will be released as the new association is established by
9467c478bd9Sstevel@tonic-gate 		 * spec_assoc_vp_with_devi().
9477c478bd9Sstevel@tonic-gate 		 */
9487c478bd9Sstevel@tonic-gate 		if (csp->s_flag & (SCLONE | SSELFCLONE))
9497c478bd9Sstevel@tonic-gate 			csp->s_flag &= ~SDIPSET;
9507c478bd9Sstevel@tonic-gate 
		/* SCLOSING lets spec_open() detect an in-progress last close */
951e099bf07Scth 		csp->s_flag |= SCLOSING;
9527c478bd9Sstevel@tonic-gate 		mutex_exit(&csp->s_lock);
9537c478bd9Sstevel@tonic-gate 		error = device_close(vp, flag, cr);
9547c478bd9Sstevel@tonic-gate 
9557c478bd9Sstevel@tonic-gate 		/*
9567c478bd9Sstevel@tonic-gate 		 * Decrement the devops held in clnopen()
9577c478bd9Sstevel@tonic-gate 		 */
9587c478bd9Sstevel@tonic-gate 		if (sysclone) {
9597c478bd9Sstevel@tonic-gate 			ddi_rele_driver(getmajor(dev));
9607c478bd9Sstevel@tonic-gate 		}
9617c478bd9Sstevel@tonic-gate 		mutex_enter(&csp->s_lock);
962e099bf07Scth 		csp->s_flag &= ~SCLOSING;
9637c478bd9Sstevel@tonic-gate 	}
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	UNLOCK_CSP_LOCK_HELD(csp);
9667c478bd9Sstevel@tonic-gate 	mutex_exit(&csp->s_lock);
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate 	return (error);
9697c478bd9Sstevel@tonic-gate }
9707c478bd9Sstevel@tonic-gate 
/*
 * spec_read: the specfs VOP_READ entry point.
 *
 * Dispatch by device flavor:
 *  - STREAMS vnode (v_stream set): hand off to strread().
 *  - plain character device: hand off to the driver via cdev_read().
 *  - block device: read through the page cache in MAXBSIZE-aligned
 *    windows, using VPM (vpm_data_copy/vpm_sync_pages) when vpm_enable
 *    is set, otherwise segmap (segmap_getmapflt/segmap_release).
 *    Transfers are bounded by the cached device size (SPEC_SIZE of the
 *    common snode).
 *
 * Returns 0 on success or EOF, otherwise an errno (e.g. EINVAL for an
 * out-of-range offset, or a fault/driver error from the copy path).
 */
9717c478bd9Sstevel@tonic-gate /*ARGSUSED2*/
9727c478bd9Sstevel@tonic-gate static int
9737c478bd9Sstevel@tonic-gate spec_read(
9747c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
9757c478bd9Sstevel@tonic-gate 	struct uio	*uiop,
9767c478bd9Sstevel@tonic-gate 	int		ioflag,
9777c478bd9Sstevel@tonic-gate 	struct cred	*cr,
978da6c28aaSamw 	caller_context_t *ct)
9797c478bd9Sstevel@tonic-gate {
9807c478bd9Sstevel@tonic-gate 	int error;
9817c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
9827c478bd9Sstevel@tonic-gate 	dev_t dev = sp->s_dev;
9837c478bd9Sstevel@tonic-gate 	size_t n;
9847c478bd9Sstevel@tonic-gate 	ulong_t on;
9857c478bd9Sstevel@tonic-gate 	u_offset_t bdevsize;
9867c478bd9Sstevel@tonic-gate 	offset_t maxoff;
9877c478bd9Sstevel@tonic-gate 	offset_t off;
9887c478bd9Sstevel@tonic-gate 	struct vnode *blkvp;
9897c478bd9Sstevel@tonic-gate 
9907c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
9917c478bd9Sstevel@tonic-gate 
992349dcea3SGarrett D'Amore 	if (vp->v_stream) {
9937c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VCHR);
		/* mark access time before handing off to the stream head */
9947c478bd9Sstevel@tonic-gate 		smark(sp, SACC);
9957c478bd9Sstevel@tonic-gate 		return (strread(vp, uiop, cr));
9967c478bd9Sstevel@tonic-gate 	}
9977c478bd9Sstevel@tonic-gate 
9987c478bd9Sstevel@tonic-gate 	if (uiop->uio_resid == 0)
9997c478bd9Sstevel@tonic-gate 		return (0);
10007c478bd9Sstevel@tonic-gate 
10017c478bd9Sstevel@tonic-gate 	/*
10027c478bd9Sstevel@tonic-gate 	 * Plain old character devices that set D_U64BIT can have
10037c478bd9Sstevel@tonic-gate 	 * unrestricted offsets.
10047c478bd9Sstevel@tonic-gate 	 */
10057c478bd9Sstevel@tonic-gate 	maxoff = spec_maxoffset(vp);
10067c478bd9Sstevel@tonic-gate 	ASSERT(maxoff != -1 || vp->v_type == VCHR);
10077c478bd9Sstevel@tonic-gate 
10087c478bd9Sstevel@tonic-gate 	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
10097c478bd9Sstevel@tonic-gate 	    uiop->uio_loffset + uiop->uio_resid > maxoff))
10107c478bd9Sstevel@tonic-gate 		return (EINVAL);
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
10137c478bd9Sstevel@tonic-gate 		smark(sp, SACC);
1014349dcea3SGarrett D'Amore 		ASSERT(vp->v_stream == NULL);
10157c478bd9Sstevel@tonic-gate 		return (cdev_read(dev, uiop, cr));
10167c478bd9Sstevel@tonic-gate 	}
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 	/*
10197c478bd9Sstevel@tonic-gate 	 * Block device.
10207c478bd9Sstevel@tonic-gate 	 */
10217c478bd9Sstevel@tonic-gate 	error = 0;
10227c478bd9Sstevel@tonic-gate 	blkvp = sp->s_commonvp;
10237c478bd9Sstevel@tonic-gate 	bdevsize = SPEC_SIZE(VTOS(blkvp));
10247c478bd9Sstevel@tonic-gate 
10257c478bd9Sstevel@tonic-gate 	do {
10267c478bd9Sstevel@tonic-gate 		caddr_t base;
10277c478bd9Sstevel@tonic-gate 		offset_t diff;
10287c478bd9Sstevel@tonic-gate 
		/*
		 * off/on split the file offset into a MAXBSIZE-aligned
		 * window base and the offset within that window; n is
		 * the transfer size for this iteration, clipped to the
		 * window end and to the remaining device size (diff).
		 */
10297c478bd9Sstevel@tonic-gate 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
10307c478bd9Sstevel@tonic-gate 		on = (size_t)(uiop->uio_loffset & MAXBOFFSET);
10317c478bd9Sstevel@tonic-gate 		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
10327c478bd9Sstevel@tonic-gate 		diff = bdevsize - uiop->uio_loffset;
10337c478bd9Sstevel@tonic-gate 
10347c478bd9Sstevel@tonic-gate 		if (diff <= 0)
10357c478bd9Sstevel@tonic-gate 			break;
10367c478bd9Sstevel@tonic-gate 		if (diff < n)
10377c478bd9Sstevel@tonic-gate 			n = (size_t)diff;
10387c478bd9Sstevel@tonic-gate 
1039a5652762Spraks 		if (vpm_enable) {
1040a5652762Spraks 			error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
1041a5652762Spraks 			    n, uiop, 1, NULL, 0, S_READ);
1042a5652762Spraks 		} else {
10437c478bd9Sstevel@tonic-gate 			base = segmap_getmapflt(segkmap, blkvp,
10447c478bd9Sstevel@tonic-gate 			    (u_offset_t)(off + on), n, 1, S_READ);
10457c478bd9Sstevel@tonic-gate 
1046a5652762Spraks 			error = uiomove(base + on, n, UIO_READ, uiop);
1047a5652762Spraks 		}
1048a5652762Spraks 		if (!error) {
10497c478bd9Sstevel@tonic-gate 			int flags = 0;
10507c478bd9Sstevel@tonic-gate 			/*
10517c478bd9Sstevel@tonic-gate 			 * If we read a whole block, we won't need this
10527c478bd9Sstevel@tonic-gate 			 * buffer again soon.
10537c478bd9Sstevel@tonic-gate 			 */
10547c478bd9Sstevel@tonic-gate 			if (n + on == MAXBSIZE)
10557c478bd9Sstevel@tonic-gate 				flags = SM_DONTNEED | SM_FREE;
1056a5652762Spraks 			if (vpm_enable) {
1057a5652762Spraks 				error = vpm_sync_pages(blkvp, off, n, flags);
1058a5652762Spraks 			} else {
10597c478bd9Sstevel@tonic-gate 				error = segmap_release(segkmap, base, flags);
1060a5652762Spraks 			}
1061a5652762Spraks 		} else {
1062a5652762Spraks 			if (vpm_enable) {
1063a5652762Spraks 				(void) vpm_sync_pages(blkvp, off, n, 0);
10647c478bd9Sstevel@tonic-gate 			} else {
10657c478bd9Sstevel@tonic-gate 				(void) segmap_release(segkmap, base, 0);
1066a5652762Spraks 			}
			/*
			 * When the device size is unknown a fault past the
			 * real end is how we discover EOF: mask the error
			 * and return whatever was transferred so far.
			 */
10677c478bd9Sstevel@tonic-gate 			if (bdevsize == UNKNOWN_SIZE) {
10687c478bd9Sstevel@tonic-gate 				error = 0;
10697c478bd9Sstevel@tonic-gate 				break;
10707c478bd9Sstevel@tonic-gate 			}
10717c478bd9Sstevel@tonic-gate 		}
10727c478bd9Sstevel@tonic-gate 	} while (error == 0 && uiop->uio_resid > 0 && n != 0);
10737c478bd9Sstevel@tonic-gate 
10747c478bd9Sstevel@tonic-gate 	return (error);
10757c478bd9Sstevel@tonic-gate }
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate /*ARGSUSED*/
10787c478bd9Sstevel@tonic-gate static int
10797c478bd9Sstevel@tonic-gate spec_write(
10807c478bd9Sstevel@tonic-gate 	struct vnode *vp,
10817c478bd9Sstevel@tonic-gate 	struct uio *uiop,
10827c478bd9Sstevel@tonic-gate 	int ioflag,
10837c478bd9Sstevel@tonic-gate 	struct cred *cr,
1084da6c28aaSamw 	caller_context_t *ct)
10857c478bd9Sstevel@tonic-gate {
10867c478bd9Sstevel@tonic-gate 	int error;
10877c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
10887c478bd9Sstevel@tonic-gate 	dev_t dev = sp->s_dev;
10897c478bd9Sstevel@tonic-gate 	size_t n;
10907c478bd9Sstevel@tonic-gate 	ulong_t on;
10917c478bd9Sstevel@tonic-gate 	u_offset_t bdevsize;
10927c478bd9Sstevel@tonic-gate 	offset_t maxoff;
10937c478bd9Sstevel@tonic-gate 	offset_t off;
10947c478bd9Sstevel@tonic-gate 	struct vnode *blkvp;
10957c478bd9Sstevel@tonic-gate 
10967c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
10977c478bd9Sstevel@tonic-gate 
1098349dcea3SGarrett D'Amore 	if (vp->v_stream) {
10997c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VCHR);
11007c478bd9Sstevel@tonic-gate 		smark(sp, SUPD);
11017c478bd9Sstevel@tonic-gate 		return (strwrite(vp, uiop, cr));
11027c478bd9Sstevel@tonic-gate 	}
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 	/*
11057c478bd9Sstevel@tonic-gate 	 * Plain old character devices that set D_U64BIT can have
11067c478bd9Sstevel@tonic-gate 	 * unrestricted offsets.
11077c478bd9Sstevel@tonic-gate 	 */
11087c478bd9Sstevel@tonic-gate 	maxoff = spec_maxoffset(vp);
11097c478bd9Sstevel@tonic-gate 	ASSERT(maxoff != -1 || vp->v_type == VCHR);
11107c478bd9Sstevel@tonic-gate 
11117c478bd9Sstevel@tonic-gate 	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
11127c478bd9Sstevel@tonic-gate 	    uiop->uio_loffset + uiop->uio_resid > maxoff))
11137c478bd9Sstevel@tonic-gate 		return (EINVAL);
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
11167c478bd9Sstevel@tonic-gate 		smark(sp, SUPD);
1117349dcea3SGarrett D'Amore 		ASSERT(vp->v_stream == NULL);
11187c478bd9Sstevel@tonic-gate 		return (cdev_write(dev, uiop, cr));
11197c478bd9Sstevel@tonic-gate 	}
11207c478bd9Sstevel@tonic-gate 
11217c478bd9Sstevel@tonic-gate 	if (uiop->uio_resid == 0)
11227c478bd9Sstevel@tonic-gate 		return (0);
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 	error = 0;
11257c478bd9Sstevel@tonic-gate 	blkvp = sp->s_commonvp;
11267c478bd9Sstevel@tonic-gate 	bdevsize = SPEC_SIZE(VTOS(blkvp));
11277c478bd9Sstevel@tonic-gate 
11287c478bd9Sstevel@tonic-gate 	do {
11297c478bd9Sstevel@tonic-gate 		int pagecreate;
11307c478bd9Sstevel@tonic-gate 		int newpage;
11317c478bd9Sstevel@tonic-gate 		caddr_t base;
11327c478bd9Sstevel@tonic-gate 		offset_t diff;
11337c478bd9Sstevel@tonic-gate 
11347c478bd9Sstevel@tonic-gate 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
11357c478bd9Sstevel@tonic-gate 		on = (ulong_t)(uiop->uio_loffset & MAXBOFFSET);
11367c478bd9Sstevel@tonic-gate 		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
11377c478bd9Sstevel@tonic-gate 		pagecreate = 0;
11387c478bd9Sstevel@tonic-gate 
11397c478bd9Sstevel@tonic-gate 		diff = bdevsize - uiop->uio_loffset;
11407c478bd9Sstevel@tonic-gate 		if (diff <= 0) {
11417c478bd9Sstevel@tonic-gate 			error = ENXIO;
11427c478bd9Sstevel@tonic-gate 			break;
11437c478bd9Sstevel@tonic-gate 		}
11447c478bd9Sstevel@tonic-gate 		if (diff < n)
11457c478bd9Sstevel@tonic-gate 			n = (size_t)diff;
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate 		/*
11487c478bd9Sstevel@tonic-gate 		 * Check to see if we can skip reading in the page
11497c478bd9Sstevel@tonic-gate 		 * and just allocate the memory.  We can do this
11507c478bd9Sstevel@tonic-gate 		 * if we are going to rewrite the entire mapping
11517c478bd9Sstevel@tonic-gate 		 * or if we are going to write to end of the device
11527c478bd9Sstevel@tonic-gate 		 * from the beginning of the mapping.
11537c478bd9Sstevel@tonic-gate 		 */
11547c478bd9Sstevel@tonic-gate 		if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))
11557c478bd9Sstevel@tonic-gate 			pagecreate = 1;
11567c478bd9Sstevel@tonic-gate 
1157a5652762Spraks 		newpage = 0;
11586f5f1c63SDonghai Qiao 
11596f5f1c63SDonghai Qiao 		/*
11606f5f1c63SDonghai Qiao 		 * Touch the page and fault it in if it is not in core
11616f5f1c63SDonghai Qiao 		 * before segmap_getmapflt or vpm_data_copy can lock it.
11626f5f1c63SDonghai Qiao 		 * This is to avoid the deadlock if the buffer is mapped
11636f5f1c63SDonghai Qiao 		 * to the same file through mmap which we want to write.
11646f5f1c63SDonghai Qiao 		 */
11656f5f1c63SDonghai Qiao 		uio_prefaultpages((long)n, uiop);
11666f5f1c63SDonghai Qiao 
1167a5652762Spraks 		if (vpm_enable) {
1168a5652762Spraks 			error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
1169a5652762Spraks 			    n, uiop, !pagecreate, NULL, 0, S_WRITE);
1170a5652762Spraks 		} else {
11717c478bd9Sstevel@tonic-gate 			base = segmap_getmapflt(segkmap, blkvp,
11727c478bd9Sstevel@tonic-gate 			    (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 			/*
11757c478bd9Sstevel@tonic-gate 			 * segmap_pagecreate() returns 1 if it calls
11767c478bd9Sstevel@tonic-gate 			 * page_create_va() to allocate any pages.
11777c478bd9Sstevel@tonic-gate 			 */
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 			if (pagecreate)
11807c478bd9Sstevel@tonic-gate 				newpage = segmap_pagecreate(segkmap, base + on,
11817c478bd9Sstevel@tonic-gate 				    n, 0);
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate 			error = uiomove(base + on, n, UIO_WRITE, uiop);
1184a5652762Spraks 		}
11857c478bd9Sstevel@tonic-gate 
1186a5652762Spraks 		if (!vpm_enable && pagecreate &&
11877c478bd9Sstevel@tonic-gate 		    uiop->uio_loffset <
11887c478bd9Sstevel@tonic-gate 		    P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
11897c478bd9Sstevel@tonic-gate 			/*
11907c478bd9Sstevel@tonic-gate 			 * We created pages w/o initializing them completely,
11917c478bd9Sstevel@tonic-gate 			 * thus we need to zero the part that wasn't set up.
11927c478bd9Sstevel@tonic-gate 			 * This can happen if we write to the end of the device
11937c478bd9Sstevel@tonic-gate 			 * or if we had some sort of error during the uiomove.
11947c478bd9Sstevel@tonic-gate 			 */
11957c478bd9Sstevel@tonic-gate 			long nzero;
11967c478bd9Sstevel@tonic-gate 			offset_t nmoved;
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate 			nmoved = (uiop->uio_loffset - (off + on));
11997c478bd9Sstevel@tonic-gate 			if (nmoved < 0 || nmoved > n) {
12007c478bd9Sstevel@tonic-gate 				panic("spec_write: nmoved bogus");
12017c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
12027c478bd9Sstevel@tonic-gate 			}
12037c478bd9Sstevel@tonic-gate 			nzero = (long)P2ROUNDUP(on + n, PAGESIZE) -
12047c478bd9Sstevel@tonic-gate 			    (on + nmoved);
12057c478bd9Sstevel@tonic-gate 			if (nzero < 0 || (on + nmoved + nzero > MAXBSIZE)) {
12067c478bd9Sstevel@tonic-gate 				panic("spec_write: nzero bogus");
12077c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
12087c478bd9Sstevel@tonic-gate 			}
12097c478bd9Sstevel@tonic-gate 			(void) kzero(base + on + nmoved, (size_t)nzero);
12107c478bd9Sstevel@tonic-gate 		}
12117c478bd9Sstevel@tonic-gate 
12127c478bd9Sstevel@tonic-gate 		/*
12137c478bd9Sstevel@tonic-gate 		 * Unlock the pages which have been allocated by
12147c478bd9Sstevel@tonic-gate 		 * page_create_va() in segmap_pagecreate().
12157c478bd9Sstevel@tonic-gate 		 */
1216a5652762Spraks 		if (!vpm_enable && newpage)
12177c478bd9Sstevel@tonic-gate 			segmap_pageunlock(segkmap, base + on,
12187c478bd9Sstevel@tonic-gate 			    (size_t)n, S_WRITE);
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 		if (error == 0) {
12217c478bd9Sstevel@tonic-gate 			int flags = 0;
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate 			/*
12247c478bd9Sstevel@tonic-gate 			 * Force write back for synchronous write cases.
12257c478bd9Sstevel@tonic-gate 			 */
12267c478bd9Sstevel@tonic-gate 			if (ioflag & (FSYNC|FDSYNC))
12277c478bd9Sstevel@tonic-gate 				flags = SM_WRITE;
12287c478bd9Sstevel@tonic-gate 			else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) {
12297c478bd9Sstevel@tonic-gate 				/*
12307c478bd9Sstevel@tonic-gate 				 * Have written a whole block.
12317c478bd9Sstevel@tonic-gate 				 * Start an asynchronous write and
12327c478bd9Sstevel@tonic-gate 				 * mark the buffer to indicate that
12337c478bd9Sstevel@tonic-gate 				 * it won't be needed again soon.
12347c478bd9Sstevel@tonic-gate 				 * Push swap files here, since it
12357c478bd9Sstevel@tonic-gate 				 * won't happen anywhere else.
12367c478bd9Sstevel@tonic-gate 				 */
12377c478bd9Sstevel@tonic-gate 				flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
12387c478bd9Sstevel@tonic-gate 			}
12397c478bd9Sstevel@tonic-gate 			smark(sp, SUPD|SCHG);
1240a5652762Spraks 			if (vpm_enable) {
1241a5652762Spraks 				error = vpm_sync_pages(blkvp, off, n, flags);
1242a5652762Spraks 			} else {
12437c478bd9Sstevel@tonic-gate 				error = segmap_release(segkmap, base, flags);
1244a5652762Spraks 			}
1245a5652762Spraks 		} else {
1246a5652762Spraks 			if (vpm_enable) {
1247a5652762Spraks 				(void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
1248a5652762Spraks 			} else {
12497c478bd9Sstevel@tonic-gate 				(void) segmap_release(segkmap, base, SM_INVAL);
1250a5652762Spraks 			}
1251a5652762Spraks 		}
12527c478bd9Sstevel@tonic-gate 
12537c478bd9Sstevel@tonic-gate 	} while (error == 0 && uiop->uio_resid > 0 && n != 0);
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 	return (error);
12567c478bd9Sstevel@tonic-gate }
12577c478bd9Sstevel@tonic-gate 
1258da6c28aaSamw /*ARGSUSED6*/
12597c478bd9Sstevel@tonic-gate static int
12607c478bd9Sstevel@tonic-gate spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
1261da6c28aaSamw     int *rvalp, caller_context_t *ct)
12627c478bd9Sstevel@tonic-gate {
12637c478bd9Sstevel@tonic-gate 	struct snode *sp;
12647c478bd9Sstevel@tonic-gate 	dev_t dev;
12657c478bd9Sstevel@tonic-gate 	int error;
12667c478bd9Sstevel@tonic-gate 
12677c478bd9Sstevel@tonic-gate 	if (vp->v_type != VCHR)
12687c478bd9Sstevel@tonic-gate 		return (ENOTTY);
126925e8c5aaSvikram 
127025e8c5aaSvikram 	/*
127125e8c5aaSvikram 	 * allow ioctls() to go through even for fenced snodes, as they
127225e8c5aaSvikram 	 * may include unconfiguration operation - for example popping of
127325e8c5aaSvikram 	 * streams modules.
127425e8c5aaSvikram 	 */
127525e8c5aaSvikram 
12767c478bd9Sstevel@tonic-gate 	sp = VTOS(vp);
12777c478bd9Sstevel@tonic-gate 	dev = sp->s_dev;
1278349dcea3SGarrett D'Amore 	if (vp->v_stream) {
12797c478bd9Sstevel@tonic-gate 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
12807c478bd9Sstevel@tonic-gate 	} else {
12817c478bd9Sstevel@tonic-gate 		error = cdev_ioctl(dev, cmd, arg, mode, cr, rvalp);
12827c478bd9Sstevel@tonic-gate 	}
12837c478bd9Sstevel@tonic-gate 	return (error);
12847c478bd9Sstevel@tonic-gate }
12857c478bd9Sstevel@tonic-gate 
/*
 * Get attributes for a special file.  Most attributes come from the
 * underlying "real" vnode when one exists; the size and the cached
 * access/modify/change times always come from the snode itself.
 */
static int
spec_getattr(
	struct vnode *vp,
	struct vattr *vap,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	int error;
	struct snode *sp;
	struct vnode *realvp;

	/* With ATTR_COMM we will not get attributes from realvp */
	if (flags & ATTR_COMM) {
		/* redirect the lookup to the common vnode for this device */
		sp = VTOS(vp);
		vp = sp->s_commonvp;
	}
	sp = VTOS(vp);

	/* we want stat() to fail with ENXIO if the device is fenced off */
	if (S_ISFENCED(sp))
		return (ENXIO);

	realvp = sp->s_realvp;

	if (realvp == NULL) {
		/* computed once and cached across calls */
		static int snode_shift	= 0;

		/*
		 * Calculate the amount of bitshift to a snode pointer which
		 * will still keep it unique.  See below.
		 */
		if (snode_shift == 0)
			snode_shift = highbit(sizeof (struct snode));
		ASSERT(snode_shift > 0);

		/*
		 * No real vnode behind this one.  Fill in the fields
		 * from the snode.
		 *
		 * This code should be refined to return only the
		 * attributes asked for instead of all of them.
		 */
		vap->va_type = vp->v_type;
		vap->va_mode = 0;
		vap->va_uid = vap->va_gid = 0;
		vap->va_fsid = sp->s_fsid;

		/*
		 * If the va_nodeid is > MAX_USHORT, then i386 stats might
		 * fail. So we shift down the snode pointer to try and get
		 * the most uniqueness into 16-bits.
		 */
		vap->va_nodeid = ((ino64_t)(uintptr_t)sp >> snode_shift) &
		    0xFFFF;
		vap->va_nlink = 0;
		vap->va_rdev = sp->s_dev;

		/*
		 * va_nblocks is the number of 512 byte blocks used to store
		 * the mknod for the device, not the number of blocks on the
		 * device itself.  This is typically zero since the mknod is
		 * represented directly in the inode itself.
		 */
		vap->va_nblocks = 0;
	} else {
		error = VOP_GETATTR(realvp, vap, flags, cr, ct);
		if (error != 0)
			return (error);
	}

	/* set the size from the snode */
	vap->va_size = SPEC_SIZE(VTOS(sp->s_commonvp));
	vap->va_blksize = MAXBSIZE;

	/* snode times override whatever the real vnode reported */
	mutex_enter(&sp->s_lock);
	vap->va_atime.tv_sec = sp->s_atime;
	vap->va_mtime.tv_sec = sp->s_mtime;
	vap->va_ctime.tv_sec = sp->s_ctime;
	mutex_exit(&sp->s_lock);

	/* snode keeps only second resolution */
	vap->va_atime.tv_nsec = 0;
	vap->va_mtime.tv_nsec = 0;
	vap->va_ctime.tv_nsec = 0;
	vap->va_seq = 0;

	return (0);
}
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate static int
13767c478bd9Sstevel@tonic-gate spec_setattr(
13777c478bd9Sstevel@tonic-gate 	struct vnode *vp,
13787c478bd9Sstevel@tonic-gate 	struct vattr *vap,
13797c478bd9Sstevel@tonic-gate 	int flags,
13807c478bd9Sstevel@tonic-gate 	struct cred *cr,
1381da6c28aaSamw 	caller_context_t *ct)
13827c478bd9Sstevel@tonic-gate {
13837c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
13847c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
13857c478bd9Sstevel@tonic-gate 	int error;
13867c478bd9Sstevel@tonic-gate 
138725e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
138825e8c5aaSvikram 	if (S_ISFENCED(sp))
138925e8c5aaSvikram 		return (ENXIO);
139025e8c5aaSvikram 
13917c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
13927c478bd9Sstevel@tonic-gate 		/*
13937c478bd9Sstevel@tonic-gate 		 * 1135080:	O_TRUNC should have no effect on
13947c478bd9Sstevel@tonic-gate 		 *		named pipes and terminal devices.
13957c478bd9Sstevel@tonic-gate 		 */
13967c478bd9Sstevel@tonic-gate 		ASSERT(vap->va_mask == AT_SIZE);
13977c478bd9Sstevel@tonic-gate 		return (0);
13987c478bd9Sstevel@tonic-gate 	}
13997c478bd9Sstevel@tonic-gate 
14007c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) == NULL)
14017c478bd9Sstevel@tonic-gate 		error = 0;	/* no real vnode to update */
14027c478bd9Sstevel@tonic-gate 	else
1403da6c28aaSamw 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
14047c478bd9Sstevel@tonic-gate 	if (error == 0) {
14057c478bd9Sstevel@tonic-gate 		/*
14067c478bd9Sstevel@tonic-gate 		 * If times were changed, update snode.
14077c478bd9Sstevel@tonic-gate 		 */
14087c478bd9Sstevel@tonic-gate 		mutex_enter(&sp->s_lock);
14097c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_ATIME)
14107c478bd9Sstevel@tonic-gate 			sp->s_atime = vap->va_atime.tv_sec;
14117c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_MTIME) {
14127c478bd9Sstevel@tonic-gate 			sp->s_mtime = vap->va_mtime.tv_sec;
14137c478bd9Sstevel@tonic-gate 			sp->s_ctime = gethrestime_sec();
14147c478bd9Sstevel@tonic-gate 		}
14157c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
14167c478bd9Sstevel@tonic-gate 	}
14177c478bd9Sstevel@tonic-gate 	return (error);
14187c478bd9Sstevel@tonic-gate }
14197c478bd9Sstevel@tonic-gate 
14207c478bd9Sstevel@tonic-gate static int
1421da6c28aaSamw spec_access(
1422da6c28aaSamw 	struct vnode *vp,
1423da6c28aaSamw 	int mode,
1424da6c28aaSamw 	int flags,
1425da6c28aaSamw 	struct cred *cr,
1426da6c28aaSamw 	caller_context_t *ct)
14277c478bd9Sstevel@tonic-gate {
14287c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
14297c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
14307c478bd9Sstevel@tonic-gate 
143125e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
143225e8c5aaSvikram 	if (S_ISFENCED(sp))
143325e8c5aaSvikram 		return (ENXIO);
143425e8c5aaSvikram 
14357c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
1436da6c28aaSamw 		return (VOP_ACCESS(realvp, mode, flags, cr, ct));
14377c478bd9Sstevel@tonic-gate 	else
14387c478bd9Sstevel@tonic-gate 		return (0);	/* Allow all access. */
14397c478bd9Sstevel@tonic-gate }
14407c478bd9Sstevel@tonic-gate 
14417c478bd9Sstevel@tonic-gate /*
14427c478bd9Sstevel@tonic-gate  * This can be called if creat or an open with O_CREAT is done on the root
14437c478bd9Sstevel@tonic-gate  * of a lofs mount where the mounted entity is a special file.
14447c478bd9Sstevel@tonic-gate  */
14457c478bd9Sstevel@tonic-gate /*ARGSUSED*/
14467c478bd9Sstevel@tonic-gate static int
1447da6c28aaSamw spec_create(
1448da6c28aaSamw 	struct vnode *dvp,
1449da6c28aaSamw 	char *name,
1450da6c28aaSamw 	vattr_t *vap,
1451da6c28aaSamw 	enum vcexcl excl,
1452da6c28aaSamw 	int mode,
1453da6c28aaSamw 	struct vnode **vpp,
1454da6c28aaSamw 	struct cred *cr,
1455da6c28aaSamw 	int flag,
1456da6c28aaSamw 	caller_context_t *ct,
1457da6c28aaSamw 	vsecattr_t *vsecp)
14587c478bd9Sstevel@tonic-gate {
14597c478bd9Sstevel@tonic-gate 	int error;
146025e8c5aaSvikram 	struct snode *sp = VTOS(dvp);
146125e8c5aaSvikram 
146225e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
146325e8c5aaSvikram 	if (S_ISFENCED(sp))
146425e8c5aaSvikram 		return (ENXIO);
14657c478bd9Sstevel@tonic-gate 
14667c478bd9Sstevel@tonic-gate 	ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
14677c478bd9Sstevel@tonic-gate 	if (excl == NONEXCL) {
1468da6c28aaSamw 		if (mode && (error = spec_access(dvp, mode, 0, cr, ct)))
14697c478bd9Sstevel@tonic-gate 			return (error);
14707c478bd9Sstevel@tonic-gate 		VN_HOLD(dvp);
14717c478bd9Sstevel@tonic-gate 		return (0);
14727c478bd9Sstevel@tonic-gate 	}
14737c478bd9Sstevel@tonic-gate 	return (EEXIST);
14747c478bd9Sstevel@tonic-gate }
14757c478bd9Sstevel@tonic-gate 
/*
 * In order to sync out the snode times without multi-client problems,
 * make sure the times written out are never earlier than the times
 * already set in the vnode.
 */
static int
spec_fsync(
	struct vnode *vp,
	int syncflag,
	struct cred *cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *realvp;
	struct vnode *cvp;
	struct vattr va, vatmp;

	/* allow syncing even if device is fenced off */

	/* If times didn't change, don't flush anything. */
	mutex_enter(&sp->s_lock);
	if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
		mutex_exit(&sp->s_lock);
		return (0);
	}
	/* claim the pending time updates before dropping the lock */
	sp->s_flag &= ~(SACC|SUPD|SCHG);
	mutex_exit(&sp->s_lock);
	cvp = sp->s_commonvp;
	realvp = sp->s_realvp;

	/* push any dirty pages cached on the common vnode (skip swap) */
	if (vp->v_type == VBLK && cvp != vp && vn_has_cached_data(cvp) &&
	    (cvp->v_flag & VISSWAP) == 0)
		(void) VOP_PUTPAGE(cvp, (offset_t)0, 0, 0, cr, ct);

	/*
	 * For devices that support it, force write cache to stable storage.
	 * We don't need the lock to check s_flags since we can treat
	 * SNOFLUSH as a hint.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    !(sp->s_flag & SNOFLUSH)) {
		int rval, rc;
		struct dk_callback spec_callback;

		/* NULL callback makes the flush synchronous */
		spec_callback.dkc_flag = FLUSH_VOLATILE;
		spec_callback.dkc_callback = NULL;

		/* synchronous flush on volatile cache */
		rc = cdev_ioctl(vp->v_rdev, DKIOCFLUSHWRITECACHE,
		    (intptr_t)&spec_callback, FNATIVE|FKIOCTL, cr, &rval);

		/* remember that the driver can't flush; don't retry later */
		if (rc == ENOTSUP || rc == ENOTTY) {
			mutex_enter(&sp->s_lock);
			sp->s_flag |= SNOFLUSH;
			mutex_exit(&sp->s_lock);
		}
	}

	/*
	 * If no real vnode to update, don't flush anything.
	 */
	if (realvp == NULL)
		return (0);

	vatmp.va_mask = AT_ATIME|AT_MTIME;
	if (VOP_GETATTR(realvp, &vatmp, 0, cr, ct) == 0) {

		/* take the newer of the realvp times and the snode times */
		mutex_enter(&sp->s_lock);
		if (vatmp.va_atime.tv_sec > sp->s_atime)
			va.va_atime = vatmp.va_atime;
		else {
			va.va_atime.tv_sec = sp->s_atime;
			va.va_atime.tv_nsec = 0;
		}
		if (vatmp.va_mtime.tv_sec > sp->s_mtime)
			va.va_mtime = vatmp.va_mtime;
		else {
			va.va_mtime.tv_sec = sp->s_mtime;
			va.va_mtime.tv_nsec = 0;
		}
		mutex_exit(&sp->s_lock);

		va.va_mask = AT_ATIME|AT_MTIME;
		(void) VOP_SETATTR(realvp, &va, 0, cr, ct);
	}
	/* best-effort: errors from the underlying fsync are ignored */
	(void) VOP_FSYNC(realvp, syncflag, cr, ct);
	return (0);
}
15647c478bd9Sstevel@tonic-gate 
/*
 * Last reference to an snode is going away: remove it from the snode
 * cache, sync any pending time updates out to the real vnode, release
 * every hold the snode took (vfs, realvp, commonvp, devinfo, policy)
 * and free it.
 */
/*ARGSUSED*/
static void
spec_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *cvp;
	struct vnode *rvp;

	/*
	 * If no one has reclaimed the vnode, remove from the
	 * cache now.
	 */
	if (vp->v_count < 1) {
		panic("spec_inactive: Bad v_count");
		/*NOTREACHED*/
	}
	/* stable_lock is held across the count check and sdelete() */
	mutex_enter(&stable_lock);

	mutex_enter(&vp->v_lock);
	/*
	 * Drop the temporary hold by vn_rele now
	 */
	if (--vp->v_count != 0) {
		/* someone re-referenced the vnode; nothing to tear down */
		mutex_exit(&vp->v_lock);
		mutex_exit(&stable_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	sdelete(sp);
	mutex_exit(&stable_lock);

	/* We are the sole owner of sp now */
	cvp = sp->s_commonvp;
	rvp = sp->s_realvp;

	if (rvp) {
		/*
		 * If the snode times changed, then update the times
		 * associated with the "realvp".
		 */
		if ((sp->s_flag & (SACC|SUPD|SCHG)) != 0) {

			struct vattr va, vatmp;

			mutex_enter(&sp->s_lock);
			sp->s_flag &= ~(SACC|SUPD|SCHG);
			mutex_exit(&sp->s_lock);
			vatmp.va_mask = AT_ATIME|AT_MTIME;
			/*
			 * The user may not own the device, but we
			 * want to update the attributes anyway.
			 */
			if (VOP_GETATTR(rvp, &vatmp, 0, kcred, ct) == 0) {
				/* never move times backwards; keep newer */
				if (vatmp.va_atime.tv_sec > sp->s_atime)
					va.va_atime = vatmp.va_atime;
				else {
					va.va_atime.tv_sec = sp->s_atime;
					va.va_atime.tv_nsec = 0;
				}
				if (vatmp.va_mtime.tv_sec > sp->s_mtime)
					va.va_mtime = vatmp.va_mtime;
				else {
					va.va_mtime.tv_sec = sp->s_mtime;
					va.va_mtime.tv_nsec = 0;
				}

				va.va_mask = AT_ATIME|AT_MTIME;
				(void) VOP_SETATTR(rvp, &va, 0, kcred, ct);
			}
		}
	}
	ASSERT(!vn_has_cached_data(vp));
	vn_invalid(vp);

	/* if we are sharing another file systems vfs, release it */
	if (vp->v_vfsp && (vp->v_vfsp != &spec_vfs))
		VFS_RELE(vp->v_vfsp);

	/* if we have a realvp, release the realvp */
	if (rvp)
		VN_RELE(rvp);

	/* if we have a common, release the common */
	if (cvp && (cvp != vp)) {
		VN_RELE(cvp);
#ifdef DEBUG
	} else if (cvp) {
		/*
		 * if this is the last reference to a common vnode, any
		 * associated stream had better have been closed
		 */
		ASSERT(cvp == vp);
		ASSERT(cvp->v_stream == NULL);
#endif /* DEBUG */
	}

	/*
	 * if we have a hold on a devinfo node (established by
	 * spec_assoc_vp_with_devi), release the hold
	 */
	if (sp->s_dip)
		ddi_release_devi(sp->s_dip);

	/*
	 * If we have an associated device policy, release it.
	 */
	if (sp->s_plcy != NULL)
		dpfree(sp->s_plcy);

	/*
	 * If all holds on the devinfo node are through specfs/devfs
	 * and we just destroyed the last specfs node associated with the
	 * device, then the devinfo node reference count should now be
	 * zero.  We can't check this because there may be other holds
	 * on the node from non file system sources: ddi_hold_devi_by_instance
	 * for example.
	 */
	kmem_cache_free(snode_cache, sp);
}
16857c478bd9Sstevel@tonic-gate 
16867c478bd9Sstevel@tonic-gate static int
1687da6c28aaSamw spec_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
16887c478bd9Sstevel@tonic-gate {
16897c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
16907c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
16917c478bd9Sstevel@tonic-gate 
16927c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
1693da6c28aaSamw 		return (VOP_FID(realvp, fidp, ct));
16947c478bd9Sstevel@tonic-gate 	else
16957c478bd9Sstevel@tonic-gate 		return (EINVAL);
16967c478bd9Sstevel@tonic-gate }
16977c478bd9Sstevel@tonic-gate 
16987c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
16997c478bd9Sstevel@tonic-gate static int
1700da6c28aaSamw spec_seek(
1701da6c28aaSamw 	struct vnode *vp,
1702da6c28aaSamw 	offset_t ooff,
1703da6c28aaSamw 	offset_t *noffp,
1704da6c28aaSamw 	caller_context_t *ct)
17057c478bd9Sstevel@tonic-gate {
17067c478bd9Sstevel@tonic-gate 	offset_t maxoff = spec_maxoffset(vp);
17077c478bd9Sstevel@tonic-gate 
17087c478bd9Sstevel@tonic-gate 	if (maxoff == -1 || *noffp <= maxoff)
17097c478bd9Sstevel@tonic-gate 		return (0);
17107c478bd9Sstevel@tonic-gate 	else
17117c478bd9Sstevel@tonic-gate 		return (EINVAL);
17127c478bd9Sstevel@tonic-gate }
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate static int
17157c478bd9Sstevel@tonic-gate spec_frlock(
17167c478bd9Sstevel@tonic-gate 	struct vnode *vp,
17177c478bd9Sstevel@tonic-gate 	int		cmd,
17187c478bd9Sstevel@tonic-gate 	struct flock64	*bfp,
17197c478bd9Sstevel@tonic-gate 	int		flag,
17207c478bd9Sstevel@tonic-gate 	offset_t	offset,
17217c478bd9Sstevel@tonic-gate 	struct flk_callback *flk_cbp,
1722da6c28aaSamw 	struct cred	*cr,
1723da6c28aaSamw 	caller_context_t *ct)
17247c478bd9Sstevel@tonic-gate {
17257c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
17267c478bd9Sstevel@tonic-gate 	struct snode *csp;
17277c478bd9Sstevel@tonic-gate 
17287c478bd9Sstevel@tonic-gate 	csp = VTOS(sp->s_commonvp);
17297c478bd9Sstevel@tonic-gate 	/*
17307c478bd9Sstevel@tonic-gate 	 * If file is being mapped, disallow frlock.
17317c478bd9Sstevel@tonic-gate 	 */
17327c478bd9Sstevel@tonic-gate 	if (csp->s_mapcnt > 0)
17337c478bd9Sstevel@tonic-gate 		return (EAGAIN);
17347c478bd9Sstevel@tonic-gate 
1735da6c28aaSamw 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
17367c478bd9Sstevel@tonic-gate }
17377c478bd9Sstevel@tonic-gate 
17387c478bd9Sstevel@tonic-gate static int
1739da6c28aaSamw spec_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
17407c478bd9Sstevel@tonic-gate {
17417c478bd9Sstevel@tonic-gate 	struct vnode *rvp;
17427c478bd9Sstevel@tonic-gate 
17437c478bd9Sstevel@tonic-gate 	if ((rvp = VTOS(vp)->s_realvp) != NULL) {
17447c478bd9Sstevel@tonic-gate 		vp = rvp;
1745da6c28aaSamw 		if (VOP_REALVP(vp, &rvp, ct) == 0)
17467c478bd9Sstevel@tonic-gate 			vp = rvp;
17477c478bd9Sstevel@tonic-gate 	}
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate 	*vpp = vp;
17507c478bd9Sstevel@tonic-gate 	return (0);
17517c478bd9Sstevel@tonic-gate }
17527c478bd9Sstevel@tonic-gate 
/*
 * Return all the pages from [off..off + len] in block
 * or character device.
 */
/*ARGSUSED*/
static int
spec_getpage(
	struct vnode	*vp,
	offset_t	off,
	size_t		len,
	uint_t		*protp,
	page_t		*pl[],
	size_t		plsz,
	struct seg	*seg,
	caddr_t		addr,
	enum seg_rw	rw,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	int err;

	/* callers must hand us the common vnode for the device */
	ASSERT(sp->s_commonvp == vp);

	/*
	 * XXX	Given the above assertion, this might not do
	 *	what is wanted here.
	 */
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_GETPAGE,
	    "specfs getpage:vp %p off %llx len %ld snode %p",
	    vp, off, len, sp);

	switch (vp->v_type) {
	case VBLK:
		if (protp != NULL)
			*protp = PROT_ALL;

		/* reject requests extending past the device size */
		if (((u_offset_t)off + len) > (SPEC_SIZE(sp) + PAGEOFFSET))
			return (EFAULT);	/* beyond EOF */

		err = pvn_getpages(spec_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);
		break;

	case VCHR:
		/* character devices have no pages; warn about buggy callers */
		cmn_err(CE_NOTE, "spec_getpage called for character device. "
		    "Check any non-ON consolidation drivers");
		err = 0;
		pl[0] = (page_t *)0;
		break;

	default:
		panic("spec_getpage: bad v_type 0x%x", vp->v_type);
		/*NOTREACHED*/
	}

	return (err);
}
18137c478bd9Sstevel@tonic-gate 
extern int klustsize;	/* set in machdep.c */

int spec_ra = 1;	/* nonzero enables read-ahead in spec_getapage() */
int spec_lostpage;	/* number of times we lost original page */
18187c478bd9Sstevel@tonic-gate 
/*
 * Read in the page at "off", klustering adjacent pages into a single
 * I/O of up to adj_klustsize bytes.  Called from spec_getpage() via
 * pvn_getpages().  When the access pattern looks sequential (this
 * offset is where the previous read left off, tracked in s_nextr) an
 * additional asynchronous read-ahead of the next kluster is issued.
 * If "pl" is NULL the read itself is fired off B_ASYNC and we return
 * without waiting for it.
 */
/*ARGSUSED2*/
static int
spec_getapage(
	struct vnode *vp,
	u_offset_t	off,
	size_t		len,
	uint_t		*protp,
	page_t		*pl[],
	size_t		plsz,
	struct seg	*seg,
	caddr_t		addr,
	enum seg_rw	rw,
	struct cred	*cr)
{
	struct snode *sp;
	struct buf *bp;
	page_t *pp, *pp2;
	u_offset_t io_off1, io_off2;
	size_t io_len1;
	size_t io_len2;
	size_t blksz;
	u_offset_t blkoff;
	int dora, err;
	page_t *pagefound;
	uint_t xlen;
	size_t adj_klustsize;
	u_offset_t size;
	u_offset_t tmpoff;

	sp = VTOS(vp);
	TRACE_3(TR_FAC_SPECFS, TR_SPECFS_GETAPAGE,
	    "specfs getapage:vp %p off %llx snode %p", vp, off, sp);
reread:
	/* Restart point: taken when a cached page vanished under us. */
	err = 0;
	bp = NULL;
	pp = NULL;
	pp2 = NULL;

	if (pl != NULL)
		pl[0] = NULL;

	/* Device size; may be UNKNOWN_SIZE for the common vnode. */
	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	/* Read ahead only when this read continues where the last ended. */
	if (spec_ra && sp->s_nextr == off)
		dora = 1;
	else
		dora = 0;

	/* Without a known size we can't kluster or read ahead safely. */
	if (size == UNKNOWN_SIZE) {
		dora = 0;
		adj_klustsize = PAGESIZE;
	} else {
		adj_klustsize = dora ? klustsize : PAGESIZE;
	}

again:
	if ((pagefound = page_exists(vp, off)) == NULL) {
		if (rw == S_CREATE) {
			/*
			 * We're allocating a swap slot and it's
			 * associated page was not found, so allocate
			 * and return it.
			 */
			if ((pp = page_create_va(vp, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("spec_getapage: page_create");
				/*NOTREACHED*/
			}
			io_len1 = PAGESIZE;
			sp->s_nextr = off + PAGESIZE;
		} else {
			/*
			 * Need to really do disk I/O to get the page(s).
			 */
			blkoff = (off / adj_klustsize) * adj_klustsize;
			if (size == UNKNOWN_SIZE) {
				blksz = PAGESIZE;
			} else {
				/* Clamp the kluster to the device size. */
				if (blkoff + adj_klustsize <= size)
					blksz = adj_klustsize;
				else
					blksz =
					    MIN(size - blkoff, adj_klustsize);
			}

			pp = pvn_read_kluster(vp, off, seg, addr, &tmpoff,
			    &io_len1, blkoff, blksz, 0);
			io_off1 = tmpoff;
			/*
			 * Make sure the page didn't sneek into the
			 * cache while we blocked in pvn_read_kluster.
			 */
			if (pp == NULL)
				goto again;

			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = (uint_t)(io_len1 & PAGEOFFSET);
			if (xlen != 0)
				pagezero(pp->p_prev, xlen, PAGESIZE - xlen);

			/* Synchronous read unless the caller wants no list. */
			bp = spec_startio(vp, pp, io_off1, io_len1,
			    pl == NULL ? (B_ASYNC | B_READ) : B_READ);
			sp->s_nextr = io_off1 + io_len1;
		}
	}

	/* Optionally fire off an async read-ahead of the next kluster. */
	if (dora && rw != S_CREATE) {
		u_offset_t off2;
		caddr_t addr2;

		off2 = ((off / adj_klustsize) + 1) * adj_klustsize;
		addr2 = addr + (off2 - off);

		pp2 = NULL;
		/*
		 * If we are past EOF then don't bother trying
		 * with read-ahead.
		 */
		if (off2 >= size)
			pp2 = NULL;
		else {
			if (off2 + adj_klustsize <= size)
				blksz = adj_klustsize;
			else
				blksz = MIN(size - off2, adj_klustsize);

			pp2 = pvn_read_kluster(vp, off2, seg, addr2, &tmpoff,
			    &io_len2, off2, blksz, 1);
			io_off2 = tmpoff;
		}

		if (pp2 != NULL) {
			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = (uint_t)(io_len2 & PAGEOFFSET);
			if (xlen != 0)
				pagezero(pp2->p_prev, xlen, PAGESIZE - xlen);

			/* Fire and forget; pvn handles async completion. */
			(void) spec_startio(vp, pp2, io_off2, io_len2,
			    B_READ | B_ASYNC);
		}
	}

	/* Async caller: don't wait for the main I/O either. */
	if (pl == NULL)
		return (err);

	if (bp != NULL) {
		err = biowait(bp);
		pageio_done(bp);

		if (err) {
			if (pp != NULL)
				pvn_read_done(pp, B_ERROR);
			return (err);
		}
	}

	if (pagefound) {
		se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
		/*
		 * Page exists in the cache, acquire the appropriate
		 * lock.  If this fails, start all over again.
		 */

		if ((pp = page_lookup(vp, off, se)) == NULL) {
			spec_lostpage++;
			goto reread;
		}
		pl[0] = pp;
		pl[1] = NULL;

		sp->s_nextr = off + PAGESIZE;
		return (0);
	}

	if (pp != NULL)
		pvn_plist_init(pp, pl, plsz, off, io_len1, rw);
	return (0);
}
20047c478bd9Sstevel@tonic-gate 
20057c478bd9Sstevel@tonic-gate /*
20067c478bd9Sstevel@tonic-gate  * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED, B_FORCE}.
20077c478bd9Sstevel@tonic-gate  * If len == 0, do from off to EOF.
20087c478bd9Sstevel@tonic-gate  *
20097c478bd9Sstevel@tonic-gate  * The normal cases should be len == 0 & off == 0 (entire vp list),
20107c478bd9Sstevel@tonic-gate  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
20117c478bd9Sstevel@tonic-gate  * (from pageout).
20127c478bd9Sstevel@tonic-gate  */
/*ARGSUSED5*/
int
spec_putpage(
	struct vnode *vp,
	offset_t	off,
	size_t		len,
	int		flags,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *cvp;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;	/* for lint */
	int err = 0;
	u_offset_t size;
	u_offset_t tmpoff;

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	cvp = sp->s_commonvp;
	size = SPEC_SIZE(VTOS(cvp));

	/* Nothing to push if the vnode is clean or off is past the device. */
	if (!vn_has_cached_data(vp) || off >= size)
		return (0);

	/* Only block devices cache pages, and only on the common vnode. */
	ASSERT(vp->v_type == VBLK && cvp == vp);
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTPAGE,
	    "specfs putpage:vp %p off %llx len %ld snode %p",
	    vp, off, len, sp);

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, off, spec_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.  We set limits so
		 * that we kluster to klustsize boundaries.
		 */
		eoff = off + len;
		for (io_off = off; io_off < eoff && io_off < size;
		    io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			/* Clean or absent page: just step one page ahead. */
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = spec_putapage(vp, pp, &tmpoff, &io_len,
				    flags, cr);
				io_off = tmpoff;
				if (err != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	return (err);
}
21007c478bd9Sstevel@tonic-gate 
21017c478bd9Sstevel@tonic-gate 
21027c478bd9Sstevel@tonic-gate /*
21037c478bd9Sstevel@tonic-gate  * Write out a single page, possibly klustering adjacent
21047c478bd9Sstevel@tonic-gate  * dirty pages.
21057c478bd9Sstevel@tonic-gate  */
/*ARGSUSED5*/
static int
spec_putapage(
	struct vnode	*vp,
	page_t		*pp,
	u_offset_t	*offp,		/* return value */
	size_t		*lenp,		/* return value */
	int		flags,
	struct cred	*cr)
{
	struct snode *sp = VTOS(vp);
	u_offset_t io_off;
	size_t io_len;
	size_t blksz;
	u_offset_t blkoff;
	int err = 0;
	struct buf *bp;
	u_offset_t size;
	size_t adj_klustsize;
	u_offset_t tmpoff;

	/*
	 * Destroy read ahead value since we are really going to write.
	 */
	sp->s_nextr = 0;
	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	adj_klustsize = klustsize;

	/* Round the page's offset down to a kluster boundary. */
	blkoff = (pp->p_offset / adj_klustsize) * adj_klustsize;

	/* Clamp the kluster to the device size. */
	if (blkoff + adj_klustsize <= size)
		blksz = adj_klustsize;
	else
		blksz = size - blkoff;

	/*
	 * Find a kluster that fits in one contiguous chunk.
	 */
	pp = pvn_write_kluster(vp, pp, &tmpoff, &io_len, blkoff,
	    blksz, flags);
	io_off = tmpoff;

	/*
	 * Check for page length rounding problems
	 * XXX - Is this necessary?
	 */
	if (io_off + io_len > size) {
		ASSERT((io_off + io_len) - size < PAGESIZE);
		io_len = size - io_off;
	}

	bp = spec_startio(vp, pp, io_off, io_len, B_WRITE | flags);

	/*
	 * Wait for i/o to complete if the request is not B_ASYNC.
	 */
	if ((flags & B_ASYNC) == 0) {
		err = biowait(bp);
		pageio_done(bp);
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}

	/* Report back the range actually written, if the caller cares. */
	if (offp)
		*offp = io_off;
	if (lenp)
		*lenp = io_len;
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTAPAGE,
	    "specfs putapage:vp %p offp %p snode %p err %d",
	    vp, offp, sp, err);
	return (err);
}
21787c478bd9Sstevel@tonic-gate 
21797c478bd9Sstevel@tonic-gate /*
21807c478bd9Sstevel@tonic-gate  * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
21817c478bd9Sstevel@tonic-gate  */
21827c478bd9Sstevel@tonic-gate static struct buf *
21837c478bd9Sstevel@tonic-gate spec_startio(
21847c478bd9Sstevel@tonic-gate 	struct vnode *vp,
21857c478bd9Sstevel@tonic-gate 	page_t		*pp,
21867c478bd9Sstevel@tonic-gate 	u_offset_t	io_off,
21877c478bd9Sstevel@tonic-gate 	size_t		io_len,
21887c478bd9Sstevel@tonic-gate 	int		flags)
21897c478bd9Sstevel@tonic-gate {
21907c478bd9Sstevel@tonic-gate 	struct buf *bp;
21917c478bd9Sstevel@tonic-gate 
21927c478bd9Sstevel@tonic-gate 	bp = pageio_setup(pp, io_len, vp, flags);
21937c478bd9Sstevel@tonic-gate 
21947c478bd9Sstevel@tonic-gate 	bp->b_edev = vp->v_rdev;
21957c478bd9Sstevel@tonic-gate 	bp->b_dev = cmpdev(vp->v_rdev);
21967c478bd9Sstevel@tonic-gate 	bp->b_blkno = btodt(io_off);
21977c478bd9Sstevel@tonic-gate 	bp->b_un.b_addr = (caddr_t)0;
21987c478bd9Sstevel@tonic-gate 
21997c478bd9Sstevel@tonic-gate 	(void) bdev_strategy(bp);
22007c478bd9Sstevel@tonic-gate 
22017c478bd9Sstevel@tonic-gate 	if (flags & B_READ)
22027c478bd9Sstevel@tonic-gate 		lwp_stat_update(LWP_STAT_INBLK, 1);
22037c478bd9Sstevel@tonic-gate 	else
22047c478bd9Sstevel@tonic-gate 		lwp_stat_update(LWP_STAT_OUBLK, 1);
22057c478bd9Sstevel@tonic-gate 
22067c478bd9Sstevel@tonic-gate 	return (bp);
22077c478bd9Sstevel@tonic-gate }
22087c478bd9Sstevel@tonic-gate 
22097c478bd9Sstevel@tonic-gate static int
22107c478bd9Sstevel@tonic-gate spec_poll(
22117c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
22127c478bd9Sstevel@tonic-gate 	short		events,
22137c478bd9Sstevel@tonic-gate 	int		anyyet,
22147c478bd9Sstevel@tonic-gate 	short		*reventsp,
2215da6c28aaSamw 	struct pollhead **phpp,
2216da6c28aaSamw 	caller_context_t *ct)
22177c478bd9Sstevel@tonic-gate {
22187c478bd9Sstevel@tonic-gate 	dev_t dev;
22197c478bd9Sstevel@tonic-gate 	int error;
22207c478bd9Sstevel@tonic-gate 
22217c478bd9Sstevel@tonic-gate 	if (vp->v_type == VBLK)
2222da6c28aaSamw 		error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
22237c478bd9Sstevel@tonic-gate 	else {
22247c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VCHR);
22257c478bd9Sstevel@tonic-gate 		dev = vp->v_rdev;
2226349dcea3SGarrett D'Amore 		if (vp->v_stream) {
22277c478bd9Sstevel@tonic-gate 			ASSERT(vp->v_stream != NULL);
22287c478bd9Sstevel@tonic-gate 			error = strpoll(vp->v_stream, events, anyyet,
22297c478bd9Sstevel@tonic-gate 			    reventsp, phpp);
22307c478bd9Sstevel@tonic-gate 		} else if (devopsp[getmajor(dev)]->devo_cb_ops->cb_chpoll) {
22317c478bd9Sstevel@tonic-gate 			error = cdev_poll(dev, events, anyyet, reventsp, phpp);
22327c478bd9Sstevel@tonic-gate 		} else {
2233da6c28aaSamw 			error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
22347c478bd9Sstevel@tonic-gate 		}
22357c478bd9Sstevel@tonic-gate 	}
22367c478bd9Sstevel@tonic-gate 	return (error);
22377c478bd9Sstevel@tonic-gate }
22387c478bd9Sstevel@tonic-gate 
22397c478bd9Sstevel@tonic-gate /*
22407c478bd9Sstevel@tonic-gate  * This routine is called through the cdevsw[] table to handle
22417c478bd9Sstevel@tonic-gate  * traditional mmap'able devices that support a d_mmap function.
22427c478bd9Sstevel@tonic-gate  */
22437c478bd9Sstevel@tonic-gate /*ARGSUSED8*/
22447c478bd9Sstevel@tonic-gate int
22457c478bd9Sstevel@tonic-gate spec_segmap(
22467c478bd9Sstevel@tonic-gate 	dev_t dev,
22477c478bd9Sstevel@tonic-gate 	off_t off,
22487c478bd9Sstevel@tonic-gate 	struct as *as,
22497c478bd9Sstevel@tonic-gate 	caddr_t *addrp,
22507c478bd9Sstevel@tonic-gate 	off_t len,
22517c478bd9Sstevel@tonic-gate 	uint_t prot,
22527c478bd9Sstevel@tonic-gate 	uint_t maxprot,
22537c478bd9Sstevel@tonic-gate 	uint_t flags,
22547c478bd9Sstevel@tonic-gate 	struct cred *cred)
22557c478bd9Sstevel@tonic-gate {
22567c478bd9Sstevel@tonic-gate 	struct segdev_crargs dev_a;
22577c478bd9Sstevel@tonic-gate 	int (*mapfunc)(dev_t dev, off_t off, int prot);
22587c478bd9Sstevel@tonic-gate 	size_t i;
22597c478bd9Sstevel@tonic-gate 	int	error;
22607c478bd9Sstevel@tonic-gate 
22617c478bd9Sstevel@tonic-gate 	if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
22627c478bd9Sstevel@tonic-gate 		return (ENODEV);
22637c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_SEGMAP,
22647c478bd9Sstevel@tonic-gate 	    "specfs segmap:dev %x as %p len %lx prot %x",
22657c478bd9Sstevel@tonic-gate 	    dev, as, len, prot);
22667c478bd9Sstevel@tonic-gate 
22677c478bd9Sstevel@tonic-gate 	/*
22687c478bd9Sstevel@tonic-gate 	 * Character devices that support the d_mmap
22697c478bd9Sstevel@tonic-gate 	 * interface can only be mmap'ed shared.
22707c478bd9Sstevel@tonic-gate 	 */
22717c478bd9Sstevel@tonic-gate 	if ((flags & MAP_TYPE) != MAP_SHARED)
22727c478bd9Sstevel@tonic-gate 		return (EINVAL);
22737c478bd9Sstevel@tonic-gate 
22747c478bd9Sstevel@tonic-gate 	/*
22757c478bd9Sstevel@tonic-gate 	 * Check to ensure that the entire range is
22767c478bd9Sstevel@tonic-gate 	 * legal and we are not trying to map in
22777c478bd9Sstevel@tonic-gate 	 * more than the device will let us.
22787c478bd9Sstevel@tonic-gate 	 */
22797c478bd9Sstevel@tonic-gate 	for (i = 0; i < len; i += PAGESIZE) {
22807c478bd9Sstevel@tonic-gate 		if (cdev_mmap(mapfunc, dev, off + i, maxprot) == -1)
22817c478bd9Sstevel@tonic-gate 			return (ENXIO);
22827c478bd9Sstevel@tonic-gate 	}
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate 	as_rangelock(as);
228560946fe0Smec 	/* Pick an address w/o worrying about any vac alignment constraints. */
228660946fe0Smec 	error = choose_addr(as, addrp, len, off, ADDR_NOVACALIGN, flags);
228760946fe0Smec 	if (error != 0) {
22887c478bd9Sstevel@tonic-gate 		as_rangeunlock(as);
228960946fe0Smec 		return (error);
22907c478bd9Sstevel@tonic-gate 	}
22917c478bd9Sstevel@tonic-gate 
22927c478bd9Sstevel@tonic-gate 	dev_a.mapfunc = mapfunc;
22937c478bd9Sstevel@tonic-gate 	dev_a.dev = dev;
22947c478bd9Sstevel@tonic-gate 	dev_a.offset = off;
22957c478bd9Sstevel@tonic-gate 	dev_a.prot = (uchar_t)prot;
22967c478bd9Sstevel@tonic-gate 	dev_a.maxprot = (uchar_t)maxprot;
22977c478bd9Sstevel@tonic-gate 	dev_a.hat_flags = 0;
22987c478bd9Sstevel@tonic-gate 	dev_a.hat_attr = 0;
22997c478bd9Sstevel@tonic-gate 	dev_a.devmap_data = NULL;
23007c478bd9Sstevel@tonic-gate 
23017c478bd9Sstevel@tonic-gate 	error = as_map(as, *addrp, len, segdev_create, &dev_a);
23027c478bd9Sstevel@tonic-gate 	as_rangeunlock(as);
23037c478bd9Sstevel@tonic-gate 	return (error);
23047c478bd9Sstevel@tonic-gate }
23057c478bd9Sstevel@tonic-gate 
23067c478bd9Sstevel@tonic-gate int
23077c478bd9Sstevel@tonic-gate spec_char_map(
23087c478bd9Sstevel@tonic-gate 	dev_t dev,
23097c478bd9Sstevel@tonic-gate 	offset_t off,
23107c478bd9Sstevel@tonic-gate 	struct as *as,
23117c478bd9Sstevel@tonic-gate 	caddr_t *addrp,
23127c478bd9Sstevel@tonic-gate 	size_t len,
23137c478bd9Sstevel@tonic-gate 	uchar_t prot,
23147c478bd9Sstevel@tonic-gate 	uchar_t maxprot,
23157c478bd9Sstevel@tonic-gate 	uint_t flags,
23167c478bd9Sstevel@tonic-gate 	struct cred *cred)
23177c478bd9Sstevel@tonic-gate {
23187c478bd9Sstevel@tonic-gate 	int error = 0;
23197c478bd9Sstevel@tonic-gate 	major_t maj = getmajor(dev);
23207c478bd9Sstevel@tonic-gate 	int map_flag;
23217c478bd9Sstevel@tonic-gate 	int (*segmap)(dev_t, off_t, struct as *,
23227c478bd9Sstevel@tonic-gate 	    caddr_t *, off_t, uint_t, uint_t, uint_t, cred_t *);
23237c478bd9Sstevel@tonic-gate 	int (*devmap)(dev_t, devmap_cookie_t, offset_t,
23247c478bd9Sstevel@tonic-gate 	    size_t, size_t *, uint_t);
23257c478bd9Sstevel@tonic-gate 	int (*mmap)(dev_t dev, off_t off, int prot);
23267c478bd9Sstevel@tonic-gate 
23277c478bd9Sstevel@tonic-gate 	/*
23287c478bd9Sstevel@tonic-gate 	 * Character device: let the device driver
23297c478bd9Sstevel@tonic-gate 	 * pick the appropriate segment driver.
23307c478bd9Sstevel@tonic-gate 	 *
23317c478bd9Sstevel@tonic-gate 	 * 4.x compat.: allow 'NULL' cb_segmap => spec_segmap
23327c478bd9Sstevel@tonic-gate 	 * Kindness: allow 'nulldev' cb_segmap => spec_segmap
23337c478bd9Sstevel@tonic-gate 	 */
23347c478bd9Sstevel@tonic-gate 	segmap = devopsp[maj]->devo_cb_ops->cb_segmap;
23357c478bd9Sstevel@tonic-gate 	if (segmap == NULL || segmap == nulldev || segmap == nodev) {
23367c478bd9Sstevel@tonic-gate 		mmap = devopsp[maj]->devo_cb_ops->cb_mmap;
23377c478bd9Sstevel@tonic-gate 		map_flag = devopsp[maj]->devo_cb_ops->cb_flag;
23387c478bd9Sstevel@tonic-gate 
23397c478bd9Sstevel@tonic-gate 		/*
23407c478bd9Sstevel@tonic-gate 		 * Use old mmap framework if the driver has both mmap
23417c478bd9Sstevel@tonic-gate 		 * and devmap entry points.  This is to prevent the
23427c478bd9Sstevel@tonic-gate 		 * system from calling invalid devmap entry point
23437c478bd9Sstevel@tonic-gate 		 * for some drivers that might have put garbage in the
23447c478bd9Sstevel@tonic-gate 		 * devmap entry point.
23457c478bd9Sstevel@tonic-gate 		 */
23467c478bd9Sstevel@tonic-gate 		if ((map_flag & D_DEVMAP) || mmap == NULL ||
23477c478bd9Sstevel@tonic-gate 		    mmap == nulldev || mmap == nodev) {
23487c478bd9Sstevel@tonic-gate 			devmap = devopsp[maj]->devo_cb_ops->cb_devmap;
23497c478bd9Sstevel@tonic-gate 
23507c478bd9Sstevel@tonic-gate 			/*
23517c478bd9Sstevel@tonic-gate 			 * If driver provides devmap entry point in
23527c478bd9Sstevel@tonic-gate 			 * cb_ops but not xx_segmap(9E), call
23537c478bd9Sstevel@tonic-gate 			 * devmap_setup with default settings
23547c478bd9Sstevel@tonic-gate 			 * (NULL) for callback_ops and driver
23557c478bd9Sstevel@tonic-gate 			 * callback private data
23567c478bd9Sstevel@tonic-gate 			 */
23577c478bd9Sstevel@tonic-gate 			if (devmap == nodev || devmap == NULL ||
23587c478bd9Sstevel@tonic-gate 			    devmap == nulldev)
23597c478bd9Sstevel@tonic-gate 				return (ENODEV);
23607c478bd9Sstevel@tonic-gate 
23617c478bd9Sstevel@tonic-gate 			error = devmap_setup(dev, off, as, addrp,
23627c478bd9Sstevel@tonic-gate 			    len, prot, maxprot, flags, cred);
23637c478bd9Sstevel@tonic-gate 
23647c478bd9Sstevel@tonic-gate 			return (error);
23657c478bd9Sstevel@tonic-gate 		} else
23667c478bd9Sstevel@tonic-gate 			segmap = spec_segmap;
23677c478bd9Sstevel@tonic-gate 	} else
23687c478bd9Sstevel@tonic-gate 		segmap = cdev_segmap;
23697c478bd9Sstevel@tonic-gate 
23707c478bd9Sstevel@tonic-gate 	return ((*segmap)(dev, (off_t)off, as, addrp, len, prot,
23717c478bd9Sstevel@tonic-gate 	    maxprot, flags, cred));
23727c478bd9Sstevel@tonic-gate }
23737c478bd9Sstevel@tonic-gate 
2374da6c28aaSamw /*ARGSUSED9*/
23757c478bd9Sstevel@tonic-gate static int
23767c478bd9Sstevel@tonic-gate spec_map(
23777c478bd9Sstevel@tonic-gate 	struct vnode *vp,
23787c478bd9Sstevel@tonic-gate 	offset_t off,
23797c478bd9Sstevel@tonic-gate 	struct as *as,
23807c478bd9Sstevel@tonic-gate 	caddr_t *addrp,
23817c478bd9Sstevel@tonic-gate 	size_t len,
23827c478bd9Sstevel@tonic-gate 	uchar_t prot,
23837c478bd9Sstevel@tonic-gate 	uchar_t maxprot,
23847c478bd9Sstevel@tonic-gate 	uint_t flags,
2385da6c28aaSamw 	struct cred *cred,
2386da6c28aaSamw 	caller_context_t *ct)
23877c478bd9Sstevel@tonic-gate {
23887c478bd9Sstevel@tonic-gate 	int error = 0;
238925e8c5aaSvikram 	struct snode *sp = VTOS(vp);
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate 	if (vp->v_flag & VNOMAP)
23927c478bd9Sstevel@tonic-gate 		return (ENOSYS);
23937c478bd9Sstevel@tonic-gate 
239425e8c5aaSvikram 	/* fail map with ENXIO if the device is fenced off */
239525e8c5aaSvikram 	if (S_ISFENCED(sp))
239625e8c5aaSvikram 		return (ENXIO);
239725e8c5aaSvikram 
23987c478bd9Sstevel@tonic-gate 	/*
23997c478bd9Sstevel@tonic-gate 	 * If file is locked, fail mapping attempt.
24007c478bd9Sstevel@tonic-gate 	 */
24017c478bd9Sstevel@tonic-gate 	if (vn_has_flocks(vp))
24027c478bd9Sstevel@tonic-gate 		return (EAGAIN);
24037c478bd9Sstevel@tonic-gate 
24047c478bd9Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
24057c478bd9Sstevel@tonic-gate 		return (spec_char_map(vp->v_rdev, off, as, addrp, len, prot,
24067c478bd9Sstevel@tonic-gate 		    maxprot, flags, cred));
24077c478bd9Sstevel@tonic-gate 	} else if (vp->v_type == VBLK) {
24087c478bd9Sstevel@tonic-gate 		struct segvn_crargs vn_a;
24097c478bd9Sstevel@tonic-gate 		struct vnode *cvp;
24107c478bd9Sstevel@tonic-gate 		struct snode *sp;
24117c478bd9Sstevel@tonic-gate 
24127c478bd9Sstevel@tonic-gate 		/*
24137c478bd9Sstevel@tonic-gate 		 * Block device, use segvn mapping to the underlying commonvp
24147c478bd9Sstevel@tonic-gate 		 * for pages.
24157c478bd9Sstevel@tonic-gate 		 */
24167c478bd9Sstevel@tonic-gate 		if (off > spec_maxoffset(vp))
24170dee76a0Speterte 			return (ENXIO);
24187c478bd9Sstevel@tonic-gate 
24197c478bd9Sstevel@tonic-gate 		sp = VTOS(vp);
24207c478bd9Sstevel@tonic-gate 		cvp = sp->s_commonvp;
24217c478bd9Sstevel@tonic-gate 		ASSERT(cvp != NULL);
24227c478bd9Sstevel@tonic-gate 
2423ae115bc7Smrj 		if (off < 0 || ((offset_t)(off + len) < 0))
24240dee76a0Speterte 			return (ENXIO);
24257c478bd9Sstevel@tonic-gate 
24267c478bd9Sstevel@tonic-gate 		as_rangelock(as);
242760946fe0Smec 		error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
242860946fe0Smec 		if (error != 0) {
24297c478bd9Sstevel@tonic-gate 			as_rangeunlock(as);
243060946fe0Smec 			return (error);
24317c478bd9Sstevel@tonic-gate 		}
24327c478bd9Sstevel@tonic-gate 
24337c478bd9Sstevel@tonic-gate 		vn_a.vp = cvp;
24347c478bd9Sstevel@tonic-gate 		vn_a.offset = off;
24357c478bd9Sstevel@tonic-gate 		vn_a.type = flags & MAP_TYPE;
24367c478bd9Sstevel@tonic-gate 		vn_a.prot = (uchar_t)prot;
24377c478bd9Sstevel@tonic-gate 		vn_a.maxprot = (uchar_t)maxprot;
24387c478bd9Sstevel@tonic-gate 		vn_a.flags = flags & ~MAP_TYPE;
24397c478bd9Sstevel@tonic-gate 		vn_a.cred = cred;
24407c478bd9Sstevel@tonic-gate 		vn_a.amp = NULL;
24417c478bd9Sstevel@tonic-gate 		vn_a.szc = 0;
24427c478bd9Sstevel@tonic-gate 		vn_a.lgrp_mem_policy_flags = 0;
24437c478bd9Sstevel@tonic-gate 
24447c478bd9Sstevel@tonic-gate 		error = as_map(as, *addrp, len, segvn_create, &vn_a);
24457c478bd9Sstevel@tonic-gate 		as_rangeunlock(as);
24467c478bd9Sstevel@tonic-gate 	} else
24477c478bd9Sstevel@tonic-gate 		return (ENODEV);
24487c478bd9Sstevel@tonic-gate 
24497c478bd9Sstevel@tonic-gate 	return (error);
24507c478bd9Sstevel@tonic-gate }
24517c478bd9Sstevel@tonic-gate 
24527c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
24537c478bd9Sstevel@tonic-gate static int
24547c478bd9Sstevel@tonic-gate spec_addmap(
24557c478bd9Sstevel@tonic-gate 	struct vnode *vp,	/* the common vnode */
24567c478bd9Sstevel@tonic-gate 	offset_t off,
24577c478bd9Sstevel@tonic-gate 	struct as *as,
24587c478bd9Sstevel@tonic-gate 	caddr_t addr,
24597c478bd9Sstevel@tonic-gate 	size_t len,		/* how many bytes to add */
24607c478bd9Sstevel@tonic-gate 	uchar_t prot,
24617c478bd9Sstevel@tonic-gate 	uchar_t maxprot,
24627c478bd9Sstevel@tonic-gate 	uint_t flags,
2463da6c28aaSamw 	struct cred *cred,
2464da6c28aaSamw 	caller_context_t *ct)
24657c478bd9Sstevel@tonic-gate {
24667c478bd9Sstevel@tonic-gate 	int error = 0;
24677c478bd9Sstevel@tonic-gate 	struct snode *csp = VTOS(vp);
24687c478bd9Sstevel@tonic-gate 	ulong_t npages;
24697c478bd9Sstevel@tonic-gate 
24707c478bd9Sstevel@tonic-gate 	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
24717c478bd9Sstevel@tonic-gate 
24727c478bd9Sstevel@tonic-gate 	/*
24737c478bd9Sstevel@tonic-gate 	 * XXX	Given the above assertion, this might not
24747c478bd9Sstevel@tonic-gate 	 *	be a particularly sensible thing to test.
24757c478bd9Sstevel@tonic-gate 	 */
24767c478bd9Sstevel@tonic-gate 	if (vp->v_flag & VNOMAP)
24777c478bd9Sstevel@tonic-gate 		return (ENOSYS);
24787c478bd9Sstevel@tonic-gate 
247925e8c5aaSvikram 	/* fail with EIO if the device is fenced off */
248025e8c5aaSvikram 	if (S_ISFENCED(csp))
248125e8c5aaSvikram 		return (EIO);
248225e8c5aaSvikram 
24837c478bd9Sstevel@tonic-gate 	npages = btopr(len);
24847c478bd9Sstevel@tonic-gate 	LOCK_CSP(csp);
24857c478bd9Sstevel@tonic-gate 	csp->s_mapcnt += npages;
24867c478bd9Sstevel@tonic-gate 
24877c478bd9Sstevel@tonic-gate 	UNLOCK_CSP(csp);
24887c478bd9Sstevel@tonic-gate 	return (error);
24897c478bd9Sstevel@tonic-gate }
24907c478bd9Sstevel@tonic-gate 
/*ARGSUSED1*/
static int
spec_delmap(
	struct vnode *vp,	/* the common vnode */
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,		/* how many bytes to take away */
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct snode *csp = VTOS(vp);
	ulong_t npages;
	long mcnt;

	/* segdev passes us the common vp */

	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);

	/* allow delmap to succeed even if device fenced off */

	/*
	 * XXX	Given the above assertion, this might not
	 *	be a particularly sensible thing to test..
	 */
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	npages = btopr(len);

	/*
	 * Take both the snode open/close serialization (LOCK_CSP) and the
	 * state mutex before decrementing the mapping count, so the "last
	 * mapping gone" decision below cannot race with a concurrent
	 * open/close or addmap.
	 */
	LOCK_CSP(csp);
	mutex_enter(&csp->s_lock);
	mcnt = (csp->s_mapcnt -= npages);

	if (mcnt == 0) {
		/*
		 * Call the close routine when the last reference of any
		 * kind through any [s, v]node goes away.  The s_dip hold
		 * on the devinfo node is released when the vnode is
		 * destroyed.
		 */
		if (csp->s_count == 0) {
			csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

			/* See comment in spec_close() */
			if (csp->s_flag & (SCLONE | SSELFCLONE))
				csp->s_flag &= ~SDIPSET;

			/*
			 * Drop s_lock across device_close(): the driver's
			 * close entry point may block, and LOCK_CSP is
			 * still held to keep the close serialized.
			 */
			mutex_exit(&csp->s_lock);

			/* best-effort close; failure is not reported here */
			(void) device_close(vp, 0, cred);
		} else
			mutex_exit(&csp->s_lock);

		/* reacquire s_lock; UNLOCK_CSP_LOCK_HELD below expects it */
		mutex_enter(&csp->s_lock);
	}
	/* mapcnt must never be driven negative by a delmap */
	ASSERT(mcnt >= 0);

	UNLOCK_CSP_LOCK_HELD(csp);
	mutex_exit(&csp->s_lock);

	/* delmap always succeeds once the count is adjusted */
	return (0);
}
25577c478bd9Sstevel@tonic-gate 
2558da6c28aaSamw /*ARGSUSED4*/
25597c478bd9Sstevel@tonic-gate static int
2560da6c28aaSamw spec_dump(
2561da6c28aaSamw 	struct vnode *vp,
2562da6c28aaSamw 	caddr_t addr,
2563d7334e51Srm15945 	offset_t bn,
2564d7334e51Srm15945 	offset_t count,
2565da6c28aaSamw 	caller_context_t *ct)
25667c478bd9Sstevel@tonic-gate {
256725e8c5aaSvikram 	/* allow dump to succeed even if device fenced off */
256825e8c5aaSvikram 
25697c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VBLK);
2570d7334e51Srm15945 	return (bdev_dump(vp->v_rdev, addr, (daddr_t)bn, (int)count));
25717c478bd9Sstevel@tonic-gate }
25727c478bd9Sstevel@tonic-gate 
25737c478bd9Sstevel@tonic-gate 
25747c478bd9Sstevel@tonic-gate /*
25757c478bd9Sstevel@tonic-gate  * Do i/o on the given page list from/to vp, io_off for io_len.
25767c478bd9Sstevel@tonic-gate  * Flags are composed of:
25777c478bd9Sstevel@tonic-gate  * 	{B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_READ, B_WRITE}
25787c478bd9Sstevel@tonic-gate  * If B_ASYNC is not set i/o is waited for.
25797c478bd9Sstevel@tonic-gate  */
25807c478bd9Sstevel@tonic-gate /*ARGSUSED5*/
25817c478bd9Sstevel@tonic-gate static int
25827c478bd9Sstevel@tonic-gate spec_pageio(
25837c478bd9Sstevel@tonic-gate 	struct vnode *vp,
25847c478bd9Sstevel@tonic-gate 	page_t	*pp,
25857c478bd9Sstevel@tonic-gate 	u_offset_t io_off,
25867c478bd9Sstevel@tonic-gate 	size_t	io_len,
25877c478bd9Sstevel@tonic-gate 	int	flags,
2588da6c28aaSamw 	cred_t	*cr,
2589da6c28aaSamw 	caller_context_t *ct)
25907c478bd9Sstevel@tonic-gate {
25917c478bd9Sstevel@tonic-gate 	struct buf *bp = NULL;
25927c478bd9Sstevel@tonic-gate 	int err = 0;
25937c478bd9Sstevel@tonic-gate 
25947c478bd9Sstevel@tonic-gate 	if (pp == NULL)
25957c478bd9Sstevel@tonic-gate 		return (EINVAL);
25967c478bd9Sstevel@tonic-gate 
25977c478bd9Sstevel@tonic-gate 	bp = spec_startio(vp, pp, io_off, io_len, flags);
25987c478bd9Sstevel@tonic-gate 
25997c478bd9Sstevel@tonic-gate 	/*
26007c478bd9Sstevel@tonic-gate 	 * Wait for i/o to complete if the request is not B_ASYNC.
26017c478bd9Sstevel@tonic-gate 	 */
26027c478bd9Sstevel@tonic-gate 	if ((flags & B_ASYNC) == 0) {
26037c478bd9Sstevel@tonic-gate 		err = biowait(bp);
26047c478bd9Sstevel@tonic-gate 		pageio_done(bp);
26057c478bd9Sstevel@tonic-gate 	}
26067c478bd9Sstevel@tonic-gate 	return (err);
26077c478bd9Sstevel@tonic-gate }
26087c478bd9Sstevel@tonic-gate 
26097c478bd9Sstevel@tonic-gate /*
26107c478bd9Sstevel@tonic-gate  * Set ACL on underlying vnode if one exists, or return ENOSYS otherwise.
26117c478bd9Sstevel@tonic-gate  */
26127c478bd9Sstevel@tonic-gate int
2613da6c28aaSamw spec_setsecattr(
2614da6c28aaSamw 	struct vnode *vp,
2615da6c28aaSamw 	vsecattr_t *vsap,
2616da6c28aaSamw 	int flag,
2617da6c28aaSamw 	struct cred *cr,
2618da6c28aaSamw 	caller_context_t *ct)
26197c478bd9Sstevel@tonic-gate {
26207c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
26217c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26227c478bd9Sstevel@tonic-gate 	int error;
26237c478bd9Sstevel@tonic-gate 
262425e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
262525e8c5aaSvikram 	if (S_ISFENCED(sp))
262625e8c5aaSvikram 		return (ENXIO);
262725e8c5aaSvikram 
26287c478bd9Sstevel@tonic-gate 	/*
26297c478bd9Sstevel@tonic-gate 	 * The acl(2) system calls VOP_RWLOCK on the file before setting an
26307c478bd9Sstevel@tonic-gate 	 * ACL, but since specfs does not serialize reads and writes, this
26317c478bd9Sstevel@tonic-gate 	 * VOP does not do anything.  However, some backing file systems may
26327c478bd9Sstevel@tonic-gate 	 * expect the lock to be held before setting an ACL, so it is taken
26337c478bd9Sstevel@tonic-gate 	 * here privately to avoid serializing specfs reads and writes.
26347c478bd9Sstevel@tonic-gate 	 */
26357c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL) {
2636da6c28aaSamw 		(void) VOP_RWLOCK(realvp, V_WRITELOCK_TRUE, ct);
2637da6c28aaSamw 		error = VOP_SETSECATTR(realvp, vsap, flag, cr, ct);
2638da6c28aaSamw 		(void) VOP_RWUNLOCK(realvp, V_WRITELOCK_TRUE, ct);
26397c478bd9Sstevel@tonic-gate 		return (error);
26407c478bd9Sstevel@tonic-gate 	} else
26417c478bd9Sstevel@tonic-gate 		return (fs_nosys());
26427c478bd9Sstevel@tonic-gate }
26437c478bd9Sstevel@tonic-gate 
26447c478bd9Sstevel@tonic-gate /*
26457c478bd9Sstevel@tonic-gate  * Get ACL from underlying vnode if one exists, or fabricate it from
26467c478bd9Sstevel@tonic-gate  * the permissions returned by spec_getattr() otherwise.
26477c478bd9Sstevel@tonic-gate  */
26487c478bd9Sstevel@tonic-gate int
2649da6c28aaSamw spec_getsecattr(
2650da6c28aaSamw 	struct vnode *vp,
2651da6c28aaSamw 	vsecattr_t *vsap,
2652da6c28aaSamw 	int flag,
2653da6c28aaSamw 	struct cred *cr,
2654da6c28aaSamw 	caller_context_t *ct)
26557c478bd9Sstevel@tonic-gate {
26567c478bd9Sstevel@tonic-gate 	struct vnode *realvp;
26577c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26587c478bd9Sstevel@tonic-gate 
265925e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
266025e8c5aaSvikram 	if (S_ISFENCED(sp))
266125e8c5aaSvikram 		return (ENXIO);
266225e8c5aaSvikram 
26637c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
2664da6c28aaSamw 		return (VOP_GETSECATTR(realvp, vsap, flag, cr, ct));
26657c478bd9Sstevel@tonic-gate 	else
2666da6c28aaSamw 		return (fs_fab_acl(vp, vsap, flag, cr, ct));
26677c478bd9Sstevel@tonic-gate }
26687c478bd9Sstevel@tonic-gate 
26697c478bd9Sstevel@tonic-gate int
2670da6c28aaSamw spec_pathconf(
2671da6c28aaSamw 	vnode_t *vp,
2672da6c28aaSamw 	int cmd,
2673da6c28aaSamw 	ulong_t *valp,
2674da6c28aaSamw 	cred_t *cr,
2675da6c28aaSamw 	caller_context_t *ct)
26767c478bd9Sstevel@tonic-gate {
26777c478bd9Sstevel@tonic-gate 	vnode_t *realvp;
26787c478bd9Sstevel@tonic-gate 	struct snode *sp = VTOS(vp);
26797c478bd9Sstevel@tonic-gate 
268025e8c5aaSvikram 	/* fail with ENXIO if the device is fenced off */
268125e8c5aaSvikram 	if (S_ISFENCED(sp))
268225e8c5aaSvikram 		return (ENXIO);
268325e8c5aaSvikram 
26847c478bd9Sstevel@tonic-gate 	if ((realvp = sp->s_realvp) != NULL)
2685da6c28aaSamw 		return (VOP_PATHCONF(realvp, cmd, valp, cr, ct));
26867c478bd9Sstevel@tonic-gate 	else
2687da6c28aaSamw 		return (fs_pathconf(vp, cmd, valp, cr, ct));
26887c478bd9Sstevel@tonic-gate }
2689