1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2017 by Delphix. All rights reserved.
26 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
27 */
28
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 /*
33 * University Copyright- Copyright (c) 1982, 1986, 1988
34 * The Regents of the University of California
35 * All Rights Reserved
36 *
37 * University Acknowledgment- Portions of this document are derived from
38 * software developed by the University of California, Berkeley, and its
39 * contributors.
40 */
41
42 #include <sys/types.h>
43 #include <sys/thread.h>
44 #include <sys/t_lock.h>
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/bitmap.h>
48 #include <sys/buf.h>
49 #include <sys/cmn_err.h>
50 #include <sys/conf.h>
51 #include <sys/ddi.h>
52 #include <sys/debug.h>
53 #include <sys/dkio.h>
54 #include <sys/errno.h>
55 #include <sys/time.h>
56 #include <sys/fcntl.h>
57 #include <sys/flock.h>
58 #include <sys/file.h>
59 #include <sys/kmem.h>
60 #include <sys/mman.h>
61 #include <sys/open.h>
62 #include <sys/swap.h>
63 #include <sys/sysmacros.h>
64 #include <sys/uio.h>
65 #include <sys/vfs.h>
66 #include <sys/vfs_opreg.h>
67 #include <sys/vnode.h>
68 #include <sys/stat.h>
69 #include <sys/poll.h>
70 #include <sys/stream.h>
71 #include <sys/strsubr.h>
72 #include <sys/policy.h>
73 #include <sys/devpolicy.h>
74
75 #include <sys/proc.h>
76 #include <sys/user.h>
77 #include <sys/session.h>
78 #include <sys/vmsystm.h>
79 #include <sys/vtrace.h>
80 #include <sys/pathname.h>
81
82 #include <sys/fs/snode.h>
83
84 #include <vm/seg.h>
85 #include <vm/seg_map.h>
86 #include <vm/page.h>
87 #include <vm/pvn.h>
88 #include <vm/seg_dev.h>
89 #include <vm/seg_vn.h>
90
91 #include <fs/fs_subr.h>
92
93 #include <sys/esunddi.h>
94 #include <sys/autoconf.h>
95 #include <sys/sunndi.h>
96 #include <sys/contract/device_impl.h>
97
98
99 static int spec_open(struct vnode **, int, struct cred *, caller_context_t *);
100 static int spec_close(struct vnode *, int, int, offset_t, struct cred *,
101 caller_context_t *);
102 static int spec_read(struct vnode *, struct uio *, int, struct cred *,
103 caller_context_t *);
104 static int spec_write(struct vnode *, struct uio *, int, struct cred *,
105 caller_context_t *);
106 static int spec_ioctl(struct vnode *, int, intptr_t, int, struct cred *, int *,
107 caller_context_t *);
108 static int spec_getattr(struct vnode *, struct vattr *, int, struct cred *,
109 caller_context_t *);
110 static int spec_setattr(struct vnode *, struct vattr *, int, struct cred *,
111 caller_context_t *);
112 static int spec_access(struct vnode *, int, int, struct cred *,
113 caller_context_t *);
114 static int spec_create(struct vnode *, char *, vattr_t *, enum vcexcl, int,
115 struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
116 static int spec_fsync(struct vnode *, int, struct cred *, caller_context_t *);
117 static void spec_inactive(struct vnode *, struct cred *, caller_context_t *);
118 static int spec_fid(struct vnode *, struct fid *, caller_context_t *);
119 static int spec_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
120 static int spec_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
121 struct flk_callback *, struct cred *, caller_context_t *);
122 static int spec_realvp(struct vnode *, struct vnode **, caller_context_t *);
123
124 static int spec_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t **,
125 size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
126 caller_context_t *);
127 static int spec_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
128 struct cred *);
129 static struct buf *spec_startio(struct vnode *, page_t *, u_offset_t, size_t,
130 int);
131 static int spec_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
132 page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
133 static int spec_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
134 uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
135 static int spec_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
136 uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
137 static int spec_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
138 uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
139
140 static int spec_poll(struct vnode *, short, int, short *, struct pollhead **,
141 caller_context_t *);
142 static int spec_dump(struct vnode *, caddr_t, offset_t, offset_t,
143 caller_context_t *);
144 static int spec_pageio(struct vnode *, page_t *, u_offset_t, size_t, int,
145 cred_t *, caller_context_t *);
146
147 static int spec_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
148 caller_context_t *);
149 static int spec_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
150 caller_context_t *);
151 static int spec_pathconf(struct vnode *, int, ulong_t *, struct cred *,
152 caller_context_t *);
153
154 #define SN_HOLD(csp) { \
155 mutex_enter(&csp->s_lock); \
156 csp->s_count++; \
157 mutex_exit(&csp->s_lock); \
158 }
159
160 #define SN_RELE(csp) { \
161 mutex_enter(&csp->s_lock); \
162 csp->s_count--; \
163 ASSERT((csp->s_count > 0) || (csp->s_vnode->v_stream == NULL)); \
164 mutex_exit(&csp->s_lock); \
165 }
166
167 #define S_ISFENCED(sp) ((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
168
169 struct vnodeops *spec_vnodeops;
170
171 /*
172 * *PLEASE NOTE*: If you add new entry points to specfs, do
173 * not forget to add support for fencing. A fenced snode
174 * is indicated by the SFENCED flag in the common snode.
175 * If a snode is fenced, determine if your entry point is
176 * a configuration operation (Example: open), a detection
177 * operation (Example: getattr), an I/O operation (Example: ioctl())
178 * or an unconfiguration operation (Example: close). If it is
179 * a configuration or detection operation, fail the operation
180 * for a fenced snode with an ENXIO or EIO as appropriate. If
181 * it is any other operation, let it through.
182 */
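/*
 * A minimal sketch of that rule, assuming a hypothetical new
 * configuration-style entry point (the name spec_frobnicate and its
 * reduced argument list are made up for illustration and are not part
 * of the template below):
 *
 *	static int
 *	spec_frobnicate(struct vnode *vp, struct cred *cr,
 *	    caller_context_t *ct)
 *	{
 *		struct snode *sp = VTOS(vp);
 *
 *		if (S_ISFENCED(sp))
 *			return (ENXIO);
 *
 *		return (0);
 *	}
 *
 * A detection operation would use the same check (returning ENXIO or
 * EIO as appropriate); I/O and unconfiguration operations skip it.
 */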
183
184 const fs_operation_def_t spec_vnodeops_template[] = {
185 VOPNAME_OPEN, { .vop_open = spec_open },
186 VOPNAME_CLOSE, { .vop_close = spec_close },
187 VOPNAME_READ, { .vop_read = spec_read },
188 VOPNAME_WRITE, { .vop_write = spec_write },
189 VOPNAME_IOCTL, { .vop_ioctl = spec_ioctl },
190 VOPNAME_GETATTR, { .vop_getattr = spec_getattr },
191 VOPNAME_SETATTR, { .vop_setattr = spec_setattr },
192 VOPNAME_ACCESS, { .vop_access = spec_access },
193 VOPNAME_CREATE, { .vop_create = spec_create },
194 VOPNAME_FSYNC, { .vop_fsync = spec_fsync },
195 VOPNAME_INACTIVE, { .vop_inactive = spec_inactive },
196 VOPNAME_FID, { .vop_fid = spec_fid },
197 VOPNAME_SEEK, { .vop_seek = spec_seek },
198 VOPNAME_PATHCONF, { .vop_pathconf = spec_pathconf },
199 VOPNAME_FRLOCK, { .vop_frlock = spec_frlock },
200 VOPNAME_REALVP, { .vop_realvp = spec_realvp },
201 VOPNAME_GETPAGE, { .vop_getpage = spec_getpage },
202 VOPNAME_PUTPAGE, { .vop_putpage = spec_putpage },
203 VOPNAME_MAP, { .vop_map = spec_map },
204 VOPNAME_ADDMAP, { .vop_addmap = spec_addmap },
205 VOPNAME_DELMAP, { .vop_delmap = spec_delmap },
206 VOPNAME_POLL, { .vop_poll = spec_poll },
207 VOPNAME_DUMP, { .vop_dump = spec_dump },
208 VOPNAME_PAGEIO, { .vop_pageio = spec_pageio },
209 VOPNAME_SETSECATTR, { .vop_setsecattr = spec_setsecattr },
210 VOPNAME_GETSECATTR, { .vop_getsecattr = spec_getsecattr },
211 NULL, NULL
212 };
213
214 /*
215 * Return address of spec_vnodeops
216 */
217 struct vnodeops *
218 spec_getvnodeops(void)
219 {
220 return (spec_vnodeops);
221 }
222
223 extern vnode_t *rconsvp;
224
225 /*
226 * Acquire the serial lock on the common snode.
227 */
228 #define LOCK_CSP(csp) (void) spec_lockcsp(csp, 0, 1, 0)
229 #define LOCKHOLD_CSP_SIG(csp) spec_lockcsp(csp, 1, 1, 1)
230 #define SYNCHOLD_CSP_SIG(csp, intr) spec_lockcsp(csp, intr, 0, 1)
231
232 typedef enum {
233 LOOP,
234 INTR,
235 SUCCESS
236 } slock_ret_t;
237
238 /*
239 * Synchronize with active SLOCKED snode, optionally checking for a signal and
240 * optionally returning with SLOCKED set and SN_HOLD done. The 'intr'
241 * argument determines if the thread is interruptible by a signal while
242 * waiting; the function returns INTR if interrupted while another
243 * thread is closing this snode and LOOP if interrupted otherwise.
244 * When SUCCESS is returned the 'hold' argument determines if the open
245 * count (SN_HOLD) has been incremented and the 'setlock' argument
246 * determines if the function returns with SLOCKED set.
247 */
248 static slock_ret_t
249 spec_lockcsp(struct snode *csp, int intr, int setlock, int hold)
250 {
251 slock_ret_t ret = SUCCESS;
252 mutex_enter(&csp->s_lock);
253 while (csp->s_flag & SLOCKED) {
254 csp->s_flag |= SWANT;
255 if (intr) {
256 if (!cv_wait_sig(&csp->s_cv, &csp->s_lock)) {
257 if (csp->s_flag & SCLOSING)
258 ret = INTR;
259 else
260 ret = LOOP;
261 mutex_exit(&csp->s_lock);
262 return (ret); /* interrupted */
263 }
264 } else {
265 cv_wait(&csp->s_cv, &csp->s_lock);
266 }
267 }
268 if (setlock)
269 csp->s_flag |= SLOCKED;
270 if (hold)
271 csp->s_count++; /* one more open reference : SN_HOLD */
272 mutex_exit(&csp->s_lock);
273 return (ret); /* serialized/locked */
274 }
275
276 /*
277 * Unlock the serial lock on the common snode
278 */
279 #define UNLOCK_CSP_LOCK_HELD(csp) \
280 ASSERT(mutex_owned(&csp->s_lock)); \
281 if (csp->s_flag & SWANT) \
282 cv_broadcast(&csp->s_cv); \
283 csp->s_flag &= ~(SWANT|SLOCKED);
284
285 #define UNLOCK_CSP(csp) \
286 mutex_enter(&csp->s_lock); \
287 UNLOCK_CSP_LOCK_HELD(csp); \
288 mutex_exit(&csp->s_lock);
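/*
 * Minimal usage sketches of this serial-lock protocol, patterned on the
 * open and close paths later in this file (error handling trimmed):
 *
 * Open side - wait out a concurrent close and take an open hold:
 *
 *	slock_ret_t ret;
 *
 *	while ((ret = SYNCHOLD_CSP_SIG(csp, intr)) != SUCCESS) {
 *		if (ret == INTR)
 *			return (EINTR);
 *	}
 *
 * Close side - serialize against opens/closes, then drop the hold:
 *
 *	LOCK_CSP(csp);
 *	mutex_enter(&csp->s_lock);
 *	csp->s_count--;
 *	...
 *	UNLOCK_CSP_LOCK_HELD(csp);
 *	mutex_exit(&csp->s_lock);
 */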
289
290 /*
291 * compute/return the size of the device
292 */
293 #define SPEC_SIZE(csp) \
294 (((csp)->s_flag & SSIZEVALID) ? (csp)->s_size : spec_size(csp))
295
296 /*
297 * Compute and return the size. If the size in the common snode is valid then
298 * return it. If not valid then get the size from the driver and set size in
299 * the common snode. If the device has not been attached then we don't ask for
300 * an update from the driver - for non-streams SSIZEVALID stays unset until the
301 * device is attached. A stat of a mknod outside /devices (non-devfs) may
302 * report UNKNOWN_SIZE because the device may not be attached yet (SDIPSET not
303 * established in mknod until open time). A stat in /devices will report the
304 * size correctly. Specfs should always call SPEC_SIZE instead of referring
305 * directly to s_size to initialize/retrieve the size of a device.
306 *
307 * XXX There is an inconsistency between block and raw - "unknown" is
308 * UNKNOWN_SIZE for VBLK and 0 for VCHR(raw).
309 */
310 static u_offset_t
311 spec_size(struct snode *csp)
312 {
313 struct vnode *cvp = STOV(csp);
314 u_offset_t size;
315 int plen;
316 uint32_t size32;
317 dev_t dev;
318 dev_info_t *devi;
319 major_t maj;
320 uint_t blksize;
321 int blkshift;
322
323 ASSERT((csp)->s_commonvp == cvp); /* must be common node */
324
325 /* return cached value */
326 mutex_enter(&csp->s_lock);
327 if (csp->s_flag & SSIZEVALID) {
328 mutex_exit(&csp->s_lock);
329 return (csp->s_size);
330 }
331
332 /* VOP_GETATTR of mknod has not had devcnt restriction applied */
333 dev = cvp->v_rdev;
334 maj = getmajor(dev);
335 if (maj >= devcnt) {
336 /* return non-cached UNKNOWN_SIZE */
337 mutex_exit(&csp->s_lock);
338 return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
339 }
340
341 /* establish cached zero size for streams */
342 if (STREAMSTAB(maj)) {
343 csp->s_size = 0;
344 csp->s_flag |= SSIZEVALID;
345 mutex_exit(&csp->s_lock);
346 return (0);
347 }
348
349 /*
350 * Return non-cached UNKNOWN_SIZE if not open.
351 *
352 * NB: This check is bogus, calling prop_op(9E) should be gated by
353 * attach, not open. Not having this check however opens up a new
354 * context under which a driver's prop_op(9E) could be called. Calling
355 * prop_op(9E) in this new context has been shown to expose latent
356 * driver bugs (insufficient NULL pointer checks that lead to panic).
357 * We are keeping this open check for now to avoid these panics.
358 */
359 if (csp->s_count == 0) {
360 mutex_exit(&csp->s_lock);
361 return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
362 }
363
364 /* Return non-cached UNKNOWN_SIZE if not attached. */
365 if (((csp->s_flag & SDIPSET) == 0) || (csp->s_dip == NULL) ||
366 !i_ddi_devi_attached(csp->s_dip)) {
367 mutex_exit(&csp->s_lock);
368 return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
369 }
370
371 devi = csp->s_dip;
372
373 /*
374 * Establish the cached size obtained from the attached driver. Since we
375 * know the devinfo node, for efficiency we use cdev_prop_op directly
376 * instead of [cb]dev_[Ss]size.
377 */
378 if (cvp->v_type == VCHR) {
379 size = 0;
380 plen = sizeof (size);
381 if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
382 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
383 DDI_PROP_CONSUMER_TYPED, "Size", (caddr_t)&size,
384 &plen) != DDI_PROP_SUCCESS) {
385 plen = sizeof (size32);
386 if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
387 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
388 "size", (caddr_t)&size32, &plen) ==
389 DDI_PROP_SUCCESS)
390 size = size32;
391 }
392 } else {
393 size = UNKNOWN_SIZE;
394 plen = sizeof (size);
395 if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
396 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
397 DDI_PROP_CONSUMER_TYPED, "Nblocks", (caddr_t)&size,
398 &plen) != DDI_PROP_SUCCESS) {
399 plen = sizeof (size32);
400 if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
401 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
402 "nblocks", (caddr_t)&size32, &plen) ==
403 DDI_PROP_SUCCESS)
404 size = size32;
405 }
406
407 if (size != UNKNOWN_SIZE) {
408 blksize = DEV_BSIZE; /* default */
409 plen = sizeof (blksize);
410
411 /* try to get dev_t specific "blksize" */
412 if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
413 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
414 "blksize", (caddr_t)&blksize, &plen) !=
415 DDI_PROP_SUCCESS) {
416 /*
417 * Try for dev_info node "device-blksize".
418 * If this fails then blksize will still be the
419 * DEV_BSIZE default value.
420 */
421 (void) cdev_prop_op(DDI_DEV_T_ANY, devi,
422 PROP_LEN_AND_VAL_BUF,
423 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
424 "device-blksize", (caddr_t)&blksize, &plen);
425 }
426
427 /* blksize must be a power of two */
428 ASSERT(BIT_ONLYONESET(blksize));
429 blkshift = highbit(blksize) - 1;
430
431 /* convert from block size to byte size */
432 if (size < (MAXOFFSET_T >> blkshift))
433 size = size << blkshift;
434 else
435 size = UNKNOWN_SIZE;
436 }
437 }
438
439 csp->s_size = size;
440 csp->s_flag |= SSIZEVALID;
441
442 mutex_exit(&csp->s_lock);
443 return (size);
444 }
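/*
 * For reference, a block driver typically answers the "Nblocks" query
 * above from its prop_op(9E) entry point. A minimal sketch using
 * ddi_prop_op_nblocks(9F) follows; xx_prop_op and the xx_get_nblocks()
 * helper are hypothetical names, not part of specfs:
 *
 *	static int
 *	xx_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
 *	    int mod_flags, char *name, caddr_t valuep, int *lengthp)
 *	{
 *		uint64_t nblocks64 = xx_get_nblocks(dev);
 *
 *		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
 *		    name, valuep, lengthp, nblocks64));
 *	}
 *
 * ddi_prop_op_nblocks() answers "Nblocks"/"nblocks" itself and falls
 * back to ddi_prop_op() for other property names.
 */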
445
446 /*
447 * This function deals with vnode substitution in the case of
448 * device cloning.
449 */
450 static int
451 spec_clone(struct vnode **vpp, dev_t newdev, int vtype, struct stdata *stp)
452 {
453 dev_t dev = (*vpp)->v_rdev;
454 major_t maj = getmajor(dev);
455 major_t newmaj = getmajor(newdev);
456 int sysclone = (maj == clone_major);
457 int qassociate_used = 0;
458 struct snode *oldsp, *oldcsp;
459 struct snode *newsp, *newcsp;
460 struct vnode *newvp, *newcvp;
461 dev_info_t *dip;
462 queue_t *dq;
463
464 ASSERT(dev != newdev);
465
466 /*
467 * Check for cloning across different drivers.
468 * We only support this under the system provided clone driver
469 */
470 if ((maj != newmaj) && !sysclone) {
471 cmn_err(CE_NOTE,
472 "unsupported clone open maj = %u, newmaj = %u",
473 maj, newmaj);
474 return (ENXIO);
475 }
476
477 /* old */
478 oldsp = VTOS(*vpp);
479 oldcsp = VTOS(oldsp->s_commonvp);
480
481 /* new */
482 newvp = makespecvp(newdev, vtype);
483 ASSERT(newvp != NULL);
484 newsp = VTOS(newvp);
485 newcvp = newsp->s_commonvp;
486 newcsp = VTOS(newcvp);
487
488 /*
489 * Clones inherit fsid, realvp, and dip.
490 * XXX realvp inherit is not occurring, does fstat of clone work?
491 */
492 newsp->s_fsid = oldsp->s_fsid;
493 if (sysclone) {
494 newsp->s_flag |= SCLONE;
495 dip = NULL;
496 } else {
497 newsp->s_flag |= SSELFCLONE;
498 dip = oldcsp->s_dip;
499 }
500
501 /*
502 * If we cloned to an opened newdev that already has called
503 * spec_assoc_vp_with_devi (SDIPSET set) then the association is
504 * already established.
505 */
506 if (!(newcsp->s_flag & SDIPSET)) {
507 /*
508 * Establish s_dip association for newdev.
509 *
510 * If we trusted the getinfo(9E) DDI_INFO_DEVT2INSTANCE
511 * implementation of all cloning drivers (SCLONE and SELFCLONE)
512 * we would always use e_ddi_hold_devi_by_dev(). We know that
513 * many drivers have had (still have?) problems with
514 * DDI_INFO_DEVT2INSTANCE, so we try to minimize reliance by
515 * detecting drivers that use QASSOCIATE (by looking down the
516 * stream) and setting their s_dip association to NULL.
517 */
518 qassociate_used = 0;
519 if (stp) {
520 for (dq = stp->sd_wrq; dq; dq = dq->q_next) {
521 if (_RD(dq)->q_flag & _QASSOCIATED) {
522 qassociate_used = 1;
523 dip = NULL;
524 break;
525 }
526 }
527 }
528
529 if (dip || qassociate_used) {
530 spec_assoc_vp_with_devi(newvp, dip);
531 } else {
532 /* derive association from newdev */
533 dip = e_ddi_hold_devi_by_dev(newdev, 0);
534 spec_assoc_vp_with_devi(newvp, dip);
535 if (dip)
536 ddi_release_devi(dip);
537 }
538 }
539
540 SN_HOLD(newcsp);
541
542 /* deal with stream stuff */
543 if (stp != NULL) {
544 LOCK_CSP(newcsp); /* synchronize stream open/close */
545 mutex_enter(&newcsp->s_lock);
546 newcvp->v_stream = newvp->v_stream = stp;
547 stp->sd_vnode = newcvp;
548 stp->sd_pvnode = newvp;
549 stp->sd_strtab = STREAMSTAB(newmaj);
550 mutex_exit(&newcsp->s_lock);
551 UNLOCK_CSP(newcsp);
552 }
553
554 /* substitute the vnode */
555 SN_RELE(oldcsp);
556 VN_RELE(*vpp);
557 *vpp = newvp;
558
559 return (0);
560 }
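/*
 * For reference, spec_clone() is normally triggered by a self-cloning
 * driver rewriting *devp in its open(9E) routine. A minimal sketch
 * (xx_open and xx_alloc_minor are hypothetical names):
 *
 *	static int
 *	xx_open(dev_t *devp, int flag, int otyp, cred_t *cr)
 *	{
 *		minor_t m;
 *
 *		if ((m = xx_alloc_minor()) == (minor_t)-1)
 *			return (EBUSY);
 *		*devp = makedevice(getmajor(*devp), m);
 *		return (0);
 *	}
 *
 * When dev_open()/stropen() comes back with newdev != dev, spec_open()
 * below calls spec_clone() to substitute a vnode for the new minor.
 */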
561
562 static int
563 spec_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *cc)
564 {
565 major_t maj;
566 dev_t dev, newdev;
567 struct vnode *vp, *cvp;
568 struct snode *sp, *csp;
569 struct stdata *stp;
570 dev_info_t *dip;
571 int error, type;
572 contract_t *ct = NULL;
573 int open_returns_eintr;
574 slock_ret_t spec_locksp_ret;
575
576
577 flag &= ~FCREAT; /* paranoia */
578
579 vp = *vpp;
580 sp = VTOS(vp);
581 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
582 if ((vp->v_type != VCHR) && (vp->v_type != VBLK))
583 return (ENXIO);
584
585 /*
586 * If the VFS_NODEVICES bit was set for the mount,
587 * do not allow opens of special devices.
588 */
589 if (sp->s_realvp && (sp->s_realvp->v_vfsp->vfs_flag & VFS_NODEVICES))
590 return (ENXIO);
591
592 newdev = dev = vp->v_rdev;
593
594 /*
595 * If we are opening a node that has not had spec_assoc_vp_with_devi
596 * called against it (mknod outside /devices or a non-dacf makespecvp
597 * node) then SDIPSET will not be set. In this case we call an
598 * interface which will reconstruct the path and lookup (drive attach)
599 * through devfs (e_ddi_hold_devi_by_dev -> e_ddi_hold_devi_by_path ->
600 * devfs_lookupname). For support of broken drivers that don't call
601 * ddi_create_minor_node for all minor nodes in their instance space,
602 * we call interfaces that operate at the directory/devinfo
603 * (major/instance) level instead of to the leaf/minor node level.
604 * After finding and attaching the dip we associate it with the
605 * common specfs vnode (s_dip), which sets SDIPSET. A DL_DETACH_REQ
606 * to a style-2 stream driver may set s_dip to NULL with SDIPSET set.
607 *
608 * NOTE: Although e_ddi_hold_devi_by_dev takes a dev_t argument, its
609 * implementation operates at the major/instance level since it only
610 * needs to return a dip.
611 */
612 cvp = sp->s_commonvp;
613 csp = VTOS(cvp);
614 if (!(csp->s_flag & SDIPSET)) {
615 /* try to attach, return error if we fail */
616 if ((dip = e_ddi_hold_devi_by_dev(dev, 0)) == NULL)
617 return (ENXIO);
618
619 /* associate dip with the common snode s_dip */
620 spec_assoc_vp_with_devi(vp, dip);
621 ddi_release_devi(dip); /* from e_ddi_hold_devi_by_dev */
622 }
623
624 /* check if device fenced off */
625 if (S_ISFENCED(sp))
626 return (ENXIO);
627
628 #ifdef DEBUG
629 /* verify attach/open exclusion guarantee */
630 dip = csp->s_dip;
631 ASSERT((dip == NULL) || i_ddi_devi_attached(dip));
632 #endif /* DEBUG */
633
634 if ((error = secpolicy_spec_open(cr, vp, flag)) != 0)
635 return (error);
636
637 /* Verify existence of open(9E) implementation. */
638 maj = getmajor(dev);
639 if ((maj >= devcnt) ||
640 (devopsp[maj]->devo_cb_ops == NULL) ||
641 (devopsp[maj]->devo_cb_ops->cb_open == NULL))
642 return (ENXIO);
643
644 /*
645 * split STREAMS vs. non-STREAMS
646 *
647 * If the device is a dual-personality device, then we might want
648 * to allow for a regular OTYP_BLK open. If however it's strictly
649 * a pure STREAMS device, the cb_open entry point will be
650 * nodev() which returns ENXIO. This does make this failure path
651 * somewhat longer, but such attempts to use OTYP_BLK with STREAMS
652 * devices should be exceedingly rare. (Most of the time they will
653 * be due to programmer error.)
654 */
655 if ((vp->v_type == VCHR) && (STREAMSTAB(maj)))
656 goto streams_open;
657
658 not_streams:
659 /*
660 * Wait for in progress last close to complete. This guarantees
661 * to the driver writer that we will never be in the driver's
662 * open and close on the same (dev_t, otype) at the same time.
663 * Open count already incremented (SN_HOLD) on non-zero return.
664 * The wait is interruptible by a signal if the driver sets the
665 * D_OPEN_RETURNS_EINTR cb_ops(9S) cb_flag or sets the
666 * ddi-open-returns-eintr(9P) property in its driver.conf.
667 */
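	/*
	 * For reference, a driver opts into this interruptible wait
	 * either by including D_OPEN_RETURNS_EINTR in the cb_flag field
	 * of its cb_ops(9S) structure or with a line such as the
	 * following in its driver.conf (illustrative fragment only):
	 *
	 *	ddi-open-returns-eintr=1;
	 */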
668 if ((devopsp[maj]->devo_cb_ops->cb_flag & D_OPEN_RETURNS_EINTR) ||
669 (devnamesp[maj].dn_flags & DN_OPEN_RETURNS_EINTR))
670 open_returns_eintr = 1;
671 else
672 open_returns_eintr = 0;
673 while ((spec_locksp_ret = SYNCHOLD_CSP_SIG(csp, open_returns_eintr)) !=
674 SUCCESS) {
675 if (spec_locksp_ret == INTR)
676 return (EINTR);
677 }
678
679 /* non streams open */
680 type = (vp->v_type == VBLK ? OTYP_BLK : OTYP_CHR);
681 error = dev_open(&newdev, flag, type, cr);
682
683 /* deal with clone case */
684 if (error == 0 && dev != newdev) {
685 error = spec_clone(vpp, newdev, vp->v_type, NULL);
686 /*
687 * bail on clone failure, further processing
688 * results in undefined behaviors.
689 */
690 if (error != 0)
691 return (error);
692 sp = VTOS(*vpp);
693 csp = VTOS(sp->s_commonvp);
694 }
695
696 /*
697 * create contracts only for userland opens
698 * Successful open and cloning is done at this point.
699 */
700 if (error == 0 && !(flag & FKLYR)) {
701 int spec_type;
702 spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
703 if (contract_device_open(newdev, spec_type, NULL) != 0) {
704 error = EIO;
705 }
706 }
707
708 if (error == 0) {
709 sp->s_size = SPEC_SIZE(csp);
710
711 if ((csp->s_flag & SNEEDCLOSE) == 0) {
712 int nmaj = getmajor(newdev);
713 mutex_enter(&csp->s_lock);
714 /* successful open needs a close later */
715 csp->s_flag |= SNEEDCLOSE;
716
717 /*
718 * Invalidate possible cached "unknown" size
719 * established by a VOP_GETATTR while open was in
720 * progress, and the driver might fail prop_op(9E).
721 */
722 if (((cvp->v_type == VCHR) && (csp->s_size == 0)) ||
723 ((cvp->v_type == VBLK) &&
724 (csp->s_size == UNKNOWN_SIZE)))
725 csp->s_flag &= ~SSIZEVALID;
726
727 if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_64BIT)
728 csp->s_flag |= SLOFFSET;
729 if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_U64BIT)
730 csp->s_flag |= SLOFFSET | SANYOFFSET;
731 mutex_exit(&csp->s_lock);
732 }
733 return (0);
734 }
735
736 /*
737 * Open failed. If we missed a close operation because
738 * we were trying to get the device open, and this is the
739 * last in-progress open that is failing, then call close.
740 *
741 * NOTE: Only non-streams open has this race condition.
742 */
743 mutex_enter(&csp->s_lock);
744 csp->s_count--; /* decrement open count : SN_RELE */
745 if ((csp->s_count == 0) && /* no outstanding open */
746 (csp->s_mapcnt == 0) && /* no mapping */
747 (csp->s_flag & SNEEDCLOSE)) { /* need a close */
748 csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);
749
750 /* See comment in spec_close() */
751 if (csp->s_flag & (SCLONE | SSELFCLONE))
752 csp->s_flag &= ~SDIPSET;
753
754 csp->s_flag |= SCLOSING;
755 mutex_exit(&csp->s_lock);
756
757 ASSERT(*vpp != NULL);
758 (void) device_close(*vpp, flag, cr);
759
760 mutex_enter(&csp->s_lock);
761 csp->s_flag &= ~SCLOSING;
762 mutex_exit(&csp->s_lock);
763 } else {
764 mutex_exit(&csp->s_lock);
765 }
766 return (error);
767
768 streams_open:
769 /*
770 * Lock common snode to prevent any new clone opens on this
771 * stream while one is in progress. This is necessary since
772 * the stream currently associated with the clone device will
773 * not be part of it after the clone open completes. Unfortunately
774 * we don't know in advance if this is a clone
775 * device so we have to lock all opens.
776 *
777 * If we fail, it's because of an interrupt - EINTR return is an
778 * expected aspect of opening a stream so we don't need to check
779 * D_OPEN_RETURNS_EINTR. Open count already incremented (SN_HOLD)
780 * on non-zero return.
781 */
782 if (LOCKHOLD_CSP_SIG(csp) != SUCCESS)
783 return (EINTR);
784
785 error = stropen(cvp, &newdev, flag, cr);
786 stp = cvp->v_stream;
787
788 /* deal with the clone case */
789 if ((error == 0) && (dev != newdev)) {
790 vp->v_stream = cvp->v_stream = NULL;
791 UNLOCK_CSP(csp);
792 error = spec_clone(vpp, newdev, vp->v_type, stp);
793 /*
794 * bail on clone failure, further processing
795 * results in undefined behaviors.
796 */
797 if (error != 0)
798 return (error);
799 sp = VTOS(*vpp);
800 csp = VTOS(sp->s_commonvp);
801 } else if (error == 0) {
802 vp->v_stream = stp;
803 UNLOCK_CSP(csp);
804 }
805
806 /*
807 * create contracts only for userland opens
808 * Successful open and cloning is done at this point.
809 */
810 if (error == 0 && !(flag & FKLYR)) {
811 /* STREAM is of type S_IFCHR */
812 if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
813 UNLOCK_CSP(csp);
814 (void) spec_close(vp, flag, 1, 0, cr, cc);
815 return (EIO);
816 }
817 }
818
819 if (error == 0) {
820 /* STREAMS devices don't have a size */
821 sp->s_size = csp->s_size = 0;
822
823 if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))
824 return (0);
825
826 /* try to allocate it as a controlling terminal */
827 if (strctty(stp) != EINTR)
828 return (0);
829
830 /* strctty() was interrupted by a signal */
831 if (ct) {
832 /* we only create contracts for userland opens */
833 ASSERT(ttoproc(curthread));
834 (void) contract_abandon(ct, ttoproc(curthread), 0);
835 }
836 (void) spec_close(vp, flag, 1, 0, cr, cc);
837 return (EINTR);
838 }
839
840 /*
841 * Deal with stropen failure.
842 *
843 * sd_flag in the stream head cannot change since the
844 * common snode is locked before the call to stropen().
845 */
846 if ((stp != NULL) && (stp->sd_flag & STREOPENFAIL)) {
847 /*
848 * Open failed part way through.
849 */
850 mutex_enter(&stp->sd_lock);
851 stp->sd_flag &= ~STREOPENFAIL;
852 mutex_exit(&stp->sd_lock);
853
854 UNLOCK_CSP(csp);
855 (void) spec_close(vp, flag, 1, 0, cr, cc);
856 } else {
857 UNLOCK_CSP(csp);
858 SN_RELE(csp);
859 }
860
861 /*
862 * Resolution for STREAMS vs. regular character device: If the
863 * STREAMS open(9e) returns ENOSTR, then try an ordinary device
864 * open instead.
865 */
866 if (error == ENOSTR) {
867 goto not_streams;
868 }
869 return (error);
870 }
871
872 /*ARGSUSED2*/
873 static int
874 spec_close(
875 struct vnode *vp,
876 int flag,
877 int count,
878 offset_t offset,
879 struct cred *cr,
880 caller_context_t *ct)
881 {
882 struct vnode *cvp;
883 struct snode *sp, *csp;
884 enum vtype type;
885 dev_t dev;
886 int error = 0;
887 int sysclone;
888
889 if (!(flag & FKLYR)) {
890 /* this only applies to closes of devices from userland */
891 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
892 cleanshares(vp, ttoproc(curthread)->p_pid);
893 if (vp->v_stream)
894 strclean(vp);
895 }
896 if (count > 1)
897 return (0);
898
899 /* we allow close to succeed even if device is fenced off */
900 sp = VTOS(vp);
901 cvp = sp->s_commonvp;
902
903 dev = sp->s_dev;
904 type = vp->v_type;
905
906 ASSERT(type == VCHR || type == VBLK);
907
908 /*
909 * Prevent close/close and close/open races by serializing closes
910 * on this common snode. Clone opens are held up until after
911 * we have closed this device so the streams linkage is maintained.
912 */
913 csp = VTOS(cvp);
914
915 LOCK_CSP(csp);
916 mutex_enter(&csp->s_lock);
917
918 csp->s_count--; /* one fewer open reference : SN_RELE */
919 sysclone = sp->s_flag & SCLONE;
920
921 /*
922 * Invalidate size on each close.
923 *
924 * XXX We do this on each close because we don't have interfaces that
925 * allow a driver to invalidate the size. Since clearing this on each
926 * close causes property overhead, we skip /dev/null and
927 * /dev/zero to avoid degrading kenbus performance.
928 */
929 if (getmajor(dev) != mm_major)
930 csp->s_flag &= ~SSIZEVALID;
931
932 /*
933 * Only call the close routine when the last open reference through
934 * any [s, v]node goes away. This can be checked by looking at
935 * s_count on the common vnode.
936 */
937 if ((csp->s_count == 0) && (csp->s_mapcnt == 0)) {
938 /* we don't need a close */
939 csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);
940
941 /*
942 * A cloning driver may open-clone to the same dev_t that we
943 * are closing before spec_inactive destroys the common snode.
944 * If this occurs the s_dip association needs to be reevaluated.
945 * We clear SDIPSET to force reevaluation in this case. When
946 * reevaluation occurs (by spec_clone after open), if the
947 * devinfo association has changed then the old association
948 * will be released as the new association is established by
949 * spec_assoc_vp_with_devi().
950 */
951 if (csp->s_flag & (SCLONE | SSELFCLONE))
952 csp->s_flag &= ~SDIPSET;
953
954 csp->s_flag |= SCLOSING;
955 mutex_exit(&csp->s_lock);
956 error = device_close(vp, flag, cr);
957
958 /*
959 * Decrement the devops held in clnopen()
960 */
961 if (sysclone) {
962 ddi_rele_driver(getmajor(dev));
963 }
964 mutex_enter(&csp->s_lock);
965 csp->s_flag &= ~SCLOSING;
966 }
967
968 UNLOCK_CSP_LOCK_HELD(csp);
969 mutex_exit(&csp->s_lock);
970
971 return (error);
972 }
973
974 /*ARGSUSED2*/
975 static int
976 spec_read(
977 struct vnode *vp,
978 struct uio *uiop,
979 int ioflag,
980 struct cred *cr,
981 caller_context_t *ct)
982 {
983 int error;
984 struct snode *sp = VTOS(vp);
985 dev_t dev = sp->s_dev;
986 size_t n;
987 ulong_t on;
988 u_offset_t bdevsize;
989 offset_t maxoff;
990 offset_t off;
991 struct vnode *blkvp;
992
993 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
994
995 if (vp->v_stream) {
996 ASSERT(vp->v_type == VCHR);
997 smark(sp, SACC);
998 return (strread(vp, uiop, cr));
999 }
1000
1001 if (uiop->uio_resid == 0)
1002 return (0);
1003
1004 /*
1005 * Plain old character devices that set D_U64BIT can have
1006 * unrestricted offsets.
1007 */
1008 maxoff = spec_maxoffset(vp);
1009 ASSERT(maxoff != -1 || vp->v_type == VCHR);
1010
1011 if (maxoff != -1 && (uiop->uio_loffset < 0 ||
1012 uiop->uio_loffset + uiop->uio_resid > maxoff))
1013 return (EINVAL);
1014
1015 if (vp->v_type == VCHR) {
1016 smark(sp, SACC);
1017 ASSERT(vp->v_stream == NULL);
1018 return (cdev_read(dev, uiop, cr));
1019 }
1020
1021 /*
1022 * Block device.
1023 */
1024 error = 0;
1025 blkvp = sp->s_commonvp;
1026 bdevsize = SPEC_SIZE(VTOS(blkvp));
1027
1028 do {
1029 caddr_t base;
1030 offset_t diff;
1031
1032 off = uiop->uio_loffset & (offset_t)MAXBMASK;
1033 on = (size_t)(uiop->uio_loffset & MAXBOFFSET);
1034 n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
1035 diff = bdevsize - uiop->uio_loffset;
1036
1037 if (diff <= 0)
1038 break;
1039 if (diff < n)
1040 n = (size_t)diff;
1041
1042 if (vpm_enable) {
1043 error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
1044 n, uiop, 1, NULL, 0, S_READ);
1045 } else {
1046 base = segmap_getmapflt(segkmap, blkvp,
1047 (u_offset_t)(off + on), n, 1, S_READ);
1048
1049 error = uiomove(base + on, n, UIO_READ, uiop);
1050 }
1051 if (!error) {
1052 int flags = 0;
1053 /*
1054 * If we read a whole block, we won't need this
1055 * buffer again soon.
1056 */
1057 if (n + on == MAXBSIZE)
1058 flags = SM_DONTNEED | SM_FREE;
1059 if (vpm_enable) {
1060 error = vpm_sync_pages(blkvp, off, n, flags);
1061 } else {
1062 error = segmap_release(segkmap, base, flags);
1063 }
1064 } else {
1065 if (vpm_enable) {
1066 (void) vpm_sync_pages(blkvp, off, n, 0);
1067 } else {
1068 (void) segmap_release(segkmap, base, 0);
1069 }
1070 if (bdevsize == UNKNOWN_SIZE) {
1071 error = 0;
1072 break;
1073 }
1074 }
1075 } while (error == 0 && uiop->uio_resid > 0 && n != 0);
1076
1077 return (error);
1078 }
1079
1080 /*ARGSUSED*/
1081 static int
1082 spec_write(
1083 struct vnode *vp,
1084 struct uio *uiop,
1085 int ioflag,
1086 struct cred *cr,
1087 caller_context_t *ct)
1088 {
1089 int error;
1090 struct snode *sp = VTOS(vp);
1091 dev_t dev = sp->s_dev;
1092 size_t n;
1093 ulong_t on;
1094 u_offset_t bdevsize;
1095 offset_t maxoff;
1096 offset_t off;
1097 struct vnode *blkvp;
1098
1099 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
1100
1101 if (vp->v_stream) {
1102 ASSERT(vp->v_type == VCHR);
1103 smark(sp, SUPD);
1104 return (strwrite(vp, uiop, cr));
1105 }
1106
1107 /*
1108 * Plain old character devices that set D_U64BIT can have
1109 * unrestricted offsets.
1110 */
1111 maxoff = spec_maxoffset(vp);
1112 ASSERT(maxoff != -1 || vp->v_type == VCHR);
1113
1114 if (maxoff != -1 && (uiop->uio_loffset < 0 ||
1115 uiop->uio_loffset + uiop->uio_resid > maxoff))
1116 return (EINVAL);
1117
1118 if (vp->v_type == VCHR) {
1119 smark(sp, SUPD);
1120 ASSERT(vp->v_stream == NULL);
1121 return (cdev_write(dev, uiop, cr));
1122 }
1123
1124 if (uiop->uio_resid == 0)
1125 return (0);
1126
1127 error = 0;
1128 blkvp = sp->s_commonvp;
1129 bdevsize = SPEC_SIZE(VTOS(blkvp));
1130
1131 do {
1132 int pagecreate;
1133 int newpage;
1134 caddr_t base;
1135 offset_t diff;
1136
1137 off = uiop->uio_loffset & (offset_t)MAXBMASK;
1138 on = (ulong_t)(uiop->uio_loffset & MAXBOFFSET);
1139 n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
1140 pagecreate = 0;
1141
1142 diff = bdevsize - uiop->uio_loffset;
1143 if (diff <= 0) {
1144 error = ENXIO;
1145 break;
1146 }
1147 if (diff < n)
1148 n = (size_t)diff;
1149
1150 /*
1151 * Check to see if we can skip reading in the page
1152 * and just allocate the memory. We can do this
1153 * if we are going to rewrite the entire mapping
1154 * or if we are going to write to end of the device
1155 * from the beginning of the mapping.
1156 */
1157 if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))
1158 pagecreate = 1;
1159
1160 newpage = 0;
1161
1162 /*
1163 * Touch the page and fault it in if it is not in core
1164 * before segmap_getmapflt or vpm_data_copy can lock it.
1165 * This is to avoid a deadlock if the buffer is mapped
1166 * via mmap to the same file that we are writing.
1167 */
1168 uio_prefaultpages((long)n, uiop);
1169
1170 if (vpm_enable) {
1171 error = vpm_data_copy(blkvp, (u_offset_t)(off + on),
1172 n, uiop, !pagecreate, NULL, 0, S_WRITE);
1173 } else {
1174 base = segmap_getmapflt(segkmap, blkvp,
1175 (u_offset_t)(off + on), n, !pagecreate, S_WRITE);
1176
1177 /*
1178 * segmap_pagecreate() returns 1 if it calls
1179 * page_create_va() to allocate any pages.
1180 */
1181
1182 if (pagecreate)
1183 newpage = segmap_pagecreate(segkmap, base + on,
1184 n, 0);
1185
1186 error = uiomove(base + on, n, UIO_WRITE, uiop);
1187 }
1188
1189 if (!vpm_enable && pagecreate &&
1190 uiop->uio_loffset <
1191 P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
1192 /*
1193 * We created pages w/o initializing them completely,
1194 * thus we need to zero the part that wasn't set up.
1195 * This can happen if we write to the end of the device
1196 * or if we had some sort of error during the uiomove.
1197 */
1198 long nzero;
1199 offset_t nmoved;
1200
1201 nmoved = (uiop->uio_loffset - (off + on));
1202 if (nmoved < 0 || nmoved > n) {
1203 panic("spec_write: nmoved bogus");
1204 /*NOTREACHED*/
1205 }
1206 nzero = (long)P2ROUNDUP(on + n, PAGESIZE) -
1207 (on + nmoved);
1208 if (nzero < 0 || (on + nmoved + nzero > MAXBSIZE)) {
1209 panic("spec_write: nzero bogus");
1210 /*NOTREACHED*/
1211 }
1212 (void) kzero(base + on + nmoved, (size_t)nzero);
1213 }
1214
1215 /*
1216 * Unlock the pages which have been allocated by
1217 * page_create_va() in segmap_pagecreate().
1218 */
1219 if (!vpm_enable && newpage)
1220 segmap_pageunlock(segkmap, base + on,
1221 (size_t)n, S_WRITE);
1222
1223 if (error == 0) {
1224 int flags = 0;
1225
1226 /*
1227 * Force write back for synchronous write cases.
1228 */
1229 if (ioflag & (FSYNC|FDSYNC))
1230 flags = SM_WRITE;
1231 else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) {
1232 /*
1233 * Have written a whole block.
1234 * Start an asynchronous write and
1235 * mark the buffer to indicate that
1236 * it won't be needed again soon.
1237 * Push swap files here, since it
1238 * won't happen anywhere else.
1239 */
1240 flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
1241 }
1242 smark(sp, SUPD|SCHG);
1243 if (vpm_enable) {
1244 error = vpm_sync_pages(blkvp, off, n, flags);
1245 } else {
1246 error = segmap_release(segkmap, base, flags);
1247 }
1248 } else {
1249 if (vpm_enable) {
1250 (void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
1251 } else {
1252 (void) segmap_release(segkmap, base, SM_INVAL);
1253 }
1254 }
1255
1256 } while (error == 0 && uiop->uio_resid > 0 && n != 0);
1257
1258 return (error);
1259 }
1260
1261 /*ARGSUSED6*/
1262 static int
1263 spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
1264 int *rvalp, caller_context_t *ct)
1265 {
1266 struct snode *sp;
1267 dev_t dev;
1268 int error;
1269
1270 if (vp->v_type != VCHR)
1271 return (ENOTTY);
1272
1273 /*
1274 * allow ioctls() to go through even for fenced snodes, as they
1275 * may include unconfiguration operation - for example popping of
1276 * streams modules.
1277 */
1278
1279 sp = VTOS(vp);
1280 dev = sp->s_dev;
1281 if (vp->v_stream) {
1282 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
1283 } else {
1284 error = cdev_ioctl(dev, cmd, arg, mode, cr, rvalp);
1285 }
1286 return (error);
1287 }
1288
1289 static int
1290 spec_getattr(
1291 struct vnode *vp,
1292 struct vattr *vap,
1293 int flags,
1294 struct cred *cr,
1295 caller_context_t *ct)
1296 {
1297 int error;
1298 struct snode *sp;
1299 struct vnode *realvp;
1300
1301 /* With ATTR_COMM we will not get attributes from realvp */
1302 if (flags & ATTR_COMM) {
1303 sp = VTOS(vp);
1304 vp = sp->s_commonvp;
1305 }
1306 sp = VTOS(vp);
1307
1308 /* we want stat() to fail with ENXIO if the device is fenced off */
1309 if (S_ISFENCED(sp))
1310 return (ENXIO);
1311
1312 realvp = sp->s_realvp;
1313
1314 if (realvp == NULL) {
1315 static int snode_shift = 0;
1316
1317 /*
1318 * Calculate the amount of bitshift to a snode pointer which
1319 * will still keep it unique. See below.
1320 */
1321 if (snode_shift == 0)
1322 snode_shift = highbit(sizeof (struct snode));
1323 ASSERT(snode_shift > 0);
1324
1325 /*
1326 * No real vnode behind this one. Fill in the fields
1327 * from the snode.
1328 *
1329 * This code should be refined to return only the
1330 * attributes asked for instead of all of them.
1331 */
1332 vap->va_type = vp->v_type;
1333 vap->va_mode = 0;
1334 vap->va_uid = vap->va_gid = 0;
1335 vap->va_fsid = sp->s_fsid;
1336
1337 /*
1338 * If the va_nodeid is > MAX_USHORT, then i386 stats might
1339 * fail. So we shift down the snode pointer to try and get
1340 * the most uniqueness into 16-bits.
1341 */
1342 vap->va_nodeid = ((ino64_t)(uintptr_t)sp >> snode_shift) &
1343 0xFFFF;
1344 vap->va_nlink = 0;
1345 vap->va_rdev = sp->s_dev;
1346
1347 /*
1348 * va_nblocks is the number of 512 byte blocks used to store
1349 * the mknod for the device, not the number of blocks on the
1350 * device itself. This is typically zero since the mknod is
1351 * represented directly in the inode itself.
1352 */
1353 vap->va_nblocks = 0;
1354 } else {
1355 error = VOP_GETATTR(realvp, vap, flags, cr, ct);
1356 if (error != 0)
1357 return (error);
1358 }
1359
1360 /* set the size from the snode */
1361 vap->va_size = SPEC_SIZE(VTOS(sp->s_commonvp));
1362 vap->va_blksize = MAXBSIZE;
1363
1364 mutex_enter(&sp->s_lock);
1365 vap->va_atime.tv_sec = sp->s_atime;
1366 vap->va_mtime.tv_sec = sp->s_mtime;
1367 vap->va_ctime.tv_sec = sp->s_ctime;
1368 mutex_exit(&sp->s_lock);
1369
1370 vap->va_atime.tv_nsec = 0;
1371 vap->va_mtime.tv_nsec = 0;
1372 vap->va_ctime.tv_nsec = 0;
1373 vap->va_seq = 0;
1374
1375 return (0);
1376 }
1377
1378 static int
1379 spec_setattr(
1380 struct vnode *vp,
1381 struct vattr *vap,
1382 int flags,
1383 struct cred *cr,
1384 caller_context_t *ct)
1385 {
1386 struct snode *sp = VTOS(vp);
1387 struct vnode *realvp;
1388 int error;
1389
1390 /* fail with ENXIO if the device is fenced off */
1391 if (S_ISFENCED(sp))
1392 return (ENXIO);
1393
1394 if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
1395 /*
1396 * 1135080: O_TRUNC should have no effect on
1397 * named pipes and terminal devices.
1398 */
1399 ASSERT(vap->va_mask == AT_SIZE);
1400 return (0);
1401 }
1402
1403 if ((realvp = sp->s_realvp) == NULL)
1404 error = 0; /* no real vnode to update */
1405 else
1406 error = VOP_SETATTR(realvp, vap, flags, cr, ct);
1407 if (error == 0) {
1408 /*
1409 * If times were changed, update snode.
1410 */
1411 mutex_enter(&sp->s_lock);
1412 if (vap->va_mask & AT_ATIME)
1413 sp->s_atime = vap->va_atime.tv_sec;
1414 if (vap->va_mask & AT_MTIME) {
1415 sp->s_mtime = vap->va_mtime.tv_sec;
1416 sp->s_ctime = gethrestime_sec();
1417 }
1418 mutex_exit(&sp->s_lock);
1419 }
1420 return (error);
1421 }
1422
1423 static int
1424 spec_access(
1425 struct vnode *vp,
1426 int mode,
1427 int flags,
1428 struct cred *cr,
1429 caller_context_t *ct)
1430 {
1431 struct vnode *realvp;
1432 struct snode *sp = VTOS(vp);
1433
1434 /* fail with ENXIO if the device is fenced off */
1435 if (S_ISFENCED(sp))
1436 return (ENXIO);
1437
1438 if ((realvp = sp->s_realvp) != NULL)
1439 return (VOP_ACCESS(realvp, mode, flags, cr, ct));
1440 else
1441 return (0); /* Allow all access. */
1442 }
1443
1444 /*
1445 * This can be called if creat or an open with O_CREAT is done on the root
1446 * of a lofs mount where the mounted entity is a special file.
1447 */
1448 /*ARGSUSED*/
1449 static int
1450 spec_create(
1451 struct vnode *dvp,
1452 char *name,
1453 vattr_t *vap,
1454 enum vcexcl excl,
1455 int mode,
1456 struct vnode **vpp,
1457 struct cred *cr,
1458 int flag,
1459 caller_context_t *ct,
1460 vsecattr_t *vsecp)
1461 {
1462 int error;
1463 struct snode *sp = VTOS(dvp);
1464
1465 /* fail with ENXIO if the device is fenced off */
1466 if (S_ISFENCED(sp))
1467 return (ENXIO);
1468
1469 ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
1470 if (excl == NONEXCL) {
1471 if (mode && (error = spec_access(dvp, mode, 0, cr, ct)))
1472 return (error);
1473 VN_HOLD(dvp);
1474 return (0);
1475 }
1476 return (EEXIST);
1477 }
1478
1479 /*
1480 * In order to sync out the snode times without multi-client problems,
1481 * make sure the times written out are never earlier than the times
1482 * already set in the vnode.
1483 */
1484 static int
1485 spec_fsync(
1486 struct vnode *vp,
1487 int syncflag,
1488 struct cred *cr,
1489 caller_context_t *ct)
1490 {
1491 struct snode *sp = VTOS(vp);
1492 struct vnode *realvp;
1493 struct vnode *cvp;
1494 struct vattr va, vatmp;
1495
1496 /* allow syncing even if device is fenced off */
1497
1498 /* If times didn't change, don't flush anything. */
1499 mutex_enter(&sp->s_lock);
1500 if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
1501 mutex_exit(&sp->s_lock);
1502 return (0);
1503 }
1504 sp->s_flag &= ~(SACC|SUPD|SCHG);
1505 mutex_exit(&sp->s_lock);
1506 cvp = sp->s_commonvp;
1507 realvp = sp->s_realvp;
1508
1509 if (vp->v_type == VBLK && cvp != vp && vn_has_cached_data(cvp) &&
1510 (cvp->v_flag & VISSWAP) == 0)
1511 (void) VOP_PUTPAGE(cvp, (offset_t)0, 0, 0, cr, ct);
1512
1513 /*
1514 * For devices that support it, force write cache to stable storage.
1515 * We don't need the lock to check s_flags since we can treat
1516 * SNOFLUSH as a hint.
1517 */
1518 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1519 !(sp->s_flag & SNOFLUSH)) {
1520 int rval, rc;
1521 struct dk_callback spec_callback;
1522
1523 spec_callback.dkc_flag = FLUSH_VOLATILE;
1524 spec_callback.dkc_callback = NULL;
1525
1526 /* synchronous flush on volatile cache */
1527 rc = cdev_ioctl(vp->v_rdev, DKIOCFLUSHWRITECACHE,
1528 (intptr_t)&spec_callback, FNATIVE|FKIOCTL, cr, &rval);
1529
1530 if (rc == ENOTSUP || rc == ENOTTY) {
1531 mutex_enter(&sp->s_lock);
1532 sp->s_flag |= SNOFLUSH;
1533 mutex_exit(&sp->s_lock);
1534 }
1535 }
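	/*
	 * For reference, a userland consumer can request the same flush
	 * directly with the ioctl used above; a minimal sketch (the
	 * device path is illustrative and error handling is omitted):
	 *
	 *	int fd = open("/dev/rdsk/c0t0d0s0", O_RDWR);
	 *	(void) ioctl(fd, DKIOCFLUSHWRITECACHE, 0);
	 *	(void) close(fd);
	 */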
1536
1537 /*
1538 * If no real vnode to update, don't flush anything.
1539 */
1540 if (realvp == NULL)
1541 return (0);
1542
1543 vatmp.va_mask = AT_ATIME|AT_MTIME;
1544 if (VOP_GETATTR(realvp, &vatmp, 0, cr, ct) == 0) {
1545
1546 mutex_enter(&sp->s_lock);
1547 if (vatmp.va_atime.tv_sec > sp->s_atime)
1548 va.va_atime = vatmp.va_atime;
1549 else {
1550 va.va_atime.tv_sec = sp->s_atime;
1551 va.va_atime.tv_nsec = 0;
1552 }
1553 if (vatmp.va_mtime.tv_sec > sp->s_mtime)
1554 va.va_mtime = vatmp.va_mtime;
1555 else {
1556 va.va_mtime.tv_sec = sp->s_mtime;
1557 va.va_mtime.tv_nsec = 0;
1558 }
1559 mutex_exit(&sp->s_lock);
1560
1561 va.va_mask = AT_ATIME|AT_MTIME;
1562 (void) VOP_SETATTR(realvp, &va, 0, cr, ct);
1563 }
1564 (void) VOP_FSYNC(realvp, syncflag, cr, ct);
1565 return (0);
1566 }
1567
1568 /*ARGSUSED*/
1569 static void
1570 spec_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1571 {
1572 struct snode *sp = VTOS(vp);
1573 struct vnode *cvp;
1574 struct vnode *rvp;
1575
1576 /*
1577 * If no one has reclaimed the vnode, remove from the
1578 * cache now.
1579 */
1580 if (vp->v_count < 1) {
1581 panic("spec_inactive: Bad v_count");
1582 /*NOTREACHED*/
1583 }
1584 mutex_enter(&stable_lock);
1585
1586 mutex_enter(&vp->v_lock);
1587 VN_RELE_LOCKED(vp);
1588 if (vp->v_count != 0) {
1589 mutex_exit(&vp->v_lock);
1590 mutex_exit(&stable_lock);
1591 return;
1592 }
1593 mutex_exit(&vp->v_lock);
1594
1595 sdelete(sp);
1596 mutex_exit(&stable_lock);
1597
1598 /* We are the sole owner of sp now */
1599 cvp = sp->s_commonvp;
1600 rvp = sp->s_realvp;
1601
1602 if (rvp) {
1603 /*
1604 * If the snode times changed, then update the times
1605 * associated with the "realvp".
1606 */
1607 if ((sp->s_flag & (SACC|SUPD|SCHG)) != 0) {
1608
1609 struct vattr va, vatmp;
1610
1611 mutex_enter(&sp->s_lock);
1612 sp->s_flag &= ~(SACC|SUPD|SCHG);
1613 mutex_exit(&sp->s_lock);
1614 vatmp.va_mask = AT_ATIME|AT_MTIME;
1615 /*
1616 * The user may not own the device, but we
1617 * want to update the attributes anyway.
1618 */
1619 if (VOP_GETATTR(rvp, &vatmp, 0, kcred, ct) == 0) {
1620 if (vatmp.va_atime.tv_sec > sp->s_atime)
1621 va.va_atime = vatmp.va_atime;
1622 else {
1623 va.va_atime.tv_sec = sp->s_atime;
1624 va.va_atime.tv_nsec = 0;
1625 }
1626 if (vatmp.va_mtime.tv_sec > sp->s_mtime)
1627 va.va_mtime = vatmp.va_mtime;
1628 else {
1629 va.va_mtime.tv_sec = sp->s_mtime;
1630 va.va_mtime.tv_nsec = 0;
1631 }
1632
1633 va.va_mask = AT_ATIME|AT_MTIME;
1634 (void) VOP_SETATTR(rvp, &va, 0, kcred, ct);
1635 }
1636 }
1637 }
1638 ASSERT(!vn_has_cached_data(vp));
1639 vn_invalid(vp);
1640
1641 /* if we are sharing another file systems vfs, release it */
1642 if (vp->v_vfsp && (vp->v_vfsp != &spec_vfs))
1643 VFS_RELE(vp->v_vfsp);
1644
1645 /* if we have a realvp, release the realvp */
1646 if (rvp)
1647 VN_RELE(rvp);
1648
1649 /* if we have a common, release the common */
1650 if (cvp && (cvp != vp)) {
1651 VN_RELE(cvp);
1652 #ifdef DEBUG
1653 } else if (cvp) {
1654 /*
1655 * if this is the last reference to a common vnode, any
1656 * associated stream had better have been closed
1657 */
1658 ASSERT(cvp == vp);
1659 ASSERT(cvp->v_stream == NULL);
1660 #endif /* DEBUG */
1661 }
1662
1663 /*
1664 * if we have a hold on a devinfo node (established by
1665 * spec_assoc_vp_with_devi), release the hold
1666 */
1667 if (sp->s_dip)
1668 ddi_release_devi(sp->s_dip);
1669
1670 /*
1671 * If we have an associated device policy, release it.
1672 */
1673 if (sp->s_plcy != NULL)
1674 dpfree(sp->s_plcy);
1675
1676 /*
1677 * If all holds on the devinfo node are through specfs/devfs
1678 * and we just destroyed the last specfs node associated with the
1679 * device, then the devinfo node reference count should now be
1680 * zero. We can't check this because there may be other holds
1681 * on the node from non file system sources: ddi_hold_devi_by_instance
1682 * for example.
1683 */
1684 kmem_cache_free(snode_cache, sp);
1685 }
1686
1687 static int
1688 spec_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1689 {
1690 struct vnode *realvp;
1691 struct snode *sp = VTOS(vp);
1692
1693 if ((realvp = sp->s_realvp) != NULL)
1694 return (VOP_FID(realvp, fidp, ct));
1695 else
1696 return (EINVAL);
1697 }
1698
1699 /*ARGSUSED1*/
1700 static int
1701 spec_seek(
1702 struct vnode *vp,
1703 offset_t ooff,
1704 offset_t *noffp,
1705 caller_context_t *ct)
1706 {
1707 offset_t maxoff = spec_maxoffset(vp);
1708
1709 if (maxoff == -1 || *noffp <= maxoff)
1710 return (0);
1711 else
1712 return (EINVAL);
1713 }
1714
1715 static int
1716 spec_frlock(
1717 struct vnode *vp,
1718 int cmd,
1719 struct flock64 *bfp,
1720 int flag,
1721 offset_t offset,
1722 struct flk_callback *flk_cbp,
1723 struct cred *cr,
1724 caller_context_t *ct)
1725 {
1726 struct snode *sp = VTOS(vp);
1727 struct snode *csp;
1728
1729 csp = VTOS(sp->s_commonvp);
1730 /*
1731 * If file is being mapped, disallow frlock.
1732 */
1733 if (csp->s_mapcnt > 0)
1734 return (EAGAIN);
1735
1736 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1737 }
1738
1739 static int
1740 spec_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
1741 {
1742 struct vnode *rvp;
1743
1744 if ((rvp = VTOS(vp)->s_realvp) != NULL) {
1745 vp = rvp;
1746 if (VOP_REALVP(vp, &rvp, ct) == 0)
1747 vp = rvp;
1748 }
1749
1750 *vpp = vp;
1751 return (0);
1752 }
1753
1754 /*
1755 * Return all the pages from [off..off + len] in block
1756 * or character device.
1757 */
1758 /*ARGSUSED*/
1759 static int
1760 spec_getpage(
1761 struct vnode *vp,
1762 offset_t off,
1763 size_t len,
1764 uint_t *protp,
1765 page_t *pl[],
1766 size_t plsz,
1767 struct seg *seg,
1768 caddr_t addr,
1769 enum seg_rw rw,
1770 struct cred *cr,
1771 caller_context_t *ct)
1772 {
1773 struct snode *sp = VTOS(vp);
1774 int err;
1775
1776 ASSERT(sp->s_commonvp == vp);
1777
1778 /*
1779 * XXX Given the above assertion, this might not do
1780 * what is wanted here.
1781 */
1782 if (vp->v_flag & VNOMAP)
1783 return (ENOSYS);
1784 TRACE_4(TR_FAC_SPECFS, TR_SPECFS_GETPAGE,
1785 "specfs getpage:vp %p off %llx len %ld snode %p",
1786 vp, off, len, sp);
1787
1788 switch (vp->v_type) {
1789 case VBLK:
1790 if (protp != NULL)
1791 *protp = PROT_ALL;
1792
1793 if (((u_offset_t)off + len) > (SPEC_SIZE(sp) + PAGEOFFSET))
1794 return (EFAULT); /* beyond EOF */
1795
1796 err = pvn_getpages(spec_getapage, vp, (u_offset_t)off, len,
1797 protp, pl, plsz, seg, addr, rw, cr);
1798 break;
1799
1800 case VCHR:
1801 cmn_err(CE_NOTE, "spec_getpage called for character device. "
1802 "Check any non-ON consolidation drivers");
1803 err = 0;
1804 pl[0] = (page_t *)0;
1805 break;
1806
1807 default:
1808 panic("spec_getpage: bad v_type 0x%x", vp->v_type);
1809 /*NOTREACHED*/
1810 }
1811
1812 return (err);
1813 }
1814
1815 extern int klustsize; /* set in machdep.c */
1816
1817 int spec_ra = 1;
1818 int spec_lostpage; /* number of times we lost original page */
1819
1820 /*ARGSUSED2*/
1821 static int
1822 spec_getapage(
1823 struct vnode *vp,
1824 u_offset_t off,
1825 size_t len,
1826 uint_t *protp,
1827 page_t *pl[],
1828 size_t plsz,
1829 struct seg *seg,
1830 caddr_t addr,
1831 enum seg_rw rw,
1832 struct cred *cr)
1833 {
1834 struct snode *sp;
1835 struct buf *bp;
1836 page_t *pp, *pp2;
1837 u_offset_t io_off1, io_off2;
1838 size_t io_len1;
1839 size_t io_len2;
1840 size_t blksz;
1841 u_offset_t blkoff;
1842 int dora, err;
1843 page_t *pagefound;
1844 uint_t xlen;
1845 size_t adj_klustsize;
1846 u_offset_t size;
1847 u_offset_t tmpoff;
1848
1849 sp = VTOS(vp);
1850 TRACE_3(TR_FAC_SPECFS, TR_SPECFS_GETAPAGE,
1851 "specfs getapage:vp %p off %llx snode %p", vp, off, sp);
1852 reread:
1853
1854 err = 0;
1855 bp = NULL;
1856 pp = NULL;
1857 pp2 = NULL;
1858
1859 if (pl != NULL)
1860 pl[0] = NULL;
1861
1862 size = SPEC_SIZE(VTOS(sp->s_commonvp));
1863
1864 if (spec_ra && sp->s_nextr == off)
1865 dora = 1;
1866 else
1867 dora = 0;
1868
1869 if (size == UNKNOWN_SIZE) {
1870 dora = 0;
1871 adj_klustsize = PAGESIZE;
1872 } else {
1873 adj_klustsize = dora ? klustsize : PAGESIZE;
1874 }
1875
1876 again:
1877 if ((pagefound = page_exists(vp, off)) == NULL) {
1878 if (rw == S_CREATE) {
1879 /*
1880 * We're allocating a swap slot and its
1881 * associated page was not found, so allocate
1882 * and return it.
1883 */
1884 if ((pp = page_create_va(vp, off,
1885 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1886 panic("spec_getapage: page_create");
1887 /*NOTREACHED*/
1888 }
1889 io_len1 = PAGESIZE;
1890 sp->s_nextr = off + PAGESIZE;
1891 } else {
1892 /*
1893 * Need to really do disk I/O to get the page(s).
1894 */
1895 blkoff = (off / adj_klustsize) * adj_klustsize;
1896 if (size == UNKNOWN_SIZE) {
1897 blksz = PAGESIZE;
1898 } else {
1899 if (blkoff + adj_klustsize <= size)
1900 blksz = adj_klustsize;
1901 else
1902 blksz =
1903 MIN(size - blkoff, adj_klustsize);
1904 }
1905
1906 pp = pvn_read_kluster(vp, off, seg, addr, &tmpoff,
1907 &io_len1, blkoff, blksz, 0);
1908 io_off1 = tmpoff;
1909 /*
1910 			 * Make sure the page didn't sneak into the
1911 * cache while we blocked in pvn_read_kluster.
1912 */
1913 if (pp == NULL)
1914 goto again;
1915
1916 /*
1917 * Zero part of page which we are not
1918 * going to be reading from disk now.
1919 */
1920 xlen = (uint_t)(io_len1 & PAGEOFFSET);
1921 if (xlen != 0)
1922 pagezero(pp->p_prev, xlen, PAGESIZE - xlen);
1923
1924 bp = spec_startio(vp, pp, io_off1, io_len1,
1925 pl == NULL ? (B_ASYNC | B_READ) : B_READ);
1926 sp->s_nextr = io_off1 + io_len1;
1927 }
1928 }
1929
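	/*
	 * For sequential access, also start an asynchronous read-ahead of
	 * the next kluster.  Those pages are not returned to the caller;
	 * they simply end up in the page cache.
	 */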
1930 if (dora && rw != S_CREATE) {
1931 u_offset_t off2;
1932 caddr_t addr2;
1933
1934 off2 = ((off / adj_klustsize) + 1) * adj_klustsize;
1935 addr2 = addr + (off2 - off);
1936
1937 pp2 = NULL;
1938 /*
1939 * If we are past EOF then don't bother trying
1940 * with read-ahead.
1941 */
1942 if (off2 >= size)
1943 pp2 = NULL;
1944 else {
1945 if (off2 + adj_klustsize <= size)
1946 blksz = adj_klustsize;
1947 else
1948 blksz = MIN(size - off2, adj_klustsize);
1949
1950 pp2 = pvn_read_kluster(vp, off2, seg, addr2, &tmpoff,
1951 &io_len2, off2, blksz, 1);
1952 io_off2 = tmpoff;
1953 }
1954
1955 if (pp2 != NULL) {
1956 /*
1957 * Zero part of page which we are not
1958 * going to be reading from disk now.
1959 */
1960 xlen = (uint_t)(io_len2 & PAGEOFFSET);
1961 if (xlen != 0)
1962 pagezero(pp2->p_prev, xlen, PAGESIZE - xlen);
1963
1964 (void) spec_startio(vp, pp2, io_off2, io_len2,
1965 B_READ | B_ASYNC);
1966 }
1967 }
1968
1969 if (pl == NULL)
1970 return (err);
1971
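	/*
	 * If we started a read above (bp != NULL), wait for it to complete;
	 * on error release the klustered pages and return.
	 */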
1972 if (bp != NULL) {
1973 err = biowait(bp);
1974 pageio_done(bp);
1975
1976 if (err) {
1977 if (pp != NULL)
1978 pvn_read_done(pp, B_ERROR);
1979 return (err);
1980 }
1981 }
1982
1983 if (pagefound) {
1984 se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
1985 /*
1986 * Page exists in the cache, acquire the appropriate
1987 * lock. If this fails, start all over again.
1988 */
1989
1990 if ((pp = page_lookup(vp, off, se)) == NULL) {
1991 spec_lostpage++;
1992 goto reread;
1993 }
1994 pl[0] = pp;
1995 pl[1] = NULL;
1996
1997 sp->s_nextr = off + PAGESIZE;
1998 return (0);
1999 }
2000
2001 if (pp != NULL)
2002 pvn_plist_init(pp, pl, plsz, off, io_len1, rw);
2003 return (0);
2004 }
2005
2006 /*
2007  * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED, B_FORCE}.
2008 * If len == 0, do from off to EOF.
2009 *
2010 * The normal cases should be len == 0 & off == 0 (entire vp list),
2011 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
2012 * (from pageout).
2013 */
2014 /*ARGSUSED5*/
2015 int
2016 spec_putpage(
2017 struct vnode *vp,
2018 offset_t off,
2019 size_t len,
2020 int flags,
2021 struct cred *cr,
2022 caller_context_t *ct)
2023 {
2024 struct snode *sp = VTOS(vp);
2025 struct vnode *cvp;
2026 page_t *pp;
2027 u_offset_t io_off;
2028 size_t io_len = 0; /* for lint */
2029 int err = 0;
2030 u_offset_t size;
2031 u_offset_t tmpoff;
2032
2033 ASSERT(vp->v_count != 0);
2034
2035 if (vp->v_flag & VNOMAP)
2036 return (ENOSYS);
2037
2038 cvp = sp->s_commonvp;
2039 size = SPEC_SIZE(VTOS(cvp));
2040
2041 if (!vn_has_cached_data(vp) || off >= size)
2042 return (0);
2043
2044 ASSERT(vp->v_type == VBLK && cvp == vp);
2045 TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTPAGE,
2046 "specfs putpage:vp %p off %llx len %ld snode %p",
2047 vp, off, len, sp);
2048
2049 if (len == 0) {
2050 /*
2051 * Search the entire vp list for pages >= off.
2052 */
2053 err = pvn_vplist_dirty(vp, off, spec_putapage,
2054 flags, cr);
2055 } else {
2056 u_offset_t eoff;
2057
2058 /*
2059 * Loop over all offsets in the range [off...off + len]
2060 * looking for pages to deal with. We set limits so
2061 * that we kluster to klustsize boundaries.
2062 */
2063 eoff = off + len;
2064 for (io_off = off; io_off < eoff && io_off < size;
2065 io_off += io_len) {
2066 /*
2067 * If we are not invalidating, synchronously
2068 			 * freeing or writing pages, use the routine
2069 * page_lookup_nowait() to prevent reclaiming
2070 * them from the free list.
2071 */
2072 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2073 pp = page_lookup(vp, io_off,
2074 (flags & (B_INVAL | B_FREE)) ?
2075 SE_EXCL : SE_SHARED);
2076 } else {
2077 pp = page_lookup_nowait(vp, io_off,
2078 (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2079 }
2080
2081 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
2082 io_len = PAGESIZE;
2083 else {
2084 err = spec_putapage(vp, pp, &tmpoff, &io_len,
2085 flags, cr);
2086 io_off = tmpoff;
2087 if (err != 0)
2088 break;
2089 /*
2090 * "io_off" and "io_len" are returned as
2091 * the range of pages we actually wrote.
2092 * This allows us to skip ahead more quickly
2093 				 * since several pages may have been dealt
2094 * with by this iteration of the loop.
2095 */
2096 }
2097 }
2098 }
2099 return (err);
2100 }
2101
2102
2103 /*
2104 * Write out a single page, possibly klustering adjacent
2105 * dirty pages.
2106 */
2107 /*ARGSUSED5*/
2108 static int
2109 spec_putapage(
2110 struct vnode *vp,
2111 page_t *pp,
2112 u_offset_t *offp, /* return value */
2113 size_t *lenp, /* return value */
2114 int flags,
2115 struct cred *cr)
2116 {
2117 struct snode *sp = VTOS(vp);
2118 u_offset_t io_off;
2119 size_t io_len;
2120 size_t blksz;
2121 u_offset_t blkoff;
2122 int err = 0;
2123 struct buf *bp;
2124 u_offset_t size;
2125 size_t adj_klustsize;
2126 u_offset_t tmpoff;
2127
2128 /*
2129 	 * Destroy the read-ahead value since we are really going to write.
2130 */
2131 sp->s_nextr = 0;
2132 size = SPEC_SIZE(VTOS(sp->s_commonvp));
2133
2134 adj_klustsize = klustsize;
2135
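	/*
	 * Kluster the write on adj_klustsize boundaries, clamping the last
	 * kluster to the size of the device.
	 */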
2136 blkoff = (pp->p_offset / adj_klustsize) * adj_klustsize;
2137
2138 if (blkoff + adj_klustsize <= size)
2139 blksz = adj_klustsize;
2140 else
2141 blksz = size - blkoff;
2142
2143 /*
2144 * Find a kluster that fits in one contiguous chunk.
2145 */
2146 pp = pvn_write_kluster(vp, pp, &tmpoff, &io_len, blkoff,
2147 blksz, flags);
2148 io_off = tmpoff;
2149
2150 /*
2151 * Check for page length rounding problems
2152 * XXX - Is this necessary?
2153 */
2154 if (io_off + io_len > size) {
2155 ASSERT((io_off + io_len) - size < PAGESIZE);
2156 io_len = size - io_off;
2157 }
2158
2159 bp = spec_startio(vp, pp, io_off, io_len, B_WRITE | flags);
2160
2161 /*
2162 * Wait for i/o to complete if the request is not B_ASYNC.
2163 */
2164 if ((flags & B_ASYNC) == 0) {
2165 err = biowait(bp);
2166 pageio_done(bp);
2167 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
2168 }
2169
2170 if (offp)
2171 *offp = io_off;
2172 if (lenp)
2173 *lenp = io_len;
2174 TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTAPAGE,
2175 "specfs putapage:vp %p offp %p snode %p err %d",
2176 vp, offp, sp, err);
2177 return (err);
2178 }
2179
2180 /*
2181 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED}
2182 */
2183 static struct buf *
2184 spec_startio(
2185 struct vnode *vp,
2186 page_t *pp,
2187 u_offset_t io_off,
2188 size_t io_len,
2189 int flags)
2190 {
2191 struct buf *bp;
2192
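	/*
	 * Set up a buf describing paged I/O and hand it straight to the
	 * device's strategy routine; b_blkno is derived from the byte
	 * offset io_off.
	 */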
2193 bp = pageio_setup(pp, io_len, vp, flags);
2194
2195 bp->b_edev = vp->v_rdev;
2196 bp->b_dev = cmpdev(vp->v_rdev);
2197 bp->b_blkno = btodt(io_off);
2198 bp->b_un.b_addr = (caddr_t)0;
2199
2200 (void) bdev_strategy(bp);
2201
2202 if (flags & B_READ)
2203 lwp_stat_update(LWP_STAT_INBLK, 1);
2204 else
2205 lwp_stat_update(LWP_STAT_OUBLK, 1);
2206
2207 return (bp);
2208 }
2209
2210 static int
2211 spec_poll(
2212 struct vnode *vp,
2213 short events,
2214 int anyyet,
2215 short *reventsp,
2216 struct pollhead **phpp,
2217 caller_context_t *ct)
2218 {
2219 dev_t dev;
2220 int error;
2221
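	/*
	 * Block devices use the generic fs_poll().  For character devices,
	 * prefer the stream head, then the driver's chpoll(9E) entry point,
	 * and fall back to fs_poll() if neither is available.
	 */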
2222 if (vp->v_type == VBLK)
2223 error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
2224 else {
2225 ASSERT(vp->v_type == VCHR);
2226 dev = vp->v_rdev;
2227 if (vp->v_stream) {
2228 ASSERT(vp->v_stream != NULL);
2229 error = strpoll(vp->v_stream, events, anyyet,
2230 reventsp, phpp);
2231 } else if (devopsp[getmajor(dev)]->devo_cb_ops->cb_chpoll) {
2232 error = cdev_poll(dev, events, anyyet, reventsp, phpp);
2233 } else {
2234 error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
2235 }
2236 }
2237 return (error);
2238 }
2239
2240 /*
2241 * This routine is called through the cdevsw[] table to handle
2242 * traditional mmap'able devices that support a d_mmap function.
2243 */
2244 /*ARGSUSED8*/
2245 int
2246 spec_segmap(
2247 dev_t dev,
2248 off_t off,
2249 struct as *as,
2250 caddr_t *addrp,
2251 off_t len,
2252 uint_t prot,
2253 uint_t maxprot,
2254 uint_t flags,
2255 struct cred *cred)
2256 {
2257 struct segdev_crargs dev_a;
2258 int (*mapfunc)(dev_t dev, off_t off, int prot);
2259 size_t i;
2260 int error;
2261
2262 if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2263 return (ENODEV);
2264 TRACE_4(TR_FAC_SPECFS, TR_SPECFS_SEGMAP,
2265 "specfs segmap:dev %x as %p len %lx prot %x",
2266 dev, as, len, prot);
2267
2268 /*
2269 * Character devices that support the d_mmap
2270 * interface can only be mmap'ed shared.
2271 */
2272 if ((flags & MAP_TYPE) != MAP_SHARED)
2273 return (EINVAL);
2274
2275 /*
2276 * Check to ensure that the entire range is
2277 * legal and we are not trying to map in
2278 * more than the device will let us.
2279 */
2280 for (i = 0; i < len; i += PAGESIZE) {
2281 if (cdev_mmap(mapfunc, dev, off + i, maxprot) == -1)
2282 return (ENXIO);
2283 }
2284
2285 as_rangelock(as);
2286 /* Pick an address w/o worrying about any vac alignment constraints. */
2287 error = choose_addr(as, addrp, len, off, ADDR_NOVACALIGN, flags);
2288 if (error != 0) {
2289 as_rangeunlock(as);
2290 return (error);
2291 }
2292
2293 dev_a.mapfunc = mapfunc;
2294 dev_a.dev = dev;
2295 dev_a.offset = off;
2296 dev_a.prot = (uchar_t)prot;
2297 dev_a.maxprot = (uchar_t)maxprot;
2298 dev_a.hat_flags = 0;
2299 dev_a.hat_attr = 0;
2300 dev_a.devmap_data = NULL;
2301
2302 error = as_map(as, *addrp, len, segdev_create, &dev_a);
2303 as_rangeunlock(as);
2304 return (error);
2305 }
2306
2307 int
2308 spec_char_map(
2309 dev_t dev,
2310 offset_t off,
2311 struct as *as,
2312 caddr_t *addrp,
2313 size_t len,
2314 uchar_t prot,
2315 uchar_t maxprot,
2316 uint_t flags,
2317 struct cred *cred)
2318 {
2319 int error = 0;
2320 major_t maj = getmajor(dev);
2321 int map_flag;
2322 int (*segmap)(dev_t, off_t, struct as *,
2323 caddr_t *, off_t, uint_t, uint_t, uint_t, cred_t *);
2324 int (*devmap)(dev_t, devmap_cookie_t, offset_t,
2325 size_t, size_t *, uint_t);
2326 int (*mmap)(dev_t dev, off_t off, int prot);
2327
2328 /*
2329 * Character device: let the device driver
2330 * pick the appropriate segment driver.
2331 *
2332 * 4.x compat.: allow 'NULL' cb_segmap => spec_segmap
2333 * Kindness: allow 'nulldev' cb_segmap => spec_segmap
2334 */
2335 segmap = devopsp[maj]->devo_cb_ops->cb_segmap;
2336 if (segmap == NULL || segmap == nulldev || segmap == nodev) {
2337 mmap = devopsp[maj]->devo_cb_ops->cb_mmap;
2338 map_flag = devopsp[maj]->devo_cb_ops->cb_flag;
2339
2340 /*
2341 * Use old mmap framework if the driver has both mmap
2342 * and devmap entry points. This is to prevent the
2343 * system from calling invalid devmap entry point
2344 * for some drivers that might have put garbage in the
2345 * devmap entry point.
2346 */
2347 if ((map_flag & D_DEVMAP) || mmap == NULL ||
2348 mmap == nulldev || mmap == nodev) {
2349 devmap = devopsp[maj]->devo_cb_ops->cb_devmap;
2350
2351 /*
2352 			 * If the driver provides a devmap entry point in
2353 			 * cb_ops but not xx_segmap(9E), call
2354 			 * devmap_setup() with default settings
2355 			 * (NULL) for callback_ops and the driver
2356 			 * callback private data.
2357 */
2358 if (devmap == nodev || devmap == NULL ||
2359 devmap == nulldev)
2360 return (ENODEV);
2361
2362 error = devmap_setup(dev, off, as, addrp,
2363 len, prot, maxprot, flags, cred);
2364
2365 return (error);
2366 } else
2367 segmap = spec_segmap;
2368 } else
2369 segmap = cdev_segmap;
2370
2371 return ((*segmap)(dev, (off_t)off, as, addrp, len, prot,
2372 maxprot, flags, cred));
2373 }
2374
2375 /*ARGSUSED9*/
2376 static int
2377 spec_map(
2378 struct vnode *vp,
2379 offset_t off,
2380 struct as *as,
2381 caddr_t *addrp,
2382 size_t len,
2383 uchar_t prot,
2384 uchar_t maxprot,
2385 uint_t flags,
2386 struct cred *cred,
2387 caller_context_t *ct)
2388 {
2389 int error = 0;
2390 struct snode *sp = VTOS(vp);
2391
2392 if (vp->v_flag & VNOMAP)
2393 return (ENOSYS);
2394
2395 /* fail map with ENXIO if the device is fenced off */
2396 if (S_ISFENCED(sp))
2397 return (ENXIO);
2398
2399 /*
2400 * If file is locked, fail mapping attempt.
2401 */
2402 if (vn_has_flocks(vp))
2403 return (EAGAIN);
2404
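	/*
	 * Character devices let the driver pick the mapping scheme via
	 * spec_char_map(); block devices are mapped with segvn against the
	 * common vnode so their pages are cached there.
	 */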
2405 if (vp->v_type == VCHR) {
2406 return (spec_char_map(vp->v_rdev, off, as, addrp, len, prot,
2407 maxprot, flags, cred));
2408 } else if (vp->v_type == VBLK) {
2409 struct segvn_crargs vn_a;
2410 struct vnode *cvp;
2411 struct snode *sp;
2412
2413 /*
2414 * Block device, use segvn mapping to the underlying commonvp
2415 * for pages.
2416 */
2417 if (off > spec_maxoffset(vp))
2418 return (ENXIO);
2419
2420 sp = VTOS(vp);
2421 cvp = sp->s_commonvp;
2422 ASSERT(cvp != NULL);
2423
2424 if (off < 0 || ((offset_t)(off + len) < 0))
2425 return (ENXIO);
2426
2427 as_rangelock(as);
2428 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2429 if (error != 0) {
2430 as_rangeunlock(as);
2431 return (error);
2432 }
2433
2434 vn_a.vp = cvp;
2435 vn_a.offset = off;
2436 vn_a.type = flags & MAP_TYPE;
2437 vn_a.prot = (uchar_t)prot;
2438 vn_a.maxprot = (uchar_t)maxprot;
2439 vn_a.flags = flags & ~MAP_TYPE;
2440 vn_a.cred = cred;
2441 vn_a.amp = NULL;
2442 vn_a.szc = 0;
2443 vn_a.lgrp_mem_policy_flags = 0;
2444
2445 error = as_map(as, *addrp, len, segvn_create, &vn_a);
2446 as_rangeunlock(as);
2447 } else
2448 return (ENODEV);
2449
2450 return (error);
2451 }
2452
2453 /*ARGSUSED1*/
2454 static int
2455 spec_addmap(
2456 struct vnode *vp, /* the common vnode */
2457 offset_t off,
2458 struct as *as,
2459 caddr_t addr,
2460 size_t len, /* how many bytes to add */
2461 uchar_t prot,
2462 uchar_t maxprot,
2463 uint_t flags,
2464 struct cred *cred,
2465 caller_context_t *ct)
2466 {
2467 int error = 0;
2468 struct snode *csp = VTOS(vp);
2469 ulong_t npages;
2470
2471 ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
2472
2473 /*
2474 * XXX Given the above assertion, this might not
2475 * be a particularly sensible thing to test.
2476 */
2477 if (vp->v_flag & VNOMAP)
2478 return (ENOSYS);
2479
2480 /* fail with EIO if the device is fenced off */
2481 if (S_ISFENCED(csp))
2482 return (EIO);
2483
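	/*
	 * Just account for the pages being mapped; spec_delmap() drops the
	 * count and closes the device once the last mapping and reference
	 * are gone.
	 */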
2484 npages = btopr(len);
2485 LOCK_CSP(csp);
2486 csp->s_mapcnt += npages;
2487
2488 UNLOCK_CSP(csp);
2489 return (error);
2490 }
2491
2492 /*ARGSUSED1*/
2493 static int
2494 spec_delmap(
2495 struct vnode *vp, /* the common vnode */
2496 offset_t off,
2497 struct as *as,
2498 caddr_t addr,
2499 size_t len, /* how many bytes to take away */
2500 uint_t prot,
2501 uint_t maxprot,
2502 uint_t flags,
2503 struct cred *cred,
2504 caller_context_t *ct)
2505 {
2506 struct snode *csp = VTOS(vp);
2507 ulong_t npages;
2508 long mcnt;
2509
2510 /* segdev passes us the common vp */
2511
2512 ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
2513
2514 /* allow delmap to succeed even if device fenced off */
2515
2516 /*
2517 * XXX Given the above assertion, this might not
2518 	 * be a particularly sensible thing to test.
2519 */
2520 if (vp->v_flag & VNOMAP)
2521 return (ENOSYS);
2522
2523 npages = btopr(len);
2524
2525 LOCK_CSP(csp);
2526 mutex_enter(&csp->s_lock);
2527 mcnt = (csp->s_mapcnt -= npages);
2528
2529 if (mcnt == 0) {
2530 /*
2531 * Call the close routine when the last reference of any
2532 * kind through any [s, v]node goes away. The s_dip hold
2533 * on the devinfo node is released when the vnode is
2534 * destroyed.
2535 */
2536 if (csp->s_count == 0) {
2537 csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);
2538
2539 /* See comment in spec_close() */
2540 if (csp->s_flag & (SCLONE | SSELFCLONE))
2541 csp->s_flag &= ~SDIPSET;
2542
2543 mutex_exit(&csp->s_lock);
2544
2545 (void) device_close(vp, 0, cred);
2546 } else
2547 mutex_exit(&csp->s_lock);
2548
2549 mutex_enter(&csp->s_lock);
2550 }
2551 ASSERT(mcnt >= 0);
2552
2553 UNLOCK_CSP_LOCK_HELD(csp);
2554 mutex_exit(&csp->s_lock);
2555
2556 return (0);
2557 }
2558
2559 /*ARGSUSED4*/
2560 static int
2561 spec_dump(
2562 struct vnode *vp,
2563 caddr_t addr,
2564 offset_t bn,
2565 offset_t count,
2566 caller_context_t *ct)
2567 {
2568 /* allow dump to succeed even if device fenced off */
2569
2570 ASSERT(vp->v_type == VBLK);
2571 return (bdev_dump(vp->v_rdev, addr, (daddr_t)bn, (int)count));
2572 }
2573
2574
2575 /*
2576 * Do i/o on the given page list from/to vp, io_off for io_len.
2577 * Flags are composed of:
2578 * {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_READ, B_WRITE}
2579 * If B_ASYNC is not set i/o is waited for.
2580 */
2581 /*ARGSUSED5*/
2582 static int
2583 spec_pageio(
2584 struct vnode *vp,
2585 page_t *pp,
2586 u_offset_t io_off,
2587 size_t io_len,
2588 int flags,
2589 cred_t *cr,
2590 caller_context_t *ct)
2591 {
2592 struct buf *bp = NULL;
2593 int err = 0;
2594
2595 if (pp == NULL)
2596 return (EINVAL);
2597
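	/*
	 * Pass the caller's page list straight through to the device; no
	 * klustering or page cache lookups are done here.
	 */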
2598 bp = spec_startio(vp, pp, io_off, io_len, flags);
2599
2600 /*
2601 * Wait for i/o to complete if the request is not B_ASYNC.
2602 */
2603 if ((flags & B_ASYNC) == 0) {
2604 err = biowait(bp);
2605 pageio_done(bp);
2606 }
2607 return (err);
2608 }
2609
2610 /*
2611 * Set ACL on underlying vnode if one exists, or return ENOSYS otherwise.
2612 */
2613 int
2614 spec_setsecattr(
2615 struct vnode *vp,
2616 vsecattr_t *vsap,
2617 int flag,
2618 struct cred *cr,
2619 caller_context_t *ct)
2620 {
2621 struct vnode *realvp;
2622 struct snode *sp = VTOS(vp);
2623 int error;
2624
2625 /* fail with ENXIO if the device is fenced off */
2626 if (S_ISFENCED(sp))
2627 return (ENXIO);
2628
2629 /*
2630 	 * The acl(2) system call takes VOP_RWLOCK on the file before setting an
2631 * ACL, but since specfs does not serialize reads and writes, this
2632 * VOP does not do anything. However, some backing file systems may
2633 * expect the lock to be held before setting an ACL, so it is taken
2634 * here privately to avoid serializing specfs reads and writes.
2635 */
2636 if ((realvp = sp->s_realvp) != NULL) {
2637 (void) VOP_RWLOCK(realvp, V_WRITELOCK_TRUE, ct);
2638 error = VOP_SETSECATTR(realvp, vsap, flag, cr, ct);
2639 (void) VOP_RWUNLOCK(realvp, V_WRITELOCK_TRUE, ct);
2640 return (error);
2641 } else
2642 return (fs_nosys());
2643 }
2644
2645 /*
2646 * Get ACL from underlying vnode if one exists, or fabricate it from
2647 * the permissions returned by spec_getattr() otherwise.
2648 */
2649 int
2650 spec_getsecattr(
2651 struct vnode *vp,
2652 vsecattr_t *vsap,
2653 int flag,
2654 struct cred *cr,
2655 caller_context_t *ct)
2656 {
2657 struct vnode *realvp;
2658 struct snode *sp = VTOS(vp);
2659
2660 /* fail with ENXIO if the device is fenced off */
2661 if (S_ISFENCED(sp))
2662 return (ENXIO);
2663
2664 if ((realvp = sp->s_realvp) != NULL)
2665 return (VOP_GETSECATTR(realvp, vsap, flag, cr, ct));
2666 else
2667 return (fs_fab_acl(vp, vsap, flag, cr, ct));
2668 }
2669
2670 int
2671 spec_pathconf(
2672 vnode_t *vp,
2673 int cmd,
2674 ulong_t *valp,
2675 cred_t *cr,
2676 caller_context_t *ct)
2677 {
2678 vnode_t *realvp;
2679 struct snode *sp = VTOS(vp);
2680
2681 /* fail with ENXIO if the device is fenced off */
2682 if (S_ISFENCED(sp))
2683 return (ENXIO);
2684
2685 if ((realvp = sp->s_realvp) != NULL)
2686 return (VOP_PATHCONF(realvp, cmd, valp, cr, ct));
2687 else
2688 return (fs_pathconf(vp, cmd, valp, cr, ct));
2689 }
2690