xref: /illumos-gate/usr/src/uts/common/io/zcons.c (revision bbf215553c7233fbab8a0afdf1fac74c44781867)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * Zone Console Driver.
29  *
30  * This driver, derived from the pts/ptm drivers, is the pseudo console driver
31  * for system zones.  Its implementation is straightforward.  Each instance
32  * of the driver represents a global-zone/local-zone pair (this maps in a
33  * straightforward way to the commonly used terminal notion of "manager side"
34  * and "subsidiary side", and we use that terminology throughout).
35  *
36  * Instances of zcons are onlined as children of /pseudo/zconsnex@1/
37  * by zoneadmd in userland, using the devctl framework; thus the driver
38  * does not need to maintain any sort of "admin" node.
39  *
40  * The driver shuttles I/O from manager side to subsidiary side and back.  In a
41  * break from the pts/ptm semantics, if one side is not open, I/O directed
42  * towards it will simply be discarded.  This is so that if zoneadmd is not
43  * holding the manager side console open (i.e. it has died somehow), processes
44  * in the zone do not experience any errors and I/O to the console does not
45  * hang.
46  *
47  * TODO: we may want to revisit the other direction; i.e. we may want
48  * zoneadmd to be able to detect whether no zone processes are holding the
49  * console open, an unusual situation.
50  *
51  *
52  *
53  * MANAGER SIDE IOCTLS
54  *
55  * The ZC_HOLDSUBSID and ZC_RELEASESUBSID ioctls instruct the manager side of
56  * the console to hold and release a reference to the subsidiary side's vnode.
57  * They are meant to be issued by zoneadmd after the console device node is
58  * created and before it is destroyed so that the subsidiary's STREAMS anchor,
59  * ptem, is preserved when ttymon starts popping STREAMS modules from within
60  * the associated zone.  This guarantees that the zone console will always have
61  * terminal semantics while the zone is running.
62  *
63  * Here is the issue: the ptem module is anchored in the zone console
64  * (subsidiary side) so that processes within the associated non-global zone
65  * will fail to pop it off, thus ensuring that the subsidiary will retain
66  * terminal semantics.  When a process attempts to pop the anchor off of a
67  * stream, the STREAMS subsystem checks whether the calling process' zone is
68  * the same as that of the process that pushed the anchor onto the stream and
69  * cancels the pop if they differ.  zoneadmd used to hold an open file
70  * descriptor for the subsidiary while the associated non-global zone ran, thus
71  * ensuring that the subsidiary's STREAMS anchor would never be popped from
72  * within the non-global zone (because zoneadmd runs in the global zone).
73  * However, this file descriptor was removed to make zone console management
74  * more robust.  sad(4D) is now used to automatically set up the subsidiary's
75  * STREAMS modules when the zone console is freshly opened within the
76  * associated non-global zone.  However, when a process within the non-global
77  * zone freshly opens the zone console, the anchor is pushed from within the
78  * non-global zone, making it possible for processes within the non-global zone
79  * (e.g., ttymon) to pop the anchor and destroy the zone console's terminal
80  * semantics.
81  *
82  * One solution is to make the zcons device hold the subsidiary open while the
83  * associated non-global zone runs so that the STREAMS anchor will always be
84  * associated with the global zone.  Unfortunately, the subsidiary cannot be
85  * opened from within the zcons driver because the driver is not reentrant: it
86  * has an outer STREAMS perimeter.  Therefore, the next best option is for
87  * zcons to provide an ioctl interface to zoneadmd to manage holding and
88  * releasing the subsidiary side of the console.  It is sufficient to hold the
89  * subsidiary side's vnode and bump the associated snode's reference count to
90  * preserve the subsidiary's STREAMS configuration while the associated zone
91  * runs, so that's what the ioctls do.
92  *
93  *
94  * ZC_HOLDSUBSID
95  *
96  * This ioctl takes a file descriptor as an argument.  It effectively gets a
97  * reference to the subsidiary side's minor node's vnode and bumps the
98  * associated snode's reference count.  The vnode reference is stored in the
99  * zcons device node's soft state.  This ioctl succeeds if the given file
100  * descriptor refers to the subsidiary side's minor node or if there is already
101  * a reference to the subsidiary side's minor node's vnode in the device's soft
102  * state.
103  *
104  *
105  * ZC_RELEASESUBSID
106  *
107  * This ioctl takes a file descriptor as an argument.  It effectively releases
108  * the vnode reference stored in the zcons device node's soft state (which was
109  * previously acquired via ZC_HOLDSUBSID) and decrements the reference count of
110  * the snode associated with the vnode.  This ioctl succeeds if the given file
111  * descriptor refers to the subsidiary side's minor node or if no reference to
112  * the subsidiary side's minor node's vnode is stored in the device's soft
113  * state.
114  *
115  *
116  * Note that the file descriptor arguments for both ioctls must be cast to
117  * integers of pointer width.
118  *
119  * Here's how the dance between zcons and zoneadmd works:
120  *
121  *     Zone boot:
122  *     1.  While booting the zone, zoneadmd creates an instance of zcons.
123  *     2.  zoneadmd opens the manager and subsidiary sides of the new zone
124  *         console and issues the ZC_HOLDSUBSID ioctl on the manager side,
125  *         passing its file descriptor for the subsidiary side as the ioctl
126  *         argument.
127  *     3.  zcons holds the subsidiary side's vnode, bumps the snode's reference
128  *         count, and stores a pointer to the vnode in the device's soft
129  *         state.
130  *     4.  zoneadmd closes the manager and subsidiary sides and continues to
131  *         boot the zone.
132  *
133  *     Zone halt:
134  *     1.  While halting the zone, zoneadmd opens the manager and subsidiary
135  *         sides of the zone's console and issues the ZC_RELEASESUBSID ioctl on
136  *         the manager side, passing its file descriptor for the subsidiary
137  *         side as the ioctl argument.
138  *     2.  zcons decrements the subsidiary side's snode's reference count,
139  *         releases the subsidiary's vnode, and eliminates its reference to the
140  *         vnode in the device's soft state.
141  *     3.  zoneadmd closes the manager and subsidiary sides.
142  *     4.  zoneadmd destroys the zcons device and continues to halt the zone.
143  *
144  * It is necessary for zoneadmd to hold the subsidiary open while issuing
145  * ZC_RELEASESUBSID because zcons might otherwise release the last reference to
146  * the subsidiary's vnode.  If it does, then specfs will panic because it will
147  * expect that the STREAMS configuration for the vnode was destroyed, which
148  * VN_RELE doesn't do.  Forcing zoneadmd to hold the subsidiary open guarantees
149  * that zcons won't release the vnode's last reference.  zoneadmd will properly
150  * destroy the vnode and the snode when it closes the file descriptor.
151  *
152  * Technically, any process that can access the manager side can issue these
153  * ioctls, but they should be treated as private interfaces for zoneadmd.
154  */
155 
156 #include <sys/types.h>
157 #include <sys/cmn_err.h>
158 #include <sys/conf.h>
159 #include <sys/cred.h>
160 #include <sys/ddi.h>
161 #include <sys/debug.h>
162 #include <sys/devops.h>
163 #include <sys/errno.h>
164 #include <sys/file.h>
165 #include <sys/kstr.h>
166 #include <sys/modctl.h>
167 #include <sys/param.h>
168 #include <sys/stat.h>
169 #include <sys/stream.h>
170 #include <sys/stropts.h>
171 #include <sys/strsun.h>
172 #include <sys/sunddi.h>
173 #include <sys/sysmacros.h>
174 #include <sys/systm.h>
175 #include <sys/types.h>
176 #include <sys/zcons.h>
177 #include <sys/vnode.h>
178 #include <sys/fs/snode.h>
179 #include <sys/zone.h>
180 
/*
 * Forward declarations of the autoconfiguration entry points; they are
 * referenced by DDI_DEFINE_STREAM_OPS() before their definitions appear.
 */
static int zc_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int zc_attach(dev_info_t *, ddi_attach_cmd_t);
static int zc_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Forward declarations of the STREAMS entry points; they are referenced by
 * the zc_rinit and zc_winit qinit structures before their definitions appear.
 */
static int zc_open(queue_t *, dev_t *, int, int, cred_t *);
static int zc_close(queue_t *, int, cred_t *);
static int zc_wput(queue_t *, mblk_t *);
static int zc_rsrv(queue_t *);
static int zc_wsrv(queue_t *);
190 
/*
 * The instance number is encoded in the dev_t in the minor number; the lowest
 * bit of the minor number is used to track the manager vs. subsidiary side of
 * the virtual console.  The rest of the bits in the minor number are the
 * instance.  For example, instance 3 uses minor 6 for its manager node and
 * minor 7 for its subsidiary node.
 */
#define	ZC_MANAGER_MINOR	0
#define	ZC_SUBSID_MINOR		1

/* Extract the instance number / the side (ZC_*_MINOR) from a dev_t. */
#define	ZC_INSTANCE(x)		(getminor((x)) >> 1)
#define	ZC_NODE(x)		(getminor((x)) & 0x01)

/*
 * This macro converts a zc_state_t pointer to the associated subsidiary minor
 * node's dev_t.  It is the inverse of the encoding above: the instance is
 * shifted left by one and the low bit is set to ZC_SUBSID_MINOR.
 */
#define	ZC_STATE_TO_SUBDEV(x)	(makedevice(ddi_driver_major((x)->zc_devinfo), \
	(minor_t)(ddi_get_instance((x)->zc_devinfo) << 1 | ZC_SUBSID_MINOR)))
209 
/*
 * Debug tracing: when zcons_debug is non-zero, DBG() and DBG1() log their
 * arguments through cmn_err(CE_NOTE); otherwise they do nothing.
 *
 * Each macro is wrapped in do { } while (0) so that it always expands to
 * exactly one statement.  A bare "if (...) cmn_err(...)" expansion would
 * silently capture a following "else" when the macro is used as the body of
 * an unbraced if/else at the call site.
 */
int zcons_debug = 0;
#define	DBG(a)							\
	do {							\
		if (zcons_debug)				\
			cmn_err(CE_NOTE, a);			\
	} while (0)
#define	DBG1(a, b)						\
	do {							\
		if (zcons_debug)				\
			cmn_err(CE_NOTE, a, b);			\
	} while (0)
213 
214 
/*
 * Zone Console Pseudo Terminal Module: stream data structure definitions
 *
 * zc_info is shared by the read-side and write-side qinit structures below.
 * The initializers are positional; the per-field notes follow the field
 * order documented in module_info(9S).
 */
static struct module_info zc_info = {
	31337,	/* module id number (c0z we r hAx0rs) */
	"zcons",	/* module name */
	0,		/* minimum packet size */
	INFPSZ,		/* maximum packet size */
	_TTY_BUFSIZ,	/* high water mark */
	128		/* low water mark */
};
226 
/*
 * Read-side queue entry points.  There is no read put procedure; the read
 * side carries the service, open and close routines.  Initializers are
 * positional; see qinit(9S) for the field order.
 */
static struct qinit zc_rinit = {
	NULL,		/* put procedure (none on the read side) */
	zc_rsrv,	/* service procedure */
	zc_open,	/* open routine */
	zc_close,	/* close routine */
	NULL,		/* admin routine (unused) */
	&zc_info,	/* module information */
	NULL		/* module statistics (none) */
};
236 
/*
 * Write-side queue entry points.  Open and close are supplied only by the
 * read-side qinit, so those slots are NULL here.  Initializers are
 * positional; see qinit(9S) for the field order.
 */
static struct qinit zc_winit = {
	zc_wput,	/* put procedure */
	zc_wsrv,	/* service procedure */
	NULL,		/* open routine (read side only) */
	NULL,		/* close routine (read side only) */
	NULL,		/* admin routine (unused) */
	&zc_info,	/* module information */
	NULL		/* module statistics (none) */
};
246 
/*
 * STREAMS table handed to DDI_DEFINE_STREAM_OPS(): ties the read and write
 * qinit structures together.  The last two slots are for multiplexing
 * drivers (see streamtab(9S)) and are unused here.
 */
static struct streamtab zc_tab_info = {
	&zc_rinit,	/* read-side qinit */
	&zc_winit,	/* write-side qinit */
	NULL,		/* mux read-side qinit (unused) */
	NULL		/* mux write-side qinit (unused) */
};
253 
/*
 * STREAMS MT-configuration flags for the driver.  D_MTOUTPERIM is what gives
 * the driver the outer perimeter that the block comment at the top of this
 * file refers to.
 * NOTE(review): D_MTOCEXCL presumably makes open/close exclusive at that
 * outer perimeter -- confirm against the mt-streams documentation.
 */
#define	ZC_CONF_FLAG	(D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL)

/*
 * this will define (struct cb_ops cb_zc_ops) and (struct dev_ops zc_ops)
 */
DDI_DEFINE_STREAM_OPS(zc_ops, nulldev, nulldev,	zc_attach, zc_detach, nodev, \
    zc_getinfo, ZC_CONF_FLAG, &zc_tab_info, ddi_quiesce_not_needed);
261 
/*
 * Module linkage information for the kernel.  modldrv describes this module
 * as a driver; modlinkage is what _init(), _fini() and _info() hand to
 * mod_install(), mod_remove() and mod_info().
 */

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module (this is a pseudo driver) */
	"Zone console driver",	/* description of module */
	&zc_ops			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* loadable-module system revision */
	&modldrv,		/* the single linkage structure */
	NULL			/* list terminator */
};
277 
/*
 * Per-instance soft state; one of these exists for each global-zone /
 * local-zone console pair.
 */
typedef struct zc_state {
	dev_info_t *zc_devinfo;		/* our devinfo node; set in zc_attach */
	queue_t *zc_manager_rdq;	/* manager read queue; NULL if closed */
	queue_t *zc_subsid_rdq;		/* subsidiary read queue; NULL if closed */
	vnode_t *zc_subsid_vnode;	/* vnode held via ZC_HOLDSUBSID, or NULL */
	int zc_state;			/* ZC_STATE_* bits, below */
} zc_state_t;

/* Bits for zc_state: which sides of the console are currently open. */
#define	ZC_STATE_MOPEN	0x01
#define	ZC_STATE_SOPEN	0x02

/* Soft-state anchor; set up in _init() and torn down in _fini(). */
static void *zc_soft_state;
290 
/*
 * List of STREAMS modules that should be pushed onto every subsidiary instance.
 * The list is NULL-terminated and is handed to kstr_autopush() by
 * zc_subsidiary_open(), which passes an anchor index of 1 so that the anchor
 * is placed at "ptem".
 */
static char *zcons_mods[] = {
	"ptem",
	"ldterm",
	"ttcompat",
	NULL
};
300 
301 int
_init(void)302 _init(void)
303 {
304 	int err;
305 
306 	if ((err = ddi_soft_state_init(&zc_soft_state,
307 	    sizeof (zc_state_t), 0)) != 0) {
308 		return (err);
309 	}
310 
311 	if ((err = mod_install(&modlinkage)) != 0)
312 		ddi_soft_state_fini(zc_soft_state);
313 
314 	return (err);
315 }
316 
317 
318 int
_fini(void)319 _fini(void)
320 {
321 	int err;
322 
323 	if ((err = mod_remove(&modlinkage)) != 0) {
324 		return (err);
325 	}
326 
327 	ddi_soft_state_fini(&zc_soft_state);
328 	return (0);
329 }
330 
331 int
_info(struct modinfo * modinfop)332 _info(struct modinfo *modinfop)
333 {
334 	return (mod_info(&modlinkage, modinfop));
335 }
336 
337 static int
zc_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)338 zc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
339 {
340 	zc_state_t *zcs;
341 	int instance;
342 
343 	if (cmd != DDI_ATTACH)
344 		return (DDI_FAILURE);
345 
346 	instance = ddi_get_instance(dip);
347 	if (ddi_soft_state_zalloc(zc_soft_state, instance) != DDI_SUCCESS)
348 		return (DDI_FAILURE);
349 
350 	/*
351 	 * Create the manager and subsidiary minor nodes.
352 	 */
353 	if ((ddi_create_minor_node(dip, ZCONS_SUBSIDIARY_NAME, S_IFCHR,
354 	    instance << 1 | ZC_SUBSID_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) ||
355 	    (ddi_create_minor_node(dip, ZCONS_MANAGER_NAME, S_IFCHR,
356 	    instance << 1 | ZC_MANAGER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) {
357 		ddi_remove_minor_node(dip, NULL);
358 		ddi_soft_state_free(zc_soft_state, instance);
359 		return (DDI_FAILURE);
360 	}
361 
362 	VERIFY((zcs = ddi_get_soft_state(zc_soft_state, instance)) != NULL);
363 	zcs->zc_devinfo = dip;
364 	return (DDI_SUCCESS);
365 }
366 
367 static int
zc_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)368 zc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
369 {
370 	zc_state_t *zcs;
371 	int instance;
372 
373 	if (cmd != DDI_DETACH)
374 		return (DDI_FAILURE);
375 
376 	instance = ddi_get_instance(dip);
377 	if ((zcs = ddi_get_soft_state(zc_soft_state, instance)) == NULL)
378 		return (DDI_FAILURE);
379 
380 	if ((zcs->zc_state & ZC_STATE_MOPEN) ||
381 	    (zcs->zc_state & ZC_STATE_SOPEN)) {
382 		DBG1("zc_detach: device (dip=%p) still open\n", (void *)dip);
383 		return (DDI_FAILURE);
384 	}
385 
386 	ddi_remove_minor_node(dip, NULL);
387 	ddi_soft_state_free(zc_soft_state, instance);
388 
389 	return (DDI_SUCCESS);
390 }
391 
392 /*
393  * zc_getinfo()
394  *	getinfo(9e) entrypoint.
395  */
396 static int
zc_getinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)397 zc_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
398 {
399 	zc_state_t *zcs;
400 	int instance = ZC_INSTANCE((dev_t)arg);
401 
402 	switch (infocmd) {
403 	case DDI_INFO_DEVT2DEVINFO:
404 		if ((zcs = ddi_get_soft_state(zc_soft_state, instance)) == NULL)
405 			return (DDI_FAILURE);
406 		*result = zcs->zc_devinfo;
407 		return (DDI_SUCCESS);
408 	case DDI_INFO_DEVT2INSTANCE:
409 		*result = (void *)(uintptr_t)instance;
410 		return (DDI_SUCCESS);
411 	}
412 	return (DDI_FAILURE);
413 }
414 
415 /*
416  * Return the equivalent queue from the other side of the relationship.
417  * e.g.: given the subsidiary's write queue, return the manager's write queue.
418  */
419 static queue_t *
zc_switch(queue_t * qp)420 zc_switch(queue_t *qp)
421 {
422 	zc_state_t *zcs = qp->q_ptr;
423 	ASSERT(zcs != NULL);
424 
425 	if (qp == zcs->zc_manager_rdq) {
426 		return (zcs->zc_subsid_rdq);
427 	} else if (OTHERQ(qp) == zcs->zc_manager_rdq &&
428 	    zcs->zc_subsid_rdq != NULL) {
429 		return (OTHERQ(zcs->zc_subsid_rdq));
430 	} else if (qp == zcs->zc_subsid_rdq) {
431 		return (zcs->zc_manager_rdq);
432 	} else if (OTHERQ(qp) == zcs->zc_subsid_rdq &&
433 	    zcs->zc_manager_rdq != NULL) {
434 		return (OTHERQ(zcs->zc_manager_rdq));
435 	} else {
436 		return (NULL);
437 	}
438 }
439 
440 /*
441  * For debugging and outputting messages.  Returns the name of the side of
442  * the relationship associated with this queue.
443  */
444 static const char *
zc_side(queue_t * qp)445 zc_side(queue_t *qp)
446 {
447 	zc_state_t *zcs = qp->q_ptr;
448 	ASSERT(zcs != NULL);
449 
450 	if (qp == zcs->zc_manager_rdq ||
451 	    OTHERQ(qp) == zcs->zc_manager_rdq) {
452 		return ("manager");
453 	}
454 	ASSERT(qp == zcs->zc_subsid_rdq || OTHERQ(qp) == zcs->zc_subsid_rdq);
455 	return ("subsidiary");
456 }
457 
458 static int
zc_manager_open(zc_state_t * zcs,queue_t * rqp,dev_t * devp,int oflag,int sflag,cred_t * credp)459 zc_manager_open(zc_state_t *zcs,
460     queue_t	*rqp,	/* pointer to the read side queue */
461     dev_t	*devp,	/* pointer to stream tail's dev */
462     int		oflag,	/* the user open(2) supplied flags */
463     int		sflag,	/* open state flag */
464     cred_t	*credp)	/* credentials */
465 {
466 	mblk_t *mop;
467 	struct stroptions *sop;
468 
469 	/*
470 	 * Enforce exclusivity on the manager side; the only consumer should
471 	 * be the zoneadmd for the zone.
472 	 */
473 	if ((zcs->zc_state & ZC_STATE_MOPEN) != 0)
474 		return (EBUSY);
475 
476 	if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
477 		DBG("zc_manager_open(): mop allocation failed\n");
478 		return (ENOMEM);
479 	}
480 
481 	zcs->zc_state |= ZC_STATE_MOPEN;
482 
483 	/*
484 	 * q_ptr stores driver private data; stash the soft state data on both
485 	 * read and write sides of the queue.
486 	 */
487 	WR(rqp)->q_ptr = rqp->q_ptr = zcs;
488 	qprocson(rqp);
489 
490 	/*
491 	 * Following qprocson(), the manager side is fully plumbed into the
492 	 * STREAM and may send/receive messages.  Setting zcs->zc_manager_rdq
493 	 * will allow the subsidiary to send messages to us (the manager).
494 	 * This cannot occur before qprocson() because the manager is not
495 	 * ready to process them until that point.
496 	 */
497 	zcs->zc_manager_rdq = rqp;
498 
499 	/*
500 	 * set up hi/lo water marks on stream head read queue and add
501 	 * controlling tty as needed.
502 	 */
503 	mop->b_datap->db_type = M_SETOPTS;
504 	mop->b_wptr += sizeof (struct stroptions);
505 	sop = (struct stroptions *)(void *)mop->b_rptr;
506 	if (oflag & FNOCTTY)
507 		sop->so_flags = SO_HIWAT | SO_LOWAT;
508 	else
509 		sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
510 	sop->so_hiwat = _TTY_BUFSIZ;
511 	sop->so_lowat = 256;
512 	putnext(rqp, mop);
513 
514 	return (0);
515 }
516 
517 static int
zc_subsidiary_open(zc_state_t * zcs,queue_t * rqp,dev_t * devp,int oflag,int sflag,cred_t * credp)518 zc_subsidiary_open(zc_state_t *zcs,
519     queue_t	*rqp,	/* pointer to the read side queue */
520     dev_t	*devp,	/* pointer to stream tail's dev */
521     int		oflag,	/* the user open(2) supplied flags */
522     int		sflag,	/* open state flag */
523     cred_t	*credp)	/* credentials */
524 {
525 	mblk_t *mop;
526 	struct stroptions *sop;
527 	major_t major;
528 	minor_t minor;
529 	minor_t lastminor;
530 	uint_t anchorindex;
531 
532 	/*
533 	 * The subsidiary side can be opened as many times as needed.
534 	 */
535 	if ((zcs->zc_state & ZC_STATE_SOPEN) != 0) {
536 		ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zcs));
537 		return (0);
538 	}
539 
540 	/*
541 	 * Set up sad(4D) so that the necessary STREAMS modules will be in
542 	 * place.  A wrinkle is that 'ptem' must be anchored
543 	 * in place (see streamio(4I)) because we always want the console to
544 	 * have terminal semantics.
545 	 */
546 	minor = ddi_get_instance(zcs->zc_devinfo) << 1 | ZC_SUBSID_MINOR;
547 	major = ddi_driver_major(zcs->zc_devinfo);
548 	lastminor = 0;
549 	anchorindex = 1;
550 	if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor,
551 	    &anchorindex, zcons_mods) != 0) {
552 		DBG("zc_subsidiary_open(): kstr_autopush() failed\n");
553 		return (EIO);
554 	}
555 
556 	if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
557 		DBG("zc_subsidiary_open(): mop allocation failed\n");
558 		return (ENOMEM);
559 	}
560 
561 	zcs->zc_state |= ZC_STATE_SOPEN;
562 
563 	/*
564 	 * q_ptr stores driver private data; stash the soft state data on both
565 	 * read and write sides of the queue.
566 	 */
567 	WR(rqp)->q_ptr = rqp->q_ptr = zcs;
568 
569 	qprocson(rqp);
570 
571 	/*
572 	 * Must follow qprocson(), since we aren't ready to process until then.
573 	 */
574 	zcs->zc_subsid_rdq = rqp;
575 
576 	/*
577 	 * set up hi/lo water marks on stream head read queue and add
578 	 * controlling tty as needed.
579 	 */
580 	mop->b_datap->db_type = M_SETOPTS;
581 	mop->b_wptr += sizeof (struct stroptions);
582 	sop = (struct stroptions *)(void *)mop->b_rptr;
583 	sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
584 	sop->so_hiwat = _TTY_BUFSIZ;
585 	sop->so_lowat = 256;
586 	putnext(rqp, mop);
587 
588 	return (0);
589 }
590 
591 /*
592  * open(9e) entrypoint; checks sflag, and rejects anything unordinary.
593  */
594 static int
zc_open(queue_t * rqp,dev_t * devp,int oflag,int sflag,cred_t * credp)595 zc_open(queue_t *rqp,	/* pointer to the read side queue */
596     dev_t   *devp,	/* pointer to stream tail's dev */
597     int	oflag,		/* the user open(2) supplied flags */
598     int	sflag,		/* open state flag */
599     cred_t  *credp)	/* credentials */
600 {
601 	int instance = ZC_INSTANCE(*devp);
602 	int ret;
603 	zc_state_t *zcs;
604 
605 	if (sflag != 0)
606 		return (EINVAL);
607 
608 	if ((zcs = ddi_get_soft_state(zc_soft_state, instance)) == NULL)
609 		return (ENXIO);
610 
611 	switch (ZC_NODE(*devp)) {
612 	case ZC_MANAGER_MINOR:
613 		ret = zc_manager_open(zcs, rqp, devp, oflag, sflag, credp);
614 		break;
615 	case ZC_SUBSID_MINOR:
616 		ret = zc_subsidiary_open(zcs, rqp, devp, oflag, sflag, credp);
617 		break;
618 	default:
619 		ret = ENXIO;
620 		break;
621 	}
622 
623 	return (ret);
624 }
625 
626 /*
627  * close(9e) entrypoint.
628  */
629 static int
zc_close(queue_t * rqp,int flag,cred_t * credp)630 zc_close(queue_t *rqp, int flag, cred_t *credp)
631 {
632 	queue_t *wqp;
633 	mblk_t	*bp;
634 	zc_state_t *zcs;
635 	major_t major;
636 	minor_t minor;
637 
638 	zcs = (zc_state_t *)rqp->q_ptr;
639 
640 	if (rqp == zcs->zc_manager_rdq) {
641 		DBG("Closing manager side");
642 
643 		zcs->zc_manager_rdq = NULL;
644 		zcs->zc_state &= ~ZC_STATE_MOPEN;
645 
646 		/*
647 		 * qenable subsidiary side write queue so that it can flush
648 		 * its messages as manager's read queue is going away
649 		 */
650 		if (zcs->zc_subsid_rdq != NULL) {
651 			qenable(WR(zcs->zc_subsid_rdq));
652 		}
653 
654 		qprocsoff(rqp);
655 		WR(rqp)->q_ptr = rqp->q_ptr = NULL;
656 
657 	} else if (rqp == zcs->zc_subsid_rdq) {
658 
659 		DBG("Closing subsidiary side");
660 		zcs->zc_state &= ~ZC_STATE_SOPEN;
661 		zcs->zc_subsid_rdq = NULL;
662 
663 		wqp = WR(rqp);
664 		while ((bp = getq(wqp)) != NULL) {
665 			if (zcs->zc_manager_rdq != NULL)
666 				putnext(zcs->zc_manager_rdq, bp);
667 			else if (bp->b_datap->db_type == M_IOCTL)
668 				miocnak(wqp, bp, 0, 0);
669 			else
670 				freemsg(bp);
671 		}
672 
673 		/*
674 		 * Qenable manager side write queue so that it can flush its
675 		 * messages as subsidiarys's read queue is going away.
676 		 */
677 		if (zcs->zc_manager_rdq != NULL)
678 			qenable(WR(zcs->zc_manager_rdq));
679 
680 		qprocsoff(rqp);
681 		WR(rqp)->q_ptr = rqp->q_ptr = NULL;
682 
683 		/*
684 		 * Clear the sad configuration so that reopening doesn't fail
685 		 * to set up sad configuration.
686 		 */
687 		major = ddi_driver_major(zcs->zc_devinfo);
688 		minor = ddi_get_instance(zcs->zc_devinfo) << 1 |
689 		    ZC_SUBSID_MINOR;
690 		(void) kstr_autopush(CLR_AUTOPUSH, &major, &minor, NULL, NULL,
691 		    NULL);
692 	}
693 
694 	return (0);
695 }
696 
697 static void
handle_mflush(queue_t * qp,mblk_t * mp)698 handle_mflush(queue_t *qp, mblk_t *mp)
699 {
700 	mblk_t *nmp;
701 	DBG1("M_FLUSH on %s side", zc_side(qp));
702 
703 	if (*mp->b_rptr & FLUSHW) {
704 		DBG1("M_FLUSH, FLUSHW, %s side", zc_side(qp));
705 		flushq(qp, FLUSHDATA);
706 		*mp->b_rptr &= ~FLUSHW;
707 		if ((*mp->b_rptr & FLUSHR) == 0) {
708 			/*
709 			 * FLUSHW only. Change to FLUSHR and putnext other side,
710 			 * then we are done.
711 			 */
712 			*mp->b_rptr |= FLUSHR;
713 			if (zc_switch(RD(qp)) != NULL) {
714 				putnext(zc_switch(RD(qp)), mp);
715 				return;
716 			}
717 		} else if ((zc_switch(RD(qp)) != NULL) &&
718 		    (nmp = copyb(mp)) != NULL) {
719 			/*
720 			 * It is a FLUSHRW; we copy the mblk and send
721 			 * it to the other side, since we still need to use
722 			 * the mblk in FLUSHR processing, below.
723 			 */
724 			putnext(zc_switch(RD(qp)), nmp);
725 		}
726 	}
727 
728 	if (*mp->b_rptr & FLUSHR) {
729 		DBG("qreply(qp) turning FLUSHR around\n");
730 		qreply(qp, mp);
731 		return;
732 	}
733 	freemsg(mp);
734 }
735 
736 /*
737  * wput(9E) is symmetric for manager and subsidiary sides, so this handles both
738  * without splitting the codepath.  (The only exception to this is the
739  * processing of zcons ioctls, which is restricted to the manager side.)
740  *
741  * zc_wput() looks at the other side; if there is no process holding that
742  * side open, it frees the message.  This prevents processes from hanging
743  * if no one is holding open the console.  Otherwise, it putnext's high
744  * priority messages, putnext's normal messages if possible, and otherwise
745  * enqueues the messages; in the case that something is enqueued, wsrv(9E)
746  * will take care of eventually shuttling I/O to the other side.
747  */
748 static int
zc_wput(queue_t * qp,mblk_t * mp)749 zc_wput(queue_t *qp, mblk_t *mp)
750 {
751 	unsigned char type = mp->b_datap->db_type;
752 	zc_state_t *zcs;
753 	struct iocblk *iocbp;
754 	file_t *subsidiary_filep;
755 	struct snode *subsidiary_snodep;
756 	int subsidiary_fd;
757 
758 	ASSERT(qp->q_ptr);
759 
760 	DBG1("entering zc_wput, %s side", zc_side(qp));
761 
762 	/*
763 	 * Process zcons ioctl messages if qp is the manager console's write
764 	 * queue.
765 	 */
766 	zcs = (zc_state_t *)qp->q_ptr;
767 	if (zcs->zc_manager_rdq != NULL && qp == WR(zcs->zc_manager_rdq) &&
768 	    type == M_IOCTL) {
769 		iocbp = (struct iocblk *)(void *)mp->b_rptr;
770 		switch (iocbp->ioc_cmd) {
771 		case ZC_HOLDSUBSID:
772 			/*
773 			 * Hold the subsidiary's vnode and increment the
774 			 * refcount of the snode.  If the vnode is already
775 			 * held, then indicate success.
776 			 */
777 			if (iocbp->ioc_count != TRANSPARENT) {
778 				miocack(qp, mp, 0, EINVAL);
779 				return (0);
780 			}
781 			if (zcs->zc_subsid_vnode != NULL) {
782 				miocack(qp, mp, 0, 0);
783 				return (0);
784 			}
785 
786 			/*
787 			 * The process that passed the ioctl must be running in
788 			 * the global zone.
789 			 */
790 			if (curzone != global_zone) {
791 				miocack(qp, mp, 0, EINVAL);
792 				return (0);
793 			}
794 
795 			/*
796 			 * The calling process must pass a file descriptor for
797 			 * the subsidiary device.
798 			 */
799 			subsidiary_fd =
800 			    (int)(intptr_t)*(caddr_t *)(void *)mp->b_cont->
801 			    b_rptr;
802 			subsidiary_filep = getf(subsidiary_fd);
803 			if (subsidiary_filep == NULL) {
804 				miocack(qp, mp, 0, EINVAL);
805 				return (0);
806 			}
807 			if (ZC_STATE_TO_SUBDEV(zcs) !=
808 			    subsidiary_filep->f_vnode->v_rdev) {
809 				releasef(subsidiary_fd);
810 				miocack(qp, mp, 0, EINVAL);
811 				return (0);
812 			}
813 
814 			/*
815 			 * Get a reference to the subsidiary's vnode.  Also
816 			 * bump the reference count on the associated snode.
817 			 */
818 			ASSERT(vn_matchops(subsidiary_filep->f_vnode,
819 			    spec_getvnodeops()));
820 			zcs->zc_subsid_vnode = subsidiary_filep->f_vnode;
821 			VN_HOLD(zcs->zc_subsid_vnode);
822 			subsidiary_snodep = VTOCS(zcs->zc_subsid_vnode);
823 			mutex_enter(&subsidiary_snodep->s_lock);
824 			++subsidiary_snodep->s_count;
825 			mutex_exit(&subsidiary_snodep->s_lock);
826 			releasef(subsidiary_fd);
827 			miocack(qp, mp, 0, 0);
828 			return (0);
829 		case ZC_RELEASESUBSID:
830 			/*
831 			 * Release the manager's handle on the subsidiary's
832 			 * vnode.  If there isn't a handle for the vnode, then
833 			 * indicate success.
834 			 */
835 			if (iocbp->ioc_count != TRANSPARENT) {
836 				miocack(qp, mp, 0, EINVAL);
837 				return (0);
838 			}
839 			if (zcs->zc_subsid_vnode == NULL) {
840 				miocack(qp, mp, 0, 0);
841 				return (0);
842 			}
843 
844 			/*
845 			 * The process that passed the ioctl must be running in
846 			 * the global zone.
847 			 */
848 			if (curzone != global_zone) {
849 				miocack(qp, mp, 0, EINVAL);
850 				return (0);
851 			}
852 
853 			/*
854 			 * The process that passed the ioctl must have provided
855 			 * a file descriptor for the subsidiary device.  Make
856 			 * sure this is correct.
857 			 */
858 			subsidiary_fd =
859 			    (int)(intptr_t)*(caddr_t *)(void *)mp->b_cont->
860 			    b_rptr;
861 			subsidiary_filep = getf(subsidiary_fd);
862 			if (subsidiary_filep == NULL) {
863 				miocack(qp, mp, 0, EINVAL);
864 				return (0);
865 			}
866 			if (zcs->zc_subsid_vnode->v_rdev !=
867 			    subsidiary_filep->f_vnode->v_rdev) {
868 				releasef(subsidiary_fd);
869 				miocack(qp, mp, 0, EINVAL);
870 				return (0);
871 			}
872 
873 			/*
874 			 * Decrement the snode's reference count and release the
875 			 * vnode.
876 			 */
877 			ASSERT(vn_matchops(subsidiary_filep->f_vnode,
878 			    spec_getvnodeops()));
879 			subsidiary_snodep = VTOCS(zcs->zc_subsid_vnode);
880 			mutex_enter(&subsidiary_snodep->s_lock);
881 			--subsidiary_snodep->s_count;
882 			mutex_exit(&subsidiary_snodep->s_lock);
883 			VN_RELE(zcs->zc_subsid_vnode);
884 			zcs->zc_subsid_vnode = NULL;
885 			releasef(subsidiary_fd);
886 			miocack(qp, mp, 0, 0);
887 			return (0);
888 		default:
889 			break;
890 		}
891 	}
892 
893 	if (zc_switch(RD(qp)) == NULL) {
894 		DBG1("wput to %s side (no one listening)", zc_side(qp));
895 		switch (type) {
896 		case M_FLUSH:
897 			handle_mflush(qp, mp);
898 			break;
899 		case M_IOCTL:
900 			miocnak(qp, mp, 0, 0);
901 			break;
902 		default:
903 			freemsg(mp);
904 			break;
905 		}
906 		return (0);
907 	}
908 
909 	if (type >= QPCTL) {
910 		DBG1("(hipri) wput, %s side", zc_side(qp));
911 		switch (type) {
912 		case M_READ:		/* supposedly from ldterm? */
913 			DBG("zc_wput: tossing M_READ\n");
914 			freemsg(mp);
915 			break;
916 		case M_FLUSH:
917 			handle_mflush(qp, mp);
918 			break;
919 		default:
920 			/*
921 			 * Put this to the other side.
922 			 */
923 			ASSERT(zc_switch(RD(qp)) != NULL);
924 			putnext(zc_switch(RD(qp)), mp);
925 			break;
926 		}
927 		DBG1("done (hipri) wput, %s side", zc_side(qp));
928 		return (0);
929 	}
930 
931 	/*
932 	 * Only putnext if there isn't already something in the queue.
933 	 * otherwise things would wind up out of order.
934 	 */
935 	if (qp->q_first == NULL && bcanputnext(RD(zc_switch(qp)), mp->b_band)) {
936 		DBG("wput: putting message to other side\n");
937 		putnext(RD(zc_switch(qp)), mp);
938 	} else {
939 		DBG("wput: putting msg onto queue\n");
940 		(void) putq(qp, mp);
941 	}
942 	DBG1("done wput, %s side", zc_side(qp));
943 	return (0);
944 }
945 
946 /*
947  * rsrv(9E) is symmetric for manager and subsidiary, so zc_rsrv() handles both
948  * without splitting up the codepath.
949  *
950  * Enable the write side of the partner.  This triggers the partner to send
951  * messages queued on its write side to this queue's read side.
952  */
953 static int
zc_rsrv(queue_t * qp)954 zc_rsrv(queue_t *qp)
955 {
956 	zc_state_t *zcs;
957 	zcs = (zc_state_t *)qp->q_ptr;
958 
959 	/*
960 	 * Care must be taken here, as either of the manager or subsidiary side
961 	 * qptr could be NULL.
962 	 */
963 	ASSERT(qp == zcs->zc_manager_rdq || qp == zcs->zc_subsid_rdq);
964 	if (zc_switch(qp) == NULL) {
965 		DBG("zc_rsrv: other side isn't listening\n");
966 		return (0);
967 	}
968 	qenable(WR(zc_switch(qp)));
969 	return (0);
970 }
971 
972 /*
973  * This routine is symmetric for manager and subsidiary, so it handles both
974  * without splitting up the codepath.
975  *
976  * If there are messages on this queue that can be sent to the other, send
977  * them via putnext(). Else, if queued messages cannot be sent, leave them
978  * on this queue.
979  */
980 static int
zc_wsrv(queue_t * qp)981 zc_wsrv(queue_t *qp)
982 {
983 	mblk_t *mp;
984 
985 	DBG1("zc_wsrv manager (%s) side", zc_side(qp));
986 
987 	/*
988 	 * Partner has no read queue, so take the data, and throw it away.
989 	 */
990 	if (zc_switch(RD(qp)) == NULL) {
991 		DBG("zc_wsrv: other side isn't listening");
992 		while ((mp = getq(qp)) != NULL) {
993 			if (mp->b_datap->db_type == M_IOCTL)
994 				miocnak(qp, mp, 0, 0);
995 			else
996 				freemsg(mp);
997 		}
998 		flushq(qp, FLUSHALL);
999 		return (0);
1000 	}
1001 
1002 	/*
1003 	 * while there are messages on this write queue...
1004 	 */
1005 	while ((mp = getq(qp)) != NULL) {
1006 		/*
1007 		 * Due to the way zc_wput is implemented, we should never
1008 		 * see a control message here.
1009 		 */
1010 		ASSERT(mp->b_datap->db_type < QPCTL);
1011 
1012 		if (bcanputnext(RD(zc_switch(qp)), mp->b_band)) {
1013 			DBG("wsrv: send message to other side\n");
1014 			putnext(RD(zc_switch(qp)), mp);
1015 		} else {
1016 			DBG("wsrv: putting msg back on queue\n");
1017 			(void) putbq(qp, mp);
1018 			break;
1019 		}
1020 	}
1021 	return (0);
1022 }
1023