/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2022 Oxide Computer Company
 */

/*
 * VIRTIO 9P DRIVER
 *
 * This driver provides support for Virtio 9P devices.  Each driver instance
 * attaches to a single underlying 9P channel.  A 9P file system will use LDI
 * to open this device.
 */

#include <sys/modctl.h>
#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/containerof.h>
#include <sys/ctype.h>
#include <sys/stdbool.h>
#include <sys/sysmacros.h>
#include <sys/list.h>

#include "virtio.h"
#include "vio9p_impl.h"

static void *vio9p_state;

uint_t vio9p_int_handler(caddr_t, caddr_t);
static uint_t vio9p_poll(vio9p_t *);
static int vio9p_quiesce(dev_info_t *);
static int vio9p_attach(dev_info_t *, ddi_attach_cmd_t);
static int vio9p_teardown(vio9p_t *, vio9p_teardown_style_t);
static int vio9p_detach(dev_info_t *, ddi_detach_cmd_t);
static int vio9p_open(dev_t *, int, int, cred_t *);
static int vio9p_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int vio9p_close(dev_t, int, int, cred_t *);
static int vio9p_read(dev_t, uio_t *, cred_t *);
static int vio9p_write(dev_t, uio_t *, cred_t *);
static vio9p_req_t *vio9p_req_alloc_impl(vio9p_t *, int);
static void vio9p_req_free_impl(vio9p_t *, vio9p_req_t *);

static struct cb_ops vio9p_cb_ops = {
	.cb_rev =			CB_REV,
	.cb_flag =			D_NEW | D_MP,

	.cb_open =			vio9p_open,
	.cb_close =			vio9p_close,
	.cb_read =			vio9p_read,
	.cb_write =			vio9p_write,
	.cb_ioctl =			vio9p_ioctl,

	.cb_strategy =			nodev,
	.cb_print =			nodev,
	.cb_dump =			nodev,
	.cb_devmap =			nodev,
	.cb_mmap =			nodev,
	.cb_segmap =			nodev,
	.cb_chpoll =			nochpoll,
	.cb_prop_op =			ddi_prop_op,
	.cb_str =			NULL,
	.cb_aread =			nodev,
	.cb_awrite =			nodev,
};

static struct dev_ops vio9p_dev_ops = {
	.devo_rev =			DEVO_REV,
	.devo_refcnt =			0,

	.devo_attach =			vio9p_attach,
	.devo_detach =			vio9p_detach,
	.devo_quiesce =			vio9p_quiesce,

	.devo_cb_ops =			&vio9p_cb_ops,

	.devo_getinfo =			ddi_no_info,
	.devo_identify =		nulldev,
	.devo_probe =			nulldev,
	.devo_reset =			nodev,
	.devo_bus_ops =			NULL,
	.devo_power =			NULL,
};

static struct modldrv vio9p_modldrv = {
	.drv_modops =			&mod_driverops,
	.drv_linkinfo =			"VIRTIO 9P driver",
	.drv_dev_ops =			&vio9p_dev_ops
};

static struct modlinkage vio9p_modlinkage = {
	.ml_rev =			MODREV_1,
	.ml_linkage =			{ &vio9p_modldrv, NULL }
};
/*
 * DMA attribute template for the request and response buffers.
 */
static const ddi_dma_attr_t vio9p_dma_attr = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0x0000000000000000,
	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max =		0x00000000FFFFFFFF,
	.dma_attr_align =		1,
	.dma_attr_burstsizes =		1,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
	.dma_attr_seg =			0x00000000FFFFFFFF,
	.dma_attr_sgllen =		VIRTIO_9P_MAX_SGL,
	.dma_attr_granular =		1,
	.dma_attr_flags =		0
};

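/*
 * Interrupt handler for the request virtqueue.  The interrupt is claimed
 * only if polling the queue yielded at least one completed chain.
 */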
uint_t
vio9p_int_handler(caddr_t arg0, caddr_t arg1)
{
	vio9p_t *vin = (vio9p_t *)arg0;

	mutex_enter(&vin->vin_mutex);
	uint_t count = vio9p_poll(vin);
	mutex_exit(&vin->vin_mutex);

	return (count > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
}

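/*
 * Place a request on the free list for reuse, clearing its generation and
 * waking any threads waiting in vio9p_req_alloc() if the device is open.
 */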
static void
vio9p_req_freelist_put(vio9p_t *vin, vio9p_req_t *vnr)
{
	VERIFY(!list_link_active(&vnr->vnr_link_complete));
	VERIFY(!list_link_active(&vnr->vnr_link_free));

	vnr->vnr_generation = 0;
	list_insert_head(&vin->vin_req_freelist, vnr);

	if (vin->vin_open) {
		/*
		 * Wake any callers waiting in vio9p_req_alloc() for an entry:
		 */
		cv_broadcast(&vin->vin_cv);
	}
}

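/*
 * Return a request to the free list, removing it first from the completed
 * list if it is still there.
 */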
static void
vio9p_req_free(vio9p_t *vin, vio9p_req_t *vnr)
{
	VERIFY(MUTEX_HELD(&vin->vin_mutex));

	if (list_link_active(&vnr->vnr_link_complete)) {
		list_remove(&vin->vin_completes, vnr);
	}

	vio9p_req_freelist_put(vin, vnr);
}

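/*
 * Fully dispose of a request: free its descriptor chain and DMA buffers,
 * unlink it from the per-device request list, and free its memory.
 */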
static void
vio9p_req_free_impl(vio9p_t *vin, vio9p_req_t *vnr)
{
	if (vnr->vnr_chain != NULL) {
		virtio_chain_free(vnr->vnr_chain);
		vnr->vnr_chain = NULL;
	}
	if (vnr->vnr_dma_in != NULL) {
		virtio_dma_free(vnr->vnr_dma_in);
		vnr->vnr_dma_in = NULL;
	}
	if (vnr->vnr_dma_out != NULL) {
		virtio_dma_free(vnr->vnr_dma_out);
		vnr->vnr_dma_out = NULL;
	}

	VERIFY(!list_link_active(&vnr->vnr_link_complete));
	VERIFY(!list_link_active(&vnr->vnr_link_free));

	list_remove(&vin->vin_reqs, vnr);
	VERIFY3U(vin->vin_nreqs, >, 0);
	vin->vin_nreqs--;

	kmem_free(vnr, sizeof (*vnr));
}

/*
 * Allocate a request for a transaction.  If one is not available and this is
 * for a blocking request, wait for one to become available.
 */
static vio9p_req_t *
vio9p_req_alloc(vio9p_t *vin, bool wait)
{
	vio9p_req_t *vnr;

	VERIFY(MUTEX_HELD(&vin->vin_mutex));

again:
	/*
	 * Try the free list first:
	 */
	if ((vnr = list_remove_head(&vin->vin_req_freelist)) != NULL) {
		return (vnr);
	}

	/*
	 * Failing that, try to allocate more memory if we are under our
	 * request cap:
	 */
	if ((vnr = vio9p_req_alloc_impl(vin, KM_NOSLEEP_LAZY)) != NULL) {
		return (vnr);
	}

	/*
	 * If this is a blocking request, wait for an entry to become available
	 * on the free list:
	 */
	if (wait) {
		if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) {
			return (NULL);
		}

		goto again;
	}

	return (NULL);
}

static vio9p_req_t *
vio9p_req_alloc_impl(vio9p_t *vin, int kmflag)
{
	dev_info_t *dip = vin->vin_dip;
	vio9p_req_t *vnr;

	if (vin->vin_nreqs >= VIRTIO_9P_MAX_REQS) {
		/*
		 * We have reached the limit of requests that we are willing to
		 * allocate for the whole device.
		 */
		return (NULL);
	}

	/*
	 * Note that the request object has various list link fields which are
	 * initialised to zero here and which we check at various points later.
	 */
	if ((vnr = kmem_zalloc(sizeof (*vnr), kmflag)) == NULL) {
		return (NULL);
	}
	list_insert_tail(&vin->vin_reqs, vnr);
	vin->vin_nreqs++;

	if ((vnr->vnr_chain = virtio_chain_alloc(vin->vin_vq, kmflag)) ==
	    NULL) {
		dev_err(vin->vin_dip, CE_WARN, "!chain alloc failure");
		goto fail;
	}
	virtio_chain_data_set(vnr->vnr_chain, vnr);

	/*
	 * Allocate outbound request buffer:
	 */
	if ((vnr->vnr_dma_out = virtio_dma_alloc(vin->vin_virtio,
	    VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr,
	    DDI_DMA_CONSISTENT | DDI_DMA_WRITE, kmflag)) == NULL) {
		dev_err(dip, CE_WARN, "!DMA out alloc failure");
		goto fail;
	}
	VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_out), <=, VIRTIO_9P_MAX_SGL);

	for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_out); n++) {
		if (virtio_chain_append(vnr->vnr_chain,
		    virtio_dma_cookie_pa(vnr->vnr_dma_out, n),
		    virtio_dma_cookie_size(vnr->vnr_dma_out, n),
		    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "!chain append out failure");
			goto fail;
		}
	}

	/*
	 * Allocate inbound request buffer:
	 */
	if ((vnr->vnr_dma_in = virtio_dma_alloc(vin->vin_virtio,
	    VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr,
	    DDI_DMA_CONSISTENT | DDI_DMA_READ, kmflag)) == NULL) {
		dev_err(dip, CE_WARN, "!DMA in alloc failure");
		goto fail;
	}
	VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_in), <=, VIRTIO_9P_MAX_SGL);

	for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_in); n++) {
		if (virtio_chain_append(vnr->vnr_chain,
		    virtio_dma_cookie_pa(vnr->vnr_dma_in, n),
		    virtio_dma_cookie_size(vnr->vnr_dma_in, n),
		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "!chain append in failure");
			goto fail;
		}
	}

	return (vnr);

fail:
	vio9p_req_free_impl(vin, vnr);
	return (NULL);
}

static uint_t
vio9p_poll(vio9p_t *vin)
{
	virtio_chain_t *vic;
	uint_t count = 0;
	bool wakeup = false;

	VERIFY(MUTEX_HELD(&vin->vin_mutex));

	while ((vic = virtio_queue_poll(vin->vin_vq)) != NULL) {
		vio9p_req_t *vnr = virtio_chain_data(vic);

		count++;

		virtio_dma_sync(vnr->vnr_dma_in, DDI_DMA_SYNC_FORCPU);

		if (!vin->vin_open ||
		    vnr->vnr_generation != vin->vin_generation) {
			/*
			 * Either the device is not open, or the device has
			 * been closed and opened again since this request was
			 * submitted.  Just free the memory and drive on.
			 */
			vio9p_req_free(vin, vnr);
			continue;
		}

		list_insert_tail(&vin->vin_completes, vnr);
		wakeup = true;
	}

	if (wakeup) {
		cv_broadcast(&vin->vin_cv);
	}

	return (count);
}

static int
vio9p_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	virtio_t *vio;
	vio9p_req_t *vnr;

	if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(vio9p_state, instance) != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	if ((vio = virtio_init(dip, VIRTIO_9P_WANTED_FEATURES, B_TRUE)) ==
	    NULL) {
		ddi_soft_state_free(vio9p_state, instance);
		dev_err(dip, CE_WARN, "failed to start Virtio init");
		return (DDI_FAILURE);
	}

	vio9p_t *vin = ddi_get_soft_state(vio9p_state, instance);
	vin->vin_dip = dip;
	vin->vin_virtio = vio;
	ddi_set_driver_private(dip, vin);
	list_create(&vin->vin_reqs, sizeof (vio9p_req_t),
	    offsetof(vio9p_req_t, vnr_link));
	list_create(&vin->vin_completes, sizeof (vio9p_req_t),
	    offsetof(vio9p_req_t, vnr_link_complete));
	list_create(&vin->vin_req_freelist, sizeof (vio9p_req_t),
	    offsetof(vio9p_req_t, vnr_link_free));

	if (virtio_feature_present(vio, VIRTIO_9P_F_MOUNT_TAG)) {
		uint16_t len = virtio_dev_get16(vio, VIRTIO_9P_CONFIG_TAG_SZ);
		if (len > VIRTIO_9P_TAGLEN) {
			len = VIRTIO_9P_TAGLEN;
		}

		/*
		 * This array is one byte longer than VIRTIO_9P_TAGLEN, and is
		 * thus always NUL-terminated by the use of
		 * ddi_soft_state_zalloc() above.
		 */
		for (uint16_t n = 0; n < len; n++) {
			vin->vin_tag[n] = virtio_dev_get8(vio,
			    VIRTIO_9P_CONFIG_TAG + n);
		}
	}

	/*
	 * When allocating the request queue, we include enough slots for a
	 * full set of cookies (based on our DMA attributes) in both the in and
	 * the out direction.
	 */
	if ((vin->vin_vq = virtio_queue_alloc(vio, VIRTIO_9P_VIRTQ_REQUESTS,
	    "requests", vio9p_int_handler, vin, B_FALSE,
	    2 * VIRTIO_9P_MAX_SGL)) == NULL) {
		return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX));
	}

	if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to complete Virtio init");
		return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX));
	}

	cv_init(&vin->vin_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&vin->vin_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));

	/*
	 * Make sure the free list contains at least one request at attach
	 * time so that the device is always somewhat usable:
	 */
	if ((vnr = vio9p_req_alloc_impl(vin, KM_SLEEP)) == NULL) {
		dev_err(dip, CE_WARN, "failed to allocate first request");
		return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
	}
	vio9p_req_freelist_put(vin, vnr);

	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
		return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
	}

	/*
	 * Hang out a minor node so that we can be opened.
	 */
	int minor = ddi_get_instance(dip);
	if (ddi_create_minor_node(dip, "9p", S_IFCHR, minor, DDI_PSEUDO,
	    0) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not create minor node");
		return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH));
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
vio9p_teardown(vio9p_t *vin, vio9p_teardown_style_t style)
{
	dev_info_t *dip = vin->vin_dip;

	if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) {
		/*
		 * Make sure we do not hold the mutex across interrupt disable.
		 */
		VERIFY(MUTEX_NOT_HELD(&vin->vin_mutex));
	}

	ddi_remove_minor_node(dip, NULL);

	if (vin->vin_virtio != NULL) {
		/*
		 * Disable interrupts so that we can be sure our handler does
		 * not run again while we free things.
		 */
		virtio_interrupts_disable(vin->vin_virtio);
	}

	/*
	 * Empty the free list:
	 */
	for (;;) {
		vio9p_req_t *vnr = list_remove_head(&vin->vin_req_freelist);
		if (vnr == NULL) {
			break;
		}
		vio9p_req_free_impl(vin, vnr);
	}
	VERIFY(list_is_empty(&vin->vin_req_freelist));
	list_destroy(&vin->vin_req_freelist);

	/*
	 * Any active requests should have been freed in vio9p_detach(), so
	 * there should be no other requests left at this point.
	 */
	VERIFY0(vin->vin_nreqs);
	VERIFY(list_is_empty(&vin->vin_reqs));
	list_destroy(&vin->vin_reqs);

	VERIFY(list_is_empty(&vin->vin_completes));
	list_destroy(&vin->vin_completes);

	/*
	 * Tear down the Virtio framework.
	 */
	if (vin->vin_virtio != NULL) {
		boolean_t failed = (style != VIRTIO_9P_TEARDOWN_DETACH);
		virtio_fini(vin->vin_virtio, failed);
	}

	if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) {
		mutex_destroy(&vin->vin_mutex);
		cv_destroy(&vin->vin_cv);
	}

	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(vio9p_state, ddi_get_instance(dip));

	return (style == VIRTIO_9P_TEARDOWN_DETACH ? DDI_SUCCESS : DDI_FAILURE);
}

static int
vio9p_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vio9p_t *vin = ddi_get_driver_private(dip);

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&vin->vin_mutex);

	/*
	 * Detach will only be called once we are no longer held open.
	 */
	VERIFY(!vin->vin_open);

	/*
	 * If a request was submitted to the hypervisor but never completed, it
	 * may still be active even though the device has been closed.
	 */
	bool shutdown = false;
	for (vio9p_req_t *vnr = list_head(&vin->vin_reqs);
	    vnr != NULL; vnr = list_next(&vin->vin_reqs, vnr)) {
		if (!list_link_active(&vnr->vnr_link_free)) {
			/*
			 * There is at least one active request.  We need to
			 * reset the device to claw back the DMA memory.
			 */
			shutdown = true;
			break;
		}
	}

	if (shutdown) {
		virtio_chain_t *vic;

		virtio_shutdown(vin->vin_virtio);
		while ((vic = virtio_queue_evacuate(vin->vin_vq)) != NULL) {
			vio9p_req_t *vnr = virtio_chain_data(vic);

			virtio_dma_sync(vnr->vnr_dma_in, DDI_DMA_SYNC_FORCPU);

			vio9p_req_free_impl(vin, vnr);
		}
	}

	mutex_exit(&vin->vin_mutex);

	return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_DETACH));
}

static int
vio9p_quiesce(dev_info_t *dip)
{
	vio9p_t *vin;

	if ((vin = ddi_get_driver_private(dip)) == NULL) {
		return (DDI_FAILURE);
	}

	return (virtio_quiesce(vin->vin_virtio));
}

static int
vio9p_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	/*
	 * This device represents a request-response communication channel
	 * between the host and the hypervisor; as such we insist that it be
	 * opened exclusively, and for both read and write access.
	 */
	if (!(flag & FEXCL) || !(flag & FREAD) || !(flag & FWRITE)) {
		return (EINVAL);
	}

	vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(*dev));
	if (vin == NULL) {
		return (ENXIO);
	}

	mutex_enter(&vin->vin_mutex);
	if (vin->vin_open) {
		mutex_exit(&vin->vin_mutex);
		return (EBUSY);
	}
	vin->vin_open = true;

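	/*
	 * Bump the open generation, skipping the zero value that is reserved
	 * for requests sitting on the free list.  Responses to requests from
	 * a previous open are then recognised by their stale generation in
	 * vio9p_poll() and discarded.
	 */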
	vin->vin_generation++;
	if (vin->vin_generation == 0) {
		vin->vin_generation++;
	}

	mutex_exit(&vin->vin_mutex);
	return (0);
}

static int
vio9p_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev));
	if (vin == NULL) {
		return (ENXIO);
	}

	mutex_enter(&vin->vin_mutex);
	if (!vin->vin_open) {
		mutex_exit(&vin->vin_mutex);
		return (EIO);
	}

	/*
	 * Free all completed requests that have not yet been read:
	 */
	vio9p_req_t *vnr;
	while ((vnr = list_remove_head(&vin->vin_completes)) != NULL) {
		vio9p_req_free(vin, vnr);
	}

	vin->vin_open = false;
	mutex_exit(&vin->vin_mutex);
	return (0);
}

static int
vio9p_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rvalp)
{
	vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev));
	if (vin == NULL) {
		return (ENXIO);
	}

	switch (cmd) {
	case VIO9P_IOC_MOUNT_TAG:
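		/*
		 * Copy the whole tag buffer, which is always NUL-terminated,
		 * out to the caller:
		 */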
		if (ddi_copyout(vin->vin_tag, (void *)arg,
		    sizeof (vin->vin_tag), mode) != 0) {
			return (EFAULT);
		}
		return (0);

	default:
		return (ENOTTY);
	}
}

static int
vio9p_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0;
	vio9p_req_t *vnr;
	vio9p_t *vin;

	if ((vin = ddi_get_soft_state(vio9p_state, getminor(dev))) == NULL) {
		return (ENXIO);
	}

	mutex_enter(&vin->vin_mutex);
again:
	if ((vnr = list_remove_head(&vin->vin_completes)) == NULL) {
		if (!blocking) {
			mutex_exit(&vin->vin_mutex);
			return (EAGAIN);
		}

		/*
		 * There is nothing to read right now.  Wait for something:
		 */
		if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) {
			mutex_exit(&vin->vin_mutex);
			return (EINTR);
		}
		goto again;
	}

	/*
	 * Determine the size of the response message using the initial size[4]
	 * field of the response.  The various specifying documents that exist
	 * suggest this is an unsigned integer in little-endian order.
	 */
	uint32_t msz;
	bcopy(virtio_dma_va(vnr->vnr_dma_in, 0), &msz, sizeof (msz));
	msz = LE_32(msz);
	if (msz > virtio_dma_size(vnr->vnr_dma_in)) {
		msz = virtio_dma_size(vnr->vnr_dma_in);
	}

	if (msz > uio->uio_resid) {
		/*
		 * Tell the consumer they are going to need a bigger
		 * buffer.
		 */
		list_insert_head(&vin->vin_completes, vnr);
		mutex_exit(&vin->vin_mutex);
		return (EOVERFLOW);
	}

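	/*
	 * Drop the lock while copying the response out to the caller, as
	 * uiomove() may block:
	 */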
	mutex_exit(&vin->vin_mutex);
	int e = uiomove(virtio_dma_va(vnr->vnr_dma_in, 0), msz, UIO_READ, uio);
	mutex_enter(&vin->vin_mutex);

	if (e == 0) {
		vio9p_req_free(vin, vnr);
	} else {
		/*
		 * Put the response back in the list for another try, so that
		 * we do not drop any messages:
		 */
		list_insert_head(&vin->vin_completes, vnr);
	}

	mutex_exit(&vin->vin_mutex);
	return (e);
}

static int
vio9p_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0;

	size_t wsz = uio->uio_resid;
	if (wsz < 7) {
		/*
		 * Requests should be well-formed 9P messages.  They must
		 * contain at least 7 bytes: size[4] type[1] tag[2].
		 */
		return (EINVAL);
	} else if (wsz > VIRTIO_9P_REQ_SIZE) {
		return (EMSGSIZE);
	}

	vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev));
	if (vin == NULL) {
		return (ENXIO);
	}

	mutex_enter(&vin->vin_mutex);
	vio9p_req_t *vnr = vio9p_req_alloc(vin, blocking);
	if (vnr == NULL) {
		mutex_exit(&vin->vin_mutex);
		return (blocking ? EINTR : EAGAIN);
	}
	vnr->vnr_generation = vin->vin_generation;
	VERIFY3U(wsz, <=, virtio_dma_size(vnr->vnr_dma_out));

	mutex_exit(&vin->vin_mutex);
	int e = uiomove(virtio_dma_va(vnr->vnr_dma_out, 0), wsz, UIO_WRITE,
	    uio);
	mutex_enter(&vin->vin_mutex);

	if (e == 0) {
		virtio_dma_sync(vnr->vnr_dma_out, DDI_DMA_SYNC_FORDEV);
		virtio_chain_submit(vnr->vnr_chain, B_TRUE);
	} else {
		vio9p_req_free(vin, vnr);
	}

	mutex_exit(&vin->vin_mutex);
	return (e);
}

int
_init(void)
{
	int r;

	if ((r = ddi_soft_state_init(&vio9p_state, sizeof (vio9p_t), 0)) != 0) {
		return (r);
	}

	if ((r = mod_install(&vio9p_modlinkage)) != 0) {
		ddi_soft_state_fini(&vio9p_state);
	}

	return (r);
}

int
_fini(void)
{
	int r;

	if ((r = mod_remove(&vio9p_modlinkage)) != 0) {
		return (r);
	}

	ddi_soft_state_fini(&vio9p_state);

	return (r);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&vio9p_modlinkage, modinfop));
}