xref: /illumos-gate/usr/src/uts/common/io/vioscsi/vioscsi.c (revision 16b76d3cb933ff92018a2a75594449010192eacb)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Nexenta by DDN, Inc. All rights reserved.
14  * Copyright 2022 RackTop Systems, Inc.
15  */
16 
17 #include "vioscsi.h"
18 
/* Human-readable description string for this driver. */
static char vioscsi_ident[] = "VIRTIO SCSI driver";

/*
 * Handlers that drain the control, event and command virtqueues.  They are
 * also invoked directly when a request is being polled for completion.
 */
static uint_t vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2);
static uint_t vioscsi_evt_handler(caddr_t arg1, caddr_t arg2);
static uint_t vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2);

/* Transport entry points: capabilities and reset. */
static int vioscsi_tran_getcap(struct scsi_address *, char *, int);
static int vioscsi_tran_setcap(struct scsi_address *, char *, int, int);
static int vioscsi_tran_reset(struct scsi_address *, int);

/* Transport entry points: command submission and abort. */
static int vioscsi_tran_start(struct scsi_address *, struct scsi_pkt *);
static int vioscsi_tran_abort(struct scsi_address *, struct scsi_pkt *);

static int vioscsi_iport_attach(dev_info_t *);
static int vioscsi_iport_detach(dev_info_t *);

/* Request lifecycle helpers, task management and discovery. */
static int vioscsi_req_init(vioscsi_softc_t *, vioscsi_request_t *,
    virtio_queue_t *, int);
static void vioscsi_req_fini(vioscsi_request_t *);
static boolean_t vioscsi_req_abort(vioscsi_softc_t *, vioscsi_request_t *);
static void vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target);
static void vioscsi_discover(void *);
41 
/*
 * DMA attributes. We support a linked list, but most of our uses require a
 * single aligned buffer.  The HBA buffers will use a copy of this adjusted for
 * the actual virtio limits.
 *
 * As written here the attributes describe a buffer that may live anywhere in
 * the 64-bit address space and that must be described by exactly one
 * scatter/gather entry (dma_attr_sgllen is 1).
 */
static ddi_dma_attr_t virtio_dma_attr = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0,
	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFFull,
	.dma_attr_count_max =		0x00000000FFFFFFFFull,
	.dma_attr_align =		1,
	.dma_attr_burstsizes =		1,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0xFFFFFFFFull,
	.dma_attr_seg =			0xFFFFFFFFFFFFFFFFull,
	.dma_attr_sgllen =		1,	/* single segment only */
	.dma_attr_granular =		1,
	.dma_attr_flags =		0,
};
61 
/*
 * Number of clock ticks used as "one second" throughout this file: the
 * per-device timeout is re-armed with it, and request timeouts expressed in
 * seconds are scaled by it.  Keeping the value cached avoids calls to
 * drv_usectohz that might be expensive.  (It is assigned outside the part of
 * the file that declares it.)
 */
static clock_t vioscsi_hz;
66 
67 static boolean_t
68 vioscsi_poll_until(vioscsi_softc_t *sc, vioscsi_request_t *req,
69     ddi_intr_handler_t func, clock_t until)
70 {
71 	until *= 1000000; /* convert to usec */
72 	while (until > 0) {
73 		(void) func((caddr_t)sc, NULL);
74 		if (req->vr_done) {
75 			return (B_TRUE);
76 		}
77 		drv_usecwait(10);
78 		until -= 10;
79 	}
80 	atomic_or_8(&req->vr_expired, 1);
81 	return (B_FALSE);
82 }
83 
84 static boolean_t
85 vioscsi_tmf(vioscsi_softc_t *sc, uint32_t func, uint8_t target, uint16_t lun,
86     vioscsi_request_t *task)
87 {
88 	vioscsi_request_t req;
89 	vioscsi_tmf_res_t *res;
90 	vioscsi_tmf_req_t *tmf;
91 
92 	bzero(&req, sizeof (req));
93 
94 	if (vioscsi_req_init(sc, &req, sc->vs_ctl_vq, KM_NOSLEEP) != 0) {
95 		return (B_FALSE);
96 	}
97 
98 	tmf = &req.vr_req->tmf;
99 	res = &req.vr_res->tmf;
100 
101 	tmf->type = VIRTIO_SCSI_T_TMF;
102 	tmf->subtype = func;
103 	tmf->lun[0] = 1;
104 	tmf->lun[1] = target;
105 	tmf->lun[2] = 0x40 | (lun >> 8);
106 	tmf->lun[3] = lun & 0xff;
107 	tmf->tag = (uint64_t)task;
108 
109 	virtio_chain_clear(req.vr_vic);
110 	if (virtio_chain_append(req.vr_vic, req.vr_req_pa, sizeof (*tmf),
111 	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
112 		return (B_FALSE);
113 	}
114 
115 	if (virtio_chain_append(req.vr_vic, req.vr_res_pa, sizeof (*res),
116 	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
117 		return (B_FALSE);
118 	}
119 
120 	/*
121 	 * Make sure the device can see our request:
122 	 */
123 	virtio_dma_sync(req.vr_dma, DDI_DMA_SYNC_FORDEV);
124 
125 	/*
126 	 * Push chain into the queue:
127 	 */
128 	virtio_chain_submit(req.vr_vic, B_TRUE);
129 
130 	/*
131 	 * Wait for it to complete -- these should always complete in a tiny
132 	 * amount of time.  Give it 5 seconds to be sure.
133 	 */
134 	if (!vioscsi_poll_until(sc, &req,  vioscsi_ctl_handler, 5)) {
135 		/*
136 		 * We timed out -- this should *NEVER* happen!
137 		 * There is no safe way to deal with this if it occurs, so we
138 		 * just warn and leak the resources.  Plan for a reboot soon.
139 		 */
140 		dev_err(sc->vs_dip, CE_WARN,
141 		    "task mgmt timeout! (target %d lun %d)", target, lun);
142 		return (B_FALSE);
143 	}
144 
145 	vioscsi_req_fini(&req);
146 
147 	switch (res->response) {
148 	case VIRTIO_SCSI_S_OK:
149 	case VIRTIO_SCSI_S_FUNCTION_SUCCEEDED:
150 		break;
151 	default:
152 		return (B_FALSE);
153 	}
154 	return (B_TRUE);
155 }
156 
157 static boolean_t
158 vioscsi_lun_reset(vioscsi_softc_t *sc, uint8_t target, uint16_t lun)
159 {
160 	return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET,
161 	    target, lun, NULL));
162 }
163 
164 static boolean_t
165 vioscsi_target_reset(vioscsi_softc_t *sc, uint8_t target)
166 {
167 	return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET,
168 	    target, 0, NULL));
169 }
170 
171 static boolean_t
172 vioscsi_req_abort(vioscsi_softc_t *sc, vioscsi_request_t *req)
173 {
174 	return (vioscsi_tmf(sc, VIRTIO_SCSI_T_TMF_ABORT_TASK,
175 	    req->vr_target, req->vr_lun, req));
176 }
177 
178 static void
179 vioscsi_dev_abort(vioscsi_dev_t *vd)
180 {
181 	vioscsi_request_t *req;
182 	list_t *l = &vd->vd_reqs;
183 
184 	mutex_enter(&vd->vd_lock);
185 	for (req = list_head(l); req != NULL; req = list_next(l, req)) {
186 		(void) vioscsi_tmf(vd->vd_sc, VIRTIO_SCSI_T_TMF_ABORT_TASK,
187 		    req->vr_target, req->vr_lun, req);
188 	}
189 	mutex_exit(&vd->vd_lock);
190 }
191 
192 static void
193 vioscsi_dev_timeout(void *arg)
194 {
195 	vioscsi_dev_t *vd = arg;
196 	vioscsi_softc_t *sc = vd->vd_sc;
197 	vioscsi_request_t *req;
198 	timeout_id_t tid;
199 	clock_t now;
200 	list_t *l;
201 
202 	mutex_enter(&vd->vd_lock);
203 	if ((tid = vd->vd_timeout) == 0) {
204 		/*
205 		 * We are shutting down, stop and do not reschedule.
206 		 */
207 		mutex_exit(&vd->vd_lock);
208 		return;
209 	}
210 	vd->vd_timeout = 0;
211 
212 	now = ddi_get_lbolt();
213 	l = &vd->vd_reqs;
214 
215 	for (req = list_head(l); req != NULL; req = list_next(l, req)) {
216 		/*
217 		 * The list is sorted by expiration time, so if we reach an
218 		 * item that hasn't expired yet, we're done.
219 		 */
220 		if (now < req->vr_expire) {
221 			break;
222 		}
223 		atomic_or_8(&req->vr_expired, 1);
224 
225 		/*
226 		 * This command timed out, so send an abort.
227 		 */
228 		dev_err(sc->vs_dip, CE_WARN, "cmd timed out (%ds)",
229 		    (int)req->vr_time);
230 		(void) vioscsi_req_abort(sc, req);
231 	}
232 
233 	if (!list_is_empty(l)) {
234 		/*
235 		 * Check again in a second.
236 		 * If these wake ups are too expensive, we could
237 		 * calculate other timeouts, but that would require
238 		 * doing untimeout if we want to wake up earlier.
239 		 * This is probably cheaper, and certainly simpler.
240 		 */
241 		vd->vd_timeout = timeout(vioscsi_dev_timeout, vd, vioscsi_hz);
242 	}
243 	mutex_exit(&vd->vd_lock);
244 }
245 
246 static void
247 vioscsi_poll(vioscsi_softc_t *sc, vioscsi_request_t *req)
248 {
249 	if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, req->vr_time)) {
250 		return;
251 	}
252 
253 	/*
254 	 * Try a "gentle" task abort -- timeouts may be quasi-normal for some
255 	 * types of requests and devices.
256 	 */
257 	if (vioscsi_req_abort(sc, req) &&
258 	    vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) {
259 		return;
260 	}
261 
262 	/*
263 	 * A little more forceful with a lun reset:
264 	 */
265 	if (vioscsi_lun_reset(sc, req->vr_target, req->vr_lun) &&
266 	    vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 1)) {
267 		return;
268 	}
269 
270 	/*
271 	 * If all else fails, reset the target, and keep trying.
272 	 * This can wind up blocking forever, but if it does it means we are in
273 	 * a very bad situation (and the virtio device is busted).
274 	 * We may also be leaking request structures at this point, but only at
275 	 * the maximum rate of one per minute.
276 	 */
277 	for (;;) {
278 		dev_err(sc->vs_dip, CE_WARN, "request stuck, resetting target");
279 		(void) vioscsi_target_reset(sc, req->vr_target);
280 		if (vioscsi_poll_until(sc, req, vioscsi_cmd_handler, 60)) {
281 			return;
282 		}
283 	}
284 }
285 
286 static void
287 vioscsi_start(vioscsi_softc_t *sc, vioscsi_request_t *req)
288 {
289 	vioscsi_cmd_req_t *cmd = &req->vr_req->cmd;
290 
291 	req->vr_done = 0;
292 	req->vr_expired = 0;
293 	cmd->lun[0] = 1;
294 	cmd->lun[1] = req->vr_target;
295 	cmd->lun[2] = 0x40 | ((req->vr_lun >> 8) & 0xff);
296 	cmd->lun[3] = req->vr_lun & 0xff;
297 	cmd->lun[4] = 0;
298 	cmd->lun[5] = 0;
299 	cmd->lun[6] = 0;
300 	cmd->lun[7] = 0;
301 	cmd->tag = (uint64_t)req;
302 	cmd->prio = 0;
303 	cmd->crn = 0;
304 	cmd->task_attr = req->vr_task_attr;
305 
306 	/*
307 	 * Make sure the device can see our CDB data:
308 	 */
309 	virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORDEV);
310 
311 	/*
312 	 * Determine whether we expect to poll before submitting (because we
313 	 * cannot touch the request after submission if we are not polling).
314 	 */
315 	if (req->vr_poll) {
316 		/*
317 		 * Push chain into the queue:
318 		 */
319 		virtio_chain_submit(req->vr_vic, B_TRUE);
320 
321 		/*
322 		 * NB: Interrupts may be enabled, or might not be.  It is fine
323 		 * either way.
324 		 */
325 		vioscsi_poll(sc, req);
326 	} else {
327 		/*
328 		 * Push chain into the queue:
329 		 */
330 		virtio_chain_submit(req->vr_vic, B_TRUE);
331 	}
332 }
333 
334 static int
335 vioscsi_tran_start(struct scsi_address *ap, struct scsi_pkt *pkt)
336 {
337 	struct scsi_device *sd = scsi_address_device(ap);
338 	vioscsi_dev_t *vd = scsi_device_hba_private_get(sd);
339 	vioscsi_request_t *req = pkt->pkt_ha_private;
340 	virtio_chain_t *vic = req->vr_vic;
341 	vioscsi_cmd_req_t *cmd = &req->vr_req->cmd;
342 	vioscsi_cmd_res_t *res = &req->vr_res->cmd;
343 
344 	if (pkt->pkt_cdbp == NULL) {
345 		return (TRAN_BADPKT);
346 	}
347 
348 	bzero(cmd, sizeof (*cmd));
349 	bcopy(pkt->pkt_cdbp, cmd->cdb, pkt->pkt_cdblen);
350 
351 	/*
352 	 * Default expiration is 10 seconds, clip at an hour.
353 	 * (order of operations here is to avoid wrapping, if run in a 32-bit
354 	 * kernel)
355 	 */
356 	req->vr_time = min(pkt->pkt_time ? pkt->pkt_time : 10, 3600);
357 	req->vr_dev = vd;
358 	req->vr_poll = ((pkt->pkt_flags & FLAG_NOINTR) != 0);
359 	req->vr_target = vd->vd_target;
360 	req->vr_lun = vd->vd_lun;
361 	req->vr_start = ddi_get_lbolt();
362 	req->vr_expire = req->vr_start + req->vr_time * vioscsi_hz;
363 
364 	/*
365 	 * Configure task queuing behavior:
366 	 */
367 	if (pkt->pkt_flags & (FLAG_HTAG|FLAG_HEAD)) {
368 		req->vr_task_attr = VIRTIO_SCSI_S_HEAD;
369 	} else if (pkt->pkt_flags & FLAG_OTAG) {
370 		req->vr_task_attr = VIRTIO_SCSI_S_ORDERED;
371 	} else if (pkt->pkt_flags & FLAG_SENSING) {
372 		req->vr_task_attr = VIRTIO_SCSI_S_ACA;
373 	} else { /* FLAG_STAG is also our default */
374 		req->vr_task_attr = VIRTIO_SCSI_S_SIMPLE;
375 	}
376 
377 	/*
378 	 * Make sure we start with a clear chain:
379 	 */
380 	virtio_chain_clear(vic);
381 
382 	/*
383 	 * The KVM SCSI emulation requires that all outgoing buffers are added
384 	 * first with the request header being the first entry.  After the
385 	 * outgoing have been added then the incoming buffers with the response
386 	 * buffer being the first of the incoming.  This requirement is
387 	 * independent of using chained ring entries or one ring entry with
388 	 * indirect buffers.
389 	 */
390 
391 	/*
392 	 * Add request header:
393 	 */
394 	if (virtio_chain_append(vic, req->vr_req_pa, sizeof (*cmd),
395 	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
396 		return (TRAN_BUSY);
397 	}
398 
399 	/*
400 	 * Add write buffers:
401 	 */
402 	if (pkt->pkt_dma_flags & DDI_DMA_WRITE) {
403 		for (int i = 0; i < pkt->pkt_numcookies; i++) {
404 			if (virtio_chain_append(vic,
405 			    pkt->pkt_cookies[i].dmac_laddress,
406 			    pkt->pkt_cookies[i].dmac_size,
407 			    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
408 				return (TRAN_BUSY);
409 			}
410 		}
411 	}
412 
413 	/*
414 	 * Add response header:
415 	 */
416 	if (virtio_chain_append(vic, req->vr_res_pa, sizeof (*res),
417 	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
418 		return (TRAN_BUSY);
419 	}
420 
421 	/*
422 	 * Add read buffers:
423 	 */
424 	if (pkt->pkt_dma_flags & DDI_DMA_READ) {
425 		for (int i = 0; i < pkt->pkt_numcookies; i++) {
426 			if (virtio_chain_append(vic,
427 			    pkt->pkt_cookies[i].dmac_laddress,
428 			    pkt->pkt_cookies[i].dmac_size,
429 			    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
430 				return (TRAN_BUSY);
431 			}
432 		}
433 	}
434 
435 	/*
436 	 * Check for queue depth, and add to the timeout list:
437 	 */
438 	mutex_enter(&vd->vd_lock);
439 	if (vd->vd_num_cmd >= vd->vd_max_cmd) {
440 		mutex_exit(&vd->vd_lock);
441 		return (TRAN_BUSY);
442 	}
443 	vd->vd_num_cmd++;
444 
445 	if (!req->vr_poll) {
446 		/*
447 		 * Add the request to the timeout list.
448 		 *
449 		 * In order to minimize the work done during timeout handling,
450 		 * we keep requests sorted.  This assumes that requests mostly
451 		 * have the same timeout, and requests with long timeouts are
452 		 * infrequent.
453 		 */
454 		list_t *l = &vd->vd_reqs;
455 		vioscsi_request_t *r;
456 
457 		for (r = list_tail(l); r != NULL; r = list_prev(l, r)) {
458 			/*
459 			 * Avoids wrapping lbolt:
460 			 */
461 			if ((req->vr_expire - r->vr_expire) >= 0) {
462 				list_insert_after(l, r, req);
463 				break;
464 			}
465 		}
466 		if (r == NULL) {
467 			/*
468 			 * List empty, or this one expires before others:
469 			 */
470 			list_insert_head(l, req);
471 		}
472 		if (vd->vd_timeout == 0) {
473 			vd->vd_timeout = timeout(vioscsi_dev_timeout, vd,
474 			    vioscsi_hz);
475 		}
476 	}
477 
478 	mutex_exit(&vd->vd_lock);
479 
480 	vioscsi_start(vd->vd_sc, req);
481 	return (TRAN_ACCEPT);
482 }
483 
484 static int
485 vioscsi_tran_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
486 {
487 	struct scsi_device *sd;
488 	vioscsi_dev_t *vd;
489 	vioscsi_request_t *req;
490 
491 	if ((ap == NULL) ||
492 	    ((sd = scsi_address_device(ap)) == NULL) ||
493 	    ((vd = scsi_device_hba_private_get(sd)) == NULL)) {
494 		return (0);
495 	}
496 	if (pkt == NULL) {
497 		/*
498 		 * Abort all requests for the LUN.
499 		 */
500 		vioscsi_dev_abort(vd);
501 		return (1);
502 	}
503 	if ((req = pkt->pkt_ha_private) != NULL) {
504 		return (vioscsi_req_abort(vd->vd_sc, req) ? 1 : 0);
505 	}
506 
507 	return (0);
508 }
509 
510 static void
511 vioscsi_req_fini(vioscsi_request_t *req)
512 {
513 	if (req->vr_dma != NULL) {
514 		virtio_dma_free(req->vr_dma);
515 		req->vr_dma = NULL;
516 	}
517 	if (req->vr_vic != NULL) {
518 		virtio_chain_free(req->vr_vic);
519 		req->vr_vic = NULL;
520 	}
521 }
522 
523 static int
524 vioscsi_req_init(vioscsi_softc_t *sc, vioscsi_request_t *req,
525     virtio_queue_t *vq, int sleep)
526 {
527 	uint64_t pa;
528 
529 	bzero(req, sizeof (*req));
530 	list_link_init(&req->vr_node);
531 	req->vr_vq = vq;
532 	req->vr_dma = virtio_dma_alloc(sc->vs_virtio, sizeof (vioscsi_op_t),
533 	    &virtio_dma_attr, DDI_DMA_STREAMING | DDI_DMA_READ | DDI_DMA_WRITE,
534 	    sleep);
535 	req->vr_vic = virtio_chain_alloc(vq, sleep);
536 	if ((req->vr_dma == NULL) || (req->vr_vic == NULL)) {
537 		return (-1);
538 	}
539 	virtio_chain_data_set(req->vr_vic, req);
540 	req->vr_req = virtio_dma_va(req->vr_dma, VIOSCSI_REQ_OFFSET);
541 	req->vr_res = virtio_dma_va(req->vr_dma, VIOSCSI_RES_OFFSET);
542 	pa = virtio_dma_cookie_pa(req->vr_dma, 0);
543 	req->vr_req_pa = pa + VIOSCSI_REQ_OFFSET;
544 	req->vr_res_pa = pa + VIOSCSI_RES_OFFSET;
545 	return (0);
546 }
547 
548 static void
549 vioscsi_tran_pkt_destructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran)
550 {
551 	vioscsi_request_t *req = pkt->pkt_ha_private;
552 
553 	vioscsi_req_fini(req);
554 }
555 
556 static int
557 vioscsi_tran_pkt_constructor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran,
558     int sleep)
559 {
560 	vioscsi_softc_t *sc = tran->tran_hba_private;
561 	vioscsi_request_t *req = pkt->pkt_ha_private;
562 
563 	if (vioscsi_req_init(sc, req, sc->vs_cmd_vq, sleep) != 0) {
564 		vioscsi_req_fini(req);
565 		return (-1);
566 	}
567 	req->vr_pkt = pkt;
568 	return (0);
569 }
570 
571 static int
572 vioscsi_tran_setup_pkt(struct scsi_pkt *pkt, int (*cb)(caddr_t), caddr_t arg)
573 {
574 	if ((pkt->pkt_dma_flags & DDI_DMA_RDWR) == DDI_DMA_RDWR) {
575 		/*
576 		 * We can do read, or write, but not both.
577 		 */
578 		return (-1);
579 	}
580 
581 	return (0);
582 }
583 
584 static void
585 vioscsi_tran_teardown_pkt(struct scsi_pkt *pkt)
586 {
587 	vioscsi_request_t *req = pkt->pkt_ha_private;
588 	virtio_chain_t *vic = req->vr_vic;
589 
590 	virtio_chain_clear(vic);
591 }
592 
593 static int
594 vioscsi_tran_getcap(struct scsi_address *ap, char *cap, int whom)
595 {
596 	int rval = 0;
597 	vioscsi_softc_t *sc = ap->a_hba_tran->tran_hba_private;
598 
599 	if (cap == NULL)
600 		return (-1);
601 
602 	switch (scsi_hba_lookup_capstr(cap)) {
603 	case SCSI_CAP_CDB_LEN:
604 		rval = sc->vs_cdb_size;
605 		break;
606 
607 	case SCSI_CAP_ARQ:
608 	case SCSI_CAP_LUN_RESET:
609 	case SCSI_CAP_TAGGED_QING:
610 	case SCSI_CAP_UNTAGGED_QING:
611 		rval = 1;
612 		break;
613 
614 	default:
615 		rval = -1;
616 	}
617 	return (rval);
618 }
619 
620 static int
621 vioscsi_tran_setcap(struct scsi_address *ap, char *cap, int value, int whom)
622 {
623 	int rval = 1;
624 
625 	if (cap == NULL || whom == 0) {
626 		return (-1);
627 	}
628 
629 	switch (scsi_hba_lookup_capstr(cap)) {
630 	default:
631 		rval = 1;
632 	}
633 	return (rval);
634 }
635 
636 static int
637 vioscsi_tran_reset(struct scsi_address *ap, int level)
638 {
639 	struct scsi_device *sd;
640 	vioscsi_dev_t *vd;
641 
642 	if ((ap == NULL) ||
643 	    ((sd = scsi_address_device(ap)) == NULL) ||
644 	    ((vd = scsi_device_hba_private_get(sd)) == NULL)) {
645 		return (0);
646 	}
647 
648 	switch (level) {
649 	case RESET_LUN:
650 		if (vioscsi_lun_reset(vd->vd_sc, vd->vd_target, vd->vd_lun)) {
651 			return (1);
652 		}
653 		break;
654 	case RESET_TARGET:
655 		if (vioscsi_target_reset(vd->vd_sc, vd->vd_target)) {
656 			return (1);
657 		}
658 		break;
659 	case RESET_ALL:
660 	default:
661 		break;
662 	}
663 	return (0);
664 }
665 
666 static boolean_t
667 vioscsi_parse_unit_address(const char *ua, int *tgt, int *lun)
668 {
669 	long num;
670 	char *end;
671 
672 	if ((ddi_strtol(ua, &end, 16, &num) != 0) ||
673 	    ((*end != ',') && (*end != 0))) {
674 		return (B_FALSE);
675 	}
676 	*tgt = (int)num;
677 	if (*end == 0) {
678 		*lun = 0;
679 		return (B_TRUE);
680 	}
681 	end++; /* skip comma */
682 	if ((ddi_strtol(end, &end, 16, &num) != 0) || (*end != 0)) {
683 		return (B_FALSE);
684 	}
685 	*lun = (int)num;
686 	return (B_TRUE);
687 }
688 
689 uint_t
690 vioscsi_ctl_handler(caddr_t arg1, caddr_t arg2)
691 {
692 	vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
693 	virtio_chain_t *vic;
694 
695 	while ((vic = virtio_queue_poll(sc->vs_ctl_vq)) != NULL) {
696 		vioscsi_request_t *req;
697 
698 		if ((req = virtio_chain_data(vic)) == NULL) {
699 			dev_err(sc->vs_dip, CE_WARN, "missing ctl chain data");
700 			continue;
701 		}
702 		atomic_or_8(&req->vr_done, 1);
703 	}
704 	return (DDI_INTR_CLAIMED);
705 }
706 
/*
 * Event queue handler.
 *
 * Drains the event queue.  Each completed chain carries one event buffer;
 * transport-reset events with a "removed" or "rescan" reason trigger
 * vioscsi_lun_changed() for the target named in the event, and every chain is
 * resubmitted so the device can use it for a later event.  If the device
 * indicates that events were lost, a full rediscovery is dispatched to the
 * task queue once the queue has been drained.  Always claims the interrupt.
 */
uint_t
vioscsi_evt_handler(caddr_t arg1, caddr_t arg2)
{
	vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
	virtio_chain_t *vic;
	boolean_t missed = B_FALSE;

	while ((vic = virtio_queue_poll(sc->vs_evt_vq)) != NULL) {
		vioscsi_evt_t *evt;
		vioscsi_event_t *ve;
		uint8_t target;

		if ((ve = virtio_chain_data(vic)) == NULL) {
			/*
			 * This should never occur, it's a bug if it does.
			 */
			dev_err(sc->vs_dip, CE_WARN, "missing evt chain data");
			continue;
		}
		evt = ve->ve_evt;

		/* Make the device's writes visible before reading the event. */
		virtio_dma_sync(ve->ve_dma, DDI_DMA_SYNC_FORKERNEL);

		/* The second address byte holds the target. */
		target = evt->lun[1];
		/*
		 * The top bit is masked off before dispatching on the event
		 * type (presumably it is the "events missed" flag tested
		 * further down -- confirm against the header).
		 */
		switch (evt->event & 0x7FFFFFFF) {
		case VIRTIO_SCSI_T_TRANSPORT_RESET:
			switch (evt->reason) {
			case VIRTIO_SCSI_EVT_RESET_HARD:
				/*
				 * We could reset-notify, but this doesn't seem
				 * to get fired for targets initiated from
				 * host.
				 */
				break;
			case VIRTIO_SCSI_EVT_RESET_REMOVED:
			case VIRTIO_SCSI_EVT_RESET_RESCAN:
				/*
				 * We can treat these the same for the target,
				 * and not worry about the actual LUN id here.
				 */
				vioscsi_lun_changed(sc, target);
				break;
			default:
				/*
				 * Some other event we don't know about.
				 */
				break;
			}
			break;
		case VIRTIO_SCSI_T_NO_EVENT:
			/*
			 * If this happens, we missed some event(s).
			 */
			missed = B_TRUE;
			break;
		case VIRTIO_SCSI_T_ASYNC_NOTIFY:
			/*
			 * We don't register for these, so we don't expect
			 * them.
			 */
			break;
		}

		if (evt->event & VIRTIO_SCSI_T_EVENTS_MISSED) {
			missed = B_TRUE;
		}

		/*
		 * Resubmit the chain for the next event.
		 */
		virtio_chain_submit(vic, B_TRUE);
	}

	if (missed) {
		/*
		 * Rediscover everything from task context; a failed dispatch
		 * is not retried.
		 */
		(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc,
		    DDI_NOSLEEP);
	}

	return (DDI_INTR_CLAIMED);
}
787 
788 uint_t
789 vioscsi_cmd_handler(caddr_t arg1, caddr_t arg2)
790 {
791 	vioscsi_softc_t *sc = (vioscsi_softc_t *)arg1;
792 	virtio_chain_t *vic;
793 
794 	while ((vic = virtio_queue_poll(sc->vs_cmd_vq)) != NULL) {
795 
796 		vioscsi_request_t *req;
797 		vioscsi_dev_t *vd;
798 		struct scsi_pkt *pkt;
799 		struct virtio_scsi_cmd_resp *res;
800 
801 		if ((req = virtio_chain_data(vic)) == NULL) {
802 			/*
803 			 * This should never occur, it's a bug if it does.
804 			 */
805 			dev_err(sc->vs_dip, CE_WARN, "missing cmd chain data");
806 			continue;
807 		}
808 
809 		virtio_dma_sync(req->vr_dma, DDI_DMA_SYNC_FORKERNEL);
810 		res = &req->vr_res->cmd;
811 		pkt = req->vr_pkt;
812 
813 		if (pkt == NULL) {
814 			/*
815 			 * This is an internal request (from discovery), and
816 			 * doesn't have an associated SCSI pkt structure.  In
817 			 * this case, the notification we've done is
818 			 * sufficient, and the submitter will examine the
819 			 * response field directly.
820 			 */
821 			if (req->vr_poll) {
822 				atomic_or_8(&req->vr_done, 1);
823 			}
824 			continue;
825 		}
826 
827 		if ((vd = req->vr_dev) != NULL) {
828 			mutex_enter(&vd->vd_lock);
829 			vd->vd_num_cmd--;
830 			list_remove(&vd->vd_reqs, req);
831 			mutex_exit(&vd->vd_lock);
832 		}
833 
834 		switch (res->response) {
835 
836 		case VIRTIO_SCSI_S_OK:
837 			/*
838 			 * Request processed successfully, check SCSI status.
839 			 */
840 			pkt->pkt_scbp[0] = res->status;
841 			pkt->pkt_resid = 0;
842 			pkt->pkt_reason = CMD_CMPLT;
843 			pkt->pkt_state =
844 			    STATE_GOT_BUS | STATE_GOT_TARGET |
845 			    STATE_SENT_CMD | STATE_GOT_STATUS;
846 			if ((pkt->pkt_numcookies > 0) &&
847 			    (pkt->pkt_cookies[0].dmac_size > 0)) {
848 				pkt->pkt_state |= STATE_XFERRED_DATA;
849 			}
850 
851 			/*
852 			 * For CHECK_CONDITION, fill out the ARQ details:
853 			 */
854 			if (res->status == STATUS_CHECK) {
855 				/*
856 				 * ARQ status and arq structure:
857 				 */
858 				pkt->pkt_state |= STATE_ARQ_DONE;
859 				pkt->pkt_scbp[1] = STATUS_GOOD;
860 				struct scsi_arq_status *ars =
861 				    (void *)pkt->pkt_scbp;
862 				ars->sts_rqpkt_reason = CMD_CMPLT;
863 				ars->sts_rqpkt_resid = 0;
864 				ars->sts_rqpkt_state =
865 				    STATE_GOT_BUS | STATE_GOT_TARGET |
866 				    STATE_GOT_STATUS | STATE_SENT_CMD |
867 				    STATE_XFERRED_DATA;
868 				bcopy(res->sense, &ars->sts_sensedata,
869 				    res->sense_len);
870 			}
871 			break;
872 
873 		case VIRTIO_SCSI_S_BAD_TARGET:
874 		case VIRTIO_SCSI_S_INCORRECT_LUN:
875 			pkt->pkt_reason = CMD_DEV_GONE;
876 			break;
877 
878 		case VIRTIO_SCSI_S_OVERRUN:
879 			dev_err(sc->vs_dip, CE_WARN, "OVERRUN");
880 			pkt->pkt_reason = CMD_DATA_OVR;
881 			break;
882 
883 		case VIRTIO_SCSI_S_RESET:
884 			pkt->pkt_reason = CMD_RESET;
885 			pkt->pkt_statistics |= STAT_DEV_RESET;
886 			break;
887 
888 		case VIRTIO_SCSI_S_ABORTED:
889 			if (req->vr_expired) {
890 				pkt->pkt_statistics |= STAT_TIMEOUT;
891 				pkt->pkt_reason = CMD_TIMEOUT;
892 			} else {
893 				pkt->pkt_reason = CMD_ABORTED;
894 				pkt->pkt_statistics |= STAT_ABORTED;
895 			}
896 			break;
897 
898 		case VIRTIO_SCSI_S_BUSY:
899 			/*
900 			 * Busy, should have been caught at submission:
901 			 */
902 			pkt->pkt_reason = CMD_TRAN_ERR;
903 			break;
904 
905 		default:
906 			dev_err(sc->vs_dip, CE_WARN, "Unknown response: 0x%x",
907 			    res->response);
908 			pkt->pkt_reason = CMD_TRAN_ERR;
909 			break;
910 		}
911 
912 
913 		if (!req->vr_poll) {
914 			scsi_hba_pkt_comp(pkt);
915 		} else {
916 			atomic_or_8(&req->vr_done, 1);
917 		}
918 	}
919 	return (DDI_INTR_CLAIMED);
920 }
921 
922 static int
923 vioscsi_tran_tgt_init(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran,
924     struct scsi_device *sd)
925 {
926 	const char *ua;
927 	vioscsi_softc_t *sc;
928 	int target;
929 	int lun;
930 	vioscsi_dev_t *vd;
931 
932 	if (scsi_hba_iport_unit_address(hdip) == NULL) {
933 		return (DDI_FAILURE); /* only iport has targets */
934 	}
935 	if ((sc = tran->tran_hba_private) == NULL) {
936 		return (DDI_FAILURE);
937 	}
938 
939 	if (((ua = scsi_device_unit_address(sd)) == NULL) ||
940 	    (!vioscsi_parse_unit_address(ua, &target, &lun))) {
941 		return (DDI_FAILURE);
942 	}
943 
944 	vd = kmem_zalloc(sizeof (*vd), KM_SLEEP);
945 	list_create(&vd->vd_reqs, sizeof (vioscsi_request_t),
946 	    offsetof(vioscsi_request_t, vr_node));
947 	mutex_init(&vd->vd_lock, NULL, MUTEX_DRIVER,
948 	    virtio_intr_pri(sc->vs_virtio));
949 
950 	vd->vd_target = (uint8_t)target;
951 	vd->vd_lun = (uint16_t)lun;
952 	vd->vd_sc = sc;
953 	vd->vd_sd = sd;
954 	vd->vd_max_cmd = sc->vs_cmd_per_lun;
955 	vd->vd_num_cmd = 0;
956 
957 	scsi_device_hba_private_set(sd, vd);
958 
959 	mutex_enter(&sc->vs_lock);
960 	list_insert_tail(&sc->vs_devs, vd);
961 	mutex_exit(&sc->vs_lock);
962 
963 	return (DDI_SUCCESS);
964 }
965 
/*
 * Transport "tgt_free" entry point: undo vioscsi_tran_tgt_init().  The
 * per-device state is detached from the SCSI device, its timeout is
 * cancelled, it is unlinked from the soft state's device list, and it is
 * destroyed and freed.
 */
static void
vioscsi_tran_tgt_free(dev_info_t *hdip, dev_info_t *tdip, scsi_hba_tran_t *tran,
    struct scsi_device *sd)
{
	vioscsi_dev_t *vd = scsi_device_hba_private_get(sd);
	vioscsi_softc_t *sc = vd->vd_sc;
	timeout_id_t tid;

	scsi_device_hba_private_set(sd, NULL);

	/*
	 * Take ownership of the pending timeout id and clear it under the
	 * lock; a zero id tells the timeout handler to stop and not re-arm.
	 */
	mutex_enter(&vd->vd_lock);
	tid = vd->vd_timeout;
	vd->vd_timeout = 0;
	mutex_exit(&vd->vd_lock);

	/* Cancel outside the lock, since the handler itself takes it. */
	if (tid != 0) {
		(void) untimeout(tid);
	}

	mutex_enter(&sc->vs_lock);
	list_remove(&sc->vs_devs, vd);
	mutex_exit(&sc->vs_lock);

	list_destroy(&vd->vd_reqs);
	mutex_destroy(&vd->vd_lock);
	kmem_free(vd, sizeof (*vd));
}
993 
994 /*
995  * vioscsi_probe_target probes for existence of a valid target (LUN 0).
996  * It utilizes the supplied request, and sends TEST UNIT READY.
997  * (This command is used because it requires no data.)
998  * It returns 1 if the target is found, 0 if not, and -1 on error.
999  * It is expected additional LUNs will be discovered by the HBA framework using
1000  * REPORT LUNS on LUN 0.
1001  */
1002 static int
1003 vioscsi_probe_target(vioscsi_softc_t *sc, vioscsi_request_t *req,
1004     uint8_t target)
1005 {
1006 	struct virtio_scsi_cmd_req *cmd = &req->vr_req->cmd;
1007 	struct virtio_scsi_cmd_resp *res = &req->vr_res->cmd;
1008 
1009 	bzero(cmd, sizeof (*cmd));
1010 	cmd->cdb[0] = SCMD_TEST_UNIT_READY;
1011 
1012 	virtio_chain_clear(req->vr_vic);
1013 	if (virtio_chain_append(req->vr_vic, req->vr_req_pa,
1014 	    sizeof (*cmd), VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
1015 		return (-1);
1016 	}
1017 	if (virtio_chain_append(req->vr_vic, req->vr_res_pa,
1018 	    sizeof (*res), VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
1019 		return (-1);
1020 	}
1021 	req->vr_poll = B_TRUE;
1022 	req->vr_start = ddi_get_lbolt();
1023 	req->vr_time = 10; /* seconds */
1024 	req->vr_target = target;
1025 	req->vr_lun = 0;
1026 	req->vr_task_attr = VIRTIO_SCSI_S_HEAD;
1027 	vioscsi_start(sc, req);
1028 	switch (res->response) {
1029 	case VIRTIO_SCSI_S_OK:
1030 		return (1);
1031 	case VIRTIO_SCSI_S_INCORRECT_LUN:
1032 	case VIRTIO_SCSI_S_BAD_TARGET:
1033 		return (0);
1034 	default:
1035 		return (-1);
1036 	}
1037 }
1038 
1039 static void
1040 vioscsi_rescan_luns(void *arg)
1041 {
1042 	vioscsi_softc_t		*sc = arg;
1043 	vioscsi_dev_t		*vd;
1044 	scsi_hba_tgtmap_t	*tm = sc->vs_tgtmap;
1045 	list_t			*l;
1046 	char			addr[16];
1047 
1048 	l = &sc->vs_devs;
1049 	mutex_enter(&sc->vs_lock);
1050 	for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) {
1051 		if (!vd->vd_rescan) {
1052 			continue;
1053 		}
1054 
1055 		vd->vd_rescan = B_FALSE;
1056 		(void) snprintf(addr, sizeof (addr), "%x", vd->vd_target);
1057 		scsi_hba_tgtmap_scan_luns(tm, addr);
1058 	}
1059 	mutex_exit(&sc->vs_lock);
1060 }
1061 
1062 static void
1063 vioscsi_lun_changed(vioscsi_softc_t *sc, uint8_t target)
1064 {
1065 	vioscsi_dev_t *vd;
1066 	list_t *l = &sc->vs_devs;
1067 	boolean_t found = B_FALSE;
1068 
1069 	mutex_enter(&sc->vs_lock);
1070 	for (vd = list_head(l); vd != NULL; vd = list_next(l, vd)) {
1071 		if ((vd->vd_target == target) && (vd->vd_lun == 0)) {
1072 			vd->vd_rescan = B_TRUE;
1073 			found = B_TRUE;
1074 			break;
1075 		}
1076 	}
1077 	mutex_exit(&sc->vs_lock);
1078 
1079 	if (found) {
1080 		/*
1081 		 * We have lun 0 already, so report luns changed:
1082 		 */
1083 		(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_rescan_luns,
1084 		    sc, DDI_NOSLEEP);
1085 	} else {
1086 		/*
1087 		 * We didn't find lun 0, so issue a new discovery:
1088 		 */
1089 		(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover,
1090 		    sc, DDI_NOSLEEP);
1091 	}
1092 }
1093 
1094 /*
1095  * vioscsi_discover is our task function for performing target and lun
1096  * discovery.  This is done using active SCSI probes.
1097  */
1098 static void
1099 vioscsi_discover(void *arg)
1100 {
1101 	vioscsi_softc_t *sc = arg;
1102 	scsi_hba_tgtmap_t *tm = sc->vs_tgtmap;
1103 	vioscsi_request_t req;
1104 
1105 	if (vioscsi_req_init(sc, &req, sc->vs_cmd_vq, KM_SLEEP) != 0) {
1106 		vioscsi_req_fini(&req);
1107 		return;
1108 	}
1109 
1110 	if (scsi_hba_tgtmap_set_begin(tm) != DDI_SUCCESS) {
1111 		vioscsi_req_fini(&req);
1112 		return;
1113 	}
1114 	for (uint8_t target = 0; target < sc->vs_max_target; target++) {
1115 		char ua[10];
1116 		switch (vioscsi_probe_target(sc, &req, target)) {
1117 		case 1:
1118 			(void) snprintf(ua, sizeof (ua), "%x", target);
1119 			if (scsi_hba_tgtmap_set_add(tm, SCSI_TGT_SCSI_DEVICE,
1120 			    ua, NULL) != DDI_SUCCESS) {
1121 				(void) scsi_hba_tgtmap_set_flush(tm);
1122 				vioscsi_req_fini(&req);
1123 				return;
1124 			}
1125 			break;
1126 		case 0:
1127 			continue;
1128 		case -1:
1129 			(void) scsi_hba_tgtmap_set_flush(tm);
1130 			vioscsi_req_fini(&req);
1131 			return;
1132 		}
1133 	}
1134 	(void) scsi_hba_tgtmap_set_end(tm, 0);
1135 	vioscsi_req_fini(&req);
1136 }
1137 
/*
 * Release everything held by the soft state and then the soft state itself.
 * Each resource is released only if it is present, so this can be used on a
 * partially constructed soft state.
 *
 *   failed  passed through to virtio_fini()
 */
static void
vioscsi_teardown(vioscsi_softc_t *sc, boolean_t failed)
{
	if (sc->vs_virtio != NULL) {
		virtio_fini(sc->vs_virtio, failed);
	}

	/*
	 * Free up the event resources:
	 */
	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
		vioscsi_event_t *ve = &sc->vs_events[i];
		if (ve->ve_vic != NULL) {
			virtio_chain_free(ve->ve_vic);
		}
		if (ve->ve_dma != NULL) {
			virtio_dma_free(ve->ve_dma);
		}
	}

	if (sc->vs_tran != NULL) {
		scsi_hba_tran_free(sc->vs_tran);
	}
	if (sc->vs_tq != NULL) {
		ddi_taskq_destroy(sc->vs_tq);
	}
	/*
	 * NOTE(review): vs_intr_pri appears to double as the "lock was
	 * initialized" indicator -- confirm against the attach code.
	 */
	if (sc->vs_intr_pri != NULL) {
		mutex_destroy(&sc->vs_lock);
	}
	kmem_free(sc, sizeof (*sc));
}
1169 
1170 static int
1171 vioscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1172 {
1173 	scsi_hba_tran_t *tran = NULL;
1174 	vioscsi_softc_t *sc;
1175 	virtio_t *vio;
1176 	ddi_dma_attr_t attr;
1177 
1178 	if (cmd != DDI_ATTACH) { /* no suspend/resume support */
1179 		return (DDI_FAILURE);
1180 	}
1181 
1182 	if (scsi_hba_iport_unit_address(dip) != NULL) {
1183 		return (vioscsi_iport_attach(dip));
1184 	}
1185 
1186 	sc = kmem_zalloc(sizeof (*sc), KM_SLEEP);
1187 	sc->vs_dip = dip;
1188 
1189 	list_create(&sc->vs_devs, sizeof (vioscsi_dev_t),
1190 	    offsetof(vioscsi_dev_t, vd_node));
1191 
1192 	tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
1193 	sc->vs_tran = tran;
1194 
1195 	tran->tran_hba_len = sizeof (vioscsi_request_t);
1196 	tran->tran_hba_private = sc;
1197 
1198 	/*
1199 	 * We don't use WWN addressing, so advertise parallel.  The underlying
1200 	 * device might still be using a different transport, even in a
1201 	 * pass-through, but we cannot discriminate that at this layer.
1202 	 */
1203 	tran->tran_interconnect_type = INTERCONNECT_PARALLEL;
1204 
1205 	tran->tran_start = vioscsi_tran_start;
1206 	tran->tran_abort = vioscsi_tran_abort;
1207 	tran->tran_reset = vioscsi_tran_reset;
1208 	tran->tran_getcap = vioscsi_tran_getcap;
1209 	tran->tran_setcap = vioscsi_tran_setcap;
1210 
1211 	tran->tran_tgt_init = vioscsi_tran_tgt_init;
1212 	tran->tran_tgt_free = vioscsi_tran_tgt_free;
1213 
1214 	tran->tran_setup_pkt = vioscsi_tran_setup_pkt;
1215 	tran->tran_teardown_pkt = vioscsi_tran_teardown_pkt;
1216 	tran->tran_pkt_constructor = vioscsi_tran_pkt_constructor;
1217 	tran->tran_pkt_destructor = vioscsi_tran_pkt_destructor;
1218 
1219 	/*
1220 	 * We need to determine some device settings here, so we initialize the
1221 	 * virtio in order to access those values.  The rest of the setup we do
1222 	 * in the iport attach.  Note that this driver cannot support
1223 	 * reattaching a child iport once it is removed -- the entire driver
1224 	 * will need to be reset for that.
1225 	 */
1226 	vio = virtio_init(dip, VIOSCSI_WANTED_FEATURES, B_TRUE);
1227 	if ((sc->vs_virtio = vio) == NULL) {
1228 		dev_err(dip, CE_WARN, "failed to init virtio");
1229 		vioscsi_teardown(sc, B_TRUE);
1230 		return (DDI_FAILURE);
1231 	}
1232 
1233 	/*
1234 	 * Get virtio parameters:
1235 	 */
1236 	sc->vs_max_target = virtio_dev_get16(vio, VIRTIO_SCSI_CFG_MAX_TARGET);
1237 	sc->vs_max_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_MAX_LUN);
1238 	sc->vs_cdb_size = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CDB_SIZE);
1239 	sc->vs_max_seg = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_SEG_MAX);
1240 	sc->vs_cmd_per_lun = virtio_dev_get32(vio, VIRTIO_SCSI_CFG_CMD_PER_LUN);
1241 
1242 	/*
1243 	 * Adjust operating parameters to functional limits:
1244 	 */
1245 	sc->vs_max_target = min(VIOSCSI_MAX_TARGET, sc->vs_max_target);
1246 	sc->vs_cmd_per_lun = max(1, sc->vs_max_target);
1247 	sc->vs_max_seg = max(VIOSCSI_MIN_SEGS, sc->vs_max_seg);
1248 
1249 	/*
1250 	 * Allocate queues:
1251 	 */
1252 	sc->vs_ctl_vq = virtio_queue_alloc(vio, 0, "ctl",
1253 	    vioscsi_ctl_handler, sc, B_FALSE, sc->vs_max_seg);
1254 	sc->vs_evt_vq = virtio_queue_alloc(vio, 1, "evt",
1255 	    vioscsi_evt_handler, sc, B_FALSE, sc->vs_max_seg);
1256 	sc->vs_cmd_vq = virtio_queue_alloc(vio, 2, "cmd",
1257 	    vioscsi_cmd_handler, sc, B_FALSE, sc->vs_max_seg);
1258 
1259 	if ((sc->vs_ctl_vq == NULL) || (sc->vs_evt_vq == NULL) ||
1260 	    (sc->vs_cmd_vq == NULL)) {
1261 		dev_err(dip, CE_WARN, "failed allocating queue(s)");
1262 		vioscsi_teardown(sc, B_TRUE);
1263 		return (DDI_FAILURE);
1264 	}
1265 
1266 	if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) {
1267 		dev_err(dip, CE_WARN, "virtio_init_complete failed");
1268 		vioscsi_teardown(sc, B_TRUE);
1269 		return (DDI_FAILURE);
1270 	}
1271 
1272 	/*
1273 	 * We cannot initialize this mutex before virtio_init_complete:
1274 	 */
1275 	sc->vs_intr_pri = virtio_intr_pri(vio);
1276 	mutex_init(&sc->vs_lock, NULL, MUTEX_DRIVER, sc->vs_intr_pri);
1277 
1278 	/*
1279 	 * Allocate events, but do not submit yet:
1280 	 */
1281 	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
1282 		vioscsi_event_t *ve = &sc->vs_events[i];
1283 		ve->ve_vic = virtio_chain_alloc(sc->vs_evt_vq, KM_SLEEP);
1284 		ve->ve_dma = virtio_dma_alloc(sc->vs_virtio,
1285 		    sizeof (vioscsi_evt_t), &virtio_dma_attr,
1286 		    DDI_DMA_STREAMING | DDI_DMA_READ, KM_SLEEP);
1287 		if ((ve->ve_vic == NULL) || (ve->ve_dma == NULL)) {
1288 			vioscsi_teardown(sc, B_TRUE);
1289 			return (DDI_FAILURE);
1290 		}
1291 		if (virtio_chain_append(ve->ve_vic,
1292 		    virtio_dma_cookie_pa(ve->ve_dma, 0), sizeof (*ve->ve_evt),
1293 		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
1294 			vioscsi_teardown(sc, B_TRUE);
1295 			return (DDI_FAILURE);
1296 		}
1297 		ve->ve_evt = virtio_dma_va(ve->ve_dma, 0);
1298 		virtio_chain_data_set(ve->ve_vic, ve);
1299 	}
1300 
1301 	sc->vs_tq = ddi_taskq_create(dip, "task", 1, TASKQ_DEFAULTPRI, 0);
1302 	if (sc->vs_tq == NULL) {
1303 		dev_err(dip, CE_WARN, "failed to create taskq");
1304 		vioscsi_teardown(sc, B_TRUE);
1305 		return (DDI_FAILURE);
1306 	}
1307 
1308 	/*
1309 	 * Maximum number of segments, subtract two needed for headers:
1310 	 */
1311 	attr = virtio_dma_attr;
1312 	attr.dma_attr_sgllen = sc->vs_max_seg - 2;
1313 
1314 	if (scsi_hba_attach_setup(dip, &attr, tran,
1315 	    SCSI_HBA_ADDR_COMPLEX | SCSI_HBA_HBA |
1316 	    SCSI_HBA_TRAN_CDB | SCSI_HBA_TRAN_SCB) !=
1317 	    DDI_SUCCESS) {
1318 		vioscsi_teardown(sc, B_TRUE);
1319 		return (DDI_FAILURE);
1320 	}
1321 
1322 	if (scsi_hba_iport_register(dip, "iport0") != DDI_SUCCESS) {
1323 		vioscsi_teardown(sc, B_TRUE);
1324 		return (DDI_FAILURE);
1325 	}
1326 
1327 	ddi_report_dev(dip);
1328 
1329 	return (DDI_SUCCESS);
1330 }
1331 
1332 static void
1333 vioscsi_iport_teardown(vioscsi_softc_t *sc)
1334 {
1335 	/*
1336 	 * Stop the taskq -- ensures we don't try to access resources from a
1337 	 * task while we are tearing down.
1338 	 */
1339 	ddi_taskq_suspend(sc->vs_tq);
1340 	ddi_taskq_wait(sc->vs_tq);
1341 
1342 	/*
1343 	 * Shutdown all interrupts and device transfers:
1344 	 */
1345 	virtio_interrupts_disable(sc->vs_virtio);
1346 	virtio_shutdown(sc->vs_virtio);
1347 
1348 	/*
1349 	 * Common resources:
1350 	 */
1351 	if (sc->vs_tgtmap != NULL) {
1352 		scsi_hba_tgtmap_destroy(sc->vs_tgtmap);
1353 		sc->vs_tgtmap = NULL;
1354 	}
1355 }
1356 
1357 /*
1358  * vioscsi_iport_attach implements the attach of the iport.  We do the final
1359  * set up of interrupts, and posting of event buffers here, as we do not want
1360  * any activity unless the iport is attached.  This matches detach, and makes
1361  * teardown safer.
1362  */
1363 static int
1364 vioscsi_iport_attach(dev_info_t *dip)
1365 {
1366 	const char *ua = scsi_hba_iport_unit_address(dip);
1367 	scsi_hba_tran_t *tran;
1368 	vioscsi_softc_t *sc;
1369 
1370 	/*
1371 	 * We only support a single iport -- all disks are virtual and all
1372 	 * disks use target/lun addresses.
1373 	 */
1374 	if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) {
1375 		return (DDI_FAILURE);
1376 	}
1377 
1378 	/*
1379 	 * Get our parent's tran, and look up the sc from that:
1380 	 */
1381 	tran = ddi_get_driver_private(ddi_get_parent(dip));
1382 	if ((tran == NULL) ||
1383 	    ((sc = tran->tran_hba_private) == NULL)) {
1384 		return (DDI_FAILURE);
1385 	}
1386 
1387 	/*
1388 	 * Save a copy of the soft state in our tran private area.
1389 	 * (The framework clears this after cloning from parent.)
1390 	 */
1391 	tran = ddi_get_driver_private(dip);
1392 	tran->tran_hba_private = sc;
1393 
1394 	/*
1395 	 * We don't want interrupts on the control queue -- strictly polled
1396 	 * (however if this handler is called from an interrupt, it should
1397 	 * still be absolutely fine).
1398 	 */
1399 	virtio_queue_no_interrupt(sc->vs_ctl_vq, B_TRUE);
1400 
1401 	if (scsi_hba_tgtmap_create(dip, SCSI_TM_FULLSET, MICROSEC,
1402 	    2 * MICROSEC, sc, NULL, NULL, &sc->vs_tgtmap) != DDI_SUCCESS) {
1403 		vioscsi_iport_teardown(sc);
1404 		return (DDI_FAILURE);
1405 	}
1406 
1407 	/*
1408 	 * Post events:
1409 	 */
1410 	for (int i = 0; i < VIOSCSI_NUM_EVENTS; i++) {
1411 		virtio_chain_submit(sc->vs_events[i].ve_vic, B_FALSE);
1412 	}
1413 	virtio_queue_flush(sc->vs_evt_vq);
1414 
1415 	/*
1416 	 * Start interrupts going now:
1417 	 */
1418 	if (virtio_interrupts_enable(sc->vs_virtio) != DDI_SUCCESS) {
1419 		vioscsi_iport_teardown(sc);
1420 		return (DDI_FAILURE);
1421 	}
1422 
1423 	/*
1424 	 * Start a discovery:
1425 	 */
1426 	(void) ddi_taskq_dispatch(sc->vs_tq, vioscsi_discover, sc, DDI_SLEEP);
1427 
1428 	return (DDI_SUCCESS);
1429 }
1430 
1431 static int
1432 vioscsi_quiesce(dev_info_t *dip)
1433 {
1434 	vioscsi_softc_t *sc;
1435 	scsi_hba_tran_t *tran;
1436 
1437 	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
1438 	    ((sc = tran->tran_hba_private) == NULL)) {
1439 		return (DDI_FAILURE);
1440 	}
1441 	if (sc->vs_virtio == NULL) {
1442 		return (DDI_SUCCESS); /* not initialized yet */
1443 	}
1444 
1445 	return (virtio_quiesce(sc->vs_virtio));
1446 }
1447 
1448 /*
1449  * vioscsi_iport_detach is used to perform the detach of the iport.  It
1450  * disables interrupts and the device, but does not free resources, other than
1451  * the target map.  Note that due to lack of a way to start virtio after
1452  * virtio_shutdown(), it is not possible to reattach the iport after this is
1453  * called, unless the underlying HBA is also detached and then re-attached.
1454  */
1455 static int
1456 vioscsi_iport_detach(dev_info_t *dip)
1457 {
1458 	const char *ua = scsi_hba_iport_unit_address(dip);
1459 	vioscsi_softc_t *sc;
1460 	scsi_hba_tran_t *tran;
1461 
1462 	if ((ua == NULL) || (strcmp(ua, "iport0") != 0)) {
1463 		return (DDI_FAILURE);
1464 	}
1465 
1466 	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
1467 	    ((sc = tran->tran_hba_private) == NULL)) {
1468 		return (DDI_FAILURE);
1469 	}
1470 
1471 	mutex_enter(&sc->vs_lock);
1472 	if (!list_is_empty(&sc->vs_devs)) {
1473 		/*
1474 		 * Cannot detach while we have target children.
1475 		 */
1476 		mutex_exit(&sc->vs_lock);
1477 		return (DDI_FAILURE);
1478 	}
1479 
1480 	vioscsi_iport_teardown(sc);
1481 
1482 	return (DDI_SUCCESS);
1483 }
1484 
1485 static int
1486 vioscsi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1487 {
1488 	vioscsi_softc_t *sc;
1489 	scsi_hba_tran_t *tran;
1490 
1491 	if (cmd != DDI_DETACH)  {
1492 		return (DDI_FAILURE);
1493 	}
1494 
1495 	if (scsi_hba_iport_unit_address(dip) != NULL) {
1496 		return (vioscsi_iport_detach(dip));
1497 	}
1498 
1499 	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
1500 	    ((sc = tran->tran_hba_private) == NULL)) {
1501 		return (DDI_FAILURE);
1502 	}
1503 
1504 	if (scsi_hba_detach(dip) != DDI_SUCCESS) {
1505 		return (DDI_FAILURE);
1506 	}
1507 	vioscsi_teardown(sc, B_FALSE);
1508 
1509 	return (DDI_SUCCESS);
1510 }
1511 
1512 static struct dev_ops vioscsi_dev_ops = {
1513 	.devo_rev =		DEVO_REV,
1514 	.devo_refcnt =		0,
1515 	.devo_getinfo =		nodev,
1516 	.devo_identify =	nulldev,
1517 	.devo_probe =		nulldev,
1518 	.devo_attach =		vioscsi_attach,
1519 	.devo_detach =		vioscsi_detach,
1520 	.devo_reset =		nodev,
1521 	.devo_cb_ops =		NULL,
1522 	.devo_bus_ops =		NULL,
1523 	.devo_power =		NULL,
1524 	.devo_quiesce =		vioscsi_quiesce,
1525 };
1526 
1527 static struct modldrv modldrv = {
1528 	.drv_modops =		&mod_driverops,
1529 	.drv_linkinfo =		vioscsi_ident,
1530 	.drv_dev_ops =		&vioscsi_dev_ops,
1531 };
1532 
1533 static struct modlinkage modlinkage = {
1534 	.ml_rev =		MODREV_1,
1535 	.ml_linkage =		{ &modldrv, NULL, },
1536 };
1537 
1538 
1539 int
1540 _init(void)
1541 {
1542 	int err;
1543 
1544 	/*
1545 	 * Initialize this unconditionally:
1546 	 */
1547 	vioscsi_hz = drv_usectohz(1000000);
1548 
1549 	if ((err = scsi_hba_init(&modlinkage)) != 0) {
1550 		return (err);
1551 	}
1552 
1553 	if ((err = mod_install(&modlinkage)) != 0) {
1554 		scsi_hba_fini(&modlinkage);
1555 		return (err);
1556 	}
1557 
1558 	return (err);
1559 }
1560 
1561 int
1562 _fini(void)
1563 {
1564 	int err;
1565 
1566 	if ((err = mod_remove(&modlinkage)) != 0) {
1567 		return (err);
1568 	}
1569 
1570 	scsi_hba_fini(&modlinkage);
1571 
1572 	return (DDI_SUCCESS);
1573 }
1574 
1575 int
1576 _info(struct modinfo *modinfop)
1577 {
1578 	return (mod_info(&modlinkage, modinfop));
1579 }
1580