xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/pvscsi/pvscsi.c (revision b3783300013fa93b98278c901b855062f538f7e2)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2016 Nexenta Systems, Inc.
14  * Copyright 2022 RackTop Systems, Inc.
15  */
16 
17 #include <sys/atomic.h>
18 #include <sys/cmn_err.h>
19 #include <sys/cpuvar.h>
20 #include <sys/ddi.h>
21 #include <sys/id32.h>
22 #include <sys/kmem.h>
23 #include <sys/list.h>
24 #include <sys/modctl.h>
25 #include <sys/pci.h>
26 #include <sys/scsi/scsi.h>
27 #include <sys/sunddi.h>
28 #include <sys/sysmacros.h>
29 #include <sys/types.h>
30 #include <sys/note.h>
31 
32 #include "pvscsi.h"
33 #include "pvscsi_var.h"
34 
/* we can support any of the interrupt types */
int pvscsi_intr_types = \
	DDI_INTR_TYPE_MSIX|DDI_INTR_TYPE_MSI|DDI_INTR_TYPE_FIXED;
/*
 * Requested number of pages for the request and completion rings; the
 * value is clamped against the per-ring maximum when rings are allocated.
 */
int pvscsi_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
/* Requested number of pages for the message ring (also clamped). */
int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
/*
 * Tick count used as the "one second" unit in the timeout arithmetic
 * below.  NOTE(review): it is initialized outside the code visible here.
 */
static int pvscsi_hz;
41 
/* Forward declarations for routines referenced before their definition. */
static int pvscsi_abort(struct scsi_address *, struct scsi_pkt *);
static void pvscsi_timeout(void *);
static void pvscsi_setup_rings(pvscsi_softc_t *);
static void pvscsi_complete_cmds(pvscsi_softc_t *, pvscsi_cmd_t *);
static boolean_t pvscsi_cmd_init(pvscsi_softc_t *, pvscsi_cmd_t *, int);
static void pvscsi_cmd_fini(pvscsi_cmd_t *);
48 
/*
 * HBA DMA attributes.  Used for the driver's own ring and state buffers:
 * page aligned, and restricted to a single cookie (sgllen 1) so each
 * buffer can be described by one physical address.
 */
static ddi_dma_attr_t pvscsi_dma_attr = {
	.dma_attr_version =	DMA_ATTR_V0,
	.dma_attr_addr_lo =	0,
	.dma_attr_addr_hi =	0xFFFFFFFFFFFFFFFFull,
	.dma_attr_count_max =	0xFFFFFFFFFFFFFFFFull,
	.dma_attr_align =	PAGE_SIZE,
	.dma_attr_burstsizes =	1,
	.dma_attr_minxfer =	1,
	.dma_attr_maxxfer =	0xFFFFFFFFFFFFFFFFull,
	.dma_attr_seg =		0xFFFFFFFFFFFFFFFFull,
	.dma_attr_sgllen =	1,
	.dma_attr_granular =	1,
	.dma_attr_flags =	0
};
64 
/*
 * DMA attributes for buffer I/O.  A transfer may be split into at most
 * PVSCSI_MAX_SG_SIZE cookies, and the total transfer size is capped at
 * one page per scatter/gather element.
 */
static ddi_dma_attr_t pvscsi_io_dma_attr = {
	.dma_attr_version =	DMA_ATTR_V0,
	.dma_attr_addr_lo =	0,
	.dma_attr_addr_hi =	0xFFFFFFFFFFFFFFFFull,
	.dma_attr_count_max =	0x7FFFFFFFll,
	.dma_attr_align =	1,
	.dma_attr_burstsizes =	1,
	.dma_attr_minxfer =	1,
	.dma_attr_maxxfer =	PAGE_SIZE * PVSCSI_MAX_SG_SIZE,
	.dma_attr_seg =		0xFFFFFFFFFFFFFFFFull,
	.dma_attr_sgllen =	PVSCSI_MAX_SG_SIZE,
	.dma_attr_granular =	1,
	.dma_attr_flags =	0
};
80 
/*
 * The device structures are always little endian (VMware only runs
 * on little endian CPUs).  Since this driver also only runs on LE
 * processors, no byte swapping is ever required, and NEVERSWAP lets
 * us avoid the DDI accessor functions for shared structures.
 * (It would be incredibly bizarre to have a VMware guest running
 * with a different endianness than the hypervisor.)
 */
static ddi_device_acc_attr_t pvscsi_mmio_attr = {
	.devacc_attr_version =		DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags =	DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder =	DDI_STRICTORDER_ACC,
	.devacc_attr_access =		DDI_DEFAULT_ACC
};
94 
/*
 * Access attributes for DMA memory allocated by the driver (rings and
 * state page); same no-swap, strictly ordered policy as the MMIO mapping.
 */
static ddi_device_acc_attr_t pvscsi_dma_attrs = {
	.devacc_attr_version =		DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags =	DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder =	DDI_STRICTORDER_ACC,
	.devacc_attr_access =		DDI_DEFAULT_ACC,
};
101 
102 static void
103 pvscsi_add_to_queue(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
104 {
105 	pvscsi_cmd_t	*r;
106 	list_t		*l;
107 
108 	/*
109 	 * We insert in order of expiration, with the earliest
110 	 * expirations at the front.  This logic assumes that most
111 	 * commands will have the same timeout, and is optimized
112 	 * to minimize walking the list.  It allows timeouts to
113 	 * run without looking at more than one node that has not
114 	 * yet expired.
115 	 */
116 	ASSERT(mutex_owned(&pvs->lock));
117 
118 	l = &pvs->cmd_queue;
119 	for (r = list_tail(l); r != NULL; r = list_prev(l, r)) {
120 		/* this subtraction is safe if lbolt wraps */
121 		if (((cmd->start + cmd->timeout) -
122 		    (r->start + r->timeout)) >= 0) {
123 			list_insert_after(l, r, cmd);
124 			return;
125 		}
126 	}
127 
128 	list_insert_head(l, cmd);
129 }
130 
131 static uint32_t
132 pvscsi_reg_read(pvscsi_softc_t *pvs, uint32_t offset)
133 {
134 	uint32_t	ret;
135 
136 	ASSERT((offset & (sizeof (uint32_t) - 1)) == 0);
137 
138 	ret = ddi_get32(pvs->mmio_handle,
139 	    (uint32_t *)(pvs->mmio_base + offset));
140 
141 	return (ret);
142 }
143 
144 static void
145 pvscsi_reg_write(pvscsi_softc_t *pvs, uint32_t offset, uint32_t value)
146 {
147 	ASSERT((offset & (sizeof (uint32_t) - 1)) == 0);
148 
149 	ddi_put32(pvs->mmio_handle, (uint32_t *)(pvs->mmio_base + offset),
150 	    value);
151 }
152 
153 static void
154 pvscsi_write_cmd_desc(pvscsi_softc_t *pvs, uint32_t cmd, void *desc, size_t len)
155 {
156 	len /= sizeof (uint32_t);
157 	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_COMMAND, cmd);
158 	ddi_rep_put32(pvs->mmio_handle, (uint32_t *)desc,
159 	    (uint32_t *)(pvs->mmio_base + PVSCSI_REG_OFFSET_COMMAND_DATA),
160 	    len, DDI_DEV_NO_AUTOINCR);
161 }
162 
163 static uint32_t
164 pvscsi_read_intr_status(pvscsi_softc_t *pvs)
165 {
166 	return (pvscsi_reg_read(pvs, PVSCSI_REG_OFFSET_INTR_STATUS));
167 }
168 
/*
 * Write val to the interrupt status register.  The interrupt handler
 * writes back the status bits it just read.
 */
static void
pvscsi_write_intr_status(pvscsi_softc_t *pvs, uint32_t val)
{
	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}
174 
175 static pvscsi_cmd_t *
176 pvscsi_reclaim_cmds(pvscsi_softc_t *pvs)
177 {
178 	pvscsi_cmd_t	*head = NULL;
179 	pvscsi_cmd_t	**tail = &head;
180 	pvscsi_cmd_t	*cmd;
181 
182 	ASSERT(mutex_owned(&pvs->lock));
183 	while ((cmd = list_remove_head(&pvs->cmd_queue)) != NULL) {
184 		list_remove(&pvs->cmd_queue, cmd);
185 		*tail = cmd;
186 		tail = &cmd->next_cmd;
187 		*tail = NULL;
188 		cmd->host_status = BTSTAT_BUSRESET;
189 	}
190 	return (head);
191 }
192 
/*
 * Quiesce the adapter: issue an adapter reset command, then mask all
 * device interrupts.
 */
static void
pvscsi_stop_hba(pvscsi_softc_t *pvs)
{
	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK, 0);
	/* read interrupt status to flush PCI write buffers */
	(void) pvscsi_read_intr_status(pvs);
}
201 
/*
 * Bring the adapter into service: (re)program the rings, then unmask
 * the completion and message interrupts.
 */
static void
pvscsi_start_hba(pvscsi_softc_t *pvs)
{
	pvscsi_setup_rings(pvs);
	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_INTR_MASK,
	    PVSCSI_INTR_CMPL_MASK | PVSCSI_INTR_MSG_MASK);
}
209 
/* Ask the adapter to reset the bus (command carries no descriptor). */
static void
pvscsi_reset_bus(pvscsi_softc_t *pvs)
{
	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_RESET_BUS, NULL, 0);
}
215 
216 /*
217  * pvscsi_restart_hba resets the HBA, and reconfigures it.  It also
218  * completes all commands that have not been already completed with
219  * a reset.
220  */
221 static void
222 pvscsi_restart_hba(pvscsi_softc_t *pvs)
223 {
224 	pvscsi_cmd_t	*cmd;
225 
226 	mutex_enter(&pvs->lock);
227 	pvscsi_stop_hba(pvs);
228 	cmd = pvscsi_reclaim_cmds(pvs);
229 	pvscsi_start_hba(pvs);
230 	mutex_exit(&pvs->lock);
231 
232 	/* run the completions from the reclaimed commands */
233 	pvscsi_complete_cmds(pvs, cmd);
234 }
235 
/*
 * Notify the device that a new request is on the ring, using the
 * doorbell register for commands other than reads and writes.
 */
static void
pvscsi_submit_nonrw_io(pvscsi_softc_t *pvs)
{
	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}
241 
/*
 * Notify the device that a new request is on the ring, using the
 * doorbell register for read/write commands.
 */
static void
pvscsi_submit_rw_io(pvscsi_softc_t *pvs)
{
	pvscsi_reg_write(pvs, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}
247 
248 static pvscsi_cmd_t *
249 pvscsi_process_comp_ring(pvscsi_softc_t *pvs)
250 {
251 	pvscsi_cmd_t	**pnext_cmd;
252 	pvscsi_cmd_t	*cmd;
253 	pvscsi_cmd_t	*head = NULL;
254 	struct PVSCSIRingsState *sdesc = RINGS_STATE(pvs);
255 	uint32_t	cmp_ne = sdesc->cmpNumEntriesLog2;
256 
257 	ASSERT(mutex_owned(&pvs->lock));
258 
259 	pnext_cmd = &head;
260 
261 	(void) ddi_dma_sync(pvs->state_buf.dmah, 0, 0, DDI_DMA_SYNC_FORKERNEL);
262 
263 	while (sdesc->cmpConsIdx != sdesc->cmpProdIdx) {
264 		struct PVSCSIRingCmpDesc *cdesc;
265 
266 		(void) ddi_dma_sync(pvs->cmp_ring_buf.dmah, 0, 0,
267 		    DDI_DMA_SYNC_FORKERNEL);
268 
269 		cdesc = CMP_RING(pvs) + (sdesc->cmpConsIdx & MASK(cmp_ne));
270 
271 		if ((cmd = id32_lookup((uint32_t)cdesc->context)) != NULL) {
272 			cmd->next_cmd = NULL;
273 
274 			/* Save command status for further processing */
275 			cmd->host_status = cdesc->hostStatus;
276 			cmd->scsi_status = cdesc->scsiStatus;
277 			cmd->transferred = cdesc->dataLen;
278 
279 			*pnext_cmd = cmd;
280 			pnext_cmd = &cmd->next_cmd;
281 
282 			list_remove(&pvs->cmd_queue, cmd);
283 		}
284 
285 		sdesc->cmpConsIdx++;
286 	}
287 	(void) ddi_dma_sync(pvs->state_buf.dmah, 0, 0, DDI_DMA_SYNC_FORDEV);
288 
289 	return (head);
290 }
291 
292 static pvscsi_msg_t *
293 pvscsi_process_msg_ring(pvscsi_softc_t *pvs)
294 {
295 	pvscsi_msg_t	*msg;
296 	struct PVSCSIRingsState *sdesc = RINGS_STATE(pvs);
297 	struct PVSCSIRingMsgDesc *mdesc;
298 	struct PVSCSIMsgDescDevStatusChanged *desc;
299 	uint32_t	msg_ne = sdesc->msgNumEntriesLog2;
300 
301 	(void) ddi_dma_sync(pvs->state_buf.dmah, 0, 0, DDI_DMA_SYNC_FORKERNEL);
302 
303 	if (sdesc->msgProdIdx == sdesc->msgConsIdx) {
304 		return (NULL);
305 	}
306 
307 	(void) ddi_dma_sync(pvs->msg_ring_buf.dmah, 0, 0,
308 	    DDI_DMA_SYNC_FORKERNEL);
309 
310 	mdesc = MSG_RING(pvs) + (sdesc->msgConsIdx & MASK(msg_ne));
311 
312 	switch (mdesc->type) {
313 	case PVSCSI_MSG_DEV_ADDED:
314 	case PVSCSI_MSG_DEV_REMOVED:
315 		desc = (struct PVSCSIMsgDescDevStatusChanged *)mdesc;
316 		msg = kmem_alloc(sizeof (pvscsi_msg_t), KM_NOSLEEP);
317 		if (msg == NULL)
318 			return (NULL);
319 		msg->pvs = pvs;
320 		msg->type = mdesc->type;
321 		msg->target = desc->target;
322 		msg->lun = desc->lun[1]; /* T10 format */
323 		break;
324 	default:
325 		dev_err(pvs->dip, CE_WARN, "!unknown msg type: %d",
326 		    mdesc->type);
327 		return (NULL);
328 	}
329 
330 	sdesc->msgConsIdx++;
331 	(void) ddi_dma_sync(pvs->state_buf.dmah, 0, 0, DDI_DMA_SYNC_FORDEV);
332 	return (msg);
333 }
334 
335 static void
336 pvscsi_handle_msg(void *arg)
337 {
338 	pvscsi_msg_t	*msg = arg;
339 	pvscsi_softc_t	*pvs = msg->pvs;
340 	char		addr[8];
341 
342 	(void) snprintf(addr, sizeof (addr), "%x", msg->target);
343 
344 	if (msg->lun == 0) {
345 		switch (msg->type) {
346 		case PVSCSI_MSG_DEV_ADDED:
347 			(void) scsi_hba_tgtmap_tgt_add(pvs->tgtmap,
348 			    SCSI_TGT_SCSI_DEVICE, addr, NULL);
349 			break;
350 		case PVSCSI_MSG_DEV_REMOVED:
351 			(void) scsi_hba_tgtmap_tgt_remove(pvs->tgtmap,
352 			    SCSI_TGT_SCSI_DEVICE, addr);
353 			break;
354 		}
355 	} else {
356 		scsi_hba_tgtmap_scan_luns(pvs->tgtmap, addr);
357 	}
358 	kmem_free(msg, sizeof (pvscsi_msg_t));
359 }
360 
361 static void
362 pvscsi_abort_cmd(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
363 {
364 	struct PVSCSICmdDescAbortCmd	acmd;
365 
366 	bzero(&acmd, sizeof (acmd));
367 	acmd.target = cmd->target;
368 	acmd.context = cmd->ctx;
369 	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_ABORT_CMD, &acmd, sizeof (acmd));
370 }
371 
372 static void
373 pvscsi_map_buffers(pvscsi_cmd_t *cmd, struct PVSCSIRingReqDesc *rdesc)
374 {
375 	struct scsi_pkt *pkt = cmd->pkt;
376 
377 	rdesc->dataLen = 0;
378 	rdesc->dataAddr = 0;
379 	if (pkt == NULL || pkt->pkt_numcookies == 0) {
380 		return;
381 	}
382 
383 	pkt->pkt_resid = 0;
384 
385 	if (pkt->pkt_numcookies > 1) {
386 		size_t	len = 0;
387 		struct PVSCSISGElement *sgl = cmd->sgl;
388 
389 		for (uint_t i = 0; i < pkt->pkt_numcookies; i++) {
390 			sgl[i].addr = pkt->pkt_cookies[i].dmac_laddress;
391 			sgl[i].length = pkt->pkt_cookies[i].dmac_size;
392 			sgl[i].flags = 0;
393 			len += pkt->pkt_cookies[i].dmac_size;
394 		}
395 		rdesc->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
396 		rdesc->dataAddr = cmd->sgl_pa;
397 		rdesc->dataLen = len;
398 		(void) ddi_dma_sync(cmd->sgl_dmah, 0, 0, DDI_DMA_SYNC_FORDEV);
399 	} else {
400 		rdesc->flags = 0;
401 		rdesc->dataAddr = pkt->pkt_cookies[0].dmac_laddress;
402 		rdesc->dataLen = pkt->pkt_cookies[0].dmac_size;
403 	}
404 	pkt->pkt_resid = rdesc->dataLen;
405 }
406 
407 static void
408 pvscsi_comp_cmd(pvscsi_cmd_t *cmd)
409 {
410 	struct scsi_pkt	*pkt = cmd->pkt;
411 	uint8_t		status = cmd->scsi_status;
412 
413 	pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
414 	    STATE_GOT_STATUS);
415 	if (pkt->pkt_numcookies > 0) {
416 		pkt->pkt_state |= STATE_XFERRED_DATA;
417 	}
418 	pkt->pkt_reason = CMD_CMPLT;
419 	pkt->pkt_resid -= cmd->transferred;
420 	*(pkt->pkt_scbp) = status;
421 
422 	if (status == STATUS_CHECK) {
423 		/*
424 		 * Our virtual HBA *always* does ARQ, and it never
425 		 * is more than 20 bytes, so no need to try to handle
426 		 * extended versions of it.
427 		 */
428 		struct scsi_arq_status *ars = (void *)(pkt->pkt_scbp);
429 		int		len = min(pkt->pkt_scblen, SENSE_LENGTH);
430 
431 		pkt->pkt_state |= STATE_ARQ_DONE;
432 		ars->sts_rqpkt_resid = 0;
433 		bcopy(cmd->arq_sense, &ars->sts_sensedata, len);
434 		ars->sts_rqpkt_reason = CMD_CMPLT;
435 		*(uint8_t *)&ars->sts_rqpkt_status = STATUS_GOOD;
436 		ars->sts_rqpkt_state = STATE_GOT_BUS |
437 		    STATE_GOT_TARGET | STATE_SENT_CMD |
438 		    STATE_XFERRED_DATA | STATE_GOT_STATUS;
439 	}
440 }
441 
442 static void
443 pvscsi_set_status(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
444 {
445 	struct scsi_pkt	*pkt = cmd->pkt;
446 	uint32_t	host_status = cmd->host_status;
447 
448 	switch (host_status) {
449 	case BTSTAT_SUCCESS:
450 	case BTSTAT_LINKED_COMMAND_COMPLETED:
451 	case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
452 		pvscsi_comp_cmd(cmd);
453 		break;
454 	case BTSTAT_DATARUN:
455 		pkt->pkt_reason = CMD_DATA_OVR;
456 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
457 		    STATE_SENT_CMD | STATE_GOT_STATUS |
458 		    STATE_XFERRED_DATA);
459 		pkt->pkt_resid -= cmd->transferred;
460 		break;
461 	case BTSTAT_SELTIMEO:
462 		pkt->pkt_reason = CMD_DEV_GONE;
463 		pkt->pkt_state |= STATE_GOT_BUS;
464 		break;
465 	case BTSTAT_TAGREJECT:
466 		pkt->pkt_reason = CMD_TAG_REJECT;
467 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
468 		    STATE_SENT_CMD | STATE_GOT_STATUS);
469 		break;
470 	case BTSTAT_BADMSG:
471 		pkt->pkt_reason = CMD_BADMSG;
472 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
473 		    STATE_SENT_CMD | STATE_GOT_STATUS);
474 		break;
475 	case BTSTAT_SENTRST:
476 	case BTSTAT_RECVRST:
477 		pkt->pkt_reason = CMD_RESET;
478 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
479 		    STATE_SENT_CMD | STATE_GOT_STATUS);
480 		pkt->pkt_statistics |= STAT_DEV_RESET;
481 		pkt->pkt_resid -= cmd->transferred;
482 		break;
483 	case BTSTAT_BUSRESET:
484 		pkt->pkt_reason = CMD_RESET;
485 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
486 		    STATE_SENT_CMD | STATE_GOT_STATUS);
487 		pkt->pkt_statistics |= STAT_BUS_RESET;
488 		pkt->pkt_resid -= cmd->transferred;
489 		break;
490 	case BTSTAT_ABORTQUEUE:
491 		if (cmd->expired) {
492 			pkt->pkt_reason = CMD_TIMEOUT;
493 			pkt->pkt_statistics |= STAT_TIMEOUT;
494 		} else {
495 			pkt->pkt_reason = CMD_ABORTED;
496 			pkt->pkt_statistics |= STAT_ABORTED;
497 		}
498 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
499 		    STATE_SENT_CMD | STATE_GOT_STATUS);
500 		pkt->pkt_resid -= cmd->transferred;
501 		break;
502 	case BTSTAT_HAHARDWARE:
503 	case BTSTAT_INVPHASE:
504 	case BTSTAT_HATIMEOUT:
505 	case BTSTAT_NORESPONSE:
506 	case BTSTAT_DISCONNECT:
507 	case BTSTAT_HASOFTWARE:
508 	case BTSTAT_BUSFREE:
509 	case BTSTAT_SENSFAILED:
510 	case BTSTAT_DATA_UNDERRUN:
511 		pkt->pkt_reason = CMD_TRAN_ERR;
512 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
513 		    STATE_SENT_CMD | STATE_GOT_STATUS);
514 		pkt->pkt_resid -= cmd->transferred;
515 		break;
516 	default:
517 		dev_err(pvs->dip, CE_WARN,
518 		    "!unknown host status code: %d", host_status);
519 		pkt->pkt_reason = CMD_TRAN_ERR;
520 		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
521 		    STATE_SENT_CMD | STATE_GOT_STATUS);
522 		break;
523 	}
524 }
525 
526 /*
527  * pvscsi_complete_cmds processes a linked list of
528  * commands that have been completed.  This is done
529  * without acquiring any locks.
530  */
531 static void
532 pvscsi_complete_cmds(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
533 {
534 	struct scsi_pkt	*pkt;
535 
536 	while (cmd != NULL) {
537 		pvscsi_cmd_t	*next = cmd->next_cmd;
538 
539 		cmd->next_cmd = NULL;
540 
541 		if (((pkt = cmd->pkt) == NULL) || (cmd->poll)) {
542 			atomic_or_8(&cmd->done, 1);
543 		} else {
544 			pvscsi_set_status(pvs, cmd);
545 			scsi_hba_pkt_comp(pkt);
546 		}
547 
548 		cmd = next;
549 	}
550 }
551 
552 static void
553 pvscsi_dev_reset(pvscsi_softc_t *pvs, int target, int lun)
554 {
555 	struct PVSCSICmdDescResetDevice cmd = { 0 };
556 
557 	cmd.target = target;
558 	cmd.lun[1] = lun & 0xff;
559 	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof (cmd));
560 }
561 
562 static boolean_t
563 pvscsi_poll_cmd_until(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd, clock_t usec)
564 {
565 	while (usec > 0) {
566 		pvscsi_cmd_t	*done;
567 		if (cmd->done) {
568 			return (B_TRUE);
569 		}
570 		mutex_enter(&pvs->lock);
571 		done = pvscsi_process_comp_ring(pvs);
572 		mutex_exit(&pvs->lock);
573 
574 		pvscsi_complete_cmds(pvs, done);
575 		drv_usecwait(10);
576 		usec -= 10;
577 	}
578 
579 	return (B_FALSE);
580 }
581 
582 static void
583 pvscsi_poll_cmd(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
584 {
585 	if (pvscsi_poll_cmd_until(pvs, cmd, drv_hztousec(cmd->timeout))) {
586 		return;
587 	}
588 
589 	/* now we try an abort first */
590 	pvscsi_abort_cmd(pvs, cmd);
591 	if (pvscsi_poll_cmd_until(pvs, cmd, 2)) {
592 		return;
593 	}
594 	/* well that failed... try reset */
595 	pvscsi_dev_reset(pvs, cmd->target, cmd->lun);
596 	if (pvscsi_poll_cmd_until(pvs, cmd, 2)) {
597 		return;
598 	}
599 	/* still trying... reset the bus */
600 	pvscsi_reset_bus(pvs);
601 	if (pvscsi_poll_cmd_until(pvs, cmd, 2)) {
602 		return;
603 	}
604 	/* full up adapter reset -- be brutal */
605 	pvscsi_restart_hba(pvs);
606 }
607 
608 static void
609 pvscsi_abort_all(pvscsi_softc_t *pvs, pvscsi_device_t *pd)
610 {
611 	pvscsi_cmd_t	*cmd;
612 
613 	mutex_enter(&pvs->lock);
614 	list_t *l = &pvs->cmd_queue;
615 	for (cmd = list_head(l); cmd != NULL; cmd = list_next(l, cmd)) {
616 		if ((pd->target == cmd->target) && (pd->lun == cmd->lun)) {
617 			pvscsi_abort_cmd(pvs, cmd);
618 		}
619 	}
620 	mutex_exit(&pvs->lock);
621 }
622 
623 static int
624 pvscsi_transport_command(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd)
625 {
626 	struct PVSCSIRingReqDesc	*rdesc;
627 	struct PVSCSIRingsState		*sdesc = RINGS_STATE(pvs);
628 	uint32_t			req_ne = sdesc->reqNumEntriesLog2;
629 
630 	cmd->done = 0;
631 	cmd->expired = 0;
632 
633 	mutex_enter(&pvs->lock);
634 
635 	if ((sdesc->reqProdIdx - sdesc->cmpConsIdx) >= (1 << req_ne)) {
636 		mutex_exit(&pvs->lock);
637 		return (TRAN_BUSY);
638 	}
639 
640 	rdesc = REQ_RING(pvs) + (sdesc->reqProdIdx & MASK(req_ne));
641 
642 	rdesc->bus = 0;
643 	rdesc->target = cmd->target;
644 	bzero(rdesc->lun, sizeof (rdesc->lun));
645 	/* Matches other implementations; can pvscsi support luns > 255? */
646 	rdesc->lun[1] = cmd->lun & 0xff;
647 
648 	bzero(cmd->arq_sense, sizeof (cmd->arq_sense));
649 	rdesc->context = cmd->ctx;
650 	rdesc->senseLen = sizeof (cmd->arq_sense);
651 	rdesc->senseAddr = cmd->arq_pa;
652 	rdesc->tag = cmd->tag;
653 	rdesc->vcpuHint = CPU->cpu_id;
654 	rdesc->cdbLen = cmd->cdblen;
655 	rdesc->flags = cmd->dma_dir;
656 	bcopy(cmd->cdb, rdesc->cdb, cmd->cdblen);
657 	pvscsi_map_buffers(cmd, rdesc);
658 
659 	(void) ddi_dma_sync(pvs->req_ring_buf.dmah, 0, 0, DDI_DMA_SYNC_FORDEV);
660 
661 	sdesc->reqProdIdx++;
662 	(void) ddi_dma_sync(pvs->state_buf.dmah, 0, 0, DDI_DMA_SYNC_FORDEV);
663 
664 	pvscsi_add_to_queue(pvs, cmd);
665 
666 	switch (cmd->cdb[0]) {
667 	case SCMD_READ:
668 	case SCMD_WRITE:
669 	case SCMD_READ_G1:
670 	case SCMD_WRITE_G1:
671 	case SCMD_READ_G4:
672 	case SCMD_WRITE_G4:
673 	case SCMD_READ_G5:
674 	case SCMD_WRITE_G5:
675 		pvscsi_submit_rw_io(pvs);
676 		break;
677 	default:
678 		pvscsi_submit_nonrw_io(pvs);
679 		break;
680 	}
681 
682 	if (pvs->timeout == 0) {
683 		/* drivers above should supply, but give a default */
684 		pvs->timeout = timeout(pvscsi_timeout, pvs, pvscsi_hz * 8);
685 	}
686 	mutex_exit(&pvs->lock);
687 
688 	return (TRAN_ACCEPT);
689 }
690 
691 static int
692 pvscsi_setup_dma_buffer(pvscsi_softc_t *pvs, size_t length,
693     pvscsi_dma_buf_t *buf)
694 {
695 	if ((ddi_dma_alloc_handle(pvs->dip, &pvscsi_dma_attr,
696 	    DDI_DMA_SLEEP, NULL, &buf->dmah)) != DDI_SUCCESS) {
697 		dev_err(pvs->dip, CE_WARN, "!failed to alloc DMA handle");
698 		return (DDI_FAILURE);
699 	}
700 
701 	if ((ddi_dma_mem_alloc(buf->dmah, length, &pvscsi_dma_attrs,
702 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &buf->addr,
703 	    &length, &buf->acch)) != DDI_SUCCESS) {
704 		dev_err(pvs->dip, CE_WARN, "!failed to alloc DMA memory");
705 		return (DDI_FAILURE);
706 	}
707 
708 	if ((ddi_dma_addr_bind_handle(buf->dmah, NULL, buf->addr,
709 	    length, DDI_DMA_CONSISTENT | DDI_DMA_RDWR, DDI_DMA_SLEEP,
710 	    NULL, NULL, NULL)) != DDI_SUCCESS) {
711 		dev_err(pvs->dip, CE_WARN, "!failed to bind DMA buffer");
712 		return (DDI_FAILURE);
713 	}
714 
715 	buf->pa = ddi_dma_cookie_one(buf->dmah)->dmac_laddress;
716 
717 	return (DDI_SUCCESS);
718 }
719 
/*
 * Release a DMA buffer set up by pvscsi_setup_dma_buffer(), undoing
 * the steps in reverse order.  Each step is skipped if the matching
 * field was never set, so this works on a partially set up buffer.
 * NOTE(review): that assumes buf started out zeroed -- confirm.
 */
static void
pvscsi_free_dma_buffer(pvscsi_dma_buf_t *buf)
{
	/* a non-zero device address means the handle is bound */
	if (buf->pa != 0) {
		(void) ddi_dma_unbind_handle(buf->dmah);
	}
	if (buf->acch != NULL) {
		ddi_dma_mem_free(&buf->acch);
	}
	if (buf->dmah != NULL) {
		ddi_dma_free_handle(&buf->dmah);
	}
}
733 
734 static int
735 pvscsi_allocate_rings(pvscsi_softc_t *pvs)
736 {
737 	/* allocate DMA buffer for rings state */
738 	if (pvscsi_setup_dma_buffer(pvs, PAGE_SIZE, &pvs->state_buf) !=
739 	    DDI_SUCCESS) {
740 		return (DDI_FAILURE);
741 	}
742 
743 	/* allocate DMA buffer for request ring */
744 	pvs->req_pages = MIN(pvscsi_ring_pages, PVSCSI_MAX_NUM_PAGES_REQ_RING);
745 	pvs->req_depth = pvs->req_pages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
746 	if (pvscsi_setup_dma_buffer(pvs, pvs->req_pages * PAGE_SIZE,
747 	    &pvs->req_ring_buf) != DDI_SUCCESS) {
748 		return (DDI_FAILURE);
749 	}
750 
751 	/* allocate completion ring */
752 	pvs->cmp_pages = MIN(pvscsi_ring_pages, PVSCSI_MAX_NUM_PAGES_CMP_RING);
753 	if (pvscsi_setup_dma_buffer(pvs, pvs->cmp_pages * PAGE_SIZE,
754 	    &pvs->cmp_ring_buf) != DDI_SUCCESS) {
755 		return (DDI_FAILURE);
756 	}
757 
758 	/* allocate message ring */
759 	pvs->msg_pages = MIN(pvscsi_msg_ring_pages,
760 	    PVSCSI_MAX_NUM_PAGES_MSG_RING);
761 	if (pvscsi_setup_dma_buffer(pvs, pvs->msg_pages * PAGE_SIZE,
762 	    &pvs->msg_ring_buf) != DDI_SUCCESS) {
763 		return (DDI_FAILURE);
764 	}
765 
766 	return (DDI_SUCCESS);
767 }
768 
/*
 * Release the ring and state DMA buffers, in the reverse of the order
 * in which pvscsi_allocate_rings() sets them up.
 */
static void
pvscsi_free_rings(pvscsi_softc_t *pvs)
{
	pvscsi_free_dma_buffer(&pvs->msg_ring_buf);
	pvscsi_free_dma_buffer(&pvs->cmp_ring_buf);
	pvscsi_free_dma_buffer(&pvs->req_ring_buf);
	pvscsi_free_dma_buffer(&pvs->state_buf);
}
777 
778 static void
779 pvscsi_setup_rings(pvscsi_softc_t *pvs)
780 {
781 	int		i;
782 	struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };
783 	struct PVSCSICmdDescSetupRings cmd = { 0 };
784 	uint64_t	base;
785 
786 	cmd.ringsStatePPN = pvs->state_buf.pa >> PAGE_SHIFT;
787 	cmd.reqRingNumPages = pvs->req_pages;
788 	cmd.cmpRingNumPages = pvs->cmp_pages;
789 
790 	/* Setup request ring */
791 	base = pvs->req_ring_buf.pa;
792 	for (i = 0; i < pvs->req_pages; i++) {
793 		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
794 		base += PAGE_SIZE;
795 	}
796 
797 	/* Setup completion ring */
798 	base = pvs->cmp_ring_buf.pa;
799 	for (i = 0; i < pvs->cmp_pages; i++) {
800 		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
801 		base += PAGE_SIZE;
802 	}
803 
804 	bzero(RINGS_STATE(pvs), PAGE_SIZE);
805 	bzero(REQ_RING(pvs), pvs->req_pages * PAGE_SIZE);
806 	bzero(CMP_RING(pvs), pvs->cmp_pages * PAGE_SIZE);
807 
808 	/* Issue SETUP command */
809 	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof (cmd));
810 
811 	/* Setup message ring */
812 	cmd_msg.numPages = pvs->msg_pages;
813 	base = pvs->msg_ring_buf.pa;
814 
815 	for (i = 0; i < pvs->msg_pages; i++) {
816 		cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
817 		base += PAGE_SIZE;
818 	}
819 	bzero(MSG_RING(pvs), pvs->msg_pages * PAGE_SIZE);
820 
821 	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_SETUP_MSG_RING, &cmd_msg,
822 	    sizeof (cmd_msg));
823 }
824 
825 static int
826 pvscsi_setup_io(pvscsi_softc_t *pvs)
827 {
828 	int		offset, rcount, rn, type;
829 	int		ret = DDI_FAILURE;
830 	off_t		regsize;
831 	pci_regspec_t	*regs;
832 	uint_t		regs_length;
833 
834 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, pvs->dip,
835 	    DDI_PROP_DONTPASS, "reg", (int **)&regs,
836 	    &regs_length) != DDI_PROP_SUCCESS) {
837 		dev_err(pvs->dip, CE_WARN, "!failed to lookup 'reg' property");
838 		return (DDI_FAILURE);
839 	}
840 
841 	rcount = regs_length * sizeof (int) / sizeof (pci_regspec_t);
842 
843 	for (offset = PCI_CONF_BASE0; offset <= PCI_CONF_BASE5; offset += 4) {
844 		for (rn = 0; rn < rcount; ++rn) {
845 			if (PCI_REG_REG_G(regs[rn].pci_phys_hi) == offset) {
846 				type = regs[rn].pci_phys_hi & PCI_ADDR_MASK;
847 				break;
848 			}
849 		}
850 
851 		if (rn >= rcount)
852 			continue;
853 
854 		if (type != PCI_ADDR_IO) {
855 			if (ddi_dev_regsize(pvs->dip, rn,
856 			    &regsize) != DDI_SUCCESS) {
857 				dev_err(pvs->dip, CE_WARN,
858 				    "!failed to get size of reg %d", rn);
859 				goto out;
860 			}
861 			if (regsize == PVSCSI_MEM_SPACE_SIZE) {
862 				if (ddi_regs_map_setup(pvs->dip, rn,
863 				    &pvs->mmio_base, 0, 0,
864 				    &pvscsi_mmio_attr,
865 				    &pvs->mmio_handle) != DDI_SUCCESS) {
866 					dev_err(pvs->dip, CE_WARN,
867 					    "!failed to map MMIO BAR");
868 					goto out;
869 				}
870 				ret = DDI_SUCCESS;
871 				break;
872 			}
873 		}
874 	}
875 
876 out:
877 	ddi_prop_free(regs);
878 
879 	return (ret);
880 }
881 
882 static int
883 pvscsi_enable_intrs(pvscsi_softc_t *pvs)
884 {
885 	int	i, rc, intr_caps;
886 
887 	if ((rc = ddi_intr_get_cap(pvs->intr_handles[0], &intr_caps)) !=
888 	    DDI_SUCCESS) {
889 		dev_err(pvs->dip, CE_WARN, "!failed to get interrupt caps");
890 		return (DDI_FAILURE);
891 	}
892 
893 	if ((intr_caps & DDI_INTR_FLAG_BLOCK) != 0) {
894 		if ((rc = ddi_intr_block_enable(pvs->intr_handles,
895 		    pvs->intr_cnt)) != DDI_SUCCESS) {
896 			dev_err(pvs->dip, CE_WARN,
897 			    "!failed to enable interrupt block");
898 		}
899 	} else {
900 		for (i = 0; i < pvs->intr_cnt; i++) {
901 			if ((rc = ddi_intr_enable(pvs->intr_handles[i])) ==
902 			    DDI_SUCCESS)
903 				continue;
904 			dev_err(pvs->dip, CE_WARN,
905 			    "!failed to enable interrupt");
906 			while (--i >= 0)
907 				(void) ddi_intr_disable(pvs->intr_handles[i]);
908 			break;
909 		}
910 	}
911 
912 	return (rc);
913 }
914 
915 static uint32_t
916 pvscsi_intr(caddr_t arg1, caddr_t arg2)
917 {
918 	pvscsi_softc_t	*pvs = (pvscsi_softc_t *)arg1;
919 	uint32_t	status;
920 	pvscsi_cmd_t	*cmd;
921 	pvscsi_msg_t	*msg;
922 	uint32_t	rv = DDI_INTR_CLAIMED;
923 	_NOTE(ARGUNUSED(arg2));
924 
925 	mutex_enter(&pvs->lock);
926 	status = pvscsi_read_intr_status(pvs);
927 	if ((status & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
928 		pvscsi_write_intr_status(pvs, status);
929 	} else if (pvs->intr_type == DDI_INTR_TYPE_FIXED) {
930 		rv = DDI_INTR_UNCLAIMED;
931 	}
932 	if (pvs->detach) {
933 		mutex_exit(&pvs->lock);
934 		return (rv);
935 	}
936 	cmd = pvscsi_process_comp_ring(pvs);
937 	msg = pvscsi_process_msg_ring(pvs);
938 
939 	/*
940 	 * Do this under the lock, so that we won't dispatch
941 	 * if we are detaching
942 	 */
943 	if (msg != NULL) {
944 		if (ddi_taskq_dispatch(pvs->tq, pvscsi_handle_msg, msg,
945 		    DDI_NOSLEEP) != DDI_SUCCESS) {
946 			dev_err(pvs->dip, CE_WARN,
947 			    "!failed to dispatch discovery");
948 		}
949 	}
950 	mutex_exit(&pvs->lock);
951 
952 	pvscsi_complete_cmds(pvs, cmd);
953 
954 	return (rv);
955 }
956 
/*
 * Tear down every allocated interrupt: disable it, remove its handler
 * and free it.  Failures of the individual steps are ignored, so this
 * can be used on interrupts that were never enabled or never had a
 * handler added.  intr_cnt is reset so a second call does nothing.
 */
static void
pvscsi_free_intrs(pvscsi_softc_t *pvs)
{
	for (int i = 0; i < pvs->intr_cnt; i++) {
		(void) ddi_intr_disable(pvs->intr_handles[i]);
		(void) ddi_intr_remove_handler(pvs->intr_handles[i]);
		(void) ddi_intr_free(pvs->intr_handles[i]);
	}
	pvs->intr_cnt = 0;
}
967 
968 static int
969 pvscsi_register_isr(pvscsi_softc_t *pvs, int type)
970 {
971 	int	navail, nactual;
972 	int	i;
973 
974 	if (ddi_intr_get_navail(pvs->dip, type, &navail) != DDI_SUCCESS ||
975 	    navail == 0) {
976 		dev_err(pvs->dip, CE_WARN,
977 		    "!failed to get number of available interrupts of type %d",
978 		    type);
979 		return (DDI_FAILURE);
980 	}
981 	navail = MIN(navail, PVSCSI_MAX_INTRS);
982 
983 	if (ddi_intr_alloc(pvs->dip, pvs->intr_handles, type, 0, navail,
984 	    &nactual, DDI_INTR_ALLOC_NORMAL) != DDI_SUCCESS || nactual == 0) {
985 		dev_err(pvs->dip, CE_WARN, "!failed to allocate %d interrupts",
986 		    navail);
987 		return (DDI_FAILURE);
988 	}
989 
990 	pvs->intr_cnt = nactual;
991 
992 	if (ddi_intr_get_pri(pvs->intr_handles[0], (uint_t *)&pvs->intr_pri) !=
993 	    DDI_SUCCESS) {
994 		dev_err(pvs->dip, CE_WARN, "!failed to get interrupt priority");
995 		pvscsi_free_intrs(pvs);
996 		return (DDI_FAILURE);
997 	}
998 
999 	for (i = 0; i < nactual; i++) {
1000 		if (ddi_intr_add_handler(pvs->intr_handles[i], pvscsi_intr,
1001 		    (caddr_t)pvs, NULL) != DDI_SUCCESS) {
1002 			dev_err(pvs->dip, CE_WARN,
1003 			    "!failed to add intr handler");
1004 			pvscsi_free_intrs(pvs);
1005 			return (DDI_FAILURE);
1006 		}
1007 	}
1008 
1009 	pvs->intr_type = type;
1010 	return (DDI_SUCCESS);
1011 }
1012 
1013 static int
1014 pvscsi_setup_isr(pvscsi_softc_t *pvs)
1015 {
1016 	int	types;
1017 
1018 	if (ddi_intr_get_supported_types(pvs->dip, &types) != DDI_SUCCESS) {
1019 		dev_err(pvs->dip, CE_WARN, "!failed to get interrupt types");
1020 		return (DDI_FAILURE);
1021 	}
1022 
1023 	types &= pvscsi_intr_types;
1024 	if (types == 0) {
1025 		dev_err(pvs->dip, CE_WARN, "!no supported interrupt types");
1026 		return (DDI_FAILURE);
1027 	}
1028 
1029 
1030 	if (((types & DDI_INTR_TYPE_MSIX) != 0) &&
1031 	    (pvscsi_register_isr(pvs, DDI_INTR_TYPE_MSIX) == DDI_SUCCESS)) {
1032 		return (DDI_SUCCESS);
1033 	}
1034 	if (((types & DDI_INTR_TYPE_MSI) != 0) &&
1035 	    (pvscsi_register_isr(pvs, DDI_INTR_TYPE_MSI) == DDI_SUCCESS)) {
1036 		return (DDI_SUCCESS);
1037 	}
1038 	if (((types & DDI_INTR_TYPE_FIXED) != 0) &&
1039 	    (pvscsi_register_isr(pvs, DDI_INTR_TYPE_FIXED) == DDI_SUCCESS)) {
1040 		return (DDI_SUCCESS);
1041 	}
1042 
1043 	dev_err(pvs->dip, CE_WARN, "!failed installing any interrupt handler");
1044 	return (DDI_FAILURE);
1045 }
1046 
1047 
1048 static void
1049 pvscsi_timeout(void *arg)
1050 {
1051 	pvscsi_softc_t	*pvs;
1052 	pvscsi_cmd_t	*cmd;
1053 	pvscsi_cmd_t	*reclaimed = NULL;
1054 	list_t		*l;
1055 	clock_t		now;
1056 
1057 	pvs = arg;
1058 	l = &pvs->cmd_queue;
1059 	now = ddi_get_lbolt();
1060 
1061 	mutex_enter(&pvs->lock);
1062 	if (pvs->timeout == 0) {
1063 		mutex_exit(&pvs->lock);
1064 		return;
1065 	}
1066 
1067 	for (cmd = list_head(l); cmd != NULL; cmd = list_next(l, cmd)) {
1068 		clock_t	overdue;
1069 
1070 		/* polling takes care of it's own timeouts */
1071 		if (cmd->poll) {
1072 			continue;
1073 		}
1074 
1075 		overdue = now - (cmd->start + cmd->timeout);
1076 
1077 		/*
1078 		 * We keep the list of requests sorted by expiration
1079 		 * time, so we hopefully won't need to walk through
1080 		 * many of these.  This check is safe if lbolt wraps.
1081 		 */
1082 		if (overdue <= 0) {
1083 			break;
1084 		}
1085 
1086 		/* first we try aborting */
1087 		if (!cmd->expired) {
1088 			atomic_or_8(&cmd->expired, 1);
1089 			dev_err(pvs->dip, CE_WARN, "!cmd timed out (%lds)",
1090 			    drv_hztousec(cmd->timeout)/1000000);
1091 			continue;
1092 		}
1093 
1094 		/* if we're less than 2 seconds overdue, wait for abort */
1095 		if (overdue <= pvscsi_hz * 2) {
1096 			continue;
1097 		}
1098 
1099 		/* next it's a reset of the device */
1100 		if (overdue <= pvscsi_hz * 8) {
1101 			pvscsi_dev_reset(pvs, cmd->target, cmd->lun);
1102 			break;
1103 		}
1104 
1105 		/* next it's a reset of the bus */
1106 		if (overdue <= pvscsi_hz * 16) {
1107 			pvscsi_reset_bus(pvs);
1108 			break;
1109 		}
1110 
1111 		/* finally it's a reset of the entire adapter */
1112 		dev_err(pvs->dip, CE_WARN, "!adapter hung? restarting...");
1113 		mutex_enter(&pvs->lock);
1114 		pvscsi_stop_hba(pvs);
1115 		reclaimed = pvscsi_reclaim_cmds(pvs);
1116 		pvscsi_start_hba(pvs);
1117 		mutex_exit(&pvs->lock);
1118 		break;
1119 	}
1120 
1121 	/* see if reset or abort completed anything */
1122 	cmd = pvscsi_process_comp_ring(pvs);
1123 
1124 	/* reschedule us if we still have requests pending */
1125 	if (!list_is_empty(l)) {
1126 		pvs->timeout = timeout(pvscsi_timeout, pvs, pvscsi_hz);
1127 	}
1128 
1129 	mutex_exit(&pvs->lock);
1130 
1131 	/* if we had things that got completed, then do the callbacks */
1132 	pvscsi_complete_cmds(pvs, reclaimed);
1133 	pvscsi_complete_cmds(pvs, cmd);
1134 }
1135 
1136 static int
1137 pvscsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
1138 {
1139 	pvscsi_cmd_t		*cmd = pkt->pkt_ha_private;
1140 	struct scsi_device	*sd;
1141 	pvscsi_device_t		*pd;
1142 	pvscsi_softc_t		*pvs;
1143 	int			rc;
1144 	boolean_t		poll;
1145 
1146 	/* make sure the packet is sane */
1147 	if ((pkt->pkt_numcookies > PVSCSI_MAX_SG_SIZE) ||
1148 	    ((pkt->pkt_dma_flags & DDI_DMA_RDWR) == DDI_DMA_RDWR) ||
1149 	    (pkt->pkt_cdblen > sizeof (cmd->cdb)) ||
1150 	    ((sd = scsi_address_device(ap)) == NULL) ||
1151 	    ((pd = scsi_device_hba_private_get(sd)) == NULL) ||
1152 	    ((pvs = pd->pvs) == NULL))  {
1153 		return (TRAN_BADPKT);
1154 	}
1155 
1156 	ASSERT(cmd->pkt == pkt);
1157 
1158 	poll = cmd->poll = ((pkt->pkt_flags & FLAG_NOINTR) != 0);
1159 
1160 	if (pkt->pkt_flags & (FLAG_HTAG|FLAG_HEAD)) {
1161 		cmd->tag = MSG_HEAD_QTAG;
1162 	} else if (pkt->pkt_flags & FLAG_OTAG) {
1163 		cmd->tag = MSG_ORDERED_QTAG;
1164 	} else { /* also FLAG_STAG */
1165 		cmd->tag = MSG_SIMPLE_QTAG;
1166 	}
1167 
1168 	bcopy(pkt->pkt_cdbp, cmd->cdb, pkt->pkt_cdblen);
1169 	cmd->cdblen = pkt->pkt_cdblen;
1170 	bzero(&cmd->cmd_scb, sizeof (cmd->cmd_scb));
1171 
1172 	/*
1173 	 * Reinitialize some fields because the packet may
1174 	 * have been resubmitted.
1175 	 */
1176 	pkt->pkt_reason = CMD_CMPLT;
1177 	pkt->pkt_state = 0;
1178 	pkt->pkt_statistics = 0;
1179 
1180 	/* Zero status byte - but only if present */
1181 	if (pkt->pkt_scblen > 0) {
1182 		*(pkt->pkt_scbp) = 0;
1183 	}
1184 
1185 	if (pkt->pkt_numcookies > 0) {
1186 		if (pkt->pkt_dma_flags & DDI_DMA_READ) {
1187 			cmd->dma_dir = PVSCSI_FLAG_CMD_DIR_TOHOST;
1188 		} else if (pkt->pkt_dma_flags & DDI_DMA_WRITE) {
1189 			cmd->dma_dir = PVSCSI_FLAG_CMD_DIR_TODEVICE;
1190 		} else {
1191 			cmd->dma_dir = 0;
1192 		}
1193 	}
1194 
1195 	cmd->target = pd->target;
1196 	cmd->lun = pd->lun;
1197 	cmd->start = ddi_get_lbolt();
1198 	cmd->timeout = pkt->pkt_time * pvscsi_hz;
1199 
1200 	rc = pvscsi_transport_command(pvs, cmd);
1201 
1202 	if (poll && rc == TRAN_ACCEPT) {
1203 		pvscsi_poll_cmd(pvs, cmd);
1204 		pvscsi_set_status(pvs, cmd);
1205 	}
1206 
1207 	return (rc);
1208 }
1209 
1210 
1211 static int
1212 pvscsi_parse_ua(const char *ua, int *target, int *lun)
1213 {
1214 	char *end;
1215 	long num;
1216 	if (((ddi_strtol(ua, &end, 16, &num)) != 0) ||
1217 	    ((*end != ',') && (*end != 0))) {
1218 		return (DDI_FAILURE);
1219 	}
1220 	*target = (int)num;
1221 	if (*end == 0) {
1222 		*lun = 0;
1223 		return (DDI_SUCCESS);
1224 	}
1225 	end++;
1226 	if ((ddi_strtol(end, &end, 16, &num) != 0) || (*end != 0)) {
1227 		return (DDI_FAILURE);
1228 	}
1229 	*lun = (int)num;
1230 	return (DDI_SUCCESS);
1231 }
1232 
1233 static uint32_t
1234 pvscsi_max_targets(pvscsi_softc_t *pvs)
1235 {
1236 	pvscsi_dma_buf_t			db;
1237 	struct PVSCSIConfigPageController	cpc;
1238 	struct PVSCSICmdDescConfigCmd		cmd;
1239 
1240 	bzero(&db, sizeof (db));
1241 
1242 	/* NB: config pages fit in a single page */
1243 	if (pvscsi_setup_dma_buffer(pvs, PAGE_SIZE, &db) != DDI_SUCCESS) {
1244 		dev_err(pvs->dip, CE_WARN,
1245 		    "!failed to setup config page DMA");
1246 		return (PVSCSI_MAXTGTS);
1247 	}
1248 
1249 	bzero(&cmd, sizeof (cmd));
1250 	cmd.configPageAddress = PVSCSI_CONFIG_CONTROLLER_ADDRESS;
1251 	cmd.configPageAddress <<= 32;
1252 	cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
1253 	cmd.cmpAddr = db.pa;
1254 
1255 	pvscsi_write_cmd_desc(pvs, PVSCSI_CMD_CONFIG, &cmd, sizeof (cmd));
1256 	(void) ddi_dma_sync(db.dmah, 0, 0, DDI_DMA_SYNC_FORKERNEL);
1257 	bcopy(db.addr, &cpc, sizeof (cpc));
1258 	pvscsi_free_dma_buffer(&db);
1259 
1260 
1261 	if ((cpc.header.scsiStatus == STATUS_GOOD) &&
1262 	    (cpc.header.hostStatus == BTSTAT_SUCCESS) &&
1263 	    (cpc.numPhys > 0)) {
1264 		return (cpc.numPhys);
1265 	}
1266 
1267 	dev_err(pvs->dip, CE_WARN, "!failed to determine max targets");
1268 	return (PVSCSI_MAXTGTS);
1269 }
1270 
1271 static boolean_t
1272 pvscsi_probe_target(pvscsi_softc_t *pvs, int target)
1273 {
1274 	pvscsi_cmd_t		cmd;
1275 
1276 	if (!pvscsi_cmd_init(pvs, &cmd, KM_SLEEP)) {
1277 		pvscsi_cmd_fini(&cmd);
1278 		return (B_FALSE);
1279 	}
1280 	/* NB: CDB 0 is a TUR which is perfect for our needs */
1281 	bzero(cmd.cdb, sizeof (cmd.cdb));
1282 	cmd.poll = B_TRUE;
1283 	cmd.dma_dir = 0;
1284 	cmd.target = target;
1285 	cmd.lun = 0;
1286 	cmd.start = ddi_get_lbolt();
1287 	cmd.timeout = pvscsi_hz;
1288 
1289 	if (pvscsi_transport_command(pvs, &cmd) != TRAN_ACCEPT) {
1290 		pvscsi_cmd_fini(&cmd);
1291 		return (B_FALSE);
1292 	}
1293 	pvscsi_poll_cmd(pvs, &cmd);
1294 
1295 	switch (cmd.host_status) {
1296 	case BTSTAT_SUCCESS:
1297 	case BTSTAT_LINKED_COMMAND_COMPLETED:
1298 	case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
1299 		/* We don't care about the actual SCSI status */
1300 		pvscsi_cmd_fini(&cmd);
1301 		return (B_TRUE);
1302 	}
1303 
1304 	pvscsi_cmd_fini(&cmd);
1305 	return (B_FALSE);
1306 }
1307 
1308 static int
1309 pvscsi_tgt_init(dev_info_t *dip, dev_info_t *child, scsi_hba_tran_t *tran,
1310     struct scsi_device *sd)
1311 {
1312 	/*
1313 	 * Assumption: the HBA framework only asks us to have a single
1314 	 * target initialized per address at any given time.
1315 	 */
1316 	pvscsi_device_t	*pd;
1317 	pvscsi_softc_t	*pvs;
1318 	const char	*ua;
1319 
1320 	if (((scsi_hba_iport_unit_address(dip)) == NULL) ||
1321 	    ((pvs = tran->tran_hba_private) == NULL) ||
1322 	    ((ua = scsi_device_unit_address(sd)) == NULL)) {
1323 		return (DDI_FAILURE);
1324 	}
1325 
1326 	/* parse the unit address */
1327 	pd = kmem_zalloc(sizeof (*pd), KM_SLEEP);
1328 	if (pvscsi_parse_ua(ua, &pd->target, &pd->lun) != DDI_SUCCESS) {
1329 		kmem_free(pd, sizeof (*pd));
1330 		return (DDI_FAILURE);
1331 	}
1332 	pd->pvs = pvs;
1333 	scsi_device_hba_private_set(sd, pd);
1334 
1335 	mutex_enter(&pvs->lock);
1336 	list_insert_tail(&pvs->devices, pd);
1337 	mutex_exit(&pvs->lock);
1338 	return (DDI_SUCCESS);
1339 }
1340 
1341 static void
1342 pvscsi_tgt_free(dev_info_t *dip, dev_info_t *child, scsi_hba_tran_t *tran,
1343     struct scsi_device *sd)
1344 {
1345 	pvscsi_device_t	*pd;
1346 	pvscsi_softc_t	*pvs;
1347 
1348 	if (((scsi_hba_iport_unit_address(dip)) == NULL) ||
1349 	    ((pvs = tran->tran_hba_private) == NULL) ||
1350 	    ((pd = scsi_device_hba_private_get(sd)) == NULL)) {
1351 		return;
1352 	}
1353 	scsi_device_hba_private_set(sd, NULL);
1354 	mutex_enter(&pvs->lock);
1355 	list_remove(&pvs->devices, pd);
1356 	mutex_exit(&pvs->lock);
1357 
1358 	kmem_free(pd, sizeof (*pd));
1359 }
1360 
1361 static int
1362 pvscsi_reset(struct scsi_address *ap, int level)
1363 {
1364 	struct scsi_device	*sd;
1365 	pvscsi_device_t		*pd;
1366 	pvscsi_softc_t		*pvs;
1367 	pvscsi_cmd_t		*cmd;
1368 
1369 	if (((sd = scsi_address_device(ap)) == NULL) ||
1370 	    ((pd = scsi_device_hba_private_get(sd)) == NULL) ||
1371 	    ((pvs = pd->pvs) == NULL))  {
1372 		return (0);
1373 	}
1374 	switch (level) {
1375 	case RESET_ALL:
1376 	case RESET_BUS:
1377 		pvscsi_reset_bus(pvs);
1378 		break;
1379 	case RESET_TARGET:
1380 		/* reset both the lun and lun 0 */
1381 		pvscsi_dev_reset(pvs, pd->target, pd->lun);
1382 		pvscsi_dev_reset(pvs, pd->target, 0);
1383 		break;
1384 	case RESET_LUN:
1385 		pvscsi_dev_reset(pvs, pd->target, pd->lun);
1386 		break;
1387 	default:
1388 		return (0);
1389 	}
1390 
1391 	/* reset may have caused some completions */
1392 	mutex_enter(&pvs->lock);
1393 	cmd = pvscsi_process_comp_ring(pvs);
1394 	mutex_exit(&pvs->lock);
1395 
1396 	pvscsi_complete_cmds(pvs, cmd);
1397 	return (1);
1398 }
1399 
1400 static int
1401 pvscsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
1402 {
1403 	struct scsi_device	*sd;
1404 	pvscsi_device_t		*pd;
1405 	pvscsi_softc_t		*pvs;
1406 	pvscsi_cmd_t		*cmd;
1407 
1408 	if (pkt != NULL) {
1409 		/* abort single command */
1410 		cmd = pkt->pkt_ha_private;
1411 		pvs = cmd->pvs;
1412 		pvscsi_abort_cmd(pvs, cmd);
1413 	} else if ((ap != NULL) &&
1414 	    ((sd = scsi_address_device(ap)) != NULL) &&
1415 	    ((pd = scsi_device_hba_private_get(sd)) != NULL) &&
1416 	    ((pvs = pd->pvs) != NULL)) {
1417 		/* abort all commands on the bus */
1418 		pvscsi_abort_all(pvs, pd);
1419 	} else {
1420 		return (0);
1421 	}
1422 
1423 	/* abort may have caused some completions */
1424 	mutex_enter(&pvs->lock);
1425 	cmd = pvscsi_process_comp_ring(pvs);
1426 	mutex_exit(&pvs->lock);
1427 
1428 	pvscsi_complete_cmds(pvs, cmd);
1429 
1430 	return (1);
1431 }
1432 
1433 static int
1434 pvscsi_getcap(struct scsi_address *ap, char *cap, int whom)
1435 {
1436 	_NOTE(ARGUNUSED(ap));
1437 	_NOTE(ARGUNUSED(whom));
1438 
1439 	if (cap == NULL) {
1440 		return (-1);
1441 	}
1442 
1443 	switch (scsi_hba_lookup_capstr(cap)) {
1444 	case SCSI_CAP_ARQ:
1445 	case SCSI_CAP_UNTAGGED_QING:
1446 	case SCSI_CAP_TAGGED_QING:
1447 		return (1);
1448 	default:
1449 		return (-1);
1450 	}
1451 }
1452 
1453 static int
1454 pvscsi_setcap(struct scsi_address *ap, char *cap, int value, int whom)
1455 {
1456 	_NOTE(ARGUNUSED(ap));
1457 	_NOTE(ARGUNUSED(value));
1458 	_NOTE(ARGUNUSED(whom));
1459 
1460 	if (cap == NULL) {
1461 		return (-1);
1462 	}
1463 
1464 	switch (scsi_hba_lookup_capstr(cap)) {
1465 	case SCSI_CAP_ARQ:
1466 	case SCSI_CAP_UNTAGGED_QING:
1467 	case SCSI_CAP_TAGGED_QING:
1468 		return (0); /* not changeable */
1469 	default:
1470 		return (-1);
1471 	}
1472 }
1473 
1474 static void
1475 pvscsi_cmd_fini(pvscsi_cmd_t *cmd)
1476 {
1477 	if (cmd->arq_pa != 0) {
1478 		(void) ddi_dma_unbind_handle(cmd->arq_dmah);
1479 		cmd->arq_dmah = NULL;
1480 	}
1481 	if (cmd->arq_dmah != NULL) {
1482 		ddi_dma_free_handle(&cmd->arq_dmah);
1483 		cmd->arq_dmah = NULL;
1484 	}
1485 	if (cmd->sgl_pa != 0) {
1486 		(void) ddi_dma_unbind_handle(cmd->sgl_dmah);
1487 		cmd->sgl_pa = 0;
1488 	}
1489 	if (cmd->sgl_acch != NULL) {
1490 		ddi_dma_mem_free(&cmd->sgl_acch);
1491 		cmd->sgl_acch = NULL;
1492 		cmd->sgl = NULL;
1493 	}
1494 	if (cmd->sgl_dmah != NULL) {
1495 		ddi_dma_free_handle(&cmd->sgl_dmah);
1496 		cmd->sgl_dmah = NULL;
1497 	}
1498 	if (cmd->ctx != 0) {
1499 		id32_free(cmd->ctx);
1500 		cmd->ctx = 0;
1501 	}
1502 }
1503 
1504 static void
1505 pvscsi_pkt_dtor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran)
1506 {
1507 	pvscsi_cmd_t	*cmd = pkt->pkt_ha_private;
1508 	pvscsi_cmd_fini(cmd);
1509 }
1510 
1511 static boolean_t
1512 pvscsi_cmd_init(pvscsi_softc_t *pvs, pvscsi_cmd_t *cmd, int sleep)
1513 {
1514 	int		(*cb)(caddr_t);
1515 	size_t		len;
1516 	caddr_t		kaddr;
1517 
1518 	cb = sleep == KM_SLEEP ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
1519 
1520 	bzero(cmd, sizeof (*cmd));
1521 	cmd->ctx = id32_alloc(cmd, sleep);
1522 	if (cmd->ctx == 0) {
1523 		dev_err(pvs->dip, CE_WARN,
1524 		    "!failed to allocate 32-bit context id");
1525 		return (B_FALSE);
1526 	}
1527 
1528 	/* allocate DMA resources for scatter/gather list */
1529 	if (ddi_dma_alloc_handle(pvs->dip, &pvscsi_dma_attr, cb, NULL,
1530 	    &cmd->sgl_dmah) != DDI_SUCCESS) {
1531 		dev_err(pvs->dip, CE_WARN,
1532 		    "!failed to allocate DMA handle for SG list");
1533 		return (B_FALSE);
1534 	}
1535 	if (ddi_dma_mem_alloc(cmd->sgl_dmah, PAGE_SIZE, &pvscsi_dma_attrs,
1536 	    DDI_DMA_CONSISTENT, cb, NULL, &kaddr, &len, &cmd->sgl_acch) !=
1537 	    DDI_SUCCESS) {
1538 		dev_err(pvs->dip, CE_WARN,
1539 		    "!failed to allocate DMA memory for SG list");
1540 		return (B_FALSE);
1541 	}
1542 	cmd->sgl = (void *)kaddr;
1543 	if (ddi_dma_addr_bind_handle(cmd->sgl_dmah, NULL, kaddr,
1544 	    PAGE_SIZE, DDI_DMA_WRITE | DDI_DMA_CONSISTENT, cb, NULL,
1545 	    NULL, NULL) != DDI_DMA_MAPPED) {
1546 		dev_err(pvs->dip, CE_WARN, "!failed to bind SGL list");
1547 		return (B_FALSE);
1548 	}
1549 	cmd->sgl_pa = ddi_dma_cookie_one(cmd->sgl_dmah)->dmac_laddress;
1550 
1551 	/* allocate DMA resource for auto-sense-request */
1552 	if (ddi_dma_alloc_handle(pvs->dip, &pvscsi_dma_attr,
1553 	    cb, NULL, &cmd->arq_dmah) != DDI_SUCCESS) {
1554 		dev_err(pvs->dip, CE_WARN,
1555 		    "!failed to allocate DMA handle for ARQ buffer");
1556 		return (B_FALSE);
1557 	}
1558 
1559 	if (ddi_dma_addr_bind_handle(cmd->arq_dmah, NULL,
1560 	    (void *)cmd->arq_sense, SENSE_LENGTH,
1561 	    DDI_DMA_READ | DDI_DMA_CONSISTENT, cb, NULL,
1562 	    NULL, NULL) != DDI_DMA_MAPPED) {
1563 		dev_err(pvs->dip, CE_WARN, "!failed to bind ARQ buffer");
1564 		return (B_FALSE);
1565 	}
1566 	cmd->arq_pa = ddi_dma_cookie_one(cmd->arq_dmah)->dmac_laddress;
1567 	return (B_TRUE);
1568 }
1569 
1570 static int
1571 pvscsi_pkt_ctor(struct scsi_pkt *pkt, scsi_hba_tran_t *tran, int sleep)
1572 {
1573 	pvscsi_cmd_t	*cmd = pkt->pkt_ha_private;
1574 	pvscsi_softc_t	*pvs = tran->tran_hba_private;
1575 
1576 	if (!pvscsi_cmd_init(pvs, cmd, sleep)) {
1577 		pvscsi_pkt_dtor(pkt, tran);
1578 		return (-1);
1579 	}
1580 	cmd->pkt = pkt;
1581 	return (0);
1582 }
1583 
/*
 * tran_teardown_pkt(9E) entry point.  Intentionally empty: this driver
 * keeps no per-use packet state that would need releasing here.
 */
static void
pvscsi_teardown_pkt(struct scsi_pkt *pkt)
{
	_NOTE(ARGUNUSED(pkt));
}
1590 
1591 static int
1592 pvscsi_setup_pkt(struct scsi_pkt *pkt, int (*cb)(caddr_t), caddr_t arg)
1593 {
1594 	/* all work is done in start */
1595 	return (0);
1596 }
1597 
1598 static int
1599 pvscsi_hba_setup(pvscsi_softc_t *pvs)
1600 {
1601 	scsi_hba_tran_t	*tran;
1602 
1603 	tran = scsi_hba_tran_alloc(pvs->dip, SCSI_HBA_CANSLEEP);
1604 	ASSERT(tran != NULL);
1605 
1606 	tran->tran_hba_private = pvs;
1607 	tran->tran_start = pvscsi_start;
1608 	tran->tran_reset = pvscsi_reset;
1609 	tran->tran_abort = pvscsi_abort;
1610 	tran->tran_getcap = pvscsi_getcap;
1611 	tran->tran_setcap = pvscsi_setcap;
1612 	tran->tran_pkt_constructor = pvscsi_pkt_ctor;
1613 	tran->tran_pkt_destructor = pvscsi_pkt_dtor;
1614 	tran->tran_setup_pkt = pvscsi_setup_pkt;
1615 	tran->tran_teardown_pkt = pvscsi_teardown_pkt;
1616 	tran->tran_tgt_init = pvscsi_tgt_init;
1617 	tran->tran_tgt_free = pvscsi_tgt_free;
1618 	tran->tran_hba_len = sizeof (pvscsi_cmd_t);
1619 
1620 	tran->tran_interconnect_type = INTERCONNECT_PARALLEL;
1621 
1622 	if (scsi_hba_attach_setup(pvs->dip, &pvscsi_io_dma_attr, tran,
1623 	    SCSI_HBA_HBA | SCSI_HBA_TRAN_CDB | SCSI_HBA_TRAN_SCB |
1624 	    SCSI_HBA_ADDR_COMPLEX) !=
1625 	    DDI_SUCCESS) {
1626 		scsi_hba_tran_free(tran);
1627 		dev_err(pvs->dip, CE_WARN, "!failed to attach HBA");
1628 		return (DDI_FAILURE);
1629 	}
1630 
1631 	pvs->tran = tran;
1632 	return (DDI_SUCCESS);
1633 }
1634 
1635 static void
1636 pvscsi_teardown(pvscsi_softc_t *pvs)
1637 {
1638 	timeout_id_t	tid;
1639 
1640 	pvscsi_stop_hba(pvs);
1641 
1642 	if (pvs->tq != NULL) {
1643 		ddi_taskq_destroy(pvs->tq);
1644 	}
1645 	mutex_enter(&pvs->lock);
1646 	tid = pvs->timeout;
1647 	pvs->timeout = 0;
1648 	mutex_exit(&pvs->lock);
1649 
1650 	if (tid != 0) {
1651 		(void) untimeout(tid);
1652 	}
1653 
1654 	pvscsi_free_intrs(pvs);
1655 	pvscsi_free_rings(pvs);
1656 
1657 	if (pvs->mmio_handle != NULL) {
1658 		ddi_regs_map_free(&pvs->mmio_handle);
1659 	}
1660 
1661 	if (pvs->tran != NULL) {
1662 		scsi_hba_tran_free(pvs->tran);
1663 	}
1664 	mutex_destroy(&pvs->lock);
1665 	list_destroy(&pvs->cmd_queue);
1666 	list_destroy(&pvs->devices);
1667 
1668 	kmem_free(pvs, sizeof (*pvs));
1669 }
1670 
1671 static int
1672 pvscsi_iport_attach(dev_info_t *dip)
1673 {
1674 	scsi_hba_tran_t	*tran;
1675 	dev_info_t	*parent;
1676 	pvscsi_softc_t	*pvs;
1677 	char		*ua;
1678 	uint32_t	max_targets;
1679 
1680 	if (((parent = ddi_get_parent(dip)) == NULL) ||
1681 	    ((tran = ddi_get_driver_private(parent)) == NULL) ||
1682 	    ((pvs = tran->tran_hba_private) == NULL) ||
1683 	    ((ua = scsi_hba_iport_unit_address(dip)) == NULL) ||
1684 	    (strcmp(ua, "iport0") != 0)) {
1685 		return (DDI_FAILURE);
1686 	}
1687 
1688 	/* store our softc on the iport private tran */
1689 	tran = ddi_get_driver_private(dip);
1690 	tran->tran_hba_private = pvs;
1691 
1692 	/* setup the target map - allow 100ms for settle / sync times */
1693 	if (scsi_hba_tgtmap_create(dip, SCSI_TM_PERADDR, 100000,
1694 	    100000, pvs, NULL, NULL, &pvs->tgtmap) != DDI_SUCCESS) {
1695 		dev_err(pvs->dip, CE_WARN, "!failed to create target map");
1696 		return (DDI_FAILURE);
1697 	}
1698 
1699 	/* reset hardware and setup the rings */
1700 	mutex_enter(&pvs->lock);
1701 	pvs->detach = B_FALSE; /* in case of reattach */
1702 	pvscsi_start_hba(pvs);
1703 
1704 	max_targets = pvs->max_targets = pvscsi_max_targets(pvs);
1705 	mutex_exit(&pvs->lock);
1706 
1707 	for (uint32_t i = 0; i < max_targets; i++) {
1708 		char addr[8];
1709 		if (pvscsi_probe_target(pvs, i)) {
1710 			(void) snprintf(addr, sizeof (addr), "%x", i);
1711 			(void) scsi_hba_tgtmap_tgt_add(pvs->tgtmap,
1712 			    SCSI_TGT_SCSI_DEVICE, addr, NULL);
1713 		}
1714 	}
1715 
1716 	return (DDI_SUCCESS);
1717 }
1718 
1719 static int
1720 pvscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1721 {
1722 	pvscsi_softc_t	*pvs;
1723 
1724 	if (cmd != DDI_ATTACH) {
1725 		return (DDI_FAILURE);
1726 	}
1727 
1728 	if (scsi_hba_iport_unit_address(dip) != NULL) {
1729 		return (pvscsi_iport_attach(dip));
1730 	}
1731 
1732 	pvs = kmem_zalloc(sizeof (*pvs), KM_SLEEP);
1733 
1734 	/* Setup HBA instance */
1735 	pvs->dip = dip;
1736 
1737 	/*
1738 	 * mutex initialization - note that we always run below
1739 	 * lock level, so we can get by without interrupt priorities
1740 	 */
1741 	mutex_init(&pvs->lock, NULL, MUTEX_DRIVER, NULL);
1742 	list_create(&pvs->cmd_queue, sizeof (pvscsi_cmd_t),
1743 	    offsetof(pvscsi_cmd_t, queue_node));
1744 	list_create(&pvs->devices, sizeof (pvscsi_device_t),
1745 	    offsetof(pvscsi_device_t, node));
1746 
1747 	if ((pvscsi_setup_io(pvs)) != DDI_SUCCESS) {
1748 		dev_err(pvs->dip, CE_WARN, "!failed to setup I/O region");
1749 		pvscsi_teardown(pvs);
1750 		return (DDI_FAILURE);
1751 	}
1752 
1753 	pvscsi_stop_hba(pvs);
1754 
1755 	if ((pvscsi_allocate_rings(pvs)) != DDI_SUCCESS) {
1756 		dev_err(pvs->dip, CE_WARN, "!failed to allocate DMA rings");
1757 		pvscsi_teardown(pvs);
1758 		return (DDI_FAILURE);
1759 	}
1760 
1761 	if (pvscsi_setup_isr(pvs) != DDI_SUCCESS) {
1762 		dev_err(pvs->dip, CE_WARN, "!failed to setup ISR");
1763 		pvscsi_teardown(pvs);
1764 		return (DDI_FAILURE);
1765 	}
1766 
1767 	/* enable interrupts */
1768 	if (pvscsi_enable_intrs(pvs) != DDI_SUCCESS) {
1769 		dev_err(pvs->dip, CE_WARN, "!failed to enable interrupts");
1770 		pvscsi_teardown(pvs);
1771 		return (DDI_FAILURE);
1772 	}
1773 
1774 	pvs->tq = ddi_taskq_create(dip, "iport", 1, TASKQ_DEFAULTPRI, 0);
1775 	if (pvs->tq == NULL) {
1776 		dev_err(pvs->dip, CE_WARN, "!failed creating tq");
1777 		pvscsi_teardown(pvs);
1778 		return (DDI_FAILURE);
1779 	}
1780 	if (pvscsi_hba_setup(pvs) != DDI_SUCCESS) {
1781 		dev_err(pvs->dip, CE_WARN, "!failed to setup HBA");
1782 		pvscsi_teardown(pvs);
1783 		return (DDI_FAILURE);
1784 	}
1785 
1786 	if (scsi_hba_iport_register(dip, "iport0") != 0) {
1787 		dev_err(pvs->dip, CE_WARN, "failed to register iport");
1788 		/* detach cannot fail since we didn't setup the iport */
1789 		(void) scsi_hba_detach(dip);
1790 		pvscsi_teardown(pvs);
1791 		return (DDI_FAILURE);
1792 	}
1793 
1794 	return (DDI_SUCCESS);
1795 }
1796 
1797 static int
1798 pvscsi_iport_detach(dev_info_t *dip)
1799 {
1800 	pvscsi_softc_t	*pvs;
1801 	scsi_hba_tran_t	*tran;
1802 	const char	*ua;
1803 	pvscsi_cmd_t	*reclaimed;
1804 
1805 	if (((ua = scsi_hba_iport_unit_address(dip)) == NULL) ||
1806 	    (strcmp(ua, "iport0") != 0) ||
1807 	    ((tran = ddi_get_driver_private(dip)) == NULL) ||
1808 	    ((pvs = tran->tran_hba_private) == NULL)) {
1809 		return (DDI_FAILURE);
1810 	}
1811 
1812 	/* stop the HBA */
1813 	mutex_enter(&pvs->lock);
1814 	pvs->detach = B_TRUE;
1815 	pvscsi_stop_hba(pvs);
1816 	mutex_exit(&pvs->lock);
1817 
1818 	/* drain the taskq - nothing else will post to it */
1819 	ddi_taskq_wait(pvs->tq);
1820 
1821 	/* reset the HBA */
1822 	mutex_enter(&pvs->lock);
1823 	reclaimed = pvscsi_reclaim_cmds(pvs);
1824 	mutex_exit(&pvs->lock);
1825 
1826 	/*
1827 	 * If we had any commands, complete them so we can
1828 	 * reclaim the resources.  There really should not be any.
1829 	 */
1830 	pvscsi_complete_cmds(pvs, reclaimed);
1831 
1832 	scsi_hba_tgtmap_destroy(pvs->tgtmap);
1833 	pvs->tgtmap = NULL;
1834 
1835 	return (DDI_SUCCESS);
1836 }
1837 
1838 static int
1839 pvscsi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1840 {
1841 	pvscsi_softc_t	*pvs;
1842 	scsi_hba_tran_t	*tran;
1843 
1844 	if (cmd != DDI_DETACH) {
1845 		return (DDI_FAILURE);
1846 	}
1847 
1848 	if (scsi_hba_iport_unit_address(dip) != NULL) {
1849 		return (pvscsi_iport_detach(dip));
1850 	}
1851 
1852 	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
1853 	    ((pvs = tran->tran_hba_private) == NULL)) {
1854 		/* this can only mean we aren't attached yet */
1855 		return (DDI_SUCCESS);
1856 	}
1857 	if (scsi_hba_detach(dip) != DDI_SUCCESS) {
1858 		return (DDI_FAILURE);
1859 	}
1860 
1861 	pvscsi_teardown(pvs);
1862 
1863 	return (DDI_SUCCESS);
1864 }
1865 
1866 static int
1867 pvscsi_quiesce(dev_info_t *dip)
1868 {
1869 	scsi_hba_tran_t	*tran;
1870 	pvscsi_softc_t	*pvs;
1871 
1872 	if (((tran = ddi_get_driver_private(dip)) == NULL) ||
1873 	    ((pvs = tran->tran_hba_private) == NULL)) {
1874 		return (DDI_SUCCESS);
1875 	}
1876 
1877 	pvscsi_stop_hba(pvs);
1878 
1879 	return (DDI_SUCCESS);
1880 }
1881 
1882 static struct dev_ops pvscsi_ops = {
1883 	.devo_rev =	DEVO_REV,
1884 	.devo_refcnt =	0,
1885 	.devo_getinfo =	nodev,
1886 	.devo_identify = nulldev,
1887 	.devo_probe =	nulldev,
1888 	.devo_attach =	pvscsi_attach,
1889 	.devo_detach =	pvscsi_detach,
1890 	.devo_reset =	nodev,
1891 	.devo_cb_ops =	NULL,
1892 	.devo_bus_ops =	NULL,
1893 	.devo_power =	NULL,
1894 	.devo_quiesce =	pvscsi_quiesce
1895 };
1896 
1897 #define	PVSCSI_IDENT "VMware PVSCSI"
1898 
1899 static struct modldrv modldrv = {
1900 	&mod_driverops,
1901 	PVSCSI_IDENT,
1902 	&pvscsi_ops,
1903 };
1904 
1905 static struct modlinkage modlinkage = {
1906 	MODREV_1,
1907 	&modldrv,
1908 	NULL
1909 };
1910 
1911 int
1912 _init(void)
1913 {
1914 	int	ret;
1915 
1916 	/* get HZ - DDI compliant */
1917 	pvscsi_hz = drv_usectohz(1000000);
1918 
1919 	if ((ret = scsi_hba_init(&modlinkage)) != 0) {
1920 		cmn_err(CE_WARN, "!scsi_hba_init() failed");
1921 		return (ret);
1922 	}
1923 
1924 	if ((ret = mod_install(&modlinkage)) != 0) {
1925 		cmn_err(CE_WARN, "!mod_install() failed");
1926 		scsi_hba_fini(&modlinkage);
1927 	}
1928 
1929 	return (ret);
1930 }
1931 
1932 int
1933 _info(struct modinfo *modinfop)
1934 {
1935 	return (mod_info(&modlinkage, modinfop));
1936 }
1937 
1938 int
1939 _fini(void)
1940 {
1941 	int	ret;
1942 
1943 	if ((ret = mod_remove(&modlinkage)) == 0) {
1944 		scsi_hba_fini(&modlinkage);
1945 	}
1946 
1947 	return (ret);
1948 }
1949