xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c (revision 5bbb4db2c3f208d12bf0fd11769728f9e5ba66a2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  *
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains various support routines.
28  */
29 
30 #include <sys/scsi/adapters/pmcs/pmcs.h>
31 
32 /*
33  * Local static data
34  */
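/* Target map stabilization time, in microseconds (one second) */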
35 static int tgtmap_usec = MICROSEC;
36 
37 /*
38  * SAS Topology Configuration
39  */
40 static void pmcs_new_tport(pmcs_hw_t *, pmcs_phy_t *);
41 static void pmcs_configure_expander(pmcs_hw_t *, pmcs_phy_t *, pmcs_iport_t *);
42 
43 static boolean_t pmcs_check_expanders(pmcs_hw_t *, pmcs_phy_t *);
44 static void pmcs_check_expander(pmcs_hw_t *, pmcs_phy_t *);
45 static void pmcs_clear_expander(pmcs_hw_t *, pmcs_phy_t *, int);
46 
47 static int pmcs_expander_get_nphy(pmcs_hw_t *, pmcs_phy_t *);
48 static int pmcs_expander_content_discover(pmcs_hw_t *, pmcs_phy_t *,
49     pmcs_phy_t *);
50 
51 static int pmcs_smp_function_result(pmcs_hw_t *, smp_response_frame_t *);
52 static boolean_t pmcs_validate_devid(pmcs_phy_t *, pmcs_phy_t *, uint32_t);
53 static void pmcs_clear_phys(pmcs_hw_t *, pmcs_phy_t *);
54 static int pmcs_configure_new_devices(pmcs_hw_t *, pmcs_phy_t *);
55 static boolean_t pmcs_report_observations(pmcs_hw_t *);
56 static boolean_t pmcs_report_iport_observations(pmcs_hw_t *, pmcs_iport_t *,
57     pmcs_phy_t *);
58 static pmcs_phy_t *pmcs_find_phy_needing_work(pmcs_hw_t *, pmcs_phy_t *);
59 static int pmcs_kill_devices(pmcs_hw_t *, pmcs_phy_t *);
60 static void pmcs_lock_phy_impl(pmcs_phy_t *, int);
61 static void pmcs_unlock_phy_impl(pmcs_phy_t *, int);
62 static pmcs_phy_t *pmcs_clone_phy(pmcs_phy_t *);
63 static boolean_t pmcs_configure_phy(pmcs_hw_t *, pmcs_phy_t *);
64 static void pmcs_reap_dead_phy(pmcs_phy_t *);
65 static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *);
66 static boolean_t pmcs_phy_target_match(pmcs_phy_t *);
67 static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp,
68     pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name, int line,
69     char *reason_string);
70 
71 /*
72  * Often used strings
73  */
74 const char pmcs_nowrk[] = "%s: unable to get work structure";
75 const char pmcs_nomsg[] = "%s: unable to get Inbound Message entry";
76 const char pmcs_timeo[] = "!%s: command timed out";
77 
78 extern const ddi_dma_attr_t pmcs_dattr;
79 
80 /*
81  * Some initial setup steps.
82  */
83 
84 int
85 pmcs_setup(pmcs_hw_t *pwp)
86 {
87 	uint32_t barval = pwp->mpibar;
88 	uint32_t i, scratch, regbar, regoff, barbar, baroff;
89 	uint32_t new_ioq_depth, ferr = 0;
90 
91 	/*
92 	 * Check current state. If we're not at READY state,
93 	 * we can't go further.
94 	 */
95 	scratch = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
96 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) == PMCS_MSGU_AAP_STATE_ERROR) {
97 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: AAP Error State (0x%x)",
98 		    __func__, pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
99 		    PMCS_MSGU_AAP_ERROR_MASK);
100 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
101 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
102 		return (-1);
103 	}
104 	if ((scratch & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
105 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
106 		    "%s: AAP unit not ready (state 0x%x)",
107 		    __func__, scratch & PMCS_MSGU_AAP_STATE_MASK);
108 		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_INVAL_STATE);
109 		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST);
110 		return (-1);
111 	}
112 
113 	/*
114 	 * Read the offset from the Message Unit scratchpad 0 register.
115 	 * This allows us to read the MPI Configuration table.
116 	 *
117 	 * Check its signature for validity.
118 	 */
119 	baroff = barval;
120 	barbar = barval >> PMCS_MSGU_MPI_BAR_SHIFT;
121 	baroff &= PMCS_MSGU_MPI_OFFSET_MASK;
122 
123 	regoff = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0);
124 	regbar = regoff >> PMCS_MSGU_MPI_BAR_SHIFT;
125 	regoff &= PMCS_MSGU_MPI_OFFSET_MASK;
126 
127 	if (regoff > baroff) {
128 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI Table Length "
129 		    "(register offset=0x%08x, passed offset=0x%08x)", __func__,
130 		    regoff, baroff);
131 		return (-1);
132 	}
133 	if (regbar != barbar) {
134 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad MPI BAR (register "
135 		    "BAR=0x%08x, passed BAR=0x%08x)", __func__,
136 		    regbar, barbar);
137 		return (-1);
138 	}
139 	pwp->mpi_offset = regoff;
140 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS) != PMCS_SIGNATURE) {
141 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
142 		    "%s: Bad MPI Configuration Table Signature 0x%x", __func__,
143 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_AS));
144 		return (-1);
145 	}
146 
147 	if (pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR) != PMCS_MPI_REVISION1) {
148 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
149 		    "%s: Bad MPI Configuration Revision 0x%x", __func__,
150 		    pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IR));
151 		return (-1);
152 	}
153 
154 	/*
155 	 * Generate offsets for the General System, Inbound Queue Configuration
156 	 * and Outbound Queue configuration tables. This way the macros to
157 	 * access those tables will work correctly.
158 	 */
159 	pwp->mpi_gst_offset =
160 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_GSTO);
161 	pwp->mpi_iqc_offset =
162 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_IQCTO);
163 	pwp->mpi_oqc_offset =
164 	    pwp->mpi_offset + pmcs_rd_mpi_tbl(pwp, PMCS_MPI_OQCTO);
165 
166 	pwp->fw = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FW);
167 
168 	pwp->max_cmd = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_MOIO);
169 	pwp->max_dev = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO0) >> 16;
170 
171 	pwp->max_iq = PMCS_MNIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
172 	pwp->max_oq = PMCS_MNOQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
173 	pwp->nphy = PMCS_NPHY(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1));
174 	if (pwp->max_iq <= PMCS_NIQ) {
175 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Inbound Queues "
176 		    "supported (need %d, max_iq=%d)", __func__, PMCS_NIQ,
177 		    pwp->max_iq);
178 		return (-1);
179 	}
180 	if (pwp->max_oq <= PMCS_NOQ) {
181 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: not enough Outbound Queues "
182 		    "supported (need %d, max_oq=%d)", __func__, PMCS_NOQ,
183 		    pwp->max_oq);
184 		return (-1);
185 	}
186 	if (pwp->nphy == 0) {
187 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: zero phys reported",
188 		    __func__);
189 		return (-1);
190 	}
191 	if (PMCS_HPIQ(pmcs_rd_mpi_tbl(pwp, PMCS_MPI_INFO1))) {
192 		pwp->hipri_queue = (1 << PMCS_IQ_OTHER);
193 	}
194 
195 
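	/*
	 * Route per-PHY general events and SATA NCQ error events to the
	 * events outbound queue.
	 */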
196 	for (i = 0; i < pwp->nphy; i++) {
197 		PMCS_MPI_EVQSET(pwp, PMCS_OQ_EVENTS, i);
198 		PMCS_MPI_NCQSET(pwp, PMCS_OQ_EVENTS, i);
199 	}
200 
201 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_INFO2,
202 	    (PMCS_OQ_EVENTS << GENERAL_EVENT_OQ_SHIFT) |
203 	    (PMCS_OQ_EVENTS << DEVICE_HANDLE_REMOVED_SHIFT));
204 
205 	/*
206 	 * Verify that ioq_depth is valid (> 0 and not so high that it
207 	 * would cause us to overrun the chip with commands).
208 	 */
209 	if (pwp->ioq_depth == 0) {
210 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
211 		    "%s: I/O queue depth set to 0. Setting to %d",
212 		    __func__, PMCS_NQENTRY);
213 		pwp->ioq_depth = PMCS_NQENTRY;
214 	}
215 
216 	if (pwp->ioq_depth < PMCS_MIN_NQENTRY) {
217 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
218 		    "%s: I/O queue depth set too low (%d). Setting to %d",
219 		    __func__, pwp->ioq_depth, PMCS_MIN_NQENTRY);
220 		pwp->ioq_depth = PMCS_MIN_NQENTRY;
221 	}
222 
223 	if (pwp->ioq_depth > (pwp->max_cmd / (PMCS_IO_IQ_MASK + 1))) {
224 		new_ioq_depth = pwp->max_cmd / (PMCS_IO_IQ_MASK + 1);
225 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
226 		    "%s: I/O queue depth set too high (%d). Setting to %d",
227 		    __func__, pwp->ioq_depth, new_ioq_depth);
228 		pwp->ioq_depth = new_ioq_depth;
229 	}
230 
231 	/*
232 	 * Allocate consistent memory for OQs and IQs.
233 	 */
234 	pwp->iqp_dma_attr = pwp->oqp_dma_attr = pmcs_dattr;
235 	pwp->iqp_dma_attr.dma_attr_align =
236 	    pwp->oqp_dma_attr.dma_attr_align = PMCS_QENTRY_SIZE;
237 
238 	/*
239 	 * The Rev C chip has the ability to do PIO to or from consistent
240 	 * memory anywhere in a 64 bit address space, but the firmware is
241 	 * not presently set up to do so.
242 	 */
243 	pwp->iqp_dma_attr.dma_attr_addr_hi =
244 	    pwp->oqp_dma_attr.dma_attr_addr_hi = 0x000000FFFFFFFFFFull;
245 
246 	for (i = 0; i < PMCS_NIQ; i++) {
247 		if (pmcs_dma_setup(pwp, &pwp->iqp_dma_attr,
248 		    &pwp->iqp_acchdls[i],
249 		    &pwp->iqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
250 		    (caddr_t *)&pwp->iqp[i], &pwp->iqaddr[i]) == B_FALSE) {
251 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
252 			    "Failed to setup DMA for iqp[%d]", i);
253 			return (-1);
254 		}
255 		bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
256 	}
257 
258 	for (i = 0; i < PMCS_NOQ; i++) {
259 		if (pmcs_dma_setup(pwp, &pwp->oqp_dma_attr,
260 		    &pwp->oqp_acchdls[i],
261 		    &pwp->oqp_handles[i], PMCS_QENTRY_SIZE * pwp->ioq_depth,
262 		    (caddr_t *)&pwp->oqp[i], &pwp->oqaddr[i]) == B_FALSE) {
263 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
264 			    "Failed to setup DMA for oqp[%d]", i);
265 			return (-1);
266 		}
267 		bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
268 	}
269 
270 	/*
271 	 * Install the IQ and OQ addresses (and null out the rest).
272 	 */
273 	for (i = 0; i < pwp->max_iq; i++) {
274 		pwp->iqpi_offset[i] = pmcs_rd_iqc_tbl(pwp, PMCS_IQPIOFFX(i));
275 		if (i < PMCS_NIQ) {
276 			if (i != PMCS_IQ_OTHER) {
277 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
278 				    pwp->ioq_depth | (PMCS_QENTRY_SIZE << 16));
279 			} else {
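				/* Bit 30: high-priority queue */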
280 				pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i),
281 				    (1 << 30) | pwp->ioq_depth |
282 				    (PMCS_QENTRY_SIZE << 16));
283 			}
284 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i),
285 			    DWORD1(pwp->iqaddr[i]));
286 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i),
287 			    DWORD0(pwp->iqaddr[i]));
288 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i),
289 			    DWORD1(pwp->ciaddr+IQ_OFFSET(i)));
290 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i),
291 			    DWORD0(pwp->ciaddr+IQ_OFFSET(i)));
292 		} else {
293 			pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
294 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
295 			pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
296 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
297 			pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
298 		}
299 	}
300 
301 	for (i = 0; i < pwp->max_oq; i++) {
302 		pwp->oqci_offset[i] = pmcs_rd_oqc_tbl(pwp, PMCS_OQCIOFFX(i));
303 		if (i < PMCS_NOQ) {
304 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), pwp->ioq_depth |
305 			    (PMCS_QENTRY_SIZE << 16) | OQIEX);
306 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i),
307 			    DWORD1(pwp->oqaddr[i]));
308 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i),
309 			    DWORD0(pwp->oqaddr[i]));
310 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i),
311 			    DWORD1(pwp->ciaddr+OQ_OFFSET(i)));
312 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i),
313 			    DWORD0(pwp->ciaddr+OQ_OFFSET(i)));
314 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i),
315 			    pwp->oqvec[i] << 24);
316 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
317 		} else {
318 			pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
319 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
320 			pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
321 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
322 			pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
323 			pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
324 			pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
325 		}
326 	}
327 
328 	/*
329 	 * Set up logging, if defined.
330 	 */
331 	if (pwp->fwlog) {
332 		uint64_t logdma = pwp->fwaddr;
333 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAH, DWORD1(logdma));
334 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBAL, DWORD0(logdma));
335 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELBS, PMCS_FWLOG_SIZE >> 1);
336 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_MELSEV, pwp->fwlog);
337 		logdma += (PMCS_FWLOG_SIZE >> 1);
338 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAH, DWORD1(logdma));
339 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBAL, DWORD0(logdma));
340 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELBS, PMCS_FWLOG_SIZE >> 1);
341 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_IELSEV, pwp->fwlog);
342 	}
343 
344 	/*
345 	 * Interrupt vectors, outbound queues, and odb_auto_clear
346 	 *
347 	 * MSI/MSI-X:
348 	 * If we got 4 interrupt vectors, we'll assign one to each outbound
349 	 * queue as well as the fatal interrupt, and auto clear can be set
350 	 * for each.
351 	 *
352 	 * If we only got 2 vectors, one will be used for I/O completions
353 	 * and the other is shared by the remaining interrupt sources.  In
354 	 * this case, auto_clear can only be set for I/Os, which is fine.
355 	 * The fatal interrupt will be mapped to the PMCS_FATAL_INTERRUPT
356 	 * bit, which is not an interrupt vector.
357 	 *
358 	 * MSI/MSI-X/INT-X:
359 	 * If we only got 1 interrupt vector, auto_clear must be set to 0,
360 	 * and again the fatal interrupt will be mapped to the
361 	 * PMCS_FATAL_INTERRUPT bit (again, not an interrupt vector).
362 	 */
363 
364 	switch (pwp->int_type) {
365 	case PMCS_INT_MSIX:
366 	case PMCS_INT_MSI:
367 		switch (pwp->intr_cnt) {
368 		case 1:
369 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
370 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
371 			pwp->odb_auto_clear = 0;
372 			break;
373 		case 2:
374 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
375 			    (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
376 			pwp->odb_auto_clear = (1 << PMCS_FATAL_INTERRUPT) |
377 			    (1 << PMCS_MSIX_IODONE);
378 			break;
379 		case 4:
380 			pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, PMCS_FERRIE |
381 			    (PMCS_MSIX_FATAL << PMCS_FERIV_SHIFT));
382 			pwp->odb_auto_clear = (1 << PMCS_MSIX_FATAL) |
383 			    (1 << PMCS_MSIX_GENERAL) | (1 << PMCS_MSIX_IODONE) |
384 			    (1 << PMCS_MSIX_EVENTS);
385 			break;
386 		}
387 		break;
388 
389 	case PMCS_INT_FIXED:
390 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR,
391 		    PMCS_FERRIE | (PMCS_FATAL_INTERRUPT << PMCS_FERIV_SHIFT));
392 		pwp->odb_auto_clear = 0;
393 		break;
394 	}
395 
396 	/*
397 	 * Enable Interrupt Reassertion
398 	 * Default Delay 1000us
399 	 */
400 	ferr = pmcs_rd_mpi_tbl(pwp, PMCS_MPI_FERR);
401 	if ((ferr & PMCS_MPI_IRAE) == 0) {
402 		ferr &= ~(PMCS_MPI_IRAU | PMCS_MPI_IRAD_MASK);
403 		pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, ferr | PMCS_MPI_IRAE);
404 	}
405 
406 	pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR, pwp->odb_auto_clear);
407 	pwp->mpi_table_setup = 1;
408 	return (0);
409 }
410 
411 /*
412  * Start the Message Passing protocol with the PMC chip.
413  */
414 int
415 pmcs_start_mpi(pmcs_hw_t *pwp)
416 {
417 	int i;
418 
419 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPIINI);
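	/* Poll up to 1s for the MPI init doorbell to be acknowledged */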
420 	for (i = 0; i < 1000; i++) {
421 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
422 		    PMCS_MSGU_IBDB_MPIINI) == 0) {
423 			break;
424 		}
425 		drv_usecwait(1000);
426 	}
427 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPIINI) {
428 		return (-1);
429 	}
430 	drv_usecwait(500000);
431 
432 	/*
433 	 * Check to make sure we got to INIT state.
434 	 */
435 	if (PMCS_MPI_S(pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE)) !=
436 	    PMCS_MPI_STATE_INIT) {
437 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI launch failed (GST 0x%x "
438 		    "DBCLR 0x%x)", __func__,
439 		    pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE),
440 		    pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB_CLEAR));
441 		return (-1);
442 	}
443 	return (0);
444 }
445 
446 /*
447  * Stop the Message Passing protocol with the PMC chip.
448  */
449 int
450 pmcs_stop_mpi(pmcs_hw_t *pwp)
451 {
452 	int i;
453 
454 	for (i = 0; i < pwp->max_iq; i++) {
455 		pmcs_wr_iqc_tbl(pwp, PMCS_IQC_PARMX(i), 0);
456 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBAHX(i), 0);
457 		pmcs_wr_iqc_tbl(pwp, PMCS_IQBALX(i), 0);
458 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBAHX(i), 0);
459 		pmcs_wr_iqc_tbl(pwp, PMCS_IQCIBALX(i), 0);
460 	}
461 	for (i = 0; i < pwp->max_oq; i++) {
462 		pmcs_wr_oqc_tbl(pwp, PMCS_OQC_PARMX(i), 0);
463 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBAHX(i), 0);
464 		pmcs_wr_oqc_tbl(pwp, PMCS_OQBALX(i), 0);
465 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBAHX(i), 0);
466 		pmcs_wr_oqc_tbl(pwp, PMCS_OQPIBALX(i), 0);
467 		pmcs_wr_oqc_tbl(pwp, PMCS_OQIPARM(i), 0);
468 		pmcs_wr_oqc_tbl(pwp, PMCS_OQDICX(i), 0);
469 	}
470 	pmcs_wr_mpi_tbl(pwp, PMCS_MPI_FERR, 0);
471 	pmcs_wr_msgunit(pwp, PMCS_MSGU_IBDB, PMCS_MSGU_IBDB_MPICTU);
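	/* Poll up to 2s for the MPI shutdown doorbell to be acknowledged */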
472 	for (i = 0; i < 2000; i++) {
473 		if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) &
474 		    PMCS_MSGU_IBDB_MPICTU) == 0) {
475 			break;
476 		}
477 		drv_usecwait(1000);
478 	}
479 	if (pmcs_rd_msgunit(pwp, PMCS_MSGU_IBDB) & PMCS_MSGU_IBDB_MPICTU) {
480 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: MPI stop failed", __func__);
481 		return (-1);
482 	}
483 	return (0);
484 }
485 
486 /*
487  * Do a sequence of ECHO messages to test MPI functionality,
488  * all inbound and outbound queue functionality, and interrupts.
489  */
490 int
491 pmcs_echo_test(pmcs_hw_t *pwp)
492 {
493 	echo_test_t fred;
494 	struct pmcwork *pwrk;
495 	uint32_t *msg, count;
496 	int iqe = 0, iqo = 0, result, rval = 0;
497 	int iterations;
498 	hrtime_t echo_start, echo_end, echo_total;
499 
500 	ASSERT(pwp->max_cmd > 0);
501 
502 	/*
503 	 * We want iterations to be max_cmd * 3 to ensure that we run the
504 	 * echo test enough times to iterate through every inbound queue
505 	 * at least twice.
506 	 */
507 	iterations = pwp->max_cmd * 3;
508 
509 	echo_total = 0;
510 	count = 0;
511 
512 	while (count < iterations) {
513 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
514 		if (pwrk == NULL) {
515 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
516 			rval = -1;
517 			break;
518 		}
519 
520 		mutex_enter(&pwp->iqp_lock[iqe]);
521 		msg = GET_IQ_ENTRY(pwp, iqe);
522 		if (msg == NULL) {
523 			mutex_exit(&pwp->iqp_lock[iqe]);
524 			pmcs_pwork(pwp, pwrk);
525 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
526 			rval = -1;
527 			break;
528 		}
529 
530 		bzero(msg, PMCS_QENTRY_SIZE);
531 
532 		if (iqe == PMCS_IQ_OTHER) {
533 			/* This is on the high priority queue */
534 			msg[0] = LE_32(PMCS_HIPRI(pwp, iqo, PMCIN_ECHO));
535 		} else {
536 			msg[0] = LE_32(PMCS_IOMB_IN_SAS(iqo, PMCIN_ECHO));
537 		}
538 		msg[1] = LE_32(pwrk->htag);
539 		fred.signature = 0xdeadbeef;
540 		fred.count = count;
541 		fred.ptr = &count;
542 		(void) memcpy(&msg[2], &fred, sizeof (fred));
543 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
544 
545 		INC_IQ_ENTRY(pwp, iqe);
546 
547 		echo_start = gethrtime();
548 		DTRACE_PROBE2(pmcs__echo__test__wait__start,
549 		    hrtime_t, echo_start, uint32_t, pwrk->htag);
550 
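		/* Rotate through all inbound and outbound queues */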
551 		if (++iqe == PMCS_NIQ) {
552 			iqe = 0;
553 		}
554 		if (++iqo == PMCS_NOQ) {
555 			iqo = 0;
556 		}
557 
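		/* Wait up to 250 ms for this echo to complete */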
558 		WAIT_FOR(pwrk, 250, result);
559 
560 		echo_end = gethrtime();
561 		DTRACE_PROBE2(pmcs__echo__test__wait__end,
562 		    hrtime_t, echo_end, int, result);
563 
564 		echo_total += (echo_end - echo_start);
565 
566 		pmcs_pwork(pwp, pwrk);
567 		if (result) {
568 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
569 			    "%s: command timed out on echo test #%d",
570 			    __func__, count);
571 			rval = -1;
572 			break;
573 		}
574 	}
575 
576 	/*
577 	 * The intr_threshold is adjusted by PMCS_INTR_THRESHOLD in order to
578 	 * remove the overhead of things like the delay in getting signaled
579 	 * for completion.
580 	 */
581 	if (echo_total != 0) {
582 		pwp->io_intr_coal.intr_latency =
583 		    (echo_total / iterations) / 2;
584 		pwp->io_intr_coal.intr_threshold =
585 		    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
586 		    pwp->io_intr_coal.intr_latency);
587 	}
588 
589 	return (rval);
590 }
591 
592 /*
593  * Start the (real) phys
594  */
595 int
596 pmcs_start_phy(pmcs_hw_t *pwp, int phynum, int linkmode, int speed)
597 {
598 	int result;
599 	uint32_t *msg;
600 	struct pmcwork *pwrk;
601 	pmcs_phy_t *pptr;
602 	sas_identify_af_t sap;
603 
604 	mutex_enter(&pwp->lock);
605 	pptr = pwp->root_phys + phynum;
606 	if (pptr == NULL) {
607 		mutex_exit(&pwp->lock);
608 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cannot find port %d",
609 		    __func__, phynum);
610 		return (0);
611 	}
612 
613 	pmcs_lock_phy(pptr);
614 	mutex_exit(&pwp->lock);
615 
616 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
617 	if (pwrk == NULL) {
618 		pmcs_unlock_phy(pptr);
619 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
620 		return (-1);
621 	}
622 
623 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
624 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
625 
626 	if (msg == NULL) {
627 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
628 		pmcs_unlock_phy(pptr);
629 		pmcs_pwork(pwp, pwrk);
630 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
631 		return (-1);
632 	}
633 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_START));
634 	msg[1] = LE_32(pwrk->htag);
635 	msg[2] = LE_32(linkmode | speed | phynum);
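	/* Build the SAS IDENTIFY frame this PHY transmits at link-up */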
636 	bzero(&sap, sizeof (sap));
637 	sap.device_type = SAS_IF_DTYPE_ENDPOINT;
638 	sap.ssp_ini_port = 1;
639 
640 	if (pwp->separate_ports) {
641 		pmcs_wwn2barray(pwp->sas_wwns[phynum], sap.sas_address);
642 	} else {
643 		pmcs_wwn2barray(pwp->sas_wwns[0], sap.sas_address);
644 	}
645 
646 	ASSERT(phynum < SAS2_PHYNUM_MAX);
647 	sap.phy_identifier = phynum & SAS2_PHYNUM_MASK;
648 	(void) memcpy(&msg[3], &sap, sizeof (sas_identify_af_t));
649 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
650 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
651 
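	/* Record programmed and hardware min/max link rates for this PHY */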
652 	pptr->state.prog_min_rate = (lowbit((ulong_t)speed) - 1);
653 	pptr->state.prog_max_rate = (highbit((ulong_t)speed) - 1);
654 	pptr->state.hw_min_rate = PMCS_HW_MIN_LINK_RATE;
655 	pptr->state.hw_max_rate = PMCS_HW_MAX_LINK_RATE;
656 
657 	pmcs_unlock_phy(pptr);
658 	WAIT_FOR(pwrk, 1000, result);
659 	pmcs_pwork(pwp, pwrk);
660 
661 	if (result) {
662 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
663 	} else {
664 		mutex_enter(&pwp->lock);
665 		pwp->phys_started |= (1 << phynum);
666 		mutex_exit(&pwp->lock);
667 	}
668 
669 	return (0);
670 }
671 
672 int
673 pmcs_start_phys(pmcs_hw_t *pwp)
674 {
675 	int i;
676 
677 	for (i = 0; i < pwp->nphy; i++) {
678 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
679 			if (pmcs_start_phy(pwp, i,
680 			    (pwp->phymode << PHY_MODE_SHIFT),
681 			    pwp->physpeed << PHY_LINK_SHIFT)) {
682 				return (-1);
683 			}
684 			if (pmcs_clear_diag_counters(pwp, i)) {
685 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed to "
686 				    "reset counters on PHY (%d)", __func__, i);
687 			}
688 		}
689 	}
690 	return (0);
691 }
692 
693 /*
694  * Called with PHY locked
695  */
696 int
697 pmcs_reset_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t type)
698 {
699 	uint32_t *msg;
700 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
701 	const char *mbar;
702 	uint32_t amt;
703 	uint32_t pdevid;
704 	uint32_t stsoff;
705 	uint32_t status;
706 	int result, level, phynum;
707 	struct pmcwork *pwrk;
708 	uint32_t htag;
709 
710 	ASSERT(mutex_owned(&pptr->phy_lock));
711 
712 	bzero(iomb, PMCS_QENTRY_SIZE);
713 	phynum = pptr->phynum;
714 	level = pptr->level;
715 	if (level > 0) {
716 		pdevid = pptr->parent->device_id;
717 	}
718 
719 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
720 
721 	if (pwrk == NULL) {
722 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
723 		return (ENOMEM);
724 	}
725 
726 	pwrk->arg = iomb;
727 
728 	/*
729 	 * If level > 0, we need to issue an SMP_REQUEST with a PHY_CONTROL
730 	 * function to do either a link reset or hard reset.  If level == 0,
731 	 * then we do a LOCAL_PHY_CONTROL IOMB to do link/hard reset to the
732 	 * root (local) PHY
733 	 */
734 	if (level) {
735 		stsoff = 2;
736 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
737 		    PMCIN_SMP_REQUEST));
738 		iomb[1] = LE_32(pwrk->htag);
739 		iomb[2] = LE_32(pdevid);
740 		iomb[3] = LE_32(40 << SMP_REQUEST_LENGTH_SHIFT);
741 		/*
742 		 * Send SMP PHY CONTROL/HARD or LINK RESET
743 		 */
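		/* 0x40 = SMP REQUEST frame type, 0x91 = PHY CONTROL function */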
744 		iomb[4] = BE_32(0x40910000);
745 		iomb[5] = 0;
746 
747 		if (type == PMCS_PHYOP_HARD_RESET) {
748 			mbar = "SMP PHY CONTROL/HARD RESET";
749 			iomb[6] = BE_32((phynum << 24) |
750 			    (PMCS_PHYOP_HARD_RESET << 16));
751 		} else {
752 			mbar = "SMP PHY CONTROL/LINK RESET";
753 			iomb[6] = BE_32((phynum << 24) |
754 			    (PMCS_PHYOP_LINK_RESET << 16));
755 		}
756 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
757 		    "%s: sending %s to %s for phy 0x%x",
758 		    __func__, mbar, pptr->parent->path, pptr->phynum);
759 		amt = 7;
760 	} else {
761 		/*
762 		 * Unlike most other Outbound messages, status for
763 		 * a local phy operation is in DWORD 3.
764 		 */
765 		stsoff = 3;
766 		iomb[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
767 		    PMCIN_LOCAL_PHY_CONTROL));
768 		iomb[1] = LE_32(pwrk->htag);
769 		if (type == PMCS_PHYOP_LINK_RESET) {
770 			mbar = "LOCAL PHY LINK RESET";
771 			iomb[2] = LE_32((PMCS_PHYOP_LINK_RESET << 8) | phynum);
772 		} else {
773 			mbar = "LOCAL PHY HARD RESET";
774 			iomb[2] = LE_32((PMCS_PHYOP_HARD_RESET << 8) | phynum);
775 		}
776 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
777 		    "%s: sending %s to %s", __func__, mbar, pptr->path);
778 		amt = 3;
779 	}
780 
781 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
782 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
783 	if (msg == NULL) {
784 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
785 		pmcs_pwork(pwp, pwrk);
786 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
787 		return (ENOMEM);
788 	}
789 	COPY_MESSAGE(msg, iomb, amt);
790 	htag = pwrk->htag;
791 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
792 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
793 
794 	pmcs_unlock_phy(pptr);
795 	WAIT_FOR(pwrk, 1000, result);
796 	pmcs_pwork(pwp, pwrk);
797 	pmcs_lock_phy(pptr);
798 
799 	if (result) {
800 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
801 
802 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
803 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
804 			    "%s: Unable to issue SMP abort for htag 0x%08x",
805 			    __func__, htag);
806 		} else {
807 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
808 			    "%s: Issuing SMP ABORT for htag 0x%08x",
809 			    __func__, htag);
810 		}
811 		return (EIO);
812 	}
813 	status = LE_32(iomb[stsoff]);
814 
815 	if (status != PMCOUT_STATUS_OK) {
816 		char buf[32];
817 		const char *es = pmcs_status_str(status);
818 		if (es == NULL) {
819 			(void) snprintf(buf, sizeof (buf), "Status 0x%x",
820 			    status);
821 			es = buf;
822 		}
823 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
824 		    "%s: %s action returned %s for %s", __func__, mbar, es,
825 		    pptr->path);
826 		return (EIO);
827 	}
828 
829 	return (0);
830 }
831 
832 /*
833  * Stop the (real) phys.  No PHY or softstate locks are required as this only
834  * happens during detach.
835  */
836 void
837 pmcs_stop_phy(pmcs_hw_t *pwp, int phynum)
838 {
839 	int result;
840 	pmcs_phy_t *pptr;
841 	uint32_t *msg;
842 	struct pmcwork *pwrk;
843 
844 	pptr = pwp->root_phys + phynum;
845 	if (pptr == NULL) {
846 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
847 		    "%s: unable to find port %d", __func__, phynum);
848 		return;
849 	}
850 
851 	if (pwp->phys_started & (1 << phynum)) {
852 		pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
853 
854 		if (pwrk == NULL) {
855 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
856 			return;
857 		}
858 
859 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
860 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
861 
862 		if (msg == NULL) {
863 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
864 			pmcs_pwork(pwp, pwrk);
865 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
866 			return;
867 		}
868 
869 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_PHY_STOP));
870 		msg[1] = LE_32(pwrk->htag);
871 		msg[2] = LE_32(phynum);
872 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
873 		/*
874 		 * Make this unconfigured now.
875 		 */
876 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
877 		WAIT_FOR(pwrk, 1000, result);
878 
879 		pmcs_pwork(pwp, pwrk);
880 		if (result) {
881 			pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
882 		}
883 
884 		pwp->phys_started &= ~(1 << phynum);
885 	}
886 
887 	pptr->configured = 0;
888 }
889 
890 /*
891  * No locks should be required as this is only called during detach
892  */
893 void
894 pmcs_stop_phys(pmcs_hw_t *pwp)
895 {
896 	int i;
897 	for (i = 0; i < pwp->nphy; i++) {
898 		if ((pwp->phyid_block_mask & (1 << i)) == 0) {
899 			pmcs_stop_phy(pwp, i);
900 		}
901 	}
902 }
903 
904 /*
905  * Run SAS_DIAG_EXECUTE with cmd and cmd_desc passed.
906  * 	ERR_CNT_RESET: return status of cmd
907  *	DIAG_REPORT_GET: return value of the counter
908  */
909 int
910 pmcs_sas_diag_execute(pmcs_hw_t *pwp, uint32_t cmd, uint32_t cmd_desc,
911     uint8_t phynum)
912 {
913 	uint32_t htag, *ptr, status, msg[PMCS_MSG_SIZE << 1];
914 	int result;
915 	struct pmcwork *pwrk;
916 
917 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
918 	if (pwrk == NULL) {
919 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
920 		return (DDI_FAILURE);
921 	}
922 	pwrk->arg = msg;
923 	htag = pwrk->htag;
924 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_SAS_DIAG_EXECUTE));
925 	msg[1] = LE_32(htag);
926 	msg[2] = LE_32((cmd << PMCS_DIAG_CMD_SHIFT) |
927 	    (cmd_desc << PMCS_DIAG_CMD_DESC_SHIFT) | phynum);
928 
929 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
930 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
931 	if (ptr == NULL) {
932 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
933 		pmcs_pwork(pwp, pwrk);
934 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
935 		return (DDI_FAILURE);
936 	}
937 	COPY_MESSAGE(ptr, msg, 3);
938 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
939 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
940 
941 	WAIT_FOR(pwrk, 1000, result);
942 
943 	pmcs_pwork(pwp, pwrk);
944 
945 	if (result) {
946 		pmcs_timed_out(pwp, htag, __func__);
947 		return (DDI_FAILURE);
948 	}
949 
950 	status = LE_32(msg[3]);
951 
952 	/* Return for counter reset */
953 	if (cmd == PMCS_ERR_CNT_RESET)
954 		return (status);
955 
956 	/* Return for counter value */
957 	if (status) {
958 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: failed, status (0x%x)",
959 		    __func__, status);
960 		return (DDI_FAILURE);
961 	}
962 	return (LE_32(msg[4]));
963 }
964 
965 /* Get the current value of the counter for desc on phynum and return it. */
966 int
967 pmcs_get_diag_report(pmcs_hw_t *pwp, uint32_t desc, uint8_t phynum)
968 {
969 	return (pmcs_sas_diag_execute(pwp, PMCS_DIAG_REPORT_GET, desc, phynum));
970 }
971 
972 /* Clear all of the counters for phynum. Returns the status of the command. */
973 int
974 pmcs_clear_diag_counters(pmcs_hw_t *pwp, uint8_t phynum)
975 {
976 	uint32_t	cmd = PMCS_ERR_CNT_RESET;
977 	uint32_t	cmd_desc;
978 
979 	cmd_desc = PMCS_INVALID_DWORD_CNT;
980 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
981 		return (DDI_FAILURE);
982 
983 	cmd_desc = PMCS_DISPARITY_ERR_CNT;
984 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
985 		return (DDI_FAILURE);
986 
987 	cmd_desc = PMCS_LOST_DWORD_SYNC_CNT;
988 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
989 		return (DDI_FAILURE);
990 
991 	cmd_desc = PMCS_RESET_FAILED_CNT;
992 	if (pmcs_sas_diag_execute(pwp, cmd, cmd_desc, phynum))
993 		return (DDI_FAILURE);
994 
995 	return (DDI_SUCCESS);
996 }
997 
998 /*
999  * Get firmware timestamp
1000  */
1001 int
1002 pmcs_get_time_stamp(pmcs_hw_t *pwp, uint64_t *ts)
1003 {
1004 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE << 1];
1005 	int result;
1006 	struct pmcwork *pwrk;
1007 
1008 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, NULL);
1009 	if (pwrk == NULL) {
1010 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
1011 		return (-1);
1012 	}
1013 	pwrk->arg = msg;
1014 	htag = pwrk->htag;
1015 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_EVENTS, PMCIN_GET_TIME_STAMP));
1016 	msg[1] = LE_32(pwrk->htag);
1017 
1018 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1019 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1020 	if (ptr == NULL) {
1021 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1022 		pmcs_pwork(pwp, pwrk);
1023 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
1024 		return (-1);
1025 	}
1026 	COPY_MESSAGE(ptr, msg, 2);
1027 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1028 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1029 
1030 	WAIT_FOR(pwrk, 1000, result);
1031 
1032 	pmcs_pwork(pwp, pwrk);
1033 
1034 	if (result) {
1035 		pmcs_timed_out(pwp, htag, __func__);
1036 		return (-1);
1037 	}
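	/* Timestamp is returned as two 32-bit words, low word first */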
1038 	*ts = LE_32(msg[2]) | (((uint64_t)LE_32(msg[3])) << 32);
1039 	return (0);
1040 }
1041 
1042 /*
1043  * Dump all pertinent registers
1044  */
1045 
1046 void
1047 pmcs_register_dump(pmcs_hw_t *pwp)
1048 {
1049 	int i;
1050 	uint32_t val;
1051 
1052 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump start",
1053 	    ddi_get_instance(pwp->dip));
1054 	pmcs_prt(pwp, PMCS_PRT_INFO,
1055 	    "OBDB (intr): 0x%08x (mask): 0x%08x (clear): 0x%08x",
1056 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB),
1057 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_MASK),
1058 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR));
1059 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH0: 0x%08x",
1060 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH0));
1061 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH1: 0x%08x",
1062 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1));
1063 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH2: 0x%08x",
1064 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2));
1065 	pmcs_prt(pwp, PMCS_PRT_INFO, "SCRATCH3: 0x%08x",
1066 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH3));
1067 	for (i = 0; i < PMCS_NIQ; i++) {
1068 		pmcs_prt(pwp, PMCS_PRT_INFO, "IQ %d: CI %u PI %u",
1069 		    i, pmcs_rd_iqci(pwp, i), pmcs_rd_iqpi(pwp, i));
1070 	}
1071 	for (i = 0; i < PMCS_NOQ; i++) {
1072 		pmcs_prt(pwp, PMCS_PRT_INFO, "OQ %d: CI %u PI %u",
1073 		    i, pmcs_rd_oqci(pwp, i), pmcs_rd_oqpi(pwp, i));
1074 	}
1075 	val = pmcs_rd_gst_tbl(pwp, PMCS_GST_BASE);
1076 	pmcs_prt(pwp, PMCS_PRT_INFO,
1077 	    "GST TABLE BASE: 0x%08x (STATE=0x%x QF=%d GSTLEN=%d HMI_ERR=0x%x)",
1078 	    val, PMCS_MPI_S(val), PMCS_QF(val), PMCS_GSTLEN(val) * 4,
1079 	    PMCS_HMI_ERR(val));
1080 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ0: 0x%08x",
1081 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ0));
1082 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IQFRZ1: 0x%08x",
1083 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IQFRZ1));
1084 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE MSGU TICK: 0x%08x",
1085 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK));
1086 	pmcs_prt(pwp, PMCS_PRT_INFO, "GST TABLE IOP TICK: 0x%08x",
1087 	    pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK));
1088 	for (i = 0; i < pwp->nphy; i++) {
1089 		uint32_t rerrf, pinfo, started = 0, link = 0;
1090 		pinfo = pmcs_rd_gst_tbl(pwp, PMCS_GST_PHY_INFO(i));
1091 		if (pinfo & 1) {
1092 			started = 1;
1093 			link = pinfo & 2;
1094 		}
1095 		rerrf = pmcs_rd_gst_tbl(pwp, PMCS_GST_RERR_INFO(i));
1096 		pmcs_prt(pwp, PMCS_PRT_INFO,
1097 		    "GST TABLE PHY%d STARTED=%d LINK=%d RERR=0x%08x",
1098 		    i, started, link, rerrf);
1099 	}
1100 	pmcs_prt(pwp, PMCS_PRT_INFO, "pmcs%d: Register dump end",
1101 	    ddi_get_instance(pwp->dip));
1102 }
1103 
1104 /*
1105  * Handle SATA Abort and other error processing
1106  */
1107 int
1108 pmcs_abort_handler(pmcs_hw_t *pwp)
1109 {
1110 	pmcs_phy_t *pptr, *pnext, *pnext_uplevel[PMCS_MAX_XPND];
1111 	int r, level = 0;
1112 
1113 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s", __func__);
1114 
1115 	mutex_enter(&pwp->lock);
1116 	pptr = pwp->root_phys;
1117 	mutex_exit(&pwp->lock);
1118 
1119 	while (pptr) {
1120 		/*
1121 		 * XXX: Need to make sure this doesn't happen
1122 		 * XXX: when non-NCQ commands are running.
1123 		 */
1124 		pmcs_lock_phy(pptr);
1125 		if (pptr->need_rl_ext) {
1126 			ASSERT(pptr->dtype == SATA);
1127 			if (pmcs_acquire_scratch(pwp, B_FALSE)) {
1128 				goto next_phy;
1129 			}
1130 			r = pmcs_sata_abort_ncq(pwp, pptr);
1131 			pmcs_release_scratch(pwp);
1132 			if (r == ENOMEM) {
1133 				goto next_phy;
1134 			}
1135 			if (r) {
1136 				r = pmcs_reset_phy(pwp, pptr,
1137 				    PMCS_PHYOP_LINK_RESET);
1138 				if (r == ENOMEM) {
1139 					goto next_phy;
1140 				}
1141 				/* what if other failures happened? */
1142 				pptr->abort_pending = 1;
1143 				pptr->abort_sent = 0;
1144 			}
1145 		}
1146 		if (pptr->abort_pending == 0 || pptr->abort_sent) {
1147 			goto next_phy;
1148 		}
1149 		pptr->abort_pending = 0;
1150 		if (pmcs_abort(pwp, pptr, pptr->device_id, 1, 1) == ENOMEM) {
1151 			pptr->abort_pending = 1;
1152 			goto next_phy;
1153 		}
1154 		pptr->abort_sent = 1;
1155 
1156 next_phy:
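		/*
		 * Depth-first walk: visit children first, saving this level's
		 * sibling so the walk can resume here on the way back up.
		 */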
1157 		if (pptr->children) {
1158 			pnext = pptr->children;
1159 			pnext_uplevel[level++] = pptr->sibling;
1160 		} else {
1161 			pnext = pptr->sibling;
1162 			while ((pnext == NULL) && (level > 0)) {
1163 				pnext = pnext_uplevel[--level];
1164 			}
1165 		}
1166 
1167 		pmcs_unlock_phy(pptr);
1168 		pptr = pnext;
1169 	}
1170 
1171 	return (0);
1172 }
1173 
1174 /*
1175  * Register a device (get a device handle for it).
1176  * Called with PHY lock held.
1177  */
1178 int
1179 pmcs_register_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1180 {
1181 	struct pmcwork *pwrk;
1182 	int result = 0;
1183 	uint32_t *msg;
1184 	uint32_t tmp, status;
1185 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1186 
1187 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1188 	msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1189 
1190 	if (msg == NULL ||
1191 	    (pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
1192 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1193 		result = ENOMEM;
1194 		goto out;
1195 	}
1196 
1197 	pwrk->arg = iomb;
1198 	pwrk->dtype = pptr->dtype;
1199 
1200 	msg[1] = LE_32(pwrk->htag);
1201 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_REGISTER_DEVICE));
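	/* Enable transport layer retries and encode the PHY's link rate */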
1202 	tmp = PMCS_DEVREG_TLR |
1203 	    (pptr->link_rate << PMCS_DEVREG_LINK_RATE_SHIFT);
1204 	if (IS_ROOT_PHY(pptr)) {
1205 		msg[2] = LE_32(pptr->portid |
1206 		    (pptr->phynum << PMCS_PHYID_SHIFT));
1207 	} else {
1208 		msg[2] = LE_32(pptr->portid);
1209 	}
1210 	if (pptr->dtype == SATA) {
1211 		if (IS_ROOT_PHY(pptr)) {
1212 			tmp |= PMCS_DEVREG_TYPE_SATA_DIRECT;
1213 		} else {
1214 			tmp |= PMCS_DEVREG_TYPE_SATA;
1215 		}
1216 	} else {
1217 		tmp |= PMCS_DEVREG_TYPE_SAS;
1218 	}
1219 	msg[3] = LE_32(tmp);
1220 	msg[4] = LE_32(PMCS_DEVREG_IT_NEXUS_TIMEOUT);
1221 	(void) memcpy(&msg[5], pptr->sas_address, 8);
1222 
1223 	CLEAN_MESSAGE(msg, 7);
1224 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1225 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1226 
1227 	pmcs_unlock_phy(pptr);
1228 	WAIT_FOR(pwrk, 250, result);
1229 	pmcs_lock_phy(pptr);
1230 	pmcs_pwork(pwp, pwrk);
1231 
1232 	if (result) {
1233 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1234 		result = ETIMEDOUT;
1235 		goto out;
1236 	}
1237 	status = LE_32(iomb[2]);
1238 	tmp = LE_32(iomb[3]);
1239 	switch (status) {
1240 	case PMCS_DEVREG_OK:
1241 	case PMCS_DEVREG_DEVICE_ALREADY_REGISTERED:
1242 	case PMCS_DEVREG_PHY_ALREADY_REGISTERED:
1243 		if (pmcs_validate_devid(pwp->root_phys, pptr, tmp) == B_FALSE) {
1244 			result = EEXIST;
1245 			goto out;
1246 		} else if (status != PMCS_DEVREG_OK) {
1247 			if (tmp == 0xffffffff) {	/* F/W bug */
1248 				pmcs_prt(pwp, PMCS_PRT_INFO,
1249 				    "%s: phy %s already has bogus devid 0x%x",
1250 				    __func__, pptr->path, tmp);
1251 				result = EIO;
1252 				goto out;
1253 			} else {
1254 				pmcs_prt(pwp, PMCS_PRT_INFO,
1255 				    "%s: phy %s already has a device id 0x%x",
1256 				    __func__, pptr->path, tmp);
1257 			}
1258 		}
1259 		break;
1260 	default:
1261 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1262 		    "register device %s", __func__, status, pptr->path);
1263 		result = EIO;
1264 		goto out;
1265 	}
1266 	pptr->device_id = tmp;
1267 	pptr->valid_device_id = 1;
1268 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Phy %s/" SAS_ADDR_FMT
1269 	    " registered with device_id 0x%x (portid %d)", pptr->path,
1270 	    SAS_ADDR_PRT(pptr->sas_address), tmp, pptr->portid);
1271 out:
1272 	return (result);
1273 }
1274 
1275 /*
1276  * Deregister a device (remove a device handle).
1277  * Called with PHY locked.
1278  */
1279 void
1280 pmcs_deregister_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1281 {
1282 	struct pmcwork *pwrk;
1283 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
1284 	uint32_t iomb[(PMCS_QENTRY_SIZE << 1) >> 2];
1285 	int result;
1286 
1287 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
1288 	if (pwrk == NULL) {
1289 		return;
1290 	}
1291 
1292 	pwrk->arg = iomb;
1293 	pwrk->dtype = pptr->dtype;
1294 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1295 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1296 	if (ptr == NULL) {
1297 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
1298 		pmcs_pwork(pwp, pwrk);
1299 		return;
1300 	}
1301 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
1302 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
1303 	msg[1] = LE_32(pwrk->htag);
1304 	msg[2] = LE_32(pptr->device_id);
1305 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
1306 	COPY_MESSAGE(ptr, msg, 3);
1307 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
1308 
1309 	pmcs_unlock_phy(pptr);
1310 	WAIT_FOR(pwrk, 250, result);
1311 	pmcs_pwork(pwp, pwrk);
1312 	pmcs_lock_phy(pptr);
1313 
1314 	if (result) {
1315 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_timeo, __func__);
1316 		return;
1317 	}
1318 	status = LE_32(iomb[2]);
1319 	if (status != PMCOUT_STATUS_OK) {
1320 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: status 0x%x when trying to "
1321 		    "deregister device %s", __func__, status, pptr->path);
1322 	} else {
1323 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: device %s deregistered",
1324 		    __func__, pptr->path);
1325 		pptr->valid_device_id = 0;
1326 		pptr->device_id = PMCS_INVALID_DEVICE_ID;
1327 	}
1328 }
1329 
1330 /*
1331  * Deregister all registered devices.
1332  */
1333 void
1334 pmcs_deregister_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
1335 {
1336 	/*
1337 	 * Start at the maximum level and walk back to level 0.  This only
1338 	 * gets done during detach after all threads and timers have been
1339 	 * destroyed, so there's no need to hold the softstate or PHY lock.
1340 	 */
1341 	while (phyp) {
1342 		if (phyp->children) {
1343 			pmcs_deregister_devices(pwp, phyp->children);
1344 		}
1345 		if (phyp->valid_device_id) {
1346 			pmcs_deregister_device(pwp, phyp);
1347 		}
1348 		phyp = phyp->sibling;
1349 	}
1350 }
1351 
1352 /*
1353  * Perform a 'soft' reset on the PMC chip
1354  */
1355 int
1356 pmcs_soft_reset(pmcs_hw_t *pwp, boolean_t no_restart)
1357 {
1358 	uint32_t s2, sfrbits, gsm, rapchk, wapchk, wdpchk, spc, tsmode;
1359 	pmcs_phy_t *pptr;
1360 	char *msg = NULL;
1361 	int i;
1362 
1363 	/*
1364 	 * Disable interrupts
1365 	 */
1366 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1367 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1368 
1369 	pmcs_prt(pwp, PMCS_PRT_INFO, "%s", __func__);
1370 
1371 	if (pwp->locks_initted) {
1372 		mutex_enter(&pwp->lock);
1373 	}
1374 	pwp->blocked = 1;
1375 
1376 	/*
1377 	 * Step 1
1378 	 */
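	/* Wait for firmware to signal that it's ready for a soft reset */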
1379 	s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2);
1380 	if ((s2 & PMCS_MSGU_HOST_SOFT_RESET_READY) == 0) {
1381 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1382 		pmcs_wr_gsm_reg(pwp, RB6_ACCESS, RB6_NMI_SIGNATURE);
1383 		for (i = 0; i < 100; i++) {
1384 			s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1385 			    PMCS_MSGU_HOST_SOFT_RESET_READY;
1386 			if (s2) {
1387 				break;
1388 			}
1389 			drv_usecwait(10000);
1390 		}
1391 		s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1392 		    PMCS_MSGU_HOST_SOFT_RESET_READY;
1393 		if (s2 == 0) {
1394 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: PMCS_MSGU_HOST_"
1395 			    "SOFT_RESET_READY never came ready", __func__);
1396 			pmcs_register_dump(pwp);
1397 			if ((pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1398 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0 ||
1399 			    (pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2) &
1400 			    PMCS_MSGU_CPU_SOFT_RESET_READY) == 0) {
1401 				pwp->state = STATE_DEAD;
1402 				pwp->blocked = 0;
1403 				if (pwp->locks_initted) {
1404 					mutex_exit(&pwp->lock);
1405 				}
1406 				return (-1);
1407 			}
1408 		}
1409 	}
1410 
1411 	/*
1412 	 * Step 2
1413 	 */
1414 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_IOP, 0);
1415 	drv_usecwait(10);
1416 	pmcs_wr_gsm_reg(pwp, NMI_EN_VPE0_AAP1, 0);
1417 	drv_usecwait(10);
1418 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_ENABLE, 0);
1419 	drv_usecwait(10);
1420 	pmcs_wr_topunit(pwp, PMCS_EVENT_INT_STAT,
1421 	    pmcs_rd_topunit(pwp, PMCS_EVENT_INT_STAT));
1422 	drv_usecwait(10);
1423 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_ENABLE, 0);
1424 	drv_usecwait(10);
1425 	pmcs_wr_topunit(pwp, PMCS_ERROR_INT_STAT,
1426 	    pmcs_rd_topunit(pwp, PMCS_ERROR_INT_STAT));
1427 	drv_usecwait(10);
1428 
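	/*
	 * Note the SFR progress bits and compute their toggled value;
	 * Step 15 below polls until the firmware flips them to match.
	 */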
1429 	sfrbits = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1430 	    PMCS_MSGU_AAP_SFR_PROGRESS;
1431 	sfrbits ^= PMCS_MSGU_AAP_SFR_PROGRESS;
1432 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "PMCS_MSGU_HOST_SCRATCH0 %08x -> %08x",
1433 	    pmcs_rd_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0), HST_SFT_RESET_SIG);
1434 	pmcs_wr_msgunit(pwp, PMCS_MSGU_HOST_SCRATCH0, HST_SFT_RESET_SIG);
1435 
1436 	/*
1437 	 * Step 3
1438 	 */
1439 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1440 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1441 	    gsm & ~PMCS_SOFT_RESET_BITS);
1442 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm & ~PMCS_SOFT_RESET_BITS);
1443 
1444 	/*
1445 	 * Step 4
1446 	 */
1447 	rapchk = pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN);
1448 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1449 	    rapchk, 0);
1450 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, 0);
1451 	wapchk = pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN);
1452 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1453 	    wapchk, 0);
1454 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, 0);
1455 	wdpchk = pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN);
1456 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1457 	    wdpchk, 0);
1458 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, 0);
1459 
1460 	/*
1461 	 * Step 5
1462 	 */
1463 	drv_usecwait(100);
1464 
1465 	/*
1466 	 * Step 5.5 (Temporary workaround for 1.07.xx Beta)
1467 	 */
1468 	tsmode = pmcs_rd_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR);
1469 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GPIO TSMODE %08x -> %08x", tsmode,
1470 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1471 	pmcs_wr_gsm_reg(pwp, PMCS_GPIO_TRISTATE_MODE_ADDR,
1472 	    tsmode & ~(PMCS_GPIO_TSMODE_BIT0|PMCS_GPIO_TSMODE_BIT1));
1473 	drv_usecwait(10);
1474 
1475 	/*
1476 	 * Step 6
1477 	 */
1478 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1479 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1480 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1481 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1482 	    spc & ~(PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1483 	drv_usecwait(10);
1484 
1485 	/*
1486 	 * Step 7
1487 	 */
1488 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1489 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1490 	    spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1491 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc & ~(BDMA_CORE_RSTB|OSSP_RSTB));
1492 
1493 	/*
1494 	 * Step 8
1495 	 */
1496 	drv_usecwait(100);
1497 
1498 	/*
1499 	 * Step 9
1500 	 */
1501 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1502 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1503 	    spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1504 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET, spc | (BDMA_CORE_RSTB|OSSP_RSTB));
1505 
1506 	/*
1507 	 * Step 10
1508 	 */
1509 	drv_usecwait(100);
1510 
1511 	/*
1512 	 * Step 11
1513 	 */
1514 	gsm = pmcs_rd_gsm_reg(pwp, GSM_CFG_AND_RESET);
1515 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "GSM %08x -> %08x", gsm,
1516 	    gsm | PMCS_SOFT_RESET_BITS);
1517 	pmcs_wr_gsm_reg(pwp, GSM_CFG_AND_RESET, gsm | PMCS_SOFT_RESET_BITS);
1518 	drv_usecwait(10);
1519 
1520 	/*
1521 	 * Step 12
1522 	 */
1523 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "READ_ADR_PARITY_CHK_EN %08x -> %08x",
1524 	    pmcs_rd_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN), rapchk);
1525 	pmcs_wr_gsm_reg(pwp, READ_ADR_PARITY_CHK_EN, rapchk);
1526 	drv_usecwait(10);
1527 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_ADR_PARITY_CHK_EN %08x -> %08x",
1528 	    pmcs_rd_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN), wapchk);
1529 	pmcs_wr_gsm_reg(pwp, WRITE_ADR_PARITY_CHK_EN, wapchk);
1530 	drv_usecwait(10);
1531 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "WRITE_DATA_PARITY_CHK_EN %08x -> %08x",
1532 	    pmcs_rd_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN), wdpchk);
1533 	pmcs_wr_gsm_reg(pwp, WRITE_DATA_PARITY_CHK_EN, wdpchk);
1534 	drv_usecwait(10);
1535 
1536 	/*
1537 	 * Step 13
1538 	 */
1539 	spc = pmcs_rd_topunit(pwp, PMCS_SPC_RESET);
1540 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "SPC_RESET %08x -> %08x", spc,
1541 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1542 	pmcs_wr_topunit(pwp, PMCS_SPC_RESET,
1543 	    spc | (PCS_IOP_SS_RSTB|PCS_AAP1_SS_RSTB));
1544 
1545 	/*
1546 	 * Step 14
1547 	 */
1548 	drv_usecwait(100);
1549 
1550 	/*
1551 	 * Step 15
1552 	 */
1553 	for (spc = 0, i = 0; i < 1000; i++) {
1554 		drv_usecwait(1000);
1555 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1556 		if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) == sfrbits) {
1557 			break;
1558 		}
1559 	}
1560 
1561 	if ((spc & PMCS_MSGU_AAP_SFR_PROGRESS) != sfrbits) {
1562 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1563 		    "SFR didn't toggle (sfr 0x%x)", spc);
1564 		pwp->state = STATE_DEAD;
1565 		pwp->blocked = 0;
1566 		if (pwp->locks_initted) {
1567 			mutex_exit(&pwp->lock);
1568 		}
1569 		return (-1);
1570 	}
1571 
1572 	/*
1573 	 * Step 16
1574 	 */
1575 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff);
1576 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1577 
1578 	/*
1579 	 * Wait up to 5 seconds for the AAP state to become ready or error.
1580 	 */
1581 	for (i = 0; i < 50; i++) {
1582 		spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1) &
1583 		    PMCS_MSGU_AAP_STATE_MASK;
1584 		if (spc == PMCS_MSGU_AAP_STATE_ERROR ||
1585 		    spc == PMCS_MSGU_AAP_STATE_READY) {
1586 			break;
1587 		}
1588 		drv_usecwait(100000);
1589 	}
1590 	spc = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH1);
1591 	if ((spc & PMCS_MSGU_AAP_STATE_MASK) != PMCS_MSGU_AAP_STATE_READY) {
1592 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1593 		    "soft reset failed (state 0x%x)", spc);
1594 		pwp->state = STATE_DEAD;
1595 		pwp->blocked = 0;
1596 		if (pwp->locks_initted) {
1597 			mutex_exit(&pwp->lock);
1598 		}
1599 		return (-1);
1600 	}
1601 
1602 
1603 	if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING ||
1604 	    pwp->state == STATE_PROBING || pwp->locks_initted == 0) {
1605 		pwp->blocked = 0;
1606 		if (pwp->locks_initted) {
1607 			mutex_exit(&pwp->lock);
1608 		}
1609 		return (0);
1610 	}
1611 
1612 	/*
1613 	 * Return at this point if we don't need to start up.
1614 	 */
1615 	if (no_restart) {
1616 		return (0);
1617 	}
1618 
1619 	ASSERT(pwp->locks_initted != 0);
1620 
1621 	/*
1622 	 * Clean up various soft state.
1623 	 */
1624 	bzero(pwp->ports, sizeof (pwp->ports));
1625 
1626 	pmcs_free_all_phys(pwp, pwp->root_phys);
1627 
1628 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
1629 		pmcs_lock_phy(pptr);
1630 		pmcs_clear_phy(pwp, pptr);
1631 		pmcs_unlock_phy(pptr);
1632 	}
1633 
1634 	if (pwp->targets) {
1635 		for (i = 0; i < pwp->max_dev; i++) {
1636 			pmcs_xscsi_t *xp = pwp->targets[i];
1637 
1638 			if (xp == NULL) {
1639 				continue;
1640 			}
1641 			mutex_enter(&xp->statlock);
1642 			if (xp->assigned == 0 && xp->dying == 0) {
1643 				if (xp->new) {
1644 					xp->new = 0;
1645 					xp->ca = 0;
1646 					xp->qdepth = 0;
1647 					xp->phy = NULL;
1648 				}
1649 				mutex_exit(&xp->statlock);
1650 				continue;
1651 			}
1652 			xp->tagmap = 0;
1653 			xp->dying = 1;
1654 			xp->assigned = 0;
1655 			mutex_exit(&xp->statlock);
1656 			SCHEDULE_WORK(pwp, PMCS_WORK_REM_DEVICES);
1657 		}
1658 	}
1659 
1660 	bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi));
1661 	for (i = 0; i < PMCS_NIQ; i++) {
1662 		if (pwp->iqp[i]) {
1663 			bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1664 			pmcs_wr_iqpi(pwp, i, 0);
1665 			pmcs_wr_iqci(pwp, i, 0);
1666 		}
1667 	}
1668 	for (i = 0; i < PMCS_NOQ; i++) {
1669 		if (pwp->oqp[i]) {
1670 			bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth);
1671 			pmcs_wr_oqpi(pwp, i, 0);
1672 			pmcs_wr_oqci(pwp, i, 0);
1673 		}
1674 
1675 	}
1676 	if (pwp->fwlogp) {
1677 		bzero(pwp->fwlogp, PMCS_FWLOG_SIZE);
1678 	}
1679 	STAILQ_INIT(&pwp->wf);
1680 	bzero(pwp->work, sizeof (pmcwork_t) * pwp->max_cmd);
1681 	for (i = 0; i < pwp->max_cmd - 1; i++) {
1682 		pmcwork_t *pwrk = &pwp->work[i];
1683 		STAILQ_INSERT_TAIL(&pwp->wf, pwrk, next);
1684 	}
1685 
1686 	/*
1687 	 * Clear out any leftover commands sitting in the work list
1688 	 */
1689 	for (i = 0; i < pwp->max_cmd; i++) {
1690 		pmcwork_t *pwrk = &pwp->work[i];
1691 		mutex_enter(&pwrk->lock);
1692 		if (pwrk->state == PMCS_WORK_STATE_ONCHIP) {
1693 			switch (PMCS_TAG_TYPE(pwrk->htag)) {
1694 			case PMCS_TAG_TYPE_WAIT:
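				/* The waiting thread will clean this up */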
1695 				mutex_exit(&pwrk->lock);
1696 				break;
1697 			case PMCS_TAG_TYPE_CBACK:
1698 			case PMCS_TAG_TYPE_NONE:
1699 				pmcs_pwork(pwp, pwrk);
1700 				break;
1701 			default:
1702 				break;
1703 			}
1704 		} else if (pwrk->state == PMCS_WORK_STATE_IOCOMPQ) {
1705 			pwrk->dead = 1;
1706 			mutex_exit(&pwrk->lock);
1707 		} else {
1708 			/*
1709 			 * The other states of NIL, READY and INTR
1710 			 * should not be visible outside of a lock being held.
1711 			 */
1712 			pmcs_pwork(pwp, pwrk);
1713 		}
1714 	}
1715 
1716 	/*
1717 	 * Restore Interrupt Mask
1718 	 */
1719 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask);
1720 	pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff);
1721 
1722 	pwp->blocked = 0;
1723 	pwp->mpi_table_setup = 0;
1724 	mutex_exit(&pwp->lock);
1725 
1726 	/*
1727 	 * Set up MPI again.
1728 	 */
1729 	if (pmcs_setup(pwp)) {
1730 		msg = "unable to setup MPI tables again";
1731 		goto fail_restart;
1732 	}
1733 	pmcs_report_fwversion(pwp);
1734 
1735 	/*
1736 	 * Restart MPI
1737 	 */
1738 	if (pmcs_start_mpi(pwp)) {
1739 		msg = "unable to restart MPI again";
1740 		goto fail_restart;
1741 	}
1742 
1743 	mutex_enter(&pwp->lock);
1744 	pwp->blocked = 0;
1745 	SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
1746 	mutex_exit(&pwp->lock);
1747 
1748 	/*
1749 	 * Run any completions
1750 	 */
1751 	PMCS_CQ_RUN(pwp);
1752 
1753 	/*
1754 	 * Delay
1755 	 */
1756 	drv_usecwait(1000000);
1757 	return (0);
1758 
1759 fail_restart:
1760 	mutex_enter(&pwp->lock);
1761 	pwp->state = STATE_DEAD;
1762 	mutex_exit(&pwp->lock);
1763 	pmcs_prt(pwp, PMCS_PRT_ERR, "%s: Failed: %s", __func__, msg);
1764 	return (-1);
1765 }
1766 
1767 /*
1768  * Reset a device or a logical unit.
1769  */
1770 int
1771 pmcs_reset_dev(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint64_t lun)
1772 {
1773 	int rval = 0;
1774 
1775 	if (pptr == NULL) {
1776 		return (ENXIO);
1777 	}
1778 
1779 	pmcs_lock_phy(pptr);
1780 	if (pptr->dtype == SAS) {
1781 		/*
1782 		 * Some devices do not support SAS_I_T_NEXUS_RESET as
1783 		 * it is not a mandatory (in SAM4) task management
1784 		 * function, while LOGICAL UNIT RESET is mandatory.
1785 		 *
1786 		 * The problem here is that we need to iterate over
1787 		 * all known LUNs to emulate the semantics of
1788 		 * "RESET_TARGET".
1789 		 *
1790 		 * XXX: FIX ME
1791 		 */
1792 		if (lun == (uint64_t)-1) {
1793 			lun = 0;
1794 		}
1795 		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0, lun,
1796 		    NULL);
1797 	} else if (pptr->dtype == SATA) {
1798 		if (lun != 0ull) {
1799 			pmcs_unlock_phy(pptr);
1800 			return (EINVAL);
1801 		}
1802 		rval = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_LINK_RESET);
1803 	} else {
1804 		pmcs_unlock_phy(pptr);
1805 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
1806 		    "%s: cannot reset a SMP device yet (%s)",
1807 		    __func__, pptr->path);
1808 		return (EINVAL);
1809 	}
1810 
1811 	/*
1812 	 * Now harvest any commands killed by this action
1813 	 * by issuing an ABORT for all commands on this device.
1814 	 *
1815 	 * We do this even if the tmf or reset fails (in case there
1816 	 * are any dead commands around to be harvested *anyway*).
1817 	 * We don't have to wait for the abort to complete.
1818 	 */
1819 	if (pmcs_abort(pwp, pptr, 0, 1, 0)) {
1820 		pptr->abort_pending = 1;
1821 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
1822 	}
1823 
1824 	pmcs_unlock_phy(pptr);
1825 	return (rval);
1826 }
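
/*
 * Illustrative sketch only (not part of the driver): one way to resolve
 * the XXX above would be to emulate RESET_TARGET by issuing LOGICAL UNIT
 * RESET to every known LUN.  The function name and the lun_list/lun_count
 * inputs are hypothetical; the driver would have to derive the LUN
 * inventory from its own target state.
 */
static int
pmcs_reset_all_luns_sketch(pmcs_hw_t *pwp, pmcs_phy_t *pptr,
    const uint64_t *lun_list, int lun_count)
{
	int i, rval = 0;

	for (i = 0; i < lun_count; i++) {
		/* Stop at the first failure; best-effort is also plausible */
		rval = pmcs_ssp_tmf(pwp, pptr, SAS_LOGICAL_UNIT_RESET, 0,
		    lun_list[i], NULL);
		if (rval != 0) {
			break;
		}
	}
	return (rval);
}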
1827 
1828 /*
1829  * Called with PHY locked.
1830  */
1831 static int
1832 pmcs_get_device_handle(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
1833 {
1834 	if (pptr->valid_device_id == 0) {
1835 		int result = pmcs_register_device(pwp, pptr);
1836 
1837 		/*
1838 		 * If we changed while registering, punt
1839 		 */
1840 		if (pptr->changed) {
1841 			RESTART_DISCOVERY(pwp);
1842 			return (-1);
1843 		}
1844 
		/*
		 * If registration failed, check the error.  ENOMEM
		 * indicates a temporary resource shortage, so just retry.
		 */
1849 		if (result == ENOMEM) {
1850 			PHY_CHANGED(pwp, pptr);
1851 			RESTART_DISCOVERY(pwp);
1852 			return (-1);
1853 		}
1854 
1855 		/*
1856 		 * An ETIMEDOUT error means we retry (if our counter isn't
1857 		 * exhausted)
1858 		 */
1859 		if (result == ETIMEDOUT) {
1860 			if (ddi_get_lbolt() < pptr->config_stop) {
1861 				PHY_CHANGED(pwp, pptr);
1862 				RESTART_DISCOVERY(pwp);
1863 			} else {
1864 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
1865 				    "%s: Retries exhausted for %s, killing",
1866 				    __func__, pptr->path);
1867 				pptr->config_stop = 0;
1868 				pmcs_kill_changed(pwp, pptr, 0);
1869 			}
1870 			return (-1);
1871 		}
		/*
		 * Any other error, or the lack of a valid device id,
		 * is fatal, but doesn't preclude a future action.
		 */
1876 		if (result || pptr->valid_device_id == 0) {
1877 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s could not "
1878 			    "be registered", __func__,  pptr->path);
1879 			return (-1);
1880 		}
1881 	}
1882 	return (0);
1883 }
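
/*
 * For reference, the retry machinery above leans on two macros from the
 * pmcs headers.  Roughly (hedged; the exact expansions live in the
 * headers, not here):
 *
 *	PHY_CHANGED(pwp, p)	- mark PHY p as changed (p->changed = 1,
 *				  typically with debug logging)
 *	RESTART_DISCOVERY(pwp)	- note that the configuration has changed
 *				  and schedule the PMCS_WORK_DISCOVER
 *				  work item
 */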
1884 
1885 int
1886 pmcs_iport_tgtmap_create(pmcs_iport_t *iport)
1887 {
1888 	ASSERT(iport);
1889 	if (iport == NULL)
1890 		return (B_FALSE);
1891 
1892 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1893 
1894 	/* create target map */
1895 	if (scsi_hba_tgtmap_create(iport->dip, SCSI_TM_FULLSET, tgtmap_usec,
1896 	    2048, NULL, NULL, NULL, &iport->iss_tgtmap) != DDI_SUCCESS) {
1897 		pmcs_prt(iport->pwp, PMCS_PRT_DEBUG,
1898 		    "%s: failed to create tgtmap", __func__);
1899 		return (B_FALSE);
1900 	}
1901 	return (B_TRUE);
1902 }
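
/*
 * A note on the scsi_hba_tgtmap_create() arguments above (hedged; the
 * authoritative contract is the SCSAv3 framework's): SCSI_TM_FULLSET
 * means each report is a complete set of visible targets, letting the
 * framework compute add/remove deltas itself; tgtmap_usec (MICROSEC,
 * i.e. a one second stabilization window) governs how long observations
 * must settle before deltas take effect; 2048 and the three NULLs are
 * taken here to be a size hint and the unused private-cookie and
 * activate/deactivate callback slots, respectively.
 */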
1903 
1904 int
1905 pmcs_iport_tgtmap_destroy(pmcs_iport_t *iport)
1906 {
1907 	ASSERT(iport && iport->iss_tgtmap);
1908 	if ((iport == NULL) || (iport->iss_tgtmap == NULL))
1909 		return (B_FALSE);
1910 
1911 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_MAP, "%s", __func__);
1912 
1913 	/* destroy target map */
1914 	scsi_hba_tgtmap_destroy(iport->iss_tgtmap);
1915 	return (B_TRUE);
1916 }
1917 
1918 /*
1919  * Query the phymap and populate the iport handle passed in.
1920  * Called with iport lock held.
1921  */
1922 int
1923 pmcs_iport_configure_phys(pmcs_iport_t *iport)
1924 {
1925 	pmcs_hw_t		*pwp;
1926 	pmcs_phy_t		*pptr;
1927 	sas_phymap_phys_t	*phys;
1928 	int			phynum;
1929 	int			inst;
1930 
1931 	ASSERT(iport);
1932 	ASSERT(mutex_owned(&iport->lock));
1933 	pwp = iport->pwp;
1934 	ASSERT(pwp);
1935 	inst = ddi_get_instance(iport->dip);
1936 
1937 	mutex_enter(&pwp->lock);
1938 	ASSERT(pwp->root_phys != NULL);
1939 
1940 	/*
1941 	 * Query the phymap regarding the phys in this iport and populate
1942 	 * the iport's phys list. Hereafter this list is maintained via
1943 	 * port up and down events in pmcs_intr.c
1944 	 */
1945 	ASSERT(list_is_empty(&iport->phys));
1946 	phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua);
1947 	while ((phynum = sas_phymap_phys_next(phys)) != -1) {
1948 		/* Grab the phy pointer from root_phys */
1949 		pptr = pwp->root_phys + phynum;
1950 		ASSERT(pptr);
1951 		pmcs_lock_phy(pptr);
1952 		ASSERT(pptr->phynum == phynum);
1953 
1954 		/*
1955 		 * Set a back pointer in the phy to this iport.
1956 		 */
1957 		pptr->iport = iport;
1958 
1959 		/*
1960 		 * If this phy is the primary, set a pointer to it on our
1961 		 * iport handle, and set our portid from it.
1962 		 */
1963 		if (!pptr->subsidiary) {
1964 			iport->pptr = pptr;
1965 			iport->portid = pptr->portid;
1966 		}
1967 
1968 		/*
1969 		 * Finally, insert the phy into our list
1970 		 */
1971 		pmcs_add_phy_to_iport(iport, pptr);
1972 		pmcs_unlock_phy(pptr);
1973 
1974 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: found phy %d [0x%p] "
1975 		    "on iport%d, refcnt(%d)", __func__, phynum,
1976 		    (void *)pptr, inst, iport->refcnt);
1977 	}
1978 	mutex_exit(&pwp->lock);
1979 	sas_phymap_phys_free(phys);
1980 	RESTART_DISCOVERY(pwp);
1981 	return (DDI_SUCCESS);
1982 }
1983 
1984 /*
1985  * Return the iport that ua is associated with, or NULL.  If an iport is
1986  * returned, it will be held and the caller must release the hold.
1987  */
1988 static pmcs_iport_t *
1989 pmcs_get_iport_by_ua(pmcs_hw_t *pwp, char *ua)
1990 {
1991 	pmcs_iport_t	*iport = NULL;
1992 
1993 	rw_enter(&pwp->iports_lock, RW_READER);
1994 	for (iport = list_head(&pwp->iports);
1995 	    iport != NULL;
1996 	    iport = list_next(&pwp->iports, iport)) {
1997 		mutex_enter(&iport->lock);
1998 		if (strcmp(iport->ua, ua) == 0) {
1999 			mutex_exit(&iport->lock);
2000 			mutex_enter(&iport->refcnt_lock);
2001 			iport->refcnt++;
2002 			mutex_exit(&iport->refcnt_lock);
2003 			break;
2004 		}
2005 		mutex_exit(&iport->lock);
2006 	}
2007 	rw_exit(&pwp->iports_lock);
2008 
2009 	return (iport);
2010 }
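
/*
 * Illustrative sketch (hypothetical function name): every successful
 * lookup above returns a held iport, so callers must pair the lookup
 * with pmcs_rele_iport().
 */
static void
pmcs_iport_lookup_example(pmcs_hw_t *pwp, char *ua)
{
	pmcs_iport_t *iport = pmcs_get_iport_by_ua(pwp, ua);

	if (iport != NULL) {
		/* ... use the iport, taking iport->lock as needed ... */
		pmcs_rele_iport(iport);	/* drop the hold from the lookup */
	}
}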
2011 
2012 /*
2013  * Return the iport that pptr is associated with, or NULL.
2014  * If an iport is returned, there is a hold that the caller must release.
2015  */
2016 pmcs_iport_t *
2017 pmcs_get_iport_by_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2018 {
2019 	pmcs_iport_t	*iport = NULL;
2020 	char		*ua;
2021 
2022 	ua = sas_phymap_lookup_ua(pwp->hss_phymap, pwp->sas_wwns[0],
2023 	    pmcs_barray2wwn(pptr->sas_address));
2024 	if (ua) {
2025 		iport = pmcs_get_iport_by_ua(pwp, ua);
2026 		if (iport) {
2027 			mutex_enter(&iport->lock);
2028 			iport->ua_state = UA_ACTIVE;
2029 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
2030 			    "found iport [0x%p] on ua (%s) for phy [0x%p], "
2031 			    "refcnt (%d)", __func__, (void *)iport, ua,
2032 			    (void *)pptr, iport->refcnt);
2033 			mutex_exit(&iport->lock);
2034 		}
2035 	}
2036 
2037 	return (iport);
2038 }
2039 
2040 void
2041 pmcs_rele_iport(pmcs_iport_t *iport)
2042 {
2043 	/*
2044 	 * Release a refcnt on this iport. If this is the last reference,
2045 	 * signal the potential waiter in pmcs_iport_unattach().
2046 	 */
2047 	ASSERT(iport->refcnt > 0);
	mutex_enter(&iport->refcnt_lock);
	iport->refcnt--;
	if (iport->refcnt == 0) {
		/*
		 * Check and signal under refcnt_lock so we cannot race
		 * with the waiter in pmcs_iport_unattach().
		 */
		cv_signal(&iport->refcnt_cv);
	}
	mutex_exit(&iport->refcnt_lock);
2054 	pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_CONFIG, "%s: iport [0x%p] "
2055 	    "refcnt (%d)", __func__, (void *)iport, iport->refcnt);
2056 }
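
/*
 * Sketch of the waiter side referenced above (hypothetical; the real
 * logic lives in pmcs_iport_unattach()): drain outstanding holds by
 * sleeping on refcnt_cv until the count reaches zero.
 */
static void
pmcs_iport_drain_sketch(pmcs_iport_t *iport)
{
	mutex_enter(&iport->refcnt_lock);
	while (iport->refcnt != 0) {
		cv_wait(&iport->refcnt_cv, &iport->refcnt_lock);
	}
	mutex_exit(&iport->refcnt_lock);
}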
2057 
2058 void
2059 pmcs_phymap_activate(void *arg, char *ua, void **privp)
2060 {
2061 	_NOTE(ARGUNUSED(privp));
2062 	pmcs_hw_t	*pwp = arg;
2063 	pmcs_iport_t	*iport = NULL;
2064 
2065 	mutex_enter(&pwp->lock);
2066 	if ((pwp->state == STATE_UNPROBING) || (pwp->state == STATE_DEAD)) {
2067 		mutex_exit(&pwp->lock);
2068 		return;
2069 	}
2070 	pwp->phymap_active++;
2071 	mutex_exit(&pwp->lock);
2072 
2073 	if (scsi_hba_iportmap_iport_add(pwp->hss_iportmap, ua, NULL) !=
2074 	    DDI_SUCCESS) {
2075 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to add "
2076 		    "iport handle on unit address [%s]", __func__, ua);
2077 	} else {
2078 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active count "
2079 		    "(%d), added iport handle on unit address [%s]", __func__,
2080 		    pwp->phymap_active, ua);
2081 	}
2082 
2083 	/* Set the HBA softstate as our private data for this unit address */
2084 	*privp = (void *)pwp;
2085 
	/*
	 * Normally we are now waiting on attach for this iport node.
	 * However, the node may still be attached if a consumer holds an
	 * outstanding open on it while the port is down.  If that is the
	 * case, reconfigure the existing iport here for reuse.
	 */
2092 	iport = pmcs_get_iport_by_ua(pwp, ua);
2093 	if (iport) {
2094 		mutex_enter(&iport->lock);
2095 		if (pmcs_iport_configure_phys(iport) != DDI_SUCCESS) {
2096 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: "
2097 			    "failed to configure phys on iport [0x%p] at "
2098 			    "unit address (%s)", __func__, (void *)iport, ua);
2099 		}
2100 		iport->ua_state = UA_ACTIVE;
2101 		pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
2102 		    &iport->nphy);
2103 		mutex_exit(&iport->lock);
2104 		pmcs_rele_iport(iport);
2105 	}
2107 }
2108 
2109 void
2110 pmcs_phymap_deactivate(void *arg, char *ua, void *privp)
2111 {
2112 	_NOTE(ARGUNUSED(privp));
2113 	pmcs_hw_t	*pwp = arg;
2114 	pmcs_iport_t	*iport;
2115 
2116 	mutex_enter(&pwp->lock);
2117 	pwp->phymap_active--;
2118 	mutex_exit(&pwp->lock);
2119 
2120 	if (scsi_hba_iportmap_iport_remove(pwp->hss_iportmap, ua) !=
2121 	    DDI_SUCCESS) {
2122 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: failed to remove "
2123 		    "iport handle on unit address [%s]", __func__, ua);
2124 	} else {
2125 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP, "%s: phymap_active "
2126 		    "count (%d), removed iport handle on unit address [%s]",
2127 		    __func__, pwp->phymap_active, ua);
2128 	}
2129 
2130 	iport = pmcs_get_iport_by_ua(pwp, ua);
2131 
2132 	if (iport == NULL) {
2133 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: failed lookup of "
2134 		    "iport handle on unit address (%s)", __func__, ua);
2135 		return;
2136 	}
2137 
2138 	mutex_enter(&iport->lock);
2139 	iport->ua_state = UA_INACTIVE;
2140 	iport->portid = PMCS_IPORT_INVALID_PORT_ID;
2141 	pmcs_remove_phy_from_iport(iport, NULL);
2142 	mutex_exit(&iport->lock);
2143 	pmcs_rele_iport(iport);
2144 }
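
/*
 * pmcs_phymap_activate and pmcs_phymap_deactivate above serve as the
 * phymap callbacks for this HBA: the framework invokes activate when a
 * unit address gains its first PHY and deactivate when it loses its
 * last one (hedged; the exact contract is defined by the sas_phymap
 * interface).  The privp argument is the per-unit-address private
 * cookie, which activate sets to the HBA softstate.
 */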
2145 
2146 /*
2147  * Top-level discovery function
2148  */
2149 void
2150 pmcs_discover(pmcs_hw_t *pwp)
2151 {
2152 	pmcs_phy_t		*pptr;
2153 	pmcs_phy_t		*root_phy;
2154 
2155 	DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags,
2156 	    boolean_t, pwp->config_changed);
2157 
2158 	mutex_enter(&pwp->lock);
2159 
2160 	if (pwp->state != STATE_RUNNING) {
2161 		mutex_exit(&pwp->lock);
2162 		return;
2163 	}
2164 
2165 	/* Ensure we have at least one phymap active */
2166 	if (pwp->phymap_active == 0) {
2167 		mutex_exit(&pwp->lock);
2168 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2169 		    "%s: phymap inactive, exiting", __func__);
2170 		return;
2171 	}
2172 
2173 	mutex_exit(&pwp->lock);
2174 
2175 	/*
2176 	 * If no iports have attached, but we have PHYs that are up, we
2177 	 * are waiting for iport attach to complete.  Restart discovery.
2178 	 */
2179 	rw_enter(&pwp->iports_lock, RW_READER);
2180 	if (!pwp->iports_attached) {
2181 		rw_exit(&pwp->iports_lock);
2182 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2183 		    "%s: no iports attached, retry discovery", __func__);
2184 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2185 		return;
2186 	}
2187 	rw_exit(&pwp->iports_lock);
2188 
2189 	mutex_enter(&pwp->config_lock);
2190 	if (pwp->configuring) {
2191 		mutex_exit(&pwp->config_lock);
2192 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2193 		    "%s: configuration already in progress", __func__);
2194 		return;
2195 	}
2196 
2197 	if (pmcs_acquire_scratch(pwp, B_FALSE)) {
2198 		mutex_exit(&pwp->config_lock);
2199 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2200 		    "%s: cannot allocate scratch", __func__);
2201 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2202 		return;
2203 	}
2204 
2205 	pwp->configuring = 1;
2206 	pwp->config_changed = B_FALSE;
2207 	mutex_exit(&pwp->config_lock);
2208 
2209 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery begin");
2210 
	/*
	 * The order of the following traversals is important.
	 *
	 * The first one checks for changed expanders.
	 *
	 * The second one aborts commands for dead devices and deregisters them.
	 *
	 * The third one clears the contents of dead expanders from the tree.
	 *
	 * The fourth one clears now-dead devices in expanders that remain.
	 */
2222 
	/*
	 * 1. Check expanders marked changed (but not dead) to see if they still
	 * have the same number of phys and the same SAS address. Mark them,
	 * their subsidiary phys (if wide) and their descendants dead if
	 * anything has changed. Check the devices they contain to see if
	 * *they* have changed. If they've changed from type NOTHING we leave
	 * them marked changed to be configured later (picking up a new SAS
	 * address and link rate if possible). Otherwise, any change in type,
	 * SAS address or removal of target role will cause us to mark them
	 * (and their descendants) as dead (and cause any pending commands
	 * and associated devices to be removed).
	 */
2235 	root_phy = pwp->root_phys;
2236 	if (pmcs_check_expanders(pwp, root_phy) == B_TRUE) {
2237 		goto out;
2238 	}
2239 
2240 	/*
2241 	 * 2. Descend the tree looking for dead devices and kill them
2242 	 * by aborting all active commands and then deregistering them.
2243 	 */
2244 	if (pmcs_kill_devices(pwp, root_phy)) {
2245 		goto out;
2246 	}
2247 
2248 	/*
2249 	 * 3. Check for dead expanders and remove their children from the tree.
2250 	 * By the time we get here, the devices and commands for them have
2251 	 * already been terminated and removed.
2252 	 *
2253 	 * We do this independent of the configuration count changing so we can
2254 	 * free any dead device PHYs that were discovered while checking
2255 	 * expanders. We ignore any subsidiary phys as pmcs_clear_expander
2256 	 * will take care of those.
2257 	 *
2258 	 * NOTE: pmcs_clear_expander requires softstate lock
2259 	 */
2260 	mutex_enter(&pwp->lock);
2261 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
2262 		/*
2263 		 * Call pmcs_clear_expander for every root PHY.  It will
2264 		 * recurse and determine which (if any) expanders actually
2265 		 * need to be cleared.
2266 		 */
2267 		pmcs_lock_phy(pptr);
2268 		pmcs_clear_expander(pwp, pptr, 0);
2269 		pmcs_unlock_phy(pptr);
2270 	}
2271 	mutex_exit(&pwp->lock);
2272 
2273 	/*
2274 	 * 4. Check for dead devices and nullify them. By the time we get here,
2275 	 * the devices and commands for them have already been terminated
2276 	 * and removed. This is different from step 2 in that this just nulls
2277 	 * phys that are part of expanders that are still here but used to
2278 	 * be something but are no longer something (e.g., after a pulled
2279 	 * disk drive). Note that dead expanders had their contained phys
	 * removed from the tree; here, the expanders themselves are
2281 	 * nullified (unless they were removed by being contained in another
2282 	 * expander phy).
2283 	 */
2284 	pmcs_clear_phys(pwp, root_phy);
2285 
2286 	/*
2287 	 * 5. Now check for and configure new devices.
2288 	 */
2289 	if (pmcs_configure_new_devices(pwp, root_phy)) {
2290 		goto restart;
2291 	}
2292 
2293 out:
2294 	DTRACE_PROBE2(pmcs__discover__exit, ulong_t, pwp->work_flags,
2295 	    boolean_t, pwp->config_changed);
2296 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Discovery end");
2297 
2298 	mutex_enter(&pwp->config_lock);
2299 
2300 	if (pwp->config_changed == B_FALSE) {
2301 		/*
2302 		 * Observation is stable, report what we currently see to
2303 		 * the tgtmaps for delta processing. Start by setting
2304 		 * BEGIN on all tgtmaps.
2305 		 */
2306 		mutex_exit(&pwp->config_lock);
2307 		if (pmcs_report_observations(pwp) == B_FALSE) {
2308 			goto restart;
2309 		}
2310 		mutex_enter(&pwp->config_lock);
2311 	} else {
2312 		/*
2313 		 * If config_changed is TRUE, we need to reschedule
2314 		 * discovery now.
2315 		 */
2316 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2317 		    "%s: Config has changed, will re-run discovery", __func__);
2318 		SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER);
2319 	}
2320 
2321 	pmcs_release_scratch(pwp);
2322 	pwp->configuring = 0;
2323 	mutex_exit(&pwp->config_lock);
2324 
2325 #ifdef DEBUG
2326 	pptr = pmcs_find_phy_needing_work(pwp, pwp->root_phys);
2327 	if (pptr != NULL) {
2328 		if (!WORK_IS_SCHEDULED(pwp, PMCS_WORK_DISCOVER)) {
2329 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
2330 			    "PHY %s dead=%d changed=%d configured=%d "
2331 			    "but no work scheduled", pptr->path, pptr->dead,
2332 			    pptr->changed, pptr->configured);
2333 		}
2334 		pmcs_unlock_phy(pptr);
2335 	}
2336 #endif
2337 
2338 	return;
2339 
2340 restart:
2341 	/* Clean up and restart discovery */
2342 	pmcs_release_scratch(pwp);
2343 	mutex_enter(&pwp->config_lock);
2344 	pwp->configuring = 0;
2345 	RESTART_DISCOVERY_LOCKED(pwp);
2346 	mutex_exit(&pwp->config_lock);
2347 }
2348 
2349 /*
2350  * Return any PHY that needs to have scheduled work done.  The PHY is returned
2351  * locked.
2352  */
2353 static pmcs_phy_t *
2354 pmcs_find_phy_needing_work(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2355 {
2356 	pmcs_phy_t *cphyp, *pnext;
2357 
2358 	while (pptr) {
2359 		pmcs_lock_phy(pptr);
2360 
2361 		if (pptr->changed || (pptr->dead && pptr->valid_device_id)) {
2362 			return (pptr);
2363 		}
2364 
2365 		pnext = pptr->sibling;
2366 
2367 		if (pptr->children) {
2368 			cphyp = pptr->children;
2369 			pmcs_unlock_phy(pptr);
2370 			cphyp = pmcs_find_phy_needing_work(pwp, cphyp);
2371 			if (cphyp) {
2372 				return (cphyp);
2373 			}
2374 		} else {
2375 			pmcs_unlock_phy(pptr);
2376 		}
2377 
2378 		pptr = pnext;
2379 	}
2380 
2381 	return (NULL);
2382 }
2383 
2384 /*
2385  * Report current observations to SCSA.
2386  */
2387 static boolean_t
2388 pmcs_report_observations(pmcs_hw_t *pwp)
2389 {
2390 	pmcs_iport_t		*iport;
2391 	scsi_hba_tgtmap_t	*tgtmap;
2392 	char			*ap;
2393 	pmcs_phy_t		*pptr;
2394 	uint64_t		wwn;
2395 
2396 	/*
2397 	 * Observation is stable, report what we currently see to the tgtmaps
2398 	 * for delta processing. Start by setting BEGIN on all tgtmaps.
2399 	 */
2400 	rw_enter(&pwp->iports_lock, RW_READER);
2401 	for (iport = list_head(&pwp->iports); iport != NULL;
2402 	    iport = list_next(&pwp->iports, iport)) {
2403 		/*
2404 		 * Unless we have at least one phy up, skip this iport.
2405 		 * Note we don't need to lock the iport for report_skip
2406 		 * since it is only used here.  We are doing the skip so that
		 * the phymap and iportmap stabilization times are honored,
2408 		 * giving us the ability to recover port operation within the
2409 		 * stabilization time without unconfiguring targets using the
2410 		 * port.
2411 		 */
2412 		if (!sas_phymap_uahasphys(pwp->hss_phymap, iport->ua)) {
2413 			iport->report_skip = 1;
2414 			continue;		/* skip set_begin */
2415 		}
2416 		iport->report_skip = 0;
2417 
2418 		tgtmap = iport->iss_tgtmap;
2419 		ASSERT(tgtmap);
2420 		if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
2421 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2422 			    "%s: cannot set_begin tgtmap ", __func__);
2423 			rw_exit(&pwp->iports_lock);
2424 			return (B_FALSE);
2425 		}
2426 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2427 		    "%s: set begin on tgtmap [0x%p]", __func__,
2428 		    (void *)tgtmap);
2429 	}
2430 	rw_exit(&pwp->iports_lock);
2431 
2432 	/*
2433 	 * Now, cycle through all levels of all phys and report
2434 	 * observations into their respective tgtmaps.
2435 	 */
2436 	pptr = pwp->root_phys;
2437 
2438 	while (pptr) {
2439 		pmcs_lock_phy(pptr);
2440 
2441 		/*
2442 		 * Skip PHYs that have nothing attached or are dead.
2443 		 */
2444 		if ((pptr->dtype == NOTHING) || pptr->dead) {
2445 			pmcs_unlock_phy(pptr);
2446 			pptr = pptr->sibling;
2447 			continue;
2448 		}
2449 
2450 		if (pptr->changed) {
2451 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2452 			    "%s: oops, PHY %s changed; restart discovery",
2453 			    __func__, pptr->path);
2454 			pmcs_unlock_phy(pptr);
2455 			return (B_FALSE);
2456 		}
2457 
2458 		/*
2459 		 * Get the iport for this root PHY, then call the helper
2460 		 * to report observations for this iport's targets
2461 		 */
2462 		iport = pmcs_get_iport_by_phy(pwp, pptr);
2463 		if (iport == NULL) {
2464 			/* No iport for this tgt */
2465 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2466 			    "%s: no iport for this target",
2467 			    __func__);
2468 			pmcs_unlock_phy(pptr);
2469 			pptr = pptr->sibling;
2470 			continue;
2471 		}
2472 
2473 		if (!iport->report_skip) {
2474 			if (pmcs_report_iport_observations(
2475 			    pwp, iport, pptr) == B_FALSE) {
2476 				pmcs_rele_iport(iport);
2477 				pmcs_unlock_phy(pptr);
2478 				return (B_FALSE);
2479 			}
2480 		}
2481 		pmcs_rele_iport(iport);
2482 		pmcs_unlock_phy(pptr);
2483 		pptr = pptr->sibling;
2484 	}
2485 
2486 	/*
	 * The observation is complete; end the sets. Note we will skip any
2488 	 * iports that are active, but have no PHYs in them (i.e. awaiting
2489 	 * unconfigure). Set to restart discovery if we find this.
2490 	 */
2491 	rw_enter(&pwp->iports_lock, RW_READER);
2492 	for (iport = list_head(&pwp->iports);
2493 	    iport != NULL;
2494 	    iport = list_next(&pwp->iports, iport)) {
2495 
2496 		if (iport->report_skip)
2497 			continue;		/* skip set_end */
2498 
2499 		tgtmap = iport->iss_tgtmap;
2500 		ASSERT(tgtmap);
2501 		if (scsi_hba_tgtmap_set_end(tgtmap, 0) != DDI_SUCCESS) {
2502 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2503 			    "%s: cannot set_end tgtmap ", __func__);
2504 			rw_exit(&pwp->iports_lock);
2505 			return (B_FALSE);
2506 		}
2507 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2508 		    "%s: set end on tgtmap [0x%p]", __func__,
2509 		    (void *)tgtmap);
2510 	}
2511 
2512 	/*
2513 	 * Now that discovery is complete, set up the necessary
2514 	 * DDI properties on each iport node.
2515 	 */
2516 	for (iport = list_head(&pwp->iports); iport != NULL;
2517 	    iport = list_next(&pwp->iports, iport)) {
2518 		/* Set up the DDI properties on each phy */
2519 		pmcs_smhba_set_phy_props(iport);
2520 
2521 		/* Set up the 'attached-port' property on the iport */
2522 		ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP);
2523 		mutex_enter(&iport->lock);
2524 		pptr = iport->pptr;
2525 		mutex_exit(&iport->lock);
2526 		if (pptr == NULL) {
2527 			/*
2528 			 * This iport is down, but has not been
2529 			 * removed from our list (unconfigured).
2530 			 * Set our value to '0'.
2531 			 */
			(void) snprintf(ap, PMCS_MAX_UA_SIZE, "%s", "0");
2533 		} else {
2534 			/* Otherwise, set it to remote phy's wwn */
2535 			pmcs_lock_phy(pptr);
2536 			wwn = pmcs_barray2wwn(pptr->sas_address);
2537 			(void) scsi_wwn_to_wwnstr(wwn, 1, ap);
2538 			pmcs_unlock_phy(pptr);
2539 		}
2540 		if (ndi_prop_update_string(DDI_DEV_T_NONE, iport->dip,
2541 		    SCSI_ADDR_PROP_ATTACHED_PORT,  ap) != DDI_SUCCESS) {
2542 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Failed to "
2543 			    "set prop ("SCSI_ADDR_PROP_ATTACHED_PORT")",
2544 			    __func__);
2545 		}
2546 		kmem_free(ap, PMCS_MAX_UA_SIZE);
2547 	}
2548 	rw_exit(&pwp->iports_lock);
2549 
2550 	return (B_TRUE);
2551 }
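
/*
 * Illustrative sketch (hypothetical function name): the tgtmap reporting
 * above is a three-step transaction per iport.  Every visible target is
 * re-added between set_begin and set_end; on set_end the framework
 * computes the delta against the previous full set and activates or
 * deactivates targets accordingly.
 */
static boolean_t
pmcs_tgtmap_txn_sketch(scsi_hba_tgtmap_t *tgtmap, char *ua)
{
	if (scsi_hba_tgtmap_set_begin(tgtmap) != DDI_SUCCESS) {
		return (B_FALSE);
	}
	/* One set_add per target; SMP targets use SCSI_TGT_SMP_DEVICE */
	if (scsi_hba_tgtmap_set_add(tgtmap, SCSI_TGT_SCSI_DEVICE, ua,
	    NULL) != DDI_SUCCESS) {
		return (B_FALSE);
	}
	return (scsi_hba_tgtmap_set_end(tgtmap, 0) == DDI_SUCCESS);
}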
2552 
2553 /*
2554  * Report observations into a particular iport's target map
2555  *
 * Called with phyp (and all descendants) locked
2557  */
2558 static boolean_t
2559 pmcs_report_iport_observations(pmcs_hw_t *pwp, pmcs_iport_t *iport,
2560     pmcs_phy_t *phyp)
2561 {
2562 	pmcs_phy_t		*lphyp;
2563 	scsi_hba_tgtmap_t	*tgtmap;
2564 	scsi_tgtmap_tgt_type_t	tgt_type;
2565 	char			*ua;
2566 	uint64_t		wwn;
2567 
2568 	tgtmap = iport->iss_tgtmap;
2569 	ASSERT(tgtmap);
2570 
2571 	lphyp = phyp;
2572 	while (lphyp) {
2573 		switch (lphyp->dtype) {
2574 		default:		/* Skip unknown PHYs. */
2575 			/* for non-root phys, skip to sibling */
2576 			goto next_phy;
2577 
2578 		case SATA:
2579 		case SAS:
2580 			tgt_type = SCSI_TGT_SCSI_DEVICE;
2581 			break;
2582 
2583 		case EXPANDER:
2584 			tgt_type = SCSI_TGT_SMP_DEVICE;
2585 			break;
2586 		}
2587 
2588 		if (lphyp->dead) {
2589 			goto next_phy;
2590 		}
2591 
2592 		wwn = pmcs_barray2wwn(lphyp->sas_address);
2593 		ua = scsi_wwn_to_wwnstr(wwn, 1, NULL);
2594 
2595 		pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2596 		    "iport_observation: adding %s on tgtmap [0x%p] phy [0x%p]",
		    ua, (void *)tgtmap, (void *)lphyp);
2598 
2599 		if (scsi_hba_tgtmap_set_add(tgtmap, tgt_type, ua, NULL) !=
2600 		    DDI_SUCCESS) {
2601 			pmcs_prt(pwp, PMCS_PRT_DEBUG_MAP,
2602 			    "%s: failed to add address %s", __func__, ua);
2603 			scsi_free_wwnstr(ua);
2604 			return (B_FALSE);
2605 		}
2606 		scsi_free_wwnstr(ua);
2607 
2608 		if (lphyp->children) {
2609 			if (pmcs_report_iport_observations(pwp, iport,
2610 			    lphyp->children) == B_FALSE) {
2611 				return (B_FALSE);
2612 			}
2613 		}
2614 
2615 		/* for non-root phys, report siblings too */
2616 next_phy:
2617 		if (IS_ROOT_PHY(lphyp)) {
2618 			lphyp = NULL;
2619 		} else {
2620 			lphyp = lphyp->sibling;
2621 		}
2622 	}
2623 
2624 	return (B_TRUE);
2625 }
2626 
2627 /*
2628  * Check for and configure new devices.
2629  *
2630  * If the changed device is a SATA device, add a SATA device.
2631  *
2632  * If the changed device is a SAS device, add a SAS device.
2633  *
2634  * If the changed device is an EXPANDER device, do a REPORT
2635  * GENERAL SMP command to find out the number of contained phys.
 * For each contained phy, allocate a phy structure and do a
 * DISCOVER SMP command to find out what kind of device it
2638  * DISCOVERY SMP command to find out what kind of device it
2639  * is and add it to the linked list of phys on the *next* level.
2640  *
2641  * NOTE: pptr passed in by the caller will be a root PHY
2642  */
2643 static int
2644 pmcs_configure_new_devices(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2645 {
2646 	int rval = 0;
2647 	pmcs_iport_t *iport;
2648 	pmcs_phy_t *pnext, *orig_pptr = pptr, *root_phy, *pchild;
2649 
2650 	/*
2651 	 * First, walk through each PHY at this level
2652 	 */
2653 	while (pptr) {
2654 		pmcs_lock_phy(pptr);
2655 		pnext = pptr->sibling;
2656 
2657 		/*
2658 		 * Set the new dtype if it has changed
2659 		 */
2660 		if ((pptr->pend_dtype != NEW) &&
2661 		    (pptr->pend_dtype != pptr->dtype)) {
2662 			pptr->dtype = pptr->pend_dtype;
2663 		}
2664 
2665 		if (pptr->changed == 0 || pptr->dead || pptr->configured) {
2666 			goto next_phy;
2667 		}
2668 
2669 		/*
2670 		 * Confirm that this target's iport is configured
2671 		 */
2672 		root_phy = pmcs_get_root_phy(pptr);
2673 		iport = pmcs_get_iport_by_phy(pwp, root_phy);
2674 		if (iport == NULL) {
2675 			/* No iport for this tgt, restart */
2676 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2677 			    "%s: iport not yet configured, "
2678 			    "retry discovery", __func__);
2679 			pnext = NULL;
2680 			rval = -1;
2681 			goto next_phy;
2682 		}
2683 
2684 		switch (pptr->dtype) {
2685 		case NOTHING:
2686 			pptr->changed = 0;
2687 			break;
2688 		case SATA:
2689 		case SAS:
2690 			pptr->iport = iport;
2691 			pmcs_new_tport(pwp, pptr);
2692 			break;
2693 		case EXPANDER:
2694 			pmcs_configure_expander(pwp, pptr, iport);
2695 			break;
2696 		}
2697 		pmcs_rele_iport(iport);
2698 
2699 		mutex_enter(&pwp->config_lock);
2700 		if (pwp->config_changed) {
2701 			mutex_exit(&pwp->config_lock);
2702 			pnext = NULL;
2703 			goto next_phy;
2704 		}
2705 		mutex_exit(&pwp->config_lock);
2706 
2707 next_phy:
2708 		pmcs_unlock_phy(pptr);
2709 		pptr = pnext;
2710 	}
2711 
2712 	if (rval != 0) {
2713 		return (rval);
2714 	}
2715 
	/*
	 * Now walk through each PHY again, recursing into any that
	 * have children.
	 */
2720 	pptr = orig_pptr;
2721 	while (pptr) {
2722 		pmcs_lock_phy(pptr);
2723 		pnext = pptr->sibling;
2724 		pchild = pptr->children;
2725 		pmcs_unlock_phy(pptr);
2726 
2727 		if (pchild) {
2728 			rval = pmcs_configure_new_devices(pwp, pchild);
2729 			if (rval != 0) {
2730 				break;
2731 			}
2732 		}
2733 
2734 		pptr = pnext;
2735 	}
2736 
2737 	return (rval);
2738 }
2739 
2740 /*
 * Set all phys and descendant phys as changed if changed == B_TRUE, otherwise
2742  * mark them all as not changed.
2743  *
2744  * Called with parent PHY locked.
2745  */
2746 void
2747 pmcs_set_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, boolean_t changed,
2748     int level)
2749 {
2750 	pmcs_phy_t *pptr;
2751 
2752 	if (level == 0) {
2753 		if (changed) {
2754 			PHY_CHANGED(pwp, parent);
2755 		} else {
2756 			parent->changed = 0;
2757 		}
2758 		if (parent->dtype == EXPANDER && parent->level) {
2759 			parent->width = 1;
2760 		}
2761 		if (parent->children) {
2762 			pmcs_set_changed(pwp, parent->children, changed,
2763 			    level + 1);
2764 		}
2765 	} else {
2766 		pptr = parent;
2767 		while (pptr) {
2768 			if (changed) {
2769 				PHY_CHANGED(pwp, pptr);
2770 			} else {
2771 				pptr->changed = 0;
2772 			}
2773 			if (pptr->dtype == EXPANDER && pptr->level) {
2774 				pptr->width = 1;
2775 			}
2776 			if (pptr->children) {
2777 				pmcs_set_changed(pwp, pptr->children, changed,
2778 				    level + 1);
2779 			}
2780 			pptr = pptr->sibling;
2781 		}
2782 	}
2783 }
2784 
2785 /*
 * Take the passed phy and mark it and its descendants as dead.
2787  * Fire up reconfiguration to abort commands and bury it.
2788  *
2789  * Called with the parent PHY locked.
2790  */
2791 void
2792 pmcs_kill_changed(pmcs_hw_t *pwp, pmcs_phy_t *parent, int level)
2793 {
2794 	pmcs_phy_t *pptr = parent;
2795 
2796 	while (pptr) {
2797 		pptr->link_rate = 0;
2798 		pptr->abort_sent = 0;
2799 		pptr->abort_pending = 1;
2800 		SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
2801 		pptr->need_rl_ext = 0;
2802 
2803 		if (pptr->dead == 0) {
2804 			PHY_CHANGED(pwp, pptr);
2805 			RESTART_DISCOVERY(pwp);
2806 		}
2807 
2808 		pptr->dead = 1;
2809 
2810 		if (pptr->children) {
2811 			pmcs_kill_changed(pwp, pptr->children, level + 1);
2812 		}
2813 
2814 		/*
2815 		 * Only kill siblings at level > 0
2816 		 */
2817 		if (level == 0) {
2818 			return;
2819 		}
2820 
2821 		pptr = pptr->sibling;
2822 	}
2823 }
2824 
2825 /*
2826  * Go through every PHY and clear any that are dead (unless they're expanders)
2827  */
2828 static void
2829 pmcs_clear_phys(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2830 {
2831 	pmcs_phy_t *pnext, *phyp;
2832 
2833 	phyp = pptr;
2834 	while (phyp) {
2835 		if (IS_ROOT_PHY(phyp)) {
2836 			pmcs_lock_phy(phyp);
2837 		}
2838 
2839 		if ((phyp->dtype != EXPANDER) && phyp->dead) {
2840 			pmcs_clear_phy(pwp, phyp);
2841 		}
2842 
2843 		if (phyp->children) {
2844 			pmcs_clear_phys(pwp, phyp->children);
2845 		}
2846 
2847 		pnext = phyp->sibling;
2848 
2849 		if (IS_ROOT_PHY(phyp)) {
2850 			pmcs_unlock_phy(phyp);
2851 		}
2852 
2853 		phyp = pnext;
2854 	}
2855 }
2856 
2857 /*
2858  * Clear volatile parts of a phy.  Called with PHY locked.
2859  */
2860 void
2861 pmcs_clear_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2862 {
2863 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
2864 	ASSERT(mutex_owned(&pptr->phy_lock));
2865 	/* keep sibling */
2866 	/* keep children */
2867 	/* keep parent */
2868 	pptr->device_id = PMCS_INVALID_DEVICE_ID;
2869 	/* keep hw_event_ack */
2870 	pptr->ncphy = 0;
2871 	/* keep phynum */
2872 	pptr->width = 0;
2873 	pptr->ds_recovery_retries = 0;
2874 	/* keep dtype */
2875 	pptr->config_stop = 0;
2876 	pptr->spinup_hold = 0;
2877 	pptr->atdt = 0;
2878 	/* keep portid */
2879 	pptr->link_rate = 0;
2880 	pptr->valid_device_id = 0;
2881 	pptr->abort_sent = 0;
2882 	pptr->abort_pending = 0;
2883 	pptr->need_rl_ext = 0;
2884 	pptr->subsidiary = 0;
2885 	pptr->configured = 0;
2886 	/* Only mark dead if it's not a root PHY and its dtype isn't NOTHING */
2887 	/* XXX: What about directly attached disks? */
2888 	if (!IS_ROOT_PHY(pptr) && (pptr->dtype != NOTHING))
2889 		pptr->dead = 1;
2890 	pptr->changed = 0;
2891 	/* keep SAS address */
2892 	/* keep path */
2893 	/* keep ref_count */
2894 	/* Don't clear iport on root PHYs - they are handled in pmcs_intr.c */
2895 	if (!IS_ROOT_PHY(pptr)) {
2896 		pptr->iport = NULL;
2897 	}
2898 }
2899 
2900 /*
2901  * Allocate softstate for this target if there isn't already one.  If there
2902  * is, just redo our internal configuration.  If it is actually "new", we'll
2903  * soon get a tran_tgt_init for it.
2904  *
2905  * Called with PHY locked.
2906  */
2907 static void
2908 pmcs_new_tport(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2909 {
2910 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: phy 0x%p @ %s", __func__,
2911 	    (void *)pptr, pptr->path);
2912 
2913 	if (pmcs_configure_phy(pwp, pptr) == B_FALSE) {
2914 		/*
2915 		 * If the config failed, mark the PHY as changed.
2916 		 */
2917 		PHY_CHANGED(pwp, pptr);
2918 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2919 		    "%s: pmcs_configure_phy failed for phy 0x%p", __func__,
2920 		    (void *)pptr);
2921 		return;
2922 	}
2923 
2924 	/* Mark PHY as no longer changed */
2925 	pptr->changed = 0;
2926 
2927 	/*
2928 	 * If the PHY has no target pointer, see if there's a dead PHY that
2929 	 * matches.
2930 	 */
2931 	if (pptr->target == NULL) {
2932 		pmcs_reap_dead_phy(pptr);
2933 	}
2934 
2935 	/*
2936 	 * Only assign the device if there is a target for this PHY with a
2937 	 * matching SAS address.  If an iport is disconnected from one piece
2938 	 * of storage and connected to another within the iport stabilization
2939 	 * time, we can get the PHY/target mismatch situation.
2940 	 *
2941 	 * Otherwise, it'll get done in tran_tgt_init.
2942 	 */
2943 	if (pptr->target) {
2944 		mutex_enter(&pptr->target->statlock);
2945 		if (pmcs_phy_target_match(pptr) == B_FALSE) {
2946 			mutex_exit(&pptr->target->statlock);
2947 			if (!IS_ROOT_PHY(pptr)) {
2948 				pmcs_dec_phy_ref_count(pptr);
2949 			}
2950 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
2951 			    "%s: Not assigning existing tgt %p for PHY %p "
2952 			    "(WWN mismatch)", __func__, (void *)pptr->target,
2953 			    (void *)pptr);
2954 			pptr->target = NULL;
2955 			return;
2956 		}
2957 
2958 		if (!pmcs_assign_device(pwp, pptr->target)) {
2959 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
2960 			    "%s: pmcs_assign_device failed for target 0x%p",
2961 			    __func__, (void *)pptr->target);
2962 		}
2963 		mutex_exit(&pptr->target->statlock);
2964 	}
2965 }
2966 
2967 /*
2968  * Called with PHY lock held.
2969  */
2970 static boolean_t
2971 pmcs_configure_phy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
2972 {
2973 	char *dtype;
2974 
2975 	ASSERT(mutex_owned(&pptr->phy_lock));
2976 
2977 	/*
2978 	 * Mark this device as no longer changed.
2979 	 */
2980 	pptr->changed = 0;
2981 
2982 	/*
2983 	 * If we don't have a device handle, get one.
2984 	 */
2985 	if (pmcs_get_device_handle(pwp, pptr)) {
2986 		return (B_FALSE);
2987 	}
2988 
2989 	pptr->configured = 1;
2990 
2991 	switch (pptr->dtype) {
2992 	case SAS:
2993 		dtype = "SAS";
2994 		break;
2995 	case SATA:
2996 		dtype = "SATA";
2997 		break;
2998 	case EXPANDER:
2999 		dtype = "SMP";
3000 		break;
3001 	default:
3002 		dtype = "???";
3003 	}
3004 
3005 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "config_dev: %s dev %s "
3006 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", dtype, pptr->path,
3007 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3008 
3009 	return (B_TRUE);
3010 }
3011 
3012 /*
3013  * Called with PHY locked
3014  */
3015 static void
3016 pmcs_configure_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, pmcs_iport_t *iport)
3017 {
3018 	pmcs_phy_t *ctmp, *clist = NULL, *cnext;
3019 	int result, i, nphy = 0;
3020 	boolean_t root_phy = B_FALSE;
3021 
3022 	ASSERT(iport);
3023 
3024 	/*
3025 	 * Step 1- clear our "changed" bit. If we need to retry/restart due
3026 	 * to resource shortages, we'll set it again. While we're doing
3027 	 * configuration, other events may set it again as well.  If the PHY
3028 	 * is a root PHY and is currently marked as having changed, reset the
3029 	 * config_stop timer as well.
3030 	 */
3031 	if (IS_ROOT_PHY(pptr) && pptr->changed) {
3032 		pptr->config_stop = ddi_get_lbolt() +
3033 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3034 	}
3035 	pptr->changed = 0;
3036 
	/*
	 * Step 2- make sure we don't exceed the maximum depth of the
	 * expansion tree.
	 */
3040 	if (pptr->level == PMCS_MAX_XPND-1) {
3041 		pmcs_prt(pwp, PMCS_PRT_WARN,
3042 		    "%s: SAS expansion tree too deep", __func__);
3043 		return;
3044 	}
3045 
	/*
	 * Step 3- Check if this expander is part of a wide phy that has
	 * already been configured.
	 *
	 * We detect this by looking at this level for another EXPANDER
	 * device with the same SAS address that is not already marked as
	 * a subsidiary phy and, if there are parents, whose parent's SAS
	 * address is the same as our parent's SAS address.
	 */
3055 	if (!IS_ROOT_PHY(pptr)) {
3056 		/*
3057 		 * No need to lock the parent here because we're in discovery
3058 		 * and the only time a PHY's children pointer can change is
3059 		 * in discovery; either in pmcs_clear_expander (which has
3060 		 * already been called) or here, down below.  Plus, trying to
3061 		 * grab the parent's lock here can cause deadlock.
3062 		 */
3063 		ctmp = pptr->parent->children;
3064 	} else {
3065 		ctmp = pwp->root_phys;
3066 		root_phy = B_TRUE;
3067 	}
3068 
3069 	while (ctmp) {
		/*
		 * Stop once we've checked all PHYs up to pptr.  Beyond that
		 * point we would be comparing against primary PHYs with
		 * higher PHY numbers than pptr, which can never match: the
		 * primary PHY on non-root expanders is ALWAYS the lowest
		 * numbered PHY.
		 */
3077 		if (ctmp == pptr) {
3078 			break;
3079 		}
3080 
3081 		/*
3082 		 * If pptr and ctmp are root PHYs, just grab the mutex on
3083 		 * ctmp.  No need to lock the entire tree.  If they are not
3084 		 * root PHYs, there is no need to lock since a non-root PHY's
3085 		 * SAS address and other characteristics can only change in
3086 		 * discovery anyway.
3087 		 */
3088 		if (root_phy) {
3089 			mutex_enter(&ctmp->phy_lock);
3090 		}
3091 
3092 		if (ctmp->dtype == EXPANDER && ctmp->width &&
3093 		    memcmp(ctmp->sas_address, pptr->sas_address, 8) == 0) {
3094 			int widephy = 0;
3095 			/*
3096 			 * If these phys are not root PHYs, compare their SAS
3097 			 * addresses too.
3098 			 */
3099 			if (!root_phy) {
3100 				if (memcmp(ctmp->parent->sas_address,
3101 				    pptr->parent->sas_address, 8) == 0) {
3102 					widephy = 1;
3103 				}
3104 			} else {
3105 				widephy = 1;
3106 			}
3107 			if (widephy) {
3108 				ctmp->width++;
3109 				pptr->subsidiary = 1;
3110 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: PHY "
3111 				    "%s part of wide PHY %s (now %d wide)",
3112 				    __func__, pptr->path, ctmp->path,
3113 				    ctmp->width);
3114 				if (root_phy) {
3115 					mutex_exit(&ctmp->phy_lock);
3116 				}
3117 				return;
3118 			}
3119 		}
3120 
3121 		cnext = ctmp->sibling;
3122 		if (root_phy) {
3123 			mutex_exit(&ctmp->phy_lock);
3124 		}
3125 		ctmp = cnext;
3126 	}
3127 
3128 	/*
3129 	 * Step 4- If we don't have a device handle, get one.  Since this
3130 	 * is the primary PHY, make sure subsidiary is cleared.
3131 	 */
3132 	pptr->subsidiary = 0;
3133 	if (pmcs_get_device_handle(pwp, pptr)) {
3134 		goto out;
3135 	}
3136 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "Config expander %s "
3137 	    SAS_ADDR_FMT " dev id 0x%x lr 0x%x", pptr->path,
3138 	    SAS_ADDR_PRT(pptr->sas_address), pptr->device_id, pptr->link_rate);
3139 
3140 	/*
3141 	 * Step 5- figure out how many phys are in this expander.
3142 	 */
3143 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3144 	if (nphy <= 0) {
3145 		if (nphy == 0 && ddi_get_lbolt() < pptr->config_stop) {
3146 			PHY_CHANGED(pwp, pptr);
3147 			RESTART_DISCOVERY(pwp);
3148 		} else {
3149 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3150 			    "%s: Retries exhausted for %s, killing", __func__,
3151 			    pptr->path);
3152 			pptr->config_stop = 0;
3153 			pmcs_kill_changed(pwp, pptr, 0);
3154 		}
3155 		goto out;
3156 	}
3157 
3158 	/*
3159 	 * Step 6- Allocate a list of phys for this expander and figure out
3160 	 * what each one is.
3161 	 */
3162 	for (i = 0; i < nphy; i++) {
3163 		ctmp = kmem_cache_alloc(pwp->phy_cache, KM_SLEEP);
3164 		bzero(ctmp, sizeof (pmcs_phy_t));
3165 		ctmp->device_id = PMCS_INVALID_DEVICE_ID;
3166 		ctmp->sibling = clist;
3167 		ctmp->pend_dtype = NEW;	/* Init pending dtype */
3168 		ctmp->config_stop = ddi_get_lbolt() +
3169 		    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3170 		clist = ctmp;
3171 	}
3172 
3173 	mutex_enter(&pwp->config_lock);
3174 	if (pwp->config_changed) {
3175 		RESTART_DISCOVERY_LOCKED(pwp);
3176 		mutex_exit(&pwp->config_lock);
3177 		/*
3178 		 * Clean up the newly allocated PHYs and return
3179 		 */
3180 		while (clist) {
3181 			ctmp = clist->sibling;
3182 			kmem_cache_free(pwp->phy_cache, clist);
3183 			clist = ctmp;
3184 		}
3185 		return;
3186 	}
3187 	mutex_exit(&pwp->config_lock);
3188 
3189 	/*
3190 	 * Step 7- Now fill in the rest of the static portions of the phy.
3191 	 */
3192 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3193 		ctmp->parent = pptr;
3194 		ctmp->pwp = pwp;
3195 		ctmp->level = pptr->level+1;
		ctmp->portid = pptr->portid;
		/*
		 * Inherit SAS2 tolerance from the parent.  The child was
		 * bzero'ed at allocation, so without this the SAS2 branch
		 * below could never be taken.
		 */
		ctmp->tolerates_sas2 = pptr->tolerates_sas2;
		if (ctmp->tolerates_sas2) {
3198 			ASSERT(i < SAS2_PHYNUM_MAX);
3199 			ctmp->phynum = i & SAS2_PHYNUM_MASK;
3200 		} else {
3201 			ASSERT(i < SAS_PHYNUM_MAX);
3202 			ctmp->phynum = i & SAS_PHYNUM_MASK;
3203 		}
3204 		pmcs_phy_name(pwp, ctmp, ctmp->path, sizeof (ctmp->path));
3205 		pmcs_lock_phy(ctmp);
3206 	}
3207 
3208 	/*
3209 	 * Step 8- Discover things about each phy in the expander.
3210 	 */
3211 	for (i = 0, ctmp = clist; ctmp; ctmp = ctmp->sibling, i++) {
3212 		result = pmcs_expander_content_discover(pwp, pptr, ctmp);
3213 		if (result <= 0) {
3214 			if (ddi_get_lbolt() < pptr->config_stop) {
3215 				PHY_CHANGED(pwp, pptr);
3216 				RESTART_DISCOVERY(pwp);
3217 			} else {
3218 				pptr->config_stop = 0;
3219 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3220 				    "%s: Retries exhausted for %s, killing",
3221 				    __func__, pptr->path);
3222 				pmcs_kill_changed(pwp, pptr, 0);
3223 			}
3224 			goto out;
3225 		}
3226 
3227 		/* Set pend_dtype to dtype for 1st time initialization */
3228 		ctmp->pend_dtype = ctmp->dtype;
3229 	}
3230 
3231 	/*
3232 	 * Step 9- Install the new list on the next level. There should be
3233 	 * no children pointer on this PHY.  If there is, we'd need to know
 * how it happened (the expander suddenly got more PHYs?).
3235 	 */
3236 	ASSERT(pptr->children == NULL);
3237 	if (pptr->children != NULL) {
		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Already child PHYs attached "
		    "to PHY %s: This should never happen", __func__,
3240 		    pptr->path);
3241 		goto out;
3242 	} else {
3243 		pptr->children = clist;
3244 	}
3245 
3246 	clist = NULL;
3247 	pptr->ncphy = nphy;
3248 	pptr->configured = 1;
3249 
3250 	/*
3251 	 * We only set width if we're greater than level 0.
3252 	 */
3253 	if (pptr->level) {
3254 		pptr->width = 1;
3255 	}
3256 
3257 	/*
3258 	 * Now tell the rest of the world about us, as an SMP node.
3259 	 */
3260 	pptr->iport = iport;
3261 	pmcs_new_tport(pwp, pptr);
3262 
3263 out:
3264 	while (clist) {
3265 		ctmp = clist->sibling;
3266 		pmcs_unlock_phy(clist);
3267 		kmem_cache_free(pwp->phy_cache, clist);
3268 		clist = ctmp;
3269 	}
3270 }
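
/*
 * Illustrative sketch (hypothetical function name): the wide-port test
 * from Step 3 above, factored as a predicate.  "primary" is a candidate
 * already-configured expander PHY at the same level; "cand" is the PHY
 * being configured.
 */
static boolean_t
pmcs_is_wide_sibling_sketch(pmcs_phy_t *primary, pmcs_phy_t *cand,
    boolean_t root_phy)
{
	if (primary->dtype != EXPANDER || primary->width == 0 ||
	    memcmp(primary->sas_address, cand->sas_address, 8) != 0) {
		return (B_FALSE);
	}
	/* Root PHYs have no parents; otherwise the parents must match too */
	if (root_phy) {
		return (B_TRUE);
	}
	return (memcmp(primary->parent->sas_address,
	    cand->parent->sas_address, 8) == 0);
}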
3271 
/*
 * Check expanders marked changed (but not dead) to see if they still have
 * the same number of phys and the same SAS address. Mark them, their
 * subsidiary phys (if wide) and their descendants dead if anything has
 * changed. Check the devices they contain to see if *they* have changed.
 * If they've changed from type NOTHING we leave them marked changed to be
 * configured later (picking up a new SAS address and link rate if
 * possible). Otherwise, any change in type, SAS address or removal of
 * target role will cause us to mark them (and their descendants) as dead
 * and cause any pending commands and associated devices to be removed.
 *
 * Called with PHY (pptr) locked.
 */
3286 static void
3287 pmcs_check_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3288 {
3289 	int nphy, result;
3290 	pmcs_phy_t *ctmp, *local, *local_list = NULL, *local_tail = NULL;
3291 	boolean_t kill_changed, changed;
3292 
3293 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3294 	    "%s: check %s", __func__, pptr->path);
3295 
3296 	/*
3297 	 * Step 1: Mark phy as not changed. We will mark it changed if we need
3298 	 * to retry.
3299 	 */
3300 	pptr->changed = 0;
3301 
3302 	/*
3303 	 * Reset the config_stop time. Although we're not actually configuring
3304 	 * anything here, we do want some indication of when to give up trying
3305 	 * if we can't communicate with the expander.
3306 	 */
3307 	pptr->config_stop = ddi_get_lbolt() +
3308 	    drv_usectohz(PMCS_MAX_CONFIG_TIME);
3309 
3310 	/*
3311 	 * Step 2: Figure out how many phys are in this expander. If
3312 	 * pmcs_expander_get_nphy returns 0 we ran out of resources,
3313 	 * so reschedule and try later. If it returns another error,
3314 	 * just return.
3315 	 */
3316 	nphy = pmcs_expander_get_nphy(pwp, pptr);
3317 	if (nphy <= 0) {
3318 		if ((nphy == 0) && (ddi_get_lbolt() < pptr->config_stop)) {
3319 			PHY_CHANGED(pwp, pptr);
3320 			RESTART_DISCOVERY(pwp);
3321 		} else {
3322 			pptr->config_stop = 0;
3323 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3324 			    "%s: Retries exhausted for %s, killing", __func__,
3325 			    pptr->path);
3326 			pmcs_kill_changed(pwp, pptr, 0);
3327 		}
3328 		return;
3329 	}
3330 
3331 	/*
	 * Step 3: If the number of phys doesn't agree, kill the old sub-tree.
3333 	 */
3334 	if (nphy != pptr->ncphy) {
3335 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3336 		    "%s: number of contained phys for %s changed from %d to %d",
3337 		    __func__, pptr->path, pptr->ncphy, nphy);
3338 		/*
3339 		 * Force a rescan of this expander after dead contents
3340 		 * are cleared and removed.
3341 		 */
3342 		pmcs_kill_changed(pwp, pptr, 0);
3343 		return;
3344 	}
3345 
3346 	/*
3347 	 * Step 4: if we're at the bottom of the stack, we're done
3348 	 * (we can't have any levels below us)
3349 	 */
3350 	if (pptr->level == PMCS_MAX_XPND-1) {
3351 		return;
3352 	}
3353 
3354 	/*
3355 	 * Step 5: Discover things about each phy in this expander.  We do
3356 	 * this by walking the current list of contained phys and doing a
3357 	 * content discovery for it to a local phy.
3358 	 */
3359 	ctmp = pptr->children;
3360 	ASSERT(ctmp);
3361 	if (ctmp == NULL) {
3362 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3363 		    "%s: No children attached to expander @ %s?", __func__,
3364 		    pptr->path);
3365 		return;
3366 	}
3367 
3368 	while (ctmp) {
3369 		/*
3370 		 * Allocate a local PHY to contain the proposed new contents
3371 		 * and link it to the rest of the local PHYs so that they
3372 		 * can all be freed later.
3373 		 */
3374 		local = pmcs_clone_phy(ctmp);
3375 
3376 		if (local_list == NULL) {
3377 			local_list = local;
3378 			local_tail = local;
3379 		} else {
3380 			local_tail->sibling = local;
3381 			local_tail = local;
3382 		}
3383 
3384 		/*
		 * Need to lock the local PHY since
		 * pmcs_expander_content_discover may call pmcs_clear_phy
		 * on it, which expects the PHY to be locked.
3388 		 */
3389 		pmcs_lock_phy(local);
3390 		result = pmcs_expander_content_discover(pwp, pptr, local);
3391 		pmcs_unlock_phy(local);
3392 		if (result <= 0) {
3393 			if (ddi_get_lbolt() < pptr->config_stop) {
3394 				PHY_CHANGED(pwp, pptr);
3395 				RESTART_DISCOVERY(pwp);
3396 			} else {
3397 				pptr->config_stop = 0;
3398 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3399 				    "%s: Retries exhausted for %s, killing",
3400 				    __func__, pptr->path);
3401 				pmcs_kill_changed(pwp, pptr, 0);
3402 			}
3403 
3404 			/*
3405 			 * Release all the local PHYs that we allocated.
3406 			 */
3407 			pmcs_free_phys(pwp, local_list);
3408 			return;
3409 		}
3410 
3411 		ctmp = ctmp->sibling;
3412 	}
3413 
3414 	/*
3415 	 * Step 6: Compare the local PHY's contents to our current PHY.  If
3416 	 * there are changes, take the appropriate action.
3417 	 * This is done in two steps (step 5 above, and 6 here) so that if we
3418 	 * have to bail during this process (e.g. pmcs_expander_content_discover
3419 	 * fails), we haven't actually changed the state of any of the real
3420 	 * PHYs.  Next time we come through here, we'll be starting over from
3421 	 * scratch.  This keeps us from marking a changed PHY as no longer
3422 	 * changed, but then having to bail only to come back next time and
3423 	 * think that the PHY hadn't changed.  If this were to happen, we
3424 	 * would fail to properly configure the device behind this PHY.
3425 	 */
3426 	local = local_list;
3427 	ctmp = pptr->children;
3428 
3429 	while (ctmp) {
3430 		changed = B_FALSE;
3431 		kill_changed = B_FALSE;
3432 
3433 		/*
3434 		 * We set local to local_list prior to this loop so that we
3435 		 * can simply walk the local_list while we walk this list.  The
3436 		 * two lists should be completely in sync.
3437 		 *
3438 		 * Clear the changed flag here.
3439 		 */
3440 		ctmp->changed = 0;
3441 
3442 		if (ctmp->dtype != local->dtype) {
3443 			if (ctmp->dtype != NOTHING) {
3444 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s "
3445 				    "type changed from %s to %s (killing)",
3446 				    __func__, ctmp->path, PHY_TYPE(ctmp),
3447 				    PHY_TYPE(local));
3448 				/*
3449 				 * Force a rescan of this expander after dead
3450 				 * contents are cleared and removed.
3451 				 */
3452 				changed = B_TRUE;
3453 				kill_changed = B_TRUE;
3454 			} else {
3455 				changed = B_TRUE;
3456 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3457 				    "%s: %s type changed from NOTHING to %s",
3458 				    __func__, ctmp->path, PHY_TYPE(local));
3459 			}
3460 
3461 		} else if (ctmp->atdt != local->atdt) {
3462 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s attached "
3463 			    "device type changed from %d to %d (killing)",
3464 			    __func__, ctmp->path, ctmp->atdt, local->atdt);
3465 			/*
3466 			 * Force a rescan of this expander after dead
3467 			 * contents are cleared and removed.
3468 			 */
3469 			changed = B_TRUE;
3470 
3471 			if (local->atdt == 0) {
3472 				kill_changed = B_TRUE;
3473 			}
3474 		} else if (ctmp->link_rate != local->link_rate) {
3475 			pmcs_prt(pwp, PMCS_PRT_INFO, "%s: %s changed speed from"
3476 			    " %s to %s", __func__, ctmp->path,
3477 			    pmcs_get_rate(ctmp->link_rate),
3478 			    pmcs_get_rate(local->link_rate));
3479 			/* If the speed changed from invalid, force rescan */
3480 			if (!PMCS_VALID_LINK_RATE(ctmp->link_rate)) {
3481 				changed = B_TRUE;
3482 				RESTART_DISCOVERY(pwp);
3483 			} else {
3484 				/* Just update to the new link rate */
3485 				ctmp->link_rate = local->link_rate;
3486 			}
3487 
3488 			if (!PMCS_VALID_LINK_RATE(local->link_rate)) {
3489 				kill_changed = B_TRUE;
3490 			}
3491 		} else if (memcmp(ctmp->sas_address, local->sas_address,
3492 		    sizeof (ctmp->sas_address)) != 0) {
3493 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: SASAddr "
3494 			    "for %s changed from " SAS_ADDR_FMT " to "
3495 			    SAS_ADDR_FMT " (kill old tree)", __func__,
3496 			    ctmp->path, SAS_ADDR_PRT(ctmp->sas_address),
3497 			    SAS_ADDR_PRT(local->sas_address));
3498 			/*
3499 			 * Force a rescan of this expander after dead
3500 			 * contents are cleared and removed.
3501 			 */
3502 			changed = B_TRUE;
3503 		} else {
3504 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3505 			    "%s: %s looks the same (type %s)",
3506 			    __func__, ctmp->path, PHY_TYPE(ctmp));
3507 			/*
3508 			 * If EXPANDER, still mark it changed so we
3509 			 * re-evaluate its contents.  If it's not an expander,
3510 			 * but it hasn't been configured, also mark it as
3511 			 * changed so that it will undergo configuration.
3512 			 */
3513 			if (ctmp->dtype == EXPANDER) {
3514 				changed = B_TRUE;
3515 			} else if ((ctmp->dtype != NOTHING) &&
3516 			    !ctmp->configured) {
3517 				ctmp->changed = 1;
3518 			} else {
3519 				/* It simply hasn't changed */
3520 				ctmp->changed = 0;
3521 			}
3522 		}
3523 
3524 		/*
3525 		 * If the PHY changed, call pmcs_kill_changed if indicated,
3526 		 * update its contents to reflect its current state and mark it
3527 		 * as changed.
3528 		 */
3529 		if (changed) {
3530 			/*
3531 			 * pmcs_kill_changed will mark the PHY as changed, so
3532 			 * only do PHY_CHANGED if we did not do kill_changed.
3533 			 */
3534 			if (kill_changed) {
3535 				pmcs_kill_changed(pwp, ctmp, 0);
3536 			} else {
3537 				/*
3538 				 * If we're not killing the device, it's not
3539 				 * dead.  Mark the PHY as changed.
3540 				 */
3541 				PHY_CHANGED(pwp, ctmp);
3542 
3543 				if (ctmp->dead) {
3544 					pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3545 					    "%s: Unmarking PHY %s dead, "
3546 					    "restarting discovery",
3547 					    __func__, ctmp->path);
3548 					ctmp->dead = 0;
3549 					RESTART_DISCOVERY(pwp);
3550 				}
3551 			}
3552 
3553 			/*
3554 			 * If the dtype of this PHY is now NOTHING, mark it as
3555 			 * unconfigured.  Set pend_dtype to what the new dtype
3556 			 * is.  It'll get updated at the end of the discovery
3557 			 * process.
3558 			 */
3559 			if (local->dtype == NOTHING) {
3560 				bzero(ctmp->sas_address,
3561 				    sizeof (local->sas_address));
3562 				ctmp->atdt = 0;
3563 				ctmp->link_rate = 0;
3564 				ctmp->pend_dtype = NOTHING;
3565 				ctmp->configured = 0;
3566 			} else {
3567 				(void) memcpy(ctmp->sas_address,
3568 				    local->sas_address,
3569 				    sizeof (local->sas_address));
3570 				ctmp->atdt = local->atdt;
3571 				ctmp->link_rate = local->link_rate;
3572 				ctmp->pend_dtype = local->dtype;
3573 			}
3574 		}
3575 
3576 		local = local->sibling;
3577 		ctmp = ctmp->sibling;
3578 	}
3579 
3580 	/*
3581 	 * If we got to here, that means we were able to see all the PHYs
3582 	 * and we can now update all of the real PHYs with the information
3583 	 * we got on the local PHYs.  Once that's done, free all the local
3584 	 * PHYs.
3585 	 */
3586 
3587 	pmcs_free_phys(pwp, local_list);
3588 }
3589 
3590 /*
3591  * Top level routine to check expanders.  We call pmcs_check_expander for
3592  * each expander.  Since we're not doing any configuration right now, it
3593  * doesn't matter if this is breadth-first.
3594  */
3595 static boolean_t
3596 pmcs_check_expanders(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3597 {
3598 	pmcs_phy_t *phyp, *pnext, *pchild;
3599 	boolean_t config_changed = B_FALSE;
3600 
3601 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s", __func__, pptr->path);
3602 
3603 	/*
3604 	 * Check each expander at this level
3605 	 */
3606 	phyp = pptr;
3607 	while (phyp && !config_changed) {
3608 		pmcs_lock_phy(phyp);
3609 
3610 		if ((phyp->dtype == EXPANDER) && phyp->changed &&
3611 		    !phyp->dead && !phyp->subsidiary &&
3612 		    phyp->configured) {
3613 			pmcs_check_expander(pwp, phyp);
3614 		}
3615 
3616 		pnext = phyp->sibling;
3617 		pmcs_unlock_phy(phyp);
3618 
3619 		mutex_enter(&pwp->config_lock);
3620 		config_changed = pwp->config_changed;
3621 		mutex_exit(&pwp->config_lock);
3622 
3623 		phyp = pnext;
3624 	}
3625 
3626 	if (config_changed) {
3627 		return (config_changed);
3628 	}
3629 
3630 	/*
3631 	 * Now check the children
3632 	 */
3633 	phyp = pptr;
3634 	while (phyp && !config_changed) {
3635 		pmcs_lock_phy(phyp);
3636 		pnext = phyp->sibling;
3637 		pchild = phyp->children;
3638 		pmcs_unlock_phy(phyp);
3639 
3640 		if (pchild) {
3641 			(void) pmcs_check_expanders(pwp, pchild);
3642 		}
3643 
3644 		mutex_enter(&pwp->config_lock);
3645 		config_changed = pwp->config_changed;
3646 		mutex_exit(&pwp->config_lock);
3647 
3648 		phyp = pnext;
3649 	}
3650 
3651 	/*
3652 	 * We're done
3653 	 */
3654 	return (config_changed);
3655 }
3656 
3657 /*
3658  * Called with softstate and PHY locked
3659  */
3660 static void
3661 pmcs_clear_expander(pmcs_hw_t *pwp, pmcs_phy_t *pptr, int level)
3662 {
3663 	pmcs_phy_t *ctmp;
3664 
3665 	ASSERT(mutex_owned(&pwp->lock));
3666 	ASSERT(mutex_owned(&pptr->phy_lock));
3667 	ASSERT(pptr->level < PMCS_MAX_XPND - 1);
3668 
3669 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: checking %s", __func__,
3670 	    pptr->path);
3671 
3672 	ctmp = pptr->children;
3673 	while (ctmp) {
3674 		/*
3675 		 * If the expander is dead, mark its children dead
3676 		 */
3677 		if (pptr->dead) {
3678 			ctmp->dead = 1;
3679 		}
3680 		if (ctmp->dtype == EXPANDER) {
3681 			pmcs_clear_expander(pwp, ctmp, level + 1);
3682 		}
3683 		ctmp = ctmp->sibling;
3684 	}
3685 
3686 	/*
3687 	 * If this expander is not dead, we're done here.
3688 	 */
3689 	if (!pptr->dead) {
3690 		return;
3691 	}
3692 
3693 	/*
3694 	 * Now snip out the list of children below us and release them
3695 	 */
3696 	ctmp = pptr->children;
3697 	while (ctmp) {
3698 		pmcs_phy_t *nxt = ctmp->sibling;
3699 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3700 		    "%s: dead PHY 0x%p (%s) (ref_count %d)", __func__,
3701 		    (void *)ctmp, ctmp->path, ctmp->ref_count);
3702 		/*
3703 		 * Put this PHY on the dead PHY list for the watchdog to
3704 		 * clean up after any outstanding work has completed.
3705 		 */
3706 		mutex_enter(&pwp->dead_phylist_lock);
3707 		ctmp->dead_next = pwp->dead_phys;
3708 		pwp->dead_phys = ctmp;
3709 		mutex_exit(&pwp->dead_phylist_lock);
3710 		pmcs_unlock_phy(ctmp);
3711 		ctmp = nxt;
3712 	}
3713 
3714 	pptr->children = NULL;
3715 
3716 	/*
3717 	 * Clear subsidiary phys as well.  Getting the parent's PHY lock
3718 	 * is only necessary if level == 0 since otherwise the parent is
3719 	 * already locked.
3720 	 */
3721 	if (!IS_ROOT_PHY(pptr)) {
3722 		if (level == 0) {
3723 			mutex_enter(&pptr->parent->phy_lock);
3724 		}
3725 		ctmp = pptr->parent->children;
3726 		if (level == 0) {
3727 			mutex_exit(&pptr->parent->phy_lock);
3728 		}
3729 	} else {
3730 		ctmp = pwp->root_phys;
3731 	}
3732 
3733 	while (ctmp) {
3734 		if (ctmp == pptr) {
3735 			ctmp = ctmp->sibling;
3736 			continue;
3737 		}
3738 		/*
3739 		 * We only need to lock subsidiary PHYs on the level 0
3740 		 * expander.  Any children of that expander, subsidiaries or
3741 		 * not, will already be locked.
3742 		 */
3743 		if (level == 0) {
3744 			pmcs_lock_phy(ctmp);
3745 		}
3746 		if (ctmp->dtype != EXPANDER || ctmp->subsidiary == 0 ||
3747 		    memcmp(ctmp->sas_address, pptr->sas_address,
3748 		    sizeof (ctmp->sas_address)) != 0) {
3749 			if (level == 0) {
3750 				pmcs_unlock_phy(ctmp);
3751 			}
3752 			ctmp = ctmp->sibling;
3753 			continue;
3754 		}
3755 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: subsidiary %s",
3756 		    __func__, ctmp->path);
3757 		pmcs_clear_phy(pwp, ctmp);
3758 		if (level == 0) {
3759 			pmcs_unlock_phy(ctmp);
3760 		}
3761 		ctmp = ctmp->sibling;
3762 	}
3763 
3764 	pmcs_clear_phy(pwp, pptr);
3765 }
3766 
3767 /*
3768  * Called with PHY locked and with scratch acquired. We return 0 if
 * we fail to allocate resources or notice that the configuration
 * changed while we were running the command. We return
3771  * less than zero if we had an I/O error or received an unsupported
3772  * configuration. Otherwise we return the number of phys in the
3773  * expander.
3774  */
3775 #define	DFM(m, y) if (m == NULL) m = y
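/*
 * A note on DFM ("default message"): it assigns m only if m is still
 * NULL, so in the fall-through case chains below the first matching
 * case supplies the message text and later DFM invocations are no-ops.
 */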
3776 static int
3777 pmcs_expander_get_nphy(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
3778 {
3779 	struct pmcwork *pwrk;
3780 	char buf[64];
3781 	const uint_t rdoff = 0x100;	/* returned data offset */
3782 	smp_response_frame_t *srf;
3783 	smp_report_general_resp_t *srgr;
3784 	uint32_t msg[PMCS_MSG_SIZE], *ptr, htag, status, ival;
3785 	int result;
3786 
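	/*
	 * 0x40001100, stored big-endian as the first dword of the SMP
	 * frame, encodes: frame type 0x40 (request), function 0x00
	 * (REPORT GENERAL), and a SAS-2 style allocated response length
	 * of 0x11 dwords in the third byte.  The retry path below clears
	 * that byte (the 0xff00 mask) to fall back to the SAS-1.1 form.
	 */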
3787 	ival = 0x40001100;
3788 again:
3789 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
3790 	if (pwrk == NULL) {
3791 		result = 0;
3792 		goto out;
3793 	}
3794 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
3795 	pwrk->arg = pwp->scratch;
3796 	pwrk->dtype = pptr->dtype;
3797 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3798 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3799 	if (ptr == NULL) {
3800 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
3801 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: GET_IQ_ENTRY failed",
3802 		    __func__);
3803 		pmcs_pwork(pwp, pwrk);
3804 		result = 0;
3805 		goto out;
3806 	}
3807 
3808 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
3809 	msg[1] = LE_32(pwrk->htag);
3810 	msg[2] = LE_32(pptr->device_id);
3811 	msg[3] = LE_32((4 << SMP_REQUEST_LENGTH_SHIFT) | SMP_INDIRECT_RESPONSE);
3812 	/*
3813 	 * Send SMP REPORT GENERAL (of either SAS1.1 or SAS2 flavors).
3814 	 */
3815 	msg[4] = BE_32(ival);
3816 	msg[5] = 0;
3817 	msg[6] = 0;
3818 	msg[7] = 0;
3819 	msg[8] = 0;
3820 	msg[9] = 0;
3821 	msg[10] = 0;
3822 	msg[11] = 0;
3823 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
3824 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
3825 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
3826 	msg[15] = 0;
3827 
3828 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
3829 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
3830 	htag = pwrk->htag;
3831 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
3832 
3833 	pmcs_unlock_phy(pptr);
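	/*
	 * WAIT_FOR() waits up to the given number of milliseconds for the
	 * work structure to be completed; result is set non-zero if the
	 * command did not complete in time.
	 */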
3834 	WAIT_FOR(pwrk, 1000, result);
3835 	pmcs_lock_phy(pptr);
3836 	pmcs_pwork(pwp, pwrk);
3837 
3838 	mutex_enter(&pwp->config_lock);
3839 	if (pwp->config_changed) {
3840 		RESTART_DISCOVERY_LOCKED(pwp);
3841 		mutex_exit(&pwp->config_lock);
3842 		result = 0;
3843 		goto out;
3844 	}
3845 	mutex_exit(&pwp->config_lock);
3846 
3847 	if (result) {
3848 		pmcs_timed_out(pwp, htag, __func__);
3851 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
3852 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3853 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
3854 			    __func__, htag);
3855 		} else {
3856 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3857 			    "%s: Issuing SMP ABORT for htag 0x%08x",
3858 			    __func__, htag);
3859 		}
3860 		result = 0;
3861 		goto out;
3862 	}
3863 	ptr = (void *)pwp->scratch;
3864 	status = LE_32(ptr[2]);
3865 	if (status == PMCOUT_STATUS_UNDERFLOW ||
3866 	    status == PMCOUT_STATUS_OVERFLOW) {
3867 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
3868 		    "%s: over/underflow", __func__);
3869 		status = PMCOUT_STATUS_OK;
3870 	}
3871 	srf = (smp_response_frame_t *)&((uint32_t *)pwp->scratch)[rdoff >> 2];
3872 	srgr = (smp_report_general_resp_t *)
3873 	    &((uint32_t *)pwp->scratch)[(rdoff >> 2)+1];
3874 
3875 	if (status != PMCOUT_STATUS_OK) {
3876 		char *nag = NULL;
3877 		(void) snprintf(buf, sizeof (buf),
3878 		    "%s: SMP op failed (0x%x)", __func__, status);
3879 		switch (status) {
3880 		case PMCOUT_STATUS_IO_PORT_IN_RESET:
3881 			DFM(nag, "I/O Port In Reset");
3882 			/* FALLTHROUGH */
3883 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
3884 			DFM(nag, "Hardware Timeout");
3885 			/* FALLTHROUGH */
3886 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
3887 			DFM(nag, "Internal SMP Resource Failure");
3888 			/* FALLTHROUGH */
3889 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
3890 			DFM(nag, "PHY Not Ready");
3891 			/* FALLTHROUGH */
3892 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
3893 			DFM(nag, "Connection Rate Not Supported");
3894 			/* FALLTHROUGH */
3895 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
3896 			DFM(nag, "Open Retry Timeout");
3897 			/* FALLTHROUGH */
3898 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
3899 			DFM(nag, "Response Connection Error");
3900 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
3901 			    "%s: expander %s SMP operation failed (%s)",
3902 			    __func__, pptr->path, nag);
3903 			break;
3904 
3905 		/*
3906 		 * For the IO_DS_NON_OPERATIONAL case, we need to kick off
3907 		 * device state recovery and return 0 so that the caller
3908 		 * doesn't assume this expander is dead for good.
3909 		 */
3910 		case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL: {
3911 			pmcs_xscsi_t *xp = pptr->target;
3912 
3913 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3914 			    "%s: expander %s device state non-operational",
3915 			    __func__, pptr->path);
3916 
3917 			if (xp == NULL) {
3918 				pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
3919 				    "%s: No target to do DS recovery for PHY "
3920 				    "%p (%s), attempting PHY hard reset",
3921 				    __func__, (void *)pptr, pptr->path);
3922 				(void) pmcs_reset_phy(pwp, pptr,
3923 				    PMCS_PHYOP_HARD_RESET);
3924 				break;
3925 			}
3926 
3927 			mutex_enter(&xp->statlock);
3928 			pmcs_start_dev_state_recovery(xp, pptr);
3929 			mutex_exit(&xp->statlock);
3930 			break;
3931 		}
3932 
3933 		default:
3934 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
3935 			result = -EIO;
3936 			break;
3937 		}
3938 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
3939 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3940 		    "%s: bad response frame type 0x%x",
3941 		    __func__, srf->srf_frame_type);
3942 		result = -EINVAL;
3943 	} else if (srf->srf_function != SMP_FUNC_REPORT_GENERAL) {
3944 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
3945 		    __func__, srf->srf_function);
3946 		result = -EINVAL;
3947 	} else if (srf->srf_result != 0) {
		/*
		 * A result of 3 (invalid request frame length) when we
		 * supplied a SAS-2.0 allocation length value suggests the
		 * expander only speaks SAS-1.1, so retry without it.
		 */
3953 		if (srf->srf_result == 3 && (ival & 0xff00)) {
3954 			ival &= ~0xff00;
3955 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
			    "%s: err 0x%x with SAS2 request - retry with SAS1",
3957 			    __func__, srf->srf_result);
3958 			goto again;
3959 		}
3960 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response 0x%x",
3961 		    __func__, srf->srf_result);
3962 		result = -EINVAL;
3963 	} else if (srgr->srgr_configuring) {
3964 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
3965 		    "%s: expander at phy %s is still configuring",
3966 		    __func__, pptr->path);
3967 		result = 0;
3968 	} else {
3969 		result = srgr->srgr_number_of_phys;
3970 		if (ival & 0xff00) {
3971 			pptr->tolerates_sas2 = 1;
3972 		}
3973 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
3974 		    "%s has %d phys and %s SAS2", pptr->path, result,
		    pptr->tolerates_sas2 ? "tolerates" : "does not tolerate");
3976 	}
3977 out:
3978 	return (result);
3979 }
3980 
3981 /*
3982  * Called with expander locked (and thus, pptr) as well as all PHYs up to
3983  * the root, and scratch acquired. Return 0 if we fail to allocate resources
3984  * or notice that the configuration changed while we were running the command.
3985  *
3986  * We return less than zero if we had an I/O error or received an
3987  * unsupported configuration.
3988  */
3989 static int
3990 pmcs_expander_content_discover(pmcs_hw_t *pwp, pmcs_phy_t *expander,
3991     pmcs_phy_t *pptr)
3992 {
3993 	struct pmcwork *pwrk;
3994 	char buf[64];
3995 	uint8_t sas_address[8];
3996 	uint8_t att_sas_address[8];
3997 	smp_response_frame_t *srf;
3998 	smp_discover_resp_t *sdr;
3999 	const uint_t rdoff = 0x100;	/* returned data offset */
4000 	uint8_t *roff;
4001 	uint32_t status, *ptr, msg[PMCS_MSG_SIZE], htag;
4002 	int result;
4003 	uint8_t	ini_support;
4004 	uint8_t	tgt_support;
4005 
4006 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, expander);
4007 	if (pwrk == NULL) {
4008 		result = 0;
4009 		goto out;
4010 	}
4011 	(void) memset(pwp->scratch, 0x77, PMCS_SCRATCH_SIZE);
4012 	pwrk->arg = pwp->scratch;
4013 	pwrk->dtype = expander->dtype;
4014 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, PMCIN_SMP_REQUEST));
4015 	msg[1] = LE_32(pwrk->htag);
4016 	msg[2] = LE_32(expander->device_id);
4017 	msg[3] = LE_32((12 << SMP_REQUEST_LENGTH_SHIFT) |
4018 	    SMP_INDIRECT_RESPONSE);
4019 	/*
4020 	 * Send SMP DISCOVER (of either SAS1.1 or SAS2 flavors).
4021 	 */
4022 	if (expander->tolerates_sas2) {
4023 		msg[4] = BE_32(0x40101B00);
4024 	} else {
4025 		msg[4] = BE_32(0x40100000);
4026 	}
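	/*
	 * In both forms the first byte is the SMP frame type (0x40,
	 * request) and the second is the function (0x10, DISCOVER); the
	 * SAS-2 form additionally supplies an allocated response length
	 * of 0x1B dwords in the third byte.
	 */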
4027 	msg[5] = 0;
4028 	msg[6] = BE_32((pptr->phynum << 16));
4029 	msg[7] = 0;
4030 	msg[8] = 0;
4031 	msg[9] = 0;
4032 	msg[10] = 0;
4033 	msg[11] = 0;
4034 	msg[12] = LE_32(DWORD0(pwp->scratch_dma+rdoff));
4035 	msg[13] = LE_32(DWORD1(pwp->scratch_dma+rdoff));
4036 	msg[14] = LE_32(PMCS_SCRATCH_SIZE - rdoff);
4037 	msg[15] = 0;
4038 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4039 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4040 	if (ptr == NULL) {
4041 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4042 		result = 0;
4043 		goto out;
4044 	}
4045 
4046 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
4047 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4048 	htag = pwrk->htag;
4049 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4050 
4051 	/*
4052 	 * Drop PHY lock while waiting so other completions aren't potentially
4053 	 * blocked.
4054 	 */
4055 	pmcs_unlock_phy(expander);
4056 	WAIT_FOR(pwrk, 1000, result);
4057 	pmcs_lock_phy(expander);
4058 	pmcs_pwork(pwp, pwrk);
4059 
4060 	mutex_enter(&pwp->config_lock);
4061 	if (pwp->config_changed) {
4062 		RESTART_DISCOVERY_LOCKED(pwp);
4063 		mutex_exit(&pwp->config_lock);
4064 		result = 0;
4065 		goto out;
4066 	}
4067 	mutex_exit(&pwp->config_lock);
4068 
4069 	if (result) {
4070 		pmcs_prt(pwp, PMCS_PRT_WARN, pmcs_timeo, __func__);
4071 		if (pmcs_abort(pwp, expander, htag, 0, 0)) {
4072 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4073 			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
4074 			    __func__, htag);
4075 		} else {
4076 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4077 			    "%s: Issuing SMP ABORT for htag 0x%08x",
4078 			    __func__, htag);
4079 		}
4080 		result = -ETIMEDOUT;
4081 		goto out;
4082 	}
4083 	ptr = (void *)pwp->scratch;
4084 	/*
	 * Point roff at the returned data within scratch
4086 	 */
4087 	roff = pwp->scratch;
4088 	roff += rdoff;
4089 	srf = (smp_response_frame_t *)roff;
4090 	sdr = (smp_discover_resp_t *)(roff+4);
4091 	status = LE_32(ptr[2]);
4092 	if (status == PMCOUT_STATUS_UNDERFLOW ||
4093 	    status == PMCOUT_STATUS_OVERFLOW) {
4094 		pmcs_prt(pwp, PMCS_PRT_DEBUG_UNDERFLOW,
4095 		    "%s: over/underflow", __func__);
4096 		status = PMCOUT_STATUS_OK;
4097 	}
4098 	if (status != PMCOUT_STATUS_OK) {
4099 		char *nag = NULL;
4100 		(void) snprintf(buf, sizeof (buf),
4101 		    "%s: SMP op failed (0x%x)", __func__, status);
4102 		switch (status) {
4103 		case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
4104 			DFM(nag, "Hardware Timeout");
4105 			/* FALLTHROUGH */
4106 		case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
4107 			DFM(nag, "Internal SMP Resource Failure");
4108 			/* FALLTHROUGH */
4109 		case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
4110 			DFM(nag, "PHY Not Ready");
4111 			/* FALLTHROUGH */
4112 		case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
4113 			DFM(nag, "Connection Rate Not Supported");
4114 			/* FALLTHROUGH */
4115 		case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
4116 			DFM(nag, "Open Retry Timeout");
4117 			/* FALLTHROUGH */
4118 		case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
4119 			DFM(nag, "Response Connection Error");
4120 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4121 			    "%s: expander %s SMP operation failed (%s)",
4122 			    __func__, pptr->path, nag);
4123 			break;
4124 		default:
4125 			pmcs_print_entry(pwp, PMCS_PRT_DEBUG, buf, ptr);
4126 			result = -EIO;
4127 			break;
4128 		}
4129 		goto out;
4130 	} else if (srf->srf_frame_type != SMP_FRAME_TYPE_RESPONSE) {
4131 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4132 		    "%s: bad response frame type 0x%x",
4133 		    __func__, srf->srf_frame_type);
4134 		result = -EINVAL;
4135 		goto out;
4136 	} else if (srf->srf_function != SMP_FUNC_DISCOVER) {
4137 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad response function 0x%x",
4138 		    __func__, srf->srf_function);
4139 		result = -EINVAL;
4140 		goto out;
4141 	} else if (srf->srf_result != SMP_RES_FUNCTION_ACCEPTED) {
4142 		result = pmcs_smp_function_result(pwp, srf);
4143 		/* Need not fail if PHY is Vacant */
4144 		if (result != SMP_RES_PHY_VACANT) {
4145 			result = -EINVAL;
4146 			goto out;
4147 		}
4148 	}
4149 
4150 	ini_support = (sdr->sdr_attached_sata_host |
4151 	    (sdr->sdr_attached_smp_initiator << 1) |
4152 	    (sdr->sdr_attached_stp_initiator << 2) |
4153 	    (sdr->sdr_attached_ssp_initiator << 3));
4154 
4155 	tgt_support = (sdr->sdr_attached_sata_device |
4156 	    (sdr->sdr_attached_smp_target << 1) |
4157 	    (sdr->sdr_attached_stp_target << 2) |
4158 	    (sdr->sdr_attached_ssp_target << 3));
4159 
4160 	pmcs_wwn2barray(BE_64(sdr->sdr_sas_addr), sas_address);
4161 	pmcs_wwn2barray(BE_64(sdr->sdr_attached_sas_addr), att_sas_address);
4162 
4163 	switch (sdr->sdr_attached_device_type) {
4164 	case SAS_IF_DTYPE_ENDPOINT:
4165 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4166 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4167 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4168 		    pptr->path,
4169 		    sdr->sdr_attached_device_type,
4170 		    sdr->sdr_negotiated_logical_link_rate,
4171 		    ini_support,
4172 		    tgt_support,
4173 		    SAS_ADDR_PRT(sas_address),
4174 		    SAS_ADDR_PRT(att_sas_address),
4175 		    sdr->sdr_attached_phy_identifier);
4176 
4177 		if (sdr->sdr_attached_sata_device ||
4178 		    sdr->sdr_attached_stp_target) {
4179 			pptr->dtype = SATA;
4180 		} else if (sdr->sdr_attached_ssp_target) {
4181 			pptr->dtype = SAS;
4182 		} else if (tgt_support || ini_support) {
4183 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s: %s has "
4184 			    "tgt support=%x init support=(%x)",
4185 			    __func__, pptr->path, tgt_support, ini_support);
4186 		}
4187 		break;
4188 	case SAS_IF_DTYPE_EDGE:
4189 	case SAS_IF_DTYPE_FANOUT:
4190 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4191 		    "exp_content: %s atdt=0x%x lr=%x is=%x ts=%x SAS="
4192 		    SAS_ADDR_FMT " attSAS=" SAS_ADDR_FMT " atPHY=%x",
4193 		    pptr->path,
4194 		    sdr->sdr_attached_device_type,
4195 		    sdr->sdr_negotiated_logical_link_rate,
4196 		    ini_support,
4197 		    tgt_support,
4198 		    SAS_ADDR_PRT(sas_address),
4199 		    SAS_ADDR_PRT(att_sas_address),
4200 		    sdr->sdr_attached_phy_identifier);
4201 		if (sdr->sdr_attached_smp_target) {
4202 			/*
4203 			 * Avoid configuring phys that just point back
4204 			 * at a parent phy
4205 			 */
4206 			if (expander->parent &&
4207 			    memcmp(expander->parent->sas_address,
4208 			    att_sas_address,
4209 			    sizeof (expander->parent->sas_address)) == 0) {
4210 				pmcs_prt(pwp, PMCS_PRT_DEBUG3,
4211 				    "%s: skipping port back to parent "
4212 				    "expander (%s)", __func__, pptr->path);
4213 				pptr->dtype = NOTHING;
4214 				break;
4215 			}
4216 			pptr->dtype = EXPANDER;
4217 
4218 		} else if (tgt_support || ini_support) {
4219 			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, "%s has "
4220 			    "tgt support=%x init support=(%x)",
4221 			    pptr->path, tgt_support, ini_support);
4222 			pptr->dtype = EXPANDER;
4223 		}
4224 		break;
4225 	default:
4226 		pptr->dtype = NOTHING;
4227 		break;
4228 	}
4229 	if (pptr->dtype != NOTHING) {
4230 		pmcs_phy_t *ctmp;
4231 
		/*
		 * If the attached device is a SATA device and the expander
		 * is (possibly) a SAS2 compliant expander, check whether
		 * the ATTACHED DEVICE NAME field holds an NAA=5 WWN (the
		 * NAA code is the high nibble of the first byte) and, if
		 * so, use that as the SAS Address for this device.  Note
		 * that roff[] is a byte array, so the NAA nibble is
		 * extracted with a shift of 4, not 8.
		 */
		if (expander->tolerates_sas2 && pptr->dtype == SATA &&
		    (roff[SAS_ATTACHED_NAME_OFFSET] >> 4) == 0x5) {
4240 			(void) memcpy(pptr->sas_address,
4241 			    &roff[SAS_ATTACHED_NAME_OFFSET], 8);
4242 		} else {
4243 			(void) memcpy(pptr->sas_address, att_sas_address, 8);
4244 		}
4245 		pptr->atdt = (sdr->sdr_attached_device_type);
		/*
		 * Now walk up from the expander's parent to the top, to
		 * make sure we only claim the lowest link_rate found
		 * anywhere along the path.
		 */
4250 		for (ctmp = expander->parent; ctmp; ctmp = ctmp->parent) {
4251 			if (ctmp->link_rate <
4252 			    sdr->sdr_negotiated_logical_link_rate) {
4253 				pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
4254 				    "%s: derating link rate from %x to %x due "
4255 				    "to %s being slower", pptr->path,
4256 				    sdr->sdr_negotiated_logical_link_rate,
4257 				    ctmp->link_rate,
4258 				    ctmp->path);
4259 				sdr->sdr_negotiated_logical_link_rate =
4260 				    ctmp->link_rate;
4261 			}
4262 		}
4263 		pptr->link_rate = sdr->sdr_negotiated_logical_link_rate;
4264 		pptr->state.prog_min_rate = sdr->sdr_prog_min_phys_link_rate;
4265 		pptr->state.hw_min_rate = sdr->sdr_hw_min_phys_link_rate;
4266 		pptr->state.prog_max_rate = sdr->sdr_prog_max_phys_link_rate;
4267 		pptr->state.hw_max_rate = sdr->sdr_hw_max_phys_link_rate;
4268 		PHY_CHANGED(pwp, pptr);
4269 	} else {
4270 		pmcs_clear_phy(pwp, pptr);
4271 	}
4272 	result = 1;
4273 out:
4274 	return (result);
4275 }
4276 
4277 /*
4278  * Get a work structure and assign it a tag with type and serial number
4279  * If a structure is returned, it is returned locked.
4280  */
4281 pmcwork_t *
4282 pmcs_gwork(pmcs_hw_t *pwp, uint32_t tag_type, pmcs_phy_t *phyp)
4283 {
4284 	pmcwork_t *p;
4285 	uint16_t snum;
4286 	uint32_t off;
4287 
4288 	mutex_enter(&pwp->wfree_lock);
4289 	p = STAILQ_FIRST(&pwp->wf);
4290 	if (p == NULL) {
4291 		/*
4292 		 * If we couldn't get a work structure, it's time to bite
4293 		 * the bullet, grab the pfree_lock and copy over all the
4294 		 * work structures from the pending free list to the actual
4295 		 * free list.  This shouldn't happen all that often.
4296 		 */
4297 		mutex_enter(&pwp->pfree_lock);
4298 		pwp->wf.stqh_first = pwp->pf.stqh_first;
4299 		pwp->wf.stqh_last = pwp->pf.stqh_last;
4300 		STAILQ_INIT(&pwp->pf);
4301 		mutex_exit(&pwp->pfree_lock);
4302 
4303 		p = STAILQ_FIRST(&pwp->wf);
4304 		if (p == NULL) {
4305 			mutex_exit(&pwp->wfree_lock);
4306 			return (NULL);
4307 		}
4308 	}
4309 	STAILQ_REMOVE(&pwp->wf, p, pmcwork, next);
4310 	snum = pwp->wserno++;
4311 	mutex_exit(&pwp->wfree_lock);
4312 
4313 	off = p - pwp->work;
4314 
4315 	mutex_enter(&p->lock);
4316 	ASSERT(p->state == PMCS_WORK_STATE_NIL);
4317 	ASSERT(p->htag == PMCS_TAG_FREE);
4318 	p->htag = (tag_type << PMCS_TAG_TYPE_SHIFT) & PMCS_TAG_TYPE_MASK;
4319 	p->htag |= ((snum << PMCS_TAG_SERNO_SHIFT) & PMCS_TAG_SERNO_MASK);
4320 	p->htag |= ((off << PMCS_TAG_INDEX_SHIFT) & PMCS_TAG_INDEX_MASK);
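	/*
	 * The tag now encodes three fields: the tag type, a rolling
	 * serial number, and the index of this structure within the work
	 * area.  pmcs_tag2wp() recovers the structure via the index field
	 * and uses the full tag, serial number included, to reject
	 * completions carrying stale tags.
	 */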
4321 	p->start = gethrtime();
4322 	p->state = PMCS_WORK_STATE_READY;
4323 	p->ssp_event = 0;
4324 	p->dead = 0;
4325 
4326 	if (phyp) {
4327 		p->phy = phyp;
4328 		pmcs_inc_phy_ref_count(phyp);
4329 	}
4330 
4331 	return (p);
4332 }
4333 
4334 /*
4335  * Called with pwrk lock held.  Returned with lock released.
4336  */
4337 void
4338 pmcs_pwork(pmcs_hw_t *pwp, pmcwork_t *p)
4339 {
4340 	ASSERT(p != NULL);
4341 	ASSERT(mutex_owned(&p->lock));
4342 
4343 	p->last_ptr = p->ptr;
4344 	p->last_arg = p->arg;
4345 	p->last_phy = p->phy;
4346 	p->last_xp = p->xp;
4347 	p->last_htag = p->htag;
4348 	p->last_state = p->state;
4349 	p->finish = gethrtime();
4350 
4351 	if (p->phy) {
4352 		pmcs_dec_phy_ref_count(p->phy);
4353 	}
4354 
4355 	p->state = PMCS_WORK_STATE_NIL;
4356 	p->htag = PMCS_TAG_FREE;
4357 	p->xp = NULL;
4358 	p->ptr = NULL;
4359 	p->arg = NULL;
4360 	p->phy = NULL;
4361 	p->timer = 0;
4362 	mutex_exit(&p->lock);
4363 
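	/*
	 * Return the structure to a free list.  If wfree_lock is
	 * contended, put the structure on the pending-free list instead
	 * (pmcs_gwork() drains that list when the free list runs dry),
	 * so this path never has to block on the free list lock.
	 */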
4364 	if (mutex_tryenter(&pwp->wfree_lock) == 0) {
4365 		mutex_enter(&pwp->pfree_lock);
4366 		STAILQ_INSERT_TAIL(&pwp->pf, p, next);
4367 		mutex_exit(&pwp->pfree_lock);
4368 	} else {
4369 		STAILQ_INSERT_TAIL(&pwp->wf, p, next);
4370 		mutex_exit(&pwp->wfree_lock);
4371 	}
4372 }
4373 
4374 /*
4375  * Find a work structure based upon a tag and make sure that the tag
4376  * serial number matches the work structure we've found.
4377  * If a structure is found, its lock is held upon return.
4378  */
4379 pmcwork_t *
4380 pmcs_tag2wp(pmcs_hw_t *pwp, uint32_t htag)
4381 {
4382 	pmcwork_t *p;
4383 	uint32_t idx = PMCS_TAG_INDEX(htag);
4384 
4385 	p = &pwp->work[idx];
4386 
4387 	mutex_enter(&p->lock);
4388 	if (p->htag == htag) {
4389 		return (p);
4390 	}
4391 	mutex_exit(&p->lock);
4392 	pmcs_prt(pwp, PMCS_PRT_DEBUG2, "INDEX 0x%x HTAG 0x%x got p->htag 0x%x",
4393 	    idx, htag, p->htag);
4394 	return (NULL);
4395 }
4396 
4397 /*
4398  * Issue an abort for a command or for all commands.
4399  *
4400  * Since this can be called from interrupt context,
4401  * we don't wait for completion if wait is not set.
4402  *
4403  * Called with PHY lock held.
4404  */
4405 int
4406 pmcs_abort(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint32_t tag, int all_cmds,
4407     int wait)
4408 {
4409 	pmcwork_t *pwrk;
4410 	pmcs_xscsi_t *tgt;
4411 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
4412 	int result, abt_type;
4413 	uint32_t abt_htag, status;
4414 
4415 	if (pptr->abort_all_start) {
4416 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ABORT_ALL for (%s) already"
4417 		    " in progress.", __func__, pptr->path);
4418 		return (EBUSY);
4419 	}
4420 
4421 	switch (pptr->dtype) {
4422 	case SAS:
4423 		abt_type = PMCIN_SSP_ABORT;
4424 		break;
4425 	case SATA:
4426 		abt_type = PMCIN_SATA_ABORT;
4427 		break;
4428 	case EXPANDER:
4429 		abt_type = PMCIN_SMP_ABORT;
4430 		break;
4431 	default:
4432 		return (0);
4433 	}
4434 
4435 	pwrk = pmcs_gwork(pwp, wait ? PMCS_TAG_TYPE_WAIT : PMCS_TAG_TYPE_NONE,
4436 	    pptr);
4437 
4438 	if (pwrk == NULL) {
4439 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
4440 		return (ENOMEM);
4441 	}
4442 
4443 	pwrk->dtype = pptr->dtype;
4444 	if (wait) {
4445 		pwrk->arg = msg;
4446 	}
4447 	if (pptr->valid_device_id == 0) {
4448 		pmcs_pwork(pwp, pwrk);
4449 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
4450 		return (ENODEV);
4451 	}
4452 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, abt_type));
4453 	msg[1] = LE_32(pwrk->htag);
4454 	msg[2] = LE_32(pptr->device_id);
4455 	if (all_cmds) {
4456 		msg[3] = 0;
4457 		msg[4] = LE_32(1);
4458 		pwrk->ptr = NULL;
4459 		pptr->abort_all_start = gethrtime();
4460 	} else {
4461 		msg[3] = LE_32(tag);
4462 		msg[4] = 0;
4463 		pwrk->ptr = &tag;
4464 	}
4465 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4466 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4467 	if (ptr == NULL) {
4468 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4469 		pmcs_pwork(pwp, pwrk);
4470 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
4471 		return (ENOMEM);
4472 	}
4473 
4474 	COPY_MESSAGE(ptr, msg, 5);
4475 	if (all_cmds) {
4476 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4477 		    "%s: aborting all commands for %s device %s. (htag=0x%x)",
4478 		    __func__, pmcs_get_typename(pptr->dtype), pptr->path,
4479 		    msg[1]);
4480 	} else {
4481 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4482 		    "%s: aborting tag 0x%x for %s device %s. (htag=0x%x)",
4483 		    __func__, tag, pmcs_get_typename(pptr->dtype), pptr->path,
4484 		    msg[1]);
4485 	}
4486 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4487 
4488 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4489 	if (!wait) {
4490 		mutex_exit(&pwrk->lock);
4491 		return (0);
4492 	}
4493 
4494 	abt_htag = pwrk->htag;
4495 	pmcs_unlock_phy(pwrk->phy);
4496 	WAIT_FOR(pwrk, 1000, result);
4497 	pmcs_lock_phy(pwrk->phy);
4498 
4499 	tgt = pwrk->xp;
4500 	pmcs_pwork(pwp, pwrk);
4501 
4502 	if (tgt != NULL) {
4503 		mutex_enter(&tgt->aqlock);
4504 		if (!STAILQ_EMPTY(&tgt->aq)) {
4505 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4506 			    "%s: Abort complete (result=0x%x), but "
4507 			    "aq not empty (tgt 0x%p), waiting",
4508 			    __func__, result, (void *)tgt);
4509 			cv_wait(&tgt->abort_cv, &tgt->aqlock);
4510 		}
4511 		mutex_exit(&tgt->aqlock);
4512 	}
4513 
4514 	if (all_cmds) {
4515 		pptr->abort_all_start = 0;
4516 		cv_signal(&pptr->abort_all_cv);
4517 	}
4518 
4519 	if (result) {
4520 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4521 		    "%s: Abort (htag 0x%08x) request timed out",
4522 		    __func__, abt_htag);
4523 		if (tgt != NULL) {
4524 			mutex_enter(&tgt->statlock);
4525 			if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
4526 			    (tgt->dev_state !=
4527 			    PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
4528 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4529 				    "%s: Trying DS error recovery for tgt 0x%p",
4530 				    __func__, (void *)tgt);
4531 				(void) pmcs_send_err_recovery_cmd(pwp,
4532 				    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
4533 			}
4534 			mutex_exit(&tgt->statlock);
4535 		}
4536 		return (ETIMEDOUT);
4537 	}
4538 
4539 	status = LE_32(msg[2]);
4540 	if (status != PMCOUT_STATUS_OK) {
		/*
		 * The only non-success statuses are IO_NOT_VALID and
		 * IO_ABORT_IN_PROGRESS.
		 * For IO_ABORT_IN_PROGRESS, it is the other ABORT
		 * command's status that is of concern, so this duplicate
		 * command's status can be ignored.
		 * IO_NOT_VALID is not an error per se.  For the abort of
		 * a single I/O we complete the command anyway.  If,
		 * however, we were aborting all commands, it is a
		 * problem, as IO_NOT_VALID really means that the I/O or
		 * device is not there, so the discovery process will
		 * take care of the cleanup.
		 */
4553 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: abort result 0x%x",
4554 		    __func__, LE_32(msg[2]));
4555 		if (all_cmds) {
4556 			PHY_CHANGED(pwp, pptr);
4557 			RESTART_DISCOVERY(pwp);
4558 		} else {
4559 			return (EINVAL);
4560 		}
4561 
4562 		return (0);
4563 	}
4564 
4565 	if (tgt != NULL) {
4566 		mutex_enter(&tgt->statlock);
4567 		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
4568 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4569 			    "%s: Restoring OPERATIONAL dev_state for tgt 0x%p",
4570 			    __func__, (void *)tgt);
4571 			(void) pmcs_send_err_recovery_cmd(pwp,
4572 			    PMCS_DEVICE_STATE_OPERATIONAL, tgt);
4573 		}
4574 		mutex_exit(&tgt->statlock);
4575 	}
4576 
4577 	return (0);
4578 }
4579 
4580 /*
4581  * Issue a task management function to an SSP device.
4582  *
4583  * Called with PHY lock held.
4584  * statlock CANNOT be held upon entry.
4585  */
4586 int
4587 pmcs_ssp_tmf(pmcs_hw_t *pwp, pmcs_phy_t *pptr, uint8_t tmf, uint32_t tag,
4588     uint64_t lun, uint32_t *response)
4589 {
4590 	int result, ds;
4591 	uint8_t local[PMCS_QENTRY_SIZE << 1], *xd;
4592 	sas_ssp_rsp_iu_t *rptr = (void *)local;
4593 	static const uint8_t ssp_rsp_evec[] = {
4594 		0x58, 0x61, 0x56, 0x72, 0x00
4595 	};
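	/*
	 * Decoded per the transformation vector rules described above
	 * pmcs_endian_transform(): copy 8 bytes, byte-swap one 16-bit
	 * word, copy 6 bytes, byte-swap two 32-bit words, which covers
	 * the 24-byte response IU header (SAS_RSP_HDR_SIZE).
	 */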
4596 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
4597 	struct pmcwork *pwrk;
4598 	pmcs_xscsi_t *xp;
4599 
4600 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4601 	if (pwrk == NULL) {
4602 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
4603 		return (ENOMEM);
4604 	}
4605 	/*
4606 	 * NB: We use the PMCS_OQ_GENERAL outbound queue
4607 	 * NB: so as to not get entangled in normal I/O
4608 	 * NB: processing.
4609 	 */
4610 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
4611 	    PMCIN_SSP_INI_TM_START));
4612 	msg[1] = LE_32(pwrk->htag);
4613 	msg[2] = LE_32(pptr->device_id);
4614 	if (tmf == SAS_ABORT_TASK || tmf == SAS_QUERY_TASK) {
4615 		msg[3] = LE_32(tag);
4616 	} else {
4617 		msg[3] = 0;
4618 	}
4619 	msg[4] = LE_32(tmf);
4620 	msg[5] = BE_32((uint32_t)lun);
4621 	msg[6] = BE_32((uint32_t)(lun >> 32));
4622 	msg[7] = LE_32(PMCIN_MESSAGE_REPORT);
4623 
4624 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4625 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4626 	if (ptr == NULL) {
4627 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4628 		pmcs_pwork(pwp, pwrk);
4629 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
4630 		return (ENOMEM);
4631 	}
4632 	COPY_MESSAGE(ptr, msg, 7);
4633 	pwrk->arg = msg;
4634 	pwrk->dtype = pptr->dtype;
4635 
4636 	xp = pptr->target;
4637 	if (xp != NULL) {
4638 		mutex_enter(&xp->statlock);
4639 		if (xp->dev_state == PMCS_DEVICE_STATE_NON_OPERATIONAL) {
4640 			mutex_exit(&xp->statlock);
4641 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4642 			pmcs_pwork(pwp, pwrk);
4643 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Not sending '%s'"
4644 			    " because DS is '%s'", __func__, pmcs_tmf2str(tmf),
4645 			    pmcs_status_str
4646 			    (PMCOUT_STATUS_IO_DS_NON_OPERATIONAL));
4647 			return (EIO);
4648 		}
4649 		mutex_exit(&xp->statlock);
4650 	}
4651 
4652 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
4653 	    "%s: sending '%s' to %s (lun %llu) tag 0x%x", __func__,
4654 	    pmcs_tmf2str(tmf), pptr->path, (unsigned long long) lun, tag);
4655 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4656 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4657 
4658 	pmcs_unlock_phy(pptr);
4659 	/*
	 * This is a command sent to the target device, so it can take a
	 * significant amount of time to complete when the path and device
	 * are busy.  Set the timeout to 20 seconds.
4663 	 */
4664 	WAIT_FOR(pwrk, 20000, result);
4665 	pmcs_lock_phy(pptr);
4666 	pmcs_pwork(pwp, pwrk);
4667 
4668 	if (result) {
4669 		if (xp == NULL) {
4670 			return (ETIMEDOUT);
4671 		}
4672 
4673 		mutex_enter(&xp->statlock);
4674 		pmcs_start_dev_state_recovery(xp, pptr);
4675 		mutex_exit(&xp->statlock);
4676 		return (ETIMEDOUT);
4677 	}
4678 
4679 	status = LE_32(msg[2]);
4680 	if (status != PMCOUT_STATUS_OK) {
4681 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4682 		    "%s: status %s for TMF %s action to %s, lun %llu",
4683 		    __func__, pmcs_status_str(status),  pmcs_tmf2str(tmf),
4684 		    pptr->path, (unsigned long long) lun);
4685 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4686 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4687 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4688 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4689 		} else if (status == PMCOUT_STATUS_IO_DS_IN_RECOVERY) {
4690 			/*
4691 			 * If the status is IN_RECOVERY, it's an indication
4692 			 * that it's now time for us to request to have the
4693 			 * device state set to OPERATIONAL since we're the ones
4694 			 * that requested recovery to begin with.
4695 			 */
4696 			ds = PMCS_DEVICE_STATE_OPERATIONAL;
4697 		} else {
4698 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4699 		}
4700 		if (xp != NULL) {
4701 			mutex_enter(&xp->statlock);
4702 			if (xp->dev_state != ds) {
4703 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4704 				    "%s: Sending err recovery cmd"
4705 				    " for tgt 0x%p (status = %s)",
4706 				    __func__, (void *)xp,
4707 				    pmcs_status_str(status));
4708 				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
4709 			}
4710 			mutex_exit(&xp->statlock);
4711 		}
4712 		return (EIO);
4713 	} else {
4714 		ds = PMCS_DEVICE_STATE_OPERATIONAL;
4715 		if (xp != NULL) {
4716 			mutex_enter(&xp->statlock);
4717 			if (xp->dev_state != ds) {
4718 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
4719 				    "%s: Sending err recovery cmd"
4720 				    " for tgt 0x%p (status = %s)",
4721 				    __func__, (void *)xp,
4722 				    pmcs_status_str(status));
4723 				(void) pmcs_send_err_recovery_cmd(pwp, ds, xp);
4724 			}
4725 			mutex_exit(&xp->statlock);
4726 		}
4727 	}
4728 	if (LE_32(msg[3]) == 0) {
4729 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "TMF completed with no response");
4730 		return (EIO);
4731 	}
4732 	pmcs_endian_transform(pwp, local, &msg[5], ssp_rsp_evec);
4733 	xd = (uint8_t *)(&msg[5]);
4734 	xd += SAS_RSP_HDR_SIZE;
4735 	if (rptr->datapres != SAS_RSP_DATAPRES_RESPONSE_DATA) {
4736 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4737 		    "%s: TMF response not RESPONSE DATA (0x%x)",
4738 		    __func__, rptr->datapres);
4739 		return (EIO);
4740 	}
4741 	if (rptr->response_data_length != 4) {
4742 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
4743 		    "Bad SAS RESPONSE DATA LENGTH", msg);
4744 		return (EIO);
4745 	}
4746 	(void) memcpy(&status, xd, sizeof (uint32_t));
4747 	status = BE_32(status);
4748 	if (response != NULL)
4749 		*response = status;
4750 	/*
4751 	 * The status is actually in the low-order byte.  The upper three
4752 	 * bytes contain additional information for the TMFs that support them.
4753 	 * However, at this time we do not issue any of those.  In the other
4754 	 * cases, the upper three bytes are supposed to be 0, but it appears
4755 	 * they aren't always.  Just mask them off.
4756 	 */
4757 	switch (status & 0xff) {
4758 	case SAS_RSP_TMF_COMPLETE:
4759 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF complete", __func__);
4760 		result = 0;
4761 		break;
4762 	case SAS_RSP_TMF_SUCCEEDED:
4763 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: TMF succeeded", __func__);
4764 		result = 0;
4765 		break;
4766 	case SAS_RSP_INVALID_FRAME:
4767 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4768 		    "%s: TMF returned INVALID FRAME", __func__);
4769 		result = EIO;
4770 		break;
4771 	case SAS_RSP_TMF_NOT_SUPPORTED:
4772 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4773 		    "%s: TMF returned TMF NOT SUPPORTED", __func__);
4774 		result = EIO;
4775 		break;
4776 	case SAS_RSP_TMF_FAILED:
4777 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4778 		    "%s: TMF returned TMF FAILED", __func__);
4779 		result = EIO;
4780 		break;
4781 	case SAS_RSP_TMF_INCORRECT_LUN:
4782 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4783 		    "%s: TMF returned INCORRECT LUN", __func__);
4784 		result = EIO;
4785 		break;
4786 	case SAS_RSP_OVERLAPPED_OIPTTA:
4787 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4788 		    "%s: TMF returned OVERLAPPED INITIATOR PORT TRANSFER TAG "
4789 		    "ATTEMPTED", __func__);
4790 		result = EIO;
4791 		break;
4792 	default:
4793 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
4794 		    "%s: TMF returned unknown code 0x%x", __func__, status);
4795 		result = EIO;
4796 		break;
4797 	}
4798 	return (result);
4799 }
4800 
4801 /*
4802  * Called with PHY lock held and scratch acquired
4803  */
4804 int
4805 pmcs_sata_abort_ncq(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
4806 {
4807 	const char *utag_fail_fmt = "%s: untagged NCQ command failure";
4808 	const char *tag_fail_fmt = "%s: NCQ command failure (tag 0x%x)";
4809 	uint32_t msg[PMCS_QENTRY_SIZE], *ptr, result, status;
4810 	uint8_t *fp = pwp->scratch, ds;
4811 	fis_t fis;
4812 	pmcwork_t *pwrk;
4813 	pmcs_xscsi_t *tgt;
4814 
4815 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr);
4816 	if (pwrk == NULL) {
4817 		return (ENOMEM);
4818 	}
4819 	msg[0] = LE_32(PMCS_IOMB_IN_SAS(PMCS_OQ_IODONE,
4820 	    PMCIN_SATA_HOST_IO_START));
4821 	msg[1] = LE_32(pwrk->htag);
4822 	msg[2] = LE_32(pptr->device_id);
4823 	msg[3] = LE_32(512);
4824 	msg[4] = LE_32(SATA_PROTOCOL_PIO | PMCIN_DATADIR_2_INI);
4825 	msg[5] = LE_32((READ_LOG_EXT << 16) | (C_BIT << 8) | FIS_REG_H2DEV);
	msg[6] = LE_32(0x10);
	msg[7] = 0;
	msg[8] = LE_32(1);
4828 	msg[9] = 0;
4829 	msg[10] = 0;
4830 	msg[11] = 0;
4831 	msg[12] = LE_32(DWORD0(pwp->scratch_dma));
4832 	msg[13] = LE_32(DWORD1(pwp->scratch_dma));
4833 	msg[14] = LE_32(512);
4834 	msg[15] = 0;
4835 
4836 	pwrk->arg = msg;
4837 	pwrk->dtype = pptr->dtype;
4838 
4839 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4840 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4841 	if (ptr == NULL) {
4842 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
4843 		pmcs_pwork(pwp, pwrk);
4844 		return (ENOMEM);
4845 	}
4846 	COPY_MESSAGE(ptr, msg, PMCS_QENTRY_SIZE);
4847 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
4848 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
4849 
4850 	pmcs_unlock_phy(pptr);
4851 	WAIT_FOR(pwrk, 250, result);
4852 	pmcs_lock_phy(pptr);
4853 	pmcs_pwork(pwp, pwrk);
4854 
4855 	if (result) {
4856 		pmcs_prt(pwp, PMCS_PRT_INFO, pmcs_timeo, __func__);
4857 		return (EIO);
4858 	}
4859 	status = LE_32(msg[2]);
4860 	if (status != PMCOUT_STATUS_OK || LE_32(msg[3])) {
4861 		tgt = pptr->target;
4862 		if (tgt == NULL) {
4863 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
4864 			    "%s: cannot find target for phy 0x%p for "
4865 			    "dev state recovery", __func__, (void *)pptr);
4866 			return (EIO);
4867 		}
4868 
4869 		mutex_enter(&tgt->statlock);
4870 
4871 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG, "READ LOG EXT", msg);
4872 		if ((status == PMCOUT_STATUS_IO_DS_NON_OPERATIONAL) ||
4873 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK) ||
4874 		    (status == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS)) {
4875 			ds = PMCS_DEVICE_STATE_NON_OPERATIONAL;
4876 		} else {
4877 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
4878 		}
4879 		if (tgt->dev_state != ds) {
4880 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Trying SATA DS Error"
4881 			    " Recovery for tgt(0x%p) for status(%s)",
4882 			    __func__, (void *)tgt, pmcs_status_str(status));
4883 			(void) pmcs_send_err_recovery_cmd(pwp, ds, tgt);
4884 		}
4885 
4886 		mutex_exit(&tgt->statlock);
4887 		return (EIO);
4888 	}
4889 	fis[0] = (fp[4] << 24) | (fp[3] << 16) | (fp[2] << 8) | FIS_REG_D2H;
4890 	fis[1] = (fp[8] << 24) | (fp[7] << 16) | (fp[6] << 8) | fp[5];
4891 	fis[2] = (fp[12] << 24) | (fp[11] << 16) | (fp[10] << 8) | fp[9];
4892 	fis[3] = (fp[16] << 24) | (fp[15] << 16) | (fp[14] << 8) | fp[13];
4893 	fis[4] = 0;
4894 	if (fp[0] & 0x80) {
4895 		pmcs_prt(pwp, PMCS_PRT_DEBUG, utag_fail_fmt, __func__);
4896 	} else {
4897 		pmcs_prt(pwp, PMCS_PRT_DEBUG, tag_fail_fmt, __func__,
4898 		    fp[0] & 0x1f);
4899 	}
4900 	pmcs_fis_dump(pwp, fis);
4901 	pptr->need_rl_ext = 0;
4902 	return (0);
4903 }
4904 
4905 /*
4906  * Transform a structure from CPU to Device endian format, or
4907  * vice versa, based upon a transformation vector.
4908  *
4909  * A transformation vector is an array of bytes, each byte
4910  * of which is defined thusly:
4911  *
4912  *  bit 7: from CPU to desired endian, otherwise from desired endian
4913  *	   to CPU format
4914  *  bit 6: Big Endian, else Little Endian
4915  *  bits 5-4:
4916  *       00 Undefined
4917  *       01 One Byte quantities
4918  *       02 Two Byte quantities
4919  *       03 Four Byte quantities
4920  *
4921  *  bits 3-0:
4922  *       00 Undefined
4923  *       Number of quantities to transform
4924  *
4925  * The vector is terminated by a 0 value.
4926  */
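/*
 * For example, a vector byte of 0x72 decodes as: bit 6 set (big
 * endian), size bits 5-4 == 3 (four byte quantities), count bits
 * 3-0 == 2; i.e. byte-swap two consecutive 32-bit words.
 */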
4927 
4928 void
4929 pmcs_endian_transform(pmcs_hw_t *pwp, void *orig_out, void *orig_in,
4930     const uint8_t *xfvec)
4931 {
4932 	uint8_t c, *out = orig_out, *in = orig_in;
4933 
4934 	if (xfvec == NULL) {
4935 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null xfvec", __func__);
4936 		return;
4937 	}
4938 	if (out == NULL) {
4939 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null out", __func__);
4940 		return;
4941 	}
4942 	if (in == NULL) {
4943 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: null in", __func__);
4944 		return;
4945 	}
4946 	while ((c = *xfvec++) != 0) {
4947 		int nbyt = (c & 0xf);
4948 		int size = (c >> 4) & 0x3;
4949 		int bige = (c >> 4) & 0x4;
4950 
4951 		switch (size) {
4952 		case 1:
4953 		{
4954 			while (nbyt-- > 0) {
4955 				*out++ = *in++;
4956 			}
4957 			break;
4958 		}
4959 		case 2:
4960 		{
4961 			uint16_t tmp;
4962 			while (nbyt-- > 0) {
4963 				(void) memcpy(&tmp, in, sizeof (uint16_t));
4964 				if (bige) {
4965 					tmp = BE_16(tmp);
4966 				} else {
4967 					tmp = LE_16(tmp);
4968 				}
4969 				(void) memcpy(out, &tmp, sizeof (uint16_t));
4970 				out += sizeof (uint16_t);
4971 				in += sizeof (uint16_t);
4972 			}
4973 			break;
4974 		}
4975 		case 3:
4976 		{
4977 			uint32_t tmp;
4978 			while (nbyt-- > 0) {
4979 				(void) memcpy(&tmp, in, sizeof (uint32_t));
4980 				if (bige) {
4981 					tmp = BE_32(tmp);
4982 				} else {
4983 					tmp = LE_32(tmp);
4984 				}
4985 				(void) memcpy(out, &tmp, sizeof (uint32_t));
4986 				out += sizeof (uint32_t);
4987 				in += sizeof (uint32_t);
4988 			}
4989 			break;
4990 		}
4991 		default:
4992 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: bad size", __func__);
4993 			return;
4994 		}
4995 	}
4996 }
4997 
4998 const char *
4999 pmcs_get_rate(unsigned int linkrt)
5000 {
5001 	const char *rate;
5002 	switch (linkrt) {
5003 	case SAS_LINK_RATE_1_5GBIT:
5004 		rate = "1.5";
5005 		break;
5006 	case SAS_LINK_RATE_3GBIT:
5007 		rate = "3.0";
5008 		break;
5009 	case SAS_LINK_RATE_6GBIT:
5010 		rate = "6.0";
5011 		break;
5012 	default:
5013 		rate = "???";
5014 		break;
5015 	}
5016 	return (rate);
5017 }
5018 
5019 const char *
5020 pmcs_get_typename(pmcs_dtype_t type)
5021 {
5022 	switch (type) {
5023 	case NOTHING:
5024 		return ("NIL");
5025 	case SATA:
5026 		return ("SATA");
5027 	case SAS:
5028 		return ("SSP");
5029 	case EXPANDER:
5030 		return ("EXPANDER");
5031 	}
5032 	return ("????");
5033 }
5034 
5035 const char *
5036 pmcs_tmf2str(int tmf)
5037 {
5038 	switch (tmf) {
5039 	case SAS_ABORT_TASK:
5040 		return ("Abort Task");
5041 	case SAS_ABORT_TASK_SET:
5042 		return ("Abort Task Set");
5043 	case SAS_CLEAR_TASK_SET:
5044 		return ("Clear Task Set");
5045 	case SAS_LOGICAL_UNIT_RESET:
5046 		return ("Logical Unit Reset");
5047 	case SAS_I_T_NEXUS_RESET:
5048 		return ("I_T Nexus Reset");
5049 	case SAS_CLEAR_ACA:
5050 		return ("Clear ACA");
5051 	case SAS_QUERY_TASK:
5052 		return ("Query Task");
5053 	case SAS_QUERY_TASK_SET:
5054 		return ("Query Task Set");
5055 	case SAS_QUERY_UNIT_ATTENTION:
5056 		return ("Query Unit Attention");
5057 	default:
5058 		return ("Unknown");
5059 	}
5060 }
5061 
5062 const char *
5063 pmcs_status_str(uint32_t status)
5064 {
5065 	switch (status) {
5066 	case PMCOUT_STATUS_OK:
5067 		return ("OK");
5068 	case PMCOUT_STATUS_ABORTED:
5069 		return ("ABORTED");
5070 	case PMCOUT_STATUS_OVERFLOW:
5071 		return ("OVERFLOW");
5072 	case PMCOUT_STATUS_UNDERFLOW:
5073 		return ("UNDERFLOW");
5074 	case PMCOUT_STATUS_FAILED:
5075 		return ("FAILED");
5076 	case PMCOUT_STATUS_ABORT_RESET:
5077 		return ("ABORT_RESET");
5078 	case PMCOUT_STATUS_IO_NOT_VALID:
5079 		return ("IO_NOT_VALID");
5080 	case PMCOUT_STATUS_NO_DEVICE:
5081 		return ("NO_DEVICE");
5082 	case PMCOUT_STATUS_ILLEGAL_PARAMETER:
5083 		return ("ILLEGAL_PARAMETER");
5084 	case PMCOUT_STATUS_LINK_FAILURE:
5085 		return ("LINK_FAILURE");
5086 	case PMCOUT_STATUS_PROG_ERROR:
5087 		return ("PROG_ERROR");
5088 	case PMCOUT_STATUS_EDC_IN_ERROR:
5089 		return ("EDC_IN_ERROR");
5090 	case PMCOUT_STATUS_EDC_OUT_ERROR:
5091 		return ("EDC_OUT_ERROR");
5092 	case PMCOUT_STATUS_ERROR_HW_TIMEOUT:
5093 		return ("ERROR_HW_TIMEOUT");
5094 	case PMCOUT_STATUS_XFER_ERR_BREAK:
5095 		return ("XFER_ERR_BREAK");
5096 	case PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY:
5097 		return ("XFER_ERR_PHY_NOT_READY");
5098 	case PMCOUT_STATUS_OPEN_CNX_PROTOCOL_NOT_SUPPORTED:
5099 		return ("OPEN_CNX_PROTOCOL_NOT_SUPPORTED");
5100 	case PMCOUT_STATUS_OPEN_CNX_ERROR_ZONE_VIOLATION:
5101 		return ("OPEN_CNX_ERROR_ZONE_VIOLATION");
5102 	case PMCOUT_STATUS_OPEN_CNX_ERROR_BREAK:
5103 		return ("OPEN_CNX_ERROR_BREAK");
5104 	case PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS:
5105 		return ("OPEN_CNX_ERROR_IT_NEXUS_LOSS");
5106 	case PMCOUT_STATUS_OPENCNX_ERROR_BAD_DESTINATION:
5107 		return ("OPENCNX_ERROR_BAD_DESTINATION");
5108 	case PMCOUT_STATUS_OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED:
5109 		return ("OPEN_CNX_ERROR_CONNECTION_RATE_NOT_SUPPORTED");
5110 	case PMCOUT_STATUS_OPEN_CNX_ERROR_STP_RESOURCES_BUSY:
5111 		return ("OPEN_CNX_ERROR_STP_RESOURCES_BUSY");
5112 	case PMCOUT_STATUS_OPEN_CNX_ERROR_WRONG_DESTINATION:
5113 		return ("OPEN_CNX_ERROR_WRONG_DESTINATION");
5114 	case PMCOUT_STATUS_OPEN_CNX_ERROR_UNKNOWN_EROOR:
5115 		return ("OPEN_CNX_ERROR_UNKNOWN_EROOR");
5116 	case PMCOUT_STATUS_IO_XFER_ERROR_NAK_RECEIVED:
5117 		return ("IO_XFER_ERROR_NAK_RECEIVED");
5118 	case PMCOUT_STATUS_XFER_ERROR_ACK_NAK_TIMEOUT:
5119 		return ("XFER_ERROR_ACK_NAK_TIMEOUT");
5120 	case PMCOUT_STATUS_XFER_ERROR_PEER_ABORTED:
5121 		return ("XFER_ERROR_PEER_ABORTED");
5122 	case PMCOUT_STATUS_XFER_ERROR_RX_FRAME:
5123 		return ("XFER_ERROR_RX_FRAME");
5124 	case PMCOUT_STATUS_IO_XFER_ERROR_DMA:
5125 		return ("IO_XFER_ERROR_DMA");
5126 	case PMCOUT_STATUS_XFER_ERROR_CREDIT_TIMEOUT:
5127 		return ("XFER_ERROR_CREDIT_TIMEOUT");
5128 	case PMCOUT_STATUS_XFER_ERROR_SATA_LINK_TIMEOUT:
5129 		return ("XFER_ERROR_SATA_LINK_TIMEOUT");
5130 	case PMCOUT_STATUS_XFER_ERROR_SATA:
5131 		return ("XFER_ERROR_SATA");
5132 	case PMCOUT_STATUS_XFER_ERROR_REJECTED_NCQ_MODE:
5133 		return ("XFER_ERROR_REJECTED_NCQ_MODE");
5134 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_DUE_TO_SRST:
5135 		return ("XFER_ERROR_ABORTED_DUE_TO_SRST");
5136 	case PMCOUT_STATUS_XFER_ERROR_ABORTED_NCQ_MODE:
5137 		return ("XFER_ERROR_ABORTED_NCQ_MODE");
5138 	case PMCOUT_STATUS_IO_XFER_OPEN_RETRY_TIMEOUT:
5139 		return ("IO_XFER_OPEN_RETRY_TIMEOUT");
5140 	case PMCOUT_STATUS_SMP_RESP_CONNECTION_ERROR:
5141 		return ("SMP_RESP_CONNECTION_ERROR");
5142 	case PMCOUT_STATUS_XFER_ERROR_UNEXPECTED_PHASE:
5143 		return ("XFER_ERROR_UNEXPECTED_PHASE");
5144 	case PMCOUT_STATUS_XFER_ERROR_RDY_OVERRUN:
5145 		return ("XFER_ERROR_RDY_OVERRUN");
5146 	case PMCOUT_STATUS_XFER_ERROR_RDY_NOT_EXPECTED:
5147 		return ("XFER_ERROR_RDY_NOT_EXPECTED");
5148 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT:
5149 		return ("XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT");
5150 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK:
5151 		return ("XFER_ERROR_CMD_ISSUE_BREAK_BEFORE_ACK_NACK");
5152 	case PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK:
5153 		return ("XFER_ERROR_CMD_ISSUE_PHY_DOWN_BEFORE_ACK_NAK");
5154 	case PMCOUT_STATUS_XFER_ERROR_OFFSET_MISMATCH:
5155 		return ("XFER_ERROR_OFFSET_MISMATCH");
5156 	case PMCOUT_STATUS_XFER_ERROR_ZERO_DATA_LEN:
5157 		return ("XFER_ERROR_ZERO_DATA_LEN");
5158 	case PMCOUT_STATUS_XFER_CMD_FRAME_ISSUED:
5159 		return ("XFER_CMD_FRAME_ISSUED");
5160 	case PMCOUT_STATUS_ERROR_INTERNAL_SMP_RESOURCE:
5161 		return ("ERROR_INTERNAL_SMP_RESOURCE");
5162 	case PMCOUT_STATUS_IO_PORT_IN_RESET:
5163 		return ("IO_PORT_IN_RESET");
5164 	case PMCOUT_STATUS_IO_DS_NON_OPERATIONAL:
5165 		return ("DEVICE STATE NON-OPERATIONAL");
5166 	case PMCOUT_STATUS_IO_DS_IN_RECOVERY:
5167 		return ("DEVICE STATE IN RECOVERY");
5168 	default:
5169 		return (NULL);
5170 	}
5171 }
5172 
5173 uint64_t
5174 pmcs_barray2wwn(uint8_t ba[8])
5175 {
5176 	uint64_t result = 0;
5177 	int i;
5178 
5179 	for (i = 0; i < 8; i++) {
5180 		result <<= 8;
5181 		result |= ba[i];
5182 	}
5183 	return (result);
5184 }
5185 
5186 void
5187 pmcs_wwn2barray(uint64_t wwn, uint8_t ba[8])
5188 {
5189 	int i;
5190 	for (i = 0; i < 8; i++) {
5191 		ba[7 - i] = wwn & 0xff;
5192 		wwn >>= 8;
5193 	}
5194 }
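/*
 * For example, pmcs_wwn2barray(0x5000c50000000001ULL, ba) produces
 * { 0x50, 0x00, 0xc5, 0x00, 0x00, 0x00, 0x00, 0x01 }, and
 * pmcs_barray2wwn() on that array returns 0x5000c50000000001: the two
 * are inverses, with ba[0] holding the most significant byte.
 */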
5195 
5196 void
5197 pmcs_report_fwversion(pmcs_hw_t *pwp)
5198 {
5199 	const char *fwsupport;
5200 	switch (PMCS_FW_TYPE(pwp)) {
5201 	case PMCS_FW_TYPE_RELEASED:
5202 		fwsupport = "Released";
5203 		break;
5204 	case PMCS_FW_TYPE_DEVELOPMENT:
5205 		fwsupport = "Development";
5206 		break;
5207 	case PMCS_FW_TYPE_ALPHA:
5208 		fwsupport = "Alpha";
5209 		break;
5210 	case PMCS_FW_TYPE_BETA:
5211 		fwsupport = "Beta";
5212 		break;
5213 	default:
5214 		fwsupport = "Special";
5215 		break;
5216 	}
5217 	pmcs_prt(pwp, PMCS_PRT_INFO,
5218 	    "Chip Revision: %c; F/W Revision %x.%x.%x %s", 'A' + pwp->chiprev,
5219 	    PMCS_FW_MAJOR(pwp), PMCS_FW_MINOR(pwp), PMCS_FW_MICRO(pwp),
5220 	    fwsupport);
5221 }
5222 
5223 void
5224 pmcs_phy_name(pmcs_hw_t *pwp, pmcs_phy_t *pptr, char *obuf, size_t olen)
5225 {
5226 	if (pptr->parent) {
		pmcs_phy_name(pwp, pptr->parent, obuf, olen);
		/*
		 * Append in place: passing obuf as both the destination
		 * and a source argument of snprintf is undefined behavior.
		 */
		(void) snprintf(obuf + strlen(obuf), olen - strlen(obuf),
		    ".%02x", pptr->phynum);
5229 	} else {
5230 		(void) snprintf(obuf, olen, "pp%02x", pptr->phynum);
5231 	}
5232 }
5233 
5234 /*
5235  * Implementation for pmcs_find_phy_by_devid.
5236  * If the PHY is found, it is returned locked.
5237  */
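/*
 * The walk below drops the current PHY's lock before descending into
 * its children and reacquires it afterward, so no two PHY locks in
 * the tree are ever held simultaneously here.
 */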
5238 static pmcs_phy_t *
5239 pmcs_find_phy_by_devid_impl(pmcs_phy_t *phyp, uint32_t device_id)
5240 {
5241 	pmcs_phy_t *match, *cphyp, *nphyp;
5242 
5243 	ASSERT(!mutex_owned(&phyp->phy_lock));
5244 
5245 	while (phyp) {
5246 		pmcs_lock_phy(phyp);
5247 
5248 		if ((phyp->valid_device_id) && (phyp->device_id == device_id)) {
5249 			return (phyp);
5250 		}
5251 		if (phyp->children) {
5252 			cphyp = phyp->children;
5253 			pmcs_unlock_phy(phyp);
5254 			match = pmcs_find_phy_by_devid_impl(cphyp, device_id);
5255 			if (match) {
5256 				ASSERT(mutex_owned(&match->phy_lock));
5257 				return (match);
5258 			}
5259 			pmcs_lock_phy(phyp);
5260 		}
5261 
5262 		if (IS_ROOT_PHY(phyp)) {
5263 			pmcs_unlock_phy(phyp);
5264 			phyp = NULL;
5265 		} else {
5266 			nphyp = phyp->sibling;
5267 			pmcs_unlock_phy(phyp);
5268 			phyp = nphyp;
5269 		}
5270 	}
5271 
5272 	return (NULL);
5273 }
5274 
5275 /*
5276  * If the PHY is found, it is returned locked
5277  */
5278 pmcs_phy_t *
5279 pmcs_find_phy_by_devid(pmcs_hw_t *pwp, uint32_t device_id)
5280 {
5281 	pmcs_phy_t *phyp, *match = NULL;
5282 
5283 	phyp = pwp->root_phys;
5284 
5285 	while (phyp) {
5286 		match = pmcs_find_phy_by_devid_impl(phyp, device_id);
5287 		if (match) {
5288 			ASSERT(mutex_owned(&match->phy_lock));
5289 			return (match);
5290 		}
5291 		phyp = phyp->sibling;
5292 	}
5293 
5294 	return (NULL);
5295 }
5296 
5297 /*
 * This function is called as a sanity check to ensure that a newly
 * registered PHY doesn't have a device_id that is already in use by
 * another registered PHY.
5300  */
5301 static boolean_t
5302 pmcs_validate_devid(pmcs_phy_t *parent, pmcs_phy_t *phyp, uint32_t device_id)
5303 {
5304 	pmcs_phy_t *pptr;
5305 	boolean_t rval;
5306 
5307 	pptr = parent;
5308 
5309 	while (pptr) {
5310 		if (pptr->valid_device_id && (pptr != phyp) &&
5311 		    (pptr->device_id == device_id)) {
5312 			pmcs_prt(pptr->pwp, PMCS_PRT_DEBUG,
5313 			    "%s: phy %s already exists as %s with "
5314 			    "device id 0x%x", __func__, phyp->path,
5315 			    pptr->path, device_id);
5316 			return (B_FALSE);
5317 		}
5318 
5319 		if (pptr->children) {
5320 			rval = pmcs_validate_devid(pptr->children, phyp,
5321 			    device_id);
5322 			if (rval == B_FALSE) {
5323 				return (rval);
5324 			}
5325 		}
5326 
5327 		pptr = pptr->sibling;
5328 	}
5329 
5330 	/* This PHY and device_id are valid */
5331 	return (B_TRUE);
5332 }
5333 
5334 /*
5335  * If the PHY is found, it is returned locked
5336  */
5337 static pmcs_phy_t *
5338 pmcs_find_phy_by_wwn_impl(pmcs_phy_t *phyp, uint8_t *wwn)
5339 {
5340 	pmcs_phy_t *matched_phy, *cphyp, *nphyp;
5341 
5342 	ASSERT(!mutex_owned(&phyp->phy_lock));
5343 
5344 	while (phyp) {
5345 		pmcs_lock_phy(phyp);
5346 
5347 		if (phyp->valid_device_id) {
5348 			if (memcmp(phyp->sas_address, wwn, 8) == 0) {
5349 				return (phyp);
5350 			}
5351 		}
5352 
5353 		if (phyp->children) {
5354 			cphyp = phyp->children;
5355 			pmcs_unlock_phy(phyp);
5356 			matched_phy = pmcs_find_phy_by_wwn_impl(cphyp, wwn);
5357 			if (matched_phy) {
5358 				ASSERT(mutex_owned(&matched_phy->phy_lock));
5359 				return (matched_phy);
5360 			}
5361 			pmcs_lock_phy(phyp);
5362 		}
5363 
		/*
		 * Only iterate through non-root PHYs; the caller walks
		 * the list of root PHYs itself.
		 */
5367 		if (IS_ROOT_PHY(phyp)) {
5368 			pmcs_unlock_phy(phyp);
5369 			phyp = NULL;
5370 		} else {
5371 			nphyp = phyp->sibling;
5372 			pmcs_unlock_phy(phyp);
5373 			phyp = nphyp;
5374 		}
5375 	}
5376 
5377 	return (NULL);
5378 }
5379 
5380 pmcs_phy_t *
5381 pmcs_find_phy_by_wwn(pmcs_hw_t *pwp, uint64_t wwn)
5382 {
5383 	uint8_t ebstr[8];
5384 	pmcs_phy_t *pptr, *matched_phy;
5385 
5386 	pmcs_wwn2barray(wwn, ebstr);
5387 
5388 	pptr = pwp->root_phys;
5389 	while (pptr) {
5390 		matched_phy = pmcs_find_phy_by_wwn_impl(pptr, ebstr);
5391 		if (matched_phy) {
5392 			ASSERT(mutex_owned(&matched_phy->phy_lock));
5393 			return (matched_phy);
5394 		}
5395 
5396 		pptr = pptr->sibling;
5397 	}
5398 
5399 	return (NULL);
5400 }
5401 
5402 
5403 /*
5404  * pmcs_find_phy_by_sas_address
5405  *
5406  * Find a PHY that both matches "sas_addr" and is on "iport".
5407  * If a matching PHY is found, it is returned locked.
5408  */
5409 pmcs_phy_t *
5410 pmcs_find_phy_by_sas_address(pmcs_hw_t *pwp, pmcs_iport_t *iport,
5411     pmcs_phy_t *root, char *sas_addr)
5412 {
5413 	int ua_form = 1;
5414 	uint64_t wwn;
5415 	char addr[PMCS_MAX_UA_SIZE];
5416 	pmcs_phy_t *pptr, *pnext, *pchild;
5417 
5418 	if (root == NULL) {
5419 		pptr = pwp->root_phys;
5420 	} else {
5421 		pptr = root;
5422 	}
5423 
5424 	while (pptr) {
5425 		pmcs_lock_phy(pptr);
5426 		/*
5427 		 * If the PHY is dead or does not have a valid device ID,
5428 		 * skip it.
5429 		 */
5430 		if ((pptr->dead) || (!pptr->valid_device_id)) {
5431 			goto next_phy;
5432 		}
5433 
5434 		if (pptr->iport != iport) {
5435 			goto next_phy;
5436 		}
5437 
5438 		wwn = pmcs_barray2wwn(pptr->sas_address);
		(void) scsi_wwn_to_wwnstr(wwn, ua_form, addr);
5440 		if (strncmp(addr, sas_addr, strlen(addr)) == 0) {
5441 			return (pptr);
5442 		}
5443 
5444 		if (pptr->children) {
5445 			pchild = pptr->children;
5446 			pmcs_unlock_phy(pptr);
5447 			pnext = pmcs_find_phy_by_sas_address(pwp, iport, pchild,
5448 			    sas_addr);
5449 			if (pnext) {
5450 				return (pnext);
5451 			}
5452 			pmcs_lock_phy(pptr);
5453 		}
5454 
5455 next_phy:
5456 		pnext = pptr->sibling;
5457 		pmcs_unlock_phy(pptr);
5458 		pptr = pnext;
5459 	}
5460 
5461 	return (NULL);
5462 }
5463 
5464 void
5465 pmcs_fis_dump(pmcs_hw_t *pwp, fis_t fis)
5466 {
5467 	switch (fis[0] & 0xff) {
5468 	case FIS_REG_H2DEV:
5469 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER HOST TO DEVICE: "
5470 		    "OP=0x%02x Feature=0x%04x Count=0x%04x Device=0x%02x "
5471 		    "LBA=%llu", BYTE2(fis[0]), BYTE3(fis[2]) << 8 |
5472 		    BYTE3(fis[0]), WORD0(fis[3]), BYTE3(fis[1]),
5473 		    (unsigned long long)
5474 		    (((uint64_t)fis[2] & 0x00ffffff) << 24 |
5475 		    ((uint64_t)fis[1] & 0x00ffffff)));
5476 		break;
5477 	case FIS_REG_D2H:
5478 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS REGISTER DEVICE TO HOST: Stat"
5479 		    "us=0x%02x Error=0x%02x Dev=0x%02x Count=0x%04x LBA=%llu",
5480 		    BYTE2(fis[0]), BYTE3(fis[0]), BYTE3(fis[1]), WORD0(fis[3]),
5481 		    (unsigned long long)(((uint64_t)fis[2] & 0x00ffffff) << 24 |
5482 		    ((uint64_t)fis[1] & 0x00ffffff)));
5483 		break;
5484 	default:
5485 		pmcs_prt(pwp, PMCS_PRT_INFO, "FIS: 0x%08x 0x%08x 0x%08x 0x%08x "
5486 		    "0x%08x 0x%08x 0x%08x",
5487 		    fis[0], fis[1], fis[2], fis[3], fis[4], fis[5], fis[6]);
5488 		break;
5489 	}
5490 }
5491 
5492 void
5493 pmcs_print_entry(pmcs_hw_t *pwp, int level, char *msg, void *arg)
5494 {
5495 	uint32_t *mb = arg;
5496 	size_t i;
5497 
5498 	pmcs_prt(pwp, level, msg);
5499 	for (i = 0; i < (PMCS_QENTRY_SIZE / sizeof (uint32_t)); i += 4) {
5500 		pmcs_prt(pwp, level, "Offset %2lu: 0x%08x 0x%08x 0x%08"
5501 		    "x 0x%08x", i * sizeof (uint32_t), LE_32(mb[i]),
5502 		    LE_32(mb[i+1]), LE_32(mb[i+2]),
5503 		    LE_32(mb[i+3]));
5504 	}
5505 }
5506 
5507 /*
5508  * If phyp == NULL we're being called from the worker thread, in which
5509  * case we need to check all the PHYs.  In this case, the softstate lock
5510  * will be held.
5511  * If phyp is non-NULL, just issue the spinup release for the specified PHY
5512  * (which will already be locked).
5513  */
5514 void
5515 pmcs_spinup_release(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5516 {
5517 	uint32_t *msg;
5518 	struct pmcwork *pwrk;
5519 	pmcs_phy_t *tphyp;
5520 
5521 	if (phyp != NULL) {
5522 		ASSERT(mutex_owned(&phyp->phy_lock));
5523 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5524 		    "%s: Issuing spinup release only for PHY %s", __func__,
5525 		    phyp->path);
5526 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5527 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5528 		if (msg == NULL || (pwrk =
5529 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5530 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5531 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5532 			return;
5533 		}
5534 
5535 		phyp->spinup_hold = 0;
5536 		bzero(msg, PMCS_QENTRY_SIZE);
5537 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5538 		    PMCIN_LOCAL_PHY_CONTROL));
5539 		msg[1] = LE_32(pwrk->htag);
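		/*
		 * The PHY operation lives in bits 15:8 (0x10 here, used by
		 * this function as the spinup-hold release operation) and
		 * the phy number in bits 7:0.
		 */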
5540 		msg[2] = LE_32((0x10 << 8) | phyp->phynum);
5541 
5542 		pwrk->dtype = phyp->dtype;
5543 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5544 		mutex_exit(&pwrk->lock);
5545 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5546 		return;
5547 	}
5548 
5549 	ASSERT(mutex_owned(&pwp->lock));
5550 
5551 	tphyp = pwp->root_phys;
5552 	while (tphyp) {
5553 		pmcs_lock_phy(tphyp);
5554 		if (tphyp->spinup_hold == 0) {
5555 			pmcs_unlock_phy(tphyp);
5556 			tphyp = tphyp->sibling;
5557 			continue;
5558 		}
5559 
5560 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
5561 		    "%s: Issuing spinup release for PHY %s", __func__,
5562 		    tphyp->path);
5563 
5564 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5565 		msg = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5566 		if (msg == NULL || (pwrk =
5567 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5568 			pmcs_unlock_phy(tphyp);
5569 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5570 			SCHEDULE_WORK(pwp, PMCS_WORK_SPINUP_RELEASE);
5571 			break;
5572 		}
5573 
5574 		tphyp->spinup_hold = 0;
5575 		bzero(msg, PMCS_QENTRY_SIZE);
5576 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5577 		    PMCIN_LOCAL_PHY_CONTROL));
5578 		msg[1] = LE_32(pwrk->htag);
5579 		msg[2] = LE_32((0x10 << 8) | tphyp->phynum);
5580 
5581 		pwrk->dtype = tphyp->dtype;
5582 		pwrk->state = PMCS_WORK_STATE_ONCHIP;
5583 		mutex_exit(&pwrk->lock);
5584 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5585 		pmcs_unlock_phy(tphyp);
5586 
5587 		tphyp = tphyp->sibling;
5588 	}
5589 }
5590 
5591 /*
5592  * Abort commands on dead PHYs, deregister the PHYs, and remove the
5593  * associated targets.
5594  */
5595 static int
5596 pmcs_kill_devices(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
5597 {
5598 	pmcs_phy_t *pnext, *pchild;
5599 	boolean_t remove_device;
5600 	int rval = 0;
5601 
5602 	while (phyp) {
5603 		pmcs_lock_phy(phyp);
5604 		pchild = phyp->children;
5605 		pnext = phyp->sibling;
5606 		pmcs_unlock_phy(phyp);
5607 
5608 		if (pchild) {
5609 			rval = pmcs_kill_devices(pwp, pchild);
5610 			if (rval) {
5611 				return (rval);
5612 			}
5613 		}
5614 
5615 		/*
5616 		 * pmcs_remove_device requires the softstate lock.
5617 		 */
5618 		mutex_enter(&pwp->lock);
5619 		pmcs_lock_phy(phyp);
5620 		if (phyp->dead && phyp->valid_device_id) {
5621 			remove_device = B_TRUE;
5622 		} else {
5623 			remove_device = B_FALSE;
5624 		}
5625 
5626 		if (remove_device) {
5627 			pmcs_remove_device(pwp, phyp);
5628 			mutex_exit(&pwp->lock);
5629 
5630 			rval = pmcs_kill_device(pwp, phyp);
5631 
5632 			if (rval) {
5633 				pmcs_unlock_phy(phyp);
5634 				return (rval);
5635 			}
5636 		} else {
5637 			mutex_exit(&pwp->lock);
5638 		}
5639 
5640 		pmcs_unlock_phy(phyp);
5641 		phyp = pnext;
5642 	}
5643 
5644 	return (rval);
5645 }
5646 
5647 /*
5648  * Called with PHY locked
5649  */
5650 int
5651 pmcs_kill_device(pmcs_hw_t *pwp, pmcs_phy_t *pptr)
5652 {
5653 	int r, result;
5654 	uint32_t msg[PMCS_MSG_SIZE], *ptr, status;
5655 	struct pmcwork *pwrk;
5656 	pmcs_xscsi_t *tgt;
5657 
5658 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "kill %s device @ %s",
5659 	    pmcs_get_typename(pptr->dtype), pptr->path);
5660 
5661 	/*
5662 	 * There may be an outstanding ABORT_ALL running, which we wouldn't
5663 	 * know just by checking abort_pending.  We can, however, check
5664 	 * abort_all_start.  If it's non-zero, there is one, and we'll just
5665 	 * sit here and wait for it to complete.  If we don't, we'll remove
5666 	 * the device while there are still commands pending.
5667 	 */
5668 	if (pptr->abort_all_start) {
5669 		while (pptr->abort_all_start) {
5670 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
5671 			    "%s: Waiting for outstanding ABORT_ALL on PHY 0x%p",
5672 			    __func__, (void *)pptr);
5673 			cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
5674 		}
5675 	} else if (pptr->abort_pending) {
5676 		r = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
5677 
5678 		if (r) {
5679 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
5680 			    "%s: ABORT_ALL returned non-zero status (%d) for "
5681 			    "PHY 0x%p", __func__, r, (void *)pptr);
5682 			return (r);
5683 		}
5684 		pptr->abort_pending = 0;
5685 	}
5686 
5687 	/*
5688 	 * Now that everything is aborted from the chip's perspective (or even
5689 	 * if it is not), flush out the wait queue.  We won't flush the active
5690 	 * queue since it is possible that abort completions may follow after
5691 	 * the notification that the abort all has completed.
5692 	 */
5693 	tgt = pptr->target;
5694 	if (tgt) {
5695 		mutex_enter(&tgt->statlock);
5696 		pmcs_flush_target_queues(pwp, tgt, PMCS_TGT_WAIT_QUEUE);
5697 		mutex_exit(&tgt->statlock);
5698 	}
5699 
5700 	if (pptr->valid_device_id == 0) {
5701 		return (0);
5702 	}
5703 
5704 	if ((pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, pptr)) == NULL) {
5705 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
5706 		return (ENOMEM);
5707 	}
5708 	pwrk->arg = msg;
5709 	pwrk->dtype = pptr->dtype;
5710 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5711 	    PMCIN_DEREGISTER_DEVICE_HANDLE));
5712 	msg[1] = LE_32(pwrk->htag);
5713 	msg[2] = LE_32(pptr->device_id);
5714 
5715 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5716 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5717 	if (ptr == NULL) {
5718 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5719 		mutex_exit(&pwrk->lock);
5720 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
5721 		return (ENOMEM);
5722 	}
5723 
5724 	COPY_MESSAGE(ptr, msg, 3);
5725 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
5726 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5727 
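	/*
	 * Drop the PHY lock across the wait (the 250 is presumably
	 * milliseconds) so the completion path can acquire it.
	 */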
5728 	pmcs_unlock_phy(pptr);
5729 	WAIT_FOR(pwrk, 250, result);
5730 	pmcs_lock_phy(pptr);
5731 	pmcs_pwork(pwp, pwrk);
5732 
5733 	if (result) {
5734 		return (ETIMEDOUT);
5735 	}
5736 	status = LE_32(msg[2]);
5737 	if (status != PMCOUT_STATUS_OK) {
5738 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
5739 		    "%s: status 0x%x when trying to deregister device %s",
5740 		    __func__, status, pptr->path);
5741 	}
5742 
5743 	pptr->device_id = PMCS_INVALID_DEVICE_ID;
5744 	PHY_CHANGED(pwp, pptr);
5745 	RESTART_DISCOVERY(pwp);
5746 	pptr->valid_device_id = 0;
5747 	return (0);
5748 }
5749 
5750 /*
5751  * Acknowledge the SAS h/w events that need acknowledgement.
5752  * This is only needed for first level PHYs.
5753  */
5754 void
5755 pmcs_ack_events(pmcs_hw_t *pwp)
5756 {
5757 	uint32_t msg[PMCS_MSG_SIZE], *ptr;
5758 	struct pmcwork *pwrk;
5759 	pmcs_phy_t *pptr;
5760 
5761 	for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) {
5762 		pmcs_lock_phy(pptr);
5763 		if (pptr->hw_event_ack == 0) {
5764 			pmcs_unlock_phy(pptr);
5765 			continue;
5766 		}
5767 		mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5768 		ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5769 
5770 		if ((ptr == NULL) || (pwrk =
5771 		    pmcs_gwork(pwp, PMCS_TAG_TYPE_NONE, NULL)) == NULL) {
5772 			mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
5773 			pmcs_unlock_phy(pptr);
5774 			SCHEDULE_WORK(pwp, PMCS_WORK_SAS_HW_ACK);
5775 			break;
5776 		}
5777 
5778 		msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
5779 		    PMCIN_SAW_HW_EVENT_ACK));
5780 		msg[1] = LE_32(pwrk->htag);
5781 		msg[2] = LE_32(pptr->hw_event_ack);
5782 
5783 		pwrk->dtype = pptr->dtype;
5784 		mutex_exit(&pwrk->lock);
5785 		pptr->hw_event_ack = 0;
5786 		COPY_MESSAGE(ptr, msg, 3);
5787 		INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
5788 		pmcs_unlock_phy(pptr);
5789 	}
5790 }
5791 
5792 /*
5793  * Load DMA
5794  */
5795 int
5796 pmcs_dma_load(pmcs_hw_t *pwp, pmcs_cmd_t *sp, uint32_t *msg)
5797 {
5798 	ddi_dma_cookie_t *sg;
5799 	pmcs_dmachunk_t *tc;
5800 	pmcs_dmasgl_t *sgl, *prior;
5801 	int seg, tsc;
5802 	uint64_t sgl_addr;
5803 
5804 	/*
5805 	 * If we have no data segments, we're done.
5806 	 */
5807 	if (CMD2PKT(sp)->pkt_numcookies == 0) {
5808 		return (0);
5809 	}
5810 
5811 	/*
5812 	 * Get the S/G list pointer.
5813 	 */
5814 	sg = CMD2PKT(sp)->pkt_cookies;
5815 
5816 	/*
5817 	 * If we only have one dma segment, we can directly address that
5818 	 * data within the Inbound message itself.
5819 	 */
5820 	if (CMD2PKT(sp)->pkt_numcookies == 1) {
5821 		msg[12] = LE_32(DWORD0(sg->dmac_laddress));
5822 		msg[13] = LE_32(DWORD1(sg->dmac_laddress));
5823 		msg[14] = LE_32(sg->dmac_size);
5824 		msg[15] = 0;
5825 		return (0);
5826 	}
5827 
5828 	/*
5829 	 * Otherwise, we'll need one or more external S/G list chunks.
5830 	 * Get the first one and its dma address into the Inbound message.
5831 	 */
5832 	mutex_enter(&pwp->dma_lock);
5833 	tc = pwp->dma_freelist;
5834 	if (tc == NULL) {
5835 		SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5836 		mutex_exit(&pwp->dma_lock);
5837 		pmcs_prt(pwp, PMCS_PRT_DEBUG2, "%s: out of SG lists", __func__);
5838 		return (-1);
5839 	}
5840 	pwp->dma_freelist = tc->nxt;
5841 	mutex_exit(&pwp->dma_lock);
5842 
5843 	tc->nxt = NULL;
5844 	sp->cmd_clist = tc;
5845 	sgl = tc->chunks;
5846 	(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
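	/*
	 * Point the IOMB's S/G descriptor at the external chunk; the
	 * PMCS_DMASGL_EXTENSION flag in msg[15] marks this descriptor as
	 * a pointer to another S/G list rather than a data segment.
	 */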
5847 	sgl_addr = tc->addr;
5848 	msg[12] = LE_32(DWORD0(sgl_addr));
5849 	msg[13] = LE_32(DWORD1(sgl_addr));
5850 	msg[14] = 0;
5851 	msg[15] = LE_32(PMCS_DMASGL_EXTENSION);
5852 
5853 	prior = sgl;
5854 	tsc = 0;
5855 
5856 	for (seg = 0; seg < CMD2PKT(sp)->pkt_numcookies; seg++) {
5857 		/*
5858 		 * If the current segment count for this chunk is one less than
5859 		 * the number of s/g lists per chunk and we have more than one
5860 		 * seg to go, we need another chunk. Get it, and make sure that
5861 		 * the tail end of the previous chunk points to the new chunk
5862 		 * (if remembering an offset can be called 'pointing to').
5863 		 *
5864 		 * Note that we can store the offset into our command area that
5865 		 * represents the new chunk in the length field of the part
5866 		 * that points the PMC chip at the next chunk; the PMC chip
5867 		 * ignores this field when the EXTENSION bit is set.
5868 		 *
5869 		 * This is required for dma unloads later.
5870 		 */
5871 		if (tsc == (PMCS_SGL_NCHUNKS - 1) &&
5872 		    seg < (CMD2PKT(sp)->pkt_numcookies - 1)) {
5873 			mutex_enter(&pwp->dma_lock);
5874 			tc = pwp->dma_freelist;
5875 			if (tc == NULL) {
5876 				SCHEDULE_WORK(pwp, PMCS_WORK_ADD_DMA_CHUNKS);
5877 				mutex_exit(&pwp->dma_lock);
5878 				pmcs_dma_unload(pwp, sp);
5879 				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
5880 				    "%s: out of SG lists", __func__);
5881 				return (-1);
5882 			}
5883 			pwp->dma_freelist = tc->nxt;
5884 			tc->nxt = sp->cmd_clist;
5885 			mutex_exit(&pwp->dma_lock);
5886 
5887 			sp->cmd_clist = tc;
5888 			(void) memset(tc->chunks, 0, PMCS_SGL_CHUNKSZ);
5889 			sgl = tc->chunks;
5890 			sgl_addr = tc->addr;
5891 			prior[PMCS_SGL_NCHUNKS-1].sglal =
5892 			    LE_32(DWORD0(sgl_addr));
5893 			prior[PMCS_SGL_NCHUNKS-1].sglah =
5894 			    LE_32(DWORD1(sgl_addr));
5895 			prior[PMCS_SGL_NCHUNKS-1].sglen = 0;
5896 			prior[PMCS_SGL_NCHUNKS-1].flags =
5897 			    LE_32(PMCS_DMASGL_EXTENSION);
5898 			prior = sgl;
5899 			tsc = 0;
5900 		}
5901 		sgl[tsc].sglal = LE_32(DWORD0(sg->dmac_laddress));
5902 		sgl[tsc].sglah = LE_32(DWORD1(sg->dmac_laddress));
5903 		sgl[tsc].sglen = LE_32(sg->dmac_size);
5904 		sgl[tsc++].flags = 0;
5905 		sg++;
5906 	}
5907 	return (0);
5908 }
5909 
5910 /*
5911  * Unload DMA
5912  */
5913 void
5914 pmcs_dma_unload(pmcs_hw_t *pwp, pmcs_cmd_t *sp)
5915 {
5916 	pmcs_dmachunk_t *cp;
5917 
5918 	mutex_enter(&pwp->dma_lock);
5919 	while ((cp = sp->cmd_clist) != NULL) {
5920 		sp->cmd_clist = cp->nxt;
5921 		cp->nxt = pwp->dma_freelist;
5922 		pwp->dma_freelist = cp;
5923 	}
5924 	mutex_exit(&pwp->dma_lock);
5925 }
5926 
5927 /*
5928  * Take a chunk of consistent memory that has just been allocated and
5929  * inserted into the cip indices, prepare it for DMA chunk usage, and add
5930  * it to the freelist.
5931  *
5932  * Called with dma_lock locked (except during attach when it's unnecessary)
5933  */
5934 void
5935 pmcs_idma_chunks(pmcs_hw_t *pwp, pmcs_dmachunk_t *dcp,
5936     pmcs_chunk_t *pchunk, unsigned long lim)
5937 {
5938 	unsigned long off, n;
5939 	pmcs_dmachunk_t *np = dcp;
5940 	pmcs_chunk_t *tmp_chunk;
5941 
5942 	if (pwp->dma_chunklist == NULL) {
5943 		pwp->dma_chunklist = pchunk;
5944 	} else {
5945 		tmp_chunk = pwp->dma_chunklist;
5946 		while (tmp_chunk->next) {
5947 			tmp_chunk = tmp_chunk->next;
5948 		}
5949 		tmp_chunk->next = pchunk;
5950 	}
5951 
5952 	/*
5953 	 * Install offsets into chunk lists.
5954 	 */
5955 	for (n = 0, off = 0; off < lim; off += PMCS_SGL_CHUNKSZ, n++) {
5956 		np->chunks = (void *)&pchunk->addrp[off];
5957 		np->addr = pchunk->dma_addr + off;
5958 		np->acc_handle = pchunk->acc_handle;
5959 		np->dma_handle = pchunk->dma_handle;
5960 		if ((off + PMCS_SGL_CHUNKSZ) < lim) {
5961 			np = np->nxt;
5962 		}
5963 	}
5964 	np->nxt = pwp->dma_freelist;
5965 	pwp->dma_freelist = dcp;
5966 	pmcs_prt(pwp, PMCS_PRT_DEBUG2,
5967 	    "added %lu DMA chunks", n);
5968 }
5969 
5970 /*
5971  * Change the value of the interrupt coalescing timer.  This is done currently
5972  * only for I/O completions.  If we're using the "auto clear" feature, it
5973  * must be turned off while the coalescing timer is on and can be turned
5974  * back on when the timer is turned off.
5975  * NOTE: PMCS_MSIX_GENERAL and PMCS_OQ_IODONE are the same value.  As long
5976  * as that's true, we don't need to distinguish between them.
5977  */
5978 
5979 void
5980 pmcs_set_intr_coal_timer(pmcs_hw_t *pwp, pmcs_coal_timer_adj_t adj)
5981 {
5982 	if (adj == DECREASE_TIMER) {
5983 		/* If the timer is already off, nothing to do. */
5984 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
5985 			return;
5986 		}
5987 
5988 		pwp->io_intr_coal.intr_coal_timer -= PMCS_COAL_TIMER_GRAN;
5989 
5990 		if (pwp->io_intr_coal.intr_coal_timer == 0) {
5991 			/* Disable the timer */
5992 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL, 0);
5993 
5994 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
5995 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
5996 				    pwp->odb_auto_clear);
5997 			}
5998 
5999 			pwp->io_intr_coal.timer_on = B_FALSE;
6000 			pwp->io_intr_coal.max_io_completions = 0;
6001 			pwp->io_intr_coal.num_intrs = 0;
6002 			pwp->io_intr_coal.int_cleared = B_FALSE;
6003 			pwp->io_intr_coal.num_io_completions = 0;
6004 
6005 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__off,
6006 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6007 		} else {
6008 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6009 			    pwp->io_intr_coal.intr_coal_timer);
6010 		}
6011 	} else {
6012 		/*
6013 		 * If the timer isn't on yet, do the setup for it now.
6014 		 */
6015 		if (pwp->io_intr_coal.timer_on == B_FALSE) {
6016 			/* If auto clear is being used, turn it off. */
6017 			if (pwp->odb_auto_clear & (1 << PMCS_MSIX_IODONE)) {
6018 				pmcs_wr_topunit(pwp, PMCS_OBDB_AUTO_CLR,
6019 				    (pwp->odb_auto_clear &
6020 				    ~(1 << PMCS_MSIX_IODONE)));
6021 			}
6022 
6023 			pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_CONTROL,
6024 			    (1 << PMCS_MSIX_IODONE));
6025 			pwp->io_intr_coal.timer_on = B_TRUE;
6026 			pwp->io_intr_coal.intr_coal_timer =
6027 			    PMCS_COAL_TIMER_GRAN;
6028 
6029 			DTRACE_PROBE1(pmcs__intr__coalesce__timer__on,
6030 			    pmcs_io_intr_coal_t *, &pwp->io_intr_coal);
6031 		} else {
6032 			pwp->io_intr_coal.intr_coal_timer +=
6033 			    PMCS_COAL_TIMER_GRAN;
6034 		}
6035 
6036 		if (pwp->io_intr_coal.intr_coal_timer > PMCS_MAX_COAL_TIMER) {
6037 			pwp->io_intr_coal.intr_coal_timer = PMCS_MAX_COAL_TIMER;
6038 		}
6039 
6040 		pmcs_wr_topunit(pwp, PMCS_INT_COALESCING_TIMER,
6041 		    pwp->io_intr_coal.intr_coal_timer);
6042 	}
6043 
6044 	/*
6045 	 * Adjust the interrupt threshold based on the current timer value
6046 	 */
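	/*
	 * intr_coal_timer is in microseconds and intr_latency appears to
	 * be in nanoseconds, so the divisor below is the approximate cost
	 * of one interrupt in nanoseconds; the quotient is then roughly
	 * the number of I/O completions expected in one quantum.
	 */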
6047 	pwp->io_intr_coal.intr_threshold =
6048 	    PMCS_INTR_THRESHOLD(PMCS_QUANTUM_TIME_USECS * 1000 /
6049 	    (pwp->io_intr_coal.intr_latency +
6050 	    (pwp->io_intr_coal.intr_coal_timer * 1000)));
6051 }
6052 
6053 /*
6054  * Register Access functions
6055  */
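/*
 * Note that the inbound queue consumer indices and outbound queue producer
 * indices live in host DMA memory (pwp->cip), which is why their accessors
 * bracket each access with ddi_dma_sync(); the other indices and tables are
 * chip registers reached through the MPI region.
 */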
6056 uint32_t
6057 pmcs_rd_iqci(pmcs_hw_t *pwp, uint32_t qnum)
6058 {
6059 	uint32_t iqci;
6060 
6061 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6062 	    DDI_SUCCESS) {
6063 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6064 		    __func__);
6065 	}
6066 
6067 	iqci = LE_32(
6068 	    ((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2]);
6069 
6070 	return (iqci);
6071 }
6072 
6073 uint32_t
6074 pmcs_rd_oqpi(pmcs_hw_t *pwp, uint32_t qnum)
6075 {
6076 	uint32_t oqpi;
6077 
6078 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORKERNEL) !=
6079 	    DDI_SUCCESS) {
6080 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6081 		    __func__);
6082 	}
6083 
6084 	oqpi = LE_32(
6085 	    ((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2]);
6086 
6087 	return (oqpi);
6088 }
6089 
6090 uint32_t
6091 pmcs_rd_gsm_reg(pmcs_hw_t *pwp, uint32_t off)
6092 {
6093 	uint32_t rv, newaxil, oldaxil;
6094 
6095 	newaxil = off & ~GSM_BASE_MASK;
6096 	off &= GSM_BASE_MASK;
6097 	mutex_enter(&pwp->axil_lock);
6098 	oldaxil = ddi_get32(pwp->top_acc_handle,
6099 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
6100 	ddi_put32(pwp->top_acc_handle,
6101 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
6102 	drv_usecwait(10);
6103 	if (ddi_get32(pwp->top_acc_handle,
6104 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
6105 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
6106 	}
6107 	rv = ddi_get32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2]);
6108 	ddi_put32(pwp->top_acc_handle,
6109 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
6110 	drv_usecwait(10);
6111 	if (ddi_get32(pwp->top_acc_handle,
6112 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
6113 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
6114 	}
6115 	mutex_exit(&pwp->axil_lock);
6116 	return (rv);
6117 }
6118 
6119 void
6120 pmcs_wr_gsm_reg(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6121 {
6122 	uint32_t newaxil, oldaxil;
6123 
6124 	newaxil = off & ~GSM_BASE_MASK;
6125 	off &= GSM_BASE_MASK;
6126 	mutex_enter(&pwp->axil_lock);
6127 	oldaxil = ddi_get32(pwp->top_acc_handle,
6128 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]);
6129 	ddi_put32(pwp->top_acc_handle,
6130 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], newaxil);
6131 	drv_usecwait(10);
6132 	if (ddi_get32(pwp->top_acc_handle,
6133 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != newaxil) {
6134 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register update failed");
6135 	}
6136 	ddi_put32(pwp->gsm_acc_handle, &pwp->gsm_regs[off >> 2], val);
6137 	ddi_put32(pwp->top_acc_handle,
6138 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2], oldaxil);
6139 	drv_usecwait(10);
6140 	if (ddi_get32(pwp->top_acc_handle,
6141 	    &pwp->top_regs[PMCS_AXI_TRANS >> 2]) != oldaxil) {
6142 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "AXIL register restore failed");
6143 	}
6144 	mutex_exit(&pwp->axil_lock);
6145 }
6146 
6147 uint32_t
6148 pmcs_rd_topunit(pmcs_hw_t *pwp, uint32_t off)
6149 {
6150 	switch (off) {
6151 	case PMCS_SPC_RESET:
6152 	case PMCS_SPC_BOOT_STRAP:
6153 	case PMCS_SPC_DEVICE_ID:
6154 	case PMCS_DEVICE_REVISION:
6155 		off = pmcs_rd_gsm_reg(pwp, off);
6156 		break;
6157 	default:
6158 		off = ddi_get32(pwp->top_acc_handle,
6159 		    &pwp->top_regs[off >> 2]);
6160 		break;
6161 	}
6162 	return (off);
6163 }
6164 
6165 void
6166 pmcs_wr_topunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6167 {
6168 	switch (off) {
6169 	case PMCS_SPC_RESET:
6170 	case PMCS_DEVICE_REVISION:
6171 		pmcs_wr_gsm_reg(pwp, off, val);
6172 		break;
6173 	default:
6174 		ddi_put32(pwp->top_acc_handle, &pwp->top_regs[off >> 2], val);
6175 		break;
6176 	}
6177 }
6178 
6179 uint32_t
6180 pmcs_rd_msgunit(pmcs_hw_t *pwp, uint32_t off)
6181 {
6182 	return (ddi_get32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2]));
6183 }
6184 
6185 uint32_t
6186 pmcs_rd_mpi_tbl(pmcs_hw_t *pwp, uint32_t off)
6187 {
6188 	return (ddi_get32(pwp->mpi_acc_handle,
6189 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2]));
6190 }
6191 
6192 uint32_t
6193 pmcs_rd_gst_tbl(pmcs_hw_t *pwp, uint32_t off)
6194 {
6195 	return (ddi_get32(pwp->mpi_acc_handle,
6196 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2]));
6197 }
6198 
6199 uint32_t
6200 pmcs_rd_iqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6201 {
6202 	return (ddi_get32(pwp->mpi_acc_handle,
6203 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2]));
6204 }
6205 
6206 uint32_t
6207 pmcs_rd_oqc_tbl(pmcs_hw_t *pwp, uint32_t off)
6208 {
6209 	return (ddi_get32(pwp->mpi_acc_handle,
6210 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2]));
6211 }
6212 
6213 uint32_t
6214 pmcs_rd_iqpi(pmcs_hw_t *pwp, uint32_t qnum)
6215 {
6216 	return (ddi_get32(pwp->mpi_acc_handle,
6217 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2]));
6218 }
6219 
6220 uint32_t
6221 pmcs_rd_oqci(pmcs_hw_t *pwp, uint32_t qnum)
6222 {
6223 	return (ddi_get32(pwp->mpi_acc_handle,
6224 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2]));
6225 }
6226 
6227 void
6228 pmcs_wr_msgunit(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6229 {
6230 	ddi_put32(pwp->msg_acc_handle, &pwp->msg_regs[off >> 2], val);
6231 }
6232 
6233 void
6234 pmcs_wr_mpi_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6235 {
6236 	ddi_put32(pwp->mpi_acc_handle,
6237 	    &pwp->mpi_regs[(pwp->mpi_offset + off) >> 2], (val));
6238 }
6239 
6240 void
6241 pmcs_wr_gst_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6242 {
6243 	ddi_put32(pwp->mpi_acc_handle,
6244 	    &pwp->mpi_regs[(pwp->mpi_gst_offset + off) >> 2], val);
6245 }
6246 
6247 void
6248 pmcs_wr_iqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6249 {
6250 	ddi_put32(pwp->mpi_acc_handle,
6251 	    &pwp->mpi_regs[(pwp->mpi_iqc_offset + off) >> 2], val);
6252 }
6253 
6254 void
6255 pmcs_wr_oqc_tbl(pmcs_hw_t *pwp, uint32_t off, uint32_t val)
6256 {
6257 	ddi_put32(pwp->mpi_acc_handle,
6258 	    &pwp->mpi_regs[(pwp->mpi_oqc_offset + off) >> 2], val);
6259 }
6260 
6261 void
6262 pmcs_wr_iqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6263 {
6264 	((uint32_t *)((void *)pwp->cip))[IQ_OFFSET(qnum) >> 2] = val;
6265 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6266 	    DDI_SUCCESS) {
6267 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6268 		    __func__);
6269 	}
6270 }
6271 
6272 void
6273 pmcs_wr_iqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6274 {
6275 	ddi_put32(pwp->mpi_acc_handle,
6276 	    &pwp->mpi_regs[pwp->iqpi_offset[qnum] >> 2], val);
6277 }
6278 
6279 void
6280 pmcs_wr_oqci(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6281 {
6282 	ddi_put32(pwp->mpi_acc_handle,
6283 	    &pwp->mpi_regs[pwp->oqci_offset[qnum] >> 2], val);
6284 }
6285 
6286 void
6287 pmcs_wr_oqpi(pmcs_hw_t *pwp, uint32_t qnum, uint32_t val)
6288 {
6289 	((uint32_t *)((void *)pwp->cip))[OQ_OFFSET(qnum) >> 2] = val;
6290 	if (ddi_dma_sync(pwp->cip_handles, 0, 0, DDI_DMA_SYNC_FORDEV) !=
6291 	    DDI_SUCCESS) {
6292 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: ddi_dma_sync failed?",
6293 		    __func__);
6294 	}
6295 }
6296 
6297 /*
6298  * Check the status value of an outbound IOMB and report anything bad
6299  */
6300 
6301 void
6302 pmcs_check_iomb_status(pmcs_hw_t *pwp, uint32_t *iomb)
6303 {
6304 	uint16_t 	opcode;
6305 	int		offset;
6306 
6307 	if (iomb == NULL) {
6308 		return;
6309 	}
6310 
6311 	opcode = LE_32(iomb[0]) & 0xfff;
6312 
6313 	switch (opcode) {
6314 		/*
6315 		 * The following have no status field, so ignore them
6316 		 */
6317 	case PMCOUT_ECHO:
6318 	case PMCOUT_SAS_HW_EVENT:
6319 	case PMCOUT_GET_DEVICE_HANDLE:
6320 	case PMCOUT_SATA_EVENT:
6321 	case PMCOUT_SSP_EVENT:
6322 	case PMCOUT_DEVICE_HANDLE_ARRIVED:
6323 	case PMCOUT_SMP_REQUEST_RECEIVED:
6324 	case PMCOUT_GPIO:
6325 	case PMCOUT_GPIO_EVENT:
6326 	case PMCOUT_GET_TIME_STAMP:
6327 	case PMCOUT_SKIP_ENTRIES:
6328 	case PMCOUT_GET_NVMD_DATA:	/* Actually lower 16 bits of word 3 */
6329 	case PMCOUT_SET_NVMD_DATA:	/* but ignore - we don't use these */
6330 	case PMCOUT_DEVICE_HANDLE_REMOVED:
6331 	case PMCOUT_SSP_REQUEST_RECEIVED:
6332 		return;
6333 
6334 	case PMCOUT_GENERAL_EVENT:
6335 		offset = 1;
6336 		break;
6337 
6338 	case PMCOUT_SSP_COMPLETION:
6339 	case PMCOUT_SMP_COMPLETION:
6340 	case PMCOUT_DEVICE_REGISTRATION:
6341 	case PMCOUT_DEREGISTER_DEVICE_HANDLE:
6342 	case PMCOUT_SATA_COMPLETION:
6343 	case PMCOUT_DEVICE_INFO:
6344 	case PMCOUT_FW_FLASH_UPDATE:
6345 	case PMCOUT_SSP_ABORT:
6346 	case PMCOUT_SATA_ABORT:
6347 	case PMCOUT_SAS_DIAG_MODE_START_END:
6348 	case PMCOUT_SAS_HW_EVENT_ACK_ACK:
6349 	case PMCOUT_SMP_ABORT:
6350 	case PMCOUT_SET_DEVICE_STATE:
6351 	case PMCOUT_GET_DEVICE_STATE:
6352 	case PMCOUT_SET_DEVICE_INFO:
6353 		offset = 2;
6354 		break;
6355 
6356 	case PMCOUT_LOCAL_PHY_CONTROL:
6357 	case PMCOUT_SAS_DIAG_EXECUTE:
6358 	case PMCOUT_PORT_CONTROL:
6359 		offset = 3;
6360 		break;
6361 
6362 	case PMCOUT_GET_INFO:
6363 	case PMCOUT_GET_VPD:
6364 	case PMCOUT_SAS_ASSISTED_DISCOVERY_EVENT:
6365 	case PMCOUT_SATA_ASSISTED_DISCOVERY_EVENT:
6366 	case PMCOUT_SET_VPD:
6367 	case PMCOUT_TWI:
6368 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6369 		    "Got response for deprecated opcode", iomb);
6370 		return;
6371 
6372 	default:
6373 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6374 		    "Got response for unknown opcode", iomb);
6375 		return;
6376 	}
6377 
6378 	if (LE_32(iomb[offset]) != PMCOUT_STATUS_OK) {
6379 		pmcs_print_entry(pwp, PMCS_PRT_DEBUG,
6380 		    "bad status on TAG_TYPE_NONE command", iomb);
6381 	}
6382 }
6383 
6384 /*
6385  * Called with statlock held
6386  */
6387 void
6388 pmcs_clear_xp(pmcs_hw_t *pwp, pmcs_xscsi_t *xp)
6389 {
6391 
6392 	ASSERT(mutex_owned(&xp->statlock));
6393 	ASSERT(xp->dying);
6394 
6395 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device 0x%p is gone.", __func__,
6396 	    (void *)xp);
6397 
6398 	/*
6399 	 * Clear the dip now.  This keeps pmcs_rem_old_devices from attempting
6400 	 * to call us on the same device while we're still flushing queues.
6401 	 * The only side effect is we can no longer update SM-HBA properties,
6402 	 * but this device is going away anyway, so no matter.
6403  * but this device is going away anyway, so it doesn't matter.
6404 	xp->dip = NULL;
6405 
6406 	/*
6407 	 * Flush all target queues
6408 	 */
6409 	pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES);
6410 
6411 	xp->special_running = 0;
6412 	xp->recovering = 0;
6413 	xp->recover_wait = 0;
6414 	xp->draining = 0;
6415 	xp->dying = 0;
6416 	xp->new = 0;
6417 	xp->assigned = 0;
6418 	xp->dev_state = 0;
6419 	xp->tagmap = 0;
6420 	xp->dev_gone = 1;
6421 	xp->event_recovery = 0;
6422 	xp->dtype = NOTHING;
6423 	xp->wq_recovery_tail = NULL;
6424 	/* Don't clear xp->phy */
6425 	/* Don't clear xp->actv_cnt */
6426 }
6427 
6428 static int
6429 pmcs_smp_function_result(pmcs_hw_t *pwp, smp_response_frame_t *srf)
6430 {
6431 	int result = srf->srf_result;
6432 
6433 	switch (result) {
6434 	case SMP_RES_UNKNOWN_FUNCTION:
6435 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6436 		    "Function Result: Unknown SMP Function(0x%x)",
6437 		    __func__, result);
6438 		break;
6439 	case SMP_RES_FUNCTION_FAILED:
6440 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6441 		    "Function Result: SMP Function Failed(0x%x)",
6442 		    __func__, result);
6443 		break;
6444 	case SMP_RES_INVALID_REQUEST_FRAME_LENGTH:
6445 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6446 		    "Function Result: Invalid Request Frame Length(0x%x)",
6447 		    __func__, result);
6448 		break;
6449 	case SMP_RES_INCOMPLETE_DESCRIPTOR_LIST:
6450 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6451 		    "Function Result: Incomplete Descriptor List(0x%x)",
6452 		    __func__, result);
6453 		break;
6454 	case SMP_RES_PHY_DOES_NOT_EXIST:
6455 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6456 		    "Function Result: PHY does not exist(0x%x)",
6457 		    __func__, result);
6458 		break;
6459 	case SMP_RES_PHY_VACANT:
6460 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6461 		    "Function Result: PHY Vacant(0x%x)",
6462 		    __func__, result);
6463 		break;
6464 	default:
6465 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: SMP DISCOVER Response "
6466 		    "Function Result: (0x%x)",
6467 		    __func__, result);
6468 		break;
6469 	}
6470 
6471 	return (result);
6472 }
6473 
6474 /*
6475  * Do all the repetitive stuff necessary to setup for DMA
6476  *
6477  * pwp: Used for dip
6478  * dma_attr: ddi_dma_attr_t to use for the mapping
6479  * acch: ddi_acc_handle_t to use for the mapping
6480  * dmah: ddi_dma_handle_t to use
6481  * length: Amount of memory for mapping
6482  * kvp: Pointer filled in with kernel virtual address on successful return
6483  * dma_addr: Pointer filled in with DMA address on successful return
6484  */
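/*
 * A sketch of a typical call (the local names here are illustrative):
 *
 *	ddi_dma_attr_t dattr;		(suitably initialized attributes)
 *	ddi_acc_handle_t acch;
 *	ddi_dma_handle_t dmah;
 *	caddr_t kva;
 *	uint64_t pa;
 *
 *	if (!pmcs_dma_setup(pwp, &dattr, &acch, &dmah,
 *	    PMCS_SGL_CHUNKSZ, &kva, &pa)) {
 *		return (B_FALSE);
 *	}
 */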
6485 boolean_t
6486 pmcs_dma_setup(pmcs_hw_t *pwp, ddi_dma_attr_t *dma_attr, ddi_acc_handle_t *acch,
6487     ddi_dma_handle_t *dmah, size_t length, caddr_t *kvp, uint64_t *dma_addr)
6488 {
6489 	dev_info_t		*dip = pwp->dip;
6490 	ddi_dma_cookie_t	cookie;
6491 	size_t			real_length;
6492 	uint_t			ddma_flag = DDI_DMA_CONSISTENT;
6493 	uint_t			ddabh_flag = DDI_DMA_CONSISTENT | DDI_DMA_RDWR;
6494 	uint_t			cookie_cnt;
6495 	ddi_device_acc_attr_t	mattr = {
6496 		DDI_DEVICE_ATTR_V0,
6497 		DDI_NEVERSWAP_ACC,
6498 		DDI_STRICTORDER_ACC,
6499 		DDI_DEFAULT_ACC
6500 	};
6501 
6502 	*acch = NULL;
6503 	*dmah = NULL;
6504 
6505 	if (ddi_dma_alloc_handle(dip, dma_attr, DDI_DMA_SLEEP, NULL, dmah) !=
6506 	    DDI_SUCCESS) {
6507 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA handle");
6508 		return (B_FALSE);
6509 	}
6510 
6511 	if (ddi_dma_mem_alloc(*dmah, length, &mattr, ddma_flag, DDI_DMA_SLEEP,
6512 	    NULL, kvp, &real_length, acch) != DDI_SUCCESS) {
6513 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to allocate DMA mem");
6514 		ddi_dma_free_handle(dmah);
6515 		*dmah = NULL;
6516 		return (B_FALSE);
6517 	}
6518 
6519 	if (ddi_dma_addr_bind_handle(*dmah, NULL, *kvp, real_length,
6520 	    ddabh_flag, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt)
6521 	    != DDI_DMA_MAPPED) {
6522 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Failed to bind DMA");
6523 		ddi_dma_free_handle(dmah);
6524 		ddi_dma_mem_free(acch);
6525 		*dmah = NULL;
6526 		*acch = NULL;
6527 		return (B_FALSE);
6528 	}
6529 
6530 	if (cookie_cnt != 1) {
6531 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "Multiple cookies");
6532 		if (ddi_dma_unbind_handle(*dmah) != DDI_SUCCESS) {
6533 			pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed at "
6534 			    "%s():%d", __func__, __LINE__);
6535 		}
6536 		ddi_dma_free_handle(dmah);
6537 		ddi_dma_mem_free(acch);
6538 		*dmah = NULL;
6539 		*acch = NULL;
6540 		return (B_FALSE);
6541 	}
6542 
6543 	*dma_addr = cookie.dmac_laddress;
6544 
6545 	return (B_TRUE);
6546 }
6547 
6548 /*
6549  * Flush requested queues for a particular target.  Called with statlock held
6550  */
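/*
 * The "queues" argument is a bitmask of PMCS_TGT_WAIT_QUEUE,
 * PMCS_TGT_ACTIVE_QUEUE and PMCS_TGT_SPECIAL_QUEUE; PMCS_TGT_ALL_QUEUES
 * (as used by pmcs_clear_xp above) presumably ORs all three together.
 */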
6551 void
6552 pmcs_flush_target_queues(pmcs_hw_t *pwp, pmcs_xscsi_t *tgt, uint8_t queues)
6553 {
6554 	pmcs_cmd_t	*sp;
6555 	pmcwork_t	*pwrk;
6556 
6557 	ASSERT(pwp != NULL);
6558 	ASSERT(tgt != NULL);
6559 
6560 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
6561 	    "%s: Flushing queues (%d) for target 0x%p", __func__,
6562 	    queues, (void *)tgt);
6563 
6564 	/*
6565 	 * Commands on the wait queue (or the special queue below) don't have
6566 	 * work structures associated with them.
6567 	 */
6568 	if (queues & PMCS_TGT_WAIT_QUEUE) {
6569 		mutex_enter(&tgt->wqlock);
6570 		while ((sp = STAILQ_FIRST(&tgt->wq)) != NULL) {
6571 			STAILQ_REMOVE(&tgt->wq, sp, pmcs_cmd, cmd_next);
6572 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6573 			    "%s: Removing cmd 0x%p from wq for target 0x%p",
6574 			    __func__, (void *)sp, (void *)tgt);
6575 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6576 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6577 			mutex_exit(&tgt->wqlock);
6578 			pmcs_dma_unload(pwp, sp);
6579 			mutex_enter(&pwp->cq_lock);
6580 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6581 			mutex_exit(&pwp->cq_lock);
6582 			mutex_enter(&tgt->wqlock);
6583 		}
6584 		mutex_exit(&tgt->wqlock);
6585 	}
6586 
6587 	/*
6588 	 * Commands on the active queue will have work structures associated
6589 	 * with them.
6590 	 */
6591 	if (queues & PMCS_TGT_ACTIVE_QUEUE) {
6592 		mutex_enter(&tgt->aqlock);
6593 		while ((sp = STAILQ_FIRST(&tgt->aq)) != NULL) {
6594 			STAILQ_REMOVE(&tgt->aq, sp, pmcs_cmd, cmd_next);
6595 			pwrk = pmcs_tag2wp(pwp, sp->cmd_tag);
6596 			mutex_exit(&tgt->aqlock);
6597 			mutex_exit(&tgt->statlock);
6598 			/*
6599 			 * If we found a work structure, mark it as dead
6600 			 * and complete it
6601 			 */
6602 			if (pwrk != NULL) {
6603 				pwrk->dead = 1;
6604 				CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6605 				CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6606 				pmcs_complete_work_impl(pwp, pwrk, NULL, 0);
6607 			}
6608 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6609 			    "%s: Removing cmd 0x%p from aq for target 0x%p",
6610 			    __func__, (void *)sp, (void *)tgt);
6611 			pmcs_dma_unload(pwp, sp);
6612 			mutex_enter(&pwp->cq_lock);
6613 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6614 			mutex_exit(&pwp->cq_lock);
6615 			mutex_enter(&tgt->aqlock);
6616 			mutex_enter(&tgt->statlock);
6617 		}
6618 		mutex_exit(&tgt->aqlock);
6619 	}
6620 
6621 	if (queues & PMCS_TGT_SPECIAL_QUEUE) {
6622 		while ((sp = STAILQ_FIRST(&tgt->sq)) != NULL) {
6623 			STAILQ_REMOVE(&tgt->sq, sp, pmcs_cmd, cmd_next);
6624 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
6625 			    "%s: Removing cmd 0x%p from sq for target 0x%p",
6626 			    __func__, (void *)sp, (void *)tgt);
6627 			CMD2PKT(sp)->pkt_reason = CMD_DEV_GONE;
6628 			CMD2PKT(sp)->pkt_state = STATE_GOT_BUS;
6629 			pmcs_dma_unload(pwp, sp);
6630 			mutex_enter(&pwp->cq_lock);
6631 			STAILQ_INSERT_TAIL(&pwp->cq, sp, cmd_next);
6632 			mutex_exit(&pwp->cq_lock);
6633 		}
6634 	}
6635 }
6636 
6637 void
6638 pmcs_complete_work_impl(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
6639     size_t amt)
6640 {
6641 	switch (PMCS_TAG_TYPE(pwrk->htag)) {
6642 	case PMCS_TAG_TYPE_CBACK:
6643 	{
6644 		pmcs_cb_t callback = (pmcs_cb_t)pwrk->ptr;
6645 		(*callback)(pwp, pwrk, iomb);
6646 		break;
6647 	}
6648 	case PMCS_TAG_TYPE_WAIT:
6649 		if (pwrk->arg && iomb && amt) {
6650 			(void) memcpy(pwrk->arg, iomb, amt);
6651 		}
6652 		cv_signal(&pwrk->sleep_cv);
6653 		mutex_exit(&pwrk->lock);
6654 		break;
6655 	case PMCS_TAG_TYPE_NONE:
6656 #ifdef DEBUG
6657 		pmcs_check_iomb_status(pwp, iomb);
6658 #endif
6659 		pmcs_pwork(pwp, pwrk);
6660 		break;
6661 	default:
6662 		/*
6663 		 * We will leak a structure here if we don't know
6664 		 * what happened
6665 		 */
6666 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Unknown PMCS_TAG_TYPE (%x)",
6667 		    __func__, PMCS_TAG_TYPE(pwrk->htag));
6668 		break;
6669 	}
6670 }
6671 
6672 /*
6673  * Determine if iport still has targets. During detach(9E), if SCSA is
6674  * successful in its guarantee of tran_tgt_free(9E) before detach(9E),
6675  * this should always return B_FALSE.
6676  */
6677 boolean_t
6678 pmcs_iport_has_targets(pmcs_hw_t *pwp, pmcs_iport_t *iport)
6679 {
6680 	pmcs_xscsi_t *xp;
6681 	int i;
6682 
6683 	mutex_enter(&pwp->lock);
6684 
6685 	if (!pwp->targets || !pwp->max_dev) {
6686 		mutex_exit(&pwp->lock);
6687 		return (B_FALSE);
6688 	}
6689 
6690 	for (i = 0; i < pwp->max_dev; i++) {
6691 		xp = pwp->targets[i];
6692 		if ((xp == NULL) || (xp->phy == NULL) ||
6693 		    (xp->phy->iport != iport)) {
6694 			continue;
6695 		}
6696 
6697 		mutex_exit(&pwp->lock);
6698 		return (B_TRUE);
6699 	}
6700 
6701 	mutex_exit(&pwp->lock);
6702 	return (B_FALSE);
6703 }
6704 
6705 /*
6706  * Called with softstate lock held
6707  */
6708 void
6709 pmcs_destroy_target(pmcs_xscsi_t *target)
6710 {
6711 	pmcs_hw_t *pwp = target->pwp;
6712 	pmcs_iport_t *iport;
6713 
6714 	ASSERT(pwp);
6715 	ASSERT(mutex_owned(&pwp->lock));
6716 
6717 	if (!target->ua) {
6718 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6719 		    "%s: target %p iport address is null",
6720 		    __func__, (void *)target);
6721 	}
6722 
6723 	iport = pmcs_get_iport_by_ua(pwp, target->ua);
6724 	if (iport == NULL) {
6725 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
6726 		    "%s: no iport associated with tgt(0x%p)",
6727 		    __func__, (void *)target);
6728 		return;
6729 	}
6730 
6731 	pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
6732 	    "%s: free target %p", __func__, (void *)target);
6733 	if (target->ua) {
6734 		strfree(target->ua);
6735 	}
6736 
6737 	mutex_destroy(&target->wqlock);
6738 	mutex_destroy(&target->aqlock);
6739 	mutex_destroy(&target->statlock);
6740 	cv_destroy(&target->reset_cv);
6741 	cv_destroy(&target->abort_cv);
6742 	ddi_soft_state_bystr_fini(&target->lun_sstate);
6743 	ddi_soft_state_bystr_free(iport->tgt_sstate, target->unit_address);
6744 	pmcs_rele_iport(iport);
6745 }
6746 
6747 /*
6748  * Get device state.  Called with statlock and PHY lock held.
6749  */
6750 int
6751 pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t *ds)
6752 {
6753 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
6754 	int result;
6755 	struct pmcwork *pwrk;
6756 	pmcs_phy_t *phyp;
6757 
6758 	pmcs_prt(pwp, PMCS_PRT_DEBUG3, "%s: tgt(0x%p)", __func__, (void *)xp);
6759 	if (xp == NULL) {
6760 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is NULL", __func__);
6761 		return (-1);
6762 	}
6763 
6764 	ASSERT(mutex_owned(&xp->statlock));
6765 	phyp = xp->phy;
6766 	ASSERT(mutex_owned(&phyp->phy_lock));
6767 
6768 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
6769 	if (pwrk == NULL) {
6770 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
6771 		return (-1);
6772 	}
6773 	pwrk->arg = msg;
6774 	pwrk->dtype = phyp->dtype;
6775 
6776 	if (phyp->valid_device_id == 0) {
6777 		pmcs_pwork(pwp, pwrk);
6778 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Invalid DeviceID", __func__);
6779 		return (-1);
6780 	}
6781 	htag = pwrk->htag;
6782 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
6783 	    PMCIN_GET_DEVICE_STATE));
6784 	msg[1] = LE_32(pwrk->htag);
6785 	msg[2] = LE_32(phyp->device_id);
6786 
6787 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6788 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6789 	if (ptr == NULL) {
6790 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6791 		pmcs_pwork(pwp, pwrk);
6792 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
6793 		return (-1);
6794 	}
6795 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
6796 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
6797 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6798 	mutex_exit(&xp->statlock);
6799 	pmcs_unlock_phy(phyp);
6800 	WAIT_FOR(pwrk, 1000, result);
6801 	pmcs_lock_phy(phyp);
6802 	pmcs_pwork(pwp, pwrk);
6803 	mutex_enter(&xp->statlock);
6804 
6805 	if (result) {
6806 		pmcs_timed_out(pwp, htag, __func__);
6807 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: cmd timed out, returning ",
6808 		    __func__);
6809 		return (-1);
6810 	}
6811 	if (LE_32(msg[2]) == 0) {
6812 		*ds = (uint8_t)(LE_32(msg[4]));
6813 		if (*ds != xp->dev_state) {
6814 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6815 			    "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__,
6816 			    *ds, xp->dev_state);
6817 		}
6818 		return (0);
6819 	} else {
6820 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6821 		    "%s: cmd failed Status(0x%x), returning ", __func__,
6822 		    LE_32(msg[2]));
6823 		return (-1);
6824 	}
6825 }
6826 
6827 /*
6828  * Set device state.  Called with target's statlock and PHY lock held.
6829  */
6830 int
6831 pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t ds)
6832 {
6833 	uint32_t htag, *ptr, msg[PMCS_MSG_SIZE];
6834 	int result;
6835 	uint8_t pds, nds;
6836 	struct pmcwork *pwrk;
6837 	pmcs_phy_t *phyp;
6838 
6839 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds(0x%x), tgt(0x%p)",
6840 	    __func__, ds, (void *)xp);
6841 	if (xp == NULL) {
6842 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Target is Null", __func__);
6843 		return (-1);
6844 	}
6845 
6846 	phyp = xp->phy;
6847 	pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp);
6848 	if (pwrk == NULL) {
6849 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nowrk, __func__);
6850 		return (-1);
6851 	}
6852 	if (phyp == NULL) {
6853 		pmcs_pwork(pwp, pwrk);
6854 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
6855 		    __func__);
6856 		return (-1);
6857 	}
6858 	if (phyp->valid_device_id == 0) {
6859 		pmcs_pwork(pwp, pwrk);
6860 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6861 		    "%s: Invalid DeviceID", __func__);
6862 		return (-1);
6863 	}
6864 	pwrk->arg = msg;
6865 	pwrk->dtype = phyp->dtype;
6866 	htag = pwrk->htag;
6867 	msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL,
6868 	    PMCIN_SET_DEVICE_STATE));
6869 	msg[1] = LE_32(pwrk->htag);
6870 	msg[2] = LE_32(phyp->device_id);
6871 	msg[3] = LE_32(ds);
6872 
6873 	mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6874 	ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6875 	if (ptr == NULL) {
6876 		mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]);
6877 		pmcs_pwork(pwp, pwrk);
6878 		pmcs_prt(pwp, PMCS_PRT_ERR, pmcs_nomsg, __func__);
6879 		return (-1);
6880 	}
6881 	COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE);
6882 	pwrk->state = PMCS_WORK_STATE_ONCHIP;
6883 	INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER);
6884 
6885 	mutex_exit(&xp->statlock);
6886 	pmcs_unlock_phy(phyp);
6887 	WAIT_FOR(pwrk, 1000, result);
6888 	pmcs_lock_phy(phyp);
6889 	pmcs_pwork(pwp, pwrk);
6890 	mutex_enter(&xp->statlock);
6891 
6892 	if (result) {
6893 		pmcs_timed_out(pwp, htag, __func__);
6894 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6895 		    "%s: cmd timed out, returning", __func__);
6896 		return (-1);
6897 	}
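	/*
	 * On success, word 4 of the reply packs the previous device state
	 * in bits 7:4 and the new state in bits 3:0.
	 */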
6898 	if (LE_32(msg[2]) == 0) {
6899 		pds = (uint8_t)(LE_32(msg[4]) >> 4);
6900 		nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f);
6901 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: previous_ds=0x%x, "
6902 		    "new_ds=0x%x", __func__, pds, nds);
6903 		xp->dev_state = nds;
6904 		return (0);
6905 	} else {
6906 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6907 		    "%s: cmd failed Status(0x%x), returning ", __func__,
6908 		    LE_32(msg[2]));
6909 		return (-1);
6910 	}
6911 }
6912 
6913 void
6914 pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
6915 {
6916 	uint8_t	ds;
6917 	int rc;
6918 	pmcs_xscsi_t *tgt;
6919 	pmcs_phy_t *pptr, *pnext, *pchild;
6920 
6921 	/*
6922 	 * First time, check to see if we're already performing recovery
6923 	 */
6924 	if (phyp == NULL) {
6925 		mutex_enter(&pwp->lock);
6926 		if (pwp->ds_err_recovering) {
6927 			mutex_exit(&pwp->lock);
6928 			SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
6929 			return;
6930 		}
6931 
6932 		pwp->ds_err_recovering = 1;
6933 		pptr = pwp->root_phys;
6934 		mutex_exit(&pwp->lock);
6935 	} else {
6936 		pptr = phyp;
6937 	}
6938 
6939 	while (pptr) {
6940 		/*
6941 		 * Since ds_err_recovering is set, we can be assured these
6942 		 * PHYs won't disappear on us while we do this.
6943 		 */
6944 		pmcs_lock_phy(pptr);
6945 		pchild = pptr->children;
6946 		pnext = pptr->sibling;
6947 		pmcs_unlock_phy(pptr);
6948 
6949 		if (pchild) {
6950 			pmcs_dev_state_recovery(pwp, pchild);
6951 		}
6952 
6953 		tgt = NULL;
6954 		pmcs_lock_phy(pptr);
6955 
6956 		if (pptr->dead) {
6957 			goto next_phy;
6958 		}
6959 
6960 		tgt = pptr->target;
6961 		if (tgt == NULL) {
6962 			if (pptr->dtype != NOTHING) {
6963 				pmcs_prt(pwp, PMCS_PRT_DEBUG2,
6964 				    "%s: no target for DS error recovery for "
6965 				    "PHY 0x%p", __func__, (void *)pptr);
6966 			}
6967 			goto next_phy;
6968 		}
6969 
6970 		mutex_enter(&tgt->statlock);
6971 
6972 		if (tgt->recover_wait == 0) {
6973 			goto next_phy;
6974 		}
6975 
6976 		if (tgt->dying) {
6977 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
6978 			    "%s: Not doing DS recovery on dying target %p",
6979 			    __func__, (void *)tgt);
6980 			goto next_phy;
6981 		}
6982 
6983 		/*
6984 		 * Step 1: Put the device into the IN_RECOVERY state
6985 		 */
6986 		rc = pmcs_get_dev_state(pwp, tgt, &ds);
6987 		if (rc != 0) {
6988 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
6989 			    "%s: pmcs_get_dev_state on PHY %s "
6990 			    "failed (rc=%d)",
6991 			    __func__, pptr->path, rc);
6992 
6993 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
6994 			    __func__, __LINE__, "pmcs_get_dev_state");
6995 
6996 			goto next_phy;
6997 		}
6998 
6999 		if (tgt->dev_state == ds) {
7000 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7001 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
7002 			    (void *)tgt);
7003 		} else {
7004 			tgt->dev_state = ds;
7005 			ds = PMCS_DEVICE_STATE_IN_RECOVERY;
7006 			rc = pmcs_send_err_recovery_cmd(pwp, ds, tgt);
7007 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7008 			    "%s: pmcs_send_err_recovery_cmd "
7009 			    "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)",
7010 			    __func__, rc, (void *)tgt, ds, tgt->dev_state);
7011 
7012 			if (rc) {
7013 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7014 				    "%s: pmcs_send_err_recovery_cmd to PHY %s "
7015 				    "failed (rc=%d)",
7016 				    __func__, pptr->path, rc);
7017 
7018 				pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7019 				    __func__, __LINE__,
7020 				    "pmcs_send_err_recovery_cmd");
7021 
7022 				goto next_phy;
7023 			}
7024 		}
7025 
7026 		/*
7027 		 * Step 2: Perform a hard reset on the PHY
7028 		 */
7029 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7030 		    "%s: Issue HARD_RESET to PHY %s", __func__, pptr->path);
7031 		/*
7032 		 * Must release statlock here because pmcs_reset_phy will
7033 		 * drop and reacquire the PHY lock.
7034 		 */
7035 		mutex_exit(&tgt->statlock);
7036 		rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET);
7037 		mutex_enter(&tgt->statlock);
7038 		if (rc) {
7039 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7040 			    "%s: HARD_RESET to PHY %s failed (rc=%d)",
7041 			    __func__, pptr->path, rc);
7042 
7043 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7044 			    __func__, __LINE__, "HARD_RESET");
7045 
7046 			goto next_phy;
7047 		}
7048 
7049 		/*
7050 		 * Step 3: Abort all I/Os to the device
7051 		 */
7052 		if (pptr->abort_all_start) {
7053 			while (pptr->abort_all_start) {
7054 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7055 				    "%s: Waiting for outstanding ABORT_ALL on "
7056 				    "PHY 0x%p", __func__, (void *)pptr);
7057 				cv_wait(&pptr->abort_all_cv, &pptr->phy_lock);
7058 			}
7059 		} else {
7060 			mutex_exit(&tgt->statlock);
7061 			rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1);
7062 			mutex_enter(&tgt->statlock);
7063 			if (rc != 0) {
7064 				pptr->abort_pending = 1;
7065 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7066 				    "%s: pmcs_abort to PHY %s failed (rc=%d)",
7067 				    __func__, pptr->path, rc);
7068 
7069 				pmcs_handle_ds_recovery_error(pptr, tgt,
7070 				    pwp, __func__, __LINE__, "pmcs_abort");
7071 
7072 				goto next_phy;
7073 			}
7074 		}
7075 
7076 		/*
7077 		 * Step 4: Set the device back to OPERATIONAL state
7078 		 */
7079 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7080 		    "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state",
7081 		    __func__, (void *)pptr, (void *)tgt);
7082 		rc = pmcs_set_dev_state(pwp, tgt,
7083 		    PMCS_DEVICE_STATE_OPERATIONAL);
7084 		if (rc == 0) {
7085 			tgt->recover_wait = 0;
7086 			pptr->ds_recovery_retries = 0;
7087 			/*
7088 			 * Don't bother to run the work queues if the PHY
7089 			 * is dead.
7090 			 */
7091 			if (tgt->phy && !tgt->phy->dead) {
7092 				SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES);
7093 				(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker,
7094 				    pwp, DDI_NOSLEEP);
7095 			}
7096 		} else {
7097 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7098 			    "%s: Failed to SET tgt 0x%p to OPERATIONAL state",
7099 			    __func__, (void *)tgt);
7100 
7101 			pmcs_handle_ds_recovery_error(pptr, tgt, pwp,
7102 			    __func__, __LINE__, "SET tgt to OPERATIONAL state");
7103 
7104 			goto next_phy;
7105 		}
7106 
7107 next_phy:
7108 		if (tgt) {
7109 			mutex_exit(&tgt->statlock);
7110 		}
7111 		pmcs_unlock_phy(pptr);
7112 		pptr = pnext;
7113 	}
7114 
7115 	/*
7116 	 * Only clear ds_err_recovering if we're exiting for good and not
7117 	 * just unwinding from recursion
7118 	 */
7119 	if (phyp == NULL) {
7120 		mutex_enter(&pwp->lock);
7121 		pwp->ds_err_recovering = 0;
7122 		mutex_exit(&pwp->lock);
7123 	}
7124 }
7125 
7126 /*
7127  * Called with target's statlock and PHY lock held.
7128  */
7129 int
7130 pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_xscsi_t *tgt)
7131 {
7132 	pmcs_phy_t *pptr;
7133 	int rc = -1;
7134 
7135 	ASSERT(tgt != NULL);
7136 	ASSERT(mutex_owned(&tgt->statlock));
7137 
7138 	if (tgt->recovering) {
7139 		return (0);
7140 	}
7141 
7142 	tgt->recovering = 1;
7143 	pptr = tgt->phy;
7144 
7145 	if (pptr == NULL) {
7146 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: PHY is Null",
7147 		    __func__);
		tgt->recovering = 0;
7148 		return (-1);
7149 	}
7150 
7151 	ASSERT(mutex_owned(&pptr->phy_lock));
7152 
7153 	pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, "%s: ds: 0x%x, tgt ds(0x%x)",
7154 	    __func__, dev_state, tgt->dev_state);
7155 
7156 	switch (dev_state) {
7157 	case PMCS_DEVICE_STATE_IN_RECOVERY:
7158 		if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) {
7159 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7160 			    "%s: Target 0x%p already IN_RECOVERY", __func__,
7161 			    (void *)tgt);
7162 			rc = 0;	/* This is not an error */
7163 			goto no_action;
7164 		}
7165 
7166 		rc = pmcs_set_dev_state(pwp, tgt,
7167 		    PMCS_DEVICE_STATE_IN_RECOVERY);
7168 		if (rc != 0) {
7169 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7170 			    "%s(1): Failed to SET tgt(0x%p) to _IN_RECOVERY",
7171 			    __func__, (void *)tgt);
7172 		}
7173 
7174 		break;
7175 
7176 	case PMCS_DEVICE_STATE_OPERATIONAL:
7177 		if (tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) {
7178 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7179 			    "%s: Target 0x%p not ready to go OPERATIONAL",
7180 			    __func__, (void *)tgt);
7181 			goto no_action;
7182 		}
7183 
7184 		rc = pmcs_set_dev_state(pwp, tgt,
7185 		    PMCS_DEVICE_STATE_OPERATIONAL);
7186 		tgt->reset_success = 1;
7187 		if (rc != 0) {
7188 			pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7189 			    "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL",
7190 			    __func__, (void *)tgt);
7191 			tgt->reset_success = 0;
7192 		}
7193 
7194 		break;
7195 
7196 	case PMCS_DEVICE_STATE_NON_OPERATIONAL:
7197 		PHY_CHANGED(pwp, pptr);
7198 		RESTART_DISCOVERY(pwp);
7199 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7200 		    "%s: Device at %s is non-operational",
7201 		    __func__, pptr->path);
7202 		tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL;
7203 		rc = 0;
7204 
7205 		break;
7206 
7207 	default:
7208 		pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE,
7209 		    "%s: Invalid state requested (%d)", __func__,
7210 		    dev_state);
7211 		break;
7212 
7213 	}
7214 
7215 no_action:
7216 	tgt->recovering = 0;
7217 	return (rc);
7218 }
7219 
7220 /*
7221  * pmcs_lock_phy_impl
7222  *
7223  * This function is what does the actual work for pmcs_lock_phy.  It will
7224  * lock all PHYs from phyp down in a top-down fashion.
7225  *
7226  * Locking notes:
7227  * 1. level starts from 0 for the PHY ("parent") that's passed in.  It is
7228  * not a reflection of the actual level of the PHY in the SAS topology.
7229  * 2. If parent is an expander, then parent is locked along with all its
7230  * descendents.
7231  * descendants.
7232  * responsibility of the caller to individually lock expander subsidiary PHYs
7233  * at level 0 if necessary.
7234  * 4. Siblings at level 0 are not traversed due to the possibility that we're
7235  * locking a PHY on the dead list.  The siblings could be pointing to invalid
7236  * PHYs.  We don't lock siblings at level 0 anyway.
7237  */
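/*
 * Example (a sketch): locking expander E whose children are c0 and c1,
 * where c1 is itself an expander with children g0 and g1, acquires the
 * phy_locks in the order E, c0, c1, g0, g1.
 */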
7238 static void
7239 pmcs_lock_phy_impl(pmcs_phy_t *phyp, int level)
7240 {
7241 	pmcs_phy_t *tphyp;
7242 
7243 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
7244 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
7245 
7246 	/*
7247 	 * Start walking the PHYs.
7248 	 */
7249 	tphyp = phyp;
7250 	while (tphyp) {
7251 		/*
7252 		 * If we're at the top level, only lock ourselves.  For anything
7253 		 * at level > 0, traverse children while locking everything.
7254 		 */
7255 		if ((level > 0) || (tphyp == phyp)) {
7256 			pmcs_prt(tphyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7257 			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
7258 			    __func__, (void *)tphyp, (void *)tphyp->parent,
7259 			    tphyp->path, level);
7260 			mutex_enter(&tphyp->phy_lock);
7261 
7262 			if (tphyp->children) {
7263 				pmcs_lock_phy_impl(tphyp->children, level + 1);
7264 			}
7265 		}
7266 
7267 		if (level == 0) {
7268 			return;
7269 		}
7270 
7271 		tphyp = tphyp->sibling;
7272 	}
7273 }
7274 
7275 /*
7276  * pmcs_lock_phy
7277  *
7278  * This function is responsible for locking a PHY and all its descendants
7279  */
7280 void
7281 pmcs_lock_phy(pmcs_phy_t *phyp)
7282 {
7283 #ifdef DEBUG
7284 	char *callername = NULL;
7285 	ulong_t off;
7286 
7287 	ASSERT(phyp != NULL);
7288 
7289 	callername = modgetsymname((uintptr_t)caller(), &off);
7290 
7291 	if (callername == NULL) {
7292 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7293 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7294 		    (void *)phyp, phyp->path);
7295 	} else {
7296 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7297 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7298 		    (void *)phyp, phyp->path, callername, off);
7299 	}
7300 #else
7301 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7302 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7303 #endif
7304 	pmcs_lock_phy_impl(phyp, 0);
7305 }
7306 
7307 /*
7308  * pmcs_unlock_phy_impl
7309  *
7310  * Unlock all PHYs from phyp down in a bottom-up fashion.
7311  */
7312 static void
7313 pmcs_unlock_phy_impl(pmcs_phy_t *phyp, int level)
7314 {
7315 	pmcs_phy_t *phy_next;
7316 
7317 	ASSERT((phyp->dtype == SAS) || (phyp->dtype == SATA) ||
7318 	    (phyp->dtype == EXPANDER) || (phyp->dtype == NOTHING));
7319 
7320 	/*
7321 	 * Recurse down to the bottom PHYs
7322 	 */
7323 	if (level == 0) {
7324 		if (phyp->children) {
7325 			pmcs_unlock_phy_impl(phyp->children, level + 1);
7326 		}
7327 	} else {
7328 		phy_next = phyp;
7329 		while (phy_next) {
7330 			if (phy_next->children) {
7331 				pmcs_unlock_phy_impl(phy_next->children,
7332 				    level + 1);
7333 			}
7334 			phy_next = phy_next->sibling;
7335 		}
7336 	}
7337 
7338 	/*
7339 	 * Iterate through PHYs unlocking all at level > 0 as well the top PHY
7340 	 */
7341 	phy_next = phyp;
7342 	while (phy_next) {
7343 		if ((level > 0) || (phy_next == phyp)) {
7344 			pmcs_prt(phy_next->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7345 			    "%s: PHY 0x%p parent 0x%p path %s lvl %d",
7346 			    __func__, (void *)phy_next,
7347 			    (void *)phy_next->parent, phy_next->path, level);
7348 			mutex_exit(&phy_next->phy_lock);
7349 		}
7350 
7351 		if (level == 0) {
7352 			return;
7353 		}
7354 
7355 		phy_next = phy_next->sibling;
7356 	}
7357 }
7358 
7359 /*
7360  * pmcs_unlock_phy
7361  *
7362  * Unlock a PHY and all its descendants
7363  */
7364 void
7365 pmcs_unlock_phy(pmcs_phy_t *phyp)
7366 {
7367 #ifdef DEBUG
7368 	char *callername = NULL;
7369 	ulong_t off;
7370 
7371 	ASSERT(phyp != NULL);
7372 
7373 	callername = modgetsymname((uintptr_t)caller(), &off);
7374 
7375 	if (callername == NULL) {
7376 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7377 		    "%s: PHY 0x%p path %s caller: unknown", __func__,
7378 		    (void *)phyp, phyp->path);
7379 	} else {
7380 		pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7381 		    "%s: PHY 0x%p path %s caller: %s+%lx", __func__,
7382 		    (void *)phyp, phyp->path, callername, off);
7383 	}
7384 #else
7385 	pmcs_prt(phyp->pwp, PMCS_PRT_DEBUG_PHY_LOCKING,
7386 	    "%s: PHY 0x%p path %s", __func__, (void *)phyp, phyp->path);
7387 #endif
7388 	pmcs_unlock_phy_impl(phyp, 0);
7389 }
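
/*
 * Usage sketch (illustrative only, not code from this driver): a caller
 * that needs to examine a PHY subtree brackets the walk with this
 * lock/unlock pair.  The lock covers phyp and every descendant, so the
 * walk itself takes no further PHY locks (declarations elided):
 *
 *	pmcs_lock_phy(phyp);
 *	for (ctmp = phyp->children; ctmp != NULL; ctmp = ctmp->sibling) {
 *		nkids++;
 *	}
 *	pmcs_unlock_phy(phyp);
 */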
7390 
7391 /*
7392  * pmcs_get_root_phy
7393  *
7394  * For a given PHY pointer, return its root PHY.
7395  * The caller must be holding the lock on every PHY from phyp up to the root.
7396  */
7397 pmcs_phy_t *
7398 pmcs_get_root_phy(pmcs_phy_t *phyp)
7399 {
7400 	ASSERT(phyp);
7401 
7402 	while (phyp) {
7403 		if (IS_ROOT_PHY(phyp)) {
7404 			break;
7405 		}
7406 		phyp = phyp->parent;
7407 	}
7408 
7409 	return (phyp);
7410 }
7411 
7412 /*
7413  * pmcs_free_dma_chunklist
7414  *
7415  * Free DMA S/G chunk list
7416  */
7417 void
7418 pmcs_free_dma_chunklist(pmcs_hw_t *pwp)
7419 {
7420 	pmcs_chunk_t	*pchunk;
7421 
7422 	while (pwp->dma_chunklist) {
7423 		pchunk = pwp->dma_chunklist;
7424 		pwp->dma_chunklist = pwp->dma_chunklist->next;
7425 		if (pchunk->dma_handle) {
7426 			if (ddi_dma_unbind_handle(pchunk->dma_handle) !=
7427 			    DDI_SUCCESS) {
7428 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "Condition failed"
7429 				    " at %s():%d", __func__, __LINE__);
7430 			}
7431 			ddi_dma_free_handle(&pchunk->dma_handle);
7432 			ddi_dma_mem_free(&pchunk->acc_handle);
7433 		}
7434 		kmem_free(pchunk, sizeof (pmcs_chunk_t));
7435 	}
7436 }
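
/*
 * For context, the allocation side presumably links each new chunk at the
 * head of pwp->dma_chunklist (a sketch only, using the field names above),
 * so the teardown above simply pops chunks from the head:
 *
 *	pchunk->next = pwp->dma_chunklist;
 *	pwp->dma_chunklist = pchunk;
 */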
7437 
7438 
7439 /*
7440  * Start ssp event recovery. We have to schedule recovery operation because
7441  * it involves sending multiple commands to device and we should not do it
7442  * in the interrupt context.
7443  * If it is failure of a recovery command, let the recovery thread deal with it.
7444  * Called with pmcwork lock held.
7445  */
7446 
7447 void
7448 pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
7449     size_t amt)
7450 {
7451 	pmcs_xscsi_t *tgt = pwrk->xp;
7452 	uint32_t event = LE_32(iomb[2]);
7453 	pmcs_phy_t *pptr = pwrk->phy;
7454 	uint32_t tag;
7455 
7456 	if (tgt != NULL) {
7457 		mutex_enter(&tgt->statlock);
7458 		if (tgt->dying || !tgt->assigned) {
7459 			if (pptr) {
7460 				pmcs_dec_phy_ref_count(pptr);
7461 			}
7462 			pptr = NULL;
7463 			pwrk->phy = NULL;
7464 		}
7465 		mutex_exit(&tgt->statlock);
7466 	}
7467 	if (pptr == NULL) {
7468 		/*
7469 		 * No target or dying target. Need to run RE-DISCOVERY here.
7470 		 */
7471 		if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) {
7472 			pwrk->state = PMCS_WORK_STATE_INTR;
7473 		}
7474 		/*
7475 		 * Although we cannot mark the PHY to force an abort, nor mark
7476 		 * it as changed, killing the target will take care of
7477 		 * aborting the commands for the device.
7478 		 */
7479 		pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: No valid target for event "
7480 		    "processing found. Scheduling RECONFIGURE", __func__);
7481 		pmcs_pwork(pwp, pwrk);
7482 		RESTART_DISCOVERY(pwp);
7483 		return;
7484 	} else {
7485 		pmcs_lock_phy(pptr);
7486 		mutex_enter(&tgt->statlock);
7487 		if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) {
7488 			if (tgt->dev_state !=
7489 			    PMCS_DEVICE_STATE_NON_OPERATIONAL) {
7490 				pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: Device at "
7491 				    "%s is non-operational", __func__,
7492 				    pptr->path);
7493 				tgt->dev_state =
7494 				    PMCS_DEVICE_STATE_NON_OPERATIONAL;
7495 			}
7496 			pptr->abort_pending = 1;
7497 			mutex_exit(&tgt->statlock);
7498 			pmcs_unlock_phy(pptr);
7499 			mutex_exit(&pwrk->lock);
7500 			SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
7501 			RESTART_DISCOVERY(pwp);
7502 			return;
7503 		}
7504 
7505 		/*
7506 		 * If this command was issued in WAIT mode, it is a failed
7507 		 * recovery command, so just wake up the recovery thread
7508 		 * waiting for its completion.
7509 		 */
7510 		tag = PMCS_TAG_TYPE(pwrk->htag);
7511 		if (tag == PMCS_TAG_TYPE_WAIT) {
7512 			pwrk->htag |= PMCS_TAG_DONE;
7513 			if (pwrk->arg && amt) {
7514 				(void) memcpy(pwrk->arg, iomb, amt);
7515 			}
7516 			cv_signal(&pwrk->sleep_cv);
7517 			mutex_exit(&tgt->statlock);
7518 			pmcs_unlock_phy(pptr);
7519 			mutex_exit(&pwrk->lock);
7520 			return;
7521 		}
7522 
7523 		/*
7524 		 * To recover from primary failures, we need to schedule
7525 		 * event recovery handling.
7526 		 */
7527 		tgt->event_recovery = 1;
7528 		mutex_exit(&tgt->statlock);
7529 		pmcs_unlock_phy(pptr);
7530 		pwrk->ssp_event = event;
7531 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
7532 		    "%s: Scheduling SSP event recovery for tgt(0x%p) "
7533 		    "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
7534 		    pwrk->htag);
7535 		mutex_exit(&pwrk->lock);
7536 		SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
7537 	}
7538 
7539 	/* Work cannot be completed until event recovery is completed. */
7540 }
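
/*
 * For reference, a WAIT-mode issuer pairs with the cv_signal() above
 * roughly as sketched here (illustrative only; the driver's actual
 * synchronous waiters presumably use timed waits and their own helpers).
 * It sleeps on pwrk->sleep_cv until the completion path sets
 * PMCS_TAG_DONE in pwrk->htag:
 *
 *	mutex_enter(&pwrk->lock);
 *	while ((pwrk->htag & PMCS_TAG_DONE) == 0) {
 *		cv_wait(&pwrk->sleep_cv, &pwrk->lock);
 *	}
 *	mutex_exit(&pwrk->lock);
 */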
7541 
7542 /*
7543  * SSP target event recovery
7544  * Entered with the PHY lock held.
7545  * The pwrk lock is not needed; pwrk is on the target aq and no other thread
7546  * will do anything with it until this thread starts the chain of recovery.
7547  * Statlock may be acquired and released.
7548  */
7549 
7550 void
7551 pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk)
7552 {
7553 	pmcs_phy_t *pptr = pwrk->phy;
7554 	pmcs_cmd_t *sp = pwrk->arg;
7555 	pmcs_lun_t *lun = sp->cmd_lun;
7556 	pmcs_xscsi_t *tgt = pwrk->xp;
7557 	uint32_t event;
7558 	uint32_t htag;
7559 	uint32_t status;
7560 	uint8_t dstate;
7561 	int rv;
7562 
7563 	ASSERT(pwrk->arg != NULL);
7564 	ASSERT(pwrk->xp != NULL);
7565 	pmcs_prt(pwp, PMCS_PRT_DEBUG, "%s: event recovery for "
7566 	    "target 0x%p", __func__, (void *)pwrk->xp);
7567 	htag = pwrk->htag;
7568 	event = pwrk->ssp_event;
7569 	pwrk->ssp_event = 0xffffffff;
7570 	if (event == PMCOUT_STATUS_XFER_ERR_BREAK ||
7571 	    event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY ||
7572 	    event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) {
7573 		/* Command may still be pending on the device */
7574 		rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag,
7575 		    lun->lun_num, &status);
7576 		if (rv != 0) {
7577 			goto out;
7578 		}
7579 		if (status == SAS_RSP_TMF_COMPLETE) {
7580 			/* Command NOT pending on the device */
7581 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7582 			    "%s: No pending command for tgt 0x%p",
7583 			    __func__, (void *)tgt);
7584 			/* Nothing more to do, just abort it on chip */
7585 			htag = 0;
7586 		}
7587 	}
7588 	/*
7589 	 * All other events left the command pending in the host.
7590 	 * Send ABORT TASK and then abort it on the chip.
7591 	 */
7592 	if (htag != 0) {
7593 		if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag,
7594 		    lun->lun_num, &status))
7595 			goto out;
7596 	}
7597 	(void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1);
7598 	/*
7599 	 * The abort either took care of work completion or put the device
7600 	 * into a recovery state.
7601 	 */
7602 	return;
7603 out:
7604 	/* Abort failed, do full device recovery */
7605 	mutex_enter(&tgt->statlock);
7606 	if (!pmcs_get_dev_state(pwp, tgt, &dstate))
7607 		tgt->dev_state = dstate;
7608 
7609 	if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) &&
7610 	    (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) {
7611 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
7612 		    "%s: Setting IN_RECOVERY for tgt 0x%p",
7613 		    __func__, (void *)tgt);
7614 		(void) pmcs_send_err_recovery_cmd(pwp,
7615 		    PMCS_DEVICE_STATE_IN_RECOVERY, tgt);
7616 	}
7617 	mutex_exit(&tgt->statlock);
7618 }
7619 
7620 /*
7621  * SSP event recovery task.
7622  */
7623 void
7624 pmcs_ssp_event_recovery(pmcs_hw_t *pwp)
7625 {
7626 	int idx;
7627 	pmcs_xscsi_t *tgt;
7628 	pmcs_cmd_t *cp;
7629 	pmcwork_t *pwrk;
7630 	pmcs_phy_t *pphy;
7631 	int er_flag;
7632 	uint32_t idxpwrk;
7633 
7634 restart:
7635 	for (idx = 0; idx < pwp->max_dev; idx++) {
7636 		mutex_enter(&pwp->lock);
7637 		tgt = pwp->targets[idx];
7638 		mutex_exit(&pwp->lock);
7639 		if (tgt != NULL) {
7640 			mutex_enter(&tgt->statlock);
7641 			if (tgt->dying || !tgt->assigned) {
7642 				mutex_exit(&tgt->statlock);
7643 				continue;
7644 			}
7645 			pphy = tgt->phy;
7646 			er_flag = tgt->event_recovery;
7647 			mutex_exit(&tgt->statlock);
7648 			if (pphy != NULL && er_flag != 0) {
7649 				pmcs_lock_phy(pphy);
7650 				mutex_enter(&tgt->statlock);
7651 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7652 				    "%s: found target(0x%p)", __func__,
7653 				    (void *)tgt);
7654 
7655 				/* Check what cmd expects recovery */
7656 				mutex_enter(&tgt->aqlock);
7657 				STAILQ_FOREACH(cp, &tgt->aq, cmd_next) {
7658 					/*
7659 					 * Since the work structure is on
7660 					 * this target's aq, and only this
7661 					 * thread is accessing it now, we do
7662 					 * not need to lock it.
7663 					 */
7664 					idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag);
7665 					pwrk = &pwp->work[idxpwrk];
7666 					if (pwrk->htag != cp->cmd_tag) {
7667 						/*
7668 						 * aq may contain TMF commands,
7669 						 * The aq may contain TMF
7670 						 * commands, so we may not find
7671 						 * a work structure with this htag.
7672 						break;
7673 					}
7674 					if (pwrk->ssp_event != 0 &&
7675 					    pwrk->ssp_event !=
7676 					    PMCS_REC_EVENT) {
7677 						pmcs_prt(pwp,
7678 						    PMCS_PRT_DEBUG,
7679 						    "%s: pwrk(%p) ctag(0x%x)",
7680 						    __func__, (void *)pwrk,
7681 						    cp->cmd_tag);
7682 						mutex_exit(&tgt->aqlock);
7683 						mutex_exit(&tgt->statlock);
7684 						pmcs_tgt_event_recovery(
7685 						    pwp, pwrk);
7686 						/*
7687 						 * We dropped statlock, so
7688 						 * restart scanning from scratch
7689 						 */
7690 						pmcs_unlock_phy(pphy);
7691 						goto restart;
7692 					}
7693 				}
7694 				mutex_exit(&tgt->aqlock);
7695 				tgt->event_recovery = 0;
7696 				pmcs_prt(pwp, PMCS_PRT_DEBUG,
7697 				    "%s: end of SSP event recovery for "
7698 				    "target(0x%p)", __func__, (void *)tgt);
7699 				mutex_exit(&tgt->statlock);
7700 				pmcs_unlock_phy(pphy);
7701 			}
7702 		}
7703 	}
7704 	pmcs_prt(pwp, PMCS_PRT_DEBUG,
7705 	    "%s: end of SSP event recovery for pwp(0x%p)", __func__,
7706 	    (void *)pwp);
7707 }
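
/*
 * This task runs from the driver worker.  The completion path above only
 * queues it via SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY); a caller
 * that cannot wait for the watchdog to run the worker can kick it
 * immediately, the same way the dev-state recovery code below does:
 *
 *	SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
 *	(void) ddi_taskq_dispatch(pwp->tq, pmcs_worker, pwp, DDI_NOSLEEP);
 */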
7708 
7709 /*ARGSUSED2*/
7710 int
7711 pmcs_phy_constructor(void *buf, void *arg, int kmflags)
7712 {
7713 	pmcs_hw_t *pwp = (pmcs_hw_t *)arg;
7714 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7715 
7716 	mutex_init(&phyp->phy_lock, NULL, MUTEX_DRIVER,
7717 	    DDI_INTR_PRI(pwp->intr_pri));
7718 	cv_init(&phyp->abort_all_cv, NULL, CV_DRIVER, NULL);
7719 	return (0);
7720 }
7721 
7722 /*ARGSUSED1*/
7723 void
7724 pmcs_phy_destructor(void *buf, void *arg)
7725 {
7726 	pmcs_phy_t *phyp = (pmcs_phy_t *)buf;
7727 
7728 	cv_destroy(&phyp->abort_all_cv);
7729 	mutex_destroy(&phyp->phy_lock);
7730 }
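
/*
 * These callbacks are wired up when the PHY cache is created, presumably
 * at attach time with something like the sketch below (the cache name and
 * alignment are illustrative; pwp is passed through to the constructor as
 * the private argument):
 *
 *	pwp->phy_cache = kmem_cache_create("pmcs_phy_cache",
 *	    sizeof (pmcs_phy_t), 8, pmcs_phy_constructor,
 *	    pmcs_phy_destructor, NULL, (void *)pwp, NULL, 0);
 */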
7731 
7732 /*
7733  * Free all PHYs from the kmem_cache starting at phyp as well as everything
7734  * on the dead_phys list.
7735  *
7736  * NOTE: This function does not free root PHYs as they are not allocated
7737  * from the kmem_cache.
7738  *
7739  * No PHY locks are acquired as this should only be called during DDI_DETACH
7740  * or soft reset (while pmcs interrupts are disabled).
7741  */
7742 void
7743 pmcs_free_all_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7744 {
7745 	pmcs_phy_t *tphyp, *nphyp;
7746 
7747 	if (phyp == NULL) {
7748 		return;
7749 	}
7750 
7751 	tphyp = phyp;
7752 	while (tphyp) {
7753 		nphyp = tphyp->sibling;
7754 
7755 		if (tphyp->children) {
7756 			pmcs_free_all_phys(pwp, tphyp->children);
7757 			tphyp->children = NULL;
7758 		}
7759 		if (!IS_ROOT_PHY(tphyp)) {
7760 			kmem_cache_free(pwp->phy_cache, tphyp);
7761 		}
7762 
7763 		tphyp = nphyp;
7764 	}
7765 
7766 	tphyp = pwp->dead_phys;
7767 	while (tphyp) {
7768 		nphyp = tphyp->sibling;
7769 		kmem_cache_free(pwp->phy_cache, tphyp);
7770 		tphyp = nphyp;
7771 	}
7772 	pwp->dead_phys = NULL;
7773 }
7774 
7775 /*
7776  * Free a list of PHYs linked together by the sibling pointer back to the
7777  * kmem cache from whence they came.  This function does not recurse, so the
7778  * caller must ensure there are no children.
7779  */
7780 void
7781 pmcs_free_phys(pmcs_hw_t *pwp, pmcs_phy_t *phyp)
7782 {
7783 	pmcs_phy_t *next_phy;
7784 
7785 	while (phyp) {
7786 		next_phy = phyp->sibling;
7787 		ASSERT(!mutex_owned(&phyp->phy_lock));
7788 		kmem_cache_free(pwp->phy_cache, phyp);
7789 		phyp = next_phy;
7790 	}
7791 }
7792 
7793 /*
7794  * Make a copy of an existing PHY structure.  This is used primarily in
7795  * discovery to compare the contents of an existing PHY with what gets
7796  * reported back by an expander.
7797  *
7798  * Because it allocates with KM_SLEEP, this function may only be called
7799  * from a context in which sleeping is allowed.
7800  *
7801  * The new PHY is returned unlocked.
7802  */
7803 static pmcs_phy_t *
7804 pmcs_clone_phy(pmcs_phy_t *orig_phy)
7805 {
7806 	pmcs_phy_t *local;
7807 
7808 	local = kmem_cache_alloc(orig_phy->pwp->phy_cache, KM_SLEEP);
7809 
7810 	/*
7811 	 * Go ahead and just copy everything...
7812 	 */
7813 	*local = *orig_phy;
7814 
7815 	/*
7816 	 * But the following must be set appropriately for this copy
7817 	 */
7818 	local->sibling = NULL;
7819 	local->children = NULL;
7820 	mutex_init(&local->phy_lock, NULL, MUTEX_DRIVER,
7821 	    DDI_INTR_PRI(orig_phy->pwp->intr_pri));
7822 
7823 	return (local);
7824 }
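
/*
 * A discovery-time sketch (illustrative only; the real checks live in the
 * expander discovery code): clone the current PHY, overwrite the copy
 * with what the expander reports, compare the two, then release the copy:
 *
 *	local = pmcs_clone_phy(orig);
 *	... fill local in from the SMP DISCOVER response ...
 *	if (memcmp(local->sas_address, orig->sas_address, 8) != 0) {
 *		... the attached device changed; mark orig changed ...
 *	}
 *	pmcs_free_phys(pwp, local);
 */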
7825 
7826 int
7827 pmcs_check_acc_handle(ddi_acc_handle_t handle)
7828 {
7829 	ddi_fm_error_t de;
7830 
7831 	if (handle == NULL) {
7832 		return (DDI_FAILURE);
7833 	}
7834 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VER0);
7835 	return (de.fme_status);
7836 }
7837 
7838 int
7839 pmcs_check_dma_handle(ddi_dma_handle_t handle)
7840 {
7841 	ddi_fm_error_t de;
7842 
7843 	if (handle == NULL) {
7844 		return (DDI_FAILURE);
7845 	}
7846 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VER0);
7847 	return (de.fme_status);
7848 }
7849 
7850 
7851 void
7852 pmcs_fm_ereport(pmcs_hw_t *pwp, char *detail)
7853 {
7854 	uint64_t ena;
7855 	char buf[FM_MAX_CLASS];
7856 
7857 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7858 	ena = fm_ena_generate(0, FM_ENA_FMT1);
7859 	if (DDI_FM_EREPORT_CAP(pwp->fm_capabilities)) {
7860 		ddi_fm_ereport_post(pwp->dip, buf, ena, DDI_NOSLEEP,
7861 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7862 	}
7863 }
7864 
7865 int
7866 pmcs_check_acc_dma_handle(pmcs_hw_t *pwp)
7867 {
7868 	pmcs_chunk_t *pchunk;
7869 	int i;
7870 
7871 	/* check all acc & dma handles allocated in attach */
7872 	if ((pmcs_check_acc_handle(pwp->pci_acc_handle) != DDI_SUCCESS) ||
7873 	    (pmcs_check_acc_handle(pwp->msg_acc_handle) != DDI_SUCCESS) ||
7874 	    (pmcs_check_acc_handle(pwp->top_acc_handle) != DDI_SUCCESS) ||
7875 	    (pmcs_check_acc_handle(pwp->mpi_acc_handle) != DDI_SUCCESS) ||
7876 	    (pmcs_check_acc_handle(pwp->gsm_acc_handle) != DDI_SUCCESS)) {
7877 		goto check_failed;
7878 	}
7879 
7880 	for (i = 0; i < PMCS_NIQ; i++) {
7881 		if ((pmcs_check_dma_handle(
7882 		    pwp->iqp_handles[i]) != DDI_SUCCESS) ||
7883 		    (pmcs_check_acc_handle(
7884 		    pwp->iqp_acchdls[i]) != DDI_SUCCESS)) {
7885 			goto check_failed;
7886 		}
7887 	}
7888 
7889 	for (i = 0; i < PMCS_NOQ; i++) {
7890 		if ((pmcs_check_dma_handle(
7891 		    pwp->oqp_handles[i]) != DDI_SUCCESS) ||
7892 		    (pmcs_check_acc_handle(
7893 		    pwp->oqp_acchdls[i]) != DDI_SUCCESS)) {
7894 			goto check_failed;
7895 		}
7896 	}
7897 
7898 	if ((pmcs_check_dma_handle(pwp->cip_handles) != DDI_SUCCESS) ||
7899 	    (pmcs_check_acc_handle(pwp->cip_acchdls) != DDI_SUCCESS)) {
7900 		goto check_failed;
7901 	}
7902 
7903 	if (pwp->fwlog &&
7904 	    ((pmcs_check_dma_handle(pwp->fwlog_hndl) != DDI_SUCCESS) ||
7905 	    (pmcs_check_acc_handle(pwp->fwlog_acchdl) != DDI_SUCCESS))) {
7906 		goto check_failed;
7907 	}
7908 
7909 	if (pwp->regdump_hndl && pwp->regdump_acchdl &&
7910 	    ((pmcs_check_dma_handle(pwp->regdump_hndl) != DDI_SUCCESS) ||
7911 	    (pmcs_check_acc_handle(pwp->regdump_acchdl)
7912 	    != DDI_SUCCESS))) {
7913 		goto check_failed;
7914 	}
7915 
7916 
7917 	pchunk = pwp->dma_chunklist;
7918 	while (pchunk) {
7919 		if ((pmcs_check_acc_handle(pchunk->acc_handle)
7920 		    != DDI_SUCCESS) ||
7921 		    (pmcs_check_dma_handle(pchunk->dma_handle)
7922 		    != DDI_SUCCESS)) {
7923 			goto check_failed;
7924 		}
7925 		pchunk = pchunk->next;
7926 	}
7927 
7928 	return (0);
7929 
7930 check_failed:
7931 
7932 	return (1);
7933 }
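
/*
 * A caller that polls these handles would typically post an ereport and
 * report a service impact on failure.  A minimal sketch; the impact
 * level chosen here is illustrative:
 *
 *	if (pmcs_check_acc_dma_handle(pwp)) {
 *		pmcs_fm_ereport(pwp, DDI_FM_DEVICE_NO_RESPONSE);
 *		ddi_fm_service_impact(pwp->dip, DDI_SERVICE_UNAFFECTED);
 *	}
 */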
7934 
7935 /*
7936  * pmcs_handle_dead_phys
7937  *
7938  * If the PHY has no outstanding work associated with it, remove it from
7939  * the dead PHY list and free it.
7940  *
7941  * If pwp->ds_err_recovering or pwp->configuring is set, don't run.
7942  * This keeps routines that need to submit work to the chip from having to
7943  * hold PHY locks to ensure that PHYs don't disappear while they do their work.
7944  */
7945 void
7946 pmcs_handle_dead_phys(pmcs_hw_t *pwp)
7947 {
7948 	pmcs_phy_t *phyp, *nphyp, *pphyp;
7949 
7950 	mutex_enter(&pwp->lock);
7951 	mutex_enter(&pwp->config_lock);
7952 
7953 	if (pwp->configuring || pwp->ds_err_recovering) {
7954 		mutex_exit(&pwp->config_lock);
7955 		mutex_exit(&pwp->lock);
7956 		return;
7957 	}
7958 
7959 	/*
7960 	 * Check every PHY in the dead PHY list
7961 	 */
7962 	mutex_enter(&pwp->dead_phylist_lock);
7963 	phyp = pwp->dead_phys;
7964 	pphyp = NULL;	/* Set previous PHY to NULL */
7965 
7966 	while (phyp != NULL) {
7967 		pmcs_lock_phy(phyp);
7968 		ASSERT(phyp->dead);
7969 
7970 		nphyp = phyp->dead_next;
7971 
7972 		/*
7973 		 * Check for outstanding work
7974 		 */
7975 		if (phyp->ref_count > 0) {
7976 			pmcs_unlock_phy(phyp);
7977 			pphyp = phyp;	/* This PHY becomes "previous" */
7978 		} else if (phyp->target) {
7979 			pmcs_unlock_phy(phyp);
7980 			pmcs_prt(pwp, PMCS_PRT_DEBUG1,
7981 			    "%s: Not freeing PHY 0x%p: target 0x%p is not free",
7982 			    __func__, (void *)phyp, (void *)phyp->target);
7983 			pphyp = phyp;
7984 		} else {
7985 			/*
7986 			 * No outstanding work or target references. Remove it
7987 			 * from the list and free it
7988 			 */
7989 			pmcs_prt(pwp, PMCS_PRT_DEBUG,
7990 			    "%s: Freeing inactive dead PHY 0x%p @ %s "
7991 			    "target = 0x%p", __func__, (void *)phyp,
7992 			    phyp->path, (void *)phyp->target);
7993 			/*
7994 			 * If pphyp is NULL, then phyp was the head of the list,
7995 			 * so just reset the head to nphyp. Otherwise, the
7996 			 * previous PHY will now point to nphyp (the next PHY)
7997 			 */
7998 			if (pphyp == NULL) {
7999 				pwp->dead_phys = nphyp;
8000 			} else {
8001 				pphyp->dead_next = nphyp;
8002 			}
8003 			/*
8004 			 * If the target still points to this PHY, remove
8005 			 * that linkage now.
8006 			 */
8007 			if (phyp->target) {
8008 				mutex_enter(&phyp->target->statlock);
8009 				if (phyp->target->phy == phyp) {
8010 					phyp->target->phy = NULL;
8011 				}
8012 				mutex_exit(&phyp->target->statlock);
8013 			}
8014 			kmem_cache_free(pwp->phy_cache, phyp);
8015 		}
8016 
8017 		phyp = nphyp;
8018 	}
8019 
8020 	mutex_exit(&pwp->dead_phylist_lock);
8021 	mutex_exit(&pwp->config_lock);
8022 	mutex_exit(&pwp->lock);
8023 }
8024 
8025 void
8026 pmcs_inc_phy_ref_count(pmcs_phy_t *phyp)
8027 {
8028 	atomic_inc_32(&phyp->ref_count);
8029 }
8030 
8031 void
8032 pmcs_dec_phy_ref_count(pmcs_phy_t *phyp)
8033 {
8034 	ASSERT(phyp->ref_count != 0);
8035 	atomic_dec_32(&phyp->ref_count);
8036 }
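
/*
 * The ref count records outstanding work against a PHY.  The usual
 * bracketing is sketched below (error paths elided); note that
 * pmcs_handle_dead_phys() above will not free a dead PHY while its
 * ref_count is nonzero:
 *
 *	pmcs_inc_phy_ref_count(phyp);
 *	pwrk->phy = phyp;
 *	... submit the work and handle or await its completion ...
 *	pmcs_dec_phy_ref_count(pwrk->phy);
 *	pwrk->phy = NULL;
 */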
8037 
8038 /*
8039  * pmcs_reap_dead_phy
8040  *
8041  * This function is called from pmcs_new_tport when we have a PHY
8042  * without a target pointer.  It's possible in that case that this PHY
8043  * may have a "brother" on the dead_phys list.  That is, it may be the same as
8044  * this one but with a different root PHY number (e.g. pp05 vs. pp04).  If
8045  * that's the case, update the dead PHY and this new PHY.  If that's not the
8046  * case, we should get a tran_tgt_init on this after it's reported to SCSA.
8047  *
8048  * Called with PHY locked.
8049  */
8050 static void
8051 pmcs_reap_dead_phy(pmcs_phy_t *phyp)
8052 {
8053 	pmcs_hw_t *pwp = phyp->pwp;
8054 	pmcs_phy_t *ctmp;
8055 
8056 	ASSERT(mutex_owned(&phyp->phy_lock));
8057 
8058 	/*
8059 	 * Check the dead PHYs list
8060 	 */
8061 	mutex_enter(&pwp->dead_phylist_lock);
8062 	ctmp = pwp->dead_phys;
8063 	while (ctmp) {
8064 		if ((ctmp->iport != phyp->iport) ||
8065 		    (memcmp((void *)&ctmp->sas_address[0],
8066 		    (void *)&phyp->sas_address[0], 8))) {
8067 			ctmp = ctmp->dead_next;
8068 			continue;
8069 		}
8070 
8071 		/*
8072 		 * Same SAS address on same iport.  Now check to see if
8073 		 * the PHY path is the same with the possible exception of
8074 		 * the root PHY number (e.g. "pp04.18" matches "pp05.18").
8075 		 * The "5" is the string length of "pp00."
8076 		 */
8077 		if ((strnlen(phyp->path, 5) >= 5) &&
8078 		    (strnlen(ctmp->path, 5) >= 5)) {
8079 			if (memcmp((void *)&phyp->path[5],
8080 			    (void *)&ctmp->path[5],
8081 			    strnlen(phyp->path, 32) - 5) == 0) {
8082 				break;
8083 			}
8084 		}
8085 
8086 		ctmp = ctmp->dead_next;
8087 	}
8088 	mutex_exit(&pwp->dead_phylist_lock);
8089 
8090 	/*
8091 	 * Found a match.  Remove the target linkage and drop the
8092 	 * ref count on the old PHY.  Then, increment the ref count
8093 	 * on the new PHY to compensate.
8094 	 */
8095 	if (ctmp) {
8096 		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG,
8097 		    "%s: Found match in dead PHY list for new PHY %s",
8098 		    __func__, phyp->path);
8099 		if (ctmp->target) {
8100 			/*
8101 			 * If there is a pointer to the target in the dead
8102 			 * PHY, and that PHY's ref_count drops to 0, we can
8103 			 * clear the target linkage now.  If the PHY's
8104 			 * ref_count remains nonzero, then there may be
8105 			 * multiple LUNs still remaining, so leave the linkage.
8106 			 */
8107 			pmcs_inc_phy_ref_count(phyp);
8108 			pmcs_dec_phy_ref_count(ctmp);
8109 			phyp->target = ctmp->target;
8110 			/*
8111 			 * Update the target's linkage as well
8112 			 */
8113 			mutex_enter(&phyp->target->statlock);
8114 			phyp->target->phy = phyp;
8115 			phyp->target->dtype = phyp->dtype;
8116 			mutex_exit(&phyp->target->statlock);
8117 
8118 			if (ctmp->ref_count == 0) {
8119 				ctmp->target = NULL;
8120 			}
8121 		}
8122 	}
8123 }
8124 
8125 /*
8126  * Called with iport lock held
8127  */
8128 void
8129 pmcs_add_phy_to_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8130 {
8131 	ASSERT(mutex_owned(&iport->lock));
8132 	ASSERT(phyp);
8133 	ASSERT(!list_link_active(&phyp->list_node));
8134 	iport->nphy++;
8135 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8136 	    &iport->nphy);
8137 	list_insert_tail(&iport->phys, phyp);
8138 	mutex_enter(&iport->refcnt_lock);
8139 	iport->refcnt++;
8140 	mutex_exit(&iport->refcnt_lock);
8141 }
8142 
8143 /*
8144  * Called with the iport lock held
8145  */
8146 void
8147 pmcs_remove_phy_from_iport(pmcs_iport_t *iport, pmcs_phy_t *phyp)
8148 {
8149 	pmcs_phy_t *pptr, *next_pptr;
8150 
8151 	ASSERT(mutex_owned(&iport->lock));
8152 
8153 	/*
8154 	 * If phyp is NULL, remove all PHYs from the iport
8155 	 */
8156 	if (phyp == NULL) {
8157 		for (pptr = list_head(&iport->phys); pptr != NULL;
8158 		    pptr = next_pptr) {
8159 			next_pptr = list_next(&iport->phys, pptr);
8160 			mutex_enter(&pptr->phy_lock);
8161 			pptr->iport = NULL;
8162 			mutex_exit(&pptr->phy_lock);
8163 			pmcs_rele_iport(iport);
8164 			list_remove(&iport->phys, pptr);
8165 		}
8166 		iport->nphy = 0;
8167 		return;
8168 	}
8169 
8170 	ASSERT(phyp);
8171 	ASSERT(iport->nphy > 0);
8172 	ASSERT(list_link_active(&phyp->list_node));
8173 	iport->nphy--;
8174 	pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS,
8175 	    &iport->nphy);
8176 	list_remove(&iport->phys, phyp);
8177 	pmcs_rele_iport(iport);
8178 }
8179 
8180 /*
8181  * This function checks to see if the target pointed to by phyp is still
8182  * correct.  This is done by comparing the target's unit address with the
8183  * SAS address in phyp.
8184  *
8185  * Called with PHY locked and target statlock held
8186  */
8187 static boolean_t
8188 pmcs_phy_target_match(pmcs_phy_t *phyp)
8189 {
8190 	uint64_t wwn;
8191 	char unit_address[PMCS_MAX_UA_SIZE];
8192 	boolean_t rval = B_FALSE;
8193 
8194 	ASSERT(phyp);
8195 	ASSERT(phyp->target);
8196 	ASSERT(mutex_owned(&phyp->phy_lock));
8197 	ASSERT(mutex_owned(&phyp->target->statlock));
8198 
8199 	wwn = pmcs_barray2wwn(phyp->sas_address);
8200 	(void) scsi_wwn_to_wwnstr(wwn, 1, unit_address);
8201 
8202 	if (memcmp((void *)unit_address, (void *)phyp->target->unit_address,
8203 	    strnlen(phyp->target->unit_address, PMCS_MAX_UA_SIZE)) == 0) {
8204 		rval = B_TRUE;
8205 	}
8206 
8207 	return (rval);
8208 }
8209 
8210 void
8211 pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp)
8212 {
8213 	ASSERT(mutex_owned(&xp->statlock));
8214 	ASSERT(xp->pwp != NULL);
8215 
8216 	if (xp->recover_wait == 0) {
8217 		pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE,
8218 		    "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)",
8219 		    __func__, (void *)xp, (void *)phyp, phyp->path);
8220 		xp->recover_wait = 1;
8221 
8222 		/*
8223 		 * Rather than waiting for the watchdog timer, we'll
8224 		 * kick it right now.
8225 		 */
8226 		SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY);
8227 		(void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp,
8228 		    DDI_NOSLEEP);
8229 	}
8230 }
8231 
8232 /*
8233  * Increment the phy ds error retry count.
8234  * If too many retries, mark phy dead and restart discovery;
8235  * otherwise schedule ds recovery.
8236  */
8237 static void
8238 pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt,
8239     pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string)
8240 {
8241 	ASSERT(mutex_owned(&phyp->phy_lock));
8242 
8243 	phyp->ds_recovery_retries++;
8244 
8245 	if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) {
8246 		pmcs_prt(pwp, PMCS_PRT_DEBUG,
8247 		    "%s: retry limit reached after %s to PHY %s failed",
8248 		    func_name, reason_string, phyp->path);
8249 		tgt->recover_wait = 0;
8250 		phyp->dead = 1;
8251 		PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line);
8252 		RESTART_DISCOVERY(pwp);
8253 	} else {
8254 		SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY);
8255 	}
8256 }
8257
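/*
 * Callers pass their own context so the retry-limit message above can
 * name the failing step; a representative call (the reason string is
 * only an example):
 *
 *	pmcs_handle_ds_recovery_error(phyp, tgt, pwp, __func__, __LINE__,
 *	    "PMCS_DEVICE_STATE change request");
 */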