xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs.c (revision bea83d026ee1bd1b2a2419e1d0232f107a5d7d9b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 /*
28  * Implementation of "scsi_vhci_f_tpgs" T10 standard based failover_ops.
29  *
30  * NOTE: for non-sequential devices only.
31  */
32 
33 #include <sys/conf.h>
34 #include <sys/file.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/scsi/scsi.h>
38 #include <sys/scsi/adapters/scsi_vhci.h>
39 
/*
 * Supported device table entries.  The table holds only its terminating
 * NULL: no vendor/product strings are listed here, and std_device_probe()
 * decides support from the TPGS bits of the INQUIRY data instead.
 */
char	*std_dev_table[] = { NULL };

/*
 * Failover module plumbing.
 * NOTE(review): SCSI_FAILOVER_OP is defined outside this file; presumably it
 * builds the failover ops vector out of the std_* functions below - confirm
 * against scsi_vhci.h.
 */
SCSI_FAILOVER_OP(SFO_NAME_TPGS, std, "%I%");
45 
/* Size in bytes of the on-stack INQUIRY data buffer used by std_inquiry(). */
#define	STD_SCSI_CMD_LEN 0xff

/* Delays handed to drv_usecwait(), i.e. expressed in microseconds. */
#define	STD_FO_CMD_RETRY_DELAY	1000000 /* 1 second */
#define	STD_FO_RETRY_DELAY	2000000 /* 2 seconds */
/*
 * max time for failover to complete is 3 minutes.  Compute
 * number of retries accordingly, to ensure we wait for at least
 * 3 minutes.
 *
 * The whole expansion is parenthesized so that the macro evaluates as a
 * single value no matter which expression it is embedded in.
 */
#define	STD_FO_MAX_RETRIES	((3*60*1000000)/STD_FO_RETRY_DELAY)

/*
 * max number of retries for std failover to complete where the ping
 * command is failing due to transport errors or commands being rejected by
 * std.
 * STD_FO_MAX_RETRIES takes into account the case where CMD_CMPLTs but
 * std takes time to complete the failover.
 */
#define	STD_FO_MAX_CMD_RETRIES	3
65 
/*
 * Asymmetric access state values.  std_report_target_groups() compares the
 * low four bits of the first byte of each target port group descriptor
 * against these.  STD_TRANSITIONING is defined but not referenced by the
 * code visible in this file.
 */
#define	STD_ACTIVE_OPTIMIZED    0x0
#define	STD_ACTIVE_NONOPTIMIZED 0x1
#define	STD_STANDBY		0x2
#define	STD_UNAVAILABLE		0x3
#define	STD_TRANSITIONING	0xf

/*
 * Additional sense code (ASC) / qualifier (ASCQ) values examined by
 * std_process_cmplt_pkt(), std_analyze_sense() and std_set_target_groups():
 *   - STD_SCSI_ASC_STATE_CHG + STD_SCSI_ASCQ_STATE_CHG_SUCC, with a
 *     UNIT ATTENTION key, is treated as "access state changed".
 *   - STD_LOGICAL_UNIT_NOT_ACCESSIBLE is compared against the ASC and
 *     STD_TGT_PORT_UNAVAILABLE against the ASCQ, with a NOT READY key, to
 *     classify a path as inactive.
 *   - STD_SCSI_ASC_INVAL_PARAM_LIST and STD_SCSI_ASC_INVAL_CMD_OPCODE, with
 *     an ILLEGAL REQUEST key, are treated as "no failover".
 * STD_SCSI_ASC_STATE_TRANS, STD_SCSI_ASCQ_STATE_TRANS_FAIL and
 * STD_SCSI_ASCQ_STATE_CHG_FAILED are not referenced by the code visible in
 * this file.
 */
#define	STD_SCSI_ASC_STATE_TRANS	0x04
#define	STD_SCSI_ASCQ_STATE_TRANS_FAIL  0x0A
#define	STD_SCSI_ASC_STATE_CHG		0x2A
#define	STD_SCSI_ASCQ_STATE_CHG_SUCC	0x06
#define	STD_SCSI_ASCQ_STATE_CHG_FAILED	0x07
#define	STD_SCSI_ASC_INVAL_PARAM_LIST	0x26
#define	STD_SCSI_ASC_INVAL_CMD_OPCODE	0x20
#define	STD_LOGICAL_UNIT_NOT_ACCESSIBLE	0x04
#define	STD_TGT_PORT_UNAVAILABLE	0x0C
81 
82 
/*
 * Special exported for direct use by MP-API.
 * Arguments are (address, requested state, target port group id); see the
 * definition at the end of this file.
 */
int std_set_target_groups(struct scsi_address *, int, int);

/*
 * External function definitions
 */
/* Called by std_report_target_groups() with the matching group descriptor. */
extern void vhci_mpapi_update_tpg_data(struct scsi_address *, char *);

/* Forward declarations: both are used before their definitions below. */
static int std_get_fo_mode(struct scsi_device *,
		int *, int *, int *, int *);
static int std_report_target_groups(struct scsi_address *, struct buf *,
		int, int, int *, int *);
95 
96 /* ARGSUSED */
97 static int
98 std_device_probe(struct scsi_device *sd, struct scsi_inquiry *inq,
99 void **ctpriv)
100 {
101 	unsigned int	tpgs_bits;
102 	unsigned char	*inqbuf = (unsigned char *)inq;
103 	unsigned char	dtype = (inq->inq_dtype & DTYPE_MASK);
104 
105 	int		mode, state, xlf, preferred = 0;
106 
107 	VHCI_DEBUG(6, (CE_NOTE, NULL, "std_device_probe: vidpid %s\n",
108 	    inq->inq_vid));
109 
110 	tpgs_bits = ((inqbuf[5] & 0x30) >> 4);
111 
112 	if (tpgs_bits == 0) {
113 		VHCI_DEBUG(4, (CE_WARN, NULL,
114 		    "!std_device_probe: not a standard tpgs device"));
115 		return (SFO_DEVICE_PROBE_PHCI);
116 	}
117 
118 	if (dtype == DTYPE_SEQUENTIAL) {
119 		VHCI_DEBUG(4, (CE_NOTE, NULL,
120 		    "!std_device_probe: Detected a "
121 		    "Standard Asymmetric device "
122 		    "not yet supported\n"));
123 		return (SFO_DEVICE_PROBE_PHCI);
124 	}
125 
126 	if (std_get_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
127 		VHCI_DEBUG(4, (CE_WARN, NULL, "!unable to fetch fo "
128 		    "mode: sd(%p)", (void *) sd));
129 		return (SFO_DEVICE_PROBE_PHCI);
130 	}
131 
132 	if (tpgs_bits == SCSI_IMPLICIT_FAILOVER) {
133 		VHCI_DEBUG(1, (CE_NOTE, NULL,
134 		    "!std_device_probe: Detected a "
135 		    "Standard Asymmetric device "
136 		    "with implicit failover\n"));
137 		return (SFO_DEVICE_PROBE_VHCI);
138 	}
139 	if (tpgs_bits == SCSI_EXPLICIT_FAILOVER) {
140 		VHCI_DEBUG(1, (CE_NOTE, NULL,
141 		    "!std_device_probe: Detected a "
142 		    "Standard Asymmetric device "
143 		    "with explicit failover\n"));
144 		return (SFO_DEVICE_PROBE_VHCI);
145 	}
146 	if (tpgs_bits == SCSI_BOTH_FAILOVER) {
147 		VHCI_DEBUG(1, (CE_NOTE, NULL,
148 		    "!std_device_probe: Detected a "
149 		    "Standard Asymmetric device "
150 		    "which supports both implicit and explicit failover\n"));
151 		return (SFO_DEVICE_PROBE_VHCI);
152 	}
153 	VHCI_DEBUG(1, (CE_WARN, NULL,
154 	    "!std_device_probe: "
155 	    "Unknown tpgs_bits: %x", tpgs_bits));
156 	return (SFO_DEVICE_PROBE_PHCI);
157 }
158 
/*
 * Unprobe entry point.  Intentionally empty: reserved for future use.
 */
/* ARGSUSED */
static void
std_device_unprobe(struct scsi_device *sd, void *ctpriv)
{
	/* Nothing to undo; placeholder for future use. */
}
167 
168 static int
169 std_inquiry(struct scsi_address *ap, struct buf *bp, int *mode)
170 {
171 	struct scsi_pkt		*pkt;
172 	char			buf[STD_SCSI_CMD_LEN];
173 	int			buf_size = sizeof (buf);
174 	unsigned int		tpgs_bits;
175 	int			retval;
176 
177 	*mode = 0;
178 	bp->b_un.b_addr = (caddr_t)&buf;
179 	bp->b_flags = B_READ;
180 	bp->b_bcount = buf_size;
181 	bp->b_resid = 0;
182 
183 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP0,
184 	    sizeof (struct scsi_arq_status), 0, 0, SLEEP_FUNC, NULL);
185 	pkt->pkt_cdbp[0] = SCMD_INQUIRY;
186 	pkt->pkt_cdbp[4] = (unsigned char)buf_size;
187 	pkt->pkt_time = 60;
188 
189 	retval = vhci_do_scsi_cmd(pkt);
190 	scsi_destroy_pkt(pkt);
191 	if (retval == 0) {
192 		VHCI_DEBUG(1, (CE_WARN, NULL,
193 		    "!std_inquiry: Failure returned from vhci_do_scsi_cmd"));
194 		return (1);
195 	}
196 
197 	tpgs_bits = ((buf[5] & 0x30) >> 4);
198 	if (tpgs_bits == 0) {
199 		VHCI_DEBUG(1, (CE_WARN, NULL,
200 		    "!std_inquiry: zero tpgs_bits"));
201 		return (1);
202 	}
203 	retval = 0;
204 	if (tpgs_bits == SCSI_IMPLICIT_FAILOVER) {
205 		*mode = SCSI_IMPLICIT_FAILOVER;
206 	} else if (tpgs_bits == SCSI_EXPLICIT_FAILOVER) {
207 		*mode = SCSI_EXPLICIT_FAILOVER;
208 	} else if (tpgs_bits == SCSI_BOTH_FAILOVER) {
209 		*mode = SCSI_BOTH_FAILOVER;
210 	} else {
211 		VHCI_DEBUG(1, (CE_WARN, NULL,
212 		    "!std_inquiry: Illegal mode returned: %x mode: %x",
213 		    tpgs_bits, *mode));
214 		retval = 1;
215 	}
216 
217 	return (retval);
218 }
219 
220 static int
221 std_page83(struct scsi_address *ap, struct buf *bp,
222 	int *rel_tgt_port, int *tgt_port, int *lu)
223 {
224 	char			*ptr, *end;
225 	struct scsi_pkt		*pkt;
226 	char			*bufp;
227 	unsigned int		buf_len, rx_bsize;
228 
229 	/*
230 	 * lets start the buf size with 512 bytes. If this
231 	 * if found to be insufficient, we can allocate
232 	 * appropriate size in the next iteration.
233 	 */
234 	buf_len = 512;
235 
236 once_again:
237 	bufp = kmem_zalloc(buf_len, KM_NOSLEEP);
238 	if (bufp == NULL) {
239 		VHCI_DEBUG(1, (CE_WARN, NULL, "!std_page83: "
240 		    "request packet allocation for %d failed....",
241 		    buf_len));
242 		return (1);
243 	}
244 
245 
246 	bp->b_un.b_addr = bufp;
247 	bp->b_flags = B_READ;
248 	bp->b_bcount = buf_len;
249 	bp->b_resid = 0;
250 
251 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP0,
252 	    sizeof (struct scsi_arq_status), 0, 0, NULL, NULL);
253 	if (pkt == NULL) {
254 		VHCI_DEBUG(1, (CE_WARN, NULL,
255 		    "!std_page83: Failure returned from scsi_init_pkt"));
256 		kmem_free((void *)bufp, buf_len);
257 		return (1);
258 	}
259 
260 	pkt->pkt_cdbp[0] = SCMD_INQUIRY;
261 	pkt->pkt_cdbp[1] = 0x1;
262 	pkt->pkt_cdbp[2] = 0x83;
263 	pkt->pkt_cdbp[3] = (unsigned char)((buf_len >> 8) & 0xff);
264 	pkt->pkt_cdbp[4] = (unsigned char)(buf_len & 0xff);
265 	pkt->pkt_time = 90;
266 
267 	if (vhci_do_scsi_cmd(pkt) == 0) {
268 		VHCI_DEBUG(1, (CE_NOTE, NULL,
269 		    "!std_page83: vhci_do_scsi_cmd failed\n"));
270 		kmem_free((void *)bufp, buf_len);
271 		scsi_destroy_pkt(pkt);
272 		return (1);
273 	}
274 
275 	/*
276 	 * Now lets check if the size that was provided was
277 	 * sufficient. If not, allocate the appropriate size
278 	 * and retry the command again.
279 	 */
280 	rx_bsize = (((bufp[2] & 0xff) << 8) | (bufp[3] & 0xff));
281 	rx_bsize += 4;
282 	if (rx_bsize > buf_len) {
283 		/*
284 		 * Need to allocate more buf and retry again
285 		 */
286 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_page83: "
287 		    "bufsize: %d greater than allocated buf: %d\n",
288 		    rx_bsize, buf_len));
289 		VHCI_DEBUG(1, (CE_NOTE, NULL, "Retrying for size %d\n",
290 		    rx_bsize));
291 		kmem_free((void *)bufp, buf_len);
292 		buf_len = (unsigned int)(rx_bsize);
293 		goto once_again;
294 	}
295 
296 	ptr = bufp;
297 	ptr += 4; /* identification descriptor 0 */
298 	end = bufp + rx_bsize;
299 	while (ptr < end) {
300 		VHCI_DEBUG(1, (CE_NOTE, NULL, "std_page83: desc[1/4/5/6/7]:"
301 		    "%x %x %x %x %x\n",
302 		    ptr[1], ptr[4], ptr[5], ptr[6], ptr[7]));
303 		if ((ptr[1] & 0x0f) == 0x04) {
304 			*rel_tgt_port = 0;
305 			*rel_tgt_port |= ((ptr[6] & 0xff) << 8);
306 			*rel_tgt_port |= (ptr[7] & 0xff);
307 			VHCI_DEBUG(1, (CE_NOTE, NULL,
308 			    "!std_page83: relative target port: %x\n",
309 			    *rel_tgt_port));
310 		} else if ((ptr[1] & 0x0f) == 0x05) {
311 			*tgt_port = 0;
312 			*tgt_port = ((ptr[6] & 0xff) << 8);
313 			*tgt_port |= (ptr[7] & 0xff);
314 			VHCI_DEBUG(1, (CE_NOTE, NULL,
315 			    "!std_page83: target port: %x\n", *tgt_port));
316 		} else if ((ptr[1] & 0x0f) == 0x06) {
317 			*lu = 0;
318 			*lu |= ((ptr[6] & 0xff)<< 8);
319 			*lu |= (ptr[7] & 0xff);
320 			VHCI_DEBUG(1, (CE_NOTE, NULL,
321 			    "!std_page83: logical unit: %x\n", *lu));
322 		}
323 		ptr += ptr[3] + 4;  /* next identification descriptor */
324 	}
325 	kmem_free((void *)bufp, buf_len);
326 	scsi_destroy_pkt(pkt);
327 	return (0);
328 }
329 
#ifdef DEBUG
/*
 * Debug helper: log the first buf_size bytes of buf in hex, eight bytes per
 * message, followed by one line for any remaining (buf_size % 8) bytes.
 * Each byte is masked to 8 bits so that values >= 0x80 are not printed
 * sign-extended when 'char' is signed.
 */
static void
print_buf(char *buf, int buf_size)
{
	int		i = 0, j;
	int		loop, left;

	loop = buf_size / 8;
	left = buf_size % 8;

	VHCI_DEBUG(4, (CE_NOTE, NULL, "!buf_size: %x loop: %x left: %x",
	    buf_size, loop, left));

	for (j = 0; j < loop; j++) {
		VHCI_DEBUG(4, (CE_NOTE, NULL,
		    "!buf[%d-%d]: %x %x %x %x %x %x %x %x",
		    i, i + 7, buf[i] & 0xff, buf[i+1] & 0xff,
		    buf[i+2] & 0xff, buf[i+3] & 0xff, buf[i+4] & 0xff,
		    buf[i+5] & 0xff, buf[i+6] & 0xff, buf[i+7] & 0xff));
		i += 8;
	}

	if (left) {
		/* Inclusive upper index, matching the full-row messages. */
		VHCI_DEBUG(4, (CE_CONT, NULL,
		    "NOTICE: buf[%d-%d]:", i, i + left - 1));
		for (j = 0; j < left; j++) {
			VHCI_DEBUG(4, (CE_CONT, NULL, " %x",
			    buf[i + j] & 0xff));
		}
		VHCI_DEBUG(4, (CE_CONT, NULL, "\n"));
	}
}
#endif
361 
362 static int
363 std_report_target_groups(struct scsi_address *ap, struct buf *bp,
364 	int rel_tgt_port, int tgt_port, int *pstate, int *preferred)
365 {
366 	struct scsi_pkt		*pkt;
367 	char			*ptr, *end, *bufp, *mpapi_ptr;
368 	unsigned int		rtpg_len = 0;
369 	unsigned int		l_tgt_port = 0, tpgs_state = 0;
370 	unsigned int		tgt_port_cnt = 0, lr_tgt_port = 0;
371 	int			i, len;
372 
373 	/*
374 	 * Start with buffer size of 512.
375 	 * If this is found to be insufficient, required size
376 	 * will be allocated and the command will be retried.
377 	 */
378 	len = 512;
379 
380 try_again:
381 	bufp = kmem_zalloc(len, KM_NOSLEEP);
382 	if (bufp == NULL) {
383 		VHCI_DEBUG(1, (CE_WARN, NULL, "!std_report_target_groups: "
384 		    "request packet allocation for %d failed....",
385 		    len));
386 		return (1);
387 	}
388 
389 	bp->b_un.b_addr = bufp;
390 	bp->b_flags = B_READ;
391 	bp->b_bcount = len;
392 	bp->b_resid = 0;
393 
394 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP5,
395 	    sizeof (struct scsi_arq_status), 0, 0, NULL, NULL);
396 
397 	if (pkt == NULL) {
398 		VHCI_DEBUG(1, (CE_NOTE, NULL,
399 		    "!std_report_target_groups: scsi_init_pkt error\n"));
400 		kmem_free((void *)bufp, len);
401 		return (1);
402 	}
403 
404 	pkt->pkt_cdbp[0] = SCMD_MAINTENANCE_IN;
405 	pkt->pkt_cdbp[1] = SCMD_SET_TARGET_PORT_GROUPS;
406 	pkt->pkt_cdbp[6] = ((len >>  24) & 0xff);
407 	pkt->pkt_cdbp[7] = ((len >> 16) & 0xff);
408 	pkt->pkt_cdbp[8] = ((len >> 8) & 0xff);
409 	pkt->pkt_cdbp[9] = len & 0xff;
410 	pkt->pkt_time = 90;
411 
412 	VHCI_DEBUG(6, (CE_NOTE, NULL,
413 	    "!std_report_target_groups: sending target port group:"
414 	    " cdb[6/7/8/9]: %x/%x/%x/%x\n", pkt->pkt_cdbp[6],
415 	    pkt->pkt_cdbp[7], pkt->pkt_cdbp[8], pkt->pkt_cdbp[9]));
416 	if (vhci_do_scsi_cmd(pkt) == 0) {
417 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_report_target_groups:"
418 		    " vhci_do_scsi_cmd failed\n"));
419 		kmem_free((void *)bufp, len);
420 		scsi_destroy_pkt(pkt);
421 		return (1);
422 	}
423 	ptr = bufp;
424 	VHCI_DEBUG(6, (CE_NOTE, NULL, "!std_report_target_groups:"
425 	    " returned from target"
426 	    " port group: buf[0/1/2/3]: %x/%x/%x/%x\n",
427 	    ptr[0], ptr[1], ptr[2], ptr[3]));
428 	rtpg_len = (unsigned int)((0xff & ptr[0]) << 24);
429 	rtpg_len |= (unsigned int)((0xff & ptr[1]) << 16);
430 	rtpg_len |= (unsigned int)((0xff & ptr[2]) << 8);
431 	rtpg_len |= (unsigned int)(0xff & ptr[3]);
432 	rtpg_len += 4;
433 	if (rtpg_len > len) {
434 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_report_target_groups: "
435 		    "bufsize: %d greater than allocated buf: %d\n",
436 		    rtpg_len, len));
437 		VHCI_DEBUG(4, (CE_NOTE, NULL, "Retrying for size %d\n",
438 		    rtpg_len));
439 		kmem_free((void *)bufp, len);
440 		len = (unsigned int)(rtpg_len + 1);
441 		goto try_again;
442 	}
443 #ifdef DEBUG
444 	print_buf(bufp, rtpg_len);
445 #endif
446 	end = ptr + rtpg_len;
447 	ptr += 4;
448 	while (ptr < end) {
449 		mpapi_ptr = ptr;
450 		l_tgt_port = ((ptr[2] & 0xff) << 8) + (ptr[3] & 0xff);
451 		tpgs_state = ptr[0] & 0x0f;
452 		tgt_port_cnt = (ptr[7] & 0xff);
453 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_report_tgt_groups:"
454 		    " tpgs state: %x"
455 		    " tgt_group: %x count: %x\n", tpgs_state,
456 		    l_tgt_port, tgt_port_cnt));
457 		ptr += 8;
458 		for (i = 0; i < tgt_port_cnt; i++) {
459 			lr_tgt_port = 0;
460 			lr_tgt_port |= ((ptr[2] & 0Xff) << 8);
461 			lr_tgt_port |= (ptr[3] & 0xff);
462 
463 			if ((lr_tgt_port == rel_tgt_port) &&
464 			    (l_tgt_port == tgt_port)) {
465 				VHCI_DEBUG(4, (CE_NOTE, NULL,
466 				    "!std_report_tgt_groups:"
467 				    " found tgt_port: %x rel_tgt_port:%x"
468 				    " tpgs_state: %x\n", tgt_port, rel_tgt_port,
469 				    tpgs_state));
470 				/*
471 				 * once we have the preferred flag
472 				 * and a non-optimized state flag
473 				 * we will get preferred flag  from the
474 				 * report target groups
475 				 */
476 				if (tpgs_state == STD_ACTIVE_OPTIMIZED) {
477 					*pstate = STD_ACTIVE_OPTIMIZED;
478 					*preferred = PCLASS_PREFERRED;
479 				} else if (tpgs_state ==
480 				    STD_ACTIVE_NONOPTIMIZED) {
481 					*pstate = STD_ACTIVE_NONOPTIMIZED;
482 					*preferred = PCLASS_NONPREFERRED;
483 				} else if (tpgs_state == STD_STANDBY) {
484 					*pstate = STD_STANDBY;
485 					*preferred = PCLASS_NONPREFERRED;
486 				} else {
487 					*pstate = STD_UNAVAILABLE;
488 					*preferred = PCLASS_NONPREFERRED;
489 				}
490 				vhci_mpapi_update_tpg_data(ap, mpapi_ptr);
491 				kmem_free((void *)bufp, len);
492 				scsi_destroy_pkt(pkt);
493 				return (0);
494 			}
495 			VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_report_tgt_groups:"
496 			    " tgt_port: %x rel_tgt_port:%x\n", tgt_port,
497 			    rel_tgt_port));
498 			ptr += 4;
499 		}
500 	}
501 	*pstate = SCSI_PATH_INACTIVE;
502 	*preferred = PCLASS_NONPREFERRED;
503 	VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_report_tgt_groups: "
504 	    "NO rel_TGTPRT MATCH!!! Assigning Default: state: %x "
505 	    "preferred: %d\n", *pstate, *preferred));
506 	kmem_free((void *)bufp, len);
507 	scsi_destroy_pkt(pkt);
508 	return (1);
509 }
510 
511 /*
512  * get the failover mode, ownership and if it has extended failover
513  * capability. The mode(bits5-4/byte5) is defined as implicit, explicit, or
514  * both.  The state is defined as online-optimized(0h),
515  * online-nonoptimized(1h), standby(2h), offline(3h),
516  * and transitioning(fh). Currently, there is online,
517  * standby, and offline(defined in sunmdi.h).
518  * Online-nonoptimized will be a mode of secondary
519  * and an ownership of online. Thought about using a different mode but
520  * it appears the states are really for the states for secondary mode.
521  * We currently have IS_ONLINING, IS_OFFLINING - should we have TRANSITIONING
522  * to mean from online-optimized to online-nonoptimized or does onlining
523  * cover this?
524  */
525 /* ARGSUSED */
526 static int
527 std_get_fo_mode(struct scsi_device *sd, int *mode,
528     int *state, int *xlf_capable, int *preferred)
529 {
530 	int			retval = 0;
531 	struct buf		*bp;
532 	struct scsi_address	*ap;
533 	int			lu = 0, rel_tgt_port = 0, tgt_port = 0x0;
534 
535 	VHCI_DEBUG(6, (CE_NOTE, NULL, "!std_get_fo_mode: enter\n"));
536 	*mode = *state = *xlf_capable = 0;
537 	bp = getrbuf(KM_NOSLEEP);
538 	if (bp == NULL) {
539 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_get_fo_mode: "
540 		    " failed getrbuf\n"));
541 		return (1);
542 	}
543 
544 	ap = &sd->sd_address;
545 	if (std_inquiry(ap, bp, mode)) {
546 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_get_fo_mode: "
547 		    " failed std_inquiry\n"));
548 		retval = 1;
549 	} else if (std_page83(ap, bp, &rel_tgt_port, &tgt_port, &lu)) {
550 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_get_fo_mode: "
551 		    " failed std_page83\n"));
552 		retval = 1;
553 	} else if (std_report_target_groups(ap, bp, rel_tgt_port, tgt_port,
554 	    state, preferred)) {
555 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_get_fo_mode: "
556 		    " failed std_report_target_groups\n"));
557 		retval = 1;
558 	}
559 
560 	freerbuf(bp);
561 	if (retval == 0) {
562 		VHCI_DEBUG(6, (CE_NOTE, NULL, "!std_get_fo_mode: "
563 		    "SUCCESS\n"));
564 	}
565 	return (retval);
566 }
567 
568 /* ARGSUSED */
569 static int
570 std_activate_explicit(struct scsi_device *sd, int xlf_capable)
571 {
572 	cmn_err(CE_NOTE, "Explicit Activation is done by "
573 	    "std_set_target_groups() call from MPAPI");
574 	return (1);
575 }
576 
577 /*
578  * Process the packet reason of CMD_PKT_CMPLT - return 0 if no
579  * retry and 1 if a retry should be done
580  */
581 static int
582 std_process_cmplt_pkt(struct scsi_device *sd, struct scsi_pkt *pkt,
583 	int *retry_cnt)
584 {
585 	struct scsi_extended_sense	*sns;
586 
587 	/*
588 	 * Re-initialize retry_cmd_cnt. Allow transport and
589 	 * cmd errors to go through a full retry count when
590 	 * these are encountered.  This way TRAN/CMD errors
591 	 * retry count is not exhausted due to CMD_CMPLTs
592 	 * delay. This allows the system
593 	 * to brave a hick-up on the link at any given time,
594 	 * while waiting for the fo to complete.
595 	 */
596 	if (pkt->pkt_state & STATE_ARQ_DONE) {
597 		sns = &(((struct scsi_arq_status *)(uintptr_t)
598 		    (pkt->pkt_scbp))->sts_sensedata);
599 		if (sns->es_key == KEY_UNIT_ATTENTION) {
600 			/*
601 			 * tpgs access state changed
602 			 */
603 			if (sns->es_add_code == STD_SCSI_ASC_STATE_CHG &&
604 			    sns->es_qual_code == STD_SCSI_ASCQ_STATE_CHG_SUCC) {
605 				/* XXX: update path info? */
606 				cmn_err(CE_WARN, "!Device failover"
607 				    " state change");
608 			}
609 			return (1);
610 		} else if (sns->es_key == KEY_NOT_READY) {
611 			if ((*retry_cnt)++ >=
612 			    STD_FO_MAX_RETRIES) {
613 				cmn_err(CE_WARN, "!Device failover"
614 				    " failed: timed out waiting "
615 				    "for path to become active");
616 				return (0);
617 			}
618 			VHCI_DEBUG(6, (CE_NOTE, NULL,
619 			    "!(sd:%p)lun "
620 			    "becoming active...\n", (void *)sd));
621 			drv_usecwait(STD_FO_RETRY_DELAY);
622 			return (1);
623 		}
624 		cmn_err(CE_NOTE, "!Failover failed;"
625 		    " sense key:%x, ASC: %x, "
626 		    "ASCQ:%x", sns->es_key,
627 		    sns->es_add_code, sns->es_qual_code);
628 		return (0);
629 	}
630 	switch (SCBP_C(pkt)) {
631 		case STATUS_GOOD:
632 			break;
633 		case STATUS_CHECK:
634 			VHCI_DEBUG(4, (CE_WARN, NULL,
635 			    "!(sd:%p):"
636 			    " status returned CHECK during std"
637 			    " path activation", (void *)sd));
638 			return (0);
639 		case STATUS_QFULL:
640 			VHCI_DEBUG(6, (CE_NOTE, NULL, "QFULL "
641 			    "status returned QFULL during std "
642 			    "path activation for %p\n", (void *)sd));
643 			drv_usecwait(5000);
644 			return (1);
645 		case STATUS_BUSY:
646 			VHCI_DEBUG(6, (CE_NOTE, NULL, "BUSY "
647 			    "status returned BUSY during std "
648 			    "path activation for %p\n", (void *)sd));
649 			drv_usecwait(5000);
650 			return (1);
651 		default:
652 			VHCI_DEBUG(4, (CE_WARN, NULL,
653 			    "!(sd:%p) Bad status returned during std "
654 			    "activation (pkt %p, status %x)",
655 			    (void *)sd, (void *)pkt, SCBP_C(pkt)));
656 			return (0);
657 	}
658 	return (0);
659 }
660 
661 /*
662  * For now we are going to use primary/online and secondary/online.
663  * There is no standby path returned by the dsp and we may have
664  * to do something different for other devices that use standby
665  */
666 /* ARGSUSED */
667 static int
668 std_path_activate(struct scsi_device *sd, char *pathclass,
669 void *ctpriv)
670 {
671 	struct buf			*bp;
672 	struct scsi_pkt			*pkt;
673 	struct scsi_address		*ap;
674 	int				err, retry_cnt, retry_cmd_cnt;
675 	int				mode, state, retval, xlf, preferred;
676 
677 	ap = &sd->sd_address;
678 
679 	mode = state = 0;
680 
681 	if (std_get_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
682 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_activate:"
683 		    " failed std_get_fo_mode\n"));
684 		return (1);
685 	}
686 	if ((state == STD_ACTIVE_OPTIMIZED) ||
687 	    (state == STD_ACTIVE_NONOPTIMIZED)) {
688 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!path already active for %p\n",
689 		    (void *)sd));
690 		return (0);
691 	}
692 
693 	if (mode != SCSI_IMPLICIT_FAILOVER) {
694 		VHCI_DEBUG(4, (CE_NOTE, NULL,
695 		    "!mode is EXPLICIT for %p xlf %x\n",
696 		    (void *)sd, xlf));
697 		retval = std_activate_explicit(sd, xlf);
698 		if (retval != 0) {
699 			VHCI_DEBUG(4, (CE_NOTE, NULL,
700 			    "!(sd:%p)std_path_activate failed(1)\n",
701 			    (void *)sd));
702 			return (1);
703 		}
704 	} else {
705 		VHCI_DEBUG(4, (CE_NOTE, NULL, "STD mode is IMPLICIT for %p\n",
706 		    (void *)sd));
707 	}
708 
709 	bp = scsi_alloc_consistent_buf(ap, (struct buf *)NULL, DEV_BSIZE,
710 	    B_READ, NULL, NULL);
711 	if (!bp) {
712 		VHCI_DEBUG(4, (CE_WARN, NULL,
713 		    "!(sd:%p)std_path_activate failed to alloc buffer",
714 		    (void *)sd));
715 		return (1);
716 	}
717 
718 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP1,
719 	    sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT, NULL, NULL);
720 	if (!pkt) {
721 		VHCI_DEBUG(4, (CE_WARN, NULL,
722 		    "!(sd:%p)std_path_activate failed to initialize packet",
723 		    (void *)sd));
724 		scsi_free_consistent_buf(bp);
725 		return (1);
726 	}
727 
728 	(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)pkt->pkt_cdbp,
729 	    SCMD_READ, 1, 1, 0);
730 	pkt->pkt_time = 3*30;
731 	pkt->pkt_flags |= FLAG_NOINTR;
732 
733 	retry_cnt = 0;
734 	retry_cmd_cnt = 0;
735 retry:
736 	err = scsi_transport(pkt);
737 	if (err != TRAN_ACCEPT) {
738 		/*
739 		 * Retry TRAN_BUSY till STD_FO_MAX_RETRIES is exhausted.
740 		 * All other errors are fatal and should not be retried.
741 		 */
742 		if ((err == TRAN_BUSY) &&
743 		    (retry_cnt++ < STD_FO_MAX_RETRIES)) {
744 			drv_usecwait(STD_FO_RETRY_DELAY);
745 			goto retry;
746 		}
747 		cmn_err(CE_WARN, "Failover failed, "
748 		    "couldn't transport packet");
749 		scsi_destroy_pkt(pkt);
750 		scsi_free_consistent_buf(bp);
751 		return (1);
752 	}
753 	switch (pkt->pkt_reason) {
754 		case CMD_CMPLT:
755 			retry_cmd_cnt = 0;
756 			retval = std_process_cmplt_pkt(sd, pkt, &retry_cnt);
757 			if (retval != 0) {
758 				goto retry;
759 			}
760 			break;
761 		case CMD_TIMEOUT:
762 			cmn_err(CE_WARN, "!Failover failed: timed out ");
763 			retval = 1;
764 			break;
765 		case CMD_INCOMPLETE:
766 		case CMD_RESET:
767 		case CMD_ABORTED:
768 		case CMD_TRAN_ERR:
769 			/*
770 			 * Increased the number of retries when these error
771 			 * cases are encountered.  Also added a 1 sec wait
772 			 * before retrying.
773 			 */
774 			if (retry_cmd_cnt++ < STD_FO_MAX_CMD_RETRIES) {
775 				drv_usecwait(STD_FO_CMD_RETRY_DELAY);
776 				VHCI_DEBUG(4, (CE_WARN, NULL,
777 				    "!Retrying path activation due to "
778 				    "pkt reason:%x, retry cnt:%d",
779 				    pkt->pkt_reason, retry_cmd_cnt));
780 				goto retry;
781 			}
782 			/* FALLTHROUGH */
783 		default:
784 			cmn_err(CE_WARN, "!Path activation did not "
785 			    "complete successfully,"
786 			    "(pkt reason %x)", pkt->pkt_reason);
787 			retval = 1;
788 			break;
789 	}
790 
791 
792 	VHCI_DEBUG(4, (CE_NOTE, NULL, "!Path activation success\n"));
793 	scsi_destroy_pkt(pkt);
794 	scsi_free_consistent_buf(bp);
795 	return (retval);
796 }
797 
/*
 * Path deactivation entry point.  Nothing is done here; 0 is always
 * returned.
 */
/* ARGSUSED */
static int
std_path_deactivate(struct scsi_device *sd, char *pathclass,
    void *ctpriv)
{
	return (0);
}
804 
805 /* ARGSUSED */
806 static int
807 std_path_get_opinfo(struct scsi_device *sd, struct scsi_path_opinfo
808 *opinfo, void *ctpriv)
809 {
810 	int			mode, preferred, state, xlf;
811 
812 	opinfo->opinfo_rev = OPINFO_REV;
813 
814 	if (std_get_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
815 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_getopinfo:"
816 		    " failed std_get_fo_mode\n"));
817 		return (1);
818 	}
819 
820 	if (state == STD_ACTIVE_OPTIMIZED) {
821 		opinfo->opinfo_path_state = SCSI_PATH_ACTIVE;
822 	} else if (state == STD_ACTIVE_NONOPTIMIZED) {
823 		opinfo->opinfo_path_state = SCSI_PATH_ACTIVE_NONOPT;
824 	} else if (state == STD_STANDBY) {
825 		opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
826 	} else if (state == STD_UNAVAILABLE) {
827 		opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
828 	}
829 	if (preferred) {
830 		(void) strcpy(opinfo->opinfo_path_attr, PCLASS_PRIMARY);
831 	} else {
832 		(void) strcpy(opinfo->opinfo_path_attr, PCLASS_SECONDARY);
833 	}
834 	VHCI_DEBUG(4, (CE_NOTE, NULL, "std_path_get_opinfo: "
835 	    "class: %s state: %s\n", opinfo->opinfo_path_attr,
836 	    opinfo->opinfo_path_state == SCSI_PATH_ACTIVE ?
837 	    "ACTIVE" : "INACTIVE"));
838 	opinfo->opinfo_xlf_capable = 0;
839 	opinfo->opinfo_pswtch_best = 30;
840 	opinfo->opinfo_pswtch_worst = 3*30;
841 	opinfo->opinfo_preferred = (uint16_t)preferred;
842 	opinfo->opinfo_mode = (uint16_t)mode;
843 
844 	return (0);
845 }
846 
/*
 * Path ping entry point.  Not implemented yet (reserved for future use);
 * 1 is always returned.
 */
/* ARGSUSED */
static int
std_path_ping(struct scsi_device *sd, void *ctpriv)
{
	return (1);
}
855 
856 /*
857  * Analyze the sense code to determine whether failover process
858  */
859 /* ARGSUSED */
860 static int
861 std_analyze_sense(struct scsi_device *sd, struct scsi_extended_sense
862 *sense, void *ctpriv)
863 {
864 	int rval = SCSI_SENSE_UNKNOWN;
865 
866 	if ((sense->es_key == KEY_UNIT_ATTENTION) &&
867 	    (sense->es_add_code == STD_SCSI_ASC_STATE_CHG) &&
868 	    (sense->es_qual_code == STD_SCSI_ASCQ_STATE_CHG_SUCC)) {
869 		rval = SCSI_SENSE_STATE_CHANGED;
870 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
871 		    " sense_key:%x, add_code: %x, qual_code:%x"
872 		    " sense:%x\n", sense->es_key, sense->es_add_code,
873 		    sense->es_qual_code, rval));
874 	} else if ((sense->es_key == KEY_NOT_READY) &&
875 	    (sense->es_add_code == STD_LOGICAL_UNIT_NOT_ACCESSIBLE) &&
876 	    (sense->es_qual_code == STD_TGT_PORT_UNAVAILABLE)) {
877 		rval = SCSI_SENSE_INACTIVE;
878 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
879 		    " sense_key:%x, add_code: %x, qual_code:%x"
880 		    " sense:%x\n", sense->es_key, sense->es_add_code,
881 		    sense->es_qual_code, rval));
882 	} else if ((sense->es_key == KEY_ILLEGAL_REQUEST) &&
883 	    (sense->es_add_code == STD_SCSI_ASC_INVAL_PARAM_LIST)) {
884 		rval = SCSI_SENSE_NOFAILOVER;
885 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
886 		    " sense_key:%x, add_code: %x, qual_code:%x"
887 		    " sense:%x\n", sense->es_key, sense->es_add_code,
888 		    sense->es_qual_code, rval));
889 	} else if ((sense->es_key == KEY_ILLEGAL_REQUEST) &&
890 	    (sense->es_add_code == STD_SCSI_ASC_INVAL_CMD_OPCODE)) {
891 		rval = SCSI_SENSE_NOFAILOVER;
892 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
893 		    " sense_key:%x, add_code: %x, qual_code:%x"
894 		    " sense:%x\n", sense->es_key, sense->es_add_code,
895 		    sense->es_qual_code, rval));
896 	} else {
897 		/*
898 		 * At this point sense data may be for power-on-reset
899 		 * UNIT ATTN hardware errors, vendor unqiue sense data etc.
900 		 * For all these cases, return SCSI_SENSE_UNKNOWN.
901 		 */
902 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Analyze sense UNKNOWN:"
903 		    " sense key:%x, ASC:%x, ASCQ:%x\n", sense->es_key,
904 		    sense->es_add_code, sense->es_qual_code));
905 	}
906 
907 	return (rval);
908 }
909 
910 /* ARGSUSED */
911 static int
912 std_pathclass_next(char *cur, char **nxt, void *ctpriv)
913 {
914 	/*
915 	 * The first phase does not have a standby path so
916 	 * there will be no explicit failover - when standard tpgs.
917 	 * standard defines preferred flag then we should start
918 	 * using this as the selection mechanism - there can be
919 	 * preferred primary standby that we should fail to first and then
920 	 * nonpreferred secondary standby.
921 	 */
922 	if (cur == NULL) {
923 		*nxt = PCLASS_PRIMARY;
924 		return (0);
925 	} else if (strcmp(cur, PCLASS_PRIMARY) == 0) {
926 		*nxt = PCLASS_SECONDARY;
927 		return (0);
928 	} else if (strcmp(cur, PCLASS_SECONDARY) == 0) {
929 		return (ENOENT);
930 	} else {
931 		return (EINVAL);
932 	}
933 }
934 
935 int
936 std_set_target_groups(struct scsi_address *ap, int set_state, int tpg_id)
937 {
938 	struct scsi_pkt			*pkt;
939 	struct buf			*bp;
940 	int				len, rval, ss = SCSI_SENSE_UNKNOWN;
941 	char				*bufp;
942 	struct scsi_extended_sense	*sns;
943 
944 	len = 8;
945 
946 	bp = getrbuf(KM_NOSLEEP);
947 	if (bp == NULL) {
948 		VHCI_DEBUG(1, (CE_WARN, NULL, "!std_set_target_groups: "
949 		    " failed getrbuf"));
950 		return (1);
951 	}
952 
953 	bufp = kmem_zalloc(len, KM_NOSLEEP);
954 	if (bufp == NULL) {
955 		VHCI_DEBUG(1, (CE_WARN, NULL, "!std_set_target_groups: "
956 		    "request packet allocation for %d failed....", len));
957 		freerbuf(bp);
958 		return (1);
959 	}
960 
961 	bp->b_un.b_addr = bufp;
962 	bp->b_flags = B_READ;
963 	bp->b_bcount = len;
964 	bp->b_resid = 0;
965 
966 	bufp[4] = (0x0f & set_state);
967 	bufp[6] = (0xff00 & tpg_id) >> 8;
968 	bufp[7] = (0x00ff & tpg_id);
969 
970 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP5,
971 	    sizeof (struct scsi_arq_status), 0, 0, NULL, NULL);
972 
973 	if (pkt == NULL) {
974 		VHCI_DEBUG(1, (CE_NOTE, NULL,
975 		    "!std_set_target_groups: scsi_init_pkt error\n"));
976 		freerbuf(bp);
977 		kmem_free((void *)bufp, len);
978 		return (1);
979 	}
980 
981 	/*
982 	 * Sends 1 TPG descriptor only. Hence Parameter list length pkt_cdbp[9]
983 	 * is set to 8 bytes - Refer SPC3 for details.
984 	 */
985 	pkt->pkt_cdbp[0] = SCMD_MAINTENANCE_OUT;
986 	pkt->pkt_cdbp[1] = SCMD_SET_TARGET_PORT_GROUPS;
987 	pkt->pkt_cdbp[9] = 8;
988 	pkt->pkt_time = 90;
989 
990 	VHCI_DEBUG(1, (CE_NOTE, NULL,
991 	    "!std_set_target_groups: sending set target port group:"
992 	    " cdb[0/1/6/7/8/9]: %x/%x/%x/%x/%x/%x\n", pkt->pkt_cdbp[0],
993 	    pkt->pkt_cdbp[1], pkt->pkt_cdbp[6], pkt->pkt_cdbp[7],
994 	    pkt->pkt_cdbp[8], pkt->pkt_cdbp[9]));
995 
996 #ifdef DEBUG
997 	print_buf(bufp, len);
998 #endif
999 	rval = vhci_do_scsi_cmd(pkt);
1000 
1001 	if (rval == 0) {
1002 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_set_target_groups:"
1003 		    " vhci_do_scsi_cmd failed\n"));
1004 		freerbuf(bp);
1005 		kmem_free((void *)bufp, len);
1006 		scsi_destroy_pkt(pkt);
1007 		return (-1);
1008 	} else if ((pkt->pkt_reason == CMD_CMPLT) &&
1009 	    (SCBP_C(pkt) == STATUS_CHECK) &&
1010 	    (pkt->pkt_state & STATE_ARQ_DONE)) {
1011 		sns = &(((struct scsi_arq_status *)(uintptr_t)
1012 		    (pkt->pkt_scbp))->sts_sensedata);
1013 
1014 		if ((sns->es_key == KEY_UNIT_ATTENTION) &&
1015 		    (sns->es_add_code == STD_SCSI_ASC_STATE_CHG) &&
1016 		    (sns->es_qual_code == STD_SCSI_ASCQ_STATE_CHG_SUCC)) {
1017 			ss = SCSI_SENSE_STATE_CHANGED;
1018 			VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_set_target_groups:"
1019 			    " sense:%x, add_code: %x, qual_code:%x"
1020 			    " sense:%x\n", sns->es_key, sns->es_add_code,
1021 			    sns->es_qual_code, ss));
1022 		} else if ((sns->es_key == KEY_ILLEGAL_REQUEST) &&
1023 		    (sns->es_add_code == STD_SCSI_ASC_INVAL_PARAM_LIST)) {
1024 			ss = SCSI_SENSE_NOFAILOVER;
1025 			VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_set_target_groups:"
1026 			    " sense:%x, add_code: %x, qual_code:%x"
1027 			    " sense:%x\n", sns->es_key, sns->es_add_code,
1028 			    sns->es_qual_code, ss));
1029 		} else if ((sns->es_key == KEY_ILLEGAL_REQUEST) &&
1030 		    (sns->es_add_code == STD_SCSI_ASC_INVAL_CMD_OPCODE)) {
1031 			ss = SCSI_SENSE_NOFAILOVER;
1032 			VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_set_target_groups:"
1033 			    " sense_key:%x, add_code: %x, qual_code:%x"
1034 			    " sense:%x\n", sns->es_key, sns->es_add_code,
1035 			    sns->es_qual_code, rval));
1036 		} else {
1037 			/*
1038 			 * At this point sns data may be for power-on-reset
1039 			 * UNIT ATTN hardware errors, vendor unqiue sense etc.
1040 			 * For all these cases, sense is unknown.
1041 			 */
1042 			ss = SCSI_SENSE_NOFAILOVER;
1043 			VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_set_target_groups: "
1044 			    " sense UNKNOWN: sense key:%x, ASC:%x, ASCQ:%x\n",
1045 			    sns->es_key, sns->es_add_code, sns->es_qual_code));
1046 		}
1047 
1048 		if (ss == SCSI_SENSE_STATE_CHANGED) {
1049 			freerbuf(bp);
1050 			kmem_free((void *)bufp, len);
1051 			scsi_destroy_pkt(pkt);
1052 			return (0);
1053 		}
1054 	}
1055 
1056 	freerbuf(bp);
1057 	kmem_free((void *)bufp, len);
1058 	scsi_destroy_pkt(pkt);
1059 	return (1);
1060 }
1061