1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2024 RackTop Systems, Inc.
25 */
26
27 /*
28 * Implementation of "scsi_vhci_f_tpgs" T10 standard based failover_ops.
29 *
30 * NOTE: for non-sequential devices only.
31 */
32
33 #include <sys/conf.h>
34 #include <sys/file.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/scsi/scsi.h>
38 #include <sys/scsi/adapters/scsi_vhci.h>
39 #include <sys/scsi/adapters/scsi_vhci_tpgs.h>
40
/*
 * Supported device table entries.  The table contains a single NULL
 * entry and lists no device strings.
 */
char *std_dev_table[] = { NULL };

/*
 * Failover module plumbing.
 * NOTE(review): SCSI_FAILOVER_OP is defined outside this file; it
 * presumably ties the std_* entry points below to the failover ops
 * published under SFO_NAME_TPGS -- confirm against scsi_vhci.h.
 */
SCSI_FAILOVER_OP(SFO_NAME_TPGS, std);
46
/* Delays handed to drv_usecwait(), in microseconds. */
#define	STD_FO_CMD_RETRY_DELAY	1000000	/* 1 second */
#define	STD_FO_RETRY_DELAY	2000000	/* 2 seconds */
/*
 * max time for failover to complete is 3 minutes.  Compute
 * number of retries accordingly, to ensure we wait for at least
 * 3 minutes.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single value wherever it is used (e.g. next to '/', '%' or '*').
 */
#define	STD_FO_MAX_RETRIES	((3 * 60 * 1000000) / STD_FO_RETRY_DELAY)
55
56
57 /* ARGSUSED */
58 static int
std_device_probe(struct scsi_device * sd,struct scsi_inquiry * inq,void ** ctpriv)59 std_device_probe(struct scsi_device *sd, struct scsi_inquiry *inq,
60 void **ctpriv)
61 {
62 int mode, state, xlf, preferred = 0;
63
64 VHCI_DEBUG(6, (CE_NOTE, NULL, "std_device_probe: vidpid %s\n",
65 inq->inq_vid));
66
67 if (inq->inq_tpgs == TPGS_FAILOVER_NONE) {
68 VHCI_DEBUG(4, (CE_WARN, NULL,
69 "!std_device_probe: not a standard tpgs device"));
70 return (SFO_DEVICE_PROBE_PHCI);
71 }
72
73 if (inq->inq_dtype == DTYPE_SEQUENTIAL) {
74 VHCI_DEBUG(4, (CE_NOTE, NULL,
75 "!std_device_probe: Detected a "
76 "Standard Asymmetric device "
77 "not yet supported\n"));
78 return (SFO_DEVICE_PROBE_PHCI);
79 }
80
81 if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
82 VHCI_DEBUG(4, (CE_WARN, NULL, "!unable to fetch fo "
83 "mode: sd(%p)", (void *) sd));
84 return (SFO_DEVICE_PROBE_PHCI);
85 }
86
87 if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) {
88 VHCI_DEBUG(1, (CE_NOTE, NULL,
89 "!std_device_probe: Detected a "
90 "Standard Asymmetric device "
91 "with implicit failover\n"));
92 return (SFO_DEVICE_PROBE_VHCI);
93 }
94 if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) {
95 VHCI_DEBUG(1, (CE_NOTE, NULL,
96 "!std_device_probe: Detected a "
97 "Standard Asymmetric device "
98 "with explicit failover\n"));
99 return (SFO_DEVICE_PROBE_VHCI);
100 }
101 if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) {
102 VHCI_DEBUG(1, (CE_NOTE, NULL,
103 "!std_device_probe: Detected a "
104 "Standard Asymmetric device "
105 "which supports both implicit and explicit failover\n"));
106 return (SFO_DEVICE_PROBE_VHCI);
107 }
108 VHCI_DEBUG(1, (CE_WARN, NULL,
109 "!std_device_probe: "
110 "Unknown tpgs_bits: %x", inq->inq_tpgs));
111 return (SFO_DEVICE_PROBE_PHCI);
112 }
113
/*
 * Counterpart of std_device_probe().  Currently a no-op placeholder:
 * neither argument is used.
 */
/* ARGSUSED */
static void
std_device_unprobe(struct scsi_device *sd, void *ctpriv)
{
	/* Reserved for future use. */
}
122
123 /* ARGSUSED */
124 static int
std_activate_explicit(struct scsi_device * sd,int xlf_capable)125 std_activate_explicit(struct scsi_device *sd, int xlf_capable)
126 {
127 cmn_err(CE_NOTE, "Explicit Activation is done by "
128 "vhci_tpgs_set_target_groups() call from MPAPI");
129 return (1);
130 }
131
132 /*
133 * Process the packet reason of CMD_PKT_CMPLT - return 0 if no
134 * retry and 1 if a retry should be done
135 */
136 static int
std_process_cmplt_pkt(struct scsi_device * sd,struct scsi_pkt * pkt,int * retry_cnt,int * retval)137 std_process_cmplt_pkt(struct scsi_device *sd, struct scsi_pkt *pkt,
138 int *retry_cnt, int *retval)
139 {
140 *retval = 1; /* fail */
141
142 switch (SCBP_C(pkt)) {
143 case STATUS_GOOD:
144 *retval = 0;
145 break;
146 case STATUS_CHECK:
147 if (pkt->pkt_state & STATE_ARQ_DONE) {
148 uint8_t *sns, skey, asc, ascq;
149 sns = (uint8_t *)
150 &(((struct scsi_arq_status *)(uintptr_t)
151 (pkt->pkt_scbp))->sts_sensedata);
152 skey = scsi_sense_key(sns);
153 asc = scsi_sense_asc(sns);
154 ascq = scsi_sense_ascq(sns);
155 if (skey == KEY_UNIT_ATTENTION) {
156 /*
157 * tpgs access state changed
158 */
159 if (asc == STD_SCSI_ASC_STATE_CHG &&
160 ascq ==
161 STD_SCSI_ASCQ_STATE_CHG_SUCC) {
162 /* XXX: update path info? */
163 cmn_err(CE_WARN,
164 "!Device failover"
165 " state change");
166 }
167 return (1);
168 } else if (skey == KEY_NOT_READY) {
169 if (asc ==
170 STD_LOGICAL_UNIT_NOT_ACCESSIBLE &&
171 ascq == STD_TGT_PORT_STANDBY) {
172 /*
173 * Don't retry on the path
174 * which is indicated as
175 * standby, return failure.
176 */
177 return (0);
178 } else if ((*retry_cnt)++ >=
179 STD_FO_MAX_RETRIES) {
180 cmn_err(CE_WARN,
181 "!Device failover failed: "
182 "timed out waiting for "
183 "path to become active");
184 return (0);
185 }
186 VHCI_DEBUG(6, (CE_NOTE, NULL,
187 "!(sd:%p)lun becoming active...\n",
188 (void *)sd));
189 drv_usecwait(STD_FO_RETRY_DELAY);
190 return (1);
191 }
192 cmn_err(CE_NOTE, "!Failover failed;"
193 " sense key:%x, ASC: %x, "
194 "ASCQ:%x", skey, asc, ascq);
195 return (0);
196 }
197 VHCI_DEBUG(4, (CE_WARN, NULL,
198 "!(sd:%p):"
199 " status returned CHECK during std"
200 " path activation", (void *)sd));
201 return (0);
202 case STATUS_QFULL:
203 VHCI_DEBUG(6, (CE_NOTE, NULL, "QFULL "
204 "status returned QFULL during std "
205 "path activation for %p\n", (void *)sd));
206 drv_usecwait(5000);
207 return (1);
208 case STATUS_BUSY:
209 VHCI_DEBUG(6, (CE_NOTE, NULL, "BUSY "
210 "status returned BUSY during std "
211 "path activation for %p\n", (void *)sd));
212 drv_usecwait(5000);
213 return (1);
214 default:
215 VHCI_DEBUG(4, (CE_WARN, NULL,
216 "!(sd:%p) Bad status returned during std "
217 "activation (pkt %p, status %x)",
218 (void *)sd, (void *)pkt, SCBP_C(pkt)));
219 return (0);
220 }
221 return (0);
222 }
223
224 /*
225 * For now we are going to use primary/online and secondary/online.
226 * There is no standby path returned by the dsp and we may have
227 * to do something different for other devices that use standby
228 */
229 /* ARGSUSED */
230 static int
std_path_activate(struct scsi_device * sd,char * pathclass,void * ctpriv)231 std_path_activate(struct scsi_device *sd, char *pathclass,
232 void *ctpriv)
233 {
234 struct buf *bp;
235 struct scsi_pkt *pkt;
236 struct scsi_address *ap;
237 int err, retry_cnt, retry_cmd_cnt;
238 int mode, state, retval, xlf, preferred;
239 size_t blksize;
240
241 ap = &sd->sd_address;
242
243 mode = state = 0;
244
245 blksize = vhci_get_blocksize(sd->sd_dev);
246
247 if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
248 VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_activate:"
249 " failed vhci_tpgs_get_target_fo_mode\n"));
250 return (1);
251 }
252 if ((state == STD_ACTIVE_OPTIMIZED) ||
253 (state == STD_ACTIVE_NONOPTIMIZED)) {
254 VHCI_DEBUG(4, (CE_NOTE, NULL, "!path already active for %p\n",
255 (void *)sd));
256 return (0);
257 }
258
259 if (mode == SCSI_EXPLICIT_FAILOVER) {
260 VHCI_DEBUG(4, (CE_NOTE, NULL,
261 "!mode is EXPLICIT for %p xlf %x\n",
262 (void *)sd, xlf));
263 retval = std_activate_explicit(sd, xlf);
264 if (retval != 0) {
265 VHCI_DEBUG(4, (CE_NOTE, NULL,
266 "!(sd:%p)std_path_activate failed(1)\n",
267 (void *)sd));
268 return (1);
269 }
270 } else {
271 VHCI_DEBUG(4, (CE_NOTE, NULL, "STD mode is IMPLICIT for %p\n",
272 (void *)sd));
273 }
274
275 bp = scsi_alloc_consistent_buf(ap, (struct buf *)NULL, blksize, B_READ,
276 NULL, NULL);
277 if (!bp) {
278 VHCI_DEBUG(4, (CE_WARN, NULL,
279 "!(sd:%p)std_path_activate failed to alloc buffer",
280 (void *)sd));
281 return (1);
282 }
283
284 pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP1,
285 sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT, NULL, NULL);
286 if (!pkt) {
287 VHCI_DEBUG(4, (CE_WARN, NULL,
288 "!(sd:%p)std_path_activate failed to initialize packet",
289 (void *)sd));
290 scsi_free_consistent_buf(bp);
291 return (1);
292 }
293
294 (void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)pkt->pkt_cdbp,
295 SCMD_READ_G1, 1, 1, 0);
296 pkt->pkt_time = 3*30;
297 pkt->pkt_flags |= FLAG_NOINTR;
298
299 retry_cnt = 0;
300 retry_cmd_cnt = 0;
301 retry:
302 err = scsi_transport(pkt);
303 if (err != TRAN_ACCEPT) {
304 /*
305 * Retry TRAN_BUSY till STD_FO_MAX_RETRIES is exhausted.
306 * All other errors are fatal and should not be retried.
307 */
308 if ((err == TRAN_BUSY) &&
309 (retry_cnt++ < STD_FO_MAX_RETRIES)) {
310 drv_usecwait(STD_FO_RETRY_DELAY);
311 goto retry;
312 }
313 cmn_err(CE_WARN, "Failover failed, "
314 "couldn't transport packet");
315 scsi_destroy_pkt(pkt);
316 scsi_free_consistent_buf(bp);
317 return (1);
318 }
319 switch (pkt->pkt_reason) {
320 case CMD_CMPLT:
321 /*
322 * Re-initialize retry_cmd_cnt. Allow transport and
323 * cmd errors to go through a full retry count when
324 * these are encountered. This way TRAN/CMD errors
325 * retry count is not exhausted due to CMD_CMPLTs
326 * delay. This allows the system
327 * to brave a hick-up on the link at any given time,
328 * while waiting for the fo to complete.
329 */
330 retry_cmd_cnt = 0;
331 if (std_process_cmplt_pkt(sd, pkt, &retry_cnt,
332 &retval) != 0) {
333 goto retry;
334 }
335 break;
336 case CMD_TIMEOUT:
337 cmn_err(CE_WARN, "!Failover failed: timed out ");
338 retval = 1;
339 break;
340 case CMD_INCOMPLETE:
341 case CMD_RESET:
342 case CMD_ABORTED:
343 case CMD_TRAN_ERR:
344 /*
345 * Increased the number of retries when these error
346 * cases are encountered. Also added a 1 sec wait
347 * before retrying.
348 */
349 if (retry_cmd_cnt++ < STD_FO_MAX_CMD_RETRIES) {
350 drv_usecwait(STD_FO_CMD_RETRY_DELAY);
351 VHCI_DEBUG(4, (CE_WARN, NULL,
352 "!Retrying path activation due to "
353 "pkt reason:%x, retry cnt:%d",
354 pkt->pkt_reason, retry_cmd_cnt));
355 goto retry;
356 }
357 /* FALLTHROUGH */
358 default:
359 cmn_err(CE_WARN, "!Path activation did not "
360 "complete successfully,"
361 "(pkt reason %x)", pkt->pkt_reason);
362 retval = 1;
363 break;
364 }
365
366 scsi_destroy_pkt(pkt);
367 scsi_free_consistent_buf(bp);
368 return (retval);
369 }
370
/*
 * Path deactivation entry point.  Nothing is done here: the function
 * ignores all of its arguments and always returns 0.
 */
/* ARGSUSED */
static int
std_path_deactivate(struct scsi_device *sd, char *pathclass,
    void *ctpriv)
{
	return (0);
}
377
378 /* ARGSUSED */
379 static int
std_path_get_opinfo(struct scsi_device * sd,struct scsi_path_opinfo * opinfo,void * ctpriv)380 std_path_get_opinfo(struct scsi_device *sd, struct scsi_path_opinfo *opinfo,
381 void *ctpriv)
382 {
383 int mode, preferred, state, xlf;
384
385 opinfo->opinfo_rev = OPINFO_REV;
386
387 if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
388 VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_getopinfo:"
389 " failed vhci_tpgs_get_target_fo_mode\n"));
390 return (1);
391 }
392
393 if (state == STD_ACTIVE_OPTIMIZED) {
394 opinfo->opinfo_path_state = SCSI_PATH_ACTIVE;
395 } else if (state == STD_ACTIVE_NONOPTIMIZED) {
396 opinfo->opinfo_path_state = SCSI_PATH_ACTIVE_NONOPT;
397 } else if (state == STD_STANDBY) {
398 opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
399 } else if (state == STD_UNAVAILABLE) {
400 opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
401 }
402 if (preferred) {
403 (void) strcpy(opinfo->opinfo_path_attr, PCLASS_PRIMARY);
404 } else {
405 (void) strcpy(opinfo->opinfo_path_attr, PCLASS_SECONDARY);
406 }
407 VHCI_DEBUG(4, (CE_NOTE, NULL, "std_path_get_opinfo: "
408 "class: %s state: %s\n", opinfo->opinfo_path_attr,
409 opinfo->opinfo_path_state == SCSI_PATH_ACTIVE ?
410 "ACTIVE" : "INACTIVE"));
411 opinfo->opinfo_xlf_capable = 0;
412 opinfo->opinfo_pswtch_best = 30;
413 opinfo->opinfo_pswtch_worst = 3*30;
414 opinfo->opinfo_preferred = (uint16_t)preferred;
415 opinfo->opinfo_mode = (uint16_t)mode;
416
417 return (0);
418 }
419
/*
 * Path ping entry point.  Currently a placeholder: it ignores both
 * arguments and always returns 1.
 */
/* ARGSUSED */
static int
std_path_ping(struct scsi_device *sd, void *ctpriv)
{
	/* Reserved for future use. */
	return (1);
}
428
429 /*
430 * Analyze the sense code to determine whether failover process
431 */
432 /* ARGSUSED */
433 static int
std_analyze_sense(struct scsi_device * sd,uint8_t * sense,void * ctpriv)434 std_analyze_sense(struct scsi_device *sd, uint8_t *sense,
435 void *ctpriv)
436 {
437 int rval = SCSI_SENSE_UNKNOWN;
438
439 uint8_t skey, asc, ascq;
440
441 skey = scsi_sense_key(sense);
442 asc = scsi_sense_asc(sense);
443 ascq = scsi_sense_ascq(sense);
444
445 if ((skey == KEY_UNIT_ATTENTION) &&
446 (asc == STD_SCSI_ASC_STATE_CHG) &&
447 (ascq == STD_SCSI_ASCQ_STATE_CHG_SUCC)) {
448 rval = SCSI_SENSE_STATE_CHANGED;
449 VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
450 " sense_key:%x, add_code: %x, qual_code:%x"
451 " sense:%x\n", skey, asc, ascq, rval));
452 } else if ((skey == KEY_NOT_READY) &&
453 (asc == STD_LOGICAL_UNIT_NOT_ACCESSIBLE) &&
454 ((ascq == STD_TGT_PORT_UNAVAILABLE) ||
455 (ascq == STD_TGT_PORT_STANDBY))) {
456 rval = SCSI_SENSE_INACTIVE;
457 VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
458 " sense_key:%x, add_code: %x, qual_code:%x"
459 " sense:%x\n", skey, asc, ascq, rval));
460 } else if ((skey == KEY_ILLEGAL_REQUEST) &&
461 (asc == STD_SCSI_ASC_INVAL_PARAM_LIST)) {
462 rval = SCSI_SENSE_NOFAILOVER;
463 VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
464 " sense_key:%x, add_code: %x, qual_code:%x"
465 " sense:%x\n", skey, asc, ascq, rval));
466 } else if ((skey == KEY_ILLEGAL_REQUEST) &&
467 (asc == STD_SCSI_ASC_INVAL_CMD_OPCODE)) {
468 rval = SCSI_SENSE_NOFAILOVER;
469 VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
470 " sense_key:%x, add_code: %x, qual_code:%x"
471 " sense:%x\n", skey, asc, ascq, rval));
472 } else {
473 /*
474 * At this point sense data may be for power-on-reset
475 * UNIT ATTN hardware errors, vendor unqiue sense data etc.
476 * For all these cases, return SCSI_SENSE_UNKNOWN.
477 */
478 VHCI_DEBUG(1, (CE_NOTE, NULL, "!Analyze sense UNKNOWN:"
479 " sense key:%x, ASC:%x, ASCQ:%x\n", skey, asc, ascq));
480 }
481
482 return (rval);
483 }
484
485 /* ARGSUSED */
486 static int
std_pathclass_next(char * cur,char ** nxt,void * ctpriv)487 std_pathclass_next(char *cur, char **nxt, void *ctpriv)
488 {
489 /*
490 * The first phase does not have a standby path so
491 * there will be no explicit failover - when standard tpgs.
492 * standard defines preferred flag then we should start
493 * using this as the selection mechanism - there can be
494 * preferred primary standby that we should fail to first and then
495 * nonpreferred secondary standby.
496 */
497 if (cur == NULL) {
498 *nxt = PCLASS_PRIMARY;
499 return (0);
500 } else if (strcmp(cur, PCLASS_PRIMARY) == 0) {
501 *nxt = PCLASS_SECONDARY;
502 return (0);
503 } else if (strcmp(cur, PCLASS_SECONDARY) == 0) {
504 return (ENOENT);
505 } else {
506 return (EINVAL);
507 }
508 }
509