/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 Tintri by DDN, Inc. All rights reserved.
 */

/*
 * This file contains the code necessary to send SCSI commands to the HBA.
 */
#include <smartpqi.h>

/*
 * []----------------------------------------------------------------------[]
 * | Forward declarations for support/utility functions			|
 * []----------------------------------------------------------------------[]
 */
static void aio_io_complete(pqi_io_request_t *io, void *context);
static void raid_io_complete(pqi_io_request_t *io, void *context);
static void build_aio_sg_list(pqi_state_t *s,
    pqi_aio_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
static void build_raid_sg_list(pqi_state_t *s,
    pqi_raid_path_request_t *rqst, pqi_cmd_t *cmd, pqi_io_request_t *);
static pqi_io_request_t *setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd);
static pqi_io_request_t *setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd);
static uint32_t read_heartbeat_counter(pqi_state_t *s);
static void take_ctlr_offline(pqi_state_t *s);
static uint32_t free_elem_count(pqi_index_t pi, pqi_index_t ci,
    uint32_t per_iq);
static void ack_event(pqi_state_t *s, pqi_event_t *e);
static boolean_t is_aio_enabled(pqi_device_t *d);
static void lun_reset_worker(void *v);
static void lun_reset_complete(pqi_io_request_t *io, void *ctx);

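/*
 * Ceiling division: the number of d-sized elements needed to hold n bytes,
 * e.g. DIV_UP(100, 16) == 7. Used below to size an IU in queue elements.
 */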
#define	DIV_UP(n, d)	(((n) + (d) - 1) / (d))

/*
 * []----------------------------------------------------------------------[]
 * | Main entry points in file.						|
 * []----------------------------------------------------------------------[]
 */

int pqi_do_reset_lun = -1;
int pqi_do_reset_ctlr = -1;

/*
 * pqi_watchdog -- interrupt count and/or heartbeat must increase over time.
 */
void
pqi_watchdog(void *v)
{
	pqi_state_t *s = v;
	uint32_t hb;

	if (pqi_is_offline(s))
		return;

	hb = read_heartbeat_counter(s);
	if ((s->s_last_intr_count == s->s_intr_count) &&
	    (s->s_last_heartbeat_count == hb)) {
		dev_err(s->s_dip, CE_NOTE, "No heartbeat");
		pqi_show_dev_state(s);
		take_ctlr_offline(s);
	} else {
		if (pqi_do_reset_ctlr == s->s_instance) {
			pqi_do_reset_ctlr = -1;
			take_ctlr_offline(s);
		} else {
			s->s_last_intr_count = s->s_intr_count;
			s->s_last_heartbeat_count = hb;
			s->s_watchdog = timeout(pqi_watchdog, s,
			    drv_usectohz(WATCHDOG));
		}
	}
}

/*
 * pqi_start_io -- queue a command to the HBA.
 *
 * This function can be called either from the upper layers with a non-NULL
 * io argument or from the interrupt path to load the outgoing queue with
 * more commands.
 */
void
pqi_start_io(pqi_state_t *s, pqi_queue_group_t *qg, pqi_path_t path,
    pqi_io_request_t *io)
{
	pqi_iu_header_t *rqst;
	size_t iu_len;
	size_t copy_to_end;
	pqi_index_t iq_pi;
	pqi_index_t iq_ci;
	uint32_t elem_needed;
	uint32_t elem_to_end;
	caddr_t next_elem;
	int sending = 0;

	mutex_enter(&qg->submit_lock[path]);
	if (io != NULL) {
		io->io_queue_group = qg;
		io->io_queue_path = path;
		list_insert_tail(&qg->request_list[path], io);
	}

	iq_pi = qg->iq_pi_copy[path];
	while ((io = list_remove_head(&qg->request_list[path])) != NULL) {

		/* ---- Primary cause for !active is controller failure ---- */
		if (qg->qg_active == B_FALSE) {
			pqi_cmd_t *c = io->io_cmd;

			mutex_enter(&c->pc_device->pd_mutex);
			/*
			 * When a command is failed it will be removed from
			 * the queue group if pc_io_rqst is not NULL. Since
			 * we have already removed the command from the list
			 * we shouldn't attempt to do so a second time.
			 */
			c->pc_io_rqst = NULL;
			(void) pqi_fail_cmd(io->io_cmd, CMD_DEV_GONE,
			    STAT_TERMINATED);
			mutex_exit(&c->pc_device->pd_mutex);
			continue;
		}

		rqst = io->io_iu;
		iu_len = rqst->iu_length + PQI_REQUEST_HEADER_LENGTH;
		elem_needed = DIV_UP(iu_len, PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
		(void) ddi_dma_sync(s->s_queue_dma->handle,
		    (uintptr_t)qg->iq_ci[path] -
		    (uintptr_t)s->s_queue_dma->alloc_memory, sizeof (iq_ci),
		    DDI_DMA_SYNC_FORCPU);
		iq_ci = *qg->iq_ci[path];

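		/*
		 * If the IU needs more queue elements than are currently
		 * free, requeue it at the head of the request list and stop
		 * submitting; pqi_start_io() will be called again from the
		 * completion path once elements have been consumed.
		 */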
		if (elem_needed > free_elem_count(iq_pi, iq_ci,
		    s->s_num_elements_per_iq)) {
			list_insert_head(&qg->request_list[path], io);
			break;
		}

		if (pqi_cmd_action(io->io_cmd, PQI_CMD_START) == PQI_CMD_FAIL)
			continue;

		io->io_pi = iq_pi;
		rqst->iu_id = qg->oq_id;
		next_elem = qg->iq_element_array[path] +
		    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);
		elem_to_end = s->s_num_elements_per_iq - iq_pi;
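		/*
		 * The IU may wrap past the end of the circular input queue.
		 * If it fits in the remaining elements it is copied in one
		 * shot; otherwise the copy (and the DMA sync) is split into
		 * a tail portion and a second portion starting at element 0.
		 */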
		if (elem_needed <= elem_to_end) {
			(void) memcpy(next_elem, rqst, iu_len);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    (uintptr_t)next_elem -
			    (uintptr_t)s->s_queue_dma->alloc_memory, iu_len,
			    DDI_DMA_SYNC_FORDEV);
		} else {
			copy_to_end = elem_to_end *
			    PQI_OPERATIONAL_IQ_ELEMENT_LENGTH;
			(void) memcpy(next_elem, rqst, copy_to_end);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    (uintptr_t)next_elem -
			    (uintptr_t)s->s_queue_dma->alloc_memory,
			    copy_to_end, DDI_DMA_SYNC_FORDEV);
			(void) memcpy(qg->iq_element_array[path],
			    (caddr_t)rqst + copy_to_end,
			    iu_len - copy_to_end);
			(void) ddi_dma_sync(s->s_queue_dma->handle,
			    0, iu_len - copy_to_end, DDI_DMA_SYNC_FORDEV);
		}
		sending += elem_needed;

		iq_pi = (iq_pi + elem_needed) % s->s_num_elements_per_iq;
	}

	qg->submit_count += sending;
	if (iq_pi != qg->iq_pi_copy[path]) {
		qg->iq_pi_copy[path] = iq_pi;
		ddi_put32(s->s_datap, qg->iq_pi[path], iq_pi);
	} else {
		ASSERT0(sending);
	}
	mutex_exit(&qg->submit_lock[path]);
}

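/*
 * pqi_transport_command -- select the I/O path and hand the command off.
 *
 * Commands for devices flagged as AIO (accelerated I/O) capable are sent on
 * the AIO path; everything else goes down the RAID path. Both paths end up
 * in pqi_start_io() on the default queue group.
 */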
int
pqi_transport_command(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_device_t *devp = cmd->pc_device;
	int path;
	pqi_io_request_t *io;

	if (is_aio_enabled(devp) == B_TRUE) {
		path = AIO_PATH;
		io = setup_aio_request(s, cmd);
	} else {
		path = RAID_PATH;
		io = setup_raid_request(s, cmd);
	}

	if (io == NULL)
		return (TRAN_BUSY);

	cmd->pc_io_rqst = io;
	(void) pqi_cmd_action(cmd, PQI_CMD_QUEUE);

	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP],
	    path, io);

	return (TRAN_ACCEPT);
}

void
pqi_do_rescan(void *v)
{
	pqi_state_t *s = v;

	ndi_devi_enter(scsi_vhci_dip);
	ndi_devi_enter(s->s_dip);
	pqi_rescan_devices(s);
	(void) pqi_config_all(s->s_dip, s);
	ndi_devi_exit(s->s_dip);
	ndi_devi_exit(scsi_vhci_dip);
}

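/*
 * pqi_event_worker -- acknowledge any pending events from the HBA.
 *
 * Heartbeat events are only acknowledged; any other pending event type also
 * triggers a rescan since it may indicate a device configuration change.
 */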
void
pqi_event_worker(void *v)
{
	pqi_state_t *s = v;
	int i;
	pqi_event_t *e;
	boolean_t non_heartbeat = B_FALSE;

	if (pqi_is_offline(s))
		return;

	e = s->s_events;
	for (i = 0; i < PQI_NUM_SUPPORTED_EVENTS; i++) {
		if (e->ev_pending == B_TRUE) {
			e->ev_pending = B_FALSE;
			ack_event(s, e);
			if (pqi_map_event(PQI_EVENT_TYPE_HEARTBEAT) != i)
				non_heartbeat = B_TRUE;
		}
		e++;
	}

	if (non_heartbeat == B_TRUE)
		pqi_do_rescan(s);
}

/*
 * pqi_fail_cmd -- fail the command with the given reason and statistics.
 */
pqi_cmd_action_t
pqi_fail_cmd(pqi_cmd_t *cmd, uchar_t reason, uint_t stats)
{
	struct scsi_pkt *pkt = CMD2PKT(cmd);

	pkt->pkt_reason = reason;
	pkt->pkt_statistics = stats;

	return (pqi_cmd_action_nolock(cmd, PQI_CMD_FAIL));
}

void
pqi_fail_drive_cmds(pqi_device_t *d, uchar_t reason)
{
	pqi_cmd_t *c, *next_c;

	mutex_enter(&d->pd_mutex);

	c = list_head(&d->pd_cmd_list);
	while (c != NULL) {
		next_c = list_next(&d->pd_cmd_list, c);
		if (pqi_fail_cmd(c, reason, STAT_BUS_RESET) !=
		    PQI_CMD_START) {
			/*
			 * The command can't be terminated in the driver
			 * because it has already been handed off to the HBA,
			 * so the driver has to wait for completion. The
			 * reason is that the HBA reports completions by
			 * slot, not by a pointer to a command. If the code
			 * were to cancel an outstanding command that slot
			 * could be reused by another command and, when the
			 * completion interrupt arrives, the driver would
			 * signal that a command had completed when in fact
			 * it was a prior command that had been canceled.
			 *
			 * Should the command fail to complete due to an HBA
			 * error the command will be forced through to
			 * completion during a timeout scan that occurs on
			 * another thread.
			 */
			d->pd_killed++;
		} else {
			d->pd_posted++;
		}
		c = next_c;
	}

	mutex_exit(&d->pd_mutex);
}

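/*
 * pqi_disable_intr -- mask MSI-X/INTx delivery via the SIS doorbell.
 *
 * The previous doorbell value is returned so that pqi_enable_intr() can
 * restore the exact prior state.
 */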
uint32_t
pqi_disable_intr(pqi_state_t *s)
{
	uint32_t db;
	uint32_t rval;

	rval = db = G32(s, sis_host_to_ctrl_doorbell);
	db &= ~(SIS_ENABLE_MSIX | SIS_ENABLE_INTX);
	S32(s, sis_host_to_ctrl_doorbell, db);
	return (rval);
}

void
pqi_enable_intr(pqi_state_t *s, uint32_t old_state)
{
	S32(s, sis_host_to_ctrl_doorbell, old_state);
}

typedef struct reset_closure {
	pqi_state_t *rc_s;
	pqi_device_t *rc_d;
} *reset_closure_t;

/*
 * pqi_lun_reset -- set up a callback to reset the device
 *
 * The work is dispatched to a taskq because the call tree can come from the
 * interrupt routine. (pqi_process_io_intr -> aio_io_complete -> SCSA ->
 * tran_reset -> pqi_lun_reset). If pqi_lun_reset were to do the reset work
 * itself it would then wait for an interrupt which would never arrive since
 * the current thread would be the interrupt thread. So, start a task to
 * reset the device and wait for completion.
 */
void
pqi_lun_reset(pqi_state_t *s, pqi_device_t *d)
{
	reset_closure_t r = kmem_alloc(sizeof (struct reset_closure), KM_SLEEP);

	r->rc_s = s;
	r->rc_d = d;
	(void) ddi_taskq_dispatch(s->s_events_taskq, lun_reset_worker, r, 0);
}

/*
 * []----------------------------------------------------------------------[]
 * | Support/utility functions for main entry points			|
 * []----------------------------------------------------------------------[]
 */

static uint32_t
count_drive_cmds(pqi_device_t *d)
{
	pqi_cmd_t *c;
	uint32_t count = 0;

	mutex_enter(&d->pd_mutex);
	c = list_head(&d->pd_cmd_list);
	while (c != NULL) {
		c = list_next(&d->pd_cmd_list, c);
		count++;
	}
	mutex_exit(&d->pd_mutex);

	return (count);
}

static uint32_t
count_oustanding_cmds(pqi_state_t *s)
{
	uint32_t count = 0;
	pqi_device_t *d;

	mutex_enter(&s->s_mutex);
	d = list_head(&s->s_devnodes);
	while (d != NULL) {
		count += count_drive_cmds(d);
		d = list_next(&s->s_devnodes, d);
	}
	mutex_exit(&s->s_mutex);

	return (count);
}

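/*
 * lun_reset_worker -- taskq callback that performs the actual LUN reset.
 *
 * A stack-allocated pqi_cmd and a semaphore are used so that the worker can
 * issue a SOP LUN RESET task-management IU through the normal start-io path
 * and then block until lun_reset_complete() posts the semaphore from the
 * completion path.
 */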
static void
lun_reset_worker(void *v)
{
	reset_closure_t r = v;
	pqi_state_t *s;
	pqi_device_t *d;
	pqi_io_request_t *io;
	ksema_t sema;
	pqi_task_management_rqst_t *rqst;
	struct pqi_cmd cmd;

	s = r->rc_s;
	d = r->rc_d;

	pqi_fail_drive_cmds(d, CMD_RESET);
	sema_init(&sema, 0, NULL, SEMA_DRIVER, NULL);

	bzero(&cmd, sizeof (cmd));
	mutex_init(&cmd.pc_mutex, NULL, MUTEX_DRIVER, NULL);

	if ((io = pqi_alloc_io(s)) == NULL) {
		mutex_destroy(&cmd.pc_mutex);
		kmem_free(r, sizeof (*r));
		return;
	}
	io->io_cb = lun_reset_complete;
	io->io_context = &sema;
	io->io_cmd = &cmd;
	cmd.pc_io_rqst = io;
	cmd.pc_softc = s;
	cmd.pc_device = &s->s_special_device;

	(void) pqi_cmd_action(&cmd, PQI_CMD_QUEUE);

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));

	rqst->header.iu_type = PQI_REQUEST_IU_TASK_MANAGEMENT;
	rqst->header.iu_length = sizeof (*rqst) - PQI_REQUEST_HEADER_LENGTH;
	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	(void) memcpy(rqst->lun_number, d->pd_scsi3addr,
	    sizeof (rqst->lun_number));
	rqst->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;

	pqi_start_io(s, &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
	    io);

	sema_p(&sema);

	(void) pqi_cmd_action(&cmd, PQI_CMD_CMPLT);
	mutex_destroy(&cmd.pc_mutex);
	kmem_free(r, sizeof (*r));
}

static void
lun_reset_complete(pqi_io_request_t *io __unused, void *ctx)
{
	sema_v((ksema_t *)ctx);
}

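/*
 * send_event_ack -- post an event acknowledgement IU on the RAID path.
 *
 * Spins until a free queue element is available (or the controller goes
 * offline), then busy-waits up to 10 seconds for the firmware to consume
 * the acknowledgement since no interrupt is generated for it.
 */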
static void
send_event_ack(pqi_state_t *s, pqi_event_acknowledge_request_t *rqst)
{
	pqi_queue_group_t *qg;
	caddr_t next_element;
	pqi_index_t iq_ci;
	pqi_index_t iq_pi;
	int ms_timeo = 1000 * 10;

	qg = &s->s_queue_groups[PQI_DEFAULT_QUEUE_GROUP];
	rqst->header.iu_id = qg->oq_id;

	for (;;) {
		mutex_enter(&qg->submit_lock[RAID_PATH]);
		iq_pi = qg->iq_pi_copy[RAID_PATH];
		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);

		if (free_elem_count(iq_pi, iq_ci, s->s_num_elements_per_iq))
			break;

		mutex_exit(&qg->submit_lock[RAID_PATH]);
		if (pqi_is_offline(s))
			return;
	}
	next_element = qg->iq_element_array[RAID_PATH] +
	    (iq_pi * PQI_OPERATIONAL_IQ_ELEMENT_LENGTH);

	(void) memcpy(next_element, rqst, sizeof (*rqst));
	(void) ddi_dma_sync(s->s_queue_dma->handle, 0, 0, DDI_DMA_SYNC_FORDEV);

	iq_pi = (iq_pi + 1) % s->s_num_elements_per_iq;
	qg->iq_pi_copy[RAID_PATH] = iq_pi;

	ddi_put32(s->s_datap, qg->iq_pi[RAID_PATH], iq_pi);

	/*
	 * Special case processing is required for events. The driver must
	 * wait until the acknowledgement is processed before proceeding.
	 * Unfortunately, the HBA doesn't provide an interrupt for this,
	 * which means the code must busy wait.
	 * The code will wait up to 10 seconds.
	 */
	while (ms_timeo--) {
		drv_usecwait(1000);
		iq_ci = ddi_get32(s->s_queue_dma->acc, qg->iq_ci[RAID_PATH]);
		if (iq_pi == iq_ci)
			break;
	}

	mutex_exit(&qg->submit_lock[RAID_PATH]);
}

static void
ack_event(pqi_state_t *s, pqi_event_t *e)
{
	pqi_event_acknowledge_request_t rqst;

	(void) memset(&rqst, 0, sizeof (rqst));
	rqst.header.iu_type = PQI_REQUEST_IU_ACKNOWLEDGE_VENDOR_EVENT;
	rqst.header.iu_length = sizeof (rqst) - PQI_REQUEST_HEADER_LENGTH;
	rqst.event_type = e->ev_type;
	rqst.event_id = e->ev_id;
	rqst.additional_event_id = e->ev_additional;

	send_event_ack(s, &rqst);
}

static pqi_io_request_t *
setup_aio_request(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_io_request_t *io;
	pqi_aio_path_request_t *rqst;
	pqi_device_t *devp = cmd->pc_device;

	/* ---- Most likely received a signal during a cv_wait ---- */
	if ((io = pqi_alloc_io(s)) == NULL)
		return (NULL);

	io->io_cb = aio_io_complete;
	io->io_cmd = cmd;
	io->io_raid_bypass = 0;

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));

	rqst->header.iu_type = PQI_REQUEST_IU_AIO_PATH_IO;
	rqst->nexus_id = devp->pd_aio_handle;
	rqst->buffer_length = cmd->pc_dma_count;
	rqst->task_attribute = SOP_TASK_ATTRIBUTE_SIMPLE;
	rqst->request_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	rqst->error_index = io->io_index;
	rqst->cdb_length = cmd->pc_cmdlen;
	(void) memcpy(rqst->cdb, cmd->pc_cdb, cmd->pc_cmdlen);
	(void) memcpy(rqst->lun_number, devp->pd_scsi3addr,
	    sizeof (rqst->lun_number));

	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
		if (cmd->pc_flags & PQI_FLAG_IO_READ)
			rqst->data_direction = SOP_READ_FLAG;
		else
			rqst->data_direction = SOP_WRITE_FLAG;
	} else {
		rqst->data_direction = SOP_NO_DIRECTION_FLAG;
	}

	build_aio_sg_list(s, rqst, cmd, io);
	return (io);
}

static pqi_io_request_t *
setup_raid_request(pqi_state_t *s, pqi_cmd_t *cmd)
{
	pqi_io_request_t *io;
	pqi_raid_path_request_t *rqst;
	pqi_device_t *devp = cmd->pc_device;

	/* ---- Most likely received a signal during a cv_wait ---- */
	if ((io = pqi_alloc_io(s)) == NULL)
		return (NULL);

	io->io_cb = raid_io_complete;
	io->io_cmd = cmd;
	io->io_raid_bypass = 0;

	rqst = io->io_iu;
	(void) memset(rqst, 0, sizeof (*rqst));
	rqst->header.iu_type = PQI_REQUEST_IU_RAID_PATH_IO;
	rqst->rp_data_len = cmd->pc_dma_count;
	rqst->rp_task_attr = SOP_TASK_ATTRIBUTE_SIMPLE;
	rqst->rp_id = PQI_MAKE_REQID(io->io_index, io->io_gen);
	rqst->rp_error_index = io->io_index;
	(void) memcpy(rqst->rp_lun, devp->pd_scsi3addr, sizeof (rqst->rp_lun));
	(void) memcpy(rqst->rp_cdb, cmd->pc_cdb, cmd->pc_cmdlen);

	ASSERT(cmd->pc_cmdlen <= 16);
	rqst->rp_additional_cdb = SOP_ADDITIONAL_CDB_BYTES_0;

	if (cmd->pc_flags & PQI_FLAG_DMA_VALID) {
		if (cmd->pc_flags & PQI_FLAG_IO_READ)
			rqst->rp_data_dir = SOP_READ_FLAG;
		else
			rqst->rp_data_dir = SOP_WRITE_FLAG;
	} else {
		rqst->rp_data_dir = SOP_NO_DIRECTION_FLAG;
	}

	build_raid_sg_list(s, rqst, cmd, io);
	return (io);
}

pqi_cmd_t *
pqi_process_comp_ring(pqi_state_t *s __unused)
{
	return (NULL);
}

static void
raid_io_complete(pqi_io_request_t *io, void *context)
{
	/*
	 * ---- XXX Not sure if this completion function will be the same
	 * or different in the end. If it's the same this wrapper will be
	 * removed and aio_io_complete will have its name changed to
	 * something more generic.
	 */
	aio_io_complete(io, context);
}

/*
 * special_error_check -- See if the sense buffer matches "offline" status.
 *
 * spc3r23 section 4.5.6 -- Sense key and sense code definitions.
 * Sense key == 5 (KEY_ILLEGAL_REQUEST) indicates one of several conditions
 * a) Command addressed to incorrect logical unit.
 * b) Command had an invalid task attribute.
 * ...
 * Table 28 also shows that an ASC of 0x26 with an ASCQ of 0x00 means
 * INVALID FIELD IN PARAMETER LIST.
 * At no other time does this combination of KEY/ASC/ASCQ occur except when
 * a device or cable is pulled from the system along with a hotplug event.
 * Without documentation it's only a guess, but it's the best that's
 * available. So, if these conditions are true the packet's pkt_reason will
 * be changed to CMD_DEV_GONE, which causes MPxIO to switch to the other
 * path, and the hotplug event will cause a scan to occur which removes
 * other inactive devices in case of a cable pull.
 */
boolean_t
special_error_check(pqi_cmd_t *cmd)
{
	struct scsi_arq_status *arq;

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	arq = (struct scsi_arq_status *)cmd->pc_pkt->pkt_scbp;

	if (((*cmd->pc_pkt->pkt_scbp & STATUS_MASK) == STATUS_CHECK) &&
	    (arq->sts_sensedata.es_key == KEY_ILLEGAL_REQUEST) &&
	    (arq->sts_sensedata.es_add_code == 0x26) &&
	    (arq->sts_sensedata.es_qual_code == 0)) {
		return (B_TRUE);
	} else {
		return (B_FALSE);
	}
}

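/*
 * aio_io_complete -- translate the PQI I/O status into scsi_pkt state,
 * reason, and statistics, then hand the command back through
 * PQI_CMD_CMPLT.
 */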
static void
aio_io_complete(pqi_io_request_t *io, void *context __unused)
{
	pqi_cmd_t *cmd = io->io_cmd;
	struct scsi_pkt *pkt = CMD2PKT(cmd);
	boolean_t pkt_ok = B_FALSE;

	if (cmd->pc_flags & (PQI_FLAG_IO_READ | PQI_FLAG_IO_IOPB))
		(void) ddi_dma_sync(cmd->pc_dmahdl, 0, 0, DDI_DMA_SYNC_FORCPU);

	switch (io->io_status) {
	case PQI_DATA_IN_OUT_UNDERFLOW:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD | STATE_GOT_STATUS;
		if (pkt->pkt_resid == cmd->pc_dma_count) {
			pkt->pkt_reason = CMD_INCOMPLETE;
		} else {
			pkt->pkt_state |= STATE_XFERRED_DATA;
			pkt->pkt_reason = CMD_CMPLT;
		}
		break;

	case PQI_DATA_IN_OUT_GOOD:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD | STATE_GOT_STATUS;
		if (cmd->pc_flags & PQI_FLAG_DMA_VALID)
			pkt->pkt_state |= STATE_XFERRED_DATA;
		pkt->pkt_reason = CMD_CMPLT;
		pkt->pkt_resid = 0;
		pkt->pkt_statistics = 0;
		pkt_ok = B_TRUE;
		break;

	case PQI_DATA_IN_OUT_ERROR:
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD;
		if (pkt->pkt_resid != cmd->pc_dma_count) {
			pkt->pkt_state |= STATE_XFERRED_DATA;
			pkt->pkt_reason = CMD_CMPLT;
		} else {
			pkt->pkt_reason = CMD_CMPLT;
		}
		break;

	case PQI_DATA_IN_OUT_PROTOCOL_ERROR:
		pkt->pkt_reason = CMD_TERMINATED;
		pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET;
		break;

	case PQI_DATA_IN_OUT_HARDWARE_ERROR:
		pkt->pkt_reason = CMD_CMPLT;
		pkt->pkt_state |= STATE_GOT_BUS;
		break;

	default:
		pkt->pkt_reason = CMD_INCOMPLETE;
		break;
	}

	if (pkt_ok == B_FALSE)
		atomic_inc_32(&cmd->pc_device->pd_sense_errors);

	if (special_error_check(cmd) == B_TRUE) {
		pkt->pkt_reason = CMD_DEV_GONE;
		pkt->pkt_statistics = STAT_TERMINATED;
	}
	(void) pqi_cmd_action(cmd, PQI_CMD_CMPLT);
}

static void
fail_outstanding_cmds(pqi_state_t *s)
{
	pqi_device_t *devp;

	ASSERT(MUTEX_HELD(&s->s_mutex));

	pqi_fail_drive_cmds(&s->s_special_device, CMD_TRAN_ERR);
	for (devp = list_head(&s->s_devnodes); devp != NULL;
	    devp = list_next(&s->s_devnodes, devp)) {
		pqi_fail_drive_cmds(devp, CMD_TRAN_ERR);
	}
}

static void
set_sg_descriptor(pqi_sg_entry_t *sg, ddi_dma_cookie_t *cookie)
{
	sg->sg_addr = cookie->dmac_laddress;
	sg->sg_len = cookie->dmac_size;
	sg->sg_flags = 0;
}

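/*
 * build_aio_sg_list -- fill in the scatter/gather list for an AIO request.
 *
 * Up to s_max_sg_per_iu - 1 data descriptors are placed inline in the IU.
 * If the command has more DMA cookies than that, one additional inline
 * descriptor is written as a CISS_SG_CHAIN entry pointing at the
 * pre-allocated chain buffer (io_sg_chain_dma) and the remaining
 * descriptors are placed there instead. build_raid_sg_list() below does the
 * same for the RAID path request layout.
 */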
static void
build_aio_sg_list(pqi_state_t *s, pqi_aio_path_request_t *rqst,
    pqi_cmd_t *cmd, pqi_io_request_t *io)
{
	int i;
	int max_sg_per_iu;
	uint16_t iu_length;
	uint8_t chained;
	uint8_t num_sg_in_iu = 0;
	ddi_dma_cookie_t *cookies;
	pqi_sg_entry_t *sg;

	iu_length = offsetof(struct pqi_aio_path_request, ap_sglist) -
	    PQI_REQUEST_HEADER_LENGTH;

	if (cmd->pc_dmaccount == 0)
		goto out;

	sg = rqst->ap_sglist;
	cookies = cmd->pc_cached_cookies;
	max_sg_per_iu = s->s_max_sg_per_iu - 1;
	i = 0;
	chained = 0;

	for (;;) {
		set_sg_descriptor(sg, cookies);
		if (!chained)
			num_sg_in_iu++;
		i++;
		if (i == cmd->pc_dmaccount)
			break;
		sg++;
		cookies++;
		if (i == max_sg_per_iu) {
			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
			    sizeof (*sg);
			sg->sg_flags = CISS_SG_CHAIN;
			chained = 1;
			num_sg_in_iu++;
			sg = (pqi_sg_entry_t *)
			    io->io_sg_chain_dma->alloc_memory;
		}
	}
	sg->sg_flags = CISS_SG_LAST;
	rqst->partial = chained;
	if (chained) {
		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
		    DDI_DMA_SYNC_FORDEV);
	}
	iu_length += num_sg_in_iu * sizeof (*sg);

out:
	rqst->header.iu_length = iu_length;
	rqst->num_sg_descriptors = num_sg_in_iu;
}

static void
build_raid_sg_list(pqi_state_t *s, pqi_raid_path_request_t *rqst,
    pqi_cmd_t *cmd, pqi_io_request_t *io)
{
	int i = 0;
	int max_sg_per_iu;
	int num_sg_in_iu = 0;
	uint16_t iu_length;
	uint8_t chained = 0;
	ddi_dma_cookie_t *cookies;
	pqi_sg_entry_t *sg;

	iu_length = offsetof(struct pqi_raid_path_request, rp_sglist) -
	    PQI_REQUEST_HEADER_LENGTH;

	if (cmd->pc_dmaccount == 0)
		goto out;

	sg = rqst->rp_sglist;
	cookies = cmd->pc_cached_cookies;
	max_sg_per_iu = s->s_max_sg_per_iu - 1;

	for (;;) {
		set_sg_descriptor(sg, cookies);
		if (!chained)
			num_sg_in_iu++;
		i++;
		if (i == cmd->pc_dmaccount)
			break;
		sg++;
		cookies++;
		if (i == max_sg_per_iu) {
			ASSERT(io->io_sg_chain_dma != NULL);
			sg->sg_addr = io->io_sg_chain_dma->dma_addr;
			sg->sg_len = (cmd->pc_dmaccount - num_sg_in_iu) *
			    sizeof (*sg);
			sg->sg_flags = CISS_SG_CHAIN;
			chained = 1;
			num_sg_in_iu++;
			sg = (pqi_sg_entry_t *)
			    io->io_sg_chain_dma->alloc_memory;
		}
	}
	sg->sg_flags = CISS_SG_LAST;
	rqst->rp_partial = chained;
	if (chained) {
		(void) ddi_dma_sync(io->io_sg_chain_dma->handle, 0, 0,
		    DDI_DMA_SYNC_FORDEV);
	}
	iu_length += num_sg_in_iu * sizeof (*sg);

out:
	rqst->header.iu_length = iu_length;
}

static uint32_t
read_heartbeat_counter(pqi_state_t *s)
{
	return (ddi_get32(s->s_datap, s->s_heartbeat_counter));
}

static void
take_ctlr_offline(pqi_state_t *s)
{
	int num_passes = 5;
	int i;
	pqi_device_t *d;
	pqi_cmd_t *c, *nc;
	pqi_io_request_t *io;
	uint32_t active_count;

	/*
	 * 1) Why always panic here?
	 * Firmware resets don't work on the Microsemi HBA when the firmware
	 * is hung. The code as written fails outstanding commands and tries
	 * to reset the HBA. Since the reset doesn't work, the HBA is left in
	 * an offline state and further commands sent (retries and new
	 * commands) are also failed. Eventually ZFS will panic with a deadman
	 * timer, but before that COMSTAR will see I/O requests error out and
	 * send I/O errors back to the client, which causes corruption since
	 * these errors are no different than a device that starts to fail.
	 * So, instead of trying to play nice the driver now panics, which
	 * will allow HA to fail fast to the other node.
	 *
	 * 2) Why not just remove this routine and call panic from the
	 * heartbeat routine?
	 * I'm hoping this is a temporary workaround. We have been asking
	 * for more documentation on the product and we've been told there
	 * isn't any available. It has been implied that some HBAs do support
	 * firmware resets. Documentation would therefore enable the driver
	 * to determine the model number and adjust its behavior, such as
	 * panicking on a firmware hang or trying a reset.
	 */
	if (1)
		panic("Firmware hung");

	d = &s->s_special_device;
	mutex_enter(&d->pd_mutex);
	while ((c = list_remove_head(&d->pd_cmd_list)) != NULL) {
		io = c->pc_io_rqst;
		io->io_status = PQI_DATA_IN_OUT_ERROR;

		mutex_exit(&d->pd_mutex);
		(io->io_cb)(io, io->io_context);
		mutex_enter(&d->pd_mutex);
	}
	mutex_exit(&d->pd_mutex);

	/*
	 * If pqi_reset_ctl() completes successfully the queues will be marked
	 * B_TRUE and the controller will be marked online again.
	 */
	mutex_enter(&s->s_mutex);
	for (i = 0; i < s->s_num_queue_groups; i++)
		s->s_queue_groups[i].qg_active = B_FALSE;
	s->s_offline = B_TRUE;
	fail_outstanding_cmds(s);
	mutex_exit(&s->s_mutex);

	/*
	 * All commands that could be canceled have been. It's possible there
	 * are commands currently running that are about to complete. Give
	 * them up to 5 seconds to finish. If they haven't completed by then
	 * they are most likely hung in the firmware of the HBA, so go ahead
	 * and reset the firmware.
	 */
	while (num_passes-- > 0) {
		active_count = count_oustanding_cmds(s);
		if (active_count == 0)
			break;
		drv_usecwait(MICROSEC);
	}

	/*
	 * Any commands remaining are hung in the controller firmware, so
	 * go ahead and time them out so that the upper layers know what's
	 * happening.
	 */
	mutex_enter(&s->s_mutex);
	for (d = list_head(&s->s_devnodes); d != NULL;
	    d = list_next(&s->s_devnodes, d)) {
		mutex_enter(&d->pd_mutex);
		while ((c = list_head(&d->pd_cmd_list)) != NULL) {
			struct scsi_pkt *pkt = CMD2PKT(c);

			nc = list_next(&d->pd_cmd_list, c);
			ASSERT(pkt);
			if (pkt != NULL) {
				pkt->pkt_reason = CMD_TIMEOUT;
				pkt->pkt_statistics = STAT_TIMEOUT;
			}
			(void) pqi_cmd_action_nolock(c, PQI_CMD_TIMEOUT);
			c = nc;
		}
		mutex_exit(&d->pd_mutex);
	}
	mutex_exit(&s->s_mutex);

	cmn_err(CE_WARN, "Firmware Status: 0x%x", G32(s, sis_firmware_status));

	if (pqi_reset_ctl(s) == B_FALSE) {
		cmn_err(CE_WARN, "Failed to reset controller");
		return;
	}

	/*
	 * This will have the effect of releasing the device's dip
	 * structure from the NDI layer due to s_offline == B_TRUE.
	 */
	ndi_devi_enter(scsi_vhci_dip);
	ndi_devi_enter(s->s_dip);
	(void) pqi_config_all(s->s_dip, s);
	ndi_devi_exit(s->s_dip);
	ndi_devi_exit(scsi_vhci_dip);
}

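/*
 * free_elem_count -- number of free elements in a circular queue.
 *
 * The count is reduced by one, following the usual circular-queue
 * convention of leaving one element unused so that pi == ci always means
 * empty. For example, with per_iq = 8, pi = 2 and ci = 5, used is
 * 8 - 5 + 2 = 5 and the function returns 8 - 5 - 1 = 2.
 */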
static uint32_t
free_elem_count(pqi_index_t pi, pqi_index_t ci, uint32_t per_iq)
{
	pqi_index_t used;

	if (pi >= ci) {
		used = pi - ci;
	} else {
		used = per_iq - ci + pi;
	}
	return (per_iq - used - 1);
}

static boolean_t
is_aio_enabled(pqi_device_t *d)
{
	return (d->pd_aio_enabled ? B_TRUE : B_FALSE);
}