1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Racktop Systems, Inc. 14 */ 15 16 /* 17 * This file implements the interfaces for communicating with the MegaRAID HBA. 18 * There are three basic interfaces: 19 * - the device registers, which provide basic information about the controller 20 * hardware and the features it supports, as well as control registers used 21 * during sending and reception of I/O frames 22 * - Fusion-MPT v2.5, perhaps later, which defines the format of the I/O frames 23 * used for communicating with the HBA and virtual and physical devices that 24 * are attached to it 25 * - MFI, the MegaRAID Firmware Interface, which are sent and received as MPT 26 * payloads to control and communicate with the RAID controller. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/scsi/scsi.h> 33 34 #include <sys/cpuvar.h> 35 36 #include "lmrc.h" 37 #include "lmrc_reg.h" 38 #include "lmrc_raid.h" 39 #include "lmrc_phys.h" 40 41 static uint32_t lmrc_read_reg(lmrc_t *, uint32_t); 42 static void lmrc_write_reg(lmrc_t *, uint32_t, uint32_t); 43 static int lmrc_transition_to_ready(lmrc_t *); 44 static void lmrc_process_mptmfi_passthru(lmrc_t *, lmrc_mpt_cmd_t *); 45 static int lmrc_poll_mfi(lmrc_t *, lmrc_mfi_cmd_t *, uint8_t); 46 static boolean_t lmrc_check_fw_fault(lmrc_t *); 47 static int lmrc_get_event_log_info(lmrc_t *, lmrc_evt_log_info_t *); 48 static void lmrc_aen_handler(void *); 49 static void lmrc_complete_aen(lmrc_t *, lmrc_mfi_cmd_t *); 50 static int lmrc_register_aen(lmrc_t *, uint32_t); 51 52 /* 53 * Device register access functions. 54 * 55 * Due to the way ddi_get* and ddi_put* work, we'll need to calculate the 56 * absolute virtual address of the registers ourselves. 57 * 58 * For read accesses, employ an erratum workaround for Aero controllers. In some 59 * cases, reads of certain registers will intermittently return all zeros. As a 60 * workaround, retry the read up to three times until a non-zero value is read. 61 * Supposedly this is enough; every other driver I looked at does this.
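 * (Note that a register whose value is legitimately zero will simply be read three times and zero returned anyway, so the retry loop below is harmless for such registers.)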
62 */ 63 static uint32_t 64 lmrc_read_reg_1(lmrc_t *lmrc, uint32_t reg) 65 { 66 uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg); 67 return (ddi_get32(lmrc->l_reghandle, addr)); 68 } 69 70 static uint32_t 71 lmrc_read_reg(lmrc_t *lmrc, uint32_t reg) 72 { 73 if (lmrc->l_class != LMRC_ACLASS_AERO) 74 return (lmrc_read_reg_1(lmrc, reg)); 75 76 /* Workaround for the hardware erratum in Aero controllers */ 77 for (uint_t i = 0; i < 3; i++) { 78 uint32_t val = lmrc_read_reg_1(lmrc, reg); 79 80 if (val != 0) 81 return (val); 82 } 83 84 return (0); 85 } 86 87 static void 88 lmrc_write_reg(lmrc_t *lmrc, uint32_t reg, uint32_t val) 89 { 90 uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg); 91 ddi_put32(lmrc->l_reghandle, addr, val); 92 } 93 94 static void 95 lmrc_write_reg64(lmrc_t *lmrc, uint32_t reg, uint64_t val) 96 { 97 uint64_t *addr = (uint64_t *)((uintptr_t)lmrc->l_regmap + reg); 98 ddi_put64(lmrc->l_reghandle, addr, val); 99 } 100 101 /* 102 * Interrupt control 103 * 104 * There are two interrupt registers for host driver use, HostInterruptStatus 105 * and HostInterruptMask. Most of the bits in each register are reserved and 106 * must be masked and/or preserved when used. 107 */ 108 void 109 lmrc_disable_intr(lmrc_t *lmrc) 110 { 111 uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET); 112 113 /* Disable all known interrupts: reset, reply, and doorbell. */ 114 mask |= MPI2_HIM_RESET_IRQ_MASK; 115 mask |= MPI2_HIM_REPLY_INT_MASK; 116 mask |= MPI2_HIM_IOC2SYS_DB_MASK; 117 118 lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask); 119 120 /* Dummy read to force pci flush. Probably bogus but harmless. */ 121 (void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET); 122 } 123 124 void 125 lmrc_enable_intr(lmrc_t *lmrc) 126 { 127 uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET); 128 129 /* Enable the reply interrupts and the doorbell interrupts. */ 130 mask &= ~MPI2_HIM_REPLY_INT_MASK; 131 mask &= ~MPI2_HIM_IOC2SYS_DB_MASK; 132 133 /* Clear outstanding interrupts before enabling any. */ 134 lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0); 135 /* Dummy read to force pci flush. Probably bogus but harmless. */ 136 (void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); 137 138 lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask); 139 /* Dummy read to force pci flush. Probably bogus but harmless. */ 140 (void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET); 141 } 142 143 uint_t 144 lmrc_intr_ack(lmrc_t *lmrc) 145 { 146 uint32_t mask = 147 MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT | MPI2_HIS_IOC2SYS_DB_STATUS; 148 uint32_t status; 149 150 status = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET); 151 152 if ((status & mask) == 0) 153 return (DDI_INTR_UNCLAIMED); 154 155 if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_SUCCESS) { 156 ddi_fm_service_impact(lmrc->l_dip, DDI_SERVICE_LOST); 157 return (DDI_INTR_UNCLAIMED); 158 } 159 160 return (DDI_INTR_CLAIMED); 161 } 162 163 /* 164 * Fusion-MPT requests 165 * 166 * The controller expects to have access to a large chunk of DMA memory, into 167 * which the driver writes fixed-size I/O requests for the controller to 168 * process. To notify the hardware about a new request, a request descriptor is 169 * written to the queue port registers which includes the SMID of the request. 170 * This memory isn't really a queue, though, as it seems there are no 171 * constraints about ordering of the requests.
All that matters is that there 172 * is a valid request at the address that corresponds with the SMID in the 173 * descriptor. 174 * 175 * If the hardware supports MPI 2.6 atomic request descriptors, which are a 176 * 32bit subset of the 64bit MPI 2.0/2.5 request descriptors, the descriptor is 177 * sent to the controller in a single 32bit write into a device register. 178 * 179 * For all other descriptor types, we'll employ a 64bit write to the queue 180 * registers, assuming that provides the required atomicity. 181 */ 182 void 183 lmrc_send_atomic_request(lmrc_t *lmrc, lmrc_atomic_req_desc_t req_desc) 184 { 185 if (lmrc->l_atomic_desc_support) { 186 lmrc_write_reg(lmrc, 187 MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET, 188 req_desc.rd_reg); 189 } else { 190 lmrc_req_desc_t rd; 191 192 bzero(&rd, sizeof (rd)); 193 rd.rd_atomic = req_desc; 194 195 lmrc_send_request(lmrc, rd); 196 } 197 } 198 199 void 200 lmrc_send_request(lmrc_t *lmrc, lmrc_req_desc_t req_desc) 201 { 202 lmrc_write_reg64(lmrc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, 203 req_desc.rd_reg); 204 } 205 206 lmrc_atomic_req_desc_t 207 lmrc_build_atomic_request(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt, uint8_t flags) 208 { 209 lmrc_atomic_req_desc_t req_desc; 210 211 VERIFY3U(mpt->mpt_smid, !=, 0); 212 213 /* 214 * Select the reply queue based on the CPU id to distribute reply load 215 * among queues. 216 */ 217 mpt->mpt_queue = CPU->cpu_id % lmrc->l_max_reply_queues; 218 219 bzero(&req_desc, sizeof (req_desc)); 220 221 req_desc.rd_atomic.RequestFlags = flags; 222 req_desc.rd_atomic.MSIxIndex = mpt->mpt_queue; 223 req_desc.rd_atomic.SMID = mpt->mpt_smid; 224 225 return (req_desc); 226 } 227 228 /* 229 * Reply Processing 230 * 231 * The controller will post replies to completed requests in the DMA memory 232 * provided for that purpose. This memory is divided into equally-sized chunks, 233 * each being a separate reply queue that is also associated with an interrupt 234 * vector. The replies are fixed size structures and will be written by the 235 * hardware in order of completion into the queue. For each queue, there is a 236 * register to tell the hardware which replies have been consumed by the driver. 237 * 238 * In response to an interrupt, the driver will walk the reply queue associated 239 * with the interrupt vector at the last known position and processes all 240 * completed replies. After a number of replies have been processed, or if no 241 * more replies are ready to be processed, the controller will be notified about 242 * the last reply index to be processed by writing the appropriate register. 243 */ 244 245 /* 246 * lmrc_get_next_reply_desc 247 * 248 * Get the next unprocessed reply descriptor for a queue, or NULL if there is 249 * none. 250 */ 251 static Mpi2ReplyDescriptorsUnion_t * 252 lmrc_get_next_reply_desc(lmrc_t *lmrc, int queue) 253 { 254 Mpi2ReplyDescriptorsUnion_t *desc; 255 256 desc = lmrc->l_reply_dma.ld_buf; 257 258 desc += (queue * lmrc->l_reply_alloc_sz) / sizeof (*desc); 259 desc += lmrc->l_last_reply_idx[queue]; 260 261 VERIFY3S(ddi_dma_sync(lmrc->l_reply_dma.ld_hdl, 262 (void *)desc - lmrc->l_reply_dma.ld_buf, sizeof (*desc), 263 DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS); 264 265 /* 266 * Check if this is an unused reply descriptor, indicating that 267 * we've reached the end of replies in this queue. 268 * 269 * Even if the descriptor is only "half unused" we can't use it.
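 * A reply descriptor is presumably not written by the hardware as a single atomic 64bit store, which is why both 32bit words are checked against the unused marks below; consumed descriptors are reset back to these marks by lmrc_process_replies() and lmrc_poll_for_reply().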
270 */ 271 if (desc->Words.Low == MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK || 272 desc->Words.High == MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK) 273 return (NULL); 274 275 /* advance last reply index, wrap around if necessary */ 276 lmrc->l_last_reply_idx[queue]++; 277 if (lmrc->l_last_reply_idx[queue] >= lmrc->l_reply_q_depth) 278 lmrc->l_last_reply_idx[queue] = 0; 279 280 return (desc); 281 } 282 283 /* 284 * lmrc_write_rphi 285 * 286 * Write the Reply Post Host Index register for queue. 287 */ 288 static void 289 lmrc_write_rphi(lmrc_t *lmrc, uint32_t queue) 290 { 291 int reg = 0; 292 uint32_t val = (queue << 24) | lmrc->l_last_reply_idx[queue]; 293 294 if (lmrc->l_intr_type != DDI_INTR_TYPE_MSIX) 295 VERIFY3U(queue, ==, 0); 296 297 if (lmrc->l_msix_combined) { 298 reg = queue / 8; 299 val &= 0x07ffffff; 300 } 301 302 lmrc_write_reg(lmrc, lmrc->l_rphi[reg], val); 303 } 304 305 /* 306 * lmrc_process_mpt_pkt 307 * 308 * Process a reply to a MPT IO request. Update the scsi_pkt according to status, 309 * ex_status, and data_len, setting up the ARQ pkt if necessary. 310 */ 311 static void 312 lmrc_process_mpt_pkt(lmrc_t *lmrc, struct scsi_pkt *pkt, uint8_t status, 313 uint8_t ex_status, uint32_t data_len) 314 { 315 pkt->pkt_statistics = 0; 316 pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD | 317 STATE_XFERRED_DATA | STATE_GOT_STATUS; 318 319 pkt->pkt_resid = pkt->pkt_dma_len - data_len; 320 321 switch (status) { 322 case MFI_STAT_OK: 323 case MFI_STAT_LD_CC_IN_PROGRESS: 324 case MFI_STAT_LD_RECON_IN_PROGRESS: 325 pkt->pkt_reason = CMD_CMPLT; 326 pkt->pkt_scbp[0] = STATUS_GOOD; 327 break; 328 329 case MFI_STAT_SCSI_DONE_WITH_ERROR: 330 case MFI_STAT_LD_LBA_OUT_OF_RANGE: { 331 struct scsi_arq_status *arq = 332 (struct scsi_arq_status *)pkt->pkt_scbp; 333 334 pkt->pkt_reason = CMD_CMPLT; 335 arq->sts_status.sts_chk = 1; 336 337 pkt->pkt_state |= STATE_ARQ_DONE; 338 arq->sts_rqpkt_reason = CMD_CMPLT; 339 arq->sts_rqpkt_resid = 0; 340 arq->sts_rqpkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET | 341 STATE_SENT_CMD | STATE_XFERRED_DATA; 342 *(uint8_t *)&arq->sts_rqpkt_status = STATUS_GOOD; 343 break; 344 } 345 case MFI_STAT_LD_OFFLINE: 346 case MFI_STAT_DEVICE_NOT_FOUND: 347 pkt->pkt_reason = CMD_DEV_GONE; 348 pkt->pkt_statistics = STAT_DISCON; 349 break; 350 351 default: 352 dev_err(lmrc->l_dip, CE_WARN, "!command failed, status = %x, " 353 "ex_status = %x, cdb[0] = %x", status, ex_status, 354 pkt->pkt_cdbp[0]); 355 pkt->pkt_reason = CMD_TRAN_ERR; 356 break; 357 } 358 } 359 360 /* 361 * lmrc_poll_for_reply 362 * 363 * During a panic we'll have to resort to polled I/O to write core dumps. 364 * Repeatedly check the reply queue for a new reply associated with the 365 * given request descriptor and complete it, or return an error if we get 366 * no reply within a reasonable time. 367 */ 368 int 369 lmrc_poll_for_reply(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt) 370 { 371 clock_t max_wait = LMRC_IO_TIMEOUT * MILLISEC * 10; 372 Mpi25SCSIIORequest_t *io_req = mpt->mpt_io_frame; 373 Mpi2ReplyDescriptorsUnion_t *desc; 374 uint16_t desc_smid; 375 376 VERIFY(ddi_in_panic()); 377 378 /* 379 * Walk the reply queue. Discard entries which we aren't 380 * looking for. 
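 * (With max_wait initialized to LMRC_IO_TIMEOUT * MILLISEC * 10 and a 100us wait per empty iteration, this polls for roughly LMRC_IO_TIMEOUT seconds before giving up.)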
381 */ 382 do { 383 desc = lmrc_get_next_reply_desc(lmrc, mpt->mpt_queue); 384 if (desc == NULL) { 385 if (max_wait == 0) 386 return (TRAN_FATAL_ERROR); 387 388 drv_usecwait(100); 389 max_wait--; 390 continue; 391 } 392 393 desc_smid = desc->SCSIIOSuccess.SMID; 394 395 /* reset descriptor */ 396 desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK; 397 desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK; 398 399 lmrc_write_rphi(lmrc, mpt->mpt_queue); 400 } while (desc == NULL || desc_smid != mpt->mpt_smid); 401 402 VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl, 403 (void *)io_req - lmrc->l_ioreq_dma.ld_buf, 404 LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORKERNEL), 405 ==, DDI_SUCCESS); 406 407 /* If this is I/O, process it. */ 408 if (io_req->Function == LMRC_MPI2_FUNCTION_LD_IO_REQUEST || 409 io_req->Function == MPI2_FUNCTION_SCSI_IO_REQUEST) { 410 lmrc_process_mpt_pkt(lmrc, mpt->mpt_pkt, 411 io_req->VendorRegion.rc_status, 412 io_req->VendorRegion.rc_exstatus, io_req->DataLength); 413 } 414 415 return (TRAN_ACCEPT); 416 } 417 418 /* 419 * lmrc_process_replies 420 * 421 * Process all new reply entries in a queue in response to an interrupt. 422 */ 423 int 424 lmrc_process_replies(lmrc_t *lmrc, uint8_t queue) 425 { 426 int nprocessed = 0; 427 Mpi2ReplyDescriptorsUnion_t *desc; 428 429 for (desc = lmrc_get_next_reply_desc(lmrc, queue); 430 desc != NULL; 431 desc = lmrc_get_next_reply_desc(lmrc, queue)) { 432 Mpi2SCSIIOSuccessReplyDescriptor_t *reply = 433 &desc->SCSIIOSuccess; 434 uint16_t smid = reply->SMID; 435 lmrc_mpt_cmd_t *mpt = lmrc->l_mpt_cmds[smid - 1]; 436 lmrc_tgt_t *tgt = NULL; 437 Mpi25SCSIIORequest_t *io_req; 438 struct scsi_pkt *pkt; 439 struct scsi_device *sd; 440 441 VERIFY3U(reply->SMID, <=, lmrc->l_max_fw_cmds); 442 443 mutex_enter(&mpt->mpt_lock); 444 mpt->mpt_complete = B_TRUE; 445 pkt = mpt->mpt_pkt; 446 io_req = mpt->mpt_io_frame; 447 448 VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl, 449 (void *)io_req - lmrc->l_ioreq_dma.ld_buf, 450 LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, 451 DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS); 452 453 454 switch (io_req->Function) { 455 case MPI2_FUNCTION_SCSI_TASK_MGMT: 456 VERIFY0(pkt); 457 VERIFY0(list_link_active(&mpt->mpt_node)); 458 cv_signal(&mpt->mpt_cv); 459 break; 460 461 case MPI2_FUNCTION_SCSI_IO_REQUEST: 462 case LMRC_MPI2_FUNCTION_LD_IO_REQUEST: 463 VERIFY(pkt != NULL); 464 465 sd = scsi_address_device(&pkt->pkt_address); 466 VERIFY(sd != NULL); 467 468 tgt = scsi_device_hba_private_get(sd); 469 VERIFY(tgt != NULL); 470 471 lmrc_process_mpt_pkt(lmrc, pkt, 472 io_req->VendorRegion.rc_status, 473 io_req->VendorRegion.rc_exstatus, 474 io_req->DataLength); 475 476 break; 477 478 case LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST: 479 VERIFY0(pkt); 480 VERIFY0(list_link_active(&mpt->mpt_node)); 481 lmrc_process_mptmfi_passthru(lmrc, mpt); 482 break; 483 484 default: 485 mutex_exit(&mpt->mpt_lock); 486 dev_err(lmrc->l_dip, CE_PANIC, 487 "reply received for unknown Function %x", 488 io_req->Function); 489 } 490 491 mutex_exit(&mpt->mpt_lock); 492 493 if (pkt != NULL) { 494 lmrc_tgt_rem_active_mpt(tgt, mpt); 495 atomic_dec_uint(&lmrc->l_fw_outstanding_cmds); 496 scsi_hba_pkt_comp(pkt); 497 } 498 499 /* reset descriptor */ 500 desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK; 501 desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK; 502 503 nprocessed++; 504 505 if (nprocessed % LMRC_THRESHOLD_REPLY_COUNT == 0) 506 lmrc_write_rphi(lmrc, queue); 507 } 508 509 if (nprocessed != 0 && nprocessed % LMRC_THRESHOLD_REPLY_COUNT != 
0) 510 lmrc_write_rphi(lmrc, queue); 511 512 return (DDI_INTR_CLAIMED); 513 } 514 515 516 /* 517 * MFI - MegaRAID Firmware Interface 518 */ 519 520 /* 521 * lmrc_build_mptmfi_passthru 522 * 523 * MFI commands are sent as MPT MFI passthrough I/O requests. To be able to send 524 * a MFI frame to the RAID controller, we need to have a MPT command set up as 525 * a MPT I/O request and a one-entry SGL pointing to the MFI command. 526 * 527 * As there's only a small number of MFI commands compared to the amount of MPT 528 * commands, the MPT command for each MFI is pre-allocated at attach time and 529 * initialized here. 530 */ 531 int 532 lmrc_build_mptmfi_passthru(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi) 533 { 534 Mpi25SCSIIORequest_t *io_req; 535 const ddi_dma_cookie_t *cookie; 536 lmrc_mpt_cmd_t *mpt; 537 538 mpt = lmrc_get_mpt(lmrc); 539 if (mpt == NULL) 540 return (DDI_FAILURE); 541 542 /* lmrc_get_mpt() should return the mpt locked */ 543 ASSERT(mutex_owned(&mpt->mpt_lock)); 544 545 mfi->mfi_mpt = mpt; 546 mpt->mpt_mfi = mfi; 547 548 io_req = mpt->mpt_io_frame; 549 io_req->Function = LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST; 550 io_req->ChainOffset = lmrc->l_chain_offset_mfi_pthru; 551 552 cookie = ddi_dma_cookie_one(mfi->mfi_frame_dma.ld_hdl); 553 lmrc_dma_build_sgl(lmrc, mpt, cookie, 1); 554 555 VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl, 556 (void *)io_req - lmrc->l_ioreq_dma.ld_buf, 557 LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORDEV), 558 ==, DDI_SUCCESS); 559 560 /* 561 * As we're not sending this command to the hardware any time soon, 562 * drop the mutex before we return. 563 */ 564 mutex_exit(&mpt->mpt_lock); 565 566 return (DDI_SUCCESS); 567 } 568 569 /* 570 * lmrc_process_mptmfi_passthru 571 * 572 * When a MPT MFI passthrough command completes, invoke the callback if there 573 * is one. Panic if an invalid command completed as that should never happen. 574 */ 575 static void 576 lmrc_process_mptmfi_passthru(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt) 577 { 578 lmrc_mfi_cmd_t *mfi; 579 lmrc_mfi_header_t *hdr; 580 581 VERIFY3P(mpt->mpt_mfi, !=, NULL); 582 mfi = mpt->mpt_mfi; 583 VERIFY0(list_link_active(&mfi->mfi_node)); 584 585 hdr = &mfi->mfi_frame->mf_hdr; 586 587 if ((hdr->mh_flags & MFI_FRAME_DIR_READ) != 0) 588 (void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0, 589 mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL); 590 591 switch (hdr->mh_cmd) { 592 case MFI_CMD_DCMD: 593 case MFI_CMD_LD_SCSI_IO: 594 case MFI_CMD_PD_SCSI_IO: 595 case MFI_CMD_ABORT: 596 mutex_enter(&mfi->mfi_lock); 597 if (mfi->mfi_callback != NULL) 598 mfi->mfi_callback(lmrc, mfi); 599 mutex_exit(&mfi->mfi_lock); 600 break; 601 602 case MFI_CMD_INVALID: 603 default: 604 dev_err(lmrc->l_dip, CE_PANIC, 605 "invalid MFI cmd completion received, cmd = %x", 606 hdr->mh_cmd); 607 break; 608 } 609 } 610 611 /* 612 * lmrc_issue_mfi 613 * 614 * Post a MFI command to the firmware. Reset the cmd_status to invalid. Build 615 * a MPT MFI passthru command if necessary and a MPT atomic request descriptor 616 * before posting the request. The MFI command's mutex must be held.
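 *
 * A minimal sketch of the usual calling sequence, assuming the MFI frame has
 * already been built (e.g. by lmrc_get_dcmd()); lmrc_issue_blocked_mfi() below
 * implements exactly this pattern:
 *
 *	mutex_enter(&mfi->mfi_lock);
 *	lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi);
 *	ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
 *	mutex_exit(&mfi->mfi_lock);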
617 */ 618 void 619 lmrc_issue_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, lmrc_mfi_cmd_cb_t *cb) 620 { 621 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 622 lmrc_atomic_req_desc_t req_desc; 623 624 ASSERT(mutex_owned(&mfi->mfi_lock)); 625 626 if ((hdr->mh_flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE) == 0) { 627 VERIFY3U(cb, !=, NULL); 628 mfi->mfi_callback = cb; 629 } else { 630 VERIFY3U(cb, ==, NULL); 631 } 632 633 hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS; 634 635 req_desc = lmrc_build_atomic_request(lmrc, mfi->mfi_mpt, 636 MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO); 637 638 (void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0, 639 mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV); 640 641 if ((hdr->mh_flags & MFI_FRAME_DIR_WRITE) != 0) 642 (void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0, 643 mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORDEV); 644 645 lmrc_send_atomic_request(lmrc, req_desc); 646 } 647 648 /* 649 * lmrc_poll_mfi 650 * 651 * Poll a MFI command for completion, waiting up to max_wait secs. Repeatedly 652 * check the command status until it changes to something that is not invalid. 653 * 654 * Trigger an online controller reset on timeout. 655 */ 656 static int 657 lmrc_poll_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait) 658 { 659 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 660 lmrc_dma_t *dma = &mfi->mfi_frame_dma; 661 clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC); 662 clock_t now; 663 664 ASSERT(mutex_owned(&mfi->mfi_lock)); 665 666 do { 667 (void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len, 668 DDI_DMA_SYNC_FORKERNEL); 669 if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS) 670 break; 671 672 (void) cv_reltimedwait(&mfi->mfi_cv, &mfi->mfi_lock, 673 drv_usectohz(MILLISEC), TR_MILLISEC); 674 now = ddi_get_lbolt(); 675 } while (!lmrc->l_fw_fault && now <= timeout); 676 677 if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS) 678 return (DDI_SUCCESS); 679 680 if (now > timeout) { 681 dev_err(lmrc->l_dip, CE_WARN, 682 "!%s: command timeout after %ds", __func__, max_wait); 683 684 /* 685 * Signal the housekeeping thread to check for FW/HW faults, 686 * performing a reset if necessary. 687 */ 688 cv_signal(&lmrc->l_thread_cv); 689 } 690 691 return (DDI_FAILURE); 692 } 693 694 /* 695 * lmrc_wait_mfi 696 * 697 * Wait for up to max_wait secs for a MFI command to complete. The cmd mutex 698 * must be held. 699 * 700 * Trigger an online controller reset on timeout. 701 */ 702 int 703 lmrc_wait_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait) 704 { 705 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 706 lmrc_dma_t *dma = &mfi->mfi_frame_dma; 707 clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC); 708 int ret; 709 710 ASSERT(mutex_owned(&mfi->mfi_lock)); 711 712 do { 713 ret = cv_timedwait(&mfi->mfi_cv, &mfi->mfi_lock, timeout); 714 715 (void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len, 716 DDI_DMA_SYNC_FORKERNEL); 717 718 } while (!lmrc->l_fw_fault && 719 hdr->mh_cmd_status == MFI_STAT_INVALID_STATUS && ret != -1); 720 721 if (!lmrc->l_fw_fault && ret != -1) 722 return (DDI_SUCCESS); 723 724 if (ret == -1) { 725 dev_err(lmrc->l_dip, CE_WARN, "!%s: blocked command timeout " 726 "after %ds, cmd = %d, status = %d", __func__, max_wait, 727 hdr->mh_cmd, hdr->mh_cmd_status); 728 729 /* 730 * Signal the housekeeping thread to check for FW/HW faults, 731 * performing a reset if necessary. 
732 */ 733 cv_signal(&lmrc->l_thread_cv); 734 } 735 736 return (DDI_FAILURE); 737 } 738 739 /* 740 * lmrc_wakeup_mfi 741 * 742 * Signal the CV associated with a MFI command to wake up the thread waiting 743 * for its completion. 744 */ 745 void 746 lmrc_wakeup_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *cmd) 747 { 748 ASSERT(mutex_owned(&cmd->mfi_lock)); 749 cv_signal(&cmd->mfi_cv); 750 } 751 752 /* 753 * lmrc_issue_blocked_mfi 754 * 755 * Post a MFI command to the firmware and wait for the command to complete. 756 */ 757 int 758 lmrc_issue_blocked_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi) 759 { 760 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 761 int ret; 762 763 mutex_enter(&mfi->mfi_lock); 764 lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi); 765 ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME); 766 mutex_exit(&mfi->mfi_lock); 767 768 if (ret == DDI_SUCCESS && hdr->mh_cmd_status == MFI_STAT_OK) 769 return (DDI_SUCCESS); 770 771 dev_err(lmrc->l_dip, CE_WARN, 772 "!%s: blocked command failure, cmd = %d, status = %d", 773 __func__, hdr->mh_cmd, hdr->mh_cmd_status); 774 775 return (ret); 776 } 777 778 /* 779 * lmrc_abort_cb 780 * 781 * Callback for any command that is to be aborted. 782 * 783 * If the command completed normally before it could be aborted, set the status 784 * to indicate the intended abortion. 785 */ 786 static void 787 lmrc_abort_cb(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi) 788 { 789 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 790 791 if (hdr->mh_cmd_status == MFI_STAT_OK) 792 hdr->mh_cmd_status = MFI_STAT_NOT_FOUND; 793 } 794 795 /* 796 * lmrc_abort_mfi 797 * 798 * Abort a MFI command. This is a bit tricky as the hardware may still complete 799 * it at any time. 800 * 801 * The mutex of the command to be aborted must be held to prevent it from 802 * completing behind our back. We'll replace its callback with our own, issue an 803 * ABORT command, and drop the mutex before we wait for the ABORT command to 804 * complete. 805 */ 806 static int 807 lmrc_abort_cmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi_to_abort) 808 { 809 lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc); 810 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 811 lmrc_mfi_abort_payload_t *abort = &mfi->mfi_frame->mf_abort; 812 lmrc_mfi_cmd_cb_t *orig_cb = mfi_to_abort->mfi_callback; 813 int ret; 814 815 ASSERT(mutex_owned(&mfi_to_abort->mfi_lock)); 816 817 /* Replace the commands callback with our own. */ 818 mfi_to_abort->mfi_callback = lmrc_abort_cb; 819 820 hdr->mh_cmd = MFI_CMD_ABORT; 821 abort->ma_abort_context = mfi_to_abort->mfi_idx; 822 lmrc_dma_set_addr64(&mfi_to_abort->mfi_frame_dma, 823 &abort->ma_abort_mfi_phys_addr); 824 825 /* Send the ABORT. */ 826 mutex_enter(&mfi->mfi_lock); 827 lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi); 828 829 /* 830 * Drop the mutex of the command to be aborted, allowing it to proceed 831 * while we wait for the ABORT command to complete. 832 */ 833 mutex_exit(&mfi_to_abort->mfi_lock); 834 ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME); 835 mutex_exit(&mfi->mfi_lock); 836 837 /* 838 * The ABORT command may fail if cmd_to_abort has completed already. 839 * Treat any other failure as fatal, restore the callback and fail. 840 */ 841 if (ret != DDI_SUCCESS && hdr->mh_cmd_status != MFI_STAT_NOT_FOUND) { 842 mutex_enter(&mfi_to_abort->mfi_lock); 843 mfi_to_abort->mfi_callback = orig_cb; 844 goto out; 845 } 846 847 /* 848 * Wait for the aborted command to complete. If we time out on this 849 * there's little we can do here, so we restore the callback and fail. 
850 */ 851 mutex_enter(&mfi_to_abort->mfi_lock); 852 ret = lmrc_poll_mfi(lmrc, mfi_to_abort, LMRC_INTERNAL_CMD_WAIT_TIME); 853 mfi_to_abort->mfi_callback = orig_cb; 854 855 if (ret != DDI_SUCCESS) 856 goto out; 857 858 /* Wake up anyone waiting on the aborted command. */ 859 if (mfi_to_abort->mfi_callback != NULL) 860 mfi_to_abort->mfi_callback(lmrc, mfi_to_abort); 861 862 out: 863 lmrc_put_mfi(mfi); 864 ASSERT(mutex_owned(&mfi_to_abort->mfi_lock)); 865 return (ret); 866 } 867 868 869 /* 870 * Controller Initialization and Housekeeping 871 */ 872 873 /* 874 * lmrc_check_fw_fault 875 * 876 * Check the firmware state. If faulted, return B_TRUE. 877 * Return B_FALSE otherwise. 878 */ 879 static boolean_t 880 lmrc_check_fw_fault(lmrc_t *lmrc) 881 { 882 uint32_t status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 883 uint32_t fw_state = LMRC_FW_STATE(status); 884 885 if (fw_state == LMRC_FW_STATE_FAULT) 886 return (B_TRUE); 887 888 return (B_FALSE); 889 } 890 891 /* 892 * lmrc_wait_for_reg 893 * 894 * Repeatedly read the register and check that 'bits' match 'exp'. 895 */ 896 static boolean_t 897 lmrc_wait_for_reg(lmrc_t *lmrc, uint32_t reg, uint32_t bits, uint32_t exp, 898 uint64_t max_wait) 899 { 900 uint32_t val; 901 uint64_t i; 902 903 max_wait *= MILLISEC / 100; 904 905 for (i = 0; i < max_wait; i++) { 906 delay(drv_usectohz(100 * MILLISEC)); 907 val = lmrc_read_reg(lmrc, reg); 908 909 if ((val & bits) == exp) 910 return (B_TRUE); 911 } 912 913 return (B_FALSE); 914 } 915 916 static int 917 lmrc_hard_reset(lmrc_t *lmrc) 918 { 919 int ret = DDI_SUCCESS; 920 921 /* Write the reset key sequence. */ 922 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 923 MPI2_WRSEQ_FLUSH_KEY_VALUE); 924 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 925 MPI2_WRSEQ_1ST_KEY_VALUE); 926 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 927 MPI2_WRSEQ_2ND_KEY_VALUE); 928 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 929 MPI2_WRSEQ_3RD_KEY_VALUE); 930 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 931 MPI2_WRSEQ_4TH_KEY_VALUE); 932 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 933 MPI2_WRSEQ_5TH_KEY_VALUE); 934 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 935 MPI2_WRSEQ_6TH_KEY_VALUE); 936 937 /* Check diag write enable. */ 938 if (!lmrc_wait_for_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET, 939 MPI2_DIAG_DIAG_WRITE_ENABLE, MPI2_DIAG_DIAG_WRITE_ENABLE, 940 LMRC_RESET_TIMEOUT)) { 941 dev_err(lmrc->l_dip, CE_WARN, "diag unlock failed"); 942 return (DDI_FAILURE); 943 } 944 945 /* Reset IOC. */ 946 lmrc_write_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET, 947 lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) | 948 MPI2_DIAG_RESET_ADAPTER); 949 delay(drv_usectohz(MPI2_HARD_RESET_PCIE_FIRST_READ_DELAY_MICRO_SEC)); 950 951 /* Check the reset adapter bit. */ 952 if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) & 953 MPI2_DIAG_RESET_ADAPTER) == 0) 954 goto out; 955 956 delay(drv_usectohz(MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC)); 957 958 /* Check the reset adapter bit again. */ 959 if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) & 960 MPI2_DIAG_RESET_ADAPTER) == 0) 961 goto out; 962 963 ret = DDI_FAILURE; 964 out: 965 lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET, 966 MPI2_WRSEQ_FLUSH_KEY_VALUE); 967 return (ret); 968 } 969 970 /* 971 * lmrc_reset_ctrl 972 * 973 * Attempt to reset the controller, if the hardware supports it. 974 * If reset is unsupported or the reset fails repeatedly, we shut the 975 * controller down. 
976 */ 977 static int 978 lmrc_reset_ctrl(lmrc_t *lmrc) 979 { 980 uint32_t status, fw_state, reset_adapter; 981 int max_wait, i; 982 983 if (lmrc->l_disable_online_ctrl_reset) 984 return (DDI_FAILURE); 985 986 status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 987 fw_state = LMRC_FW_STATE(status); 988 reset_adapter = LMRC_FW_RESET_ADAPTER(status); 989 990 if (fw_state == LMRC_FW_STATE_FAULT && reset_adapter == 0) { 991 dev_err(lmrc->l_dip, CE_WARN, 992 "FW in fault state, but reset not supported"); 993 goto out; 994 } 995 996 for (i = 0; i < LMRC_MAX_RESET_TRIES; i++) { 997 dev_err(lmrc->l_dip, CE_WARN, "resetting..."); 998 999 if (lmrc_hard_reset(lmrc) != DDI_SUCCESS) 1000 continue; 1001 1002 /* Wait for the FW state to move beyond INIT. */ 1003 max_wait = LMRC_IO_TIMEOUT * MILLISEC / 100; 1004 do { 1005 status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 1006 fw_state = LMRC_FW_STATE(status); 1007 1008 if (fw_state <= LMRC_FW_STATE_FW_INIT) 1009 delay(drv_usectohz(100 * MILLISEC)); 1010 } while (fw_state <= LMRC_FW_STATE_FW_INIT && max_wait > 0); 1011 1012 if (fw_state <= LMRC_FW_STATE_FW_INIT) { 1013 dev_err(lmrc->l_dip, CE_WARN, 1014 "fw state <= LMRC_FW_STATE_FW_INIT, state = %x", 1015 fw_state); 1016 continue; 1017 } 1018 1019 return (DDI_SUCCESS); 1020 } 1021 1022 dev_err(lmrc->l_dip, CE_WARN, "reset failed"); 1023 out: 1024 /* Stop the controller. */ 1025 lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, MFI_STOP_ADP); 1026 (void) lmrc_read_reg(lmrc, MPI2_DOORBELL_OFFSET); 1027 1028 return (DDI_FAILURE); 1029 } 1030 1031 /* 1032 * lmrc_tgt_complete_cmd 1033 * 1034 * In case of a controller reset, complete the cmd and clean up. This is done 1035 * in a taskq to avoid locking and list manipulation headaches. 1036 */ 1037 static void 1038 lmrc_tgt_complete_cmd(void *arg) 1039 { 1040 lmrc_scsa_cmd_t *cmd = arg; 1041 struct scsi_pkt *pkt; 1042 lmrc_t *lmrc; 1043 1044 mutex_enter(&cmd->sc_mpt->mpt_lock); 1045 1046 /* Just in case the command completed before the taskq was run... */ 1047 if (cmd->sc_mpt->mpt_complete) { 1048 mutex_exit(&cmd->sc_mpt->mpt_lock); 1049 return; 1050 } 1051 1052 lmrc = cmd->sc_mpt->mpt_lmrc; 1053 pkt = cmd->sc_mpt->mpt_pkt; 1054 1055 pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD; 1056 pkt->pkt_reason = CMD_RESET; 1057 pkt->pkt_statistics = STAT_BUS_RESET; 1058 mutex_exit(&cmd->sc_mpt->mpt_lock); 1059 1060 lmrc_tgt_rem_active_mpt(cmd->sc_tgt, cmd->sc_mpt); 1061 atomic_dec_uint(&lmrc->l_fw_outstanding_cmds); 1062 1063 scsi_hba_pkt_comp(pkt); 1064 } 1065 1066 /* 1067 * lmrc_tgt_complete_cmds 1068 * 1069 * Walk the list of active commands of a target. Schedule a taskq to handle the 1070 * timeout processing and clean up. 
1071 */ 1072 static void 1073 lmrc_tgt_complete_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt) 1074 { 1075 lmrc_mpt_cmd_t *mpt; 1076 1077 mutex_enter(&tgt->tgt_mpt_active_lock); 1078 if (list_is_empty(&tgt->tgt_mpt_active)) { 1079 mutex_exit(&tgt->tgt_mpt_active_lock); 1080 return; 1081 } 1082 1083 for (mpt = lmrc_tgt_first_active_mpt(tgt); 1084 mpt != NULL; 1085 mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) { 1086 lmrc_scsa_cmd_t *cmd = mpt->mpt_pkt->pkt_ha_private; 1087 1088 ASSERT(mutex_owned(&mpt->mpt_lock)); 1089 VERIFY(mpt->mpt_pkt != NULL); 1090 VERIFY(cmd != NULL); 1091 1092 if (mpt->mpt_complete) 1093 continue; 1094 1095 taskq_dispatch_ent(lmrc->l_taskq, lmrc_tgt_complete_cmd, cmd, 1096 TQ_NOSLEEP, &mpt->mpt_tqent); 1097 } 1098 mutex_exit(&tgt->tgt_mpt_active_lock); 1099 } 1100 1101 /* 1102 * lmrc_tgt_timeout_cmds 1103 * 1104 * Walk the list of active commands of a target. Try to abort commands which are 1105 * overdue. 1106 */ 1107 static int 1108 lmrc_tgt_timeout_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt) 1109 { 1110 lmrc_mpt_cmd_t *mpt; 1111 int ret = DDI_SUCCESS; 1112 1113 mutex_enter(&tgt->tgt_mpt_active_lock); 1114 if (list_is_empty(&tgt->tgt_mpt_active)) 1115 goto out; 1116 1117 for (mpt = lmrc_tgt_first_active_mpt(tgt); 1118 mpt != NULL; 1119 mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) { 1120 hrtime_t now; 1121 1122 ASSERT(mutex_owned(&mpt->mpt_lock)); 1123 VERIFY(mpt->mpt_pkt != NULL); 1124 1125 /* Just in case the command completed by now... */ 1126 if (mpt->mpt_complete) 1127 continue; 1128 1129 now = gethrtime(); 1130 1131 if (now > mpt->mpt_timeout) { 1132 /* 1133 * Give the packet a bit more time for the abort to 1134 * complete. 1135 */ 1136 mpt->mpt_timeout = now + LMRC_IO_TIMEOUT * NANOSEC; 1137 1138 /* 1139 * If the abort failed for whatever reason, 1140 * we can stop here as only a controller reset 1141 * can get us back into a sane state. 1142 */ 1143 if (lmrc_abort_mpt(lmrc, tgt, mpt) != 1) { 1144 mutex_exit(&mpt->mpt_lock); 1145 ret = DDI_FAILURE; 1146 goto out; 1147 } 1148 } 1149 } 1150 1151 out: 1152 mutex_exit(&tgt->tgt_mpt_active_lock); 1153 return (ret); 1154 } 1155 1156 /* 1157 * lmrc_thread 1158 * 1159 * Check whether the controller is FW fault state. Check all targets for 1160 * commands which have timed out. 1161 */ 1162 void 1163 lmrc_thread(void *arg) 1164 { 1165 lmrc_t *lmrc = arg; 1166 1167 do { 1168 int i; 1169 1170 /* Wake up at least once a minute. */ 1171 mutex_enter(&lmrc->l_thread_lock); 1172 (void) cv_reltimedwait(&lmrc->l_thread_cv, &lmrc->l_thread_lock, 1173 drv_usectohz(60 * MICROSEC), TR_SEC); 1174 mutex_exit(&lmrc->l_thread_lock); 1175 1176 if (lmrc->l_thread_stop) 1177 continue; 1178 1179 lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc); 1180 1181 /* 1182 * Check all targets for timed-out commands. If we find any 1183 * and fail to abort them, we pretend the FW has faulted to 1184 * trigger a reset. 1185 */ 1186 if (!lmrc->l_fw_fault) { 1187 for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) { 1188 if (lmrc_tgt_timeout_cmds(lmrc, 1189 &lmrc->l_targets[i]) != DDI_SUCCESS) { 1190 lmrc->l_fw_fault = B_TRUE; 1191 break; 1192 } 1193 } 1194 } 1195 1196 /* 1197 * If the FW is faulted, try to recover by performing a reset. 1198 */ 1199 if (lmrc->l_fw_fault) { 1200 int ret; 1201 1202 lmrc_disable_intr(lmrc); 1203 1204 /* 1205 * Even if the reset failed, it will have stopped the 1206 * controller and we can complete all outstanding 1207 * commands. 
1208 */ 1209 ret = lmrc_reset_ctrl(lmrc); 1210 1211 (void) lmrc_abort_outstanding_mfi(lmrc, 1212 LMRC_MAX_MFI_CMDS); 1213 1214 for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) 1215 lmrc_tgt_complete_cmds(lmrc, 1216 &lmrc->l_targets[i]); 1217 1218 if (ret != DDI_SUCCESS) { 1219 dev_err(lmrc->l_dip, CE_WARN, "reset failed"); 1220 continue; 1221 } 1222 1223 if (lmrc_transition_to_ready(lmrc) != DDI_SUCCESS) 1224 continue; 1225 1226 if (lmrc_ioc_init(lmrc) != DDI_SUCCESS) 1227 continue; 1228 1229 lmrc_enable_intr(lmrc); 1230 1231 if (lmrc_start_aen(lmrc) != DDI_SUCCESS) { 1232 dev_err(lmrc->l_dip, CE_WARN, 1233 "failed to re-initiate AEN"); 1234 continue; 1235 } 1236 1237 lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc); 1238 } 1239 } while (!lmrc->l_thread_stop); 1240 1241 thread_exit(); 1242 } 1243 1244 /* 1245 * lmrc_transition_to_ready 1246 * 1247 * Move firmware to ready state. At attach time, the FW can potentially be in 1248 * any one of several possible states. If the FW is in operational, waiting-for- 1249 * handshake states, take steps to bring it to ready state. Otherwise, wait for 1250 * the FW to reach ready state. 1251 */ 1252 static int 1253 lmrc_transition_to_ready(lmrc_t *lmrc) 1254 { 1255 uint32_t status, new_status; 1256 uint32_t fw_state; 1257 uint8_t max_wait; 1258 uint_t i; 1259 1260 status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 1261 fw_state = LMRC_FW_STATE(status); 1262 max_wait = LMRC_RESET_TIMEOUT; 1263 1264 while (fw_state != LMRC_FW_STATE_READY) { 1265 switch (fw_state) { 1266 case LMRC_FW_STATE_FAULT: 1267 dev_err(lmrc->l_dip, CE_NOTE, "FW is in fault state!"); 1268 if (lmrc_reset_ctrl(lmrc) != DDI_SUCCESS) 1269 return (DDI_FAILURE); 1270 break; 1271 1272 case LMRC_FW_STATE_WAIT_HANDSHAKE: 1273 /* Set the CLR bit in inbound doorbell */ 1274 lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, 1275 MFI_INIT_CLEAR_HANDSHAKE | MFI_INIT_HOTPLUG); 1276 break; 1277 1278 case LMRC_FW_STATE_BOOT_MSG_PENDING: 1279 lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, 1280 MFI_INIT_HOTPLUG); 1281 break; 1282 1283 case LMRC_FW_STATE_OPERATIONAL: 1284 /* Bring it to READY state, wait up to 10s */ 1285 lmrc_disable_intr(lmrc); 1286 lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, 1287 MFI_RESET_FLAGS); 1288 (void) lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1, 1289 0, 10); 1290 break; 1291 1292 case LMRC_FW_STATE_UNDEFINED: 1293 /* This state should not last for more than 2 sec */ 1294 case LMRC_FW_STATE_BB_INIT: 1295 case LMRC_FW_STATE_FW_INIT: 1296 case LMRC_FW_STATE_FW_INIT_2: 1297 case LMRC_FW_STATE_DEVICE_SCAN: 1298 case LMRC_FW_STATE_FLUSH_CACHE: 1299 break; 1300 default: 1301 dev_err(lmrc->l_dip, CE_WARN, "Unknown FW state %x", 1302 fw_state); 1303 return (DDI_FAILURE); 1304 } 1305 1306 /* 1307 * The current state should not last for more than max_wait 1308 * seconds. 
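 * (The loop below polls once per millisecond, so max_wait * 1000 iterations correspond to roughly max_wait seconds.)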
1309 */ 1310 for (i = 0; i < max_wait * 1000; i++) { 1311 new_status = lmrc_read_reg(lmrc, 1312 MPI26_SCRATCHPAD0_OFFSET); 1313 1314 if (status != new_status) 1315 break; 1316 1317 delay(drv_usectohz(MILLISEC)); 1318 } 1319 1320 if (new_status == status) { 1321 dev_err(lmrc->l_dip, CE_WARN, 1322 "FW state (%x) hasn't changed in %d seconds", 1323 fw_state, max_wait); 1324 return (DDI_FAILURE); 1325 } 1326 1327 status = new_status; 1328 fw_state = LMRC_FW_STATE(status); 1329 } 1330 1331 if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_FM_OK) 1332 return (DDI_FAILURE); 1333 1334 return (DDI_SUCCESS); 1335 } 1336 1337 /* 1338 * lmrc_adapter_init 1339 * 1340 * Get the hardware and firmware into a usable state, and fetch some basic 1341 * information from the registers to calculate sizes of basic data structures. 1342 */ 1343 int 1344 lmrc_adapter_init(lmrc_t *lmrc) 1345 { 1346 uint32_t reg; 1347 int ret; 1348 int i; 1349 1350 ret = lmrc_transition_to_ready(lmrc); 1351 if (ret != DDI_SUCCESS) 1352 return (ret); 1353 1354 /* 1355 * Get maximum RAID map size. 1356 */ 1357 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD2_OFFSET); 1358 lmrc->l_max_raid_map_sz = LMRC_MAX_RAID_MAP_SZ(reg); 1359 1360 lmrc->l_max_reply_queues = 1; 1361 lmrc->l_rphi[0] = MPI2_REPLY_POST_HOST_INDEX_OFFSET; 1362 1363 /* 1364 * Apparently, bit 27 of the scratch pad register indicates whether 1365 * MSI-X is supported by the firmware. 1366 */ 1367 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 1368 1369 if (LMRC_FW_MSIX_ENABLED(reg)) { 1370 lmrc->l_fw_msix_enabled = B_TRUE; 1371 1372 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET); 1373 lmrc->l_max_reply_queues = LMRC_MAX_REPLY_QUEUES_EXT(reg); 1374 1375 if (lmrc->l_max_reply_queues > LMRC_MAX_REPLY_POST_HOST_INDEX) { 1376 lmrc->l_msix_combined = B_TRUE; 1377 lmrc->l_rphi[0] = 1378 MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET; 1379 } 1380 1381 /* 1382 * Compute reply post index register addresses 1-15. 1383 */ 1384 for (i = 1; i < LMRC_MAX_REPLY_POST_HOST_INDEX; i++) { 1385 lmrc->l_rphi[i] = i * 0x10 + 1386 MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET; 1387 } 1388 } 1389 1390 /* 1391 * Get the number of commands the firmware supports. Use one less, 1392 * because reply_q_depth is based on one more than this. XXX: Why? 1393 */ 1394 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET); 1395 lmrc->l_max_fw_cmds = LMRC_FW_MAX_CMD(reg) - 1; 1396 1397 if (lmrc->l_max_fw_cmds < LMRC_MAX_MFI_CMDS) { 1398 dev_err(lmrc->l_dip, CE_WARN, "!max_fw_cmds too low: %d", 1399 lmrc->l_max_fw_cmds); 1400 return (DDI_FAILURE); 1401 } 1402 1403 /* 1404 * Reserve some commands for MFI, the remainder is for SCSI commands. 1405 */ 1406 lmrc->l_max_scsi_cmds = lmrc->l_max_fw_cmds - LMRC_MAX_MFI_CMDS; 1407 1408 /* 1409 * XXX: This magic calculation isn't explained anywhere. Let's see... 1410 * lmrc_max_fw_cmds + 1 gives us what was reported in the register, 1411 * That + 15 is for rounding it up the next multiple of 16, which 1412 * / 16 * 16 does. 1413 * And apparently we want twice that much for queue depth. Why? 1414 * 1415 * So in reality, the queue depth is based on at least one more than 1416 * lmrc_max_fw_cmds, but it could be even more. That makes the above 1417 * statement about lmrc_max_fw_cmds questionable. 1418 */ 1419 lmrc->l_reply_q_depth = (lmrc->l_max_fw_cmds + 1 + 15) / 16 * 16 * 2; 1420 1421 /* Allocation size of one reply queue, based on depth. 
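 * As a purely hypothetical example: if the firmware reported 1000 commands, l_max_fw_cmds would be 999, l_reply_q_depth would be (999 + 1 + 15) / 16 * 16 * 2 = 2016 entries, and each reply queue would get 2016 * sizeof (Mpi2ReplyDescriptorsUnion_t) bytes.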
*/ 1422 lmrc->l_reply_alloc_sz = 1423 sizeof (Mpi2ReplyDescriptorsUnion_t) * lmrc->l_reply_q_depth; 1424 1425 /* Allocation size of the DMA memory used for all MPI I/O frames. */ 1426 lmrc->l_io_frames_alloc_sz = LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * 1427 (lmrc->l_max_fw_cmds + 2); 1428 1429 /* 1430 * If LMRC_EXT_CHAIN_SIZE_SUPPORT is set in scratch pad 1, firmware 1431 * supports an extended IO chain frame which is 4 times the size of a 1432 * legacy firmware frame. 1433 * Legacy Firmware frame size is (8 * 128) = 1K 1434 * 1M IO Firmware frame size is (8 * 128 * 4) = 4K 1435 */ 1436 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET); 1437 lmrc->l_max_chain_frame_sz = LMRC_MAX_CHAIN_SIZE(reg) * 1438 (LMRC_EXT_CHAIN_SIZE_SUPPORT(reg) ? LMRC_1MB_IO : LMRC_256K_IO); 1439 1440 /* 1441 * Check whether the controller supports DMA to the full 64bit address 1442 * space. 1443 */ 1444 lmrc->l_64bit_dma_support = LMRC_64BIT_DMA_SUPPORT(reg); 1445 1446 /* 1447 * We use a I/O frame size of 256 bytes, that is what 1448 * LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE is set to. 1449 * 1450 * The offset of the SGL in the I/O frame is 128, so 1451 * there are 128 bytes left for 8 SGEs of 16 bytes each. 1452 */ 1453 lmrc->l_max_sge_in_main_msg = 1454 (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE - 1455 offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof (Mpi25SGEIOUnion_t); 1456 1457 /* 1458 * Similarly, number of SGE in a SGE chain frame. 1459 */ 1460 lmrc->l_max_sge_in_chain = 1461 lmrc->l_max_chain_frame_sz / sizeof (Mpi25SGEIOUnion_t); 1462 1463 /* 1464 * The total number of SGE we support in a transfer is sum of 1465 * the above two, minus one for the link (last SGE in main msg). 1466 * 1467 * XXX: So why -2? 1468 */ 1469 lmrc->l_max_num_sge = 1470 lmrc->l_max_sge_in_main_msg + lmrc->l_max_sge_in_chain - 2; 1471 1472 /* 1473 * The offset of the last SGE in the I/O request, used for linking 1474 * the SGE chain frame if necessary. 1475 */ 1476 lmrc->l_chain_offset_io_request = 1477 (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE - 1478 sizeof (Mpi25SGEIOUnion_t)) / sizeof (Mpi25SGEIOUnion_t); 1479 1480 /* 1481 * For MFI passthru, the link to the SGE chain frame is always 1482 * the first SGE in the I/O frame, the other SGEs in the I/O frame 1483 * will not be used. 1484 */ 1485 lmrc->l_chain_offset_mfi_pthru = 1486 offsetof(Mpi25SCSIIORequest_t, SGL) / sizeof (Mpi25SGEIOUnion_t); 1487 1488 1489 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD3_OFFSET); 1490 if (LMRC_NVME_PAGE_SHIFT(reg) > LMRC_DEFAULT_NVME_PAGE_SHIFT) { 1491 lmrc->l_nvme_page_sz = 1 << LMRC_NVME_PAGE_SHIFT(reg); 1492 dev_err(lmrc->l_dip, CE_NOTE, "!NVME page size: %ld", 1493 lmrc->l_nvme_page_sz); 1494 } 1495 1496 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET); 1497 lmrc->l_fw_sync_cache_support = LMRC_SYNC_CACHE_SUPPORT(reg); 1498 1499 if (lmrc->l_class == LMRC_ACLASS_AERO) { 1500 reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET); 1501 lmrc->l_atomic_desc_support = 1502 LMRC_ATOMIC_DESCRIPTOR_SUPPORT(reg); 1503 } 1504 1505 return (DDI_SUCCESS); 1506 } 1507 1508 /* 1509 * lmrc_ioc_init 1510 * 1511 * Manually build a MFI IOC INIT command to setup basic operating parameters 1512 * such as the DMA parameters for the I/O request frames and the reply post 1513 * queues. Send the IOC INIT command using a special request descriptor which 1514 * directly includes the physical address of the MFI command frame. 
1515 * 1516 * After this command completes, the controller is ready to accept MPT commands 1517 * using the normal method of placing them in the I/O request DMA memory and 1518 * writing a MPT request descriptor to the appropriate registers. 1519 */ 1520 int 1521 lmrc_ioc_init(lmrc_t *lmrc) 1522 { 1523 lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc); 1524 lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr; 1525 lmrc_mfi_init_payload_t *init = &mfi->mfi_frame->mf_init; 1526 lmrc_req_desc_t req_desc; 1527 Mpi2IOCInitRequest_t *IOCInitMsg; 1528 lmrc_dma_t dma; 1529 int ret = DDI_SUCCESS; 1530 1531 ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &dma, 1532 sizeof (Mpi2IOCInitRequest_t), 256, DDI_DMA_CONSISTENT); 1533 if (ret != DDI_SUCCESS) { 1534 lmrc_put_mfi(mfi); 1535 dev_err(lmrc->l_dip, CE_WARN, 1536 "!%s: failed to allocate IOC command", __func__); 1537 return (DDI_FAILURE); 1538 } 1539 1540 IOCInitMsg = dma.ld_buf; 1541 IOCInitMsg->Function = MPI2_FUNCTION_IOC_INIT; 1542 IOCInitMsg->WhoInit = MPI2_WHOINIT_HOST_DRIVER; 1543 IOCInitMsg->MsgVersion = MPI2_VERSION; 1544 IOCInitMsg->HeaderVersion = MPI2_HEADER_VERSION; 1545 IOCInitMsg->SystemRequestFrameSize = 1546 LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4; 1547 IOCInitMsg->ReplyDescriptorPostQueueDepth = lmrc->l_reply_q_depth; 1548 lmrc_dma_set_addr64(&lmrc->l_reply_dma, 1549 (uint64_t *)&IOCInitMsg->ReplyDescriptorPostQueueAddress); 1550 lmrc_dma_set_addr64(&lmrc->l_ioreq_dma, 1551 (uint64_t *)&IOCInitMsg->SystemRequestFrameBaseAddress); 1552 IOCInitMsg->HostMSIxVectors = lmrc->l_max_reply_queues; 1553 /* XXX: Why NVMe? */ 1554 IOCInitMsg->HostPageSize = LMRC_DEFAULT_NVME_PAGE_SHIFT; 1555 1556 hdr->mh_cmd = MFI_CMD_INIT; 1557 hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS; 1558 hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; 1559 1560 hdr->mh_drv_opts.mc_support_additional_msix = 1; 1561 hdr->mh_drv_opts.mc_support_max_255lds = 1; 1562 hdr->mh_drv_opts.mc_support_ndrive_r1_lb = 1; 1563 hdr->mh_drv_opts.mc_support_security_protocol_cmds_fw = 1; 1564 hdr->mh_drv_opts.mc_support_ext_io_size = 1; 1565 1566 hdr->mh_data_xfer_len = lmrc_dma_get_size(&dma); 1567 1568 lmrc_dma_set_addr64(&dma, &init->mi_queue_info_new_phys_addr); 1569 1570 lmrc_dma_set_addr64(&mfi->mfi_frame_dma, &req_desc.rd_reg); 1571 VERIFY0(req_desc.rd_mfa_io.RequestFlags); 1572 req_desc.rd_mfa_io.RequestFlags = LMRC_REQ_DESCRIPT_FLAGS_MFA; 1573 1574 lmrc_disable_intr(lmrc); 1575 if (!lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1, 0, 10)) 1576 return (DDI_FAILURE); 1577 1578 (void) ddi_dma_sync(dma.ld_hdl, 0, dma.ld_len, DDI_DMA_SYNC_FORDEV); 1579 (void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0, 1580 mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV); 1581 1582 lmrc_send_request(lmrc, req_desc); 1583 1584 mutex_enter(&mfi->mfi_lock); 1585 ret = lmrc_poll_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME); 1586 mutex_exit(&mfi->mfi_lock); 1587 1588 if (ret != DDI_SUCCESS) { 1589 if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS) 1590 dev_err(lmrc->l_dip, CE_WARN, 1591 "!IOC Init failed, status = 0x%x", 1592 hdr->mh_cmd_status); 1593 } 1594 1595 lmrc_dma_free(&dma); 1596 lmrc_put_mfi(mfi); 1597 1598 return (ret); 1599 } 1600 1601 /* 1602 * lmrc_get_ctrl_info 1603 * 1604 * Build a MFI DCMD to get controller information from FW. Update the copy in 1605 * the soft state.
1606 */ 1607 static int 1608 lmrc_get_ctrl_info(lmrc_t *lmrc) 1609 { 1610 lmrc_ctrl_info_t *ci = lmrc->l_ctrl_info; 1611 lmrc_mfi_cmd_t *mfi; 1612 int ret; 1613 1614 mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, LMRC_DCMD_CTRL_GET_INFO, 1615 sizeof (lmrc_ctrl_info_t), 1); 1616 1617 if (mfi == NULL) 1618 return (DDI_FAILURE); 1619 1620 ret = lmrc_issue_blocked_mfi(lmrc, mfi); 1621 1622 if (ret != DDI_SUCCESS) 1623 goto out; 1624 1625 (void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0, 1626 mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL); 1627 bcopy(mfi->mfi_data_dma.ld_buf, ci, sizeof (lmrc_ctrl_info_t)); 1628 1629 out: 1630 lmrc_put_dcmd(lmrc, mfi); 1631 return (ret); 1632 } 1633 1634 /* 1635 * lmrc_fw_init 1636 * 1637 * Complete firmware initialization. At this point, we can already send MFI 1638 * commands, so we can start by getting the controller information from the 1639 * firmware and set up things in our soft state. Next we issue the commands 1640 * to get the PD map and RAID map, which will complete asynchronously when 1641 * new information is available and then re-send themselves. 1642 */ 1643 int 1644 lmrc_fw_init(lmrc_t *lmrc) 1645 { 1646 int drv_max_lds = LMRC_MAX_LOGICAL_DRIVES; 1647 lmrc_ctrl_info_t *ci = lmrc->l_ctrl_info; 1648 int ret; 1649 1650 ret = lmrc_get_ctrl_info(lmrc); 1651 if (ret != DDI_SUCCESS) { 1652 dev_err(lmrc->l_dip, CE_WARN, "!Unable to get FW ctrl info."); 1653 return (DDI_FAILURE); 1654 } 1655 1656 lmrc->l_disable_online_ctrl_reset = 1657 ci->ci_prop.cp_disable_online_ctrl_reset == 1; 1658 1659 lmrc->l_max_256_vd_support = 1660 ci->ci_adapter_opts3.ao3_support_max_ext_lds == 1; 1661 1662 if (ci->ci_max_lds > 64) { 1663 lmrc->l_max_256_vd_support = B_TRUE; 1664 drv_max_lds = LMRC_MAX_LOGICAL_DRIVES_EXT; 1665 } 1666 1667 lmrc->l_fw_supported_vd_count = min(ci->ci_max_lds, drv_max_lds); 1668 1669 lmrc->l_fw_supported_pd_count = min(ci->ci_max_pds, LMRC_MAX_PHYS_DEV); 1670 1671 lmrc->l_max_map_sz = lmrc->l_current_map_sz = 1672 lmrc->l_max_raid_map_sz * LMRC_MIN_MAP_SIZE; 1673 1674 lmrc->l_use_seqnum_jbod_fp = 1675 ci->ci_adapter_opts3.ao3_use_seq_num_jbod_FP != 0; 1676 1677 lmrc->l_pdmap_tgtid_support = 1678 ci->ci_adapter_opts4.ao4_support_pd_map_target_id != 0; 1679 1680 return (DDI_SUCCESS); 1681 } 1682 1683 1684 /* 1685 * lmrc_ctrl_shutdown 1686 * 1687 * Called by lmrc_quiesce() to send a shutdown command to the controller. 1688 * Cannot use locks, therefore cannot use lmrc_get_dcmd() or lmrc_get_mfi(). 1689 */ 1690 int 1691 lmrc_ctrl_shutdown(lmrc_t *lmrc) 1692 { 1693 lmrc_mfi_cmd_t *mfi = list_remove_head(&lmrc->l_mfi_cmd_list); 1694 lmrc_mfi_header_t *hdr; 1695 lmrc_mfi_dcmd_payload_t *dcmd; 1696 1697 if (mfi == NULL) 1698 return (DDI_FAILURE); 1699 1700 hdr = &mfi->mfi_frame->mf_hdr; 1701 dcmd = &mfi->mfi_frame->mf_dcmd; 1702 1703 hdr->mh_cmd = MFI_CMD_DCMD; 1704 hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; 1705 dcmd->md_opcode = LMRC_DCMD_CTRL_SHUTDOWN; 1706 1707 lmrc_disable_intr(lmrc); 1708 lmrc_issue_mfi(lmrc, mfi, NULL); 1709 1710 return (DDI_SUCCESS); 1711 } 1712 1713 /* 1714 * driver target state management 1715 * 1716 * The soft state of the controller instance keeps a pre-allocated array of 1717 * target structures for all possible targets, even though only a small number 1718 * of them are likely to be used. Each target structure contains a back link to 1719 * the soft state and a mutex, which are never cleared or changed when a target 1720 * is added or removed.
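 * This is also why lmrc_tgt_init() and lmrc_tgt_clear() below bzero the structure only from tgt_dev_id onwards, so that the fields preceding it survive.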
1721 */ 1722 1723 /* 1724 * lmrc_tgt_init 1725 * 1726 * Initialize the tgt structure for a newly discovered tgt. The same tgt 1727 * structure is used for PDs and LDs, the distinction can be made by the 1728 * presence or absence of tgt_pd_info. LDs are always of type disk, the type 1729 * of PDs is taken from their pd_info. If a device has no SAS WWN, we'll fake 1730 * the interconnect type to be PARALLEL to make sure device address isn't 1731 * misunderstood as a WWN by devfsadm. 1732 */ 1733 void 1734 lmrc_tgt_init(lmrc_tgt_t *tgt, uint16_t dev_id, char *addr, 1735 lmrc_pd_info_t *pd_info) 1736 { 1737 rw_enter(&tgt->tgt_lock, RW_WRITER); 1738 1739 bzero(&tgt->tgt_dev_id, 1740 sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id)); 1741 1742 tgt->tgt_dev_id = dev_id; 1743 tgt->tgt_pd_info = pd_info; 1744 tgt->tgt_interconnect_type = INTERCONNECT_SAS; 1745 1746 if (pd_info == NULL) { 1747 tgt->tgt_type = DTYPE_DIRECT; 1748 } else { 1749 tgt->tgt_type = pd_info->pd_scsi_dev_type; 1750 } 1751 1752 (void) strlcpy(tgt->tgt_wwnstr, addr, sizeof (tgt->tgt_wwnstr)); 1753 if (scsi_wwnstr_to_wwn(tgt->tgt_wwnstr, &tgt->tgt_wwn) != DDI_SUCCESS) { 1754 tgt->tgt_interconnect_type = INTERCONNECT_PARALLEL; 1755 tgt->tgt_wwn = dev_id; 1756 } 1757 1758 rw_exit(&tgt->tgt_lock); 1759 } 1760 1761 /* 1762 * lmrc_tgt_clear 1763 * 1764 * Reset the tgt structure of a target which is no longer present. 1765 */ 1766 void 1767 lmrc_tgt_clear(lmrc_tgt_t *tgt) 1768 { 1769 rw_enter(&tgt->tgt_lock, RW_WRITER); 1770 1771 if (tgt->tgt_pd_info != NULL) 1772 kmem_free(tgt->tgt_pd_info, sizeof (lmrc_pd_info_t)); 1773 1774 bzero(&tgt->tgt_dev_id, 1775 sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id)); 1776 tgt->tgt_dev_id = LMRC_DEVHDL_INVALID; 1777 rw_exit(&tgt->tgt_lock); 1778 } 1779 1780 /* 1781 * lmrc_tgt_find 1782 * 1783 * Walk the target list and find a tgt matching the given scsi_device. 1784 * Return the tgt read-locked. The targets_lock mutex must be held the 1785 * whole time. 1786 */ 1787 lmrc_tgt_t * 1788 lmrc_tgt_find(lmrc_t *lmrc, struct scsi_device *sd) 1789 { 1790 const char *ua = scsi_device_unit_address(sd); 1791 char *comma, wwnstr[SCSI_WWN_BUFLEN]; 1792 uint64_t wwn; 1793 unsigned long tgtid; 1794 lmrc_tgt_t *tgt; 1795 size_t i; 1796 1797 VERIFY(ua != NULL); 1798 1799 (void) strlcpy(wwnstr, ua, sizeof (wwnstr)); 1800 1801 /* 1802 * If the unit address is a valid target ID and within range for 1803 * VD IDs, use that. 1804 */ 1805 if (ddi_strtoul(wwnstr, &comma, 10, &tgtid) == 0 && 1806 *comma == ',' && 1807 tgtid <= lmrc->l_fw_supported_vd_count) { 1808 tgt = &lmrc->l_targets[tgtid]; 1809 1810 rw_enter(&tgt->tgt_lock, RW_READER); 1811 if (tgt->tgt_dev_id == tgtid && 1812 tgt->tgt_wwn == tgtid) { 1813 return (tgt); 1814 } 1815 rw_exit(&tgt->tgt_lock); 1816 } 1817 1818 /* Chop off ",lun" as scsi_wwnstr_to_wwn() can't handle it. */ 1819 comma = strchr(wwnstr, ','); 1820 if (comma != NULL) 1821 *comma = '\0'; 1822 1823 /* Else, if unit address is a valid WWN, look for that. */ 1824 if (scsi_wwnstr_to_wwn(wwnstr, &wwn) == DDI_SUCCESS) { 1825 for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) { 1826 tgt = &lmrc->l_targets[i]; 1827 1828 rw_enter(&tgt->tgt_lock, RW_READER); 1829 if (tgt->tgt_wwn == wwn) { 1830 return (tgt); 1831 } 1832 rw_exit(&tgt->tgt_lock); 1833 } 1834 } else { 1835 /* Do it the hard way and compare wwnstr. 
*/ 1836 for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) { 1837 tgt = &lmrc->l_targets[i]; 1838 1839 rw_enter(&tgt->tgt_lock, RW_READER); 1840 if (strcmp(tgt->tgt_wwnstr, wwnstr) == 0) { 1841 return (tgt); 1842 } 1843 rw_exit(&tgt->tgt_lock); 1844 } 1845 } 1846 1847 return (NULL); 1848 } 1849 1850 /* 1851 * MPT/MFI command management 1852 * 1853 * For each kind of command, MFI and MPT, the driver keeps an array of pre- 1854 * allocated and pre-initialized commands. Additionally, it keeps two lists of 1855 * currently unused commands. A set of functions is provided for each list to 1856 * get and put commands from/to the list. Commands are initialized during get(), 1857 * because having completed commands on the list can help in certain cases 1858 * during debugging. 1859 * 1860 * MPT commands in use for I/O are kept on a active command list of the target 1861 * they are addressing. All other types of commands are not kept on any list 1862 * while they are being processed by the hardware. When walking the command 1863 * arrays, busy commands not associated with a target can be distinguished by 1864 * not being linked on any list. 1865 */ 1866 1867 /* 1868 * lmrc_get_mpt 1869 * 1870 * Get a MPT command from the list and initialize it. Return the command locked. 1871 * Return NULL if the MPT command list is empty. 1872 */ 1873 lmrc_mpt_cmd_t * 1874 lmrc_get_mpt(lmrc_t *lmrc) 1875 { 1876 lmrc_mpt_cmd_t *mpt; 1877 Mpi25SCSIIORequest_t *io_req; 1878 1879 mutex_enter(&lmrc->l_mpt_cmd_lock); 1880 mpt = list_remove_head(&lmrc->l_mpt_cmd_list); 1881 mutex_exit(&lmrc->l_mpt_cmd_lock); 1882 if (mpt == NULL) 1883 return (NULL); 1884 1885 mutex_enter(&mpt->mpt_lock); 1886 bzero(mpt->mpt_io_frame, LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE); 1887 bzero(mpt->mpt_chain_dma.ld_buf, mpt->mpt_chain_dma.ld_len); 1888 bzero(mpt->mpt_sense_dma.ld_buf, mpt->mpt_sense_dma.ld_len); 1889 1890 mpt->mpt_mfi = NULL; 1891 mpt->mpt_pkt = NULL; 1892 1893 /* Set the offset of the SGL entries inside the MPT command. */ 1894 io_req = mpt->mpt_io_frame; 1895 io_req->SGLOffset0 = offsetof(Mpi25SCSIIORequest_t, SGL) / 4; 1896 1897 mpt->mpt_complete = B_FALSE; 1898 cv_init(&mpt->mpt_cv, NULL, CV_DRIVER, NULL); 1899 1900 return (mpt); 1901 } 1902 1903 /* 1904 * lmrc_put_mpt 1905 * 1906 * Put a MPT command back on the list. The command lock must be held when this 1907 * function is called, being unlocked only after the command has been put on 1908 * the free list. The command CV is destroyed, thereby asserting that no one is 1909 * still waiting on it. 1910 */ 1911 void 1912 lmrc_put_mpt(lmrc_mpt_cmd_t *mpt) 1913 { 1914 lmrc_t *lmrc = mpt->mpt_lmrc; 1915 1916 VERIFY(lmrc != NULL); 1917 1918 ASSERT0(list_link_active(&mpt->mpt_node)); 1919 ASSERT(mutex_owned(&mpt->mpt_lock)); 1920 cv_destroy(&mpt->mpt_cv); 1921 1922 mutex_enter(&lmrc->l_mpt_cmd_lock); 1923 list_insert_tail(&lmrc->l_mpt_cmd_list, mpt); 1924 mutex_exit(&lmrc->l_mpt_cmd_lock); 1925 mutex_exit(&mpt->mpt_lock); 1926 } 1927 1928 /* 1929 * lmrc_get_mfi 1930 * 1931 * Get a MFI command from the list and initialize it. 
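 * Unlike lmrc_get_mpt(), this is not expected to fail: the pre-allocated MFI commands are assumed to suffice for all concurrent users, hence the VERIFY below instead of a NULL check.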
/*
 * lmrc_get_mfi
 *
 * Get an MFI command from the list and initialize it.
 */
lmrc_mfi_cmd_t *
lmrc_get_mfi(lmrc_t *lmrc)
{
	lmrc_mfi_cmd_t *mfi;

	mutex_enter(&lmrc->l_mfi_cmd_lock);
	mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
	mutex_exit(&lmrc->l_mfi_cmd_lock);
	VERIFY(mfi != NULL);

	mutex_enter(&mfi->mfi_lock);
	bzero(mfi->mfi_frame, sizeof (lmrc_mfi_frame_t));
	mfi->mfi_frame->mf_hdr.mh_context = mfi->mfi_idx;
	mfi->mfi_callback = NULL;

	cv_init(&mfi->mfi_cv, NULL, CV_DRIVER, NULL);
	mutex_exit(&mfi->mfi_lock);

	return (mfi);
}

/*
 * lmrc_put_mfi
 *
 * Put an MFI command back on the list. Destroy the CV, thereby asserting
 * that no one is waiting on it.
 */
void
lmrc_put_mfi(lmrc_mfi_cmd_t *mfi)
{
	lmrc_t *lmrc = mfi->mfi_lmrc;

	VERIFY(lmrc != NULL);

	ASSERT0(list_link_active(&mfi->mfi_node));

	mutex_enter(&mfi->mfi_lock);

	cv_destroy(&mfi->mfi_cv);

	mutex_enter(&lmrc->l_mfi_cmd_lock);
	list_insert_tail(&lmrc->l_mfi_cmd_list, mfi);
	mutex_exit(&lmrc->l_mfi_cmd_lock);
	mutex_exit(&mfi->mfi_lock);
}

/*
 * lmrc_abort_outstanding_mfi
 *
 * Walk the MFI cmd array and abort each command which is still outstanding,
 * which is indicated by not being linked on l_mfi_cmd_list.
 *
 * As a special case, if the FW is in fault state, just call each command's
 * completion callback.
 */
int
lmrc_abort_outstanding_mfi(lmrc_t *lmrc, const size_t ncmd)
{
	int ret;
	int i;

	for (i = 0; i < ncmd; i++) {
		lmrc_mfi_cmd_t *mfi = lmrc->l_mfi_cmds[i];

		mutex_enter(&mfi->mfi_lock);
		if (list_link_active(&mfi->mfi_node)) {
			mutex_exit(&mfi->mfi_lock);
			continue;
		}

		/*
		 * If the FW is faulted, wake up anyone waiting on the command
		 * to clean it up.
		 */
		if (lmrc->l_fw_fault) {
			if (mfi->mfi_callback != NULL)
				mfi->mfi_callback(lmrc, mfi);
			mutex_exit(&mfi->mfi_lock);
			continue;
		}

		ret = lmrc_abort_cmd(lmrc, mfi);
		mutex_exit(&mfi->mfi_lock);
		if (ret != DDI_SUCCESS)
			return (ret);

		lmrc_dma_free(&mfi->mfi_data_dma);
		lmrc_put_mfi(mfi);
	}

	return (DDI_SUCCESS);
}

/*
 * lmrc_get_dcmd
 *
 * Build an MFI DCMD with DMA memory for data transfers.
 */
lmrc_mfi_cmd_t *
lmrc_get_dcmd(lmrc_t *lmrc, uint16_t flags, uint32_t opcode, uint32_t xferlen,
    uint_t align)
{
	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
	lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	lmrc_mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
	lmrc_dma_t *dma = &mfi->mfi_data_dma;
	int ret;

	hdr->mh_cmd = MFI_CMD_DCMD;
	hdr->mh_flags = flags;

	dcmd->md_opcode = opcode;

	if ((flags & MFI_FRAME_DIR_READ) != 0 ||
	    (flags & MFI_FRAME_DIR_WRITE) != 0) {
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, dma, xferlen,
		    align, DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS) {
			lmrc_put_mfi(mfi);
			return (NULL);
		}

		hdr->mh_flags |= MFI_FRAME_SGL64;
		hdr->mh_sge_count = 1;
		hdr->mh_data_xfer_len = lmrc_dma_get_size(dma);

		dcmd->md_sgl.ms64_length = lmrc_dma_get_size(dma);
		lmrc_dma_set_addr64(dma, &dcmd->md_sgl.ms64_phys_addr);
	}

	return (mfi);
}
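/*
 * Illustrative sketch (not part of the original code): the common blocking
 * DCMD pattern built on the helpers above, assuming a hypothetical opcode
 * HYP_DCMD_OPCODE and a hypothetical result structure hyp_result_t:
 *
 *	lmrc_mfi_cmd_t *mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ,
 *	    HYP_DCMD_OPCODE, sizeof (hyp_result_t), 1);
 *	if (mfi == NULL)
 *		return (DDI_FAILURE);
 *	ret = lmrc_issue_blocked_mfi(lmrc, mfi);
 *	if (ret == DDI_SUCCESS)
 *		bcopy(mfi->mfi_data_dma.ld_buf, &result, sizeof (result));
 *	lmrc_put_dcmd(lmrc, mfi);
 *
 * lmrc_get_event_log_info() below is a concrete instance of this pattern.
 */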
/*
 * lmrc_put_dcmd
 *
 * Free the DMA memory of an MFI DCMD and return the command back on the list.
 */
void
lmrc_put_dcmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	lmrc_dma_free(&mfi->mfi_data_dma);
	lmrc_put_mfi(mfi);
}


/*
 * Asynchronous Event Notifications
 */
/*
 * lmrc_get_event_log_info
 *
 * Get the Event Log Info from the firmware.
 */
static int
lmrc_get_event_log_info(lmrc_t *lmrc, lmrc_evt_log_info_t *eli)
{
	lmrc_mfi_cmd_t *mfi;
	int ret;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ,
	    LMRC_DCMD_CTRL_EVENT_GET_INFO, sizeof (lmrc_evt_log_info_t), 1);

	if (mfi == NULL)
		return (DDI_FAILURE);

	ret = lmrc_issue_blocked_mfi(lmrc, mfi);

	if (ret != DDI_SUCCESS)
		goto out;

	bcopy(mfi->mfi_data_dma.ld_buf, eli, sizeof (lmrc_evt_log_info_t));

out:
	lmrc_put_dcmd(lmrc, mfi);
	return (ret);
}

/*
 * lmrc_aen_handler
 *
 * Check the event code and handle it as needed. In the case of PD- or
 * LD-related events, invoke their special handlers.
 */
static void
lmrc_aen_handler(void *arg)
{
	lmrc_mfi_cmd_t *mfi = arg;
	lmrc_t *lmrc = mfi->mfi_lmrc;
	lmrc_evt_t *evt = mfi->mfi_data_dma.ld_buf;
	lmrc_mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
	int ret = DDI_FAILURE;

	/* Controller & Configuration specific events */
	switch (evt->evt_code) {
	case LMRC_EVT_CFG_CLEARED:
	case LMRC_EVT_CTRL_HOST_BUS_SCAN_REQD:
	case LMRC_EVT_FOREIGN_CFG_IMPORTED:
		ret = lmrc_get_pd_list(lmrc);
		if (ret != DDI_SUCCESS)
			break;

		ret = lmrc_get_ld_list(lmrc);
		break;

	case LMRC_EVT_CTRL_PROP_CHANGED:
		ret = lmrc_get_ctrl_info(lmrc);
		break;

	case LMRC_EVT_CTRL_PATROL_READ_START:
	case LMRC_EVT_CTRL_PATROL_READ_RESUMED:
	case LMRC_EVT_CTRL_PATROL_READ_COMPLETE:
	case LMRC_EVT_CTRL_PATROL_READ_CANT_START:
	case LMRC_EVT_CTRL_PERF_COLLECTION:
	case LMRC_EVT_CTRL_BOOTDEV_SET:
	case LMRC_EVT_CTRL_BOOTDEV_RESET:
	case LMRC_EVT_CTRL_PERSONALITY_CHANGE:
	case LMRC_EVT_CTRL_PERSONALITY_CHANGE_PEND:
	case LMRC_EVT_CTRL_NR_OF_VALID_SNAPDUMP:
		break;

	default:
		/* LD-specific events */
		if ((evt->evt_locale & LMRC_EVT_LOCALE_LD) != 0)
			ret = lmrc_raid_aen_handler(lmrc, evt);

		/* PD-specific events */
		else if ((evt->evt_locale & LMRC_EVT_LOCALE_PD) != 0)
			ret = lmrc_phys_aen_handler(lmrc, evt);

		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_NOTE, "!unknown AEN received, "
			    "seqnum = %d, timestamp = %d, code = %x, "
			    "locale = %x, class = %d, argtype = %d",
			    evt->evt_seqnum, evt->evt_timestamp, evt->evt_code,
			    evt->evt_locale, evt->evt_class, evt->evt_argtype);
		}
	}

	dev_err(lmrc->l_dip, CE_NOTE, "!%s", evt->evt_descr);

	/*
	 * Just reuse the command in its entirety. Increase the sequence
	 * number.
	 */
	dcmd->md_mbox_32[0] = evt->evt_seqnum + 1;
	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
	mutex_exit(&mfi->mfi_lock);
}
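/*
 * Descriptive note (not part of the original code): AEN processing forms a
 * loop. lmrc_start_aen() registers the first LMRC_DCMD_CTRL_EVENT_WAIT
 * command with the firmware; when that command completes, lmrc_complete_aen()
 * dispatches lmrc_aen_handler() on the taskq; the handler processes the event
 * and re-issues the same command with the next sequence number, arming the
 * next notification.
 */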
/*
 * lmrc_complete_aen
 *
 * An AEN was received, so schedule a taskq to process it.
 */
static void
lmrc_complete_aen(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	lmrc_mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;

	ASSERT(mutex_owned(&mfi->mfi_lock));

	if (hdr->mh_cmd_status != MFI_STAT_OK) {
		/* Was the command aborted? */
		if (hdr->mh_cmd_status == MFI_STAT_NOT_FOUND)
			return;

		dev_err(lmrc->l_dip, CE_WARN,
		    "!AEN failed, status = %d",
		    hdr->mh_cmd_status);
		taskq_dispatch_ent(lmrc->l_taskq, (task_func_t *)lmrc_put_mfi,
		    mfi, TQ_NOSLEEP, &mfi->mfi_tqent);
		return;
	}

	taskq_dispatch_ent(lmrc->l_taskq, lmrc_aen_handler, mfi, TQ_NOSLEEP,
	    &mfi->mfi_tqent);
}

/*
 * lmrc_register_aen
 *
 * In FreeBSD, this function checks for an existing AEN. If its class and
 * locale already include what is requested here, it just returns. Otherwise,
 * the existing AEN is aborted and a new one is created which includes the
 * previous locale and class as well as the new ones.
 *
 * Given that the driver (same as in FreeBSD) calls this function during attach
 * to create an AEN with LOCALE_ALL and CLASS_DEBUG, all of this would be dead
 * code anyway.
 */
static int
lmrc_register_aen(lmrc_t *lmrc, uint32_t seqnum)
{
	lmrc_evt_class_locale_t ecl = {
		.ecl_class = LMRC_EVT_CLASS_DEBUG,
		.ecl_locale = LMRC_EVT_LOCALE_ALL
	};

	lmrc_mfi_cmd_t *mfi;
	lmrc_mfi_dcmd_payload_t *dcmd;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ,
	    LMRC_DCMD_CTRL_EVENT_WAIT, sizeof (lmrc_evt_t), 1);

	if (mfi == NULL)
		return (DDI_FAILURE);

	dcmd = &mfi->mfi_frame->mf_dcmd;
	dcmd->md_mbox_32[0] = seqnum;
	dcmd->md_mbox_32[1] = ecl.ecl_word;

	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
	mutex_exit(&mfi->mfi_lock);

	return (DDI_SUCCESS);
}

/*
 * lmrc_start_aen
 *
 * Set up and enable AEN processing.
 */
int
lmrc_start_aen(lmrc_t *lmrc)
{
	lmrc_evt_log_info_t eli;
	int ret;

	bzero(&eli, sizeof (eli));

	/* Get the latest sequence number from the Event Log Info. */
	ret = lmrc_get_event_log_info(lmrc, &eli);
	if (ret != DDI_SUCCESS)
		return (ret);

	/* Register AEN with FW for latest sequence number + 1. */
	ret = lmrc_register_aen(lmrc, eli.eli_newest_seqnum + 1);
	return (ret);
}
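/*
 * Illustrative sketch (not part of the original code): per the comment on
 * lmrc_register_aen() above, AEN processing is started during attach. A
 * minimal, hypothetical call site might look like this:
 *
 *	if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
 *		dev_err(lmrc->l_dip, CE_WARN, "!failed to start AEN");
 *		goto fail;
 *	}
 */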