/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Racktop Systems, Inc.
 */

/*
 * This file implements the interfaces for communicating with the MegaRAID HBA.
 * There are three basic interfaces:
 * - the device registers, which provide basic information about the controller
 *   hardware and the features it supports, as well as control registers used
 *   during sending and reception of I/O frames
 * - Fusion-MPT v2.5, perhaps later, which defines the format of the I/O frames
 *   used for communicating with the HBA and virtual and physical devices that
 *   are attached to it
 * - MFI, the MegaRAID Firmware Interface, which are sent and received as MPT
 *   payloads to control and communicate with the RAID controller.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/scsi/scsi.h>

#include <sys/scsi/adapters/mfi/mfi.h>
#include <sys/scsi/adapters/mfi/mfi_evt.h>
#include <sys/scsi/adapters/mfi/mfi_pd.h>

#include <sys/cpuvar.h>

#include "lmrc.h"
#include "lmrc_reg.h"
#include "lmrc_raid.h"
#include "lmrc_phys.h"

static uint32_t lmrc_read_reg(lmrc_t *, uint32_t);
static void lmrc_write_reg(lmrc_t *, uint32_t, uint32_t);
static int lmrc_transition_to_ready(lmrc_t *);
static void lmrc_process_mptmfi_passthru(lmrc_t *, lmrc_mpt_cmd_t *);
static int lmrc_poll_mfi(lmrc_t *, lmrc_mfi_cmd_t *, uint8_t);
static boolean_t lmrc_check_fw_fault(lmrc_t *);
static int lmrc_get_event_log_info(lmrc_t *, mfi_evt_log_info_t *);
static void lmrc_aen_handler(void *);
static void lmrc_complete_aen(lmrc_t *, lmrc_mfi_cmd_t *);
static int lmrc_register_aen(lmrc_t *, uint32_t);

/*
 * Device register access functions.
 *
 * Due to the way ddi_get* and ddi_put* work, we'll need to calculate the
 * absolute virtual address of the registers ourselves.
 *
 * For read accesses, employ an erratum workaround for Aero controllers. In
 * some cases, reads of certain registers will intermittently return all zeros.
 * As a workaround, retry the read up to three times until a non-zero value is
 * read. Supposedly this is enough, every other driver I looked at does this.
 */
static uint32_t
lmrc_read_reg_1(lmrc_t *lmrc, uint32_t reg)
{
	uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg);
	return (ddi_get32(lmrc->l_reghandle, addr));
}

static uint32_t
lmrc_read_reg(lmrc_t *lmrc, uint32_t reg)
{
	if (lmrc->l_class != LMRC_ACLASS_AERO)
		return (lmrc_read_reg_1(lmrc, reg));

	/* Workaround for the hardware erratum in Aero controllers */
	for (uint_t i = 0; i < 3; i++) {
		uint32_t val = lmrc_read_reg_1(lmrc, reg);

		if (val != 0)
			return (val);
	}

	return (0);
}

static void
lmrc_write_reg(lmrc_t *lmrc, uint32_t reg, uint32_t val)
{
	uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg);
	ddi_put32(lmrc->l_reghandle, addr, val);
}

static void
lmrc_write_reg64(lmrc_t *lmrc, uint32_t reg, uint64_t val)
{
	uint64_t *addr = (uint64_t *)((uintptr_t)lmrc->l_regmap + reg);
	ddi_put64(lmrc->l_reghandle, addr, val);
}

/*
 * Interrupt control
 *
 * There are two interrupt registers for host driver use, HostInterruptStatus
 * and HostInterruptMask. Most of the bits in each register are reserved and
 * must be masked and/or preserved when used.
 */
void
lmrc_disable_intr(lmrc_t *lmrc)
{
	uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);

	/* Disable all known interrupts: reset, reply, and doorbell. */
	mask |= MPI2_HIM_RESET_IRQ_MASK;
	mask |= MPI2_HIM_REPLY_INT_MASK;
	mask |= MPI2_HIM_IOC2SYS_DB_MASK;

	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask);

	/* Dummy read to force pci flush. Probably bogus but harmless. */
	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
}

void
lmrc_enable_intr(lmrc_t *lmrc)
{
	uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);

	/* Enable the reply interrupts and the doorbell interrupts. */
	mask &= ~MPI2_HIM_REPLY_INT_MASK;
	mask &= ~MPI2_HIM_IOC2SYS_DB_MASK;

	/* Clear outstanding interrupts before enabling any. */
	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0);
	/* Dummy read to force pci flush. Probably bogus but harmless. */
	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);

	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask);
	/* Dummy read to force pci flush. Probably bogus but harmless. */
	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
}

uint_t
lmrc_intr_ack(lmrc_t *lmrc)
{
	uint32_t mask =
	    MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT | MPI2_HIS_IOC2SYS_DB_STATUS;
	uint32_t status;

	status = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);

	if ((status & mask) == 0)
		return (DDI_INTR_UNCLAIMED);

	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_SUCCESS) {
		ddi_fm_service_impact(lmrc->l_dip, DDI_SERVICE_LOST);
		return (DDI_INTR_UNCLAIMED);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Fusion-MPT requests
 *
 * The controller expects to have access to a large chunk of DMA memory, into
 * which the driver writes fixed-size I/O requests for the controller to
 * process. To notify the hardware about a new request, a request descriptor is
 * written to the queue port registers which includes the SMID of the request.
 * This memory isn't really a queue, though, as it seems there are no
 * constraints about ordering of the requests. All that matters is that there
 * is a valid request at the address that corresponds with the SMID in the
 * descriptor.
 *
 * If the hardware supports MPI 2.6 atomic request descriptors, which are a
 * 32bit subset of the 64bit MPI 2.0/2.5 request descriptors, the descriptor is
 * sent to the controller in a single 32bit write into a device register.
 *
 * For all other descriptor types, we'll employ a 64bit write to the queue
 * registers, assuming that provides the required atomicity.
 */
void
lmrc_send_atomic_request(lmrc_t *lmrc, lmrc_atomic_req_desc_t req_desc)
{
	if (lmrc->l_atomic_desc_support) {
		lmrc_write_reg(lmrc,
		    MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
		    req_desc.rd_reg);
	} else {
		lmrc_req_desc_t rd;

		bzero(&rd, sizeof (rd));
		rd.rd_atomic = req_desc;

		lmrc_send_request(lmrc, rd);
	}
}

void
lmrc_send_request(lmrc_t *lmrc, lmrc_req_desc_t req_desc)
{
	lmrc_write_reg64(lmrc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
	    req_desc.rd_reg);
}

lmrc_atomic_req_desc_t
lmrc_build_atomic_request(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt, uint8_t flags)
{
	lmrc_atomic_req_desc_t req_desc;

	VERIFY3U(mpt->mpt_smid, !=, 0);

	/*
	 * Select the reply queue based on the CPU id to distribute reply load
	 * among queues.
	 */
	mpt->mpt_queue = CPU->cpu_id % lmrc->l_max_reply_queues;

	bzero(&req_desc, sizeof (req_desc));

	req_desc.rd_atomic.RequestFlags = flags;
	req_desc.rd_atomic.MSIxIndex = mpt->mpt_queue;
	req_desc.rd_atomic.SMID = mpt->mpt_smid;

	return (req_desc);
}

/*
 * Reply Processing
 *
 * The controller will post replies to completed requests in the DMA memory
 * provided for that purpose. This memory is divided in equally-sized chunks,
 * each being a separate reply queue that is also associated with an interrupt
 * vector. The replies are fixed size structures and will be written by the
 * hardware in order of completion into the queue. For each queue, there is a
 * register to tell the hardware which replies have been consumed by the
 * driver.
 *
 * In response to an interrupt, the driver will walk the reply queue associated
 * with the interrupt vector at the last known position and processes all
 * completed replies. After a number of replies have been processed, or if no
 * more replies are ready to be processed, the controller will be notified
 * about the last reply index to be processed by writing the appropriate
 * register.
 */

/*
 * lmrc_get_next_reply_desc
 *
 * Get the next unprocessed reply descriptor for a queue, or NULL if there is
 * none.
 */
static Mpi2ReplyDescriptorsUnion_t *
lmrc_get_next_reply_desc(lmrc_t *lmrc, int queue)
{
	Mpi2ReplyDescriptorsUnion_t *desc;

	desc = lmrc->l_reply_dma.ld_buf;

	desc += (queue * lmrc->l_reply_alloc_sz) / sizeof (*desc);
	desc += lmrc->l_last_reply_idx[queue];

	VERIFY3S(ddi_dma_sync(lmrc->l_reply_dma.ld_hdl,
	    (void *)desc - lmrc->l_reply_dma.ld_buf, sizeof (*desc),
	    DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS);

	/*
	 * Check if this is an unused reply descriptor, indicating that
	 * we've reached the end of replies in this queue.
	 *
	 * Even if the descriptor is only "half unused" we can't use it.
	 */
	if (desc->Words.Low == MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK ||
	    desc->Words.High == MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK)
		return (NULL);

	/* advance last reply index, wrap around if necessary */
	lmrc->l_last_reply_idx[queue]++;
	if (lmrc->l_last_reply_idx[queue] >= lmrc->l_reply_q_depth)
		lmrc->l_last_reply_idx[queue] = 0;

	return (desc);
}

/*
 * lmrc_write_rphi
 *
 * Write the Reply Post Host Index register for queue.
 */
static void
lmrc_write_rphi(lmrc_t *lmrc, uint32_t queue)
{
	int reg = 0;
	uint32_t val = (queue << 24) | lmrc->l_last_reply_idx[queue];

	if (lmrc->l_intr_type != DDI_INTR_TYPE_MSIX)
		VERIFY3U(queue, ==, 0);

	if (lmrc->l_msix_combined) {
		reg = queue / 8;
		val &= 0x07ffffff;
	}

	lmrc_write_reg(lmrc, lmrc->l_rphi[reg], val);
}

/*
 * lmrc_process_mpt_pkt
 *
 * Process a reply to a MPT IO request. Update the scsi_pkt according to
 * status, ex_status, and data_len, setting up the ARQ pkt if necessary.
 */
static void
lmrc_process_mpt_pkt(lmrc_t *lmrc, struct scsi_pkt *pkt, uint8_t status,
    uint8_t ex_status, uint32_t data_len)
{
	pkt->pkt_statistics = 0;
	pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
	    STATE_XFERRED_DATA | STATE_GOT_STATUS;

	pkt->pkt_resid = pkt->pkt_dma_len - data_len;

	switch (status) {
	case MFI_STAT_OK:
	case MFI_STAT_LD_CC_IN_PROGRESS:
	case MFI_STAT_LD_RECON_IN_PROGRESS:
		pkt->pkt_reason = CMD_CMPLT;
		pkt->pkt_scbp[0] = STATUS_GOOD;
		break;

	case MFI_STAT_SCSI_DONE_WITH_ERROR:
	case MFI_STAT_LD_LBA_OUT_OF_RANGE: {
		struct scsi_arq_status *arq =
		    (struct scsi_arq_status *)pkt->pkt_scbp;

		pkt->pkt_reason = CMD_CMPLT;
		arq->sts_status.sts_chk = 1;

		pkt->pkt_state |= STATE_ARQ_DONE;
		arq->sts_rqpkt_reason = CMD_CMPLT;
		arq->sts_rqpkt_resid = 0;
		arq->sts_rqpkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
		    STATE_SENT_CMD | STATE_XFERRED_DATA;
		*(uint8_t *)&arq->sts_rqpkt_status = STATUS_GOOD;
		break;
	}
	case MFI_STAT_LD_OFFLINE:
	case MFI_STAT_DEVICE_NOT_FOUND:
		pkt->pkt_reason = CMD_DEV_GONE;
		pkt->pkt_statistics = STAT_DISCON;
		break;

	default:
		dev_err(lmrc->l_dip, CE_WARN, "!command failed, status = %x, "
		    "ex_status = %x, cdb[0] = %x", status, ex_status,
		    pkt->pkt_cdbp[0]);
		pkt->pkt_reason = CMD_TRAN_ERR;
		break;
	}
}

/*
 * lmrc_poll_for_reply
 *
 * During a panic we'll have to resort to polled I/O to write core dumps.
 * Repeatedly check the reply queue for a new reply associated with the
 * given request descriptor and complete it, or return an error if we get
 * no reply within a reasonable time.
 */
int
lmrc_poll_for_reply(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt)
{
	clock_t max_wait = LMRC_IO_TIMEOUT * MILLISEC * 10;
	Mpi25SCSIIORequest_t *io_req = mpt->mpt_io_frame;
	Mpi2ReplyDescriptorsUnion_t *desc;
	uint16_t desc_smid;

	VERIFY(ddi_in_panic());

	/*
	 * Walk the reply queue. Discard entries which we aren't
	 * looking for.
	 */
	do {
		desc = lmrc_get_next_reply_desc(lmrc, mpt->mpt_queue);
		if (desc == NULL) {
			if (max_wait == 0)
				return (TRAN_FATAL_ERROR);

			drv_usecwait(100);
			max_wait--;
			continue;
		}

		desc_smid = desc->SCSIIOSuccess.SMID;

		/* reset descriptor */
		desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK;
		desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK;

		lmrc_write_rphi(lmrc, mpt->mpt_queue);
	} while (desc == NULL || desc_smid != mpt->mpt_smid);

	VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
	    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORKERNEL),
	    ==, DDI_SUCCESS);

	/* If this is I/O, process it. */
	if (io_req->Function == LMRC_MPI2_FUNCTION_LD_IO_REQUEST ||
	    io_req->Function == MPI2_FUNCTION_SCSI_IO_REQUEST) {
		lmrc_process_mpt_pkt(lmrc, mpt->mpt_pkt,
		    io_req->VendorRegion.rc_status,
		    io_req->VendorRegion.rc_exstatus, io_req->DataLength);
	}

	return (TRAN_ACCEPT);
}

/*
 * lmrc_process_replies
 *
 * Process all new reply entries in a queue in response to an interrupt.
 */
int
lmrc_process_replies(lmrc_t *lmrc, uint8_t queue)
{
	int nprocessed = 0;
	Mpi2ReplyDescriptorsUnion_t *desc;

	for (desc = lmrc_get_next_reply_desc(lmrc, queue);
	    desc != NULL;
	    desc = lmrc_get_next_reply_desc(lmrc, queue)) {
		Mpi2SCSIIOSuccessReplyDescriptor_t *reply =
		    &desc->SCSIIOSuccess;
		uint16_t smid = reply->SMID;
		lmrc_mpt_cmd_t *mpt = lmrc->l_mpt_cmds[smid - 1];
		lmrc_tgt_t *tgt = NULL;
		Mpi25SCSIIORequest_t *io_req;
		struct scsi_pkt *pkt;
		struct scsi_device *sd;

		VERIFY3U(reply->SMID, <=, lmrc->l_max_fw_cmds);

		mutex_enter(&mpt->mpt_lock);
		mpt->mpt_complete = B_TRUE;
		pkt = mpt->mpt_pkt;
		io_req = mpt->mpt_io_frame;

		VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
		    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
		    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE,
		    DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS);


		switch (io_req->Function) {
		case MPI2_FUNCTION_SCSI_TASK_MGMT:
			VERIFY0(pkt);
			VERIFY0(list_link_active(&mpt->mpt_node));
			cv_signal(&mpt->mpt_cv);
			break;

		case MPI2_FUNCTION_SCSI_IO_REQUEST:
		case LMRC_MPI2_FUNCTION_LD_IO_REQUEST:
			VERIFY(pkt != NULL);

			sd = scsi_address_device(&pkt->pkt_address);
			VERIFY(sd != NULL);

			tgt = scsi_device_hba_private_get(sd);
			VERIFY(tgt != NULL);

			lmrc_process_mpt_pkt(lmrc, pkt,
			    io_req->VendorRegion.rc_status,
			    io_req->VendorRegion.rc_exstatus,
			    io_req->DataLength);

			break;

		case LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST:
			VERIFY0(pkt);
			VERIFY0(list_link_active(&mpt->mpt_node));
			lmrc_process_mptmfi_passthru(lmrc, mpt);
			break;

		default:
			mutex_exit(&mpt->mpt_lock);
			dev_err(lmrc->l_dip, CE_PANIC,
			    "reply received for unknown Function %x",
			    io_req->Function);
		}

		mutex_exit(&mpt->mpt_lock);

		if (pkt != NULL) {
			lmrc_tgt_rem_active_mpt(tgt, mpt);
			atomic_dec_uint(&lmrc->l_fw_outstanding_cmds);
			scsi_hba_pkt_comp(pkt);
		}

		/* reset descriptor */
		desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK;
		desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK;

		nprocessed++;

		if (nprocessed % LMRC_THRESHOLD_REPLY_COUNT == 0)
			lmrc_write_rphi(lmrc, queue);
	}

	if (nprocessed != 0 && nprocessed % LMRC_THRESHOLD_REPLY_COUNT != 0)
		lmrc_write_rphi(lmrc, queue);

	return (DDI_INTR_CLAIMED);
}


/*
 * MFI - MegaRAID Firmware Interface
 */

/*
 * lmrc_build_mptmfi_passthru
 *
 * MFI commands are sent as MPT MFI passthrough I/O requests. To be able to
 * send a MFI frame to the RAID controller, we need to have a MPT command set
 * up as MPT I/O request and a one-entry SGL pointing to the MFI command.
 *
 * As there's only a small number of MFI commands compared to the amount of MPT
 * commands, the MPT command for each MFI is pre-allocated at attach time and
 * initialized here.
 */
int
lmrc_build_mptmfi_passthru(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	Mpi25SCSIIORequest_t *io_req;
	const ddi_dma_cookie_t *cookie;
	lmrc_mpt_cmd_t *mpt;

	mpt = lmrc_get_mpt(lmrc);
	if (mpt == NULL)
		return (DDI_FAILURE);

	/* lmrc_get_mpt() should return the mpt locked */
	ASSERT(mutex_owned(&mpt->mpt_lock));

	mfi->mfi_mpt = mpt;
	mpt->mpt_mfi = mfi;

	io_req = mpt->mpt_io_frame;
	io_req->Function = LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST;
	io_req->ChainOffset = lmrc->l_chain_offset_mfi_pthru;

	cookie = ddi_dma_cookie_one(mfi->mfi_frame_dma.ld_hdl);
	lmrc_dma_build_sgl(lmrc, mpt, cookie, 1);

	VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
	    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORDEV),
	    ==, DDI_SUCCESS);

	/*
	 * As we're not sending this command to the hardware any time soon,
	 * drop the mutex before we return.
	 */
	mutex_exit(&mpt->mpt_lock);

	return (DDI_SUCCESS);
}

/*
 * lmrc_process_mptmfi_passthru
 *
 * When a MPT MFI passthrough command completes, invoke the callback if there
 * is one. Panic if an invalid command completed as that should never happen.
 */
static void
lmrc_process_mptmfi_passthru(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt)
{
	lmrc_mfi_cmd_t *mfi;
	mfi_header_t *hdr;

	VERIFY3P(mpt->mpt_mfi, !=, NULL);
	mfi = mpt->mpt_mfi;
	VERIFY0(list_link_active(&mfi->mfi_node));

	hdr = &mfi->mfi_frame->mf_hdr;

	if ((hdr->mh_flags & MFI_FRAME_DIR_READ) != 0)
		(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
		    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL);

	switch (hdr->mh_cmd) {
	case MFI_CMD_DCMD:
	case MFI_CMD_LD_SCSI_IO:
	case MFI_CMD_PD_SCSI_IO:
	case MFI_CMD_ABORT:
		mutex_enter(&mfi->mfi_lock);
		if (mfi->mfi_callback != NULL)
			mfi->mfi_callback(lmrc, mfi);
		mutex_exit(&mfi->mfi_lock);
		break;

	case MFI_CMD_INVALID:
	default:
		dev_err(lmrc->l_dip, CE_PANIC,
		    "invalid MFI cmd completion received, cmd = %x",
		    hdr->mh_cmd);
		break;
	}
}

/*
 * lmrc_issue_mfi
 *
 * Post a MFI command to the firmware. Reset the cmd_status to invalid. Build
 * a MPT MFI passthru command if necessary and a MPT atomic request descriptor
 * before posting the request. The MFI command's mutex must be held.
 */
void
lmrc_issue_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, lmrc_mfi_cmd_cb_t *cb)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	lmrc_atomic_req_desc_t req_desc;

	ASSERT(mutex_owned(&mfi->mfi_lock));

	if ((hdr->mh_flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE) == 0) {
		VERIFY3U(cb, !=, NULL);
		mfi->mfi_callback = cb;
	} else {
		VERIFY3U(cb, ==, NULL);
	}

	hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS;

	req_desc = lmrc_build_atomic_request(lmrc, mfi->mfi_mpt,
	    MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO);

	(void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0,
	    mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV);

	if ((hdr->mh_flags & MFI_FRAME_DIR_WRITE) != 0)
		(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
		    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORDEV);

	lmrc_send_atomic_request(lmrc, req_desc);
}

/*
 * lmrc_poll_mfi
 *
 * Poll a MFI command for completion, waiting up to max_wait secs. Repeatedly
 * check the command status until it changes to something that is not invalid.
 *
 * Trigger an online controller reset on timeout.
 */
static int
lmrc_poll_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	lmrc_dma_t *dma = &mfi->mfi_frame_dma;
	clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC);
	clock_t now;

	ASSERT(mutex_owned(&mfi->mfi_lock));

	do {
		(void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len,
		    DDI_DMA_SYNC_FORKERNEL);
		if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
			break;

		(void) cv_reltimedwait(&mfi->mfi_cv, &mfi->mfi_lock,
		    drv_usectohz(MILLISEC), TR_MILLISEC);
		now = ddi_get_lbolt();
	} while (!lmrc->l_fw_fault && now <= timeout);

	if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
		return (DDI_SUCCESS);

	if (now > timeout) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: command timeout after %ds", __func__, max_wait);

		/*
		 * Signal the housekeeping thread to check for FW/HW faults,
		 * performing a reset if necessary.
		 */
		cv_signal(&lmrc->l_thread_cv);
	}

	return (DDI_FAILURE);
}

/*
 * lmrc_wait_mfi
 *
 * Wait for up to max_wait secs for a MFI command to complete. The cmd mutex
 * must be held.
 *
 * Trigger an online controller reset on timeout.
 */
int
lmrc_wait_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	lmrc_dma_t *dma = &mfi->mfi_frame_dma;
	clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC);
	int ret;

	ASSERT(mutex_owned(&mfi->mfi_lock));

	do {
		ret = cv_timedwait(&mfi->mfi_cv, &mfi->mfi_lock, timeout);

		(void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len,
		    DDI_DMA_SYNC_FORKERNEL);

	} while (!lmrc->l_fw_fault &&
	    hdr->mh_cmd_status == MFI_STAT_INVALID_STATUS && ret != -1);

	if (!lmrc->l_fw_fault && ret != -1)
		return (DDI_SUCCESS);

	if (ret == -1) {
		dev_err(lmrc->l_dip, CE_WARN, "!%s: blocked command timeout "
		    "after %ds, cmd = %d, status = %d", __func__, max_wait,
		    hdr->mh_cmd, hdr->mh_cmd_status);

		/*
		 * Signal the housekeeping thread to check for FW/HW faults,
		 * performing a reset if necessary.
		 */
		cv_signal(&lmrc->l_thread_cv);
	}

	return (DDI_FAILURE);
}

/*
 * lmrc_wakeup_mfi
 *
 * Signal the CV associated with a MFI command to wake up the thread waiting
 * for its completion.
 */
void
lmrc_wakeup_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *cmd)
{
	ASSERT(mutex_owned(&cmd->mfi_lock));
	cv_signal(&cmd->mfi_cv);
}

/*
 * lmrc_issue_blocked_mfi
 *
 * Post a MFI command to the firmware and wait for the command to complete.
 */
int
lmrc_issue_blocked_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	int ret;

	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi);
	ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
	mutex_exit(&mfi->mfi_lock);

	if (ret == DDI_SUCCESS && hdr->mh_cmd_status == MFI_STAT_OK)
		return (DDI_SUCCESS);

	dev_err(lmrc->l_dip, CE_WARN,
	    "!%s: blocked command failure, cmd = %d, status = %d",
	    __func__, hdr->mh_cmd, hdr->mh_cmd_status);

	return (ret);
}

/*
 * lmrc_abort_cb
 *
 * Callback for any command that is to be aborted.
 *
 * If the command completed normally before it could be aborted, set the
 * status to indicate the intended abortion.
 */
static void
lmrc_abort_cb(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;

	if (hdr->mh_cmd_status == MFI_STAT_OK)
		hdr->mh_cmd_status = MFI_STAT_NOT_FOUND;
}

/*
 * lmrc_abort_cmd
 *
 * Abort a MFI command. This is a bit tricky as the hardware may still
 * complete it at any time.
 *
 * The mutex of the command to be aborted must be held to prevent it from
 * completing behind our back. We'll replace its callback with our own, issue
 * an ABORT command, and drop the mutex before we wait for the ABORT command
 * to complete.
 */
static int
lmrc_abort_cmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi_to_abort)
{
	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	mfi_abort_payload_t *abort = &mfi->mfi_frame->mf_abort;
	lmrc_mfi_cmd_cb_t *orig_cb = mfi_to_abort->mfi_callback;
	int ret;

	ASSERT(mutex_owned(&mfi_to_abort->mfi_lock));

	/* Replace the command's callback with our own. */
	mfi_to_abort->mfi_callback = lmrc_abort_cb;

	hdr->mh_cmd = MFI_CMD_ABORT;
	abort->ma_abort_context = mfi_to_abort->mfi_idx;
	lmrc_dma_set_addr64(&mfi_to_abort->mfi_frame_dma,
	    &abort->ma_abort_mfi_phys_addr);

	/* Send the ABORT. */
	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi);

	/*
	 * Drop the mutex of the command to be aborted, allowing it to proceed
	 * while we wait for the ABORT command to complete.
	 */
	mutex_exit(&mfi_to_abort->mfi_lock);
	ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
	mutex_exit(&mfi->mfi_lock);

	/*
	 * The ABORT command may fail if cmd_to_abort has completed already.
	 * Treat any other failure as fatal, restore the callback and fail.
	 */
	if (ret != DDI_SUCCESS && hdr->mh_cmd_status != MFI_STAT_NOT_FOUND) {
		mutex_enter(&mfi_to_abort->mfi_lock);
		mfi_to_abort->mfi_callback = orig_cb;
		goto out;
	}

	/*
	 * Wait for the aborted command to complete. If we time out on this
	 * there's little we can do here, so we restore the callback and fail.
	 */
	mutex_enter(&mfi_to_abort->mfi_lock);
	ret = lmrc_poll_mfi(lmrc, mfi_to_abort, LMRC_INTERNAL_CMD_WAIT_TIME);
	mfi_to_abort->mfi_callback = orig_cb;

	if (ret != DDI_SUCCESS)
		goto out;

	/* Wake up anyone waiting on the aborted command. */
	if (mfi_to_abort->mfi_callback != NULL)
		mfi_to_abort->mfi_callback(lmrc, mfi_to_abort);

out:
	lmrc_put_mfi(mfi);
	ASSERT(mutex_owned(&mfi_to_abort->mfi_lock));
	return (ret);
}


/*
 * Controller Initialization and Housekeeping
 */

/*
 * lmrc_check_fw_fault
 *
 * Check the firmware state. If faulted, return B_TRUE.
 * Return B_FALSE otherwise.
 */
static boolean_t
lmrc_check_fw_fault(lmrc_t *lmrc)
{
	uint32_t status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
	uint32_t fw_state = LMRC_FW_STATE(status);

	if (fw_state == LMRC_FW_STATE_FAULT)
		return (B_TRUE);

	return (B_FALSE);
}

/*
 * lmrc_wait_for_reg
 *
 * Repeatedly read the register and check that 'bits' match 'exp'.
 */
static boolean_t
lmrc_wait_for_reg(lmrc_t *lmrc, uint32_t reg, uint32_t bits, uint32_t exp,
    uint64_t max_wait)
{
	uint32_t val;
	uint64_t i;

	max_wait *= MILLISEC / 100;

	for (i = 0; i < max_wait; i++) {
		delay(drv_usectohz(100 * MILLISEC));
		val = lmrc_read_reg(lmrc, reg);

		if ((val & bits) == exp)
			return (B_TRUE);
	}

	return (B_FALSE);
}

static int
lmrc_hard_reset(lmrc_t *lmrc)
{
	int ret = DDI_SUCCESS;

	/* Write the reset key sequence. */
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_FLUSH_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_1ST_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_2ND_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_3RD_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_4TH_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_5TH_KEY_VALUE);
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_6TH_KEY_VALUE);

	/* Check diag write enable. */
	if (!lmrc_wait_for_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET,
	    MPI2_DIAG_DIAG_WRITE_ENABLE, MPI2_DIAG_DIAG_WRITE_ENABLE,
	    LMRC_RESET_TIMEOUT)) {
		dev_err(lmrc->l_dip, CE_WARN, "diag unlock failed");
		return (DDI_FAILURE);
	}

	/* Reset IOC. */
	lmrc_write_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET,
	    lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) |
	    MPI2_DIAG_RESET_ADAPTER);
	delay(drv_usectohz(MPI2_HARD_RESET_PCIE_FIRST_READ_DELAY_MICRO_SEC));

	/* Check the reset adapter bit. */
	if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) &
	    MPI2_DIAG_RESET_ADAPTER) == 0)
		goto out;

	delay(drv_usectohz(MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC));

	/* Check the reset adapter bit again. */
	if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) &
	    MPI2_DIAG_RESET_ADAPTER) == 0)
		goto out;

	ret = DDI_FAILURE;
out:
	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
	    MPI2_WRSEQ_FLUSH_KEY_VALUE);
	return (ret);
}

/*
 * lmrc_reset_ctrl
 *
 * Attempt to reset the controller, if the hardware supports it.
 * If reset is unsupported or the reset fails repeatedly, we shut the
 * controller down.
 */
static int
lmrc_reset_ctrl(lmrc_t *lmrc)
{
	uint32_t status, fw_state, reset_adapter;
	int max_wait, i;

	if (lmrc->l_disable_online_ctrl_reset)
		return (DDI_FAILURE);

	status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
	fw_state = LMRC_FW_STATE(status);
	reset_adapter = LMRC_FW_RESET_ADAPTER(status);

	if (fw_state == LMRC_FW_STATE_FAULT && reset_adapter == 0) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "FW in fault state, but reset not supported");
		goto out;
	}

	for (i = 0; i < LMRC_MAX_RESET_TRIES; i++) {
		dev_err(lmrc->l_dip, CE_WARN, "resetting...");

		if (lmrc_hard_reset(lmrc) != DDI_SUCCESS)
			continue;

		/* Wait for the FW state to move beyond INIT. */
		max_wait = LMRC_IO_TIMEOUT * MILLISEC / 100;
		do {
			status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
			fw_state = LMRC_FW_STATE(status);

			if (fw_state <= LMRC_FW_STATE_FW_INIT)
				delay(drv_usectohz(100 * MILLISEC));
		} while (fw_state <= LMRC_FW_STATE_FW_INIT && max_wait > 0);

		if (fw_state <= LMRC_FW_STATE_FW_INIT) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "fw state <= LMRC_FW_STATE_FW_INIT, state = %x",
			    fw_state);
			continue;
		}

		return (DDI_SUCCESS);
	}

	dev_err(lmrc->l_dip, CE_WARN, "reset failed");
out:
	/* Stop the controller. */
	lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, MFI_STOP_ADP);
	(void) lmrc_read_reg(lmrc, MPI2_DOORBELL_OFFSET);

	return (DDI_FAILURE);
}

/*
 * lmrc_tgt_complete_cmd
 *
 * In case of a controller reset, complete the cmd and clean up. This is done
 * in a taskq to avoid locking and list manipulation headaches.
 */
static void
lmrc_tgt_complete_cmd(void *arg)
{
	lmrc_scsa_cmd_t *cmd = arg;
	struct scsi_pkt *pkt;
	lmrc_t *lmrc;

	mutex_enter(&cmd->sc_mpt->mpt_lock);

	/* Just in case the command completed before the taskq was run... */
	if (cmd->sc_mpt->mpt_complete) {
		mutex_exit(&cmd->sc_mpt->mpt_lock);
		return;
	}

	lmrc = cmd->sc_mpt->mpt_lmrc;
	pkt = cmd->sc_mpt->mpt_pkt;

	pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD;
	pkt->pkt_reason = CMD_RESET;
	pkt->pkt_statistics = STAT_BUS_RESET;
	mutex_exit(&cmd->sc_mpt->mpt_lock);

	lmrc_tgt_rem_active_mpt(cmd->sc_tgt, cmd->sc_mpt);
	atomic_dec_uint(&lmrc->l_fw_outstanding_cmds);

	scsi_hba_pkt_comp(pkt);
}

/*
 * lmrc_tgt_complete_cmds
 *
 * Walk the list of active commands of a target. Schedule a taskq to handle
 * the timeout processing and clean up.
 */
static void
lmrc_tgt_complete_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt)
{
	lmrc_mpt_cmd_t *mpt;

	mutex_enter(&tgt->tgt_mpt_active_lock);
	if (list_is_empty(&tgt->tgt_mpt_active)) {
		mutex_exit(&tgt->tgt_mpt_active_lock);
		return;
	}

	for (mpt = lmrc_tgt_first_active_mpt(tgt);
	    mpt != NULL;
	    mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) {
		lmrc_scsa_cmd_t *cmd = mpt->mpt_pkt->pkt_ha_private;

		ASSERT(mutex_owned(&mpt->mpt_lock));
		VERIFY(mpt->mpt_pkt != NULL);
		VERIFY(cmd != NULL);

		if (mpt->mpt_complete)
			continue;

		taskq_dispatch_ent(lmrc->l_taskq, lmrc_tgt_complete_cmd, cmd,
		    TQ_NOSLEEP, &mpt->mpt_tqent);
	}
	mutex_exit(&tgt->tgt_mpt_active_lock);
}

/*
 * lmrc_tgt_timeout_cmds
 *
 * Walk the list of active commands of a target. Try to abort commands which
 * are overdue.
 */
static int
lmrc_tgt_timeout_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt)
{
	lmrc_mpt_cmd_t *mpt;
	int ret = DDI_SUCCESS;

	mutex_enter(&tgt->tgt_mpt_active_lock);
	if (list_is_empty(&tgt->tgt_mpt_active))
		goto out;

	for (mpt = lmrc_tgt_first_active_mpt(tgt);
	    mpt != NULL;
	    mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) {
		hrtime_t now;

		ASSERT(mutex_owned(&mpt->mpt_lock));
		VERIFY(mpt->mpt_pkt != NULL);

		/* Just in case the command completed by now... */
		if (mpt->mpt_complete)
			continue;

		now = gethrtime();

		if (now > mpt->mpt_timeout) {
			/*
			 * Give the packet a bit more time for the abort to
			 * complete.
			 */
			mpt->mpt_timeout = now + LMRC_IO_TIMEOUT * NANOSEC;

			/*
			 * If the abort failed for whatever reason,
			 * we can stop here as only a controller reset
			 * can get us back into a sane state.
			 */
			if (lmrc_abort_mpt(lmrc, tgt, mpt) != 1) {
				mutex_exit(&mpt->mpt_lock);
				ret = DDI_FAILURE;
				goto out;
			}
		}
	}

out:
	mutex_exit(&tgt->tgt_mpt_active_lock);
	return (ret);
}

/*
 * lmrc_thread
 *
 * Check whether the controller is in FW fault state. Check all targets for
 * commands which have timed out.
 */
void
lmrc_thread(void *arg)
{
	lmrc_t *lmrc = arg;

	do {
		int i;

		/* Wake up at least once a minute. */
		mutex_enter(&lmrc->l_thread_lock);
		(void) cv_reltimedwait(&lmrc->l_thread_cv, &lmrc->l_thread_lock,
		    drv_usectohz(60 * MICROSEC), TR_SEC);
		mutex_exit(&lmrc->l_thread_lock);

		if (lmrc->l_thread_stop)
			continue;

		lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc);

		/*
		 * Check all targets for timed-out commands. If we find any
		 * and fail to abort them, we pretend the FW has faulted to
		 * trigger a reset.
		 */
		if (!lmrc->l_fw_fault) {
			for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
				if (lmrc_tgt_timeout_cmds(lmrc,
				    &lmrc->l_targets[i]) != DDI_SUCCESS) {
					lmrc->l_fw_fault = B_TRUE;
					break;
				}
			}
		}

		/*
		 * If the FW is faulted, try to recover by performing a reset.
		 */
		if (lmrc->l_fw_fault) {
			int ret;

			lmrc_disable_intr(lmrc);

			/*
			 * Even if the reset failed, it will have stopped the
			 * controller and we can complete all outstanding
			 * commands.
			 */
			ret = lmrc_reset_ctrl(lmrc);

			(void) lmrc_abort_outstanding_mfi(lmrc,
			    LMRC_MAX_MFI_CMDS);

			for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++)
				lmrc_tgt_complete_cmds(lmrc,
				    &lmrc->l_targets[i]);

			if (ret != DDI_SUCCESS) {
				dev_err(lmrc->l_dip, CE_WARN, "reset failed");
				continue;
			}

			if (lmrc_transition_to_ready(lmrc) != DDI_SUCCESS)
				continue;

			if (lmrc_ioc_init(lmrc) != DDI_SUCCESS)
				continue;

			lmrc_enable_intr(lmrc);

			if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
				dev_err(lmrc->l_dip, CE_WARN,
				    "failed to re-initiate AEN");
				continue;
			}

			lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc);
		}
	} while (!lmrc->l_thread_stop);

	thread_exit();
}

/*
 * lmrc_transition_to_ready
 *
 * Move firmware to ready state. At attach time, the FW can potentially be in
 * any one of several possible states. If the FW is in the operational or
 * waiting-for-handshake states, take steps to bring it to ready state.
 * Otherwise, wait for the FW to reach ready state.
 */
static int
lmrc_transition_to_ready(lmrc_t *lmrc)
{
	uint32_t status, new_status;
	uint32_t fw_state;
	uint8_t max_wait;
	uint_t i;

	status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
	fw_state = LMRC_FW_STATE(status);
	max_wait = LMRC_RESET_TIMEOUT;

	while (fw_state != LMRC_FW_STATE_READY) {
		switch (fw_state) {
		case LMRC_FW_STATE_FAULT:
			dev_err(lmrc->l_dip, CE_NOTE, "FW is in fault state!");
			if (lmrc_reset_ctrl(lmrc) != DDI_SUCCESS)
				return (DDI_FAILURE);
			break;

		case LMRC_FW_STATE_WAIT_HANDSHAKE:
			/* Set the CLR bit in inbound doorbell */
			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
			    MFI_INIT_CLEAR_HANDSHAKE | MFI_INIT_HOTPLUG);
			break;

		case LMRC_FW_STATE_BOOT_MSG_PENDING:
			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
			    MFI_INIT_HOTPLUG);
			break;

		case LMRC_FW_STATE_OPERATIONAL:
			/* Bring it to READY state, wait up to 10s */
			lmrc_disable_intr(lmrc);
			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
			    MFI_RESET_FLAGS);
			(void) lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1,
			    0, 10);
			break;

		case LMRC_FW_STATE_UNDEFINED:
			/* This state should not last for more than 2 sec */
		case LMRC_FW_STATE_BB_INIT:
		case LMRC_FW_STATE_FW_INIT:
		case LMRC_FW_STATE_FW_INIT_2:
		case LMRC_FW_STATE_DEVICE_SCAN:
		case LMRC_FW_STATE_FLUSH_CACHE:
			break;
		default:
			dev_err(lmrc->l_dip, CE_WARN, "Unknown FW state %x",
			    fw_state);
			return (DDI_FAILURE);
		}

		/*
		 * The current state should not last for more than max_wait
		 * seconds.
		 */
		for (i = 0; i < max_wait * 1000; i++) {
			new_status = lmrc_read_reg(lmrc,
			    MPI26_SCRATCHPAD0_OFFSET);

			if (status != new_status)
				break;

			delay(drv_usectohz(MILLISEC));
		}

		if (new_status == status) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "FW state (%x) hasn't changed in %d seconds",
			    fw_state, max_wait);
			return (DDI_FAILURE);
		}

		status = new_status;
		fw_state = LMRC_FW_STATE(status);
	}

	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_FM_OK)
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * lmrc_adapter_init
 *
 * Get the hardware and firmware into a usable state, and fetch some basic
 * information from the registers to calculate sizes of basic data structures.
 */
int
lmrc_adapter_init(lmrc_t *lmrc)
{
	uint32_t reg;
	int ret;
	int i;

	ret = lmrc_transition_to_ready(lmrc);
	if (ret != DDI_SUCCESS)
		return (ret);

	/*
	 * Get maximum RAID map size.
	 */
	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD2_OFFSET);
	lmrc->l_max_raid_map_sz = LMRC_MAX_RAID_MAP_SZ(reg);

	lmrc->l_max_reply_queues = 1;
	lmrc->l_rphi[0] = MPI2_REPLY_POST_HOST_INDEX_OFFSET;

	/*
	 * Apparently, bit 27 of the scratch pad register indicates whether
	 * MSI-X is supported by the firmware.
	 */
	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);

	if (LMRC_FW_MSIX_ENABLED(reg)) {
		lmrc->l_fw_msix_enabled = B_TRUE;

		reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
		lmrc->l_max_reply_queues = LMRC_MAX_REPLY_QUEUES_EXT(reg);

		if (lmrc->l_max_reply_queues > LMRC_MAX_REPLY_POST_HOST_INDEX) {
			lmrc->l_msix_combined = B_TRUE;
			lmrc->l_rphi[0] =
			    MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET;
		}

		/*
		 * Compute reply post index register addresses 1-15.
		 */
		for (i = 1; i < LMRC_MAX_REPLY_POST_HOST_INDEX; i++) {
			lmrc->l_rphi[i] = i * 0x10 +
			    MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET;
		}
	}

	/*
	 * Get the number of commands the firmware supports. Use one less,
	 * because reply_q_depth is based on one more than this. XXX: Why?
	 */
	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
	lmrc->l_max_fw_cmds = LMRC_FW_MAX_CMD(reg) - 1;

	if (lmrc->l_max_fw_cmds < LMRC_MAX_MFI_CMDS) {
		dev_err(lmrc->l_dip, CE_WARN, "!max_fw_cmds too low: %d",
		    lmrc->l_max_fw_cmds);
		return (DDI_FAILURE);
	}

	/*
	 * Reserve some commands for MFI, the remainder is for SCSI commands.
	 */
	lmrc->l_max_scsi_cmds = lmrc->l_max_fw_cmds - LMRC_MAX_MFI_CMDS;

	/*
	 * XXX: This magic calculation isn't explained anywhere. Let's see...
	 * lmrc_max_fw_cmds + 1 gives us what was reported in the register.
	 * That + 15 is for rounding it up to the next multiple of 16, which
	 * / 16 * 16 does.
	 * And apparently we want twice that much for queue depth. Why?
	 *
	 * So in reality, the queue depth is based on at least one more than
	 * lmrc_max_fw_cmds, but it could be even more. That makes the above
	 * statement about lmrc_max_fw_cmds questionable.
	 */
	lmrc->l_reply_q_depth = (lmrc->l_max_fw_cmds + 1 + 15) / 16 * 16 * 2;

	/* Allocation size of one reply queue, based on depth. */
	lmrc->l_reply_alloc_sz =
	    sizeof (Mpi2ReplyDescriptorsUnion_t) * lmrc->l_reply_q_depth;

	/* Allocation size of the DMA memory used for all MPI I/O frames. */
	lmrc->l_io_frames_alloc_sz = LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE *
	    (lmrc->l_max_fw_cmds + 2);

	/*
	 * If LMRC_EXT_CHAIN_SIZE_SUPPORT is set in scratch pad 1, firmware
	 * supports an extended IO chain frame which is 4 times the size of a
	 * legacy firmware frame.
	 * Legacy Firmware frame size is (8 * 128) = 1K
	 * 1M IO Firmware frame size is (8 * 128 * 4) = 4K
	 */
	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
	lmrc->l_max_chain_frame_sz = LMRC_MAX_CHAIN_SIZE(reg) *
	    (LMRC_EXT_CHAIN_SIZE_SUPPORT(reg) ? LMRC_1MB_IO : LMRC_256K_IO);

	/*
	 * Check whether the controller supports DMA to the full 64bit address
	 * space.
	 */
	lmrc->l_64bit_dma_support = LMRC_64BIT_DMA_SUPPORT(reg);

	/*
	 * We use an I/O frame size of 256 bytes, that is what
	 * LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE is set to.
	 *
	 * The offset of the SGL in the I/O frame is 128, so
	 * there are 128 bytes left for 8 SGEs of 16 bytes each.
	 */
	lmrc->l_max_sge_in_main_msg =
	    (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
	    offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof (Mpi25SGEIOUnion_t);

	/*
	 * Similarly, number of SGE in a SGE chain frame.
	 */
	lmrc->l_max_sge_in_chain =
	    lmrc->l_max_chain_frame_sz / sizeof (Mpi25SGEIOUnion_t);

	/*
	 * The total number of SGE we support in a transfer is the sum of
	 * the above two, minus one for the link (last SGE in main msg).
	 *
	 * XXX: So why -2?
	 */
	lmrc->l_max_num_sge =
	    lmrc->l_max_sge_in_main_msg + lmrc->l_max_sge_in_chain - 2;

	/*
	 * The offset of the last SGE in the I/O request, used for linking
	 * the SGE chain frame if necessary.
	 */
	lmrc->l_chain_offset_io_request =
	    (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
	    sizeof (Mpi25SGEIOUnion_t)) / sizeof (Mpi25SGEIOUnion_t);

	/*
	 * For MFI passthru, the link to the SGE chain frame is always
	 * the first SGE in the I/O frame, the other SGEs in the I/O frame
	 * will not be used.
	 */
	lmrc->l_chain_offset_mfi_pthru =
	    offsetof(Mpi25SCSIIORequest_t, SGL) / sizeof (Mpi25SGEIOUnion_t);


	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD3_OFFSET);
	if (LMRC_NVME_PAGE_SHIFT(reg) > LMRC_DEFAULT_NVME_PAGE_SHIFT) {
		lmrc->l_nvme_page_sz = 1 << LMRC_NVME_PAGE_SHIFT(reg);
		dev_err(lmrc->l_dip, CE_NOTE, "!NVME page size: %ld",
		    lmrc->l_nvme_page_sz);
	}

	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
	lmrc->l_fw_sync_cache_support = LMRC_SYNC_CACHE_SUPPORT(reg);

	if (lmrc->l_class == LMRC_ACLASS_AERO) {
		reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
		lmrc->l_atomic_desc_support =
		    LMRC_ATOMIC_DESCRIPTOR_SUPPORT(reg);
	}

	return (DDI_SUCCESS);
}

/*
 * lmrc_ioc_init
 *
 * Manually build a MFI IOC INIT command to setup basic operating parameters
 * such as the DMA parameters for the I/O request frames and the reply post
 * queues. Send the IOC INIT command using a special request descriptor which
 * directly includes the physical address of the MFI command frame.
 *
 * After this command completes, the controller is ready to accept MPT commands
 * using the normal method of placing them in the I/O request DMA memory and
 * writing a MPT request descriptor to the appropriate registers.
 */
int
lmrc_ioc_init(lmrc_t *lmrc)
{
	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	mfi_init_payload_t *init = &mfi->mfi_frame->mf_init;
	lmrc_req_desc_t req_desc;
	Mpi2IOCInitRequest_t *IOCInitMsg;
	lmrc_dma_t dma;
	int ret = DDI_SUCCESS;

	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &dma,
	    sizeof (Mpi2IOCInitRequest_t), 256, DDI_DMA_CONSISTENT);
	if (ret != DDI_SUCCESS) {
		lmrc_put_mfi(mfi);
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: failed to allocate IOC command", __func__);
		return (DDI_FAILURE);
	}

	IOCInitMsg = dma.ld_buf;
	IOCInitMsg->Function = MPI2_FUNCTION_IOC_INIT;
	IOCInitMsg->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
	IOCInitMsg->MsgVersion = MPI2_VERSION;
	IOCInitMsg->HeaderVersion = MPI2_HEADER_VERSION;
	IOCInitMsg->SystemRequestFrameSize =
	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4;
	IOCInitMsg->ReplyDescriptorPostQueueDepth = lmrc->l_reply_q_depth;
	lmrc_dma_set_addr64(&lmrc->l_reply_dma,
	    (uint64_t *)&IOCInitMsg->ReplyDescriptorPostQueueAddress);
	lmrc_dma_set_addr64(&lmrc->l_ioreq_dma,
	    (uint64_t *)&IOCInitMsg->SystemRequestFrameBaseAddress);
	IOCInitMsg->HostMSIxVectors = lmrc->l_max_reply_queues;
	/* XXX: Why NVMe? */
	IOCInitMsg->HostPageSize = LMRC_DEFAULT_NVME_PAGE_SHIFT;

	hdr->mh_cmd = MFI_CMD_INIT;
	hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS;
	hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;

	hdr->mh_drv_opts.mc_support_additional_msix = 1;
	hdr->mh_drv_opts.mc_support_max_255lds = 1;
	hdr->mh_drv_opts.mc_support_ndrive_r1_lb = 1;
	hdr->mh_drv_opts.mc_support_security_protocol_cmds_fw = 1;
	hdr->mh_drv_opts.mc_support_ext_io_size = 1;

	hdr->mh_data_xfer_len = lmrc_dma_get_size(&dma);

	lmrc_dma_set_addr64(&dma, &init->mi_queue_info_new_phys_addr);

	lmrc_dma_set_addr64(&mfi->mfi_frame_dma, &req_desc.rd_reg);
	VERIFY0(req_desc.rd_mfa_io.RequestFlags);
	req_desc.rd_mfa_io.RequestFlags = LMRC_REQ_DESCRIPT_FLAGS_MFA;

	lmrc_disable_intr(lmrc);
	if (!lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1, 0, 10))
		return (DDI_FAILURE);

	(void) ddi_dma_sync(dma.ld_hdl, 0, dma.ld_len, DDI_DMA_SYNC_FORDEV);
	(void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0,
	    mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV);

	lmrc_send_request(lmrc, req_desc);

	mutex_enter(&mfi->mfi_lock);
	ret = lmrc_poll_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
	mutex_exit(&mfi->mfi_lock);

	if (ret != DDI_SUCCESS) {
		if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
			dev_err(lmrc->l_dip, CE_WARN,
			    "!IOC Init failed, status = 0x%x",
			    hdr->mh_cmd_status);
	}

	lmrc_dma_free(&dma);
	lmrc_put_mfi(mfi);

	return (ret);
}

/*
 * lmrc_get_ctrl_info
 *
 * Build a MFI DCMD to get controller information from FW. Update the copy in
 * the soft state.
 */
static int
lmrc_get_ctrl_info(lmrc_t *lmrc)
{
	mfi_ctrl_info_t *ci = lmrc->l_ctrl_info;
	lmrc_mfi_cmd_t *mfi;
	int ret;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, MFI_DCMD_CTRL_GET_INFO,
	    sizeof (mfi_ctrl_info_t), 1);

	if (mfi == NULL)
		return (DDI_FAILURE);

	ret = lmrc_issue_blocked_mfi(lmrc, mfi);

	if (ret != DDI_SUCCESS)
		goto out;

	(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
	    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL);
	bcopy(mfi->mfi_data_dma.ld_buf, ci, sizeof (mfi_ctrl_info_t));

out:
	lmrc_put_dcmd(lmrc, mfi);
	return (ret);
}

/*
 * lmrc_fw_init
 *
 * Complete firmware initialization. At this point, we can already send MFI
 * commands, so we can start by getting the controller information from the
 * firmware and set up things in our soft state. Next we issue the commands
 * to get the PD map and RAID map, which will complete asynchronously when
 * new information is available and then re-send themselves.
 */
int
lmrc_fw_init(lmrc_t *lmrc)
{
	int drv_max_lds = MFI_MAX_LOGICAL_DRIVES;
	mfi_ctrl_info_t *ci = lmrc->l_ctrl_info;
	int ret;

	ret = lmrc_get_ctrl_info(lmrc);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!Unable to get FW ctrl info.");
		return (DDI_FAILURE);
	}

	lmrc->l_disable_online_ctrl_reset =
	    ci->ci_prop.cp_disable_online_ctrl_reset == 1;

	lmrc->l_max_256_vd_support =
	    ci->ci_adapter_opts3.ao3_support_max_ext_lds == 1;

	if (ci->ci_max_lds > 64) {
		lmrc->l_max_256_vd_support = B_TRUE;
		drv_max_lds = LMRC_MAX_LOGICAL_DRIVES_EXT;
	}

	lmrc->l_fw_supported_vd_count = min(ci->ci_max_lds, drv_max_lds);

	lmrc->l_fw_supported_pd_count = min(ci->ci_max_pds,
	    MFI_MAX_PHYSICAL_DRIVES);

	lmrc->l_max_map_sz = lmrc->l_current_map_sz =
	    lmrc->l_max_raid_map_sz * LMRC_MIN_MAP_SIZE;

	lmrc->l_use_seqnum_jbod_fp =
	    ci->ci_adapter_opts3.ao3_use_seq_num_jbod_FP != 0;

	lmrc->l_pdmap_tgtid_support =
	    ci->ci_adapter_opts4.ao4_support_pd_map_target_id != 0;

	return (DDI_SUCCESS);
}


/*
 * lmrc_ctrl_shutdown
 *
 * Called by lmrc_quiesce() to send a shutdown command to the controller.
 * Cannot use locks, therefore cannot use lmrc_get_dcmd() or lmrc_get_mfi().
 */
int
lmrc_ctrl_shutdown(lmrc_t *lmrc)
{
	lmrc_mfi_cmd_t *mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
	mfi_header_t *hdr;
	mfi_dcmd_payload_t *dcmd;

	if (mfi == NULL)
		return (DDI_FAILURE);

	hdr = &mfi->mfi_frame->mf_hdr;
	dcmd = &mfi->mfi_frame->mf_dcmd;

	hdr->mh_cmd = MFI_CMD_DCMD;
	hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
	dcmd->md_opcode = MFI_DCMD_CTRL_SHUTDOWN;

	lmrc_disable_intr(lmrc);
	lmrc_issue_mfi(lmrc, mfi, NULL);

	return (DDI_SUCCESS);
}

/*
 * driver target state management
 *
 * The soft state of the controller instance keeps a pre-allocated array of
 * target structures for all possible targets, even though only a small number
 * of them are likely to be used. Each target structure contains a back link
 * to the soft state and a mutex, which are never cleared or changed when a
 * target is added or removed.
 */

/*
 * lmrc_tgt_init
 *
 * Initialize the tgt structure for a newly discovered tgt. The same tgt
 * structure is used for PDs and LDs, the distinction can be made by the
 * presence or absence of tgt_pd_info. LDs are always of type disk, the type
 * of PDs is taken from their pd_info. If a device has no SAS WWN, we'll fake
 * the interconnect type to be PARALLEL to make sure device address isn't
 * misunderstood as a WWN by devfsadm.
 */
void
lmrc_tgt_init(lmrc_tgt_t *tgt, uint16_t dev_id, char *addr,
    mfi_pd_info_t *pd_info)
{
	rw_enter(&tgt->tgt_lock, RW_WRITER);

	bzero(&tgt->tgt_dev_id,
	    sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id));

	tgt->tgt_dev_id = dev_id;
	tgt->tgt_pd_info = pd_info;
	tgt->tgt_interconnect_type = INTERCONNECT_SAS;

	if (pd_info == NULL) {
		tgt->tgt_type = DTYPE_DIRECT;
	} else {
		tgt->tgt_type = pd_info->pd_scsi_dev_type;
	}

	(void) strlcpy(tgt->tgt_wwnstr, addr, sizeof (tgt->tgt_wwnstr));
	if (scsi_wwnstr_to_wwn(tgt->tgt_wwnstr, &tgt->tgt_wwn) != DDI_SUCCESS) {
		tgt->tgt_interconnect_type = INTERCONNECT_PARALLEL;
		tgt->tgt_wwn = dev_id;
	}

	rw_exit(&tgt->tgt_lock);
}

/*
 * lmrc_tgt_clear
 *
 * Reset the tgt structure of a target which is no longer present.
 */
void
lmrc_tgt_clear(lmrc_tgt_t *tgt)
{
	rw_enter(&tgt->tgt_lock, RW_WRITER);

	if (tgt->tgt_pd_info != NULL)
		kmem_free(tgt->tgt_pd_info, sizeof (mfi_pd_info_t));

	bzero(&tgt->tgt_dev_id,
	    sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id));
	tgt->tgt_dev_id = LMRC_DEVHDL_INVALID;
	rw_exit(&tgt->tgt_lock);
}

/*
 * lmrc_tgt_find
 *
 * Walk the target list and find a tgt matching the given scsi_device.
 * Return the tgt read-locked. The targets_lock mutex must be held the
 * whole time.
 */
lmrc_tgt_t *
lmrc_tgt_find(lmrc_t *lmrc, struct scsi_device *sd)
{
	const char *ua = scsi_device_unit_address(sd);
	char *comma, wwnstr[SCSI_WWN_BUFLEN];
	uint64_t wwn;
	unsigned long tgtid;
	lmrc_tgt_t *tgt;
	size_t i;

	VERIFY(ua != NULL);

	(void) strlcpy(wwnstr, ua, sizeof (wwnstr));

	/*
	 * If the unit address is a valid target ID and within range for
	 * VD IDs, use that.
	 */
	if (ddi_strtoul(wwnstr, &comma, 10, &tgtid) == 0 &&
	    *comma == ',' &&
	    tgtid <= lmrc->l_fw_supported_vd_count) {
		tgt = &lmrc->l_targets[tgtid];

		rw_enter(&tgt->tgt_lock, RW_READER);
		if (tgt->tgt_dev_id == tgtid &&
		    tgt->tgt_wwn == tgtid) {
			return (tgt);
		}
		rw_exit(&tgt->tgt_lock);
	}

	/* Chop off ",lun" as scsi_wwnstr_to_wwn() can't handle it. */
	comma = strchr(wwnstr, ',');
	if (comma != NULL)
		*comma = '\0';

	/* Else, if unit address is a valid WWN, look for that. */
	if (scsi_wwnstr_to_wwn(wwnstr, &wwn) == DDI_SUCCESS) {
		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
			tgt = &lmrc->l_targets[i];

			rw_enter(&tgt->tgt_lock, RW_READER);
			if (tgt->tgt_wwn == wwn) {
				return (tgt);
			}
			rw_exit(&tgt->tgt_lock);
		}
	} else {
		/* Do it the hard way and compare wwnstr. */
		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
			tgt = &lmrc->l_targets[i];

			rw_enter(&tgt->tgt_lock, RW_READER);
			if (strcmp(tgt->tgt_wwnstr, wwnstr) == 0) {
				return (tgt);
			}
			rw_exit(&tgt->tgt_lock);
		}
	}

	return (NULL);
}

/*
 * MPT/MFI command management
 *
 * For each kind of command, MFI and MPT, the driver keeps an array of pre-
 * allocated and pre-initialized commands. Additionally, it keeps two lists of
 * currently unused commands. A set of functions is provided for each list to
 * get and put commands from/to the list. Commands are initialized during
 * get(), because having completed commands on the list can help in certain
 * cases during debugging.
 *
 * MPT commands in use for I/O are kept on an active command list of the
 * target they are addressing. All other types of commands are not kept on any
 * list while they are being processed by the hardware. When walking the
 * command arrays, busy commands not associated with a target can be
 * distinguished by not being linked on any list.
 */

/*
 * lmrc_get_mpt
 *
 * Get a MPT command from the list and initialize it. Return the command
 * locked. Return NULL if the MPT command list is empty.
 */
lmrc_mpt_cmd_t *
lmrc_get_mpt(lmrc_t *lmrc)
{
	lmrc_mpt_cmd_t *mpt;
	Mpi25SCSIIORequest_t *io_req;

	mutex_enter(&lmrc->l_mpt_cmd_lock);
	mpt = list_remove_head(&lmrc->l_mpt_cmd_list);
	mutex_exit(&lmrc->l_mpt_cmd_lock);
	if (mpt == NULL)
		return (NULL);

	mutex_enter(&mpt->mpt_lock);
	bzero(mpt->mpt_io_frame, LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE);
	bzero(mpt->mpt_chain_dma.ld_buf, mpt->mpt_chain_dma.ld_len);
	bzero(mpt->mpt_sense_dma.ld_buf, mpt->mpt_sense_dma.ld_len);

	mpt->mpt_mfi = NULL;
	mpt->mpt_pkt = NULL;

	/* Set the offset of the SGL entries inside the MPT command. */
	io_req = mpt->mpt_io_frame;
	io_req->SGLOffset0 = offsetof(Mpi25SCSIIORequest_t, SGL) / 4;

	mpt->mpt_complete = B_FALSE;
	cv_init(&mpt->mpt_cv, NULL, CV_DRIVER, NULL);

	return (mpt);
}

/*
 * lmrc_put_mpt
 *
 * Put a MPT command back on the list. The command lock must be held when this
 * function is called, being unlocked only after the command has been put on
 * the free list. The command CV is destroyed, thereby asserting that no one
 * is still waiting on it.
 */
void
lmrc_put_mpt(lmrc_mpt_cmd_t *mpt)
{
	lmrc_t *lmrc = mpt->mpt_lmrc;

	VERIFY(lmrc != NULL);

	ASSERT0(list_link_active(&mpt->mpt_node));
	ASSERT(mutex_owned(&mpt->mpt_lock));
	cv_destroy(&mpt->mpt_cv);

	mutex_enter(&lmrc->l_mpt_cmd_lock);
	list_insert_tail(&lmrc->l_mpt_cmd_list, mpt);
	mutex_exit(&lmrc->l_mpt_cmd_lock);
	mutex_exit(&mpt->mpt_lock);
}

/*
 * lmrc_get_mfi
 *
 * Get a MFI command from the list and initialize it.

/*
 * lmrc_get_mfi
 *
 * Get an MFI command from the list and initialize it.
 */
lmrc_mfi_cmd_t *
lmrc_get_mfi(lmrc_t *lmrc)
{
	lmrc_mfi_cmd_t *mfi;

	mutex_enter(&lmrc->l_mfi_cmd_lock);
	mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
	mutex_exit(&lmrc->l_mfi_cmd_lock);
	VERIFY(mfi != NULL);

	mutex_enter(&mfi->mfi_lock);
	bzero(mfi->mfi_frame, sizeof (mfi_frame_t));
	mfi->mfi_frame->mf_hdr.mh_context = mfi->mfi_idx;
	mfi->mfi_callback = NULL;

	cv_init(&mfi->mfi_cv, NULL, CV_DRIVER, NULL);
	mutex_exit(&mfi->mfi_lock);

	return (mfi);
}

/*
 * lmrc_put_mfi
 *
 * Put an MFI command back on the list. Destroy the CV, thereby asserting
 * that no one is waiting on it.
 */
void
lmrc_put_mfi(lmrc_mfi_cmd_t *mfi)
{
	lmrc_t *lmrc = mfi->mfi_lmrc;

	VERIFY(lmrc != NULL);

	ASSERT0(list_link_active(&mfi->mfi_node));

	mutex_enter(&mfi->mfi_lock);

	cv_destroy(&mfi->mfi_cv);

	mutex_enter(&lmrc->l_mfi_cmd_lock);
	list_insert_tail(&lmrc->l_mfi_cmd_list, mfi);
	mutex_exit(&lmrc->l_mfi_cmd_lock);
	mutex_exit(&mfi->mfi_lock);
}

/*
 * lmrc_abort_outstanding_mfi
 *
 * Walk the MFI cmd array and abort each command which is still outstanding,
 * which is indicated by not being linked on l_mfi_cmd_list.
 *
 * As a special case, if the FW is in fault state, just call each command's
 * completion callback.
 */
int
lmrc_abort_outstanding_mfi(lmrc_t *lmrc, const size_t ncmd)
{
	int ret;
	int i;

	for (i = 0; i < ncmd; i++) {
		lmrc_mfi_cmd_t *mfi = lmrc->l_mfi_cmds[i];

		mutex_enter(&mfi->mfi_lock);
		if (list_link_active(&mfi->mfi_node)) {
			mutex_exit(&mfi->mfi_lock);
			continue;
		}

		/*
		 * If the FW is faulted, wake up anyone waiting on the command
		 * to clean it up.
		 */
		if (lmrc->l_fw_fault) {
			if (mfi->mfi_callback != NULL)
				mfi->mfi_callback(lmrc, mfi);
			mutex_exit(&mfi->mfi_lock);
			continue;
		}

		ret = lmrc_abort_cmd(lmrc, mfi);
		mutex_exit(&mfi->mfi_lock);
		if (ret != DDI_SUCCESS)
			return (ret);

		lmrc_dma_free(&mfi->mfi_data_dma);
		lmrc_put_mfi(mfi);
	}

	return (DDI_SUCCESS);
}

/*
 * lmrc_get_dcmd
 *
 * Build an MFI DCMD with DMA memory for data transfers.
 */
lmrc_mfi_cmd_t *
lmrc_get_dcmd(lmrc_t *lmrc, uint16_t flags, uint32_t opcode, uint32_t xferlen,
    uint_t align)
{
	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
	lmrc_dma_t *dma = &mfi->mfi_data_dma;
	int ret;

	hdr->mh_cmd = MFI_CMD_DCMD;
	hdr->mh_flags = flags;

	dcmd->md_opcode = opcode;

	if ((flags & MFI_FRAME_DIR_READ) != 0 ||
	    (flags & MFI_FRAME_DIR_WRITE) != 0) {
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, dma, xferlen,
		    align, DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS) {
			lmrc_put_mfi(mfi);
			return (NULL);
		}

		hdr->mh_flags |= MFI_FRAME_SGL64;
		hdr->mh_sge_count = 1;
		hdr->mh_data_xfer_len = lmrc_dma_get_size(dma);

		dcmd->md_sgl.ms64_length = lmrc_dma_get_size(dma);
		lmrc_dma_set_addr64(dma, &dcmd->md_sgl.ms64_phys_addr);
	}

	return (mfi);
}

/*
 * lmrc_put_dcmd
 *
 * Free the DMA memory of an MFI DCMD and put the command back on the list.
 */
void
lmrc_put_dcmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	lmrc_dma_free(&mfi->mfi_data_dma);
	lmrc_put_mfi(mfi);
}
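
/*
 * Illustrative sketch, not part of the driver: a typical blocking DCMD round
 * trip built from lmrc_get_dcmd()/lmrc_put_dcmd(). The opcode, xferlen, and
 * buf used here stand in for whatever the caller needs; see
 * lmrc_get_event_log_info() below for a concrete in-driver example.
 */
#if 0
	lmrc_mfi_cmd_t *mfi;
	int ret;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, opcode, xferlen, 1);
	if (mfi == NULL)
		return (DDI_FAILURE);

	ret = lmrc_issue_blocked_mfi(lmrc, mfi);
	if (ret == DDI_SUCCESS)
		bcopy(mfi->mfi_data_dma.ld_buf, buf, xferlen);

	lmrc_put_dcmd(lmrc, mfi);	/* frees the DMA memory as well */
	return (ret);
#endif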

/*
 * Asynchronous Event Notifications
 */
/*
 * lmrc_get_event_log_info
 *
 * Get the Event Log Info from the firmware.
 */
static int
lmrc_get_event_log_info(lmrc_t *lmrc, mfi_evt_log_info_t *eli)
{
	lmrc_mfi_cmd_t *mfi;
	int ret;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ,
	    MFI_DCMD_CTRL_EVENT_GET_INFO, sizeof (mfi_evt_log_info_t), 1);

	if (mfi == NULL)
		return (DDI_FAILURE);

	ret = lmrc_issue_blocked_mfi(lmrc, mfi);

	if (ret != DDI_SUCCESS)
		goto out;

	bcopy(mfi->mfi_data_dma.ld_buf, eli, sizeof (mfi_evt_log_info_t));

out:
	lmrc_put_dcmd(lmrc, mfi);
	return (ret);
}
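
/*
 * The AEN machinery in this section forms a loop: lmrc_start_aen() fetches
 * the newest event sequence number from the Event Log Info and registers an
 * EVENT_WAIT DCMD for the next sequence number via lmrc_register_aen(). When
 * the firmware completes that command, lmrc_complete_aen() dispatches
 * lmrc_aen_handler() onto the taskq, which processes the event and re-issues
 * the same command with the sequence number incremented by one.
 */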

/*
 * lmrc_aen_handler
 *
 * Check the event code and handle it as needed. In the case of PD- or
 * LD-related events, invoke the respective special handlers.
 */
static void
lmrc_aen_handler(void *arg)
{
	lmrc_mfi_cmd_t *mfi = arg;
	lmrc_t *lmrc = mfi->mfi_lmrc;
	mfi_evt_detail_t *evt = mfi->mfi_data_dma.ld_buf;
	mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
	int ret = DDI_FAILURE;

	/* Controller & Configuration specific events */
	switch (evt->evt_code) {
	case MFI_EVT_CFG_CLEARED:
	case MFI_EVT_CTRL_HOST_BUS_SCAN_REQD:
	case MFI_EVT_FOREIGN_CFG_IMPORTED:
		ret = lmrc_get_pd_list(lmrc);
		if (ret != DDI_SUCCESS)
			break;

		ret = lmrc_get_ld_list(lmrc);
		break;

	case MFI_EVT_CTRL_PROP_CHANGED:
		ret = lmrc_get_ctrl_info(lmrc);
		break;

	case MFI_EVT_CTRL_PATROL_READ_START:
	case MFI_EVT_CTRL_PATROL_READ_RESUMED:
	case MFI_EVT_CTRL_PATROL_READ_COMPLETE:
	case MFI_EVT_CTRL_PATROL_READ_CANT_START:
	case MFI_EVT_CTRL_PERF_COLLECTION:
	case MFI_EVT_CTRL_BOOTDEV_SET:
	case MFI_EVT_CTRL_BOOTDEV_RESET:
	case MFI_EVT_CTRL_PERSONALITY_CHANGE:
	case MFI_EVT_CTRL_PERSONALITY_CHANGE_PEND:
	case MFI_EVT_CTRL_NR_OF_VALID_SNAPDUMP:
		break;

	default:
		/* LD-specific events */
		if ((evt->evt_cl.evt_locale & MFI_EVT_LOCALE_LD) != 0)
			ret = lmrc_raid_aen_handler(lmrc, evt);

		/* PD-specific events */
		else if ((evt->evt_cl.evt_locale & MFI_EVT_LOCALE_PD) != 0)
			ret = lmrc_phys_aen_handler(lmrc, evt);

		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_NOTE, "!unknown AEN received, "
			    "seqnum = %d, timestamp = %d, code = %x, "
			    "locale = %x, class = %d, argtype = %d",
			    evt->evt_seqnum, evt->evt_timestamp, evt->evt_code,
			    evt->evt_cl.evt_locale, evt->evt_cl.evt_class,
			    evt->evt_argtype);
		}
	}

	dev_err(lmrc->l_dip, CE_NOTE, "!%s", evt->evt_descr);

	/*
	 * Just reuse the command in its entirety. Increase the sequence
	 * number.
	 */
	dcmd->md_mbox_32[0] = evt->evt_seqnum + 1;
	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
	mutex_exit(&mfi->mfi_lock);
}

/*
 * lmrc_complete_aen
 *
 * An AEN was received, so schedule a taskq to process it.
 */
static void
lmrc_complete_aen(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
{
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;

	ASSERT(mutex_owned(&mfi->mfi_lock));

	if (hdr->mh_cmd_status != MFI_STAT_OK) {
		/* Was the command aborted? */
		if (hdr->mh_cmd_status == MFI_STAT_NOT_FOUND)
			return;

		dev_err(lmrc->l_dip, CE_WARN,
		    "!AEN failed, status = %d",
		    hdr->mh_cmd_status);
		taskq_dispatch_ent(lmrc->l_taskq, (task_func_t *)lmrc_put_mfi,
		    mfi, TQ_NOSLEEP, &mfi->mfi_tqent);
		return;
	}

	taskq_dispatch_ent(lmrc->l_taskq, lmrc_aen_handler, mfi, TQ_NOSLEEP,
	    &mfi->mfi_tqent);
}

/*
 * lmrc_register_aen
 *
 * In FreeBSD, this function checks for an existing AEN. If its class and
 * locale already include what is requested here, it just returns. Otherwise,
 * the existing AEN is aborted and a new one is created which includes the
 * previous locale and class as well as the new ones.
 *
 * Given that the driver (same as in FreeBSD) calls this function during
 * attach to create an AEN with LOCALE_ALL and CLASS_DEBUG, all of this would
 * be dead code anyway.
 */
static int
lmrc_register_aen(lmrc_t *lmrc, uint32_t seqnum)
{
	lmrc_mfi_cmd_t *mfi;
	mfi_dcmd_payload_t *dcmd;

	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, MFI_DCMD_CTRL_EVENT_WAIT,
	    sizeof (mfi_evt_detail_t), 1);

	if (mfi == NULL)
		return (DDI_FAILURE);

	dcmd = &mfi->mfi_frame->mf_dcmd;
	dcmd->md_mbox_32[0] = seqnum;
	dcmd->md_mbox_16[2] = MFI_EVT_LOCALE_ALL;
	dcmd->md_mbox_8[7] = MFI_EVT_CLASS_DEBUG;

	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
	mutex_exit(&mfi->mfi_lock);

	return (DDI_SUCCESS);
}

/*
 * lmrc_start_aen
 *
 * Set up and enable AEN processing.
 */
int
lmrc_start_aen(lmrc_t *lmrc)
{
	mfi_evt_log_info_t eli;
	int ret;

	bzero(&eli, sizeof (eli));

	/* Get the latest sequence number from the Event Log Info. */
	ret = lmrc_get_event_log_info(lmrc, &eli);
	if (ret != DDI_SUCCESS)
		return (ret);

	/* Register an AEN with the FW for the latest sequence number + 1. */
	ret = lmrc_register_aen(lmrc, eli.eli_newest_seqnum + 1);
	return (ret);
}
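
/*
 * Illustrative sketch, not part of the driver: as noted above, AEN processing
 * is expected to be set up once during attach. The error handling shown here
 * is made up for illustration.
 */
#if 0
	if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!failed to start AEN");
		return (DDI_FAILURE);
	}
#endif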