xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/lmrc/lmrc.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Racktop Systems, Inc.
14  */
15 
16 /*
17  * This file implements the interfaces for communicating with the MegaRAID HBA.
18  * There are three basic interfaces:
19  * - the device registers, which provide basic information about the controller
20  *   hardware and the features it supports, as well as control registers used
21  *   during sending and reception of I/O frames
22  * - Fusion-MPT v2.5, perhaps later, which defines the format of the I/O frames
23  *   used for communicating with the HBA and virtual and physical devices that
24  *   are attached to it
25  * - MFI, the MegaRAID Firmware Interface, which are sent and received as MPT
26  *   payloads to control and communicate with the RAID controller.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/scsi/scsi.h>
33 
34 #include <sys/scsi/adapters/mfi/mfi.h>
35 #include <sys/scsi/adapters/mfi/mfi_evt.h>
36 #include <sys/scsi/adapters/mfi/mfi_pd.h>
37 
38 #include <sys/cpuvar.h>
39 
40 #include "lmrc.h"
41 #include "lmrc_reg.h"
42 #include "lmrc_raid.h"
43 #include "lmrc_phys.h"
44 
/*
 * Forward declarations of file-local (static) functions.
 */
static uint32_t lmrc_read_reg(lmrc_t *, uint32_t);
static void lmrc_write_reg(lmrc_t *, uint32_t, uint32_t);
static int lmrc_transition_to_ready(lmrc_t *);
static void lmrc_process_mptmfi_passthru(lmrc_t *, lmrc_mpt_cmd_t *);
static int lmrc_poll_mfi(lmrc_t *, lmrc_mfi_cmd_t *, uint8_t);
static boolean_t lmrc_check_fw_fault(lmrc_t *);
static int lmrc_get_event_log_info(lmrc_t *, mfi_evt_log_info_t *);
static void lmrc_aen_handler(void *);
static void lmrc_complete_aen(lmrc_t *, lmrc_mfi_cmd_t *);
static int lmrc_register_aen(lmrc_t *, uint32_t);
55 
56 /*
57  * Device register access functions.
58  *
59  * Due to the way ddi_get* and ddi_put* work, we'll need to calculate the
60  * absolute virtual address of the registers ourselves.
61  *
 * For read accesses, employ an erratum workaround for Aero controllers. In some
63  * cases, reads of certain registers will intermittently return all zeros. As a
64  * workaround, retry the read up to three times until a non-zero value is read.
65  * Supposedly this is enough, every other driver I looked at does this.
66  */
67 static uint32_t
68 lmrc_read_reg_1(lmrc_t *lmrc, uint32_t reg)
69 {
70 	uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg);
71 	return (ddi_get32(lmrc->l_reghandle, addr));
72 }
73 
74 static uint32_t
75 lmrc_read_reg(lmrc_t *lmrc, uint32_t reg)
76 {
77 	if (lmrc->l_class != LMRC_ACLASS_AERO)
78 		return (lmrc_read_reg_1(lmrc, reg));
79 
80 	/* Workaround for the hardware erratum in Aero controllers */
81 	for (uint_t i = 0; i < 3; i++) {
82 		uint32_t val = lmrc_read_reg_1(lmrc, reg);
83 
84 		if (val != 0)
85 			return (val);
86 	}
87 
88 	return (0);
89 }
90 
91 static void
92 lmrc_write_reg(lmrc_t *lmrc, uint32_t reg, uint32_t val)
93 {
94 	uint32_t *addr = (uint32_t *)((uintptr_t)lmrc->l_regmap + reg);
95 	ddi_put32(lmrc->l_reghandle, addr, val);
96 }
97 
98 static void
99 lmrc_write_reg64(lmrc_t *lmrc, uint32_t reg, uint64_t val)
100 {
101 	uint64_t *addr = (uint64_t *)((uintptr_t)lmrc->l_regmap + reg);
102 	ddi_put64(lmrc->l_reghandle, addr, val);
103 }
104 
105 /*
106  * Interrupt control
107  *
108  * There are two interrupt registers for host driver use, HostInterruptStatus
 * and HostInterruptMask. Most of the bits in each register are reserved and
 * must be masked and/or preserved when used.
111  */
112 void
113 lmrc_disable_intr(lmrc_t *lmrc)
114 {
115 	uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
116 
117 	/* Disable all known interrupt: reset, reply, and doorbell. */
118 	mask |= MPI2_HIM_RESET_IRQ_MASK;
119 	mask |= MPI2_HIM_REPLY_INT_MASK;
120 	mask |= MPI2_HIM_IOC2SYS_DB_MASK;
121 
122 	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask);
123 
124 	/* Dummy read to force pci flush. Probably bogus but harmless. */
125 	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
126 }
127 
128 void
129 lmrc_enable_intr(lmrc_t *lmrc)
130 {
131 	uint32_t mask = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
132 
133 	/* Enable the reply interrupts and the doorbell interrupts. */
134 	mask &= ~MPI2_HIM_REPLY_INT_MASK;
135 	mask &= ~MPI2_HIM_IOC2SYS_DB_MASK;
136 
137 	/* Clear outstanding interrupts before enabling any. */
138 	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0);
139 	/* Dummy read to force pci flush. Probably bogus but harmless. */
140 	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
141 
142 	lmrc_write_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET, mask);
143 	/* Dummy read to force pci flush. Probably bogus but harmless. */
144 	(void) lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_MASK_OFFSET);
145 }
146 
147 uint_t
148 lmrc_intr_ack(lmrc_t *lmrc)
149 {
150 	uint32_t mask =
151 	    MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT | MPI2_HIS_IOC2SYS_DB_STATUS;
152 	uint32_t status;
153 
154 	status = lmrc_read_reg(lmrc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
155 
156 	if ((status & mask) == 0)
157 		return (DDI_INTR_UNCLAIMED);
158 
159 	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_SUCCESS) {
160 		ddi_fm_service_impact(lmrc->l_dip, DDI_SERVICE_LOST);
161 		return (DDI_INTR_UNCLAIMED);
162 	}
163 
164 	return (DDI_INTR_CLAIMED);
165 }
166 
167 /*
168  * Fusion-MPT requests
169  *
170  * The controller expects to have access to a large chunk of DMA memory, into
171  * which the driver writes fixed-size I/O requests for the controller to
172  * process. To notify the hardware about a new request, a request descriptor is
173  * written to the queue port registers which includes the SMID of the request.
174  * This memory isn't really a queue, though, as it seems there are no
175  * constraints about ordering of the requests. All that matters is that there
176  * is a valid request at the address that corresponds with the SMID in the
177  * descriptor.
178  *
179  * If the hardware supports MPI 2.6 atomic request descriptors, which are a
180  * 32bit subset of the 64bit MPI 2.0/2.5 request descriptors, the descriptor is
181  * sent to the controller in a single 32bit write into a device register.
182  *
183  * For all other descriptor types, we'll employ a 64bit write to the queue
184  * registers, assuming that provides the required atomicity.
185  */
186 void
187 lmrc_send_atomic_request(lmrc_t *lmrc, lmrc_atomic_req_desc_t req_desc)
188 {
189 	if (lmrc->l_atomic_desc_support) {
190 		lmrc_write_reg(lmrc,
191 		    MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
192 		    req_desc.rd_reg);
193 	} else {
194 		lmrc_req_desc_t rd;
195 
196 		bzero(&rd, sizeof (rd));
197 		rd.rd_atomic = req_desc;
198 
199 		lmrc_send_request(lmrc, rd);
200 	}
201 }
202 
203 void
204 lmrc_send_request(lmrc_t *lmrc, lmrc_req_desc_t req_desc)
205 {
206 	lmrc_write_reg64(lmrc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
207 	    req_desc.rd_reg);
208 }
209 
210 lmrc_atomic_req_desc_t
211 lmrc_build_atomic_request(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt, uint8_t flags)
212 {
213 	lmrc_atomic_req_desc_t req_desc;
214 
215 	VERIFY3U(mpt->mpt_smid, !=, 0);
216 
217 	/*
218 	 * Select the reply queue based on the CPU id to distribute reply load
219 	 * among queues.
220 	 */
221 	mpt->mpt_queue = CPU->cpu_id % lmrc->l_max_reply_queues;
222 
223 	bzero(&req_desc, sizeof (req_desc));
224 
225 	req_desc.rd_atomic.RequestFlags = flags;
226 	req_desc.rd_atomic.MSIxIndex = mpt->mpt_queue;
227 	req_desc.rd_atomic.SMID = mpt->mpt_smid;
228 
229 	return (req_desc);
230 }
231 
232 /*
233  * Reply Processing
234  *
235  * The controller will post replies to completed requests in the DMA memory
236  * provided for that purpose. This memory is divided in equally-sized chunks,
237  * each being a separate reply queue that is also associated with an interrupt
238  * vector. The replies are fixed size structures and will be written by the
239  * hardware in order of completion into the queue. For each queue, there is a
240  * register to tell the hardware which replies have been consumed by the driver.
241  *
242  * In response to an interrupt, the driver will walk the reply queue associated
 * with the interrupt vector at the last known position and processes all
244  * completed replies. After a number of replies has been processed, or if no
245  * more replies are ready to be processed, the controller will be notified about
246  * the last reply index to be processed by writing the appropriate register.
247  */
248 
249 /*
250  * lmrc_get_next_reply_desc
251  *
252  * Get the next unprocessed reply descriptor for a queue, or NULL if there is
253  * none.
254  */
255 static Mpi2ReplyDescriptorsUnion_t *
256 lmrc_get_next_reply_desc(lmrc_t *lmrc, int queue)
257 {
258 	Mpi2ReplyDescriptorsUnion_t *desc;
259 
260 	desc = lmrc->l_reply_dma.ld_buf;
261 
262 	desc += (queue * lmrc->l_reply_alloc_sz) / sizeof (*desc);
263 	desc += lmrc->l_last_reply_idx[queue];
264 
265 	VERIFY3S(ddi_dma_sync(lmrc->l_reply_dma.ld_hdl,
266 	    (void *)desc - lmrc->l_reply_dma.ld_buf, sizeof (*desc),
267 	    DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS);
268 
269 	/*
270 	 * Check if this is an unused reply descriptor, indicating that
271 	 * we've reached the end of replies in this queue.
272 	 *
273 	 * Even if the descriptor is only "half unused" we can't use it.
274 	 */
275 	if (desc->Words.Low == MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK ||
276 	    desc->Words.High == MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK)
277 		return (NULL);
278 
279 	/* advance last reply index, wrap around if necessary */
280 	lmrc->l_last_reply_idx[queue]++;
281 	if (lmrc->l_last_reply_idx[queue] >= lmrc->l_reply_q_depth)
282 		lmrc->l_last_reply_idx[queue] = 0;
283 
284 	return (desc);
285 }
286 
287 /*
288  * lmrc_write_rphi
289  *
290  * Write the Reply Post Host Index register for queue.
291  */
292 static void
293 lmrc_write_rphi(lmrc_t *lmrc, uint32_t queue)
294 {
295 	int reg = 0;
296 	uint32_t val = (queue << 24) | lmrc->l_last_reply_idx[queue];
297 
298 	if (lmrc->l_intr_type != DDI_INTR_TYPE_MSIX)
299 		VERIFY3U(queue, ==, 0);
300 
301 	if (lmrc->l_msix_combined) {
302 		reg = queue / 8;
303 		val &= 0x07ffffff;
304 	}
305 
306 	lmrc_write_reg(lmrc, lmrc->l_rphi[reg], val);
307 }
308 
309 /*
310  * lmrc_process_mpt_pkt
311  *
312  * Process a reply to a MPT IO request. Update the scsi_pkt according to status,
313  * ex_status, and data_len, setting up the ARQ pkt if necessary.
314  */
315 static void
316 lmrc_process_mpt_pkt(lmrc_t *lmrc, struct scsi_pkt *pkt, uint8_t status,
317     uint8_t ex_status, uint32_t data_len)
318 {
319 	pkt->pkt_statistics = 0;
320 	pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
321 	    STATE_XFERRED_DATA | STATE_GOT_STATUS;
322 
323 	pkt->pkt_resid = pkt->pkt_dma_len - data_len;
324 
325 	switch (status) {
326 	case MFI_STAT_OK:
327 	case MFI_STAT_LD_CC_IN_PROGRESS:
328 	case MFI_STAT_LD_RECON_IN_PROGRESS:
329 		pkt->pkt_reason = CMD_CMPLT;
330 		pkt->pkt_scbp[0] = STATUS_GOOD;
331 		break;
332 
333 	case MFI_STAT_SCSI_DONE_WITH_ERROR:
334 	case MFI_STAT_LD_LBA_OUT_OF_RANGE: {
335 		struct scsi_arq_status *arq =
336 		    (struct scsi_arq_status *)pkt->pkt_scbp;
337 
338 		pkt->pkt_reason = CMD_CMPLT;
339 		arq->sts_status.sts_chk = 1;
340 
341 		pkt->pkt_state |= STATE_ARQ_DONE;
342 		arq->sts_rqpkt_reason = CMD_CMPLT;
343 		arq->sts_rqpkt_resid = 0;
344 		arq->sts_rqpkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET |
345 		    STATE_SENT_CMD | STATE_XFERRED_DATA;
346 		*(uint8_t *)&arq->sts_rqpkt_status = STATUS_GOOD;
347 		break;
348 	}
349 	case MFI_STAT_LD_OFFLINE:
350 	case MFI_STAT_DEVICE_NOT_FOUND:
351 		pkt->pkt_reason = CMD_DEV_GONE;
352 		pkt->pkt_statistics = STAT_DISCON;
353 		break;
354 
355 	default:
356 		dev_err(lmrc->l_dip, CE_WARN, "!command failed, status = %x, "
357 		    "ex_status = %x, cdb[0] = %x", status, ex_status,
358 		    pkt->pkt_cdbp[0]);
359 		pkt->pkt_reason = CMD_TRAN_ERR;
360 		break;
361 	}
362 }
363 
364 /*
365  * lmrc_poll_for_reply
366  *
367  * During a panic we'll have to resort to polled I/O to write core dumps.
368  * Repeatedly check the reply queue for a new reply associated with the
369  * given request descriptor and complete it, or return an error if we get
370  * no reply within a reasonable time.
371  */
372 int
373 lmrc_poll_for_reply(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt)
374 {
375 	clock_t max_wait = LMRC_IO_TIMEOUT * MILLISEC * 10;
376 	Mpi25SCSIIORequest_t *io_req = mpt->mpt_io_frame;
377 	Mpi2ReplyDescriptorsUnion_t *desc;
378 	uint16_t desc_smid;
379 
380 	VERIFY(ddi_in_panic());
381 
382 	/*
383 	 * Walk the reply queue. Discard entries which we aren't
384 	 * looking for.
385 	 */
386 	do {
387 		desc = lmrc_get_next_reply_desc(lmrc, mpt->mpt_queue);
388 		if (desc == NULL) {
389 			if (max_wait == 0)
390 				return (TRAN_FATAL_ERROR);
391 
392 			drv_usecwait(100);
393 			max_wait--;
394 			continue;
395 		}
396 
397 		desc_smid = desc->SCSIIOSuccess.SMID;
398 
399 		/* reset descriptor */
400 		desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK;
401 		desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK;
402 
403 		lmrc_write_rphi(lmrc, mpt->mpt_queue);
404 	} while (desc == NULL || desc_smid != mpt->mpt_smid);
405 
406 	VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
407 	    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
408 	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORKERNEL),
409 	    ==, DDI_SUCCESS);
410 
411 	/* If this is I/O, process it. */
412 	if (io_req->Function == LMRC_MPI2_FUNCTION_LD_IO_REQUEST ||
413 	    io_req->Function == MPI2_FUNCTION_SCSI_IO_REQUEST) {
414 		lmrc_process_mpt_pkt(lmrc, mpt->mpt_pkt,
415 		    io_req->VendorRegion.rc_status,
416 		    io_req->VendorRegion.rc_exstatus, io_req->DataLength);
417 	}
418 
419 	return (TRAN_ACCEPT);
420 }
421 
422 /*
423  * lmrc_process_replies
424  *
425  * Process all new reply entries in a queue in response to an interrupt.
426  */
427 int
428 lmrc_process_replies(lmrc_t *lmrc, uint8_t queue)
429 {
430 	int nprocessed = 0;
431 	Mpi2ReplyDescriptorsUnion_t *desc;
432 
433 	for (desc = lmrc_get_next_reply_desc(lmrc, queue);
434 	    desc != NULL;
435 	    desc = lmrc_get_next_reply_desc(lmrc, queue)) {
436 		Mpi2SCSIIOSuccessReplyDescriptor_t *reply =
437 		    &desc->SCSIIOSuccess;
438 		uint16_t smid = reply->SMID;
439 		lmrc_mpt_cmd_t *mpt = lmrc->l_mpt_cmds[smid - 1];
440 		lmrc_tgt_t *tgt = NULL;
441 		Mpi25SCSIIORequest_t *io_req;
442 		struct scsi_pkt *pkt;
443 		struct scsi_device *sd;
444 
445 		VERIFY3U(reply->SMID, <=, lmrc->l_max_fw_cmds);
446 
447 		mutex_enter(&mpt->mpt_lock);
448 		mpt->mpt_complete = B_TRUE;
449 		pkt = mpt->mpt_pkt;
450 		io_req = mpt->mpt_io_frame;
451 
452 		VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
453 		    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
454 		    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE,
455 		    DDI_DMA_SYNC_FORKERNEL), ==, DDI_SUCCESS);
456 
457 
458 		switch (io_req->Function) {
459 		case MPI2_FUNCTION_SCSI_TASK_MGMT:
460 			VERIFY0(pkt);
461 			VERIFY0(list_link_active(&mpt->mpt_node));
462 			cv_signal(&mpt->mpt_cv);
463 			break;
464 
465 		case MPI2_FUNCTION_SCSI_IO_REQUEST:
466 		case LMRC_MPI2_FUNCTION_LD_IO_REQUEST:
467 			VERIFY(pkt != NULL);
468 
469 			sd = scsi_address_device(&pkt->pkt_address);
470 			VERIFY(sd != NULL);
471 
472 			tgt = scsi_device_hba_private_get(sd);
473 			VERIFY(tgt != NULL);
474 
475 			lmrc_process_mpt_pkt(lmrc, pkt,
476 			    io_req->VendorRegion.rc_status,
477 			    io_req->VendorRegion.rc_exstatus,
478 			    io_req->DataLength);
479 
480 			break;
481 
482 		case LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST:
483 			VERIFY0(pkt);
484 			VERIFY0(list_link_active(&mpt->mpt_node));
485 			lmrc_process_mptmfi_passthru(lmrc, mpt);
486 			break;
487 
488 		default:
489 			mutex_exit(&mpt->mpt_lock);
490 			dev_err(lmrc->l_dip, CE_PANIC,
491 			    "reply received for unknown Function %x",
492 			    io_req->Function);
493 		}
494 
495 		mutex_exit(&mpt->mpt_lock);
496 
497 		if (pkt != NULL) {
498 			lmrc_tgt_rem_active_mpt(tgt, mpt);
499 			atomic_dec_uint(&lmrc->l_fw_outstanding_cmds);
500 			scsi_hba_pkt_comp(pkt);
501 		}
502 
503 		/* reset descriptor */
504 		desc->Words.Low = MPI2_RPY_DESCRIPT_UNUSED_WORD0_MARK;
505 		desc->Words.High = MPI2_RPY_DESCRIPT_UNUSED_WORD1_MARK;
506 
507 		nprocessed++;
508 
509 		if (nprocessed % LMRC_THRESHOLD_REPLY_COUNT == 0)
510 			lmrc_write_rphi(lmrc, queue);
511 	}
512 
513 	if (nprocessed != 0 && nprocessed % LMRC_THRESHOLD_REPLY_COUNT != 0)
514 		lmrc_write_rphi(lmrc, queue);
515 
516 	return (DDI_INTR_CLAIMED);
517 }
518 
519 
520 /*
521  * MFI - MegaRAID Firmware Interface
522  */
523 
524 /*
525  * lmrc_build_mptmfi_passthru
526  *
527  * MFI commands are send as MPT MFI passthrough I/O requests. To be able to send
528  * a MFI frame to the RAID controller, we need to have a MPT command set up as
529  * MPT I/O request and a one-entry SGL pointing to the MFI command.
530  *
531  * As there's only a small number of MFI commands compared to the amound of MPT
532  * commands, the MPT command for each MFI is pre-allocated at attach time and
533  * initialized here.
534  */
535 int
536 lmrc_build_mptmfi_passthru(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
537 {
538 	Mpi25SCSIIORequest_t *io_req;
539 	const ddi_dma_cookie_t *cookie;
540 	lmrc_mpt_cmd_t *mpt;
541 
542 	mpt = lmrc_get_mpt(lmrc);
543 	if (mpt == NULL)
544 		return (DDI_FAILURE);
545 
546 	/* lmrc_get_mpt() should return the mpt locked */
547 	ASSERT(mutex_owned(&mpt->mpt_lock));
548 
549 	mfi->mfi_mpt = mpt;
550 	mpt->mpt_mfi = mfi;
551 
552 	io_req = mpt->mpt_io_frame;
553 	io_req->Function = LMRC_MPI2_FUNCTION_PASSTHRU_IO_REQUEST;
554 	io_req->ChainOffset = lmrc->l_chain_offset_mfi_pthru;
555 
556 	cookie = ddi_dma_cookie_one(mfi->mfi_frame_dma.ld_hdl);
557 	lmrc_dma_build_sgl(lmrc, mpt, cookie, 1);
558 
559 	VERIFY3S(ddi_dma_sync(lmrc->l_ioreq_dma.ld_hdl,
560 	    (void *)io_req - lmrc->l_ioreq_dma.ld_buf,
561 	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE, DDI_DMA_SYNC_FORDEV),
562 	    ==, DDI_SUCCESS);
563 
564 	/*
565 	 * As we're not sending this command to the hardware any time soon,
566 	 * drop the mutex before we return.
567 	 */
568 	mutex_exit(&mpt->mpt_lock);
569 
570 	return (DDI_SUCCESS);
571 }
572 
573 /*
574  * lmrc_process_mptmfi_passthru
575  *
576  * When a MPT MFI passthrough command completes, invoke the callback if there
577  * is one. Panic if an invalid command completed as that should never happen.
578  */
579 static void
580 lmrc_process_mptmfi_passthru(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt)
581 {
582 	lmrc_mfi_cmd_t *mfi;
583 	mfi_header_t *hdr;
584 
585 	VERIFY3P(mpt->mpt_mfi, !=, NULL);
586 	mfi = mpt->mpt_mfi;
587 	VERIFY0(list_link_active(&mfi->mfi_node));
588 
589 	hdr = &mfi->mfi_frame->mf_hdr;
590 
591 	if ((hdr->mh_flags & MFI_FRAME_DIR_READ) != 0)
592 		(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
593 		    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL);
594 
595 	switch (hdr->mh_cmd) {
596 	case MFI_CMD_DCMD:
597 	case MFI_CMD_LD_SCSI_IO:
598 	case MFI_CMD_PD_SCSI_IO:
599 	case MFI_CMD_ABORT:
600 		mutex_enter(&mfi->mfi_lock);
601 		if (mfi->mfi_callback != NULL)
602 			mfi->mfi_callback(lmrc, mfi);
603 		mutex_exit(&mfi->mfi_lock);
604 		break;
605 
606 	case MFI_CMD_INVALID:
607 	default:
608 		dev_err(lmrc->l_dip, CE_PANIC,
609 		    "invalid MFI cmd completion received, cmd = %x",
610 		    hdr->mh_cmd);
611 		break;
612 	}
613 }
614 
615 /*
616  * lmrc_issue_mfi
617  *
618  * Post a MFI command to the firmware. Reset the cmd_status to invalid. Build
619  * a MPT MFI passthru command if necessary and a MPT atomic request descriptor
620  * before posting the request. The MFI command's mutex must be held.
621  */
622 void
623 lmrc_issue_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, lmrc_mfi_cmd_cb_t *cb)
624 {
625 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
626 	lmrc_atomic_req_desc_t req_desc;
627 
628 	ASSERT(mutex_owned(&mfi->mfi_lock));
629 
630 	if ((hdr->mh_flags & MFI_FRAME_DONT_POST_IN_REPLY_QUEUE) == 0) {
631 		VERIFY3U(cb, !=, NULL);
632 		mfi->mfi_callback = cb;
633 	} else {
634 		VERIFY3U(cb, ==, NULL);
635 	}
636 
637 	hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS;
638 
639 	req_desc = lmrc_build_atomic_request(lmrc, mfi->mfi_mpt,
640 	    MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO);
641 
642 	(void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0,
643 	    mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV);
644 
645 	if ((hdr->mh_flags & MFI_FRAME_DIR_WRITE) != 0)
646 		(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
647 		    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORDEV);
648 
649 	lmrc_send_atomic_request(lmrc, req_desc);
650 }
651 
652 /*
653  * lmrc_poll_mfi
654  *
655  * Poll a MFI command for completion, waiting up to max_wait secs. Repeatedly
656  * check the command status until it changes to something that is not invalid.
657  *
658  * Trigger an online controller reset on timeout.
659  */
660 static int
661 lmrc_poll_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait)
662 {
663 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
664 	lmrc_dma_t *dma = &mfi->mfi_frame_dma;
665 	clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC);
666 	clock_t now;
667 
668 	ASSERT(mutex_owned(&mfi->mfi_lock));
669 
670 	do {
671 		(void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len,
672 		    DDI_DMA_SYNC_FORKERNEL);
673 		if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
674 			break;
675 
676 		(void) cv_reltimedwait(&mfi->mfi_cv, &mfi->mfi_lock,
677 		    drv_usectohz(MILLISEC), TR_MILLISEC);
678 		now = ddi_get_lbolt();
679 	} while (!lmrc->l_fw_fault && now <= timeout);
680 
681 	if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
682 		return (DDI_SUCCESS);
683 
684 	if (now > timeout) {
685 		dev_err(lmrc->l_dip, CE_WARN,
686 		    "!%s: command timeout after %ds", __func__, max_wait);
687 
688 		/*
689 		 * Signal the housekeeping thread to check for FW/HW faults,
690 		 * performing a reset if necessary.
691 		 */
692 		cv_signal(&lmrc->l_thread_cv);
693 	}
694 
695 	return (DDI_FAILURE);
696 }
697 
698 /*
699  * lmrc_wait_mfi
700  *
701  * Wait for up to max_wait secs for a MFI command to complete. The cmd mutex
702  * must be held.
703  *
704  * Trigger an online controller reset on timeout.
705  */
706 int
707 lmrc_wait_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi, uint8_t max_wait)
708 {
709 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
710 	lmrc_dma_t *dma = &mfi->mfi_frame_dma;
711 	clock_t timeout = ddi_get_lbolt() + drv_usectohz(max_wait * MICROSEC);
712 	int ret;
713 
714 	ASSERT(mutex_owned(&mfi->mfi_lock));
715 
716 	do {
717 		ret = cv_timedwait(&mfi->mfi_cv, &mfi->mfi_lock, timeout);
718 
719 		(void) ddi_dma_sync(dma->ld_hdl, 0, dma->ld_len,
720 		    DDI_DMA_SYNC_FORKERNEL);
721 
722 	} while (!lmrc->l_fw_fault &&
723 	    hdr->mh_cmd_status == MFI_STAT_INVALID_STATUS && ret != -1);
724 
725 	if (!lmrc->l_fw_fault && ret != -1)
726 		return (DDI_SUCCESS);
727 
728 	if (ret == -1) {
729 		dev_err(lmrc->l_dip, CE_WARN, "!%s: blocked command timeout "
730 		    "after %ds, cmd = %d, status = %d", __func__, max_wait,
731 		    hdr->mh_cmd, hdr->mh_cmd_status);
732 
733 		/*
734 		 * Signal the housekeeping thread to check for FW/HW faults,
735 		 * performing a reset if necessary.
736 		 */
737 		cv_signal(&lmrc->l_thread_cv);
738 	}
739 
740 	return (DDI_FAILURE);
741 }
742 
743 /*
744  * lmrc_wakeup_mfi
745  *
746  * Signal the CV associated with a MFI command to wake up the thread waiting
747  * for its completion.
748  */
749 void
750 lmrc_wakeup_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *cmd)
751 {
752 	ASSERT(mutex_owned(&cmd->mfi_lock));
753 	cv_signal(&cmd->mfi_cv);
754 }
755 
756 /*
757  * lmrc_issue_blocked_mfi
758  *
759  * Post a MFI command to the firmware and wait for the command to complete.
760  */
761 int
762 lmrc_issue_blocked_mfi(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
763 {
764 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
765 	int ret;
766 
767 	mutex_enter(&mfi->mfi_lock);
768 	lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi);
769 	ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
770 	mutex_exit(&mfi->mfi_lock);
771 
772 	if (ret == DDI_SUCCESS && hdr->mh_cmd_status == MFI_STAT_OK)
773 		return (DDI_SUCCESS);
774 
775 	dev_err(lmrc->l_dip, CE_WARN,
776 	    "!%s: blocked command failure, cmd = %d, status = %d",
777 	    __func__, hdr->mh_cmd, hdr->mh_cmd_status);
778 
779 	return (ret);
780 }
781 
782 /*
783  * lmrc_abort_cb
784  *
785  * Callback for any command that is to be aborted.
786  *
787  * If the command completed normally before it could be aborted, set the status
788  * to indicate the intended abortion.
789  */
790 static void
791 lmrc_abort_cb(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
792 {
793 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
794 
795 	if (hdr->mh_cmd_status == MFI_STAT_OK)
796 		hdr->mh_cmd_status = MFI_STAT_NOT_FOUND;
797 }
798 
/*
 * lmrc_abort_cmd
 *
 * Abort a MFI command. This is a bit tricky as the hardware may still complete
 * it at any time.
 *
 * The mutex of the command to be aborted must be held to prevent it from
 * completing behind our back. We'll replace its callback with our own, issue an
 * ABORT command, and drop the mutex before we wait for the ABORT command to
 * complete. The mutex is re-acquired before returning, so it is held on
 * return just as it was on entry.
 *
 * Returns DDI_SUCCESS if the aborted command completed, in which case its
 * original callback, if any, has been invoked. Otherwise the failure from
 * waiting for the ABORT command or from polling the aborted command is
 * returned. The original callback of the command is restored in all cases.
 */
static int
lmrc_abort_cmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi_to_abort)
{
	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
	mfi_abort_payload_t *abort = &mfi->mfi_frame->mf_abort;
	lmrc_mfi_cmd_cb_t *orig_cb = mfi_to_abort->mfi_callback;
	int ret;

	ASSERT(mutex_owned(&mfi_to_abort->mfi_lock));

	/* Replace the commands callback with our own. */
	mfi_to_abort->mfi_callback = lmrc_abort_cb;

	/*
	 * Set up the ABORT frame: the command to be aborted is identified by
	 * its index and by the DMA address of its MFI frame.
	 */
	hdr->mh_cmd = MFI_CMD_ABORT;
	abort->ma_abort_context = mfi_to_abort->mfi_idx;
	lmrc_dma_set_addr64(&mfi_to_abort->mfi_frame_dma,
	    &abort->ma_abort_mfi_phys_addr);

	/* Send the ABORT. */
	mutex_enter(&mfi->mfi_lock);
	lmrc_issue_mfi(lmrc, mfi, lmrc_wakeup_mfi);

	/*
	 * Drop the mutex of the command to be aborted, allowing it to proceed
	 * while we wait for the ABORT command to complete.
	 */
	mutex_exit(&mfi_to_abort->mfi_lock);
	ret = lmrc_wait_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
	mutex_exit(&mfi->mfi_lock);

	/*
	 * The ABORT command may fail if mfi_to_abort has completed already.
	 * Treat any other failure as fatal, restore the callback and fail.
	 */
	if (ret != DDI_SUCCESS && hdr->mh_cmd_status != MFI_STAT_NOT_FOUND) {
		mutex_enter(&mfi_to_abort->mfi_lock);
		mfi_to_abort->mfi_callback = orig_cb;
		goto out;
	}

	/*
	 * Wait for the aborted command to complete. If we time out on this
	 * there's little we can do here, so we restore the callback and fail.
	 */
	mutex_enter(&mfi_to_abort->mfi_lock);
	ret = lmrc_poll_mfi(lmrc, mfi_to_abort, LMRC_INTERNAL_CMD_WAIT_TIME);
	mfi_to_abort->mfi_callback = orig_cb;

	if (ret != DDI_SUCCESS)
		goto out;

	/* Wake up anyone waiting on the aborted command. */
	if (mfi_to_abort->mfi_callback != NULL)
		mfi_to_abort->mfi_callback(lmrc, mfi_to_abort);

out:
	/* Release the ABORT command; mfi_to_abort's mutex is held again. */
	lmrc_put_mfi(mfi);
	ASSERT(mutex_owned(&mfi_to_abort->mfi_lock));
	return (ret);
}
871 
872 
873 /*
874  * Controller Initialization and Housekeeping
875  */
876 
877 /*
878  * lmrc_check_fw_fault
879  *
880  * Check the firmware state. If faulted, return B_TRUE.
881  * Return B_FALSE otherwise.
882  */
883 static boolean_t
884 lmrc_check_fw_fault(lmrc_t *lmrc)
885 {
886 	uint32_t status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
887 	uint32_t fw_state = LMRC_FW_STATE(status);
888 
889 	if (fw_state == LMRC_FW_STATE_FAULT)
890 		return (B_TRUE);
891 
892 	return (B_FALSE);
893 }
894 
895 /*
896  * lmrc_wait_for_reg
897  *
898  * Repeatedly read the register and check that 'bits' match 'exp'.
899  */
900 static boolean_t
901 lmrc_wait_for_reg(lmrc_t *lmrc, uint32_t reg, uint32_t bits, uint32_t exp,
902     uint64_t max_wait)
903 {
904 	uint32_t val;
905 	uint64_t i;
906 
907 	max_wait *= MILLISEC / 100;
908 
909 	for (i = 0; i < max_wait; i++) {
910 		delay(drv_usectohz(100 * MILLISEC));
911 		val = lmrc_read_reg(lmrc, reg);
912 
913 		if ((val & bits) == exp)
914 			return (B_TRUE);
915 	}
916 
917 	return (B_FALSE);
918 }
919 
920 static int
921 lmrc_hard_reset(lmrc_t *lmrc)
922 {
923 	int ret = DDI_SUCCESS;
924 
925 	/* Write the reset key sequence. */
926 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
927 	    MPI2_WRSEQ_FLUSH_KEY_VALUE);
928 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
929 	    MPI2_WRSEQ_1ST_KEY_VALUE);
930 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
931 	    MPI2_WRSEQ_2ND_KEY_VALUE);
932 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
933 	    MPI2_WRSEQ_3RD_KEY_VALUE);
934 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
935 	    MPI2_WRSEQ_4TH_KEY_VALUE);
936 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
937 	    MPI2_WRSEQ_5TH_KEY_VALUE);
938 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
939 	    MPI2_WRSEQ_6TH_KEY_VALUE);
940 
941 	/* Check diag write enable. */
942 	if (!lmrc_wait_for_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET,
943 	    MPI2_DIAG_DIAG_WRITE_ENABLE, MPI2_DIAG_DIAG_WRITE_ENABLE,
944 	    LMRC_RESET_TIMEOUT)) {
945 		dev_err(lmrc->l_dip, CE_WARN, "diag unlock failed");
946 		return (DDI_FAILURE);
947 	}
948 
949 	/* Reset IOC. */
950 	lmrc_write_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET,
951 	    lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) |
952 	    MPI2_DIAG_RESET_ADAPTER);
953 	delay(drv_usectohz(MPI2_HARD_RESET_PCIE_FIRST_READ_DELAY_MICRO_SEC));
954 
955 	/* Check the reset adapter bit. */
956 	if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) &
957 	    MPI2_DIAG_RESET_ADAPTER) == 0)
958 		goto out;
959 
960 	delay(drv_usectohz(MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC));
961 
962 	/* Check the reset adapter bit again. */
963 	if ((lmrc_read_reg(lmrc, MPI2_HOST_DIAGNOSTIC_OFFSET) &
964 	    MPI2_DIAG_RESET_ADAPTER) == 0)
965 		goto out;
966 
967 	ret = DDI_FAILURE;
968 out:
969 	lmrc_write_reg(lmrc, MPI2_WRITE_SEQUENCE_OFFSET,
970 	    MPI2_WRSEQ_FLUSH_KEY_VALUE);
971 	return (ret);
972 }
973 
974 /*
975  * lmrc_reset_ctrl
976  *
977  * Attempt to reset the controller, if the hardware supports it.
978  * If reset is unsupported or the reset fails repeatedly, we shut the
979  * controller down.
980  */
981 static int
982 lmrc_reset_ctrl(lmrc_t *lmrc)
983 {
984 	uint32_t status, fw_state, reset_adapter;
985 	int max_wait, i;
986 
987 	if (lmrc->l_disable_online_ctrl_reset)
988 		return (DDI_FAILURE);
989 
990 	status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
991 	fw_state = LMRC_FW_STATE(status);
992 	reset_adapter = LMRC_FW_RESET_ADAPTER(status);
993 
994 	if (fw_state == LMRC_FW_STATE_FAULT && reset_adapter == 0) {
995 		dev_err(lmrc->l_dip, CE_WARN,
996 		    "FW in fault state, but reset not supported");
997 		goto out;
998 	}
999 
1000 	for (i = 0; i < LMRC_MAX_RESET_TRIES; i++) {
1001 		dev_err(lmrc->l_dip, CE_WARN, "resetting...");
1002 
1003 		if (lmrc_hard_reset(lmrc) != DDI_SUCCESS)
1004 			continue;
1005 
1006 		/* Wait for the FW state to move beyond INIT. */
1007 		max_wait = LMRC_IO_TIMEOUT * MILLISEC / 100;
1008 		do {
1009 			status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
1010 			fw_state = LMRC_FW_STATE(status);
1011 
1012 			if (fw_state <= LMRC_FW_STATE_FW_INIT)
1013 				delay(drv_usectohz(100 * MILLISEC));
1014 		} while (fw_state <= LMRC_FW_STATE_FW_INIT && max_wait > 0);
1015 
1016 		if (fw_state <= LMRC_FW_STATE_FW_INIT) {
1017 			dev_err(lmrc->l_dip, CE_WARN,
1018 			    "fw state <= LMRC_FW_STATE_FW_INIT, state = %x",
1019 			    fw_state);
1020 			continue;
1021 		}
1022 
1023 		return (DDI_SUCCESS);
1024 	}
1025 
1026 	dev_err(lmrc->l_dip, CE_WARN, "reset failed");
1027 out:
1028 	/* Stop the controller. */
1029 	lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET, MFI_STOP_ADP);
1030 	(void) lmrc_read_reg(lmrc, MPI2_DOORBELL_OFFSET);
1031 
1032 	return (DDI_FAILURE);
1033 }
1034 
1035 /*
1036  * lmrc_tgt_complete_cmd
1037  *
1038  * In case of a controller reset, complete the cmd and clean up. This is done
1039  * in a taskq to avoid locking and list manipulation headaches.
1040  */
1041 static void
1042 lmrc_tgt_complete_cmd(void *arg)
1043 {
1044 	lmrc_scsa_cmd_t *cmd = arg;
1045 	struct scsi_pkt *pkt;
1046 	lmrc_t *lmrc;
1047 
1048 	mutex_enter(&cmd->sc_mpt->mpt_lock);
1049 
1050 	/* Just in case the command completed before the taskq was run... */
1051 	if (cmd->sc_mpt->mpt_complete) {
1052 		mutex_exit(&cmd->sc_mpt->mpt_lock);
1053 		return;
1054 	}
1055 
1056 	lmrc = cmd->sc_mpt->mpt_lmrc;
1057 	pkt = cmd->sc_mpt->mpt_pkt;
1058 
1059 	pkt->pkt_state = STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD;
1060 	pkt->pkt_reason = CMD_RESET;
1061 	pkt->pkt_statistics = STAT_BUS_RESET;
1062 	mutex_exit(&cmd->sc_mpt->mpt_lock);
1063 
1064 	lmrc_tgt_rem_active_mpt(cmd->sc_tgt, cmd->sc_mpt);
1065 	atomic_dec_uint(&lmrc->l_fw_outstanding_cmds);
1066 
1067 	scsi_hba_pkt_comp(pkt);
1068 }
1069 
1070 /*
1071  * lmrc_tgt_complete_cmds
1072  *
1073  * Walk the list of active commands of a target. Schedule a taskq to handle the
1074  * timeout processing and clean up.
1075  */
1076 static void
1077 lmrc_tgt_complete_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt)
1078 {
1079 	lmrc_mpt_cmd_t *mpt;
1080 
1081 	mutex_enter(&tgt->tgt_mpt_active_lock);
1082 	if (list_is_empty(&tgt->tgt_mpt_active)) {
1083 		mutex_exit(&tgt->tgt_mpt_active_lock);
1084 		return;
1085 	}
1086 
1087 	for (mpt = lmrc_tgt_first_active_mpt(tgt);
1088 	    mpt != NULL;
1089 	    mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) {
1090 		lmrc_scsa_cmd_t *cmd = mpt->mpt_pkt->pkt_ha_private;
1091 
1092 		ASSERT(mutex_owned(&mpt->mpt_lock));
1093 		VERIFY(mpt->mpt_pkt != NULL);
1094 		VERIFY(cmd != NULL);
1095 
1096 		if (mpt->mpt_complete)
1097 			continue;
1098 
1099 		taskq_dispatch_ent(lmrc->l_taskq, lmrc_tgt_complete_cmd, cmd,
1100 		    TQ_NOSLEEP, &mpt->mpt_tqent);
1101 	}
1102 	mutex_exit(&tgt->tgt_mpt_active_lock);
1103 }
1104 
1105 /*
1106  * lmrc_tgt_timeout_cmds
1107  *
1108  * Walk the list of active commands of a target. Try to abort commands which are
1109  * overdue.
1110  */
1111 static int
1112 lmrc_tgt_timeout_cmds(lmrc_t *lmrc, lmrc_tgt_t *tgt)
1113 {
1114 	lmrc_mpt_cmd_t *mpt;
1115 	int ret = DDI_SUCCESS;
1116 
1117 	mutex_enter(&tgt->tgt_mpt_active_lock);
1118 	if (list_is_empty(&tgt->tgt_mpt_active))
1119 		goto out;
1120 
1121 	for (mpt = lmrc_tgt_first_active_mpt(tgt);
1122 	    mpt != NULL;
1123 	    mpt = lmrc_tgt_next_active_mpt(tgt, mpt)) {
1124 		hrtime_t now;
1125 
1126 		ASSERT(mutex_owned(&mpt->mpt_lock));
1127 		VERIFY(mpt->mpt_pkt != NULL);
1128 
1129 		/* Just in case the command completed by now... */
1130 		if (mpt->mpt_complete)
1131 			continue;
1132 
1133 		now = gethrtime();
1134 
1135 		if (now > mpt->mpt_timeout) {
1136 			/*
1137 			 * Give the packet a bit more time for the abort to
1138 			 * complete.
1139 			 */
1140 			mpt->mpt_timeout = now + LMRC_IO_TIMEOUT * NANOSEC;
1141 
1142 			/*
1143 			 * If the abort failed for whatever reason,
1144 			 * we can stop here as only a controller reset
1145 			 * can get us back into a sane state.
1146 			 */
1147 			if (lmrc_abort_mpt(lmrc, tgt, mpt) != 1) {
1148 				mutex_exit(&mpt->mpt_lock);
1149 				ret = DDI_FAILURE;
1150 				goto out;
1151 			}
1152 		}
1153 	}
1154 
1155 out:
1156 	mutex_exit(&tgt->tgt_mpt_active_lock);
1157 	return (ret);
1158 }
1159 
1160 /*
1161  * lmrc_thread
1162  *
1163  * Check whether the controller is FW fault state. Check all targets for
1164  * commands which have timed out.
1165  */
1166 void
1167 lmrc_thread(void *arg)
1168 {
1169 	lmrc_t *lmrc = arg;
1170 
1171 	do {
1172 		int i;
1173 
1174 		/* Wake up at least once a minute. */
1175 		mutex_enter(&lmrc->l_thread_lock);
1176 		(void) cv_reltimedwait(&lmrc->l_thread_cv, &lmrc->l_thread_lock,
1177 		    drv_usectohz(60 * MICROSEC), TR_SEC);
1178 		mutex_exit(&lmrc->l_thread_lock);
1179 
1180 		if (lmrc->l_thread_stop)
1181 			continue;
1182 
1183 		lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc);
1184 
1185 		/*
1186 		 * Check all targets for timed-out commands. If we find any
1187 		 * and fail to abort them, we pretend the FW has faulted to
1188 		 * trigger a reset.
1189 		 */
1190 		if (!lmrc->l_fw_fault) {
1191 			for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
1192 				if (lmrc_tgt_timeout_cmds(lmrc,
1193 				    &lmrc->l_targets[i]) != DDI_SUCCESS) {
1194 					lmrc->l_fw_fault = B_TRUE;
1195 					break;
1196 				}
1197 			}
1198 		}
1199 
1200 		/*
1201 		 * If the FW is faulted, try to recover by performing a reset.
1202 		 */
1203 		if (lmrc->l_fw_fault) {
1204 			int ret;
1205 
1206 			lmrc_disable_intr(lmrc);
1207 
1208 			/*
1209 			 * Even if the reset failed, it will have stopped the
1210 			 * controller and we can complete all outstanding
1211 			 * commands.
1212 			 */
1213 			ret = lmrc_reset_ctrl(lmrc);
1214 
1215 			(void) lmrc_abort_outstanding_mfi(lmrc,
1216 			    LMRC_MAX_MFI_CMDS);
1217 
1218 			for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++)
1219 				lmrc_tgt_complete_cmds(lmrc,
1220 				    &lmrc->l_targets[i]);
1221 
1222 			if (ret != DDI_SUCCESS) {
1223 				dev_err(lmrc->l_dip, CE_WARN, "reset failed");
1224 				continue;
1225 			}
1226 
1227 			if (lmrc_transition_to_ready(lmrc) != DDI_SUCCESS)
1228 				continue;
1229 
1230 			if (lmrc_ioc_init(lmrc) != DDI_SUCCESS)
1231 				continue;
1232 
1233 			lmrc_enable_intr(lmrc);
1234 
1235 			if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
1236 				dev_err(lmrc->l_dip, CE_WARN,
1237 				    "failed to re-initiate AEN");
1238 				continue;
1239 			}
1240 
1241 			lmrc->l_fw_fault = lmrc_check_fw_fault(lmrc);
1242 		}
1243 	} while (!lmrc->l_thread_stop);
1244 
1245 	thread_exit();
1246 }
1247 
1248 /*
1249  * lmrc_transition_to_ready
1250  *
1251  * Move firmware to ready state. At attach time, the FW can potentially be in
1252  * any one of several possible states. If the FW is in operational, waiting-for-
1253  * handshake states, take steps to bring it to ready state. Otherwise, wait for
1254  * the FW to reach ready state.
1255  */
1256 static int
1257 lmrc_transition_to_ready(lmrc_t *lmrc)
1258 {
1259 	uint32_t status, new_status;
1260 	uint32_t fw_state;
1261 	uint8_t max_wait;
1262 	uint_t i;
1263 
1264 	status = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
1265 	fw_state = LMRC_FW_STATE(status);
1266 	max_wait = LMRC_RESET_TIMEOUT;
1267 
1268 	while (fw_state != LMRC_FW_STATE_READY) {
1269 		switch (fw_state) {
1270 		case LMRC_FW_STATE_FAULT:
1271 			dev_err(lmrc->l_dip, CE_NOTE, "FW is in fault state!");
1272 			if (lmrc_reset_ctrl(lmrc) != DDI_SUCCESS)
1273 				return (DDI_FAILURE);
1274 			break;
1275 
1276 		case LMRC_FW_STATE_WAIT_HANDSHAKE:
1277 			/* Set the CLR bit in inbound doorbell */
1278 			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
1279 			    MFI_INIT_CLEAR_HANDSHAKE | MFI_INIT_HOTPLUG);
1280 			break;
1281 
1282 		case LMRC_FW_STATE_BOOT_MSG_PENDING:
1283 			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
1284 			    MFI_INIT_HOTPLUG);
1285 			break;
1286 
1287 		case LMRC_FW_STATE_OPERATIONAL:
1288 			/* Bring it to READY state, wait up to 10s */
1289 			lmrc_disable_intr(lmrc);
1290 			lmrc_write_reg(lmrc, MPI2_DOORBELL_OFFSET,
1291 			    MFI_RESET_FLAGS);
1292 			(void) lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1,
1293 			    0, 10);
1294 			break;
1295 
1296 		case LMRC_FW_STATE_UNDEFINED:
1297 			/* This state should not last for more than 2 sec */
1298 		case LMRC_FW_STATE_BB_INIT:
1299 		case LMRC_FW_STATE_FW_INIT:
1300 		case LMRC_FW_STATE_FW_INIT_2:
1301 		case LMRC_FW_STATE_DEVICE_SCAN:
1302 		case LMRC_FW_STATE_FLUSH_CACHE:
1303 			break;
1304 		default:
1305 			dev_err(lmrc->l_dip, CE_WARN, "Unknown FW state %x",
1306 			    fw_state);
1307 			return (DDI_FAILURE);
1308 		}
1309 
1310 		/*
1311 		 * The current state should not last for more than max_wait
1312 		 * seconds.
1313 		 */
1314 		for (i = 0; i < max_wait * 1000; i++) {
1315 			new_status = lmrc_read_reg(lmrc,
1316 			    MPI26_SCRATCHPAD0_OFFSET);
1317 
1318 			if (status != new_status)
1319 				break;
1320 
1321 			delay(drv_usectohz(MILLISEC));
1322 		}
1323 
1324 		if (new_status == status) {
1325 			dev_err(lmrc->l_dip, CE_WARN,
1326 			    "FW state (%x) hasn't changed in %d seconds",
1327 			    fw_state, max_wait);
1328 			return (DDI_FAILURE);
1329 		}
1330 
1331 		status = new_status;
1332 		fw_state = LMRC_FW_STATE(status);
1333 	}
1334 
1335 	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_FM_OK)
1336 		return (DDI_FAILURE);
1337 
1338 	return (DDI_SUCCESS);
1339 }
1340 
1341 /*
1342  * lmrc_adapter_init
1343  *
1344  * Get the hardware and firmware into a usable state, and fetch some basic
1345  * information from the registers to calculate sizes of basic data structures.
1346  */
1347 int
1348 lmrc_adapter_init(lmrc_t *lmrc)
1349 {
1350 	uint32_t reg;
1351 	int ret;
1352 	int i;
1353 
1354 	ret = lmrc_transition_to_ready(lmrc);
1355 	if (ret != DDI_SUCCESS)
1356 		return (ret);
1357 
1358 	/*
1359 	 * Get maximum RAID map size.
1360 	 */
1361 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD2_OFFSET);
1362 	lmrc->l_max_raid_map_sz = LMRC_MAX_RAID_MAP_SZ(reg);
1363 
1364 	lmrc->l_max_reply_queues = 1;
1365 	lmrc->l_rphi[0] = MPI2_REPLY_POST_HOST_INDEX_OFFSET;
1366 
1367 	/*
1368 	 * Apparently, bit 27 of the scratch pad register indicates whether
1369 	 * MSI-X is supported by the firmware.
1370 	 */
1371 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
1372 
1373 	if (LMRC_FW_MSIX_ENABLED(reg)) {
1374 		lmrc->l_fw_msix_enabled = B_TRUE;
1375 
1376 		reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
1377 		lmrc->l_max_reply_queues = LMRC_MAX_REPLY_QUEUES_EXT(reg);
1378 
1379 		if (lmrc->l_max_reply_queues > LMRC_MAX_REPLY_POST_HOST_INDEX) {
1380 			lmrc->l_msix_combined = B_TRUE;
1381 			lmrc->l_rphi[0] =
1382 			    MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET;
1383 		}
1384 
1385 		/*
1386 		 * Compute reply post index register addresses 1-15.
1387 		 */
1388 		for (i = 1; i < LMRC_MAX_REPLY_POST_HOST_INDEX; i++) {
1389 			lmrc->l_rphi[i] = i * 0x10 +
1390 			    MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET;
1391 		}
1392 	}
1393 
1394 	/*
1395 	 * Get the number of commands the firmware supports. Use one less,
1396 	 * because reply_q_depth is based on one more than this. XXX: Why?
1397 	 */
1398 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD0_OFFSET);
1399 	lmrc->l_max_fw_cmds = LMRC_FW_MAX_CMD(reg) - 1;
1400 
1401 	if (lmrc->l_max_fw_cmds < LMRC_MAX_MFI_CMDS) {
1402 		dev_err(lmrc->l_dip, CE_WARN, "!max_fw_cmds too low: %d",
1403 		    lmrc->l_max_fw_cmds);
1404 		return (DDI_FAILURE);
1405 	}
1406 
1407 	/*
1408 	 * Reserve some commands for MFI, the remainder is for SCSI commands.
1409 	 */
1410 	lmrc->l_max_scsi_cmds = lmrc->l_max_fw_cmds - LMRC_MAX_MFI_CMDS;
1411 
1412 	/*
1413 	 * XXX: This magic calculation isn't explained anywhere. Let's see...
1414 	 * lmrc_max_fw_cmds + 1 gives us what was reported in the register,
1415 	 * That + 15 is for rounding it up the next multiple of 16, which
1416 	 * / 16 * 16 does.
1417 	 * And apparently we want twice that much for queue depth. Why?
1418 	 *
1419 	 * So in reality, the queue depth is based on at least one more than
1420 	 * lmrc_max_fw_cmds, but it could be even more. That makes the above
1421 	 * statement about lmrc_max_fw_cmds questionable.
1422 	 */
1423 	lmrc->l_reply_q_depth = (lmrc->l_max_fw_cmds + 1 + 15) / 16 * 16 * 2;
1424 
1425 	/* Allocation size of one reply queue, based on depth. */
1426 	lmrc->l_reply_alloc_sz =
1427 	    sizeof (Mpi2ReplyDescriptorsUnion_t) * lmrc->l_reply_q_depth;
1428 
1429 	/* Allocation size of the DMA memory used for all MPI I/O frames. */
1430 	lmrc->l_io_frames_alloc_sz = LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE *
1431 	    (lmrc->l_max_fw_cmds + 2);
1432 
1433 	/*
1434 	 * If LMRC_EXT_CHAIN_SIZE_SUPPORT is set in scratch pad 1, firmware
1435 	 * supports an extended IO chain frame which is 4 times the size of a
1436 	 * legacy firmware frame.
1437 	 * Legacy Firmware frame size is (8 * 128) = 1K
1438 	 * 1M IO Firmware frame size is (8 * 128 * 4) = 4K
1439 	 */
1440 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
1441 	lmrc->l_max_chain_frame_sz = LMRC_MAX_CHAIN_SIZE(reg) *
1442 	    (LMRC_EXT_CHAIN_SIZE_SUPPORT(reg) ? LMRC_1MB_IO : LMRC_256K_IO);
1443 
1444 	/*
1445 	 * Check whether the controller supports DMA to the full 64bit address
1446 	 * space.
1447 	 */
1448 	lmrc->l_64bit_dma_support = LMRC_64BIT_DMA_SUPPORT(reg);
1449 
1450 	/*
1451 	 * We use a I/O frame size of 256 bytes, that is what
1452 	 * LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE is set to.
1453 	 *
1454 	 * The offset of the SGL in the I/O frame is 128, so
1455 	 * there are 128 bytes left for 8 SGEs of 16 bytes each.
1456 	 */
1457 	lmrc->l_max_sge_in_main_msg =
1458 	    (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
1459 	    offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof (Mpi25SGEIOUnion_t);
1460 
1461 	/*
1462 	 * Similarly, number of SGE in a SGE chain frame.
1463 	 */
1464 	lmrc->l_max_sge_in_chain =
1465 	    lmrc->l_max_chain_frame_sz / sizeof (Mpi25SGEIOUnion_t);
1466 
1467 	/*
1468 	 * The total number of SGE we support in a transfer is sum of
1469 	 * the above two, minus one for the link (last SGE in main msg).
1470 	 *
1471 	 * XXX: So why -2?
1472 	 */
1473 	lmrc->l_max_num_sge =
1474 	    lmrc->l_max_sge_in_main_msg + lmrc->l_max_sge_in_chain - 2;
1475 
1476 	/*
1477 	 * The offset of the last SGE in the I/O request, used for linking
1478 	 * the SGE chain frame if necessary.
1479 	 */
1480 	lmrc->l_chain_offset_io_request =
1481 	    (LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE -
1482 	    sizeof (Mpi25SGEIOUnion_t)) / sizeof (Mpi25SGEIOUnion_t);
1483 
1484 	/*
1485 	 * For MFI passthru, the link to the SGE chain frame is always
1486 	 * the first SGE in the I/O frame, the other SGEs in the I/O frame
1487 	 * will not be used.
1488 	 */
1489 	lmrc->l_chain_offset_mfi_pthru =
1490 	    offsetof(Mpi25SCSIIORequest_t, SGL) / sizeof (Mpi25SGEIOUnion_t);
1491 
1492 
1493 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD3_OFFSET);
1494 	if (LMRC_NVME_PAGE_SHIFT(reg) > LMRC_DEFAULT_NVME_PAGE_SHIFT) {
1495 		lmrc->l_nvme_page_sz = 1 << LMRC_NVME_PAGE_SHIFT(reg);
1496 		dev_err(lmrc->l_dip, CE_NOTE, "!NVME page size: %ld",
1497 		    lmrc->l_nvme_page_sz);
1498 	}
1499 
1500 	reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
1501 	lmrc->l_fw_sync_cache_support = LMRC_SYNC_CACHE_SUPPORT(reg);
1502 
1503 	if (lmrc->l_class == LMRC_ACLASS_AERO) {
1504 		reg = lmrc_read_reg(lmrc, MPI26_SCRATCHPAD1_OFFSET);
1505 		lmrc->l_atomic_desc_support =
1506 		    LMRC_ATOMIC_DESCRIPTOR_SUPPORT(reg);
1507 	}
1508 
1509 	return (DDI_SUCCESS);
1510 }
1511 
1512 /*
1513  * lmrc_ioc_init
1514  *
1515  * Manually build a MFI IOC INIT command to setup basic operating parameters
1516  * such as the DMA parameters for the I/O request frames and the reply post
1517  * queues. Send the IOC INIT command using a special request descriptor which
1518  * directly includes the physical address of the MFI command frame.
1519  *
1520  * After this command completes, the controller is ready to accept MPT commands
1521  * using the normal method of placing it in the I/O request DMA memory and
1522  * writing a MPT request descripter to the appropriate registers.
1523  */
1524 int
1525 lmrc_ioc_init(lmrc_t *lmrc)
1526 {
1527 	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
1528 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
1529 	mfi_init_payload_t *init = &mfi->mfi_frame->mf_init;
1530 	lmrc_req_desc_t req_desc;
1531 	Mpi2IOCInitRequest_t *IOCInitMsg;
1532 	lmrc_dma_t dma;
1533 	int ret = DDI_SUCCESS;
1534 
1535 	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &dma,
1536 	    sizeof (Mpi2IOCInitRequest_t), 256, DDI_DMA_CONSISTENT);
1537 	if (ret != DDI_SUCCESS) {
1538 		lmrc_put_mfi(mfi);
1539 		dev_err(lmrc->l_dip, CE_WARN,
1540 		    "!%s: failed to allocate IOC command", __func__);
1541 		return (DDI_FAILURE);
1542 	}
1543 
1544 	IOCInitMsg = dma.ld_buf;
1545 	IOCInitMsg->Function = MPI2_FUNCTION_IOC_INIT;
1546 	IOCInitMsg->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
1547 	IOCInitMsg->MsgVersion = MPI2_VERSION;
1548 	IOCInitMsg->HeaderVersion = MPI2_HEADER_VERSION;
1549 	IOCInitMsg->SystemRequestFrameSize =
1550 	    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4;
1551 	IOCInitMsg->ReplyDescriptorPostQueueDepth = lmrc->l_reply_q_depth;
1552 	lmrc_dma_set_addr64(&lmrc->l_reply_dma,
1553 	    (uint64_t *)&IOCInitMsg->ReplyDescriptorPostQueueAddress);
1554 	lmrc_dma_set_addr64(&lmrc->l_ioreq_dma,
1555 	    (uint64_t *)&IOCInitMsg->SystemRequestFrameBaseAddress);
1556 	IOCInitMsg->HostMSIxVectors = lmrc->l_max_reply_queues;
1557 	/* XXX: Why NVMe? */
1558 	IOCInitMsg->HostPageSize = LMRC_DEFAULT_NVME_PAGE_SHIFT;
1559 
1560 	hdr->mh_cmd = MFI_CMD_INIT;
1561 	hdr->mh_cmd_status = MFI_STAT_INVALID_STATUS;
1562 	hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
1563 
1564 	hdr->mh_drv_opts.mc_support_additional_msix = 1;
1565 	hdr->mh_drv_opts.mc_support_max_255lds = 1;
1566 	hdr->mh_drv_opts.mc_support_ndrive_r1_lb = 1;
1567 	hdr->mh_drv_opts.mc_support_security_protocol_cmds_fw = 1;
1568 	hdr->mh_drv_opts.mc_support_ext_io_size = 1;
1569 
1570 	hdr->mh_data_xfer_len = lmrc_dma_get_size(&dma);
1571 
1572 	lmrc_dma_set_addr64(&dma, &init->mi_queue_info_new_phys_addr);
1573 
1574 	lmrc_dma_set_addr64(&mfi->mfi_frame_dma, &req_desc.rd_reg);
1575 	VERIFY0(req_desc.rd_mfa_io.RequestFlags);
1576 	req_desc.rd_mfa_io.RequestFlags = LMRC_REQ_DESCRIPT_FLAGS_MFA;
1577 
1578 	lmrc_disable_intr(lmrc);
1579 	if (!lmrc_wait_for_reg(lmrc, MPI2_DOORBELL_OFFSET, 1, 0, 10))
1580 		return (DDI_FAILURE);
1581 
1582 	(void) ddi_dma_sync(dma.ld_hdl, 0, dma.ld_len, DDI_DMA_SYNC_FORDEV);
1583 	(void) ddi_dma_sync(mfi->mfi_frame_dma.ld_hdl, 0,
1584 	    mfi->mfi_frame_dma.ld_len, DDI_DMA_SYNC_FORDEV);
1585 
1586 	lmrc_send_request(lmrc, req_desc);
1587 
1588 	mutex_enter(&mfi->mfi_lock);
1589 	ret = lmrc_poll_mfi(lmrc, mfi, LMRC_INTERNAL_CMD_WAIT_TIME);
1590 	mutex_exit(&mfi->mfi_lock);
1591 
1592 	if (ret != DDI_SUCCESS) {
1593 		if (hdr->mh_cmd_status != MFI_STAT_INVALID_STATUS)
1594 			dev_err(lmrc->l_dip, CE_WARN,
1595 			    "!IOC Init failed, status = 0x%x",
1596 			    hdr->mh_cmd_status);
1597 	}
1598 
1599 	lmrc_dma_free(&dma);
1600 	lmrc_put_mfi(mfi);
1601 
1602 	return (ret);
1603 }
1604 
1605 /*
1606  * lmrc_get_ctrl_info
1607  *
1608  * Build a MFI DCMD to get controller information from FW. Update the copy in
1609  * the soft state.
1610  */
1611 static int
1612 lmrc_get_ctrl_info(lmrc_t *lmrc)
1613 {
1614 	mfi_ctrl_info_t *ci = lmrc->l_ctrl_info;
1615 	lmrc_mfi_cmd_t *mfi;
1616 	int ret;
1617 
1618 	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, MFI_DCMD_CTRL_GET_INFO,
1619 	    sizeof (mfi_ctrl_info_t), 1);
1620 
1621 	if (mfi == NULL)
1622 		return (DDI_FAILURE);
1623 
1624 	ret = lmrc_issue_blocked_mfi(lmrc, mfi);
1625 
1626 	if (ret != DDI_SUCCESS)
1627 		goto out;
1628 
1629 	(void) ddi_dma_sync(mfi->mfi_data_dma.ld_hdl, 0,
1630 	    mfi->mfi_data_dma.ld_len, DDI_DMA_SYNC_FORKERNEL);
1631 	bcopy(mfi->mfi_data_dma.ld_buf, ci, sizeof (mfi_ctrl_info_t));
1632 
1633 out:
1634 	lmrc_put_dcmd(lmrc, mfi);
1635 	return (ret);
1636 }
1637 
1638 /*
1639  * lmrc_fw_init
1640  *
1641  * Complete firmware initialization. At this point, we can already send MFI
1642  * commands. so we can start by getting the controller information from the
1643  * firmware and set up things in our soft state. Next we issue the commands
1644  * to get the PD map and RAID map, which will complete asynchronously when
1645  * new information is available and then re-send themselves.
1646  */
1647 int
1648 lmrc_fw_init(lmrc_t *lmrc)
1649 {
1650 	int drv_max_lds = MFI_MAX_LOGICAL_DRIVES;
1651 	mfi_ctrl_info_t *ci = lmrc->l_ctrl_info;
1652 	int ret;
1653 
1654 	ret = lmrc_get_ctrl_info(lmrc);
1655 	if (ret != DDI_SUCCESS) {
1656 		dev_err(lmrc->l_dip, CE_WARN, "!Unable to get FW ctrl info.");
1657 		return (DDI_FAILURE);
1658 	}
1659 
1660 	lmrc->l_disable_online_ctrl_reset =
1661 	    ci->ci_prop.cp_disable_online_ctrl_reset == 1;
1662 
1663 	lmrc->l_max_256_vd_support =
1664 	    ci->ci_adapter_opts3.ao3_support_max_ext_lds == 1;
1665 
1666 	if (ci->ci_max_lds > 64) {
1667 		lmrc->l_max_256_vd_support = B_TRUE;
1668 		drv_max_lds = LMRC_MAX_LOGICAL_DRIVES_EXT;
1669 	}
1670 
1671 	lmrc->l_fw_supported_vd_count = min(ci->ci_max_lds, drv_max_lds);
1672 
1673 	lmrc->l_fw_supported_pd_count = min(ci->ci_max_pds,
1674 	    MFI_MAX_PHYSICAL_DRIVES);
1675 
1676 	lmrc->l_max_map_sz = lmrc->l_current_map_sz =
1677 	    lmrc->l_max_raid_map_sz * LMRC_MIN_MAP_SIZE;
1678 
1679 	lmrc->l_use_seqnum_jbod_fp =
1680 	    ci->ci_adapter_opts3.ao3_use_seq_num_jbod_FP != 0;
1681 
1682 	lmrc->l_pdmap_tgtid_support =
1683 	    ci->ci_adapter_opts4.ao4_support_pd_map_target_id != 0;
1684 
1685 	return (DDI_SUCCESS);
1686 }
1687 
1688 
1689 /*
1690  * lmrc_ctrl_shutdown
1691  *
1692  * Called by lmrc_quiesce() to send a shutdown command to the controller.
1693  * Cannot use locks, therefore cannot use lmrc_get_dcmd() or lmrc_get_mfi().
1694  */
1695 int
1696 lmrc_ctrl_shutdown(lmrc_t *lmrc)
1697 {
1698 	lmrc_mfi_cmd_t *mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
1699 	mfi_header_t *hdr;
1700 	mfi_dcmd_payload_t *dcmd;
1701 
1702 	if (mfi == NULL)
1703 		return (DDI_FAILURE);
1704 
1705 	hdr = &mfi->mfi_frame->mf_hdr;
1706 	dcmd = &mfi->mfi_frame->mf_dcmd;
1707 
1708 	hdr->mh_cmd = MFI_CMD_DCMD;
1709 	hdr->mh_flags = MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
1710 	dcmd->md_opcode = MFI_DCMD_CTRL_SHUTDOWN;
1711 
1712 	lmrc_disable_intr(lmrc);
1713 	lmrc_issue_mfi(lmrc, mfi, NULL);
1714 
1715 	return (DDI_SUCCESS);
1716 }
1717 
/*
 * driver target state management
 *
 * The soft state of the controller instance keeps a pre-allocated array of
 * target structures for all possible targets, even though only a small number
 * of them are likely to be used. Each target structure contains a back link to
 * the soft state and a mutex, which are never cleared or changed when a target
 * is added or removed.
 */
1727 
1728 /*
1729  * lmrc_tgt_init
1730  *
1731  * Initialize the tgt structure for a newly discovered tgt. The same tgt
1732  * structure is used for PDs and LDs, the distinction can be made by the
1733  * presence or absence of tgt_pd_info. LDs are always of type disk, the type
1734  * of PDs is taken from their pd_info. If a device has no SAS WWN, we'll fake
1735  * the interconnect type to be PARALLEL to make sure device address isn't
1736  * misunderstood as a WWN by devfsadm.
1737  */
1738 void
1739 lmrc_tgt_init(lmrc_tgt_t *tgt, uint16_t dev_id, char *addr,
1740     mfi_pd_info_t *pd_info)
1741 {
1742 	rw_enter(&tgt->tgt_lock, RW_WRITER);
1743 
1744 	bzero(&tgt->tgt_dev_id,
1745 	    sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id));
1746 
1747 	tgt->tgt_dev_id = dev_id;
1748 	tgt->tgt_pd_info = pd_info;
1749 	tgt->tgt_interconnect_type = INTERCONNECT_SAS;
1750 
1751 	if (pd_info == NULL) {
1752 		tgt->tgt_type = DTYPE_DIRECT;
1753 	} else {
1754 		tgt->tgt_type = pd_info->pd_scsi_dev_type;
1755 	}
1756 
1757 	(void) strlcpy(tgt->tgt_wwnstr, addr, sizeof (tgt->tgt_wwnstr));
1758 	if (scsi_wwnstr_to_wwn(tgt->tgt_wwnstr, &tgt->tgt_wwn) != DDI_SUCCESS) {
1759 		tgt->tgt_interconnect_type = INTERCONNECT_PARALLEL;
1760 		tgt->tgt_wwn = dev_id;
1761 	}
1762 
1763 	rw_exit(&tgt->tgt_lock);
1764 }
1765 
1766 /*
1767  * lmrc_tgt_clear
1768  *
1769  * Reset the tgt structure of a target which is no longer present.
1770  */
1771 void
1772 lmrc_tgt_clear(lmrc_tgt_t *tgt)
1773 {
1774 	rw_enter(&tgt->tgt_lock, RW_WRITER);
1775 
1776 	if (tgt->tgt_pd_info != NULL)
1777 		kmem_free(tgt->tgt_pd_info, sizeof (mfi_pd_info_t));
1778 
1779 	bzero(&tgt->tgt_dev_id,
1780 	    sizeof (lmrc_tgt_t) - offsetof(lmrc_tgt_t, tgt_dev_id));
1781 	tgt->tgt_dev_id = LMRC_DEVHDL_INVALID;
1782 	rw_exit(&tgt->tgt_lock);
1783 }
1784 
1785 /*
1786  * lmrc_tgt_find
1787  *
1788  * Walk the target list and find a tgt matching the given scsi_device.
1789  * Return the tgt read-locked. The targets_lock mutex must be held the
1790  * whole time.
1791  */
1792 lmrc_tgt_t *
1793 lmrc_tgt_find(lmrc_t *lmrc, struct scsi_device *sd)
1794 {
1795 	const char *ua = scsi_device_unit_address(sd);
1796 	char *comma, wwnstr[SCSI_WWN_BUFLEN];
1797 	uint64_t wwn;
1798 	unsigned long tgtid;
1799 	lmrc_tgt_t *tgt;
1800 	size_t i;
1801 
1802 	VERIFY(ua != NULL);
1803 
1804 	(void) strlcpy(wwnstr, ua, sizeof (wwnstr));
1805 
1806 	/*
1807 	 * If the unit address is a valid target ID and within range for
1808 	 * VD IDs, use that.
1809 	 */
1810 	if (ddi_strtoul(wwnstr, &comma, 10, &tgtid) == 0 &&
1811 	    *comma == ',' &&
1812 	    tgtid <= lmrc->l_fw_supported_vd_count) {
1813 		tgt = &lmrc->l_targets[tgtid];
1814 
1815 		rw_enter(&tgt->tgt_lock, RW_READER);
1816 		if (tgt->tgt_dev_id == tgtid &&
1817 		    tgt->tgt_wwn == tgtid) {
1818 			return (tgt);
1819 		}
1820 		rw_exit(&tgt->tgt_lock);
1821 	}
1822 
1823 	/* Chop off ",lun" as scsi_wwnstr_to_wwn() can't handle it. */
1824 	comma = strchr(wwnstr, ',');
1825 	if (comma != NULL)
1826 		*comma = '\0';
1827 
1828 	/* Else, if unit address is a valid WWN, look for that. */
1829 	if (scsi_wwnstr_to_wwn(wwnstr, &wwn) == DDI_SUCCESS) {
1830 		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
1831 			tgt = &lmrc->l_targets[i];
1832 
1833 			rw_enter(&tgt->tgt_lock, RW_READER);
1834 			if (tgt->tgt_wwn == wwn) {
1835 				return (tgt);
1836 			}
1837 			rw_exit(&tgt->tgt_lock);
1838 		}
1839 	} else {
1840 		/* Do it the hard way and compare wwnstr. */
1841 		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
1842 			tgt = &lmrc->l_targets[i];
1843 
1844 			rw_enter(&tgt->tgt_lock, RW_READER);
1845 			if (strcmp(tgt->tgt_wwnstr, wwnstr) == 0) {
1846 				return (tgt);
1847 			}
1848 			rw_exit(&tgt->tgt_lock);
1849 		}
1850 	}
1851 
1852 	return (NULL);
1853 }
1854 
/*
 * MPT/MFI command management
 *
 * For each kind of command, MFI and MPT, the driver keeps an array of pre-
 * allocated and pre-initialized commands. Additionally, it keeps two lists of
 * currently unused commands. A set of functions is provided for each list to
 * get and put commands from/to the list. Commands are initialized during get(),
 * because having completed commands on the list can help in certain cases
 * during debugging.
 *
 * MPT commands in use for I/O are kept on an active command list of the target
 * they are addressing. All other types of commands are not kept on any list
 * while they are being processed by the hardware. When walking the command
 * arrays, busy commands not associated with a target can be distinguished by
 * not being linked on any list.
 */
1871 
1872 /*
1873  * lmrc_get_mpt
1874  *
1875  * Get a MPT command from the list and initialize it. Return the command locked.
1876  * Return NULL if the MPT command list is empty.
1877  */
1878 lmrc_mpt_cmd_t *
1879 lmrc_get_mpt(lmrc_t *lmrc)
1880 {
1881 	lmrc_mpt_cmd_t *mpt;
1882 	Mpi25SCSIIORequest_t *io_req;
1883 
1884 	mutex_enter(&lmrc->l_mpt_cmd_lock);
1885 	mpt = list_remove_head(&lmrc->l_mpt_cmd_list);
1886 	mutex_exit(&lmrc->l_mpt_cmd_lock);
1887 	if (mpt == NULL)
1888 		return (NULL);
1889 
1890 	mutex_enter(&mpt->mpt_lock);
1891 	bzero(mpt->mpt_io_frame, LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE);
1892 	bzero(mpt->mpt_chain_dma.ld_buf, mpt->mpt_chain_dma.ld_len);
1893 	bzero(mpt->mpt_sense_dma.ld_buf, mpt->mpt_sense_dma.ld_len);
1894 
1895 	mpt->mpt_mfi = NULL;
1896 	mpt->mpt_pkt = NULL;
1897 
1898 	/* Set the offset of the SGL entries inside the MPT command. */
1899 	io_req = mpt->mpt_io_frame;
1900 	io_req->SGLOffset0 = offsetof(Mpi25SCSIIORequest_t, SGL) / 4;
1901 
1902 	mpt->mpt_complete = B_FALSE;
1903 	cv_init(&mpt->mpt_cv, NULL, CV_DRIVER, NULL);
1904 
1905 	return (mpt);
1906 }
1907 
1908 /*
1909  * lmrc_put_mpt
1910  *
1911  * Put a MPT command back on the list. The command lock must be held when this
1912  * function is called, being unlocked only after the command has been put on
1913  * the free list. The command CV is destroyed, thereby asserting that no one is
1914  * still waiting on it.
1915  */
1916 void
1917 lmrc_put_mpt(lmrc_mpt_cmd_t *mpt)
1918 {
1919 	lmrc_t *lmrc = mpt->mpt_lmrc;
1920 
1921 	VERIFY(lmrc != NULL);
1922 
1923 	ASSERT0(list_link_active(&mpt->mpt_node));
1924 	ASSERT(mutex_owned(&mpt->mpt_lock));
1925 	cv_destroy(&mpt->mpt_cv);
1926 
1927 	mutex_enter(&lmrc->l_mpt_cmd_lock);
1928 	list_insert_tail(&lmrc->l_mpt_cmd_list, mpt);
1929 	mutex_exit(&lmrc->l_mpt_cmd_lock);
1930 	mutex_exit(&mpt->mpt_lock);
1931 }
1932 
1933 /*
1934  * lmrc_get_mfi
1935  *
1936  * Get a MFI command from the list and initialize it.
1937  */
1938 lmrc_mfi_cmd_t *
1939 lmrc_get_mfi(lmrc_t *lmrc)
1940 {
1941 	lmrc_mfi_cmd_t *mfi;
1942 
1943 	mutex_enter(&lmrc->l_mfi_cmd_lock);
1944 	mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
1945 	mutex_exit(&lmrc->l_mfi_cmd_lock);
1946 	VERIFY(mfi != NULL);
1947 
1948 	mutex_enter(&mfi->mfi_lock);
1949 	bzero(mfi->mfi_frame, sizeof (mfi_frame_t));
1950 	mfi->mfi_frame->mf_hdr.mh_context = mfi->mfi_idx;
1951 	mfi->mfi_callback = NULL;
1952 
1953 	cv_init(&mfi->mfi_cv, NULL, CV_DRIVER, NULL);
1954 	mutex_exit(&mfi->mfi_lock);
1955 
1956 	return (mfi);
1957 }
1958 
1959 /*
1960  * lmrc_put_mfi
1961  *
1962  * Put a MFI command back on the list. Destroy the CV, thereby
1963  * asserting that no one is waiting on it.
1964  */
1965 void
1966 lmrc_put_mfi(lmrc_mfi_cmd_t *mfi)
1967 {
1968 	lmrc_t *lmrc = mfi->mfi_lmrc;
1969 
1970 	VERIFY(lmrc != NULL);
1971 
1972 	ASSERT0(list_link_active(&mfi->mfi_node));
1973 
1974 	mutex_enter(&mfi->mfi_lock);
1975 
1976 	cv_destroy(&mfi->mfi_cv);
1977 
1978 	mutex_enter(&lmrc->l_mfi_cmd_lock);
1979 	list_insert_tail(&lmrc->l_mfi_cmd_list, mfi);
1980 	mutex_exit(&lmrc->l_mfi_cmd_lock);
1981 	mutex_exit(&mfi->mfi_lock);
1982 }
1983 
1984 /*
1985  * lmrc_abort_outstanding_mfi
1986  *
1987  * Walk the MFI cmd array and abort each command which is still outstanding,
1988  * which is indicated by not being linked on l_mfi_cmd_list.
1989  *
1990  * As a special case, if the FW is in fault state, just call each commands
1991  * completion callback.
1992  */
1993 int
1994 lmrc_abort_outstanding_mfi(lmrc_t *lmrc, const size_t ncmd)
1995 {
1996 	int ret;
1997 	int i;
1998 
1999 	for (i = 0; i < ncmd; i++) {
2000 		lmrc_mfi_cmd_t *mfi = lmrc->l_mfi_cmds[i];
2001 
2002 		mutex_enter(&mfi->mfi_lock);
2003 		if (list_link_active(&mfi->mfi_node)) {
2004 			mutex_exit(&mfi->mfi_lock);
2005 			continue;
2006 		}
2007 
2008 		/*
2009 		 * If the FW is faulted, wake up anyone waiting on the command
2010 		 * to clean it up.
2011 		 */
2012 		if (lmrc->l_fw_fault) {
2013 			if (mfi->mfi_callback != NULL)
2014 				mfi->mfi_callback(lmrc, mfi);
2015 			mutex_exit(&mfi->mfi_lock);
2016 			continue;
2017 		}
2018 
2019 		ret = lmrc_abort_cmd(lmrc, mfi);
2020 		mutex_exit(&mfi->mfi_lock);
2021 		if (ret != DDI_SUCCESS)
2022 			return (ret);
2023 
2024 		lmrc_dma_free(&mfi->mfi_data_dma);
2025 		lmrc_put_mfi(mfi);
2026 	}
2027 
2028 	return (DDI_SUCCESS);
2029 }
2030 
2031 /*
2032  * lmrc_get_dcmd
2033  *
2034  * Build a MFI DCMD with DMA memory for data transfers.
2035  */
2036 lmrc_mfi_cmd_t *
2037 lmrc_get_dcmd(lmrc_t *lmrc, uint16_t flags, uint32_t opcode, uint32_t xferlen,
2038     uint_t align)
2039 {
2040 	lmrc_mfi_cmd_t *mfi = lmrc_get_mfi(lmrc);
2041 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
2042 	mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
2043 	lmrc_dma_t *dma = &mfi->mfi_data_dma;
2044 	int ret;
2045 
2046 	hdr->mh_cmd = MFI_CMD_DCMD;
2047 	hdr->mh_flags = flags;
2048 
2049 	dcmd->md_opcode = opcode;
2050 
2051 	if ((flags & MFI_FRAME_DIR_READ) != 0 ||
2052 	    (flags & MFI_FRAME_DIR_WRITE) != 0) {
2053 		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, dma, xferlen,
2054 		    align, DDI_DMA_CONSISTENT);
2055 		if (ret != DDI_SUCCESS) {
2056 			lmrc_put_mfi(mfi);
2057 			return (NULL);
2058 		}
2059 
2060 		hdr->mh_flags |= MFI_FRAME_SGL64;
2061 		hdr->mh_sge_count = 1;
2062 		hdr->mh_data_xfer_len = lmrc_dma_get_size(dma);
2063 
2064 		dcmd->md_sgl.ms64_length = lmrc_dma_get_size(dma);
2065 		lmrc_dma_set_addr64(dma, &dcmd->md_sgl.ms64_phys_addr);
2066 	}
2067 
2068 	return (mfi);
2069 }
2070 
2071 /*
2072  * lmrc_put_dcmd
2073  *
2074  * Free the DMA memory of a MFI DCMD and return the command back on the list.
2075  */
2076 void
2077 lmrc_put_dcmd(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
2078 {
2079 	lmrc_dma_free(&mfi->mfi_data_dma);
2080 	lmrc_put_mfi(mfi);
2081 }
2082 
2083 
2084 /*
2085  * Asynchronous Event Notifications
2086  */
2087 /*
2088  * lmrc_get_event_log_info
2089  *
2090  * Get the Event Log Info from the firmware.
2091  */
2092 static int
2093 lmrc_get_event_log_info(lmrc_t *lmrc, mfi_evt_log_info_t *eli)
2094 {
2095 	lmrc_mfi_cmd_t *mfi;
2096 	int ret;
2097 
2098 	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ,
2099 	    MFI_DCMD_CTRL_EVENT_GET_INFO, sizeof (mfi_evt_log_info_t), 1);
2100 
2101 	if (mfi == NULL)
2102 		return (DDI_FAILURE);
2103 
2104 	ret = lmrc_issue_blocked_mfi(lmrc, mfi);
2105 
2106 	if (ret != DDI_SUCCESS)
2107 		goto out;
2108 
2109 	bcopy(mfi->mfi_data_dma.ld_buf, eli, sizeof (mfi_evt_log_info_t));
2110 
2111 out:
2112 	lmrc_put_dcmd(lmrc, mfi);
2113 	return (ret);
2114 }
2115 
2116 /*
2117  * lmrc_aen_handler
2118  *
2119  * Check the event code and handle it as needed. In the case of PD or LD related
2120  * events, invoke their special handlers.
2121  */
2122 static void
2123 lmrc_aen_handler(void *arg)
2124 {
2125 	lmrc_mfi_cmd_t *mfi = arg;
2126 	lmrc_t *lmrc = mfi->mfi_lmrc;
2127 	mfi_evt_detail_t *evt = mfi->mfi_data_dma.ld_buf;
2128 	mfi_dcmd_payload_t *dcmd = &mfi->mfi_frame->mf_dcmd;
2129 	int ret = DDI_FAILURE;
2130 
2131 	/* Controller & Configuration specific events */
2132 	switch (evt->evt_code) {
2133 	case MFI_EVT_CFG_CLEARED:
2134 	case MFI_EVT_CTRL_HOST_BUS_SCAN_REQD:
2135 	case MFI_EVT_FOREIGN_CFG_IMPORTED:
2136 		ret = lmrc_get_pd_list(lmrc);
2137 		if (ret != DDI_SUCCESS)
2138 			break;
2139 
2140 		ret = lmrc_get_ld_list(lmrc);
2141 		break;
2142 
2143 	case MFI_EVT_CTRL_PROP_CHANGED:
2144 		ret = lmrc_get_ctrl_info(lmrc);
2145 		break;
2146 
2147 	case MFI_EVT_CTRL_PATROL_READ_START:
2148 	case MFI_EVT_CTRL_PATROL_READ_RESUMED:
2149 	case MFI_EVT_CTRL_PATROL_READ_COMPLETE:
2150 	case MFI_EVT_CTRL_PATROL_READ_CANT_START:
2151 	case MFI_EVT_CTRL_PERF_COLLECTION:
2152 	case MFI_EVT_CTRL_BOOTDEV_SET:
2153 	case MFI_EVT_CTRL_BOOTDEV_RESET:
2154 	case MFI_EVT_CTRL_PERSONALITY_CHANGE:
2155 	case MFI_EVT_CTRL_PERSONALITY_CHANGE_PEND:
2156 	case MFI_EVT_CTRL_NR_OF_VALID_SNAPDUMP:
2157 		break;
2158 
2159 	default:
2160 		/* LD-specific events */
2161 		if ((evt->evt_cl.evt_locale & MFI_EVT_LOCALE_LD) != 0)
2162 			ret = lmrc_raid_aen_handler(lmrc, evt);
2163 
2164 		/* PD-specific events */
2165 		else if ((evt->evt_cl.evt_locale & MFI_EVT_LOCALE_PD) != 0)
2166 			ret = lmrc_phys_aen_handler(lmrc, evt);
2167 
2168 		if (ret != DDI_SUCCESS) {
2169 			dev_err(lmrc->l_dip, CE_NOTE, "!unknown AEN received, "
2170 			    "seqnum = %d, timestamp = %d, code = %x, "
2171 			    "locale = %x, class = %d, argtype = %d",
2172 			    evt->evt_seqnum, evt->evt_timestamp, evt->evt_code,
2173 			    evt->evt_cl.evt_locale, evt->evt_cl.evt_class,
2174 			    evt->evt_argtype);
2175 		}
2176 	}
2177 
2178 	dev_err(lmrc->l_dip, CE_NOTE, "!%s", evt->evt_descr);
2179 
2180 	/*
2181 	 * Just reuse the command in its entirety. Increase the sequence
2182 	 * number.
2183 	 */
2184 	dcmd->md_mbox_32[0] = evt->evt_seqnum + 1;
2185 	mutex_enter(&mfi->mfi_lock);
2186 	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
2187 	mutex_exit(&mfi->mfi_lock);
2188 }
2189 
2190 /*
2191  * lmrc_complete_aen
2192  *
2193  * An AEN was received, so schedule a taskq to process it.
2194  */
2195 static void
2196 lmrc_complete_aen(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
2197 {
2198 	mfi_header_t *hdr = &mfi->mfi_frame->mf_hdr;
2199 
2200 	ASSERT(mutex_owned(&mfi->mfi_lock));
2201 
2202 	if (hdr->mh_cmd_status != MFI_STAT_OK) {
2203 		/* Was the command aborted? */
2204 		if (hdr->mh_cmd_status == MFI_STAT_NOT_FOUND)
2205 			return;
2206 
2207 		dev_err(lmrc->l_dip, CE_WARN,
2208 		    "!AEN failed, status = %d",
2209 		    hdr->mh_cmd_status);
2210 		taskq_dispatch_ent(lmrc->l_taskq, (task_func_t *)lmrc_put_mfi,
2211 		    mfi, TQ_NOSLEEP, &mfi->mfi_tqent);
2212 		return;
2213 	}
2214 
2215 	taskq_dispatch_ent(lmrc->l_taskq, lmrc_aen_handler, mfi, TQ_NOSLEEP,
2216 	    &mfi->mfi_tqent);
2217 }
2218 
2219 /*
2220  * lmrc_register_aen
2221  *
2222  * In FreeBSD, this function checks for an existing AEN. If its class and locale
2223  * already include what is requested here they just return. In the other case,
2224  * the existing AEN is aborted and a new one is created, which includes
2225  * the previous locale and class and new ones.
2226  *
2227  * Given that the driver (same as in FreeBSD) calls this function during attach
2228  * to create an AEN with LOCALE_ALL and CLASS_DEBUG, all of this would be dead
2229  * code anyway.
2230  */
2231 static int
2232 lmrc_register_aen(lmrc_t *lmrc, uint32_t seqnum)
2233 {
2234 	lmrc_mfi_cmd_t *mfi;
2235 	mfi_dcmd_payload_t *dcmd;
2236 
2237 	mfi = lmrc_get_dcmd(lmrc, MFI_FRAME_DIR_READ, MFI_DCMD_CTRL_EVENT_WAIT,
2238 	    sizeof (mfi_evt_detail_t), 1);
2239 
2240 	if (mfi == NULL)
2241 		return (DDI_FAILURE);
2242 
2243 	dcmd = &mfi->mfi_frame->mf_dcmd;
2244 	dcmd->md_mbox_32[0] = seqnum;
2245 	dcmd->md_mbox_16[2] = MFI_EVT_LOCALE_ALL;
2246 	dcmd->md_mbox_8[7] = MFI_EVT_CLASS_DEBUG;
2247 
2248 	mutex_enter(&mfi->mfi_lock);
2249 	lmrc_issue_mfi(lmrc, mfi, lmrc_complete_aen);
2250 	mutex_exit(&mfi->mfi_lock);
2251 
2252 	return (DDI_SUCCESS);
2253 }
2254 
2255 /*
2256  * lmrc_start_aen
2257  *
2258  * Set up and enable AEN processing.
2259  */
2260 int
2261 lmrc_start_aen(lmrc_t *lmrc)
2262 {
2263 	mfi_evt_log_info_t eli;
2264 	int ret;
2265 
2266 	bzero(&eli, sizeof (eli));
2267 
2268 	/* Get the latest sequence number from the Event Log Info. */
2269 	ret = lmrc_get_event_log_info(lmrc, &eli);
2270 	if (ret != DDI_SUCCESS)
2271 		return (ret);
2272 
2273 	/* Register AEN with FW for latest sequence number + 1. */
2274 	ret = lmrc_register_aen(lmrc, eli.eli_newest_seqnum + 1);
2275 	return (ret);
2276 }
2277