/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Racktop Systems, Inc.
 */

/*
 * This driver targets the LSI/Broadcom/AVAGO Megaraid SAS controllers
 * of the 3rd generation, in particular the models Aero and Ventura.
 *
 * This file contains the interfaces to DDI.
 *
 * Driver attach:
 * --------------
 *
 * For each HBA, the driver will attach three instances. The first will be for
 * the controller, carrying out hardware and driver initialization, while the
 * remaining two are SCSA instances for the RAID (LD) and physical (PD) iports.
 *
 * Controller Initialization:
 * --------------------------
 *
 * The initialization of the controller hardware is split across multiple
 * functions which are called during lmrc_ctrl_attach():
 * 1. As soon as the device registers are mapped, lmrc_adapter_init() will
 *    be called. This will attempt to bring the firmware to a ready state,
 *    after which control registers are read to fetch basic hardware properties
 *    and calculate the sizes of various data structures used by the driver.
 * 2. After setting up interrupts and initializing mutexes, the expected number
 *    of MFI and MPT commands will be pre-allocated. Then, the I/O controller
 *    will be initialized by sending an IOC INIT command.
 * 3. At this point the driver is able to send commands to the controller and
 *    receive replies. This will first be used to retrieve controller firmware
 *    properties to finish driver setup based on the information received.
 * 4. As part of the remaining firmware configuration, we'll post a set of long-
 *    running commands to keep us informed about RAID map and PD map changes.
 *    These commands will complete asynchronously and will be rescheduled every
 *    time they have completed.
 *
 * While it's not really part of the controller initialization, it is worthwhile
 * to mention here that we send a CTRL SHUTDOWN command to the controller during
 * our quiesce(9E).
 *
 *
 * SCSA HBA Setup:
 * ---------------
 *
 * The driver is written to conform to SCSAv3.
 *
 * The driver will attach two iport(9) instances, one for physical devices that
 * are directly exposed by the HBA to the host, and another for logical devices.
 * The latter category not only includes RAID volumes but also physical disks
 * when the controller is in JBOD mode.
 *
 * The attach function for either iport will enumerate the physical and logical
 * devices, respectively, and populate a tgtmap(9). The driver itself maintains
 * target state in lmrc_tgt_t. It will attempt to get the SAS WWN of the
 * target and use it as a device address, falling back to the target ID as used
 * by the controller hardware.
 *
 * The array of target states is initialized once during controller attach. The
 * initial portion of each target state contains a back link to the controller
 * soft state and a mutex, neither of which need changing when a new target is
 * discovered or a target disappears. The array of target states is indexed by
 * the target ID as used by the controller hardware. Unused targets will have
 * their target ID set to LMRC_DEVHDL_INVALID.
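 *
 * Since the array is indexed by the hardware target ID, looking up a target's
 * state is a plain array access; a minimal sketch of such a lookup (the helper
 * name here is hypothetical, for illustration only):
 *
 *	lmrc_tgt_t *
 *	lmrc_tgt_find_by_id(lmrc_t *lmrc, uint16_t dev_id)
 *	{
 *		VERIFY3U(dev_id, <, ARRAY_SIZE(lmrc->l_targets));
 *		return (&lmrc->l_targets[dev_id]);
 *	}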
 *
 *
 * MPT I/O request sending and reply processing:
 * ---------------------------------------------
 *
 * The hardware expects to have access to two large areas of DMA memory that the
 * driver will use to send I/O requests and receive replies. The sizes of these
 * DMA buffers are based on the fixed size of I/O requests and the number of
 * such requests that the controller may accept, and on the size of the replies,
 * the queue depth supported by the hardware, and the number of interrupt
 * vectors available for this driver.
 *
 * Based on these numbers, the driver will pre-allocate enough MPT and MFI
 * commands to match the size of the I/O request buffer. In addition, each
 * MPT command will have an SGL chain frame and a sense buffer pre-allocated.
 * A set of functions are available to get an initialized command structure to
 * send a request, and to return it to the command list after use.
 *
 * Sending an MPT I/O request to the controller is done by filling out the I/O
 * frame with all the parameters needed for the request and creating a request
 * descriptor, filling in the SMID of the I/O frame used and the queue number
 * where the reply should be posted. The request descriptor is then written
 * into the appropriate device registers.
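 *
 * In sketch form, using the MPI2 descriptor type but hypothetical helper and
 * queue variable names (none of the names below are the driver's own):
 *
 *	Mpi2RequestDescriptorUnion_t desc = { 0 };
 *
 *	desc.SCSIIO.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
 *	desc.SCSIIO.SMID = mpt->mpt_smid;
 *	desc.SCSIIO.MSIxIndex = queue;
 *	lmrc_write_req_desc(lmrc, &desc);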
 *
 * On completion, an interrupt may or may not be posted, depending on the I/O
 * request flags and the overall system state, such as whether interrupts are
 * enabled at all. If an interrupt is received, any new replies posted into the
 * queue associated with the interrupt vector are processed and their callbacks,
 * if any, will be called. The hardware will be informed about the last reply
 * index processed by writing the appropriate register.
 *
 * Polled I/O is facilitated by repeatedly checking for the presence of a reply,
 * waiting a short time in between, up to a pre-defined timeout.
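 *
 * A sketch of such a poll loop; the timeout constant and the wait interval
 * are made up for illustration:
 *
 *	int i;
 *
 *	for (i = 0; i < LMRC_POLL_TIMEOUT; i++) {
 *		if (lmrc_process_replies(lmrc, queue) == DDI_INTR_CLAIMED)
 *			break;
 *		delay(drv_usectohz(100));
 *	}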
 *
 *
 * MFI (MegaRAID Firmware Interface) commands:
 * -------------------------------------------
 *
 * MFI commands are used internally by the driver or by user space via the ioctl
 * interface. Except for the initial IOC INIT command, all MFI commands will be
 * sent using MPT MFI passthru commands. As the driver uses only a small number
 * of MFI commands, each MFI command has an MPT command preallocated.
 *
 * MFI commands can be sent synchronously in "blocked" or "polled" mode, which
 * differ only in the way the driver waits for completion. When sending a
 * "blocked" command, the driver will set a callback and wait for the hardware
 * to return the command through the normal interrupt driven code path. In
 * "polled" mode, the command has a flag set to indicate to the hardware it
 * should not be posted to a reply queue, and the driver repeatedly checks its
 * status until it changes to indicate completion.
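 *
 * A blocked-mode wait thus reduces to the usual mutex/CV pattern; in sketch
 * form, where the send helper, the CV name, and the status field names are
 * assumptions for illustration:
 *
 *	mutex_enter(&mfi->mfi_lock);
 *	lmrc_issue_mfi(lmrc, mfi);
 *	while (mfi->mfi_frame->mf_hdr.mh_cmd_status ==
 *	    MFI_STAT_INVALID_STATUS)
 *		cv_wait(&mfi->mfi_cv, &mfi->mfi_lock);
 *	mutex_exit(&mfi->mfi_lock);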
 *
 * MFI commands can also be sent asynchronously, in which case they are always
 * completed through the interrupt code path and have a callback. This is used
 * for RAID and PD map updates and Asynchronous Event Notifications (AENs). In
 * all these cases, the commands are usually sent to the hardware again after
 * having been completed, avoiding unnecessary reallocation.
 *
 * As asynchronous commands can still be outstanding during detach, they can and
 * will be aborted by sending an MFI ABORT command when the driver is shutting
 * down.
 *
 * Asynchronous Event Notifications:
 * ---------------------------------
 *
 * The driver will always have one AEN request outstanding to receive events
 * from the controller. These events aren't very well documented, but it is
 * known that they include a "locale" describing to which aspect of the HBA
 * they apply, which is either the controller itself, physical devices, or
 * logical devices.
 *
 * Most events will be logged but otherwise ignored by the driver, but some
 * inform us about changes to the physical or logical drives connected to the
 * HBA, in which case we update the respective target map.
 *
 *
 * DMA considerations:
 * -------------------
 *
 * Most of the MPT structures can hold a 64bit physical address for DMA, but
 * some don't. Additionally, the hardware may indicate that it doesn't handle
 * 64bit DMA, even though the structures could hold an address this wide.
 *
 * Consequently, the driver keeps two sets of DMA attributes in its soft state,
 * one dedicated to 32bit DMA and another one for all other uses which could
 * potentially support 64bit DMA. The latter will be modified to fit what the
 * hardware actually supports.
 *
 *
 * Interrupt considerations:
 * -------------------------
 *
 * Unless we're in the unlikely situation that the hardware claims to not
 * actually support it, the driver will prefer to get MSI-X interrupts. If that
 * fails it'll make do with MSI interrupts, falling back to FIXED interrupts if
 * that fails as well.
 *
 * The number of queues supported is set to the minimum of what the hardware
 * claims to support and the number of interrupt vectors we can allocate. It is
 * expected that the hardware will support many more queues and interrupt
 * vectors than what the OS gives us by default.
 *
 *
 * Locking considerations:
 * -----------------------
 *
 * The driver uses several mutexes, rwlocks, and one semaphore to serialize
 * accesses to various parts of its internal state.
 *
 * The semaphore lmrc->l_ioctl_sema is used to limit the number of MFI commands
 * concurrently in use by user space. This semaphore needs to be decremented by
 * the ioctl code path before any other locks may be acquired.
 *
 * The PD and RAID maps are each protected by an rwlock, lmrc->l_pdmap_lock and
 * lmrc->l_raidmap_lock. Either map is write-locked only when we receive an
 * updated map from the firmware and copy it over our map, which happens only
 * in the context of the MFI command completion for the respective MAP GET INFO
 * command, with the respective MFI command mutex being held. Read-locking of
 * either map does not require any specific lock ordering.
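 *
 * A map reader therefore just brackets its accesses with the rwlock, as in
 * this sketch (the map member consulted inside the brackets is illustrative):
 *
 *	rw_enter(&lmrc->l_raidmap_lock, RW_READER);
 *	(... consult the RAID map ...)
 *	rw_exit(&lmrc->l_raidmap_lock);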
 *
 * Each lmrc_tgt_t has its own rwlock, tgt->tgt_lock, which is write-locked only
 * during lmrc_tgt_clear(), lmrc_tgt_init(), and lmrc_raid_get_wwn(), all of
 * which run to update our internal target state as the hardware notifies us
 * about a target change. No other locks are held during target state changes.
 * During lmrc_tran_start() and lmrc_task_mgmt(), all other required command and
 * map locks are acquired and released as necessary with the addressed target
 * being read-locked, preventing target state updates while I/O is being done.
 *
 * Each MPT and MFI command has an associated mutex (mpt_lock and mfi_lock,
 * respectively) and condition variable used for synchronization and completion
 * signalling. In general, the mutex should be held while the command is set up
 * until it has been sent to the hardware. The interrupt handler acquires the
 * mutex of each completed command before signalling completion. In case of
 * command abortion, the mutex of a command to be aborted is held to block
 * completion until the ABORT or TASK MGMT command is sent to the hardware to
 * avoid races.
 *
 * To simplify MPT command handling, the function lmrc_get_mpt(), used to get an
 * MPT command from the free list, always returns the command locked. Mirroring
 * that, lmrc_put_mpt() expects the MPT command to be locked when it is put back
 * on the free list, unlocking it only once it has been linked onto that list.
 *
 * Additionally, each lmrc_tgt_t has an active command list to keep track of all
 * MPT I/O commands sent to a target, protected by its own mutex. When iterating
 * the active command list of a target, the mutex protecting this list must be
 * held while the command mutexes are entered and exited. When adding a command
 * to an active command list, the mutex protecting the list is acquired while
 * the command mutex is held. Care must be taken to avoid a deadlock against the
 * iterating functions when removing a command from an active command list: The
 * command mutex must not be held when the mutex protecting the list is entered.
 * Using the functions for active command list management ensures lock ordering,
 * as sketched below.
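 *
 * For illustration, adding a command to a target's active list follows this
 * shape, with the command mutex already held by the caller:
 *
 *	mutex_enter(&tgt->tgt_mpt_active_lock);
 *	list_insert_tail(&tgt->tgt_mpt_active, mpt);
 *	mutex_exit(&tgt->tgt_mpt_active_lock);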
 */

#include <sys/class.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/dditypes.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/policy.h>
#include <sys/scsi/scsi.h>

#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>

#include "lmrc.h"
#include "lmrc_reg.h"
#include "lmrc_ioctl.h"
#include "lmrc_phys.h"

#define	INST2LSIRDCTL(x)	((x) << INST_MINOR_SHIFT)

void *lmrc_state;

/*
 * Since the max sgl length can vary, we create a per-instance copy of
 * lmrc_dma_attr and fill in .dma_attr_sgllen with the correct value
 * during attach.
 */
static const ddi_dma_attr_t lmrc_dma_attr = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x00000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0xFFFFFFFF,
	.dma_attr_align = 8,
	.dma_attr_burstsizes = 0x7,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0xFFFFFFFF,
	.dma_attr_seg = 0xFFFFFFFF,
	.dma_attr_sgllen = 0,
	.dma_attr_granular = 512,
	.dma_attr_flags = 0,
};

static struct ddi_device_acc_attr lmrc_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC,
	.devacc_attr_dataorder = DDI_STRICTORDER_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC,
};

static int lmrc_attach(dev_info_t *, ddi_attach_cmd_t);
static int lmrc_detach(dev_info_t *, ddi_detach_cmd_t);
static int lmrc_ctrl_attach(dev_info_t *);
static int lmrc_ctrl_detach(dev_info_t *);
static int lmrc_cleanup(lmrc_t *, boolean_t);
static lmrc_adapter_class_t lmrc_get_class(lmrc_t *);
static int lmrc_regs_init(lmrc_t *);
static uint_t lmrc_isr(caddr_t, caddr_t);
static int lmrc_add_intrs(lmrc_t *, int);
static int lmrc_intr_init(lmrc_t *);
static void lmrc_intr_fini(lmrc_t *);
static int lmrc_fm_error_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void lmrc_fm_init(lmrc_t *);
static void lmrc_fm_fini(lmrc_t *);
static int lmrc_alloc_mpt_cmds(lmrc_t *, const size_t);
static void lmrc_free_mpt_cmds(lmrc_t *, const size_t);
static int lmrc_alloc_mfi_cmds(lmrc_t *, const size_t);
static void lmrc_free_mfi_cmds(lmrc_t *, const size_t);

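/*
 * Attach the controller instance: set up soft state, register mappings, DMA
 * attributes, interrupts, and synchronization primitives, pre-allocate the
 * MFI and MPT commands, initialize the firmware, and set up the SCSA HBA,
 * ioctl node, taskq, and AEN handling. Each completed step is recorded in
 * the init level so that lmrc_cleanup() can undo exactly the steps taken so
 * far if anything fails.
 */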
static int
lmrc_ctrl_attach(dev_info_t *dip)
{
	char name[64]; /* large enough for the taskq name */
	lmrc_t *lmrc;
	uint32_t instance;
	int ret;
	int i;

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(lmrc_state, instance) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not allocate soft state");
		return (DDI_FAILURE);
	}

	lmrc = ddi_get_soft_state(lmrc_state, instance);
	lmrc->l_dip = dip;

	lmrc->l_ctrl_info = kmem_zalloc(sizeof (mfi_ctrl_info_t), KM_SLEEP);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_BASIC);

	lmrc->l_class = lmrc_get_class(lmrc);

	if (lmrc->l_class == LMRC_ACLASS_OTHER) {
		dev_err(dip, CE_WARN, "unknown controller class");
		goto fail;
	}

	lmrc->l_acc_attr = lmrc_acc_attr;
	lmrc->l_dma_attr = lmrc_dma_attr;
	lmrc->l_dma_attr_32 = lmrc_dma_attr;

	lmrc_fm_init(lmrc);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_FM);

	if (lmrc_regs_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_REGS);

	if (lmrc_adapter_init(lmrc) != DDI_SUCCESS)
		goto fail;

	lmrc->l_dma_attr_32.dma_attr_addr_hi = 0xFFFFFFFF;

	/* Restrict all DMA to the lower 32bit address space if necessary. */
	if (!lmrc->l_64bit_dma_support)
		lmrc->l_dma_attr.dma_attr_addr_hi = 0xFFFFFFFF;

	if (lmrc_intr_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_INTR);

	mutex_init(&lmrc->l_mpt_cmd_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	list_create(&lmrc->l_mpt_cmd_list, sizeof (lmrc_mpt_cmd_t),
	    offsetof(lmrc_mpt_cmd_t, mpt_node));

	mutex_init(&lmrc->l_mfi_cmd_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	list_create(&lmrc->l_mfi_cmd_list, sizeof (lmrc_mfi_cmd_t),
	    offsetof(lmrc_mfi_cmd_t, mfi_node));

	mutex_init(&lmrc->l_reg_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));

	rw_init(&lmrc->l_raidmap_lock, NULL, RW_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	rw_init(&lmrc->l_pdmap_lock, NULL, RW_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));

	sema_init(&lmrc->l_ioctl_sema, LMRC_MAX_IOCTL_CMDS, NULL, SEMA_DRIVER,
	    NULL);

	mutex_init(&lmrc->l_thread_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	cv_init(&lmrc->l_thread_cv, NULL, CV_DRIVER, NULL);

	for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
		lmrc_tgt_t *tgt = &lmrc->l_targets[i];

		rw_init(&tgt->tgt_lock, NULL, RW_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));
		mutex_init(&tgt->tgt_mpt_active_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));
		list_create(&tgt->tgt_mpt_active, sizeof (lmrc_mpt_cmd_t),
		    offsetof(lmrc_mpt_cmd_t, mpt_node));
		tgt->tgt_lmrc = lmrc;
		tgt->tgt_dev_id = LMRC_DEVHDL_INVALID;
	}

	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_SYNC);

	if (lmrc_alloc_mpt_cmds(lmrc, lmrc->l_max_fw_cmds) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_MPTCMDS);

	if (lmrc_alloc_mfi_cmds(lmrc, LMRC_MAX_MFI_CMDS) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_MFICMDS);

	lmrc->l_thread = thread_create(NULL, 0, lmrc_thread, lmrc, 0, &p0,
	    TS_RUN, minclsyspri);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_THREAD);

	if (lmrc_ioc_init(lmrc) != DDI_SUCCESS)
		goto fail;

	lmrc_enable_intr(lmrc);

	if (lmrc_fw_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_FW);

	if (lmrc_hba_attach(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_HBA);

	(void) snprintf(lmrc->l_iocname, sizeof (lmrc->l_iocname),
	    "%d:lsirdctl", instance);
	if (ddi_create_minor_node(dip, lmrc->l_iocname, S_IFCHR,
	    INST2LSIRDCTL(instance), DDI_PSEUDO, 0) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to create ioctl node.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_NODE);

	(void) snprintf(name, sizeof (name), "%s%d_taskq",
	    ddi_driver_name(dip), ddi_get_instance(dip));

	lmrc->l_taskq = taskq_create(name, lmrc->l_max_reply_queues,
	    minclsyspri, 64, INT_MAX, TASKQ_PREPOPULATE);
	if (lmrc->l_taskq == NULL) {
		dev_err(dip, CE_WARN, "failed to create taskq.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_TASKQ);

	if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to initiate AEN.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_AEN);

	ddi_report_dev(dip);

	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_SUCCESS) {
		lmrc_fm_ereport(lmrc, DDI_FM_DEVICE_NO_RESPONSE);
		ddi_fm_service_impact(lmrc->l_dip, DDI_SERVICE_LOST);
	}

	return (DDI_SUCCESS);

fail:
	ret = lmrc_cleanup(lmrc, B_TRUE);
	VERIFY3U(ret, ==, DDI_SUCCESS);

	return (DDI_FAILURE);
}

static int
lmrc_ctrl_detach(dev_info_t *dip)
{
	lmrc_t *lmrc = ddi_get_soft_state(lmrc_state, ddi_get_instance(dip));
	VERIFY(lmrc != NULL);

	return (lmrc_cleanup(lmrc, B_FALSE));
}

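/*
 * Tear down a controller instance, undoing the steps taken by
 * lmrc_ctrl_attach() in reverse order as recorded by the initlevel flags.
 * This fails if the iport instances are still attached or if outstanding
 * MFI commands can't be aborted.
 */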
static int
lmrc_cleanup(lmrc_t *lmrc, boolean_t failed)
{
	int i, ret;

	if (lmrc->l_raid_dip != NULL || lmrc->l_phys_dip != NULL)
		return (DDI_FAILURE);

	/*
	 * Before doing anything else, abort any outstanding commands.
	 * The first commands are issued during FW initialization, so check
	 * that we're past this point.
	 */
	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FW)) {
		ret = lmrc_abort_outstanding_mfi(lmrc, LMRC_MAX_MFI_CMDS);
		lmrc_disable_intr(lmrc);
		if (ret != DDI_SUCCESS)
			return (ret);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_AEN)) {
		/* The AEN command was aborted above already. */
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_AEN);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_TASKQ)) {
		taskq_destroy(lmrc->l_taskq);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_TASKQ);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_NODE)) {
		ddi_remove_minor_node(lmrc->l_dip, lmrc->l_iocname);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_NODE);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_HBA)) {
		(void) lmrc_hba_detach(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_HBA);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FW)) {
		lmrc_free_pdmap(lmrc);
		lmrc_free_raidmap(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_FW);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_THREAD)) {
		mutex_enter(&lmrc->l_thread_lock);
		lmrc->l_thread_stop = B_TRUE;
		cv_signal(&lmrc->l_thread_cv);
		mutex_exit(&lmrc->l_thread_lock);
		thread_join(lmrc->l_thread->t_did);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_THREAD);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_MFICMDS)) {
		lmrc_free_mfi_cmds(lmrc, LMRC_MAX_MFI_CMDS);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_MFICMDS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_MPTCMDS)) {
		lmrc_free_mpt_cmds(lmrc, lmrc->l_max_fw_cmds);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_MPTCMDS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_SYNC)) {
		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
			lmrc_tgt_t *tgt = &lmrc->l_targets[i];

			list_destroy(&tgt->tgt_mpt_active);
			mutex_destroy(&tgt->tgt_mpt_active_lock);
			rw_destroy(&tgt->tgt_lock);
		}

		mutex_destroy(&lmrc->l_thread_lock);
		cv_destroy(&lmrc->l_thread_cv);

		sema_destroy(&lmrc->l_ioctl_sema);

		mutex_destroy(&lmrc->l_mfi_cmd_lock);
		list_destroy(&lmrc->l_mfi_cmd_list);

		mutex_destroy(&lmrc->l_mpt_cmd_lock);
		list_destroy(&lmrc->l_mpt_cmd_list);

		rw_destroy(&lmrc->l_pdmap_lock);
		rw_destroy(&lmrc->l_raidmap_lock);
		mutex_destroy(&lmrc->l_reg_lock);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_SYNC);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_INTR)) {
		lmrc_intr_fini(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_INTR);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_REGS)) {
		ddi_regs_map_free(&lmrc->l_reghandle);
		lmrc->l_regmap = NULL;
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_REGS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FM)) {
		lmrc_fm_fini(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_FM);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_BASIC)) {
		kmem_free(lmrc->l_ctrl_info, sizeof (mfi_ctrl_info_t));
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_BASIC);
	}

	VERIFY0(lmrc->l_init_level);
	ddi_soft_state_free(lmrc_state, ddi_get_instance(lmrc->l_dip));

	return (DDI_SUCCESS);
}

static int
lmrc_regs_init(lmrc_t *lmrc)
{
	uint_t regno;
	off_t regsize;

	switch (lmrc->l_class) {
	case LMRC_ACLASS_VENTURA:
	case LMRC_ACLASS_AERO:
		regno = 1;
		break;
	default:
		regno = 2;
		break;
	}

	if (ddi_dev_regsize(lmrc->l_dip, regno, &regsize) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (regsize < LMRC_MFI_MIN_MEM) {
		dev_err(lmrc->l_dip, CE_WARN, "reg %d size (%ld) is too small",
		    regno, regsize);
		return (DDI_FAILURE);
	}

	if (ddi_regs_map_setup(lmrc->l_dip, regno, &lmrc->l_regmap, 0, 0,
	    &lmrc->l_acc_attr, &lmrc->l_reghandle) != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "unable to map control registers");
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

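/*
 * Common interrupt handler for all vectors. For FIXED interrupts, first ask
 * the hardware whether the interrupt is actually ours; for MSI/MSI-X, each
 * vector maps directly to a reply queue, which is processed right away.
 */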
static uint_t
lmrc_isr(caddr_t arg1, caddr_t arg2)
{
	lmrc_t *lmrc = (lmrc_t *)arg1;
	int queue = (int)(uintptr_t)arg2;
	uint_t ret = DDI_INTR_UNCLAIMED;

	if (lmrc->l_intr_type == DDI_INTR_TYPE_FIXED) {
		ret = lmrc_intr_ack(lmrc);
		if (ret != DDI_INTR_CLAIMED)
			return (ret);
	}

	ret = lmrc_process_replies(lmrc, queue);
	return (ret);
}

/*
 * Set up interrupts of the given type: determine how many vectors are
 * available and useful, allocate and enable them, and register the interrupt
 * handler for each vector.
 */
static int
lmrc_add_intrs(lmrc_t *lmrc, int intr_type)
{
	int navail, nintrs, count;
	int ret;
	int i;

	if (lmrc->l_intr_types == 0) {
		ret = ddi_intr_get_supported_types(lmrc->l_dip,
		    &lmrc->l_intr_types);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_get_supported_types failed",
			    __func__);
			return (ret);
		}
	}

	if ((lmrc->l_intr_types & intr_type) == 0)
		return (DDI_FAILURE);

	/* Don't use MSI-X if the firmware doesn't support it. */
	if (intr_type == DDI_INTR_TYPE_MSIX && !lmrc->l_fw_msix_enabled)
		return (DDI_FAILURE);

	ret = ddi_intr_get_nintrs(lmrc->l_dip, intr_type, &nintrs);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_nintrs failed", __func__);
		return (ret);
	}

	ret = ddi_intr_get_navail(lmrc->l_dip, intr_type, &navail);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_navail failed", __func__);
		return (ret);
	}

	/*
	 * There's no point in having more interrupts than queues supported by
	 * the hardware.
	 */
	if (navail > lmrc->l_max_reply_queues)
		navail = lmrc->l_max_reply_queues;

	lmrc->l_intr_htable_size = navail * sizeof (ddi_intr_handle_t);
	lmrc->l_intr_htable = kmem_zalloc(lmrc->l_intr_htable_size, KM_SLEEP);

	ret = ddi_intr_alloc(lmrc->l_dip, lmrc->l_intr_htable, intr_type, 0,
	    navail, &count, DDI_INTR_ALLOC_NORMAL);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!%s: ddi_intr_alloc failed",
		    __func__);
		goto fail;
	}

	if (count < navail) {
		dev_err(lmrc->l_dip, CE_CONT,
		    "?requested %d interrupts, received %d\n", navail, count);
	}

	lmrc->l_intr_count = count;

	ret = ddi_intr_get_pri(lmrc->l_intr_htable[0], &lmrc->l_intr_pri);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!%s: ddi_intr_get_pri failed",
		    __func__);
		goto fail;
	}

	if (lmrc->l_intr_pri >= ddi_intr_get_hilevel_pri()) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "high level interrupts not supported");
		goto fail;
	}

	for (i = 0; i < lmrc->l_intr_count; i++) {
		ret = ddi_intr_add_handler(lmrc->l_intr_htable[i], lmrc_isr,
		    (caddr_t)lmrc, (caddr_t)(uintptr_t)i);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_add_handler failed", __func__);
			goto fail;
		}
	}

	ret = ddi_intr_get_cap(lmrc->l_intr_htable[0], &lmrc->l_intr_cap);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_cap failed", __func__);
		goto fail;
	}

	if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) != 0) {
		ret = ddi_intr_block_enable(lmrc->l_intr_htable, count);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_block_enable failed", __func__);
			goto fail;
		}
	} else {
		for (i = 0; i < lmrc->l_intr_count; i++) {
			ret = ddi_intr_enable(lmrc->l_intr_htable[i]);
			if (ret != DDI_SUCCESS) {
				dev_err(lmrc->l_dip, CE_WARN,
				    "!%s: ddi_intr_enable failed", __func__);
				goto fail;
			}
		}
	}

	lmrc->l_intr_type = intr_type;
	return (DDI_SUCCESS);

fail:
	lmrc_intr_fini(lmrc);
	return (ret);
}

static int
lmrc_intr_init(lmrc_t *lmrc)
{
	int ret;

	lmrc_disable_intr(lmrc);

	if ((lmrc_add_intrs(lmrc, DDI_INTR_TYPE_MSIX) != DDI_SUCCESS) &&
	    (lmrc_add_intrs(lmrc, DDI_INTR_TYPE_MSI) != DDI_SUCCESS) &&
	    (lmrc_add_intrs(lmrc, DDI_INTR_TYPE_FIXED) != DDI_SUCCESS)) {
		dev_err(lmrc->l_dip, CE_WARN, "failed to set up interrupts");
		return (DDI_FAILURE);
	}

	dev_err(lmrc->l_dip, CE_NOTE, "!got %d %s interrupts",
	    lmrc->l_intr_count,
	    lmrc->l_intr_type == DDI_INTR_TYPE_MSIX ? "MSI-X" :
	    lmrc->l_intr_type == DDI_INTR_TYPE_MSI ? "MSI" : "FIXED");

	/* Don't use more queues than we got interrupts for. */
	if (lmrc->l_max_reply_queues > lmrc->l_intr_count)
		lmrc->l_max_reply_queues = lmrc->l_intr_count;

	/*
	 * While here, allocate the array keeping the last reply index for
	 * each queue, and the reply descriptor DMA memory. Each queue will
	 * have space for reply_q_depth MPI2 descriptors (reply_alloc_sz).
	 */
	lmrc->l_last_reply_idx =
	    kmem_zalloc(sizeof (uint16_t) * lmrc->l_max_reply_queues, KM_SLEEP);

	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &lmrc->l_reply_dma,
	    lmrc->l_reply_alloc_sz * lmrc->l_max_reply_queues, 16,
	    DDI_DMA_CONSISTENT);
	if (ret != DDI_SUCCESS) {
		lmrc_intr_fini(lmrc);
		return (ret);
	}
	memset(lmrc->l_reply_dma.ld_buf, -1, lmrc->l_reply_dma.ld_len);

	return (DDI_SUCCESS);
}

static void
lmrc_intr_fini(lmrc_t *lmrc)
{
	uint_t i;

	if (lmrc->l_intr_htable == NULL || lmrc->l_intr_htable[0] == NULL)
		return;

	if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) != 0) {
		(void) ddi_intr_block_disable(lmrc->l_intr_htable,
		    lmrc->l_intr_count);
	}

	for (i = 0; i < lmrc->l_intr_count; i++) {
		if (lmrc->l_intr_htable[i] == NULL)
			break;

		if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) == 0)
			(void) ddi_intr_disable(lmrc->l_intr_htable[i]);
		(void) ddi_intr_remove_handler(lmrc->l_intr_htable[i]);
		(void) ddi_intr_free(lmrc->l_intr_htable[i]);
	}

	if (lmrc->l_intr_htable != NULL)
		kmem_free(lmrc->l_intr_htable, lmrc->l_intr_htable_size);

	lmrc->l_intr_htable = NULL;
	lmrc->l_intr_htable_size = 0;

	if (lmrc->l_last_reply_idx != NULL)
		kmem_free(lmrc->l_last_reply_idx,
		    sizeof (uint16_t) * lmrc->l_max_reply_queues);

	lmrc_dma_free(&lmrc->l_reply_dma);
}

static int
lmrc_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err_status,
    const void *arg)
{
	pci_ereport_post(dip, err_status, NULL);
	return (err_status->fme_status);
}

static void
lmrc_fm_init(lmrc_t *lmrc)
{
	ddi_iblock_cookie_t fm_ibc;

	lmrc->l_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
	    lmrc->l_dip, DDI_PROP_DONTPASS, "fm-capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	if (lmrc->l_fm_capabilities == 0)
		return;

	lmrc->l_dma_attr.dma_attr_flags = DDI_DMA_FLAGERR;
	lmrc->l_dma_attr_32.dma_attr_flags = DDI_DMA_FLAGERR;
	lmrc->l_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;

	ddi_fm_init(lmrc->l_dip, &lmrc->l_fm_capabilities, &fm_ibc);

	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities) ||
	    DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		pci_ereport_setup(lmrc->l_dip);
	}

	if (DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		ddi_fm_handler_register(lmrc->l_dip, lmrc_fm_error_cb,
		    lmrc);
	}
}

static void
lmrc_fm_fini(lmrc_t *lmrc)
{
	if (lmrc->l_fm_capabilities == 0)
		return;

	if (DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities))
		ddi_fm_handler_unregister(lmrc->l_dip);

	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities) ||
	    DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		pci_ereport_teardown(lmrc->l_dip);
	}

	ddi_fm_fini(lmrc->l_dip);
}

void
lmrc_fm_ereport(lmrc_t *lmrc, const char *detail)
{
	uint64_t ena;
	char buf[FM_MAX_CLASS];

	(void) snprintf(buf, sizeof (buf), "%s.%s", DDI_FM_DEVICE, detail);
	ena = fm_ena_generate(0, FM_ENA_FMT1);
	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities)) {
		ddi_fm_ereport_post(lmrc->l_dip, buf, ena, DDI_NOSLEEP,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERSION, NULL);
	}
}

int
lmrc_check_acc_handle(ddi_acc_handle_t h)
{
	ddi_fm_error_t de;

	if (h == NULL)
		return (DDI_FAILURE);

	ddi_fm_acc_err_get(h, &de, DDI_FME_VERSION);
	return (de.fme_status);
}

int
lmrc_check_dma_handle(ddi_dma_handle_t h)
{
	ddi_fm_error_t de;

	if (h == NULL)
		return (DDI_FAILURE);

	ddi_fm_dma_err_get(h, &de, DDI_FME_VERSION);
	return (de.fme_status);
}

static int
lmrc_alloc_mpt_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mpt_cmd_t **cmds;
	lmrc_mpt_cmd_t *cmd;
	uint32_t i;
	int ret;

	/*
	 * The hardware expects to find MPI I/O request frames in a big chunk
	 * of DMA memory, indexed by the MPT cmd SMID.
	 */
	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &lmrc->l_ioreq_dma,
	    lmrc->l_io_frames_alloc_sz, 256, DDI_DMA_CONSISTENT);
	if (ret != DDI_SUCCESS)
		return (ret);

	lmrc->l_mpt_cmds = cmds =
	    kmem_zalloc(ncmd * sizeof (lmrc_mpt_cmd_t *), KM_SLEEP);
	for (i = 0; i < ncmd; i++) {
		cmd = kmem_zalloc(sizeof (lmrc_mpt_cmd_t), KM_SLEEP);

		/* XXX: allocate on demand in tran_start / build_sgl? */
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr,
		    &cmd->mpt_chain_dma, lmrc->l_max_chain_frame_sz, 4,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		cmd->mpt_chain = cmd->mpt_chain_dma.ld_buf;

		/*
		 * We request a few bytes more for sense so that we can fit our
		 * arq struct before the actual sense data. We must make sure
		 * to put sts_sensedata at a 64 byte aligned address.
		 */
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr_32,
		    &cmd->mpt_sense_dma, LMRC_SENSE_LEN + P2ROUNDUP(
		    offsetof(struct scsi_arq_status, sts_sensedata), 64), 64,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		/*
		 * Now that we have a sufficiently sized and 64 byte aligned
		 * DMA buffer for sense, calculate mpt_sense so that it points
		 * at a struct scsi_arq_status somewhere within the first 64
		 * bytes in the DMA buffer, making sure its sts_sensedata is
		 * aligned at 64 bytes as well.
		 */
		cmd->mpt_sense = cmd->mpt_sense_dma.ld_buf + 64 -
		    offsetof(struct scsi_arq_status, sts_sensedata);
		VERIFY(IS_P2ALIGNED(&(((struct scsi_arq_status *)
		    cmd->mpt_sense)->sts_sensedata), 64));

		cmd->mpt_smid = i + 1;

		/*
		 * Calculate the address of this command's I/O frame within
		 * the DMA memory allocated earlier.
		 */
		cmd->mpt_io_frame =
		    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * cmd->mpt_smid +
		    lmrc->l_ioreq_dma.ld_buf;

		cmd->mpt_lmrc = lmrc;

		mutex_init(&cmd->mpt_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));

		cmds[i] = cmd;
		list_insert_tail(&lmrc->l_mpt_cmd_list, cmd);
	}

	return (DDI_SUCCESS);

fail:
	if (cmd->mpt_chain != NULL)
		lmrc_dma_free(&cmd->mpt_chain_dma);
	kmem_free(cmd, sizeof (lmrc_mpt_cmd_t));

	lmrc_free_mpt_cmds(lmrc, ncmd);

	return (ret);
}

static void
lmrc_free_mpt_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mpt_cmd_t *cmd;
	size_t count = 0;

	for (cmd = list_remove_head(&lmrc->l_mpt_cmd_list);
	    cmd != NULL;
	    cmd = list_remove_head(&lmrc->l_mpt_cmd_list)) {
		lmrc_dma_free(&cmd->mpt_chain_dma);
		lmrc_dma_free(&cmd->mpt_sense_dma);
		mutex_destroy(&cmd->mpt_lock);
		kmem_free(cmd, sizeof (lmrc_mpt_cmd_t));
		count++;
	}
	VERIFY3U(count, ==, ncmd);
	VERIFY(list_is_empty(&lmrc->l_mpt_cmd_list));

	kmem_free(lmrc->l_mpt_cmds, ncmd * sizeof (lmrc_mpt_cmd_t *));

	lmrc_dma_free(&lmrc->l_ioreq_dma);
}

static int
lmrc_alloc_mfi_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	int ret = DDI_SUCCESS;
	lmrc_mfi_cmd_t **cmds;
	lmrc_mfi_cmd_t *mfi;
	uint32_t i;

	lmrc->l_mfi_cmds = cmds =
	    kmem_zalloc(ncmd * sizeof (lmrc_mfi_cmd_t *), KM_SLEEP);
	for (i = 0; i < ncmd; i++) {
		mfi = kmem_zalloc(sizeof (lmrc_mfi_cmd_t), KM_SLEEP);
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr,
		    &mfi->mfi_frame_dma, sizeof (mfi_frame_t), 256,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		mfi->mfi_lmrc = lmrc;
		mfi->mfi_frame = mfi->mfi_frame_dma.ld_buf;
		mfi->mfi_idx = i;

		if (lmrc_build_mptmfi_passthru(lmrc, mfi) != DDI_SUCCESS) {
			lmrc_dma_free(&mfi->mfi_frame_dma);
			goto fail;
		}

		mutex_init(&mfi->mfi_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));

		cmds[i] = mfi;
		list_insert_tail(&lmrc->l_mfi_cmd_list, mfi);
	}

	return (DDI_SUCCESS);

fail:
	kmem_free(mfi, sizeof (lmrc_mfi_cmd_t));
	lmrc_free_mfi_cmds(lmrc, ncmd);

	return (ret);
}

static void
lmrc_free_mfi_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mfi_cmd_t *mfi;
	size_t count = 0;

	for (mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
	    mfi != NULL;
	    mfi = list_remove_head(&lmrc->l_mfi_cmd_list)) {
		ASSERT(lmrc->l_mfi_cmds[mfi->mfi_idx] == mfi);
		lmrc->l_mfi_cmds[mfi->mfi_idx] = NULL;

		/*
		 * lmrc_put_mpt() requires the command to be locked, unlocking
		 * after it has been put back on the free list.
		 */
		mutex_enter(&mfi->mfi_mpt->mpt_lock);
		lmrc_put_mpt(mfi->mfi_mpt);

		lmrc_dma_free(&mfi->mfi_frame_dma);
		mutex_destroy(&mfi->mfi_lock);
		kmem_free(mfi, sizeof (lmrc_mfi_cmd_t));
		count++;
	}
	VERIFY3U(count, ==, ncmd);
	VERIFY(list_is_empty(&lmrc->l_mfi_cmd_list));

	kmem_free(lmrc->l_mfi_cmds, ncmd * sizeof (lmrc_mfi_cmd_t *));
}

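/*
 * Build the IEEE SGL for an MPT I/O request from the given DMA cookies. The
 * first few SGEs are placed directly in the I/O frame; if there are more
 * cookies than fit there, the last direct SGE is turned into a chain element
 * pointing at the command's pre-allocated chain frame, which holds the rest.
 */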
void
lmrc_dma_build_sgl(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt,
    const ddi_dma_cookie_t *cookie, uint_t ncookies)
{
	Mpi25SCSIIORequest_t *io_req = mpt->mpt_io_frame;
	Mpi25IeeeSgeChain64_t *sgl_ptr = &io_req->SGL.IeeeChain;
	uint_t nsge, max_sge;
	uint_t i;

	ASSERT(ncookies > 0);

	/* Start with the 8 SGEs in the I/O frame. */
	max_sge = lmrc->l_max_sge_in_main_msg;

	for (;;) {
		nsge = min(ncookies, max_sge);

		for (i = 0; i < nsge; i++, cookie++) {
			*(uint64_t *)&sgl_ptr[i].Address =
			    cookie->dmac_laddress;
			sgl_ptr[i].Length = cookie->dmac_size;
			sgl_ptr[i].Flags = 0;
		}

		ncookies -= nsge;

		if (ncookies == 0)
			break;

		/*
		 * There's more. Roll back to the last cookie processed,
		 * set up the SGE chain and repeat.
		 */
		cookie--;
		ncookies++;

		if ((io_req->IoFlags &
		    MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) == 0)
			/* XXX: Why? And why only if not fast path? */
			io_req->ChainOffset = lmrc->l_chain_offset_io_request;
		else
			io_req->ChainOffset = 0;

		sgl_ptr[i - 1].Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT;
		sgl_ptr[i - 1].Length = sizeof (Mpi25SGEIOUnion_t) * ncookies;
		lmrc_dma_set_addr64(&mpt->mpt_chain_dma,
		    (uint64_t *)&sgl_ptr[i - 1].Address);
		sgl_ptr = mpt->mpt_chain;

		nsge = ncookies;
		max_sge = lmrc->l_max_sge_in_chain;

		VERIFY3U(nsge, <=, max_sge);
	}

	sgl_ptr[i - 1].Flags = MPI25_IEEE_SGE_FLAGS_END_OF_LIST;

	(void) ddi_dma_sync(mpt->mpt_chain_dma.ld_hdl, 0,
	    mpt->mpt_chain_dma.ld_len, DDI_DMA_SYNC_FORDEV);
}

size_t
lmrc_dma_get_size(lmrc_dma_t *dmap)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	return (cookie->dmac_size);
}

void
lmrc_dma_set_addr64(lmrc_dma_t *dmap, uint64_t *addr)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	*addr = cookie->dmac_laddress;
}

void
lmrc_dma_set_addr32(lmrc_dma_t *dmap, uint32_t *addr)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	*addr = cookie->dmac_address;
}

int
lmrc_dma_alloc(lmrc_t *lmrc, ddi_dma_attr_t attr, lmrc_dma_t *dmap, size_t len,
    uint64_t align, uint_t flags)
{
	int ret;

	VERIFY3U(len, >, 0);
	VERIFY3U(align, >=, 1);

	bzero(dmap, sizeof (*dmap));

	attr.dma_attr_align = align;
	attr.dma_attr_sgllen = 1;
	attr.dma_attr_granular = 1;

	ret = ddi_dma_alloc_handle(lmrc->l_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &dmap->ld_hdl);
	if (ret != DDI_SUCCESS) {
		/*
		 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
		 * the only other possible error is DDI_DMA_BADATTR which
		 * indicates a driver bug which should cause a panic.
		 */
		dev_err(lmrc->l_dip, CE_PANIC,
		    "failed to allocate DMA handle, check DMA attributes");
		return (ret);
	}

	ret = ddi_dma_mem_alloc(dmap->ld_hdl, len, &lmrc->l_acc_attr,
	    flags, DDI_DMA_SLEEP, NULL, (caddr_t *)&dmap->ld_buf,
	    &dmap->ld_len, &dmap->ld_acc);
	if (ret != DDI_SUCCESS) {
		/*
		 * Due to DDI_DMA_SLEEP, ddi_dma_mem_alloc() can only fail if
		 * the flags are conflicting, which indicates a driver bug and
		 * should cause a panic.
		 */
		dev_err(lmrc->l_dip, CE_PANIC,
		    "failed to allocate DMA memory, check DMA flags (%x)",
		    flags);
		return (ret);
	}

	ret = ddi_dma_addr_bind_handle(dmap->ld_hdl, NULL, dmap->ld_buf,
	    dmap->ld_len, DDI_DMA_RDWR | flags, DDI_DMA_SLEEP, NULL, NULL,
	    NULL);
	if (ret != DDI_DMA_MAPPED) {
		ddi_dma_mem_free(&dmap->ld_acc);
		ddi_dma_free_handle(&dmap->ld_hdl);
		return (ret);
	}

	bzero(dmap->ld_buf, dmap->ld_len);
	return (DDI_SUCCESS);
}

void
lmrc_dma_free(lmrc_dma_t *dmap)
{
	if (dmap->ld_hdl != NULL)
		(void) ddi_dma_unbind_handle(dmap->ld_hdl);
	if (dmap->ld_acc != NULL)
		ddi_dma_mem_free(&dmap->ld_acc);
	if (dmap->ld_hdl != NULL)
		ddi_dma_free_handle(&dmap->ld_hdl);
	bzero(dmap, sizeof (lmrc_dma_t));
}

static lmrc_adapter_class_t
lmrc_get_class(lmrc_t *lmrc)
{
	int device_id = ddi_prop_get_int(DDI_DEV_T_ANY, lmrc->l_dip,
	    DDI_PROP_DONTPASS, "device-id", 0);

	switch (device_id) {
	case LMRC_VENTURA:
	case LMRC_CRUSADER:
	case LMRC_HARPOON:
	case LMRC_TOMCAT:
	case LMRC_VENTURA_4PORT:
	case LMRC_CRUSADER_4PORT:
		return (LMRC_ACLASS_VENTURA);

	case LMRC_AERO_10E1:
	case LMRC_AERO_10E5:
		dev_err(lmrc->l_dip, CE_CONT,
		    "?Adapter is in configurable secure mode\n");
		/*FALLTHRU*/
	case LMRC_AERO_10E2:
	case LMRC_AERO_10E6:
		return (LMRC_ACLASS_AERO);

	case LMRC_AERO_10E0:
	case LMRC_AERO_10E3:
	case LMRC_AERO_10E4:
	case LMRC_AERO_10E7:
		dev_err(lmrc->l_dip, CE_CONT,
		    "?Adapter is in non-secure mode\n");
	}

	return (LMRC_ACLASS_OTHER);
}

static int
lmrc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	const char *addr = scsi_hba_iport_unit_address(dip);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (addr == NULL)
		return (lmrc_ctrl_attach(dip));

	if (strcmp(addr, LMRC_IPORT_RAID) == 0)
		return (lmrc_raid_attach(dip));

	if (strcmp(addr, LMRC_IPORT_PHYS) == 0)
		return (lmrc_phys_attach(dip));

	return (DDI_FAILURE);
}

static int
lmrc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	const char *addr = scsi_hba_iport_unit_address(dip);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	if (addr == NULL)
		return (lmrc_ctrl_detach(dip));

	if (strcmp(addr, LMRC_IPORT_RAID) == 0)
		return (lmrc_raid_detach(dip));

	if (strcmp(addr, LMRC_IPORT_PHYS) == 0)
		return (lmrc_phys_detach(dip));

	return (DDI_FAILURE);
}

static int
lmrc_quiesce(dev_info_t *dip)
{
	lmrc_t *lmrc = ddi_get_soft_state(lmrc_state, ddi_get_instance(dip));

	if (lmrc == NULL)
		return (DDI_SUCCESS);

	return (lmrc_ctrl_shutdown(lmrc));
}

static struct cb_ops lmrc_cb_ops = {
	.cb_rev = CB_REV,
	.cb_flag = D_NEW | D_MP,

	.cb_open = scsi_hba_open,
	.cb_close = scsi_hba_close,

	.cb_ioctl = lmrc_ioctl,

	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_str = NULL,
	.cb_aread = nodev,
	.cb_awrite = nodev,
};

static struct dev_ops lmrc_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,

	.devo_attach = lmrc_attach,
	.devo_detach = lmrc_detach,

	.devo_cb_ops = &lmrc_cb_ops,

	.devo_getinfo = ddi_no_info,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_reset = nodev,
	.devo_bus_ops = NULL,
	.devo_power = nodev,
	.devo_quiesce = lmrc_quiesce,
};

static struct modldrv lmrc_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "Broadcom MegaRAID 12G SAS RAID",
	.drv_dev_ops = &lmrc_dev_ops,
};

static struct modlinkage lmrc_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &lmrc_modldrv, NULL },
};

int
_init(void)
{
	int ret;

	ret = ddi_soft_state_init(&lmrc_state, sizeof (lmrc_t), 1);
	if (ret != DDI_SUCCESS)
		return (ret);

	ret = scsi_hba_init(&lmrc_modlinkage);
	if (ret != 0) {
		ddi_soft_state_fini(&lmrc_state);
		return (ret);
	}

	ret = mod_install(&lmrc_modlinkage);
	if (ret != DDI_SUCCESS) {
		scsi_hba_fini(&lmrc_modlinkage);
		ddi_soft_state_fini(&lmrc_state);
		return (ret);
	}

	return (DDI_SUCCESS);
}

int
_fini(void)
{
	int ret;

	ret = mod_remove(&lmrc_modlinkage);
	if (ret == DDI_SUCCESS) {
		scsi_hba_fini(&lmrc_modlinkage);
		ddi_soft_state_fini(&lmrc_state);
	}

	return (ret);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&lmrc_modlinkage, modinfop));
}