1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * tavor.c
28 * Tavor (InfiniBand) HCA Driver attach/detach Routines
29 *
30 * Implements all the routines necessary for the attach, setup,
31 * initialization (and subsequent possible teardown and detach) of the
32 * Tavor InfiniBand HCA driver.
33 */
34
35 #include <sys/types.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h>
43 #include <sys/pci.h>
44 #include <sys/pci_cap.h>
45 #include <sys/bitmap.h>
46 #include <sys/policy.h>
47
48 #include <sys/ib/adapters/tavor/tavor.h>
49 #include <sys/pci.h>
50
51 /* Tavor HCA State Pointer */
52 void *tavor_statep;
53
54 /*
55 * The Tavor "userland resource database" is common to instances of the
56 * Tavor HCA driver. This structure "tavor_userland_rsrc_db" contains all
57 * the necessary information to maintain it.
58 */
59 tavor_umap_db_t tavor_userland_rsrc_db;
60
61 static int tavor_attach(dev_info_t *, ddi_attach_cmd_t);
62 static int tavor_detach(dev_info_t *, ddi_detach_cmd_t);
63 static int tavor_open(dev_t *, int, int, cred_t *);
64 static int tavor_close(dev_t, int, int, cred_t *);
65 static int tavor_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
66 static int tavor_drv_init(tavor_state_t *state, dev_info_t *dip, int instance);
67 static void tavor_drv_fini(tavor_state_t *state);
68 static void tavor_drv_fini2(tavor_state_t *state);
69 static int tavor_isr_init(tavor_state_t *state);
70 static void tavor_isr_fini(tavor_state_t *state);
71 static int tavor_hw_init(tavor_state_t *state);
72 static void tavor_hw_fini(tavor_state_t *state,
73 tavor_drv_cleanup_level_t cleanup);
74 static int tavor_soft_state_init(tavor_state_t *state);
75 static void tavor_soft_state_fini(tavor_state_t *state);
76 static int tavor_hca_port_init(tavor_state_t *state);
77 static int tavor_hca_ports_shutdown(tavor_state_t *state, uint_t num_init);
78 static void tavor_hca_config_setup(tavor_state_t *state,
79 tavor_hw_initqueryhca_t *inithca);
80 static int tavor_internal_uarpgs_init(tavor_state_t *state);
81 static void tavor_internal_uarpgs_fini(tavor_state_t *state);
82 static int tavor_special_qp_contexts_reserve(tavor_state_t *state);
83 static void tavor_special_qp_contexts_unreserve(tavor_state_t *state);
84 static int tavor_sw_reset(tavor_state_t *state);
85 static int tavor_mcg_init(tavor_state_t *state);
86 static void tavor_mcg_fini(tavor_state_t *state);
87 static int tavor_fw_version_check(tavor_state_t *state);
88 static void tavor_device_info_report(tavor_state_t *state);
89 static void tavor_pci_capability_list(tavor_state_t *state,
90 ddi_acc_handle_t hdl);
91 static void tavor_pci_capability_vpd(tavor_state_t *state,
92 ddi_acc_handle_t hdl, uint_t offset);
93 static int tavor_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
94 uint32_t addr, uint32_t *data);
95 static void tavor_pci_capability_pcix(tavor_state_t *state,
96 ddi_acc_handle_t hdl, uint_t offset);
97 static int tavor_intr_or_msi_init(tavor_state_t *state);
98 static int tavor_add_intrs(tavor_state_t *state, int intr_type);
99 static int tavor_intr_or_msi_fini(tavor_state_t *state);
100
101 /* X86 fastreboot support */
102 static int tavor_intr_disable(tavor_state_t *);
103 static int tavor_quiesce(dev_info_t *);
104
105 /* Character/Block Operations */
106 static struct cb_ops tavor_cb_ops = {
107 tavor_open, /* open */
108 tavor_close, /* close */
109 nodev, /* strategy (block) */
110 nodev, /* print (block) */
111 nodev, /* dump (block) */
112 nodev, /* read */
113 nodev, /* write */
114 tavor_ioctl, /* ioctl */
115 tavor_devmap, /* devmap */
116 NULL, /* mmap */
117 nodev, /* segmap */
118 nochpoll, /* chpoll */
119 ddi_prop_op, /* prop_op */
120 NULL, /* streams */
121 D_NEW | D_MP |
122 D_64BIT | D_HOTPLUG |
123 D_DEVMAP, /* flags */
124 CB_REV /* rev */
125 };
126
127 /* Driver Operations */
128 static struct dev_ops tavor_ops = {
129 DEVO_REV, /* struct rev */
130 0, /* refcnt */
131 tavor_getinfo, /* getinfo */
132 nulldev, /* identify */
133 nulldev, /* probe */
134 tavor_attach, /* attach */
135 tavor_detach, /* detach */
136 nodev, /* reset */
137 &tavor_cb_ops, /* cb_ops */
138 NULL, /* bus_ops */
139 nodev, /* power */
140 tavor_quiesce, /* devo_quiesce */
141 };
142
143 /* Module Driver Info */
144 static struct modldrv tavor_modldrv = {
145 &mod_driverops,
146 "Tavor InfiniBand HCA Driver",
147 &tavor_ops
148 };
149
150 /* Module Linkage */
151 static struct modlinkage tavor_modlinkage = {
152 MODREV_1,
153 &tavor_modldrv,
154 NULL
155 };
156
157 /*
158 * This extern refers to the ibc_operations_t function vector that is defined
159 * in the tavor_ci.c file.
160 */
161 extern ibc_operations_t tavor_ibc_ops;
162
163 /*
164 * _init()
165 */
166 int
_init()167 _init()
168 {
169 int status;
170
171 status = ddi_soft_state_init(&tavor_statep, sizeof (tavor_state_t),
172 (size_t)TAVOR_INITIAL_STATES);
173 if (status != 0) {
174 return (status);
175 }
176
177 status = ibc_init(&tavor_modlinkage);
178 if (status != 0) {
179 ddi_soft_state_fini(&tavor_statep);
180 return (status);
181 }
182 status = mod_install(&tavor_modlinkage);
183 if (status != 0) {
184 ibc_fini(&tavor_modlinkage);
185 ddi_soft_state_fini(&tavor_statep);
186 return (status);
187 }
188
189 /* Initialize the Tavor "userland resources database" */
190 tavor_umap_db_init();
191
192 return (status);
193 }
194
195
196 /*
197 * _info()
198 */
199 int
_info(struct modinfo * modinfop)200 _info(struct modinfo *modinfop)
201 {
202 int status;
203
204 status = mod_info(&tavor_modlinkage, modinfop);
205 return (status);
206 }
207
208
209 /*
210 * _fini()
211 */
212 int
_fini()213 _fini()
214 {
215 int status;
216
217 status = mod_remove(&tavor_modlinkage);
218 if (status != 0) {
219 return (status);
220 }
221
222 /* Destroy the Tavor "userland resources database" */
223 tavor_umap_db_fini();
224
225 ibc_fini(&tavor_modlinkage);
226 ddi_soft_state_fini(&tavor_statep);
227 return (status);
228 }
229
230
231 /*
232 * tavor_getinfo()
233 */
234 /* ARGSUSED */
235 static int
tavor_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)236 tavor_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
237 {
238 dev_t dev;
239 tavor_state_t *state;
240 minor_t instance;
241
242 switch (cmd) {
243 case DDI_INFO_DEVT2DEVINFO:
244 dev = (dev_t)arg;
245 instance = TAVOR_DEV_INSTANCE(dev);
246 state = ddi_get_soft_state(tavor_statep, instance);
247 if (state == NULL) {
248 return (DDI_FAILURE);
249 }
250 *result = (void *)state->ts_dip;
251 return (DDI_SUCCESS);
252
253 case DDI_INFO_DEVT2INSTANCE:
254 dev = (dev_t)arg;
255 instance = TAVOR_DEV_INSTANCE(dev);
256 *result = (void *)(uintptr_t)instance;
257 return (DDI_SUCCESS);
258
259 default:
260 break;
261 }
262
263 return (DDI_FAILURE);
264 }
265
266
267 /*
268 * tavor_open()
269 */
270 /* ARGSUSED */
271 static int
tavor_open(dev_t * devp,int flag,int otyp,cred_t * credp)272 tavor_open(dev_t *devp, int flag, int otyp, cred_t *credp)
273 {
274 tavor_state_t *state;
275 tavor_rsrc_t *rsrcp;
276 tavor_umap_db_entry_t *umapdb, *umapdb2;
277 minor_t instance;
278 uint64_t key, value;
279 uint_t tr_indx;
280 dev_t dev;
281 int status;
282
283 instance = TAVOR_DEV_INSTANCE(*devp);
284 state = ddi_get_soft_state(tavor_statep, instance);
285 if (state == NULL) {
286 return (ENXIO);
287 }
288
289 /*
290 * Only allow driver to be opened for character access, and verify
291 * whether exclusive access is allowed.
292 */
293 if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
294 secpolicy_excl_open(credp) != 0)) {
295 return (EINVAL);
296 }
297
298 /*
299 * Search for the current process PID in the "userland resources
300 * database". If it is not found, then attempt to allocate a UAR
301 * page and add the ("key", "value") pair to the database.
302 * Note: As a last step we always return a devp appropriate for
303 * the open. Either we return a new minor number (based on the
304 * instance and the UAR page index) or we return the current minor
305 * number for the given client process.
306 *
307 * We also add an entry to the database to allow for lookup from
308 * "dev_t" to the current process PID. This is necessary because,
309 * under certain circumstance, the process PID that calls the Tavor
310 * close() entry point may not be the same as the one who called
311 * open(). Specifically, this can happen if a child process calls
312 * the Tavor's open() entry point, gets a UAR page, maps it out (using
313 * mmap()), and then exits without calling munmap(). Because mmap()
314 * adds a reference to the file descriptor, at the exit of the child
315 * process the file descriptor is "inherited" by the parent (and will
316 * be close()'d by the parent's PID only when it exits).
317 *
318 * Note: We use the tavor_umap_db_find_nolock() and
319 * tavor_umap_db_add_nolock() database access routines below (with
320 * an explicit mutex_enter of the database lock - "tdl_umapdb_lock")
321 * to ensure that the multiple accesses (in this case searching for,
322 * and then adding _two_ database entries) can be done atomically.
323 */
324 key = ddi_get_pid();
325 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
326 status = tavor_umap_db_find_nolock(instance, key,
327 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
328 if (status != DDI_SUCCESS) {
329 /*
330 * If we are in 'maintenance mode', we cannot alloc a UAR page.
331 * But we still need some rsrcp value, and a mostly unique
332 * tr_indx value. So we set rsrcp to NULL for maintenance
333 * mode, and use a rolling count for tr_indx. The field
334 * 'ts_open_tr_indx' is used only in this maintenance mode
335 * condition.
336 *
337 * Otherwise, if we are in operational mode then we allocate
338 * the UAR page as normal, and use the rsrcp value and tr_indx
339 * value from that allocation.
340 */
341 if (!TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
342 rsrcp = NULL;
343 tr_indx = state->ts_open_tr_indx++;
344 } else {
345 /* Allocate a new UAR page for this process */
346 status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1,
347 TAVOR_NOSLEEP, &rsrcp);
348 if (status != DDI_SUCCESS) {
349 mutex_exit(
350 &tavor_userland_rsrc_db.tdl_umapdb_lock);
351 return (EAGAIN);
352 }
353
354 tr_indx = rsrcp->tr_indx;
355 }
356
357 /*
358 * Allocate an entry to track the UAR page resource in the
359 * "userland resources database".
360 */
361 umapdb = tavor_umap_db_alloc(instance, key,
362 MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
363 if (umapdb == NULL) {
364 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
365 /* If in "maintenance mode", don't free the rsrc */
366 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
367 tavor_rsrc_free(state, &rsrcp);
368 }
369 return (EAGAIN);
370 }
371
372 /*
373 * Create a new device number. Minor number is a function of
374 * the UAR page index (15 bits) and the device instance number
375 * (3 bits).
376 */
377 dev = makedevice(getmajor(*devp), (tr_indx <<
378 TAVOR_MINORNUM_SHIFT) | instance);
379
380 /*
381 * Allocate another entry in the "userland resources database"
382 * to track the association of the device number (above) to
383 * the current process ID (in "key").
384 */
385 umapdb2 = tavor_umap_db_alloc(instance, dev,
386 MLNX_UMAP_PID_RSRC, (uint64_t)key);
387 if (umapdb2 == NULL) {
388 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
389 tavor_umap_db_free(umapdb);
390 /* If in "maintenance mode", don't free the rsrc */
391 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
392 tavor_rsrc_free(state, &rsrcp);
393 }
394 return (EAGAIN);
395 }
396
397 /* Add the entries to the database */
398 tavor_umap_db_add_nolock(umapdb);
399 tavor_umap_db_add_nolock(umapdb2);
400
401 } else {
402 /*
403 * Return the same device number as on the original open()
404 * call. This was calculated as a function of the UAR page
405 * index (top 16 bits) and the device instance number
406 */
407 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
408 dev = makedevice(getmajor(*devp), (rsrcp->tr_indx <<
409 TAVOR_MINORNUM_SHIFT) | instance);
410 }
411 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
412
413 *devp = dev;
414
415 return (0);
416 }
417
418
419 /*
420 * tavor_close()
421 */
422 /* ARGSUSED */
423 static int
tavor_close(dev_t dev,int flag,int otyp,cred_t * credp)424 tavor_close(dev_t dev, int flag, int otyp, cred_t *credp)
425 {
426 tavor_state_t *state;
427 tavor_rsrc_t *rsrcp;
428 tavor_umap_db_entry_t *umapdb;
429 tavor_umap_db_priv_t *priv;
430 minor_t instance;
431 uint64_t key, value;
432 int status;
433
434 instance = TAVOR_DEV_INSTANCE(dev);
435 state = ddi_get_soft_state(tavor_statep, instance);
436 if (state == NULL) {
437 return (ENXIO);
438 }
439
440 /*
441 * Search for "dev_t" in the "userland resources database". As
442 * explained above in tavor_open(), we can't depend on using the
443 * current process ID here to do the lookup because the process
444 * that ultimately closes may not be the same one who opened
445 * (because of inheritance).
446 * So we lookup the "dev_t" (which points to the PID of the process
447 * that opened), and we remove the entry from the database (and free
448 * it up). Then we do another query based on the PID value. And when
449 * we find that database entry, we free it up too and then free the
450 * Tavor UAR page resource.
451 *
452 * Note: We use the tavor_umap_db_find_nolock() database access
453 * routine below (with an explicit mutex_enter of the database lock)
454 * to ensure that the multiple accesses (which attempt to remove the
455 * two database entries) can be done atomically.
456 *
457 * This works the same in both maintenance mode and HCA mode, except
458 * for the call to tavor_rsrc_free(). In the case of maintenance mode,
459 * this call is not needed, as it was not allocated in tavor_open()
460 * above.
461 */
462 key = dev;
463 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
464 status = tavor_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
465 &value, TAVOR_UMAP_DB_REMOVE, &umapdb);
466 if (status == DDI_SUCCESS) {
467 /*
468 * If the "tdb_priv" field is non-NULL, it indicates that
469 * some "on close" handling is still necessary. Call
470 * tavor_umap_db_handle_onclose_cb() to do the handling (i.e.
471 * to invoke all the registered callbacks). Then free up
472 * the resources associated with "tdb_priv" and continue
473 * closing.
474 */
475 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
476 if (priv != NULL) {
477 tavor_umap_db_handle_onclose_cb(priv);
478 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
479 umapdb->tdbe_common.tdb_priv = (void *)NULL;
480 }
481
482 tavor_umap_db_free(umapdb);
483
484 /*
485 * Now do another lookup using PID as the key (copy it from
486 * "value"). When this lookup is complete, the "value" field
487 * will contain the tavor_rsrc_t pointer for the UAR page
488 * resource.
489 */
490 key = value;
491 status = tavor_umap_db_find_nolock(instance, key,
492 MLNX_UMAP_UARPG_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
493 &umapdb);
494 if (status == DDI_SUCCESS) {
495 tavor_umap_db_free(umapdb);
496 /* If in "maintenance mode", don't free the rsrc */
497 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
498 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
499 tavor_rsrc_free(state, &rsrcp);
500 }
501 }
502 }
503 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
504
505 return (0);
506 }
507
508
509 /*
510 * tavor_attach()
511 * Context: Only called from attach() path context
512 */
513 static int
tavor_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)514 tavor_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
515 {
516 tavor_state_t *state;
517 ibc_clnt_hdl_t tmp_ibtfpriv;
518 ibc_status_t ibc_status;
519 int instance;
520 int status;
521
522 #ifdef __lock_lint
523 (void) tavor_quiesce(dip);
524 #endif
525
526 switch (cmd) {
527 case DDI_ATTACH:
528 instance = ddi_get_instance(dip);
529 status = ddi_soft_state_zalloc(tavor_statep, instance);
530 if (status != DDI_SUCCESS) {
531 cmn_err(CE_NOTE, "tavor%d: driver failed to attach: "
532 "attach_ssz_fail", instance);
533 goto fail_attach_nomsg;
534
535 }
536 state = ddi_get_soft_state(tavor_statep, instance);
537 if (state == NULL) {
538 ddi_soft_state_free(tavor_statep, instance);
539 cmn_err(CE_NOTE, "tavor%d: driver failed to attach: "
540 "attach_gss_fail", instance);
541 goto fail_attach_nomsg;
542 }
543
544 /* clear the attach error buffer */
545 TAVOR_ATTACH_MSG_INIT(state->ts_attach_buf);
546
547 /*
548 * Initialize Tavor driver and hardware.
549 *
550 * Note: If this initialization fails we may still wish to
551 * create a device node and remain operational so that Tavor
552 * firmware can be updated/flashed (i.e. "maintenance mode").
553 * If this is the case, then "ts_operational_mode" will be
554 * equal to TAVOR_MAINTENANCE_MODE. We will not attempt to
555 * attach to the IBTF or register with the IBMF (i.e. no
556 * InfiniBand interfaces will be enabled).
557 */
558 status = tavor_drv_init(state, dip, instance);
559 if ((status != DDI_SUCCESS) &&
560 (TAVOR_IS_OPERATIONAL(state->ts_operational_mode))) {
561 goto fail_attach;
562 }
563
564 /* Create the minor node for device */
565 status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
566 DDI_PSEUDO, 0);
567 if (status != DDI_SUCCESS) {
568 tavor_drv_fini(state);
569 TAVOR_ATTACH_MSG(state->ts_attach_buf,
570 "attach_create_mn_fail");
571 goto fail_attach;
572 }
573
574 /*
575 * If we are in "maintenance mode", then we don't want to
576 * register with the IBTF. All InfiniBand interfaces are
577 * uninitialized, and the device is only capable of handling
578 * requests to update/flash firmware (or test/debug requests).
579 */
580 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
581
582 /* Attach to InfiniBand Transport Framework (IBTF) */
583 ibc_status = ibc_attach(&tmp_ibtfpriv,
584 &state->ts_ibtfinfo);
585 if (ibc_status != IBC_SUCCESS) {
586 ddi_remove_minor_node(dip, "devctl");
587 tavor_drv_fini(state);
588 TAVOR_ATTACH_MSG(state->ts_attach_buf,
589 "attach_ibcattach_fail");
590 goto fail_attach;
591 }
592
593 /*
594 * Now that we've successfully attached to the IBTF,
595 * we enable all appropriate asynch and CQ events to
596 * be forwarded to the IBTF.
597 */
598 TAVOR_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
599
600 ibc_post_attach(state->ts_ibtfpriv);
601
602 /* Register agents with IB Mgmt Framework (IBMF) */
603 status = tavor_agent_handlers_init(state);
604 if (status != DDI_SUCCESS) {
605 (void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
606 TAVOR_QUIESCE_IBTF_CALLB(state);
607 if (state->ts_in_evcallb != 0) {
608 TAVOR_WARNING(state, "unable to "
609 "quiesce Tavor IBTF callbacks");
610 }
611 ibc_detach(tmp_ibtfpriv);
612 ddi_remove_minor_node(dip, "devctl");
613 tavor_drv_fini(state);
614 TAVOR_ATTACH_MSG(state->ts_attach_buf,
615 "attach_agentinit_fail");
616 goto fail_attach;
617 }
618 }
619
620 /* Report that driver was loaded */
621 ddi_report_dev(dip);
622
623 /* Send device information to log file */
624 tavor_device_info_report(state);
625
626 /* Report attach in maintenance mode, if appropriate */
627 if (!(TAVOR_IS_OPERATIONAL(state->ts_operational_mode))) {
628 cmn_err(CE_NOTE, "tavor%d: driver attached "
629 "(for maintenance mode only)", state->ts_instance);
630 }
631
632 return (DDI_SUCCESS);
633
634 case DDI_RESUME:
635 /* Add code here for DDI_RESUME XXX */
636 return (DDI_FAILURE);
637
638 default:
639 break;
640 }
641
642 fail_attach:
643 cmn_err(CE_NOTE, "tavor%d: driver failed to attach: %s", instance,
644 state->ts_attach_buf);
645 tavor_drv_fini2(state);
646 ddi_soft_state_free(tavor_statep, instance);
647 fail_attach_nomsg:
648 return (DDI_FAILURE);
649 }
650
651
652 /*
653 * tavor_detach()
654 * Context: Only called from detach() path context
655 */
656 static int
tavor_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)657 tavor_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
658 {
659 tavor_state_t *state;
660 ibc_clnt_hdl_t tmp_ibtfpriv;
661 ibc_status_t ibc_status;
662 int instance, status;
663
664 instance = ddi_get_instance(dip);
665 state = ddi_get_soft_state(tavor_statep, instance);
666 if (state == NULL) {
667 return (DDI_FAILURE);
668 }
669
670 switch (cmd) {
671 case DDI_DETACH:
672 /*
673 * If we are in "maintenance mode", then we do not want to
674 * do teardown for any of the InfiniBand interfaces.
675 * Specifically, this means not detaching from IBTF (we never
676 * attached to begin with) and not deregistering from IBMF.
677 */
678 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
679 /* Unregister agents from IB Mgmt Framework (IBMF) */
680 status = tavor_agent_handlers_fini(state);
681 if (status != DDI_SUCCESS) {
682 return (DDI_FAILURE);
683 }
684
685 /*
686 * Attempt the "pre-detach" from InfiniBand Transport
687 * Framework (IBTF). At this point the IBTF is still
688 * capable of handling incoming asynch and completion
689 * events. This "pre-detach" is primarily a mechanism
690 * to notify the appropriate IBTF clients that the
691 * HCA is being removed/offlined.
692 */
693 ibc_status = ibc_pre_detach(state->ts_ibtfpriv, cmd);
694 if (ibc_status != IBC_SUCCESS) {
695 status = tavor_agent_handlers_init(state);
696 if (status != DDI_SUCCESS) {
697 TAVOR_WARNING(state, "failed to "
698 "restart Tavor agents");
699 }
700 return (DDI_FAILURE);
701 }
702
703 /*
704 * Before we can fully detach from the IBTF we need to
705 * ensure that we have handled all outstanding event
706 * callbacks. This is accomplished by quiescing the
707 * event callback mechanism. Note: if we are unable
708 * to successfully quiesce the callbacks, then this is
709 * an indication that something has probably gone
710 * seriously wrong. We print out a warning, but
711 * continue.
712 */
713 tmp_ibtfpriv = state->ts_ibtfpriv;
714 TAVOR_QUIESCE_IBTF_CALLB(state);
715 if (state->ts_in_evcallb != 0) {
716 TAVOR_WARNING(state, "unable to quiesce Tavor "
717 "IBTF callbacks");
718 }
719
720 /* Complete the detach from the IBTF */
721 ibc_detach(tmp_ibtfpriv);
722 }
723
724 /* Remove the minor node for device */
725 ddi_remove_minor_node(dip, "devctl");
726
727 /*
728 * Only call tavor_drv_fini() if we are in Tavor HCA mode.
729 * (Because if we are in "maintenance mode", then we never
730 * successfully finished init.) Only report successful
731 * detach for normal HCA mode.
732 */
733 if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
734 /* Cleanup driver resources and shutdown hardware */
735 tavor_drv_fini(state);
736 cmn_err(CE_CONT, "Tavor driver successfully "
737 "detached\n");
738 }
739
740 tavor_drv_fini2(state);
741 ddi_soft_state_free(tavor_statep, instance);
742
743 return (DDI_SUCCESS);
744
745 case DDI_SUSPEND:
746 /* Add code here for DDI_SUSPEND XXX */
747 return (DDI_FAILURE);
748
749 default:
750 break;
751 }
752
753 return (DDI_FAILURE);
754 }
755
756
757 /*
758 * tavor_drv_init()
759 * Context: Only called from attach() path context
760 */
761 static int
tavor_drv_init(tavor_state_t * state,dev_info_t * dip,int instance)762 tavor_drv_init(tavor_state_t *state, dev_info_t *dip, int instance)
763 {
764 int status;
765
766 /* Save away devinfo and instance */
767 state->ts_dip = dip;
768 state->ts_instance = instance;
769
770 /*
771 * Check and set the operational mode of the device. If the driver is
772 * bound to the Tavor device in "maintenance mode", then this generally
773 * means that either the device has been specifically jumpered to
774 * start in this mode or the firmware boot process has failed to
775 * successfully load either the primary or the secondary firmware
776 * image.
777 */
778 if (TAVOR_IS_HCA_MODE(state->ts_dip)) {
779 state->ts_operational_mode = TAVOR_HCA_MODE;
780
781 } else if (TAVOR_IS_COMPAT_MODE(state->ts_dip)) {
782 state->ts_operational_mode = TAVOR_COMPAT_MODE;
783
784 } else if (TAVOR_IS_MAINTENANCE_MODE(state->ts_dip)) {
785 state->ts_operational_mode = TAVOR_MAINTENANCE_MODE;
786 return (DDI_FAILURE);
787
788 } else {
789 state->ts_operational_mode = 0; /* invalid operational mode */
790 TAVOR_WARNING(state, "unexpected device type detected");
791 return (DDI_FAILURE);
792 }
793
794 /*
795 * Initialize the Tavor hardware.
796 * Note: If this routine returns an error, it is often an reasonably
797 * good indication that something Tavor firmware-related has caused
798 * the failure. In order to give the user an opportunity (if desired)
799 * to update or reflash the Tavor firmware image, we set
800 * "ts_operational_mode" flag (described above) to indicate that we
801 * wish to enter maintenance mode.
802 */
803 status = tavor_hw_init(state);
804 if (status != DDI_SUCCESS) {
805 state->ts_operational_mode = TAVOR_MAINTENANCE_MODE;
806 cmn_err(CE_NOTE, "tavor%d: error during attach: %s", instance,
807 state->ts_attach_buf);
808 return (DDI_FAILURE);
809 }
810
811 /* Setup Tavor interrupt handler */
812 status = tavor_isr_init(state);
813 if (status != DDI_SUCCESS) {
814 tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
815 return (DDI_FAILURE);
816 }
817
818 /* Initialize Tavor softstate */
819 status = tavor_soft_state_init(state);
820 if (status != DDI_SUCCESS) {
821 tavor_isr_fini(state);
822 tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
823 return (DDI_FAILURE);
824 }
825
826 return (DDI_SUCCESS);
827 }
828
829
830 /*
831 * tavor_drv_fini()
832 * Context: Only called from attach() and/or detach() path contexts
833 */
834 static void
tavor_drv_fini(tavor_state_t * state)835 tavor_drv_fini(tavor_state_t *state)
836 {
837 /* Cleanup Tavor softstate */
838 tavor_soft_state_fini(state);
839
840 /* Teardown Tavor interrupts */
841 tavor_isr_fini(state);
842
843 /* Cleanup Tavor resources and shutdown hardware */
844 tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
845 }
846
847 /*
848 * tavor_drv_fini2()
849 * Context: Only called from attach() and/or detach() path contexts
850 */
851 static void
tavor_drv_fini2(tavor_state_t * state)852 tavor_drv_fini2(tavor_state_t *state)
853 {
854 /* TAVOR_DRV_CLEANUP_LEVEL1 */
855 if (state->ts_reg_cmdhdl) {
856 ddi_regs_map_free(&state->ts_reg_cmdhdl);
857 state->ts_reg_cmdhdl = NULL;
858 }
859
860 /* TAVOR_DRV_CLEANUP_LEVEL0 */
861 if (state->ts_pci_cfghdl) {
862 pci_config_teardown(&state->ts_pci_cfghdl);
863 state->ts_pci_cfghdl = NULL;
864 }
865 }
866
867 /*
868 * tavor_isr_init()
869 * Context: Only called from attach() path context
870 */
871 static int
tavor_isr_init(tavor_state_t * state)872 tavor_isr_init(tavor_state_t *state)
873 {
874 int status;
875
876 /*
877 * Add a handler for the interrupt or MSI
878 */
879 status = ddi_intr_add_handler(state->ts_intrmsi_hdl, tavor_isr,
880 (caddr_t)state, NULL);
881 if (status != DDI_SUCCESS) {
882 return (DDI_FAILURE);
883 }
884
885 /*
886 * Enable the software interrupt. Note: Even though we are only
887 * using one (1) interrupt/MSI, depending on the value returned in
888 * the capability flag, we have to call either ddi_intr_block_enable()
889 * or ddi_intr_enable().
890 */
891 if (state->ts_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
892 status = ddi_intr_block_enable(&state->ts_intrmsi_hdl, 1);
893 if (status != DDI_SUCCESS) {
894 return (DDI_FAILURE);
895 }
896 } else {
897 status = ddi_intr_enable(state->ts_intrmsi_hdl);
898 if (status != DDI_SUCCESS) {
899 return (DDI_FAILURE);
900 }
901 }
902
903 /*
904 * Now that the ISR has been setup, arm all the EQs for event
905 * generation.
906 */
907 tavor_eq_arm_all(state);
908
909 return (DDI_SUCCESS);
910 }
911
912
913 /*
914 * tavor_isr_fini()
915 * Context: Only called from attach() and/or detach() path contexts
916 */
917 static void
tavor_isr_fini(tavor_state_t * state)918 tavor_isr_fini(tavor_state_t *state)
919 {
920 /* Disable the software interrupt */
921 if (state->ts_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
922 (void) ddi_intr_block_disable(&state->ts_intrmsi_hdl, 1);
923 } else {
924 (void) ddi_intr_disable(state->ts_intrmsi_hdl);
925 }
926
927 /*
928 * Remove the software handler for the interrupt or MSI
929 */
930 (void) ddi_intr_remove_handler(state->ts_intrmsi_hdl);
931 }
932
933
934 /*
935 * tavor_fix_error_buf()
936 * Context: Only called from attach().
937 *
938 * The error_buf_addr returned from QUERY_FW is a PCI address.
939 * We need to convert it to an offset from the base address,
940 * which is stored in the assigned-addresses property.
941 */
942 static int
tavor_fix_error_buf(tavor_state_t * state)943 tavor_fix_error_buf(tavor_state_t *state)
944 {
945 int assigned_addr_len;
946 pci_regspec_t *assigned_addr;
947
948 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->ts_dip,
949 DDI_PROP_DONTPASS, "assigned-addresses", (int **)&assigned_addr,
950 (uint_t *)&assigned_addr_len) != DDI_PROP_SUCCESS)
951 return (DDI_FAILURE);
952
953 state->ts_fw.error_buf_addr -= assigned_addr[0].pci_phys_low +
954 ((uint64_t)(assigned_addr[0].pci_phys_mid) << 32);
955 ddi_prop_free(assigned_addr);
956 return (DDI_SUCCESS);
957 }
958
959 /*
960 * tavor_hw_init()
961 * Context: Only called from attach() path context
962 */
963 static int
tavor_hw_init(tavor_state_t * state)964 tavor_hw_init(tavor_state_t *state)
965 {
966 tavor_drv_cleanup_level_t cleanup;
967 sm_nodeinfo_t nodeinfo;
968 uint64_t errorcode;
969 off_t ddr_size;
970 int status;
971 int retries;
972
973 /* This is where driver initialization begins */
974 cleanup = TAVOR_DRV_CLEANUP_LEVEL0;
975
976 /* Setup device access attributes */
977 state->ts_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
978 state->ts_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
979 state->ts_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
980
981 /* Setup for PCI config read/write of HCA device */
982 status = pci_config_setup(state->ts_dip, &state->ts_pci_cfghdl);
983 if (status != DDI_SUCCESS) {
984 tavor_hw_fini(state, cleanup);
985 TAVOR_ATTACH_MSG(state->ts_attach_buf,
986 "hw_init_PCI_config_space_regmap_fail");
987 /* This case is not the degraded one */
988 return (DDI_FAILURE);
989 }
990
991 /* Map in Tavor registers (CMD, UAR, DDR) and setup offsets */
992 status = ddi_regs_map_setup(state->ts_dip, TAVOR_CMD_BAR,
993 &state->ts_reg_cmd_baseaddr, 0, 0, &state->ts_reg_accattr,
994 &state->ts_reg_cmdhdl);
995 if (status != DDI_SUCCESS) {
996 tavor_hw_fini(state, cleanup);
997 TAVOR_ATTACH_MSG(state->ts_attach_buf,
998 "hw_init_CMD_ddirms_fail");
999 return (DDI_FAILURE);
1000 }
1001 cleanup = TAVOR_DRV_CLEANUP_LEVEL1;
1002
1003 status = ddi_regs_map_setup(state->ts_dip, TAVOR_UAR_BAR,
1004 &state->ts_reg_uar_baseaddr, 0, 0, &state->ts_reg_accattr,
1005 &state->ts_reg_uarhdl);
1006 if (status != DDI_SUCCESS) {
1007 tavor_hw_fini(state, cleanup);
1008 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1009 "hw_init_UAR_ddirms_fail");
1010 return (DDI_FAILURE);
1011 }
1012 cleanup = TAVOR_DRV_CLEANUP_LEVEL2;
1013
1014 status = ddi_dev_regsize(state->ts_dip, TAVOR_DDR_BAR, &ddr_size);
1015 if (status != DDI_SUCCESS) {
1016 cmn_err(CE_CONT, "Tavor: ddi_dev_regsize() failed "
1017 "(check HCA-attached DIMM memory?)\n");
1018 tavor_hw_fini(state, cleanup);
1019 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1020 "hw_init_DDR_ddi_regsize_fail");
1021 return (DDI_FAILURE);
1022 }
1023
1024 #if !defined(_ELF64) && !defined(__sparc)
1025 /*
1026 * For 32 bit x86/x64 kernels, where there is limited kernel virtual
1027 * memory available, define a minimal memory footprint. This is
1028 * specified in order to not take up too much resources, thus starving
1029 * out others. Only specified if the HCA DIMM is equal to or greater
1030 * than 256MB.
1031 *
1032 * Note: x86/x64 install and safemode boot are both 32bit.
1033 */
1034 ddr_size = TAVOR_DDR_SIZE_MIN;
1035 #endif /* !(_ELF64) && !(__sparc) */
1036
1037 state->ts_cfg_profile_setting = ddr_size;
1038
1039 status = ddi_regs_map_setup(state->ts_dip, TAVOR_DDR_BAR,
1040 &state->ts_reg_ddr_baseaddr, 0, ddr_size, &state->ts_reg_accattr,
1041 &state->ts_reg_ddrhdl);
1042
1043 /*
1044 * On 32-bit platform testing (primarily x86), it was seen that the
1045 * ddi_regs_map_setup() call would fail because there wasn't enough
1046 * kernel virtual address space available to map in the entire 256MB
1047 * DDR. So we add this check in here, so that if the 256 (or other
1048 * larger value of DDR) map in fails, that we fallback to try the lower
1049 * size of 128MB.
1050 *
1051 * Note: If we only have 128MB of DDR in the system in the first place,
1052 * we don't try another ddi_regs_map_setup(), and just skip over this
1053 * check and return failures.
1054 */
1055 if (status == DDI_ME_NORESOURCES && ddr_size > TAVOR_DDR_SIZE_128) {
1056 /* Try falling back to 128MB DDR mapping */
1057 status = ddi_regs_map_setup(state->ts_dip, TAVOR_DDR_BAR,
1058 &state->ts_reg_ddr_baseaddr, 0, TAVOR_DDR_SIZE_128,
1059 &state->ts_reg_accattr, &state->ts_reg_ddrhdl);
1060
1061 /*
1062 * 128MB DDR mapping worked.
1063 * Set the updated config profile setting here.
1064 */
1065 if (status == DDI_SUCCESS) {
1066 state->ts_cfg_profile_setting = TAVOR_DDR_SIZE_128;
1067 }
1068 }
1069
1070 if (status != DDI_SUCCESS) {
1071 if (status == DDI_ME_RNUMBER_RANGE) {
1072 cmn_err(CE_CONT, "Tavor: ddi_regs_map_setup() failed "
1073 "(check HCA-attached DIMM memory?)\n");
1074 }
1075 tavor_hw_fini(state, cleanup);
1076 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1077 "hw_init_DDR_ddirms_fail");
1078 return (DDI_FAILURE);
1079 }
1080 cleanup = TAVOR_DRV_CLEANUP_LEVEL3;
1081
1082 /* Setup Tavor Host Command Register (HCR) */
1083 state->ts_cmd_regs.hcr = (tavor_hw_hcr_t *)
1084 ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_HCR_OFFSET);
1085
1086 /* Setup Tavor Event Cause Register (ecr and clr_ecr) */
1087 state->ts_cmd_regs.ecr = (uint64_t *)
1088 ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_ECR_OFFSET);
1089 state->ts_cmd_regs.clr_ecr = (uint64_t *)
1090 ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_CLR_ECR_OFFSET);
1091
1092 /* Setup Tavor Software Reset register (sw_reset) */
1093 state->ts_cmd_regs.sw_reset = (uint32_t *)
1094 ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_SW_RESET_OFFSET);
1095
1096 /* Setup Tavor Clear Interrupt register (clr_int) */
1097 state->ts_cmd_regs.clr_int = (uint64_t *)
1098 ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_CLR_INT_OFFSET);
1099
1100 /* Initialize the Phase1 Tavor configuration profile */
1101 status = tavor_cfg_profile_init_phase1(state);
1102 if (status != DDI_SUCCESS) {
1103 tavor_hw_fini(state, cleanup);
1104 TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_cfginit_fail");
1105 return (DDI_FAILURE);
1106 }
1107 cleanup = TAVOR_DRV_CLEANUP_LEVEL4;
1108
1109 /* Do a software reset of the Tavor HW to ensure proper state */
1110 status = tavor_sw_reset(state);
1111 if (status != TAVOR_CMD_SUCCESS) {
1112 tavor_hw_fini(state, cleanup);
1113 TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_sw_reset_fail");
1114 return (DDI_FAILURE);
1115 }
1116
1117 /* Post the SYS_EN command to start the hardware */
1118 status = tavor_sys_en_cmd_post(state, TAVOR_CMD_SYS_EN_NORMAL,
1119 &errorcode, TAVOR_CMD_NOSLEEP_SPIN);
1120 if (status != TAVOR_CMD_SUCCESS) {
1121 if ((status == TAVOR_CMD_BAD_NVMEM) ||
1122 (status == TAVOR_CMD_DDR_MEM_ERR)) {
1123 cmn_err(CE_CONT, "Tavor: SYS_EN command failed: 0x%x "
1124 "0x%" PRIx64 " (invalid firmware image?)\n",
1125 status, errorcode);
1126 } else {
1127 cmn_err(CE_CONT, "Tavor: SYS_EN command failed: 0x%x "
1128 "0x%" PRIx64 "\n", status, errorcode);
1129 }
1130 tavor_hw_fini(state, cleanup);
1131 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1132 "hw_init_sys_en_cmd_fail");
1133 return (DDI_FAILURE);
1134 }
1135 cleanup = TAVOR_DRV_CLEANUP_LEVEL5;
1136
1137 /* First phase of init for Tavor configuration/resources */
1138 status = tavor_rsrc_init_phase1(state);
1139 if (status != DDI_SUCCESS) {
1140 tavor_hw_fini(state, cleanup);
1141 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1142 "hw_init_rsrcinit1_fail");
1143 return (DDI_FAILURE);
1144 }
1145 cleanup = TAVOR_DRV_CLEANUP_LEVEL6;
1146
1147 /* Query the DDR properties (e.g. total DDR size) */
1148 status = tavor_cmn_query_cmd_post(state, QUERY_DDR, 0,
1149 &state->ts_ddr, sizeof (tavor_hw_queryddr_t),
1150 TAVOR_CMD_NOSLEEP_SPIN);
1151 if (status != TAVOR_CMD_SUCCESS) {
1152 cmn_err(CE_CONT, "Tavor: QUERY_DDR command failed: %08x\n",
1153 status);
1154 tavor_hw_fini(state, cleanup);
1155 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1156 "hw_init_query_ddr_cmd_fail");
1157 return (DDI_FAILURE);
1158 }
1159
1160 /* Figure out how big the firmware image (in DDR) is */
1161 status = tavor_cmn_query_cmd_post(state, QUERY_FW, 0, &state->ts_fw,
1162 sizeof (tavor_hw_queryfw_t), TAVOR_CMD_NOSLEEP_SPIN);
1163 if (status != TAVOR_CMD_SUCCESS) {
1164 cmn_err(CE_CONT, "Tavor: QUERY_FW command failed: %08x\n",
1165 status);
1166 tavor_hw_fini(state, cleanup);
1167 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1168 "hw_init_query_fw_cmd_fail");
1169 return (DDI_FAILURE);
1170 }
1171
1172 if (tavor_fix_error_buf(state) != DDI_SUCCESS) {
1173 tavor_hw_fini(state, cleanup);
1174 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1175 "hw_init_fixerrorbuf_fail");
1176 return (DDI_FAILURE);
1177 }
1178
1179 /* Validate that the FW version is appropriate */
1180 status = tavor_fw_version_check(state);
1181 if (status != DDI_SUCCESS) {
1182 if (state->ts_operational_mode == TAVOR_HCA_MODE) {
1183 cmn_err(CE_CONT, "Unsupported Tavor FW version: "
1184 "expected: %04d.%04d.%04d, "
1185 "actual: %04d.%04d.%04d\n",
1186 TAVOR_FW_VER_MAJOR,
1187 TAVOR_FW_VER_MINOR,
1188 TAVOR_FW_VER_SUBMINOR,
1189 state->ts_fw.fw_rev_major,
1190 state->ts_fw.fw_rev_minor,
1191 state->ts_fw.fw_rev_subminor);
1192 } else if (state->ts_operational_mode == TAVOR_COMPAT_MODE) {
1193 cmn_err(CE_CONT, "Unsupported Tavor Compat FW version: "
1194 "expected: %04d.%04d.%04d, "
1195 "actual: %04d.%04d.%04d\n",
1196 TAVOR_COMPAT_FW_VER_MAJOR,
1197 TAVOR_COMPAT_FW_VER_MINOR,
1198 TAVOR_COMPAT_FW_VER_SUBMINOR,
1199 state->ts_fw.fw_rev_major,
1200 state->ts_fw.fw_rev_minor,
1201 state->ts_fw.fw_rev_subminor);
1202 } else {
1203 cmn_err(CE_CONT, "Unsupported FW version: "
1204 "%04d.%04d.%04d\n",
1205 state->ts_fw.fw_rev_major,
1206 state->ts_fw.fw_rev_minor,
1207 state->ts_fw.fw_rev_subminor);
1208 }
1209 tavor_hw_fini(state, cleanup);
1210 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1211 "hw_init_checkfwver_fail");
1212 return (DDI_FAILURE);
1213 }
1214
1215 drv_usecwait(10);
1216 retries = 1000; /* retry up to 1 second before giving up */
1217 retry:
1218 /* Call MOD_STAT_CFG to setup SRQ support (or disable) */
1219 status = tavor_mod_stat_cfg_cmd_post(state);
1220 if (status != DDI_SUCCESS) {
1221 if (retries > 0) {
1222 drv_usecwait(1000);
1223 retries--;
1224 goto retry;
1225 }
1226 cmn_err(CE_CONT, "Tavor: MOD_STAT_CFG command failed: %08x\n",
1227 status);
1228 tavor_hw_fini(state, cleanup);
1229 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1230 "hw_init_mod_stat_cfg_cmd_fail");
1231 return (DDI_FAILURE);
1232 }
1233
1234 /* Figure out Tavor device limits */
1235 status = tavor_cmn_query_cmd_post(state, QUERY_DEV_LIM, 0,
1236 &state->ts_devlim, sizeof (tavor_hw_querydevlim_t),
1237 TAVOR_CMD_NOSLEEP_SPIN);
1238 if (status != TAVOR_CMD_SUCCESS) {
1239 cmn_err(CE_CONT, "Tavor: QUERY_DEV_LIM command failed: %08x\n",
1240 status);
1241 tavor_hw_fini(state, cleanup);
1242 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1243 "hw_init_query_devlim_cmd_fail");
1244 return (DDI_FAILURE);
1245 }
1246
1247 /* Initialize the Phase2 Tavor configuration profile */
1248 status = tavor_cfg_profile_init_phase2(state);
1249 if (status != DDI_SUCCESS) {
1250 tavor_hw_fini(state, cleanup);
1251 TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_cfginit2_fail");
1252 return (DDI_FAILURE);
1253 }
1254
1255 /* Second phase of init for Tavor configuration/resources */
1256 status = tavor_rsrc_init_phase2(state);
1257 if (status != DDI_SUCCESS) {
1258 tavor_hw_fini(state, cleanup);
1259 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1260 "hw_init_rsrcinit2_fail");
1261 return (DDI_FAILURE);
1262 }
1263 cleanup = TAVOR_DRV_CLEANUP_LEVEL7;
1264
1265 /* Miscellaneous query information */
1266 status = tavor_cmn_query_cmd_post(state, QUERY_ADAPTER, 0,
1267 &state->ts_adapter, sizeof (tavor_hw_queryadapter_t),
1268 TAVOR_CMD_NOSLEEP_SPIN);
1269 if (status != TAVOR_CMD_SUCCESS) {
1270 cmn_err(CE_CONT, "Tavor: QUERY_ADAPTER command failed: %08x\n",
1271 status);
1272 tavor_hw_fini(state, cleanup);
1273 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1274 "hw_init_query_adapter_cmd_fail");
1275 return (DDI_FAILURE);
1276 }
1277
1278 /* Prepare configuration for Tavor INIT_HCA command */
1279 tavor_hca_config_setup(state, &state->ts_hcaparams);
1280
1281 /* Post command to init Tavor HCA */
1282 status = tavor_init_hca_cmd_post(state, &state->ts_hcaparams,
1283 TAVOR_CMD_NOSLEEP_SPIN);
1284 if (status != TAVOR_CMD_SUCCESS) {
1285 cmn_err(CE_CONT, "Tavor: INIT_HCA command failed: %08x\n",
1286 status);
1287 tavor_hw_fini(state, cleanup);
1288 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1289 "hw_init_init_hca_cmd_fail");
1290 return (DDI_FAILURE);
1291 }
1292 cleanup = TAVOR_DRV_CLEANUP_LEVEL8;
1293
1294 /* Allocate protection domain (PD) for Tavor internal use */
1295 status = tavor_pd_alloc(state, &state->ts_pdhdl_internal, TAVOR_SLEEP);
1296 if (status != DDI_SUCCESS) {
1297 tavor_hw_fini(state, cleanup);
1298 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1299 "hw_init_internal_pd_alloc_fail");
1300 return (DDI_FAILURE);
1301 }
1302 cleanup = TAVOR_DRV_CLEANUP_LEVEL9;
1303
1304 /* Setup Tavor internal UAR pages (0 and 1) */
1305 status = tavor_internal_uarpgs_init(state);
1306 if (status != DDI_SUCCESS) {
1307 tavor_hw_fini(state, cleanup);
1308 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1309 "hw_init_internal_uarpgs_alloc_fail");
1310 return (DDI_FAILURE);
1311 }
1312 cleanup = TAVOR_DRV_CLEANUP_LEVEL10;
1313
1314 /* Query and initialize the Tavor interrupt/MSI information */
1315 status = tavor_intr_or_msi_init(state);
1316 if (status != DDI_SUCCESS) {
1317 tavor_hw_fini(state, cleanup);
1318 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1319 "intr_or_msi_init_fail");
1320 return (DDI_FAILURE);
1321 }
1322 cleanup = TAVOR_DRV_CLEANUP_LEVEL11;
1323
1324 /* Setup all of the Tavor EQs */
1325 status = tavor_eq_init_all(state);
1326 if (status != DDI_SUCCESS) {
1327 tavor_hw_fini(state, cleanup);
1328 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1329 "hw_init_eqinitall_fail");
1330 return (DDI_FAILURE);
1331 }
1332 cleanup = TAVOR_DRV_CLEANUP_LEVEL12;
1333
1334 /* Set aside contexts for QP0 and QP1 */
1335 status = tavor_special_qp_contexts_reserve(state);
1336 if (status != DDI_SUCCESS) {
1337 tavor_hw_fini(state, cleanup);
1338 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1339 "hw_init_reserve_special_qp_fail");
1340 return (DDI_FAILURE);
1341 }
1342 cleanup = TAVOR_DRV_CLEANUP_LEVEL13;
1343
1344 /* Initialize for multicast group handling */
1345 status = tavor_mcg_init(state);
1346 if (status != DDI_SUCCESS) {
1347 tavor_hw_fini(state, cleanup);
1348 TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_mcg_init_fail");
1349 return (DDI_FAILURE);
1350 }
1351 cleanup = TAVOR_DRV_CLEANUP_LEVEL14;
1352
1353 /* Initialize the Tavor IB port(s) */
1354 status = tavor_hca_port_init(state);
1355 if (status != DDI_SUCCESS) {
1356 tavor_hw_fini(state, cleanup);
1357 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1358 "hw_init_hca_port_init_fail");
1359 return (DDI_FAILURE);
1360 }
1361 cleanup = TAVOR_DRV_CLEANUP_ALL;
1362
1363 /* Determine NodeGUID and SystemImageGUID */
1364 status = tavor_getnodeinfo_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN,
1365 &nodeinfo);
1366 if (status != TAVOR_CMD_SUCCESS) {
1367 cmn_err(CE_CONT, "Tavor: GetNodeInfo command failed: %08x\n",
1368 status);
1369 tavor_hw_fini(state, cleanup);
1370 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1371 "hw_init_getnodeinfo_cmd_fail");
1372 return (DDI_FAILURE);
1373 }
1374
1375 /*
1376 * If the NodeGUID value was set in OBP properties, then we use that
1377 * value. But we still print a message if the value we queried from
1378 * firmware does not match this value.
1379 *
1380 * Otherwise if OBP value is not set then we use the value from
1381 * firmware unconditionally.
1382 */
1383 if (state->ts_cfg_profile->cp_nodeguid) {
1384 state->ts_nodeguid = state->ts_cfg_profile->cp_nodeguid;
1385 } else {
1386 state->ts_nodeguid = nodeinfo.NodeGUID;
1387 }
1388
1389 if (state->ts_nodeguid != nodeinfo.NodeGUID) {
1390 cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
1391 "does not match value set by device property");
1392 }
1393
1394 /*
1395 * If the SystemImageGUID value was set in OBP properties, then we use
1396 * that value. But we still print a message if the value we queried
1397 * from firmware does not match this value.
1398 *
1399 * Otherwise if OBP value is not set then we use the value from
1400 * firmware unconditionally.
1401 */
1402 if (state->ts_cfg_profile->cp_sysimgguid) {
1403 state->ts_sysimgguid = state->ts_cfg_profile->cp_sysimgguid;
1404 } else {
1405 state->ts_sysimgguid = nodeinfo.SystemImageGUID;
1406 }
1407
1408 if (state->ts_sysimgguid != nodeinfo.SystemImageGUID) {
1409 cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
1410 "does not match value set by device property");
1411 }
1412
1413 /* Get NodeDescription */
1414 status = tavor_getnodedesc_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN,
1415 (sm_nodedesc_t *)&state->ts_nodedesc);
1416 if (status != TAVOR_CMD_SUCCESS) {
1417 cmn_err(CE_CONT, "Tavor: GetNodeDesc command failed: %08x\n",
1418 status);
1419 tavor_hw_fini(state, cleanup);
1420 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1421 "hw_init_getnodedesc_cmd_fail");
1422 return (DDI_FAILURE);
1423 }
1424
1425 return (DDI_SUCCESS);
1426 }
1427
1428
1429 /*
1430 * tavor_hw_fini()
1431 * Context: Only called from attach() and/or detach() path contexts
1432 */
1433 static void
tavor_hw_fini(tavor_state_t * state,tavor_drv_cleanup_level_t cleanup)1434 tavor_hw_fini(tavor_state_t *state, tavor_drv_cleanup_level_t cleanup)
1435 {
1436 uint_t num_ports;
1437 int status;
1438
1439 switch (cleanup) {
1440 /*
1441 * If we add more driver initialization steps that should be cleaned
1442 * up here, we need to ensure that TAVOR_DRV_CLEANUP_ALL is still the
1443 * first entry (i.e. corresponds to the last init step).
1444 */
1445 case TAVOR_DRV_CLEANUP_ALL:
1446 /* Shutdown the Tavor IB port(s) */
1447 num_ports = state->ts_cfg_profile->cp_num_ports;
1448 (void) tavor_hca_ports_shutdown(state, num_ports);
1449 /* FALLTHROUGH */
1450
1451 case TAVOR_DRV_CLEANUP_LEVEL14:
1452 /* Teardown resources used for multicast group handling */
1453 tavor_mcg_fini(state);
1454 /* FALLTHROUGH */
1455
1456 case TAVOR_DRV_CLEANUP_LEVEL13:
1457 /* Unreserve the special QP contexts */
1458 tavor_special_qp_contexts_unreserve(state);
1459 /* FALLTHROUGH */
1460
1461 case TAVOR_DRV_CLEANUP_LEVEL12:
1462 /*
1463 * Attempt to teardown all event queues (EQ). If we fail
1464 * here then print a warning message and return. Something
1465 * (either in HW or SW) has gone seriously wrong.
1466 */
1467 status = tavor_eq_fini_all(state);
1468 if (status != DDI_SUCCESS) {
1469 TAVOR_WARNING(state, "failed to teardown EQs");
1470 return;
1471 }
1472 /* FALLTHROUGH */
1473
1474 case TAVOR_DRV_CLEANUP_LEVEL11:
1475 status = tavor_intr_or_msi_fini(state);
1476 if (status != DDI_SUCCESS) {
1477 TAVOR_WARNING(state, "failed to free intr/MSI");
1478 return;
1479 }
1480 /* FALLTHROUGH */
1481
1482 case TAVOR_DRV_CLEANUP_LEVEL10:
1483 /* Free the resources for the Tavor internal UAR pages */
1484 tavor_internal_uarpgs_fini(state);
1485 /* FALLTHROUGH */
1486
1487 case TAVOR_DRV_CLEANUP_LEVEL9:
1488 /*
1489 * Free the PD that was used internally by Tavor software. If
1490 * we fail here then print a warning and return. Something
1491 * (probably software-related, but perhaps HW) has gone wrong.
1492 */
1493 status = tavor_pd_free(state, &state->ts_pdhdl_internal);
1494 if (status != DDI_SUCCESS) {
1495 TAVOR_WARNING(state, "failed to free internal PD");
1496 return;
1497 }
1498 /* FALLTHROUGH */
1499
1500 case TAVOR_DRV_CLEANUP_LEVEL8:
1501 /*
1502 * Post the CLOSE_HCA command to Tavor firmware. If we fail
1503 * here then print a warning and return. Something (either in
1504 * HW or SW) has gone seriously wrong.
1505 */
1506 status = tavor_close_hca_cmd_post(state,
1507 TAVOR_CMD_NOSLEEP_SPIN);
1508 if (status != TAVOR_CMD_SUCCESS) {
1509 TAVOR_WARNING(state, "failed to shutdown HCA");
1510 return;
1511 }
1512 /* FALLTHROUGH */
1513
1514 case TAVOR_DRV_CLEANUP_LEVEL7:
1515 /* Cleanup all the phase2 resources first */
1516 tavor_rsrc_fini(state, TAVOR_RSRC_CLEANUP_ALL);
1517 /* FALLTHROUGH */
1518
1519 case TAVOR_DRV_CLEANUP_LEVEL6:
1520 /* Then cleanup the phase1 resources */
1521 tavor_rsrc_fini(state, TAVOR_RSRC_CLEANUP_PHASE1_COMPLETE);
1522 /* FALLTHROUGH */
1523
1524 case TAVOR_DRV_CLEANUP_LEVEL5:
1525 /*
1526 * Post the SYS_DIS command to Tavor firmware to shut
1527 * everything down again. If we fail here then print a
1528 * warning and return. Something (probably in HW, but maybe
1529 * in SW) has gone seriously wrong.
1530 */
1531 status = tavor_sys_dis_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN);
1532 if (status != TAVOR_CMD_SUCCESS) {
1533 TAVOR_WARNING(state, "failed to shutdown hardware");
1534 return;
1535 }
1536 /* FALLTHROUGH */
1537
1538 case TAVOR_DRV_CLEANUP_LEVEL4:
1539 /* Teardown any resources allocated for the config profile */
1540 tavor_cfg_profile_fini(state);
1541 /* FALLTHROUGH */
1542
1543 case TAVOR_DRV_CLEANUP_LEVEL3:
1544 ddi_regs_map_free(&state->ts_reg_ddrhdl);
1545 /* FALLTHROUGH */
1546
1547 case TAVOR_DRV_CLEANUP_LEVEL2:
1548 ddi_regs_map_free(&state->ts_reg_uarhdl);
1549 /* FALLTHROUGH */
1550
1551 case TAVOR_DRV_CLEANUP_LEVEL1:
1552 case TAVOR_DRV_CLEANUP_LEVEL0:
1553 /*
1554 * LEVEL1 and LEVEL0 resources are freed in
1555 * tavor_drv_fini2().
1556 */
1557 break;
1558
1559 default:
1560 TAVOR_WARNING(state, "unexpected driver cleanup level");
1561 return;
1562 }
1563 }
1564
1565
1566 /*
1567 * tavor_soft_state_init()
1568 * Context: Only called from attach() path context
1569 */
1570 static int
tavor_soft_state_init(tavor_state_t * state)1571 tavor_soft_state_init(tavor_state_t *state)
1572 {
1573 ibt_hca_attr_t *hca_attr;
1574 uint64_t maxval, val;
1575 ibt_hca_flags_t caps = IBT_HCA_NO_FLAGS;
1576 int status;
1577
1578 /*
1579 * The ibc_hca_info_t struct is passed to the IBTF. This is the
1580 * routine where we initialize it. Many of the init values come from
1581 * either configuration variables or successful queries of the Tavor
1582 * hardware abilities
1583 */
1584 state->ts_ibtfinfo.hca_ci_vers = IBCI_V4;
1585 state->ts_ibtfinfo.hca_handle = (ibc_hca_hdl_t)state;
1586 state->ts_ibtfinfo.hca_ops = &tavor_ibc_ops;
1587
1588 hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
1589 state->ts_ibtfinfo.hca_attr = hca_attr;
1590
1591 hca_attr->hca_dip = state->ts_dip;
1592 hca_attr->hca_fw_major_version = state->ts_fw.fw_rev_major;
1593 hca_attr->hca_fw_minor_version = state->ts_fw.fw_rev_minor;
1594 hca_attr->hca_fw_micro_version = state->ts_fw.fw_rev_subminor;
1595
1596 /*
1597 * Determine HCA capabilities:
1598 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
1599 * IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
1600 * or IBT_HCA_SHUTDOWN_PORT
1601 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
1602 * IBT_HCA_RNR_NAK, and IBT_HCA_CURRENT_QP_STATE are always
1603 * supported
1604 * All other features are conditionally supported, depending on the
1605 * status return by the Tavor HCA (in QUERY_DEV_LIM)
1606 */
1607 if (state->ts_devlim.ud_multi) {
1608 caps |= IBT_HCA_UD_MULTICAST;
1609 }
1610 if (state->ts_devlim.atomic) {
1611 caps |= IBT_HCA_ATOMICS_HCA;
1612 }
1613 if (state->ts_devlim.apm) {
1614 caps |= IBT_HCA_AUTO_PATH_MIG;
1615 }
1616 if (state->ts_devlim.pkey_v) {
1617 caps |= IBT_HCA_PKEY_CNTR;
1618 }
1619 if (state->ts_devlim.qkey_v) {
1620 caps |= IBT_HCA_QKEY_CNTR;
1621 }
1622 if (state->ts_cfg_profile->cp_srq_enable) {
1623 caps |= IBT_HCA_SRQ | IBT_HCA_RESIZE_SRQ;
1624 }
1625 caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
1626 IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
1627 IBT_HCA_PORT_UP | IBT_HCA_SQD_STATE);
1628 hca_attr->hca_flags = caps;
1629 hca_attr->hca_flags2 = IBT_HCA2_DMA_MR;
1630
1631 /* Determine VendorID, DeviceID, and revision ID */
1632 hca_attr->hca_vendor_id = state->ts_adapter.vendor_id;
1633 hca_attr->hca_device_id = state->ts_adapter.device_id;
1634 hca_attr->hca_version_id = state->ts_adapter.rev_id;
1635
1636 /*
1637 * Determine number of available QPs and max QP size. Number of
1638 * available QPs is determined by subtracting the number of
1639 * "reserved QPs" (i.e. reserved for firmware use) from the
1640 * total number configured.
1641 */
1642 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_qp);
1643 hca_attr->hca_max_qp = val - ((uint64_t)1 <<
1644 state->ts_devlim.log_rsvd_qp);
1645 maxval = ((uint64_t)1 << state->ts_devlim.log_max_qp_sz);
1646 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_qp_sz);
1647 if (val > maxval) {
1648 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1649 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1650 "soft_state_init_maxqpsz_toobig_fail");
1651 return (DDI_FAILURE);
1652 }
1653 hca_attr->hca_max_qp_sz = val;
1654
1655 /* Determine max scatter-gather size in WQEs */
1656 maxval = state->ts_devlim.max_sg;
1657 val = state->ts_cfg_profile->cp_wqe_max_sgl;
1658 if (val > maxval) {
1659 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1660 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1661 "soft_state_init_toomanysgl_fail");
1662 return (DDI_FAILURE);
1663 }
1664 /* If the rounded value for max SGL is too large, cap it */
1665 if (state->ts_cfg_profile->cp_wqe_real_max_sgl > maxval) {
1666 state->ts_cfg_profile->cp_wqe_real_max_sgl = maxval;
1667 val = maxval;
1668 } else {
1669 val = state->ts_cfg_profile->cp_wqe_real_max_sgl;
1670 }
1671
1672 hca_attr->hca_max_sgl = val;
1673 hca_attr->hca_max_rd_sgl = 0; /* zero because RD is unsupported */
1674
1675 /*
1676 * Determine number of available CQs and max CQ size. Number of
1677 * available CQs is determined by subtracting the number of
1678 * "reserved CQs" (i.e. reserved for firmware use) from the
1679 * total number configured.
1680 */
1681 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_cq);
1682 hca_attr->hca_max_cq = val - ((uint64_t)1 <<
1683 state->ts_devlim.log_rsvd_cq);
1684 maxval = ((uint64_t)1 << state->ts_devlim.log_max_cq_sz);
1685 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_cq_sz) - 1;
1686 if (val > maxval) {
1687 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1688 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1689 "soft_state_init_maxcqsz_toobig_fail");
1690 return (DDI_FAILURE);
1691 }
1692 hca_attr->hca_max_cq_sz = val;
1693
1694 /*
1695 * Determine number of available SRQs and max SRQ size. Number of
1696 * available SRQs is determined by subtracting the number of
1697 * "reserved SRQs" (i.e. reserved for firmware use) from the
1698 * total number configured.
1699 */
1700 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_srq);
1701 hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
1702 state->ts_devlim.log_rsvd_srq);
1703 maxval = ((uint64_t)1 << state->ts_devlim.log_max_srq_sz);
1704 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_srq_sz);
1705
1706 if (val > maxval) {
1707 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1708 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1709 "soft_state_init_maxsrqsz_toobig_fail");
1710 return (DDI_FAILURE);
1711 }
1712 hca_attr->hca_max_srqs_sz = val;
1713
1714 val = state->ts_cfg_profile->cp_srq_max_sgl;
1715 maxval = state->ts_devlim.max_sg;
1716 if (val > maxval) {
1717 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1718 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1719 "soft_state_init_toomanysrqsgl_fail");
1720 return (DDI_FAILURE);
1721 }
1722 hca_attr->hca_max_srq_sgl = val;
1723
1724 /*
1725 * Determine supported HCA page sizes
1726 * XXX
1727 * For now we simply return the system pagesize as the only supported
1728 * pagesize
1729 */
1730 hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
1731 IBT_PAGE_4K);
1732
1733 /*
1734 * Determine number of available MemReg, MemWin, and their max size.
1735 * Number of available MRs and MWs is determined by subtracting
1736 * the number of "reserved MPTs" (i.e. reserved for firmware use)
1737 * from the total number configured for each.
1738 */
1739 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_mpt);
1740 hca_attr->hca_max_memr = val - ((uint64_t)1 <<
1741 state->ts_devlim.log_rsvd_mpt);
1742 hca_attr->hca_max_mem_win = val - ((uint64_t)1 <<
1743 state->ts_devlim.log_rsvd_mpt);
1744 maxval = state->ts_devlim.log_max_mrw_sz;
1745 val = state->ts_cfg_profile->cp_log_max_mrw_sz;
1746 if (val > maxval) {
1747 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1748 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1749 "soft_state_init_maxmrwsz_toobig_fail");
1750 return (DDI_FAILURE);
1751 }
1752 hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
1753
1754 /* Determine RDMA/Atomic properties */
1755 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_rdb);
1756 hca_attr->hca_max_rsc = val;
1757 val = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
1758 hca_attr->hca_max_rdma_in_qp = val;
1759 val = state->ts_cfg_profile->cp_hca_max_rdma_out_qp;
1760 hca_attr->hca_max_rdma_out_qp = val;
1761 hca_attr->hca_max_rdma_in_ee = 0;
1762 hca_attr->hca_max_rdma_out_ee = 0;
1763
1764 /*
1765 * Determine maximum number of raw IPv6 and Ether QPs. Set to 0
1766 * because neither type of raw QP is supported
1767 */
1768 hca_attr->hca_max_ipv6_qp = 0;
1769 hca_attr->hca_max_ether_qp = 0;
1770
1771 /* Determine max number of MCGs and max QP-per-MCG */
1772 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_qp);
1773 hca_attr->hca_max_mcg_qps = val;
1774 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_mcg);
1775 hca_attr->hca_max_mcg = val;
1776 val = state->ts_cfg_profile->cp_num_qp_per_mcg;
1777 hca_attr->hca_max_qp_per_mcg = val;
1778
1779 /* Determine max number partitions (i.e. PKeys) */
1780 maxval = ((uint64_t)1 << state->ts_devlim.log_max_pkey);
1781 val = ((uint64_t)state->ts_cfg_profile->cp_num_ports <<
1782 state->ts_cfg_profile->cp_log_max_pkeytbl);
1783
1784 if (val > maxval) {
1785 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1786 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1787 "soft_state_init_toomanypkey_fail");
1788 return (DDI_FAILURE);
1789 }
1790 hca_attr->hca_max_partitions = val;
1791
1792 /* Determine number of ports */
1793 maxval = state->ts_devlim.num_ports;
1794 val = state->ts_cfg_profile->cp_num_ports;
1795 if ((val > maxval) || (val == 0)) {
1796 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1797 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1798 "soft_state_init_toomanyports_fail");
1799 return (DDI_FAILURE);
1800 }
1801 hca_attr->hca_nports = val;
1802
1803 /* Copy NodeGUID and SystemImageGUID from softstate */
1804 hca_attr->hca_node_guid = state->ts_nodeguid;
1805 hca_attr->hca_si_guid = state->ts_sysimgguid;
1806
1807 /*
1808 * Determine local ACK delay. Use the value suggested by the Tavor
1809 * hardware (from the QUERY_DEV_LIM command)
1810 */
1811 hca_attr->hca_local_ack_delay = state->ts_devlim.ca_ack_delay;
1812
1813 /* Determine max SGID table and PKey table sizes */
1814 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_gidtbl);
1815 hca_attr->hca_max_port_sgid_tbl_sz = val;
1816 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_pkeytbl);
1817 hca_attr->hca_max_port_pkey_tbl_sz = val;
1818
1819 /* Determine max number of PDs */
1820 maxval = ((uint64_t)1 << state->ts_devlim.log_max_pd);
1821 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_pd);
1822 if (val > maxval) {
1823 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1824 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1825 "soft_state_init_toomanypd_fail");
1826 return (DDI_FAILURE);
1827 }
1828 hca_attr->hca_max_pd = val;
1829
1830 /* Determine max number of Address Handles */
1831 maxval = ((uint64_t)1 << state->ts_devlim.log_max_av);
1832 val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_ah);
1833 if (val > maxval) {
1834 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1835 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1836 "soft_state_init_toomanyah_fail");
1837 return (DDI_FAILURE);
1838 }
1839 hca_attr->hca_max_ah = val;
1840
1841 /* No RDDs or EECs (since Reliable Datagram is not supported) */
1842 hca_attr->hca_max_rdd = 0;
1843 hca_attr->hca_max_eec = 0;
1844
1845 /* Initialize lock for reserved UAR page access */
1846 mutex_init(&state->ts_uar_lock, NULL, MUTEX_DRIVER,
1847 DDI_INTR_PRI(state->ts_intrmsi_pri));
1848
1849 /* Initialize the flash fields */
1850 state->ts_fw_flashstarted = 0;
1851 mutex_init(&state->ts_fw_flashlock, NULL, MUTEX_DRIVER,
1852 DDI_INTR_PRI(state->ts_intrmsi_pri));
1853
1854 /* Initialize the lock for the info ioctl */
1855 mutex_init(&state->ts_info_lock, NULL, MUTEX_DRIVER,
1856 DDI_INTR_PRI(state->ts_intrmsi_pri));
1857
1858 /* Initialize the AVL tree for QP number support */
1859 tavor_qpn_avl_init(state);
1860
1861 /* Initialize the kstat info structure */
1862 status = tavor_kstat_init(state);
1863 if (status != DDI_SUCCESS) {
1864 tavor_qpn_avl_fini(state);
1865 mutex_destroy(&state->ts_info_lock);
1866 mutex_destroy(&state->ts_fw_flashlock);
1867 mutex_destroy(&state->ts_uar_lock);
1868 kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1869 TAVOR_ATTACH_MSG(state->ts_attach_buf,
1870 "soft_state_init_kstatinit_fail");
1871 return (DDI_FAILURE);
1872 }
1873
1874 return (DDI_SUCCESS);
1875 }
1876
1877
1878 /*
1879 * tavor_soft_state_fini()
1880 * Context: Called only from detach() path context
1881 */
1882 static void
tavor_soft_state_fini(tavor_state_t * state)1883 tavor_soft_state_fini(tavor_state_t *state)
1884 {
1885 /* Teardown the kstat info */
1886 tavor_kstat_fini(state);
1887
1888 /* Teardown the AVL tree for QP number support */
1889 tavor_qpn_avl_fini(state);
1890
1891 /* Free up info ioctl mutex */
1892 mutex_destroy(&state->ts_info_lock);
1893
1894 /* Free up flash mutex */
1895 mutex_destroy(&state->ts_fw_flashlock);
1896
1897 /* Free up the UAR page access mutex */
1898 mutex_destroy(&state->ts_uar_lock);
1899
1900 /* Free up the hca_attr struct */
1901 kmem_free(state->ts_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
1902 }
1903
1904
1905 /*
1906 * tavor_hca_config_setup()
1907 * Context: Only called from attach() path context
1908 */
1909 static void
tavor_hca_config_setup(tavor_state_t * state,tavor_hw_initqueryhca_t * inithca)1910 tavor_hca_config_setup(tavor_state_t *state,
1911 tavor_hw_initqueryhca_t *inithca)
1912 {
1913 tavor_rsrc_pool_info_t *rsrc_pool;
1914 uint64_t ddr_baseaddr, ddr_base_map_addr;
1915 uint64_t offset, addr;
1916 uint_t mcg_size;
1917
1918 /* Set "host endianness". Default is big endian */
1919 #ifdef _LITTLE_ENDIAN
1920 inithca->big_endian = 0;
1921 #else
1922 inithca->big_endian = 1;
1923 #endif
1924 /* No Address Vector Protection, but Port Checking on by default */
1925 inithca->udav_chk = TAVOR_UDAV_PROTECT_DISABLED;
1926 inithca->udav_port_chk = TAVOR_UDAV_PORTCHK_ENABLED;
1927
1928 ddr_baseaddr = (uint64_t)(uintptr_t)state->ts_reg_ddr_baseaddr;
1929 ddr_base_map_addr = (uint64_t)state->ts_ddr.ddr_baseaddr;
1930
1931 /* Setup QPC table */
1932 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_QPC];
1933 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1934 addr = ddr_base_map_addr + offset;
1935 inithca->context.qpc_baseaddr_h = (addr >> 32);
1936 inithca->context.qpc_baseaddr_l = (addr & 0xFFFFFFFF) >> 7;
1937 inithca->context.log_num_qp = state->ts_cfg_profile->cp_log_num_qp;
1938
1939 /* Setup EEC table (initialize to zero - RD unsupported) */
1940 inithca->context.eec_baseaddr_h = 0;
1941 inithca->context.eec_baseaddr_l = 0;
1942 inithca->context.log_num_ee = 0;
1943
1944 /* Setup CQC table */
1945 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_CQC];
1946 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1947 addr = ddr_base_map_addr + offset;
1948 inithca->context.cqc_baseaddr_h = (addr >> 32);
1949 inithca->context.cqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1950 inithca->context.log_num_cq = state->ts_cfg_profile->cp_log_num_cq;
1951
1952 /* Setup SRQC table */
1953 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_SRQC];
1954 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1955 addr = ddr_base_map_addr + offset;
1956 inithca->context.srqc_baseaddr_h = (addr >> 32);
1957 inithca->context.srqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1958 inithca->context.log_num_srq =
1959 state->ts_cfg_profile->cp_log_num_srq;
1960
1961 /* Setup EQPC table */
1962 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_EQPC];
1963 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1964 addr = ddr_base_map_addr + offset;
1965 inithca->context.eqpc_baseaddr = addr;
1966
1967 /* Setup EEEC table (initialize to zero - RD unsupported) */
1968 inithca->context.eeec_baseaddr = 0;
1969
1970 /* Setup EQC table */
1971 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_EQC];
1972 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1973 addr = ddr_base_map_addr + offset;
1974 inithca->context.eqc_baseaddr_h = (addr >> 32);
1975 inithca->context.eqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1976 inithca->context.log_num_eq = TAVOR_NUM_EQ_SHIFT;
1977
1978 /* Setup RDB table */
1979 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
1980 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1981 addr = ddr_base_map_addr + offset;
1982 inithca->context.rdb_baseaddr_h = (addr >> 32);
1983 inithca->context.rdb_baseaddr_l = 0;
1984
1985 /* Setup Multicast */
1986 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MCG];
1987 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1988 addr = ddr_base_map_addr + offset;
1989 inithca->multi.mc_baseaddr = addr;
1990 mcg_size = TAVOR_MCGMEM_SZ(state);
1991 inithca->multi.log_mc_tbl_ent = highbit(mcg_size) - 1;
1992 inithca->multi.mc_tbl_hash_sz =
1993 (1 << state->ts_cfg_profile->cp_log_num_mcg_hash);
1994 inithca->multi.mc_hash_fn = TAVOR_MCG_DEFAULT_HASH_FN;
1995 inithca->multi.log_mc_tbl_sz = state->ts_cfg_profile->cp_log_num_mcg;
1996
1997
1998 /* Setup TPT */
1999 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MPT];
2000 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2001 addr = ddr_base_map_addr + offset;
2002 inithca->tpt.mpt_baseaddr = addr;
2003 inithca->tpt.mttseg_sz = TAVOR_MTTSEG_SIZE_SHIFT;
2004 inithca->tpt.log_mpt_sz = state->ts_cfg_profile->cp_log_num_mpt;
2005 inithca->tpt.mtt_version = TAVOR_MTT_PG_WALK_VER;
2006
2007 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
2008 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2009 addr = ddr_base_map_addr + offset;
2010 inithca->tpt.mtt_baseaddr = addr;
2011
2012 /* Setup UAR */
2013 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_UAR_SCR];
2014 offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2015 addr = ddr_base_map_addr + offset;
2016 inithca->uar.uarscr_baseaddr = addr;
2017
2018 inithca->uar.uar_pg_sz = PAGESHIFT - 0xC;
2019 }
2020
2021
2022 /*
2023 * tavor_hca_port_init()
2024 * Context: Only called from attach() path context
2025 */
2026 static int
tavor_hca_port_init(tavor_state_t * state)2027 tavor_hca_port_init(tavor_state_t *state)
2028 {
2029 tavor_hw_initib_t *portinits, *initib;
2030 tavor_cfg_profile_t *cfgprof;
2031 uint_t num_ports;
2032 int i, status;
2033 uint64_t maxval, val;
2034 uint64_t sysimgguid, nodeguid, portguid;
2035
2036 cfgprof = state->ts_cfg_profile;
2037
2038 /* Get number of HCA ports */
2039 num_ports = cfgprof->cp_num_ports;
2040
2041 /* Allocate space for Tavor port init struct(s) */
2042 portinits = (tavor_hw_initib_t *)kmem_zalloc(num_ports *
2043 sizeof (tavor_hw_initib_t), KM_SLEEP);
2044
2045 /* Post command to initialize Tavor HCA port */
2046 for (i = 0; i < num_ports; i++) {
2047 initib = &portinits[i];
2048
2049 /*
2050 * Determine whether we need to override the firmware's
2051 * default SystemImageGUID setting.
2052 */
2053 sysimgguid = cfgprof->cp_sysimgguid;
2054 if (sysimgguid != 0) {
2055 initib->set_sysimg_guid = 1;
2056 initib->sysimg_guid = sysimgguid;
2057 }
2058
2059 /*
2060 * Determine whether we need to override the firmware's
2061 * default NodeGUID setting.
2062 */
2063 nodeguid = cfgprof->cp_nodeguid;
2064 if (nodeguid != 0) {
2065 initib->set_node_guid = 1;
2066 initib->node_guid = nodeguid;
2067 }
2068
2069 /*
2070 * Determine whether we need to override the firmware's
2071 * default PortGUID setting.
2072 */
2073 portguid = cfgprof->cp_portguid[i];
2074 if (portguid != 0) {
2075 initib->set_port_guid0 = 1;
2076 initib->guid0 = portguid;
2077 }
2078
2079 /* Validate max MTU size */
2080 maxval = state->ts_devlim.max_mtu;
2081 val = cfgprof->cp_max_mtu;
2082 if (val > maxval) {
2083 goto init_ports_fail;
2084 }
2085 initib->mtu_cap = val;
2086
2087 /* Validate the max port width */
2088 maxval = state->ts_devlim.max_port_width;
2089 val = cfgprof->cp_max_port_width;
2090 if (val > maxval) {
2091 goto init_ports_fail;
2092 }
2093 initib->port_width_cap = val;
2094
2095 /* Validate max VL cap size */
2096 maxval = state->ts_devlim.max_vl;
2097 val = cfgprof->cp_max_vlcap;
2098 if (val > maxval) {
2099 goto init_ports_fail;
2100 }
2101 initib->vl_cap = val;
2102
2103 /* Validate max GID table size */
2104 maxval = ((uint64_t)1 << state->ts_devlim.log_max_gid);
2105 val = ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
2106 if (val > maxval) {
2107 goto init_ports_fail;
2108 }
2109 initib->max_gid = val;
2110
2111 /* Validate max PKey table size */
2112 maxval = ((uint64_t)1 << state->ts_devlim.log_max_pkey);
2113 val = ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
2114 if (val > maxval) {
2115 goto init_ports_fail;
2116 }
2117 initib->max_pkey = val;
2118
2119 /*
2120 * Post the INIT_IB command to Tavor firmware. When this
2121 * command completes, the corresponding Tavor port will be
2122 * physically "Up" and initialized.
2123 */
2124 status = tavor_init_ib_cmd_post(state, initib, i + 1,
2125 TAVOR_CMD_NOSLEEP_SPIN);
2126 if (status != TAVOR_CMD_SUCCESS) {
2127 cmn_err(CE_CONT, "Tavor: INIT_IB (port %02d) command "
2128 "failed: %08x\n", i + 1, status);
2129 goto init_ports_fail;
2130 }
2131 }
2132
2133 /* Free up the memory for Tavor port init struct(s), return success */
2134 kmem_free(portinits, num_ports * sizeof (tavor_hw_initib_t));
2135 return (DDI_SUCCESS);
2136
2137 init_ports_fail:
2138 /*
2139 * Free up the memory for Tavor port init struct(s), shutdown any
2140 * successfully initialized ports, and return failure
2141 */
2142 kmem_free(portinits, num_ports * sizeof (tavor_hw_initib_t));
2143 (void) tavor_hca_ports_shutdown(state, i);
2144
2145 return (DDI_FAILURE);
2146 }
2147
2148
2149 /*
2150 * tavor_hca_ports_shutdown()
2151 * Context: Only called from attach() and/or detach() path contexts
2152 */
2153 static int
tavor_hca_ports_shutdown(tavor_state_t * state,uint_t num_init)2154 tavor_hca_ports_shutdown(tavor_state_t *state, uint_t num_init)
2155 {
2156 int i, status;
2157
2158 /*
2159 * Post commands to shutdown all init'd Tavor HCA ports. Note: if
2160 * any of these commands fail for any reason, it would be entirely
2161 * unexpected and probably indicative a serious problem (HW or SW).
2162 * Although we do return void from this function, this type of failure
2163 * should not go unreported.
2164 */
2165 for (i = 0; i < num_init; i++) {
2166 status = tavor_close_ib_cmd_post(state, i + 1,
2167 TAVOR_CMD_NOSLEEP_SPIN);
2168 if (status != TAVOR_CMD_SUCCESS) {
2169 TAVOR_WARNING(state, "failed to shutdown HCA port");
2170 return (status);
2171 }
2172 }
2173
2174 return (TAVOR_CMD_SUCCESS);
2175 }
2176
2177
2178 /*
2179 * tavor_internal_uarpgs_init
2180 * Context: Only called from attach() path context
2181 */
2182 static int
tavor_internal_uarpgs_init(tavor_state_t * state)2183 tavor_internal_uarpgs_init(tavor_state_t *state)
2184 {
2185 int status;
2186
2187 /*
2188 * Save away reserved Tavor UAR page #0. This UAR page is not to
2189 * be used by software.
2190 */
2191 status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1, TAVOR_SLEEP,
2192 &state->ts_uarpg0_rsrc_rsrvd);
2193 if (status != DDI_SUCCESS) {
2194 return (DDI_FAILURE);
2195 }
2196
2197 /*
2198 * Save away Tavor UAR page #1 (for internal use). This UAR page is
2199 * the privileged UAR page through which all kernel generated
2200 * doorbells will be rung.
2201 */
2202 status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1, TAVOR_SLEEP,
2203 &state->ts_uarpg1_rsrc);
2204 if (status != DDI_SUCCESS) {
2205 tavor_rsrc_free(state, &state->ts_uarpg0_rsrc_rsrvd);
2206 return (DDI_FAILURE);
2207 }
2208
2209 /* Setup pointer to UAR page #1 doorbells */
2210 state->ts_uar = (tavor_hw_uar_t *)state->ts_uarpg1_rsrc->tr_addr;
2211
2212 return (DDI_SUCCESS);
2213 }
2214
2215
2216 /*
2217 * tavor_internal_uarpgs_fini
2218 * Context: Only called from attach() and/or detach() path contexts
2219 */
2220 static void
tavor_internal_uarpgs_fini(tavor_state_t * state)2221 tavor_internal_uarpgs_fini(tavor_state_t *state)
2222 {
2223 /* Free up Tavor UAR page #1 (kernel driver doorbells) */
2224 tavor_rsrc_free(state, &state->ts_uarpg1_rsrc);
2225
2226 /* Free up Tavor UAR page #0 (reserved) */
2227 tavor_rsrc_free(state, &state->ts_uarpg0_rsrc_rsrvd);
2228 }
2229
2230
2231 /*
2232 * tavor_special_qp_contexts_reserve()
2233 * Context: Only called from attach() path context
2234 */
2235 static int
tavor_special_qp_contexts_reserve(tavor_state_t * state)2236 tavor_special_qp_contexts_reserve(tavor_state_t *state)
2237 {
2238 tavor_rsrc_t *qp0_rsrc, *qp1_rsrc;
2239 int status;
2240
2241 /* Initialize the lock used for special QP rsrc management */
2242 mutex_init(&state->ts_spec_qplock, NULL, MUTEX_DRIVER,
2243 DDI_INTR_PRI(state->ts_intrmsi_pri));
2244
2245 /*
2246 * Reserve contexts for QP0. These QP contexts will be setup to
2247 * act as aliases for the real QP0. Note: We are required to grab
2248 * two QPs (one per port) even if we are operating in single-port
2249 * mode.
2250 */
2251 status = tavor_rsrc_alloc(state, TAVOR_QPC, 2, TAVOR_SLEEP, &qp0_rsrc);
2252 if (status != DDI_SUCCESS) {
2253 mutex_destroy(&state->ts_spec_qplock);
2254 return (DDI_FAILURE);
2255 }
2256 state->ts_spec_qp0 = qp0_rsrc;
2257
2258 /*
2259 * Reserve contexts for QP1. These QP contexts will be setup to
2260 * act as aliases for the real QP1. Note: We are required to grab
2261 * two QPs (one per port) even if we are operating in single-port
2262 * mode.
2263 */
2264 status = tavor_rsrc_alloc(state, TAVOR_QPC, 2, TAVOR_SLEEP, &qp1_rsrc);
2265 if (status != DDI_SUCCESS) {
2266 tavor_rsrc_free(state, &qp0_rsrc);
2267 mutex_destroy(&state->ts_spec_qplock);
2268 return (DDI_FAILURE);
2269 }
2270 state->ts_spec_qp1 = qp1_rsrc;
2271
2272 return (DDI_SUCCESS);
2273 }
2274
2275
2276 /*
2277 * tavor_special_qp_contexts_unreserve()
2278 * Context: Only called from attach() and/or detach() path contexts
2279 */
2280 static void
tavor_special_qp_contexts_unreserve(tavor_state_t * state)2281 tavor_special_qp_contexts_unreserve(tavor_state_t *state)
2282 {
2283 /* Unreserve contexts for QP1 */
2284 tavor_rsrc_free(state, &state->ts_spec_qp1);
2285
2286 /* Unreserve contexts for QP0 */
2287 tavor_rsrc_free(state, &state->ts_spec_qp0);
2288
2289 /* Destroy the lock used for special QP rsrc management */
2290 mutex_destroy(&state->ts_spec_qplock);
2291 }
2292
2293
2294 /*
2295 * tavor_sw_reset()
2296 * Context: Currently called only from attach() path context
2297 */
2298 static int
tavor_sw_reset(tavor_state_t * state)2299 tavor_sw_reset(tavor_state_t *state)
2300 {
2301 dev_info_t *dip, *pdip;
2302 ddi_acc_handle_t hdl = state->ts_pci_cfghdl, phdl;
2303 uint32_t reset_delay;
2304 int status, i;
2305
2306 /*
2307 * If the configured software reset delay is set to zero, then we
2308 * will not attempt a software reset of the Tavor device.
2309 */
2310 reset_delay = state->ts_cfg_profile->cp_sw_reset_delay;
2311 if (reset_delay == 0) {
2312 return (DDI_SUCCESS);
2313 }
2314
2315 /*
2316 * Get dip for HCA device _and_ parent device as well. Parent access
2317 * is necessary here because software reset of the Tavor hardware
2318 * will reinitialize both the config registers of the PCI bridge
2319 * (parent, if it exists) and the IB HCA (self)
2320 */
2321 dip = state->ts_dip;
2322 pdip = ddi_get_parent(dip);
2323
2324 /* Query the PCI capabilities of the HCA device */
2325 tavor_pci_capability_list(state, hdl);
2326
2327 /*
2328 * Read all PCI config info (reg0...reg63). Note: According to the
2329 * Tavor software reset application note, we should not read or
2330 * restore the values in reg22 and reg23.
2331 */
2332 for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
2333 if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
2334 (i != TAVOR_SW_RESET_REG23_RSVD)) {
2335 state->ts_cfg_data[i] = pci_config_get32(hdl, i << 2);
2336 }
2337 }
2338
2339 if (TAVOR_PARENT_IS_BRIDGE(pdip)) {
2340 /*
2341 * Setup for PCI config read/write of bridge device
2342 */
2343 status = pci_config_setup(pdip, &phdl);
2344 if (status != DDI_SUCCESS) {
2345 return (DDI_FAILURE);
2346 }
2347
2348 /*
2349 * Read all PCI config info (reg0...reg63). Note: According to
2350 * the Tavor software reset application note, we should not
2351 * read or restore the values in reg22 and reg23.
2352 */
2353 for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
2354 if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
2355 (i != TAVOR_SW_RESET_REG23_RSVD)) {
2356 state->ts_cfg_pdata[i] =
2357 pci_config_get32(phdl, i << 2);
2358 }
2359 }
2360 }
2361
2362 /*
2363 * Perform the software reset (by writing 1 at offset 0xF0010)
2364 */
2365 ddi_put32(state->ts_reg_cmdhdl, state->ts_cmd_regs.sw_reset,
2366 TAVOR_SW_RESET_START);
2367
2368 drv_usecwait(reset_delay);
2369
2370 if (TAVOR_PARENT_IS_BRIDGE(pdip)) {
2371 /*
2372 * Bridge exists, so wait for the bridge to become ready.
2373 *
2374 * The above delay is necessary to avoid system panic from
2375 * Master Abort. If the device is accessed before this delay,
2376 * device will not respond to config cycles and they will be
2377 * terminate with a Master Abort which will panic the system.
2378 * Below is the loop we use to poll status from the device to
2379 * determine if it is OK to proceed.
2380 */
2381 i = 0;
2382 while (pci_config_get32(phdl, 0) == TAVOR_SW_RESET_NOTDONE) {
2383 drv_usecwait(TAVOR_SW_RESET_POLL_DELAY);
2384 }
2385
2386 /*
2387 * Write all the PCI config registers back into each device
2388 * (except for reg22 and reg23 - see above)
2389 */
2390 for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
2391 if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
2392 (i != TAVOR_SW_RESET_REG23_RSVD)) {
2393 pci_config_put32(phdl, i << 2,
2394 state->ts_cfg_pdata[i]);
2395 }
2396 }
2397
2398 /*
2399 * Tear down the config setup (for bridge device)
2400 */
2401 pci_config_teardown(&phdl);
2402
2403 /* No Bridge Device */
2404 } else {
2405 /*
2406 * Bridge does not exist, so instead wait for the device itself
2407 * to become ready.
2408 *
2409 * The above delay is necessary to avoid system panic from
2410 * Master Abort. If the device is accessed before this delay,
2411 * device will not respond to config cycles and they will be
2412 * terminate with a Master Abort which will panic the system.
2413 * Below is the loop we use to poll status from the device to
2414 * determine if it is OK to proceed.
2415 */
2416 i = 0;
2417 while (pci_config_get32(hdl, 0) == TAVOR_SW_RESET_NOTDONE) {
2418 drv_usecwait(TAVOR_SW_RESET_POLL_DELAY);
2419 }
2420 }
2421
2422 for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
2423 if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
2424 (i != TAVOR_SW_RESET_REG23_RSVD)) {
2425 pci_config_put32(hdl, i << 2, state->ts_cfg_data[i]);
2426 }
2427 }
2428
2429 return (DDI_SUCCESS);
2430 }
2431
2432
2433 /*
2434 * tavor_mcg_init()
2435 * Context: Only called from attach() path context
2436 */
2437 static int
tavor_mcg_init(tavor_state_t * state)2438 tavor_mcg_init(tavor_state_t *state)
2439 {
2440 uint_t mcg_tmp_sz;
2441
2442 /*
2443 * Allocate space for the MCG temporary copy buffer. This is
2444 * used by the Attach/Detach Multicast Group code
2445 */
2446 mcg_tmp_sz = TAVOR_MCGMEM_SZ(state);
2447 state->ts_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
2448
2449 /*
2450 * Initialize the multicast group mutex. This ensures atomic
2451 * access to add, modify, and remove entries in the multicast
2452 * group hash lists.
2453 */
2454 mutex_init(&state->ts_mcglock, NULL, MUTEX_DRIVER,
2455 DDI_INTR_PRI(state->ts_intrmsi_pri));
2456
2457 return (DDI_SUCCESS);
2458 }
2459
2460
2461 /*
2462 * tavor_mcg_fini()
2463 * Context: Only called from attach() and/or detach() path contexts
2464 */
2465 static void
tavor_mcg_fini(tavor_state_t * state)2466 tavor_mcg_fini(tavor_state_t *state)
2467 {
2468 uint_t mcg_tmp_sz;
2469
2470 /* Free up the space used for the MCG temporary copy buffer */
2471 mcg_tmp_sz = TAVOR_MCGMEM_SZ(state);
2472 kmem_free(state->ts_mcgtmp, mcg_tmp_sz);
2473
2474 /* Destroy the multicast group mutex */
2475 mutex_destroy(&state->ts_mcglock);
2476 }
2477
2478
2479 /*
2480 * tavor_fw_version_check()
2481 * Context: Only called from attach() path context
2482 */
2483 static int
tavor_fw_version_check(tavor_state_t * state)2484 tavor_fw_version_check(tavor_state_t *state)
2485 {
2486 uint_t tavor_fw_ver_major;
2487 uint_t tavor_fw_ver_minor;
2488 uint_t tavor_fw_ver_subminor;
2489
2490 /*
2491 * Depending on which version of driver we have attached, the firmware
2492 * version checks will be different. We set up the comparison values
2493 * for both HCA Mode (Tavor hardware) or COMPAT Mode (Arbel hardware
2494 * running in tavor mode).
2495 */
2496 switch (state->ts_operational_mode) {
2497 case TAVOR_HCA_MODE:
2498 tavor_fw_ver_major = TAVOR_FW_VER_MAJOR;
2499 tavor_fw_ver_minor = TAVOR_FW_VER_MINOR;
2500 tavor_fw_ver_subminor = TAVOR_FW_VER_SUBMINOR;
2501 break;
2502
2503 case TAVOR_COMPAT_MODE:
2504 tavor_fw_ver_major = TAVOR_COMPAT_FW_VER_MAJOR;
2505 tavor_fw_ver_minor = TAVOR_COMPAT_FW_VER_MINOR;
2506 tavor_fw_ver_subminor = TAVOR_COMPAT_FW_VER_SUBMINOR;
2507 break;
2508
2509 default:
2510 return (DDI_FAILURE);
2511 }
2512
2513 /*
2514 * If FW revision major number is less than acceptable,
2515 * return failure, else if greater return success. If
2516 * the major numbers are equal than check the minor number
2517 */
2518 if (state->ts_fw.fw_rev_major < tavor_fw_ver_major) {
2519 return (DDI_FAILURE);
2520 } else if (state->ts_fw.fw_rev_major > tavor_fw_ver_major) {
2521 return (DDI_SUCCESS);
2522 }
2523 /*
2524 * Do the same check as above, except for minor revision numbers
2525 * If the minor numbers are equal than check the subminor number
2526 */
2527 if (state->ts_fw.fw_rev_minor < tavor_fw_ver_minor) {
2528 return (DDI_FAILURE);
2529 } else if (state->ts_fw.fw_rev_minor > tavor_fw_ver_minor) {
2530 return (DDI_SUCCESS);
2531 }
2532
2533 /*
2534 * Once again we do the same check as above, except for the subminor
2535 * revision number. If the subminor numbers are equal here, then
2536 * these are the same firmware version, return success
2537 */
2538 if (state->ts_fw.fw_rev_subminor < tavor_fw_ver_subminor) {
2539 return (DDI_FAILURE);
2540 } else if (state->ts_fw.fw_rev_subminor > tavor_fw_ver_subminor) {
2541 return (DDI_SUCCESS);
2542 }
2543
2544 return (DDI_SUCCESS);
2545 }
2546
2547
2548 /*
2549 * tavor_device_info_report()
2550 * Context: Only called from attach() path context
2551 */
2552 static void
tavor_device_info_report(tavor_state_t * state)2553 tavor_device_info_report(tavor_state_t *state)
2554 {
2555 cmn_err(CE_CONT, "?tavor%d: FW ver: %04d.%04d.%04d, "
2556 "HW rev: %02x\n", state->ts_instance, state->ts_fw.fw_rev_major,
2557 state->ts_fw.fw_rev_minor, state->ts_fw.fw_rev_subminor,
2558 state->ts_adapter.rev_id);
2559 cmn_err(CE_CONT, "?tavor%d: %64s (0x%016" PRIx64 ")\n",
2560 state->ts_instance, state->ts_nodedesc, state->ts_nodeguid);
2561 }
2562
2563
2564 /*
2565 * tavor_pci_capability_list()
2566 * Context: Only called from attach() path context
2567 */
2568 static void
tavor_pci_capability_list(tavor_state_t * state,ddi_acc_handle_t hdl)2569 tavor_pci_capability_list(tavor_state_t *state, ddi_acc_handle_t hdl)
2570 {
2571 uint_t offset, data;
2572
2573 /*
2574 * Check for the "PCI Capabilities" bit in the "Status Register".
2575 * Bit 4 in this register indicates the presence of a "PCI
2576 * Capabilities" list.
2577 */
2578 data = pci_config_get16(hdl, 0x6);
2579 if ((data & 0x10) == 0) {
2580 return;
2581 }
2582
2583 /*
2584 * Starting from offset 0x34 in PCI config space, find the
2585 * head of "PCI capabilities" list, and walk the list. If
2586 * capabilities of a known type are encountered (e.g.
2587 * "PCI-X Capability"), then call the appropriate handler
2588 * function.
2589 */
2590 offset = pci_config_get8(hdl, 0x34);
2591 while (offset != 0x0) {
2592 data = pci_config_get8(hdl, offset);
2593
2594 /*
2595 * Check for known capability types. Tavor has the
2596 * following:
2597 * o VPD Capability (0x03)
2598 * o PCI-X Capability (0x07)
2599 * o MSI Capability (0x05)
2600 * o MSIX Capability (0x11)
2601 */
2602 switch (data) {
2603 case 0x03:
2604 tavor_pci_capability_vpd(state, hdl, offset);
2605 break;
2606 case 0x07:
2607 tavor_pci_capability_pcix(state, hdl, offset);
2608 break;
2609 case 0x05:
2610 break;
2611 default:
2612 break;
2613 }
2614
2615 /* Get offset of next entry in list */
2616 offset = pci_config_get8(hdl, offset + 1);
2617 }
2618 }
2619
2620 /*
2621 * tavor_pci_read_vpd()
2622 * Context: Only called from attach() path context
2623 * utility routine for tavor_pci_capability_vpd()
2624 */
2625 static int
tavor_pci_read_vpd(ddi_acc_handle_t hdl,uint_t offset,uint32_t addr,uint32_t * data)2626 tavor_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
2627 uint32_t *data)
2628 {
2629 int retry = 4; /* retry counter for EEPROM poll */
2630 uint32_t val;
2631 int vpd_addr = offset + 2;
2632 int vpd_data = offset + 4;
2633
2634 /*
2635 * In order to read a 32-bit value from VPD, we are to write down
2636 * the address (offset in the VPD itself) to the address register.
2637 * To signal the read, we also clear bit 31. We then poll on bit 31
2638 * and when it is set, we can then read our 4 bytes from the data
2639 * register.
2640 */
2641 (void) pci_config_put32(hdl, offset, addr << 16);
2642 do {
2643 drv_usecwait(1000);
2644 val = pci_config_get16(hdl, vpd_addr);
2645 if ((val >> 15) & 0x01) {
2646 *data = pci_config_get32(hdl, vpd_data);
2647 return (DDI_SUCCESS);
2648 }
2649 } while (--retry);
2650
2651 return (DDI_FAILURE);
2652 }
2653
2654
2655 /*
2656 * tavor_pci_capability_vpd()
2657 * Context: Only called from attach() path context
2658 */
2659 static void
tavor_pci_capability_vpd(tavor_state_t * state,ddi_acc_handle_t hdl,uint_t offset)2660 tavor_pci_capability_vpd(tavor_state_t *state, ddi_acc_handle_t hdl,
2661 uint_t offset)
2662 {
2663 uint8_t name_length;
2664 uint8_t pn_length;
2665 int i, err = 0;
2666 int vpd_str_id = 0;
2667 int vpd_ro_desc;
2668 int vpd_ro_pn_desc;
2669 #ifndef _LITTLE_ENDIAN
2670 uint32_t data32;
2671 #endif /* _LITTLE_ENDIAN */
2672 union {
2673 uint32_t vpd_int[TAVOR_VPD_HDR_DWSIZE];
2674 uchar_t vpd_char[TAVOR_VPD_HDR_BSIZE];
2675 } vpd;
2676
2677 /*
2678 * Read Vital Product Data (VPD) from PCI-X capability.
2679 */
2680 for (i = 0; i < TAVOR_VPD_HDR_DWSIZE; i++) {
2681 err = tavor_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
2682 if (err != DDI_SUCCESS) {
2683 cmn_err(CE_NOTE, "!VPD read failed\n");
2684 goto out;
2685 }
2686 }
2687
2688 #ifndef _LITTLE_ENDIAN
2689 /*
2690 * Need to swap bytes for big endian.
2691 */
2692 for (i = 0; i < TAVOR_VPD_HDR_DWSIZE; i++) {
2693 data32 = vpd.vpd_int[i];
2694 vpd.vpd_char[(i << 2) + 3] =
2695 (uchar_t)((data32 & 0xFF000000) >> 24);
2696 vpd.vpd_char[(i << 2) + 2] =
2697 (uchar_t)((data32 & 0x00FF0000) >> 16);
2698 vpd.vpd_char[(i << 2) + 1] =
2699 (uchar_t)((data32 & 0x0000FF00) >> 8);
2700 vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
2701 }
2702 #endif /* _LITTLE_ENDIAN */
2703
2704 /* Check for VPD String ID Tag */
2705 if (vpd.vpd_char[vpd_str_id] == 0x82) {
2706 /* get the product name */
2707 name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
2708 if (name_length > sizeof (state->ts_hca_name)) {
2709 cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
2710 name_length);
2711 goto out;
2712 }
2713 (void) memcpy(state->ts_hca_name, &vpd.vpd_char[vpd_str_id + 3],
2714 name_length);
2715 state->ts_hca_name[name_length] = 0;
2716
2717 /* get the part number */
2718 vpd_ro_desc = name_length + 3; /* read-only tag location */
2719 vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
2720 /*
2721 * Verify read-only tag and Part Number keyword.
2722 */
2723 if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
2724 (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
2725 vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
2726 cmn_err(CE_NOTE, "!VPD Part Number not found\n");
2727 goto out;
2728 }
2729
2730 pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
2731 if (pn_length > sizeof (state->ts_hca_pn)) {
2732 cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
2733 name_length);
2734 goto out;
2735 }
2736 (void) memcpy(state->ts_hca_pn,
2737 &vpd.vpd_char[vpd_ro_pn_desc + 3],
2738 pn_length);
2739 state->ts_hca_pn[pn_length] = 0;
2740 state->ts_hca_pn_len = pn_length;
2741 } else {
2742 /* Wrong VPD String ID Tag */
2743 cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
2744 vpd.vpd_char[0]);
2745 goto out;
2746 }
2747 return;
2748 out:
2749 state->ts_hca_pn_len = 0;
2750 }
2751
2752 /*
2753 * tavor_pci_capability_pcix()
2754 * Context: Only called from attach() path context
2755 */
2756 static void
tavor_pci_capability_pcix(tavor_state_t * state,ddi_acc_handle_t hdl,uint_t offset)2757 tavor_pci_capability_pcix(tavor_state_t *state, ddi_acc_handle_t hdl,
2758 uint_t offset)
2759 {
2760 uint_t command, status;
2761 int max_out_splt_trans, max_mem_rd_byte_cnt;
2762 int designed_max_out_splt_trans, designed_max_mem_rd_byte_cnt;
2763
2764 /*
2765 * Query the current values for the PCI-X Command Register and
2766 * the PCI-X Status Register.
2767 */
2768 command = pci_config_get16(hdl, offset + 2);
2769 status = pci_config_get32(hdl, offset + 4);
2770
2771 /*
2772 * Check for config property specifying "maximum outstanding
2773 * split transactions". If the property is defined and valid
2774 * (i.e. no larger than the so-called "designed maximum"),
2775 * then use the specified value to update the PCI-X Command Register.
2776 * Otherwise, extract the value from the Tavor config profile.
2777 */
2778 designed_max_out_splt_trans = ((status >> 23) & 7);
2779 max_out_splt_trans = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
2780 DDI_PROP_DONTPASS, "pcix-max-outstanding-split-trans", -1);
2781 if ((max_out_splt_trans != -1) &&
2782 ((max_out_splt_trans < 0) ||
2783 (max_out_splt_trans > designed_max_out_splt_trans))) {
2784 cmn_err(CE_NOTE, "!tavor%d: property \"pcix-max-outstanding-"
2785 "split-trans\" (%d) invalid or exceeds device maximum"
2786 " (%d), using default value (%d)\n", state->ts_instance,
2787 max_out_splt_trans, designed_max_out_splt_trans,
2788 state->ts_cfg_profile->cp_max_out_splt_trans);
2789 max_out_splt_trans =
2790 state->ts_cfg_profile->cp_max_out_splt_trans;
2791 } else if (max_out_splt_trans == -1) {
2792 max_out_splt_trans =
2793 state->ts_cfg_profile->cp_max_out_splt_trans;
2794 }
2795
2796 /*
2797 * The config profile setting for max_out_splt_trans is determined
2798 * based on arch. Check tavor_cfg.c for more information. A value of
2799 * '-1' in the patchable variable means "do not change". A value of
2800 * '0' means 1 outstanding splt trans and other values as defined by
2801 * PCI. So we do one more check here, that if 'max_out_splt_trans' is
2802 * -1 (ie: < 0) we do not set the PCI command and leave it at the
2803 * default.
2804 */
2805 if (max_out_splt_trans >= 0) {
2806 command = ((command & 0xFF8F) | max_out_splt_trans << 4);
2807 }
2808
2809 /*
2810 * Check for config property specifying "maximum memory read
2811 * byte count. If the property is defined and valid
2812 * (i.e. no larger than the so-called "designed maximum"),
2813 * then use the specified value to update the PCI-X Command Register.
2814 * Otherwise, extract the value from the Tavor config profile.
2815 */
2816 designed_max_mem_rd_byte_cnt = ((status >> 21) & 3);
2817 max_mem_rd_byte_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
2818 DDI_PROP_DONTPASS, "pcix-max-read-byte-count", -1);
2819 if ((max_mem_rd_byte_cnt != -1) &&
2820 ((max_mem_rd_byte_cnt < 0) ||
2821 (max_mem_rd_byte_cnt > designed_max_mem_rd_byte_cnt))) {
2822 cmn_err(CE_NOTE, "!tavor%d: property \"pcix-max-read-byte-"
2823 "count\" (%d) invalid or exceeds device maximum"
2824 " (%d), using default value (%d)\n", state->ts_instance,
2825 max_mem_rd_byte_cnt, designed_max_mem_rd_byte_cnt,
2826 state->ts_cfg_profile->cp_max_mem_rd_byte_cnt);
2827 max_mem_rd_byte_cnt =
2828 state->ts_cfg_profile->cp_max_mem_rd_byte_cnt;
2829 } else if (max_mem_rd_byte_cnt == -1) {
2830 max_mem_rd_byte_cnt =
2831 state->ts_cfg_profile->cp_max_mem_rd_byte_cnt;
2832 }
2833
2834 /*
2835 * The config profile setting for max_mem_rd_byte_cnt is determined
2836 * based on arch. Check tavor_cfg.c for more information. A value of
2837 * '-1' in the patchable variable means "do not change". A value of
2838 * '0' means minimum (512B) read, and other values as defined by
2839 * PCI. So we do one more check here, that if 'max_mem_rd_byte_cnt' is
2840 * -1 (ie: < 0) we do not set the PCI command and leave it at the
2841 * default.
2842 */
2843 if (max_mem_rd_byte_cnt >= 0) {
2844 command = ((command & 0xFFF3) | max_mem_rd_byte_cnt << 2);
2845 }
2846
2847 /*
2848 * Update the PCI-X Command Register with the newly configured
2849 * values.
2850 */
2851 pci_config_put16(hdl, offset + 2, command);
2852 }
2853
2854
2855 /*
2856 * tavor_intr_or_msi_init()
2857 * Context: Only called from attach() path context
2858 */
2859 static int
tavor_intr_or_msi_init(tavor_state_t * state)2860 tavor_intr_or_msi_init(tavor_state_t *state)
2861 {
2862 int status;
2863
2864 /* Query for the list of supported interrupt event types */
2865 status = ddi_intr_get_supported_types(state->ts_dip,
2866 &state->ts_intr_types_avail);
2867 if (status != DDI_SUCCESS) {
2868 return (DDI_FAILURE);
2869 }
2870
2871 /*
2872 * If Tavor/Arbel supports MSI in this system (and, if it
2873 * hasn't been overridden by a configuration variable), then
2874 * the default behavior is to use a single MSI. Otherwise,
2875 * fallback to using legacy interrupts. Also, if MSI allocatis chosen,
2876 * but fails for whatever reasons, then fallback to using legacy
2877 * interrupts.
2878 */
2879 if ((state->ts_cfg_profile->cp_use_msi_if_avail != 0) &&
2880 (state->ts_intr_types_avail & DDI_INTR_TYPE_MSI)) {
2881 status = tavor_add_intrs(state, DDI_INTR_TYPE_MSI);
2882 if (status == DDI_SUCCESS) {
2883 state->ts_intr_type_chosen = DDI_INTR_TYPE_MSI;
2884 return (DDI_SUCCESS);
2885 }
2886 }
2887
2888 /*
2889 * MSI interrupt allocation failed, or was not available. Fallback to
2890 * legacy interrupt support.
2891 */
2892 if (state->ts_intr_types_avail & DDI_INTR_TYPE_FIXED) {
2893 status = tavor_add_intrs(state, DDI_INTR_TYPE_FIXED);
2894 if (status == DDI_SUCCESS) {
2895 state->ts_intr_type_chosen = DDI_INTR_TYPE_FIXED;
2896 return (DDI_SUCCESS);
2897 }
2898 }
2899
2900 /*
2901 * Neither MSI or legacy interrupts were successful. return failure.
2902 */
2903 return (DDI_FAILURE);
2904 }
2905
2906 /*
2907 * tavor_add_intrs()
2908 * Context: Only called from attach() patch context
2909 */
2910 static int
tavor_add_intrs(tavor_state_t * state,int intr_type)2911 tavor_add_intrs(tavor_state_t *state, int intr_type)
2912 {
2913 int status;
2914
2915 /* Get number of interrupts/MSI supported */
2916 status = ddi_intr_get_nintrs(state->ts_dip, intr_type,
2917 &state->ts_intrmsi_count);
2918 if (status != DDI_SUCCESS) {
2919 return (DDI_FAILURE);
2920 }
2921
2922 /* Get number of available interrupts/MSI */
2923 status = ddi_intr_get_navail(state->ts_dip, intr_type,
2924 &state->ts_intrmsi_avail);
2925 if (status != DDI_SUCCESS) {
2926 return (DDI_FAILURE);
2927 }
2928
2929 /* Ensure that we have at least one (1) usable MSI or interrupt */
2930 if ((state->ts_intrmsi_avail < 1) || (state->ts_intrmsi_count < 1)) {
2931 return (DDI_FAILURE);
2932 }
2933
2934 /* Attempt to allocate a single interrupt/MSI handle */
2935 status = ddi_intr_alloc(state->ts_dip, &state->ts_intrmsi_hdl,
2936 intr_type, 0, 1, &state->ts_intrmsi_allocd,
2937 DDI_INTR_ALLOC_STRICT);
2938 if (status != DDI_SUCCESS) {
2939 return (DDI_FAILURE);
2940 }
2941
2942 /* Ensure that we have allocated at least one (1) MSI or interrupt */
2943 if (state->ts_intrmsi_allocd < 1) {
2944 return (DDI_FAILURE);
2945 }
2946
2947 /*
2948 * Extract the priority for the allocated interrupt/MSI. This
2949 * will be used later when initializing certain mutexes.
2950 */
2951 status = ddi_intr_get_pri(state->ts_intrmsi_hdl,
2952 &state->ts_intrmsi_pri);
2953 if (status != DDI_SUCCESS) {
2954 /* Free the allocated interrupt/MSI handle */
2955 (void) ddi_intr_free(state->ts_intrmsi_hdl);
2956
2957 return (DDI_FAILURE);
2958 }
2959
2960 /* Make sure the interrupt/MSI priority is below 'high level' */
2961 if (state->ts_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
2962 /* Free the allocated interrupt/MSI handle */
2963 (void) ddi_intr_free(state->ts_intrmsi_hdl);
2964
2965 return (DDI_FAILURE);
2966 }
2967
2968 /* Get add'l capability information regarding interrupt/MSI */
2969 status = ddi_intr_get_cap(state->ts_intrmsi_hdl,
2970 &state->ts_intrmsi_cap);
2971 if (status != DDI_SUCCESS) {
2972 /* Free the allocated interrupt/MSI handle */
2973 (void) ddi_intr_free(state->ts_intrmsi_hdl);
2974
2975 return (DDI_FAILURE);
2976 }
2977
2978 return (DDI_SUCCESS);
2979 }
2980
2981
2982 /*
2983 * tavor_intr_or_msi_fini()
2984 * Context: Only called from attach() and/or detach() path contexts
2985 */
2986 static int
tavor_intr_or_msi_fini(tavor_state_t * state)2987 tavor_intr_or_msi_fini(tavor_state_t *state)
2988 {
2989 int status;
2990
2991 /* Free the allocated interrupt/MSI handle */
2992 status = ddi_intr_free(state->ts_intrmsi_hdl);
2993 if (status != DDI_SUCCESS) {
2994 return (DDI_FAILURE);
2995 }
2996
2997 return (DDI_SUCCESS);
2998 }
2999
3000
3001 /* Disable Tavor interrupts */
3002 static int
tavor_intr_disable(tavor_state_t * state)3003 tavor_intr_disable(tavor_state_t *state)
3004 {
3005 ushort_t msi_ctrl = 0, caps_ctrl = 0;
3006 ddi_acc_handle_t pci_cfg_hdl = state->ts_pci_cfghdl;
3007 ASSERT(pci_cfg_hdl != NULL);
3008 ASSERT(state->ts_intr_types_avail &
3009 (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI));
3010
3011 /*
3012 * Check if MSI interrupts are used. If so, disable MSI interupts.
3013 * If not, since Tavor doesn't support MSI-X interrupts, assuming the
3014 * legacy interrupt is used instead, disable the legacy interrupt.
3015 */
3016 if ((state->ts_cfg_profile->cp_use_msi_if_avail != 0) &&
3017 (state->ts_intr_types_avail & DDI_INTR_TYPE_MSI)) {
3018
3019 if ((PCI_CAP_LOCATE(pci_cfg_hdl, PCI_CAP_ID_MSI,
3020 &caps_ctrl) == DDI_SUCCESS)) {
3021 if ((msi_ctrl = PCI_CAP_GET16(pci_cfg_hdl, 0,
3022 caps_ctrl, PCI_MSI_CTRL)) == PCI_CAP_EINVAL16)
3023 return (DDI_FAILURE);
3024 }
3025 ASSERT(msi_ctrl != 0);
3026
3027 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
3028 return (DDI_SUCCESS);
3029
3030 if (msi_ctrl & PCI_MSI_PVM_MASK) {
3031 int offset = (msi_ctrl & PCI_MSI_64BIT_MASK) ?
3032 PCI_MSI_64BIT_MASKBITS : PCI_MSI_32BIT_MASK;
3033
3034 /* Clear all inums in MSI */
3035 PCI_CAP_PUT32(pci_cfg_hdl, 0, caps_ctrl, offset, 0);
3036 }
3037
3038 /* Disable MSI interrupts */
3039 msi_ctrl &= ~PCI_MSI_ENABLE_BIT;
3040 PCI_CAP_PUT16(pci_cfg_hdl, 0, caps_ctrl, PCI_MSI_CTRL,
3041 msi_ctrl);
3042
3043 } else {
3044 uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
3045 ASSERT(state->ts_intr_types_avail & DDI_INTR_TYPE_FIXED);
3046
3047 /* Disable the legacy interrupts */
3048 cmdreg |= PCI_COMM_INTX_DISABLE;
3049 pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
3050 }
3051
3052 return (DDI_SUCCESS);
3053 }
3054
3055 /* Tavor quiesce(9E) entry */
3056 static int
tavor_quiesce(dev_info_t * dip)3057 tavor_quiesce(dev_info_t *dip)
3058 {
3059 tavor_state_t *state = ddi_get_soft_state(tavor_statep,
3060 DEVI(dip)->devi_instance);
3061 ASSERT(state != NULL);
3062
3063 /* start fastreboot */
3064 state->ts_quiescing = B_TRUE;
3065
3066 /* If it's in maintenance mode, do nothing but return with SUCCESS */
3067 if (!TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
3068 return (DDI_SUCCESS);
3069 }
3070
3071 /* Shutdown HCA ports */
3072 if (tavor_hca_ports_shutdown(state,
3073 state->ts_cfg_profile->cp_num_ports) != TAVOR_CMD_SUCCESS) {
3074 state->ts_quiescing = B_FALSE;
3075 return (DDI_FAILURE);
3076 }
3077
3078 /* Close HCA */
3079 if (tavor_close_hca_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN) !=
3080 TAVOR_CMD_SUCCESS) {
3081 state->ts_quiescing = B_FALSE;
3082 return (DDI_FAILURE);
3083 }
3084
3085 /* Shutdown FW */
3086 if (tavor_sys_dis_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN) !=
3087 TAVOR_CMD_SUCCESS) {
3088 state->ts_quiescing = B_FALSE;
3089 return (DDI_FAILURE);
3090 }
3091
3092 /* Disable interrupts */
3093 if (tavor_intr_disable(state) != DDI_SUCCESS) {
3094 state->ts_quiescing = B_FALSE;
3095 return (DDI_FAILURE);
3096 }
3097
3098 /* SW-reset */
3099 if (tavor_sw_reset(state) != DDI_SUCCESS) {
3100 state->ts_quiescing = B_FALSE;
3101 return (DDI_FAILURE);
3102 }
3103
3104 return (DDI_SUCCESS);
3105 }
3106