/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 The University of Queensland
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2023 RackTop Systems, Inc.
 * Copyright 2023 MNX Cloud, Inc.
 */

/*
 * Mellanox Connect-X 4/5/6 driver.
 *
 * More details in mlxcx.c
 */

#ifndef _MLXCX_H
#define _MLXCX_H

/*
 * mlxcx(4D) definitions
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddifm.h>
#include <sys/id_space.h>
#include <sys/list.h>
#include <sys/taskq_impl.h>
#include <sys/stddef.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/mac_provider.h>
#include <sys/mac_ether.h>
#include <sys/cpuvar.h>
#include <sys/ethernet.h>

#include <inet/ip.h>
#include <inet/ip6.h>

#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>

#include <mlxcx_reg.h>

#ifdef __cplusplus
extern "C" {
#endif

#define MLXCX_VENDOR_ID 0x15b3

/*
 * The PCI device IDs for the cards we support. These correspond to the
 * device IDs in the driver manifest; the names were obtained from the
 * PCI ID database in /usr/share/hwdata/pci.ids.
 */
#define MLXCX_CX4_DEVID 0x1013
#define MLXCX_CX4_VF_DEVID 0x1014
#define MLXCX_CX4_LX_DEVID 0x1015
#define MLXCX_CX4_LX_VF_DEVID 0x1016
#define MLXCX_CX5_DEVID 0x1017
#define MLXCX_CX5_VF_DEVID 0x1018
#define MLXCX_CX5_EX_DEVID 0x1019
#define MLXCX_CX5_EX_VF_DEVID 0x101a
#define MLXCX_CX6_DEVID 0x101b
#define MLXCX_CX6_VF_DEVID 0x101c
#define MLXCX_CX6_DF_DEVID 0x101d
#define MLXCX_CX5_GEN_VF_DEVID 0x101e
#define MLXCX_CX6_LX_DEVID 0x101f

/*
 * Get access to the first PCI BAR.
 */
#define MLXCX_REG_NUMBER 1

/*
 * The command queue is supposed to be a page, which is 4k.
 */
#define MLXCX_CMD_DMA_PAGE_SIZE 4096

/*
 * Queues can allocate in units of this much memory.
 */
#define MLXCX_QUEUE_DMA_PAGE_SIZE 4096

/*
 * We advertise two sizes of groups to MAC -- a certain number of "large"
 * groups (including the default group, which is sized to at least ncpus)
 * followed by a certain number of "small" groups.
 *
 * This allows us to have a larger amount of classification resources available
 * for zones/VMs without resorting to software classification.
 */
#define MLXCX_RX_NGROUPS_LARGE_DFLT 2
#define MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT 16
#define MLXCX_RX_NGROUPS_SMALL_DFLT 256
#define MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT 4

#define MLXCX_TX_NGROUPS_DFLT 1
#define MLXCX_TX_NRINGS_PER_GROUP_DFLT 64

/*
 * Queues will be sized to (1 << *Q_SIZE_SHIFT) entries long.
 */
#define MLXCX_EQ_SIZE_SHIFT_DFLT 9

/*
 * The CQ, SQ and RQ sizes can affect throughput on higher speed interfaces.
 * The EQ less so, as it only takes a single EQ entry to indicate there are
 * multiple completions on the CQ.
 *
 * Particularly on the Rx side, the RQ (and corresponding CQ) can run
 * low on available entries. A symptom of this is the refill taskq running
 * frequently. A larger RQ (and CQ) alleviates this, and as there is a
 * close relationship between SQ and CQ size, the SQ is increased too.
 */
#define MLXCX_CQ_SIZE_SHIFT_DFLT 10
#define MLXCX_CQ_SIZE_SHIFT_25G 12

/*
 * Default to making SQs bigger than RQs for 9k MTU, since most packets will
 * spill over into more than one slot. RQ WQEs are always 1 slot.
 */
#define MLXCX_SQ_SIZE_SHIFT_DFLT 11
#define MLXCX_SQ_SIZE_SHIFT_25G 13

#define MLXCX_RQ_SIZE_SHIFT_DFLT 10
#define MLXCX_RQ_SIZE_SHIFT_25G 12
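
/*
 * Worked out, the shifts above give 512-entry EQs (1 << 9), 1024-entry
 * CQs and RQs (1 << 10) and 2048-entry SQs (1 << 11) by default; on 25G
 * and faster ports the CQ and RQ grow to 4096 entries and the SQ to 8192.
 */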

#define MLXCX_CQ_HWM_GAP 16
#define MLXCX_CQ_LWM_GAP 24

#define MLXCX_WQ_HWM_GAP MLXCX_CQ_HWM_GAP
#define MLXCX_WQ_LWM_GAP MLXCX_CQ_LWM_GAP

#define MLXCX_RQ_REFILL_STEP 64

/*
 * CQ event moderation
 */
#define MLXCX_CQEMOD_PERIOD_USEC_DFLT 50
#define MLXCX_CQEMOD_COUNT_DFLT \
    (8 * ((1 << MLXCX_CQ_SIZE_SHIFT_DFLT) / 10))
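
/*
 * Worked out, the default count is 8 * ((1 << 10) / 10) = 8 * 102 = 816
 * completions: with the default 1024-entry CQ, hardware is asked for an
 * event once roughly 80% of the ring has outstanding completions, or
 * after the 50usec period, whichever comes first.
 */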

/*
 * EQ interrupt moderation
 */
#define MLXCX_INTRMOD_PERIOD_USEC_DFLT 10

/* Size of root flow tables */
#define MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT 12

/* Size of 2nd level flow tables for VLAN filtering */
#define MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT 4

/*
 * How big does an mblk have to be before we dma_bind() it instead of
 * bcopying?
 */
#define MLXCX_TX_BIND_THRESHOLD_DFLT 2048

/*
 * How often to check the status of completion queues for overflow and
 * other problems.
 */
#define MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT 300
#define MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT 300
#define MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT 30

/*
 * After this many packets, the packets received so far are passed to
 * the mac layer.
 */
#define MLXCX_RX_PER_CQ_DEFAULT 256
#define MLXCX_RX_PER_CQ_MIN 16
#define MLXCX_RX_PER_CQ_MAX 4096

/*
 * Minimum size for packets loaned when >50% of a ring's buffers are already
 * on loan to MAC.
 */
#define MLXCX_P50_LOAN_MIN_SIZE_DFLT 256

#define MLXCX_DOORBELL_TRIES_DFLT 3
extern uint_t mlxcx_doorbell_tries;

#define MLXCX_STUCK_INTR_COUNT_DFLT 128
extern uint_t mlxcx_stuck_intr_count;

#define MLXCX_BUF_BIND_MAX_ATTEMTPS 50

#define MLXCX_MTU_OFFSET \
    (sizeof (struct ether_vlan_header) + ETHERFCSL)
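
/*
 * For reference, this offset works out to 18 bytes of Ethernet + VLAN
 * header (sizeof (struct ether_vlan_header)) plus 4 bytes of FCS
 * (ETHERFCSL), i.e. 22 bytes of framing overhead on top of the MTU.
 */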

/*
 * This is the current version of the command structure that the driver expects
 * to be found in the ISS.
 */
#define MLXCX_CMD_REVISION 5

#ifdef DEBUG
#define MLXCX_DMA_SYNC(dma, flag) VERIFY0(ddi_dma_sync( \
    (dma).mxdb_dma_handle, 0, 0, \
    (flag)))
#else
#define MLXCX_DMA_SYNC(dma, flag) (void) ddi_dma_sync( \
    (dma).mxdb_dma_handle, 0, 0, \
    (flag))
#endif
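
/*
 * Typical usage (illustrative only, assuming 'cq' and 'wq' point to the
 * queue structures defined later in this header): sync a bound buffer for
 * the CPU before reading data the device has written, and for the device
 * after the CPU has written to it:
 *
 *     MLXCX_DMA_SYNC(cq->mlcq_dma, DDI_DMA_SYNC_FORKERNEL);
 *     ...
 *     MLXCX_DMA_SYNC(wq->mlwq_dma, DDI_DMA_SYNC_FORDEV);
 */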

#define MLXCX_FM_SERVICE_MLXCX "mlxcx"

/*
 * This macro defines the expected value of the 'Interface Step Sequence ID'
 * (issi) which represents the version of the start up and tear down sequence.
 * We must check that hardware supports this and tell it which version we're
 * using as well.
 */
#define MLXCX_CURRENT_ISSI 1

/*
 * This is the size of a page that the hardware expects from us when
 * manipulating pages.
 */
#define MLXCX_HW_PAGE_SIZE 4096

/*
 * This is a special lkey value used to terminate a list of scatter pointers.
 */
#define MLXCX_NULL_LKEY 0x100

/*
 * The max function id we support in manage pages requests.
 * At the moment we only support/expect func 0 from manage pages, but
 * structures and code are in place to support any number.
 */
#define MLXCX_FUNC_ID_MAX 0

/*
 * Forwards
 */
struct mlxcx;
typedef struct mlxcx mlxcx_t;
typedef struct mlxcx_cmd mlxcx_cmd_t;
typedef struct mlxcx_port mlxcx_port_t;

typedef struct {
    mlxcx_t *mlp_mlx;
    int32_t mlp_npages;
    uint16_t mlp_func;
} mlxcx_pages_request_t;

typedef struct mlxcx_async_param {
    mlxcx_t *mla_mlx;
    taskq_ent_t mla_tqe;
    boolean_t mla_pending;
    kmutex_t mla_mtx;

    /*
     * Parameters specific to the function dispatched.
     */
    union {
        void *mla_arg;
        mlxcx_pages_request_t mla_pages;
        mlxcx_port_t *mla_port;
    };
} mlxcx_async_param_t;

typedef enum {
    MLXCX_DMABUF_HDL_ALLOC = 1 << 0,
    MLXCX_DMABUF_MEM_ALLOC = 1 << 1,
    MLXCX_DMABUF_BOUND = 1 << 2,
    MLXCX_DMABUF_FOREIGN = 1 << 3,
} mlxcx_dma_buffer_flags_t;

typedef struct mlxcx_dma_buffer {
    mlxcx_dma_buffer_flags_t mxdb_flags;
    caddr_t mxdb_va; /* Buffer VA */
    size_t mxdb_len; /* Buffer logical len */
    ddi_acc_handle_t mxdb_acc_handle;
    ddi_dma_handle_t mxdb_dma_handle;
    uint_t mxdb_ncookies;
} mlxcx_dma_buffer_t;

typedef struct mlxcx_dev_page {
    list_node_t mxdp_list;
    avl_node_t mxdp_tree;
    uintptr_t mxdp_pa;
    mlxcx_dma_buffer_t mxdp_dma;
} mlxcx_dev_page_t;

/*
 * Data structure to keep track of all information related to the command queue.
 */
typedef enum {
    MLXCX_CMD_QUEUE_S_IDLE = 1,
    MLXCX_CMD_QUEUE_S_BUSY,
    MLXCX_CMD_QUEUE_S_BROKEN
} mlxcx_cmd_queue_status_t;

typedef struct mlxcx_cmd_queue {
    kmutex_t mcmd_lock;
    kcondvar_t mcmd_cv;
    mlxcx_dma_buffer_t mcmd_dma;

    boolean_t mcmd_polled;

    uint8_t mcmd_size_l2;
    uint8_t mcmd_stride_l2;
    uint_t mcmd_size;
    /*
     * The mask has a bit for each command slot; there are a maximum
     * of 32 slots. When a bit is set in the mask, the corresponding
     * slot is available.
     */
    uint32_t mcmd_mask;

    mlxcx_cmd_t *mcmd_active[MLXCX_CMD_MAX];

    ddi_taskq_t *mcmd_taskq;
    id_space_t *mcmd_tokens;
} mlxcx_cmd_queue_t;
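
/*
 * A minimal sketch (an assumption, not necessarily the driver's exact
 * logic) of how a free slot could be claimed from mcmd_mask while holding
 * mcmd_lock:
 *
 *     int slot = ddi_ffs(mcmd->mcmd_mask) - 1;
 *     if (slot >= 0)
 *         mcmd->mcmd_mask &= ~(1U << slot);
 *
 * ddi_ffs(9F) returns the 1-based position of the least significant set
 * bit, or 0 when no slot is free.
 */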

typedef struct mlxcx_cmd_mbox {
    list_node_t mlbox_node;
    mlxcx_dma_buffer_t mlbox_dma;
    mlxcx_cmd_mailbox_t *mlbox_data;
} mlxcx_cmd_mbox_t;

typedef enum {
    MLXCX_EQ_ALLOC = 1 << 0, /* dma mem alloc'd, size set */
    MLXCX_EQ_CREATED = 1 << 1, /* CREATE_EQ sent to hw */
    MLXCX_EQ_DESTROYED = 1 << 2, /* DESTROY_EQ sent to hw */
    MLXCX_EQ_ARMED = 1 << 3, /* Armed through the UAR */
    MLXCX_EQ_POLLING = 1 << 4, /* Currently being polled */
    MLXCX_EQ_INTR_ENABLED = 1 << 5, /* ddi_intr_enable()'d */
    MLXCX_EQ_INTR_ACTIVE = 1 << 6, /* 'rupt handler running */
    MLXCX_EQ_INTR_QUIESCE = 1 << 7, /* 'rupt handler to quiesce */
    MLXCX_EQ_ATTACHING = 1 << 8, /* mlxcx_attach still running */
} mlxcx_eventq_state_t;

typedef struct mlxcx_bf {
    kmutex_t mbf_mtx;
    uint_t mbf_cnt;
    uint_t mbf_even;
    uint_t mbf_odd;
} mlxcx_bf_t;

typedef struct mlxcx_uar {
    boolean_t mlu_allocated;
    uint_t mlu_num;
    uint_t mlu_base;

    volatile uint_t mlu_bfcnt;
    mlxcx_bf_t mlu_bf[MLXCX_BF_PER_UAR];
} mlxcx_uar_t;

typedef struct mlxcx_pd {
    boolean_t mlpd_allocated;
    uint32_t mlpd_num;
} mlxcx_pd_t;

typedef struct mlxcx_tdom {
    boolean_t mltd_allocated;
    uint32_t mltd_num;
} mlxcx_tdom_t;

typedef enum {
    MLXCX_PORT_VPORT_PROMISC = 1 << 0,
} mlxcx_port_flags_t;

typedef struct mlxcx_flow_table mlxcx_flow_table_t;
typedef struct mlxcx_flow_group mlxcx_flow_group_t;

typedef struct {
    uint64_t mlps_rx_drops;
} mlxcx_port_stats_t;

typedef enum {
    MLXCX_PORT_INIT = 1 << 0
} mlxcx_port_init_t;

struct mlxcx_port {
    kmutex_t mlp_mtx;
    mlxcx_port_init_t mlp_init;
    mlxcx_t *mlp_mlx;
    /*
     * The mlp_num we have here starts at zero (it's an index), but the
     * numbering we have to use for register access starts at 1. We
     * currently write mlp_num into the other_vport fields in mlxcx_cmd.c
     * (where 0 is a magic number meaning "my vport") so if we ever add
     * support for virtualisation features and deal with more than one
     * vport, we will probably have to change this.
     */
    uint_t mlp_num;
    mlxcx_port_flags_t mlp_flags;
    uint64_t mlp_guid;
    uint8_t mlp_mac_address[ETHERADDRL];

    uint_t mlp_mtu;
    uint_t mlp_max_mtu;

    mlxcx_port_status_t mlp_admin_status;
    mlxcx_port_status_t mlp_oper_status;

    boolean_t mlp_autoneg;
    mlxcx_eth_proto_t mlp_max_proto;
    mlxcx_eth_proto_t mlp_admin_proto;
    mlxcx_eth_proto_t mlp_oper_proto;
    mlxcx_ext_eth_proto_t mlp_ext_max_proto;
    mlxcx_ext_eth_proto_t mlp_ext_admin_proto;
    mlxcx_ext_eth_proto_t mlp_ext_oper_proto;
    mlxcx_pplm_fec_active_t mlp_fec_active;
    link_fec_t mlp_fec_requested;

    mlxcx_eth_inline_mode_t mlp_wqe_min_inline;

    /* Root flow tables */
    mlxcx_flow_table_t *mlp_rx_flow;
    mlxcx_flow_table_t *mlp_tx_flow;

    mlxcx_flow_group_t *mlp_promisc;
    mlxcx_flow_group_t *mlp_bcast;
    mlxcx_flow_group_t *mlp_umcast;

    avl_tree_t mlp_dmac_fe;

    mlxcx_port_stats_t mlp_stats;

    mlxcx_module_status_t mlp_last_modstate;
    mlxcx_module_error_type_t mlp_last_moderr;

    mlxcx_async_param_t mlx_port_event;
};

typedef enum {
    MLXCX_EQ_TYPE_ANY,
    MLXCX_EQ_TYPE_RX,
    MLXCX_EQ_TYPE_TX
} mlxcx_eventq_type_t;

/*
 * mlxcx_event_queue_t is a representation of an event queue (EQ).
 * There is a 1:1 tie between an EQ and an interrupt vector, and
 * knowledge of that affects how some members of the struct are used
 * and modified.
 *
 * Most of the struct members are immutable except during set up and
 * teardown; for those it is safe to access them without a mutex once
 * the driver is initialized.
 *
 * Members which are not immutable and are protected by mleq_mtx are:
 * * mleq_state - EQ state. Changes during transitions between
 *   polling modes.
 * * mleq_cqs - an AVL tree of completion queues using this EQ.
 *
 * Another member which is not immutable is mleq_cc. This is the EQ
 * consumer counter; it *must* only be incremented in the EQ's interrupt
 * context. It is also fed back to the hardware during re-arming of
 * the EQ; again, this *must* only happen in the EQ's interrupt context.
 *
 * There are a couple of struct members (mleq_check_disarm_cc and
 * mleq_check_disarm_cnt) which are used to help monitor the health
 * and consistency of the EQ. They are only used and modified during health
 * monitoring, which is both infrequent and single threaded; consequently
 * no mutex guards are needed.
 *
 * Care is taken not to use the mleq_mtx when possible, both to avoid
 * contention in what is "hot" code and to avoid breaking requirements
 * of mac(9E).
 */
typedef struct mlxcx_event_queue {
    kmutex_t mleq_mtx;
    kcondvar_t mleq_cv;
    mlxcx_t *mleq_mlx;
    mlxcx_eventq_state_t mleq_state;
    mlxcx_eventq_type_t mleq_type;

    mlxcx_dma_buffer_t mleq_dma;

    size_t mleq_entshift;
    size_t mleq_nents;
    mlxcx_eventq_ent_t *mleq_ent;
    uint32_t mleq_cc; /* consumer counter */
    uint32_t mleq_cc_armed;

    uint32_t mleq_events;

    uint32_t mleq_badintrs;

    /* Hardware eq number */
    uint_t mleq_num;
    /* Index into the mlxcx_t's interrupts array */
    uint_t mleq_intr_index;

    /* UAR region that has this EQ's doorbell in it */
    mlxcx_uar_t *mleq_uar;

    /* Tree of CQn => mlxcx_completion_queue_t */
    avl_tree_t mleq_cqs;

    uint32_t mleq_check_disarm_cc;
    uint_t mleq_check_disarm_cnt;
} mlxcx_event_queue_t;
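
/*
 * Since the EQ is sized to a power of two (mleq_nents ==
 * 1 << mleq_entshift; see MLXCX_EQ_SIZE_SHIFT_DFLT above), the entry the
 * consumer counter refers to can be found by masking, e.g. (illustrative):
 *
 *     mlxcx_eventq_ent_t *ent =
 *         &mleq->mleq_ent[mleq->mleq_cc & (mleq->mleq_nents - 1)];
 */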

typedef enum {
    MLXCX_TIS_CREATED = 1 << 0,
    MLXCX_TIS_DESTROYED = 1 << 1,
} mlxcx_tis_state_t;

typedef struct mlxcx_tis {
    mlxcx_tis_state_t mltis_state;
    list_node_t mltis_entry;
    uint_t mltis_num;
    mlxcx_tdom_t *mltis_tdom;
} mlxcx_tis_t;

typedef enum {
    MLXCX_BUFFER_INIT,
    MLXCX_BUFFER_FREE,
    MLXCX_BUFFER_ON_WQ,
    MLXCX_BUFFER_ON_LOAN,
    MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;

typedef enum {
    MLXCX_SHARD_READY,
    MLXCX_SHARD_DRAINING,
} mlxcx_shard_state_t;

typedef struct mlxcx_buf_shard {
    mlxcx_shard_state_t mlbs_state;
    list_node_t mlbs_entry;
    kmutex_t mlbs_mtx;
    uint64_t mlbs_ntotal;
    uint64_t mlbs_nloaned;
    uint64_t mlbs_hiwat1;
    uint64_t mlbs_hiwat2;
    list_t mlbs_busy;
    list_t mlbs_free;
    list_t mlbs_loaned;
    kcondvar_t mlbs_free_nonempty;
} mlxcx_buf_shard_t;

typedef struct mlxcx_buffer {
    mlxcx_buf_shard_t *mlb_shard;
    list_node_t mlb_entry;
    list_node_t mlb_cq_entry;

    struct mlxcx_buffer *mlb_tx_head; /* head of tx chain */
    list_t mlb_tx_chain;
    list_node_t mlb_tx_chain_entry;

    boolean_t mlb_foreign;
    size_t mlb_used;
    mblk_t *mlb_tx_mp;

    /*
     * The number of work queue basic blocks this buf uses.
     */
    uint_t mlb_wqebbs;

    mlxcx_t *mlb_mlx;
    mlxcx_buffer_state_t mlb_state;
    uint_t mlb_wqe_index;
    mlxcx_dma_buffer_t mlb_dma;
    mblk_t *mlb_mp;
    frtn_t mlb_frtn;
} mlxcx_buffer_t;

typedef enum {
    MLXCX_CQ_ALLOC = 1 << 0,
    MLXCX_CQ_CREATED = 1 << 1,
    MLXCX_CQ_DESTROYED = 1 << 2,
    MLXCX_CQ_EQAVL = 1 << 3,
    MLXCX_CQ_BLOCKED_MAC = 1 << 4,
    MLXCX_CQ_TEARDOWN = 1 << 5,
    MLXCX_CQ_POLLING = 1 << 6,
    MLXCX_CQ_ARMED = 1 << 7,
} mlxcx_completionq_state_t;

typedef struct mlxcx_work_queue mlxcx_work_queue_t;

typedef struct mlxcx_completion_queue {
    kmutex_t mlcq_mtx;
    kmutex_t mlcq_arm_mtx;
    mlxcx_t *mlcq_mlx;
    mlxcx_completionq_state_t mlcq_state;

    mlxcx_port_stats_t *mlcq_stats;

    list_node_t mlcq_entry;
    avl_node_t mlcq_eq_entry;

    uint_t mlcq_num;

    mlxcx_work_queue_t *mlcq_wq;
    mlxcx_event_queue_t *mlcq_eq;

    /* UAR region that has this CQ's UAR doorbell in it */
    mlxcx_uar_t *mlcq_uar;

    mlxcx_dma_buffer_t mlcq_dma;

    size_t mlcq_entshift;
    size_t mlcq_nents;
    mlxcx_completionq_ent_t *mlcq_ent;
    uint32_t mlcq_cc; /* consumer counter */
    uint32_t mlcq_cc_armed; /* cc at last arm */
    uint32_t mlcq_ec; /* event counter */
    uint32_t mlcq_ec_armed; /* ec at last arm */

    mlxcx_dma_buffer_t mlcq_doorbell_dma;
    mlxcx_completionq_doorbell_t *mlcq_doorbell;

    uint64_t mlcq_bufcnt;
    size_t mlcq_bufhwm;
    size_t mlcq_buflwm;
    list_t mlcq_buffers;
    kmutex_t mlcq_bufbmtx;
    list_t mlcq_buffers_b;

    uint_t mlcq_check_disarm_cnt;
    uint64_t mlcq_check_disarm_cc;

    uint_t mlcq_cqemod_period_usec;
    uint_t mlcq_cqemod_count;

    mac_ring_handle_t mlcq_mac_hdl;
    uint64_t mlcq_mac_gen;

    boolean_t mlcq_fm_repd_qstate;
} mlxcx_completion_queue_t;

typedef enum {
    MLXCX_WQ_ALLOC = 1 << 0,
    MLXCX_WQ_CREATED = 1 << 1,
    MLXCX_WQ_STARTED = 1 << 2,
    MLXCX_WQ_DESTROYED = 1 << 3,
    MLXCX_WQ_TEARDOWN = 1 << 4,
    MLXCX_WQ_BUFFERS = 1 << 5,
    MLXCX_WQ_REFILLING = 1 << 6,
    MLXCX_WQ_BLOCKED_MAC = 1 << 7
} mlxcx_workq_state_t;

typedef enum {
    MLXCX_WQ_TYPE_SENDQ = 1,
    MLXCX_WQ_TYPE_RECVQ
} mlxcx_workq_type_t;

typedef struct mlxcx_ring_group mlxcx_ring_group_t;

struct mlxcx_work_queue {
    kmutex_t mlwq_mtx;
    mlxcx_t *mlwq_mlx;
    mlxcx_workq_type_t mlwq_type;
    mlxcx_workq_state_t mlwq_state;

    list_node_t mlwq_entry;
    list_node_t mlwq_group_entry;

    mlxcx_ring_group_t *mlwq_group;

    uint_t mlwq_num;

    mlxcx_completion_queue_t *mlwq_cq;
    mlxcx_pd_t *mlwq_pd;

    /* Required for send queues */
    mlxcx_tis_t *mlwq_tis;

    /* UAR region that has this WQ's blueflame buffers in it */
    mlxcx_uar_t *mlwq_uar;

    mlxcx_dma_buffer_t mlwq_dma;

    mlxcx_eth_inline_mode_t mlwq_inline_mode;
    size_t mlwq_entshift;
    size_t mlwq_nents;
    /* Discriminate based on mlwq_type */
    union {
        mlxcx_sendq_ent_t *mlwq_send_ent;
        mlxcx_sendq_extra_ent_t *mlwq_send_extra_ent;
        mlxcx_recvq_ent_t *mlwq_recv_ent;
        mlxcx_sendq_bf_t *mlwq_bf_ent;
    };
    uint64_t mlwq_pc; /* producer counter */

    uint64_t mlwq_wqebb_used;
    size_t mlwq_bufhwm;
    size_t mlwq_buflwm;

    mlxcx_dma_buffer_t mlwq_doorbell_dma;
    mlxcx_workq_doorbell_t *mlwq_doorbell;

    mlxcx_buf_shard_t *mlwq_bufs;
    mlxcx_buf_shard_t *mlwq_foreign_bufs;

    taskq_ent_t mlwq_tqe;

    boolean_t mlwq_fm_repd_qstate;
};

#define MLXCX_RQT_MAX_SIZE 64

typedef enum {
    MLXCX_RQT_CREATED = 1 << 0,
    MLXCX_RQT_DESTROYED = 1 << 1,
    MLXCX_RQT_DIRTY = 1 << 2,
} mlxcx_rqtable_state_t;

typedef struct mlxcx_rqtable {
    mlxcx_rqtable_state_t mlrqt_state;
    list_node_t mlrqt_entry;
    uint_t mlrqt_num;

    size_t mlrqt_max;
    size_t mlrqt_used;

    size_t mlrqt_rq_size;
    mlxcx_work_queue_t **mlrqt_rq;
} mlxcx_rqtable_t;

typedef enum {
    MLXCX_TIR_CREATED = 1 << 0,
    MLXCX_TIR_DESTROYED = 1 << 1,
} mlxcx_tir_state_t;

typedef struct mlxcx_tir {
    mlxcx_tir_state_t mltir_state;
    list_node_t mltir_entry;
    uint_t mltir_num;
    mlxcx_tdom_t *mltir_tdom;
    mlxcx_tir_type_t mltir_type;
    union {
        mlxcx_rqtable_t *mltir_rqtable;
        mlxcx_work_queue_t *mltir_rq;
    };
    mlxcx_tir_hash_fn_t mltir_hash_fn;
    uint8_t mltir_toeplitz_key[40];
    mlxcx_tir_rx_hash_l3_type_t mltir_l3_type;
    mlxcx_tir_rx_hash_l4_type_t mltir_l4_type;
    mlxcx_tir_rx_hash_fields_t mltir_hash_fields;
} mlxcx_tir_t;

typedef enum {
    MLXCX_FLOW_GROUP_CREATED = 1 << 0,
    MLXCX_FLOW_GROUP_BUSY = 1 << 1,
    MLXCX_FLOW_GROUP_DESTROYED = 1 << 2,
} mlxcx_flow_group_state_t;

typedef enum {
    MLXCX_FLOW_MATCH_SMAC = 1 << 0,
    MLXCX_FLOW_MATCH_DMAC = 1 << 1,
    MLXCX_FLOW_MATCH_VLAN = 1 << 2,
    MLXCX_FLOW_MATCH_VID = 1 << 3,
    MLXCX_FLOW_MATCH_IP_VER = 1 << 4,
    MLXCX_FLOW_MATCH_SRCIP = 1 << 5,
    MLXCX_FLOW_MATCH_DSTIP = 1 << 6,
    MLXCX_FLOW_MATCH_IP_PROTO = 1 << 7,
    MLXCX_FLOW_MATCH_SQN = 1 << 8,
    MLXCX_FLOW_MATCH_VXLAN = 1 << 9,
} mlxcx_flow_mask_t;

struct mlxcx_flow_group {
    list_node_t mlfg_entry;
    list_node_t mlfg_role_entry;
    mlxcx_flow_group_state_t mlfg_state;
    mlxcx_flow_table_t *mlfg_table;
    uint_t mlfg_num;
    size_t mlfg_start_idx;
    size_t mlfg_size;
    size_t mlfg_avail;
    list_t mlfg_entries;
    mlxcx_flow_mask_t mlfg_mask;
};

typedef enum {
    MLXCX_FLOW_ENTRY_RESERVED = 1 << 0,
    MLXCX_FLOW_ENTRY_CREATED = 1 << 1,
    MLXCX_FLOW_ENTRY_DELETED = 1 << 2,
    MLXCX_FLOW_ENTRY_DIRTY = 1 << 3,
} mlxcx_flow_entry_state_t;

typedef struct {
    mlxcx_tir_t *mlfed_tir;
    mlxcx_flow_table_t *mlfed_flow;
} mlxcx_flow_entry_dest_t;

typedef struct mlxcx_flow_entry {
    list_node_t mlfe_group_entry;
    avl_node_t mlfe_dmac_entry;
    mlxcx_flow_entry_state_t mlfe_state;
    mlxcx_flow_table_t *mlfe_table;
    mlxcx_flow_group_t *mlfe_group;
    uint_t mlfe_index;

    mlxcx_flow_action_t mlfe_action;

    /* Criteria for match */
    uint8_t mlfe_smac[ETHERADDRL];
    uint8_t mlfe_dmac[ETHERADDRL];

    mlxcx_vlan_type_t mlfe_vlan_type;
    uint16_t mlfe_vid;

    uint_t mlfe_ip_version;
    uint8_t mlfe_srcip[IPV6_ADDR_LEN];
    uint8_t mlfe_dstip[IPV6_ADDR_LEN];

    uint_t mlfe_ip_proto;
    uint16_t mlfe_sport;
    uint16_t mlfe_dport;

    uint32_t mlfe_sqn;
    uint32_t mlfe_vxlan_vni;

    /* Destinations */
    size_t mlfe_ndest;
    mlxcx_flow_entry_dest_t mlfe_dest[MLXCX_FLOW_MAX_DESTINATIONS];

    /*
     * mlxcx_group_mac_ts joining this entry to N ring groups;
     * only used by FEs on the root rx flow table.
     */
    list_t mlfe_ring_groups;
} mlxcx_flow_entry_t;

typedef enum {
    MLXCX_FLOW_TABLE_CREATED = 1 << 0,
    MLXCX_FLOW_TABLE_DESTROYED = 1 << 1,
    MLXCX_FLOW_TABLE_ROOT = 1 << 2
} mlxcx_flow_table_state_t;

struct mlxcx_flow_table {
    kmutex_t mlft_mtx;
    mlxcx_flow_table_state_t mlft_state;
    uint_t mlft_level;
    uint_t mlft_num;
    mlxcx_flow_table_type_t mlft_type;

    mlxcx_port_t *mlft_port;

    size_t mlft_entshift;
    size_t mlft_nents;

    size_t mlft_entsize;
    mlxcx_flow_entry_t *mlft_ent;

    /* First entry not yet claimed by a group */
    size_t mlft_next_ent;

    list_t mlft_groups;
};

typedef enum {
    MLXCX_GROUP_RX,
    MLXCX_GROUP_TX
} mlxcx_group_type_t;

typedef enum {
    MLXCX_GROUP_INIT = 1 << 0,
    MLXCX_GROUP_WQS = 1 << 1,
    MLXCX_GROUP_TIRTIS = 1 << 2,
    MLXCX_GROUP_FLOWS = 1 << 3,
    MLXCX_GROUP_RUNNING = 1 << 4,
    MLXCX_GROUP_RQT = 1 << 5,
} mlxcx_group_state_t;

#define MLXCX_RX_HASH_FT_SIZE_SHIFT 4

typedef enum {
    MLXCX_TIR_ROLE_IPv4 = 0,
    MLXCX_TIR_ROLE_IPv6,
    MLXCX_TIR_ROLE_TCPv4,
    MLXCX_TIR_ROLE_TCPv6,
    MLXCX_TIR_ROLE_UDPv4,
    MLXCX_TIR_ROLE_UDPv6,
    MLXCX_TIR_ROLE_OTHER,

    MLXCX_TIRS_PER_GROUP
} mlxcx_tir_role_t;

typedef struct {
    avl_node_t mlgm_group_entry;
    list_node_t mlgm_fe_entry;
    mlxcx_ring_group_t *mlgm_group;
    uint8_t mlgm_mac[6];
    mlxcx_flow_entry_t *mlgm_fe;
} mlxcx_group_mac_t;

typedef struct {
    list_node_t mlgv_entry;
    boolean_t mlgv_tagged;
    uint16_t mlgv_vid;
    mlxcx_flow_entry_t *mlgv_fe;
} mlxcx_group_vlan_t;

struct mlxcx_ring_group {
    kmutex_t mlg_mtx;
    mlxcx_t *mlg_mlx;
    mlxcx_group_state_t mlg_state;
    mlxcx_group_type_t mlg_type;

    mac_group_handle_t mlg_mac_hdl;

    union {
        mlxcx_tis_t mlg_tis;
        mlxcx_tir_t mlg_tir[MLXCX_TIRS_PER_GROUP];
    };
    mlxcx_port_t *mlg_port;

    size_t mlg_nwqs;
    size_t mlg_wqs_size;
    mlxcx_work_queue_t *mlg_wqs;

    mlxcx_rqtable_t *mlg_rqt;

    /*
     * Flow table for matching VLAN IDs
     */
    mlxcx_flow_table_t *mlg_rx_vlan_ft;
    mlxcx_flow_group_t *mlg_rx_vlan_fg;
    mlxcx_flow_group_t *mlg_rx_vlan_def_fg;
    mlxcx_flow_group_t *mlg_rx_vlan_promisc_fg;
    list_t mlg_rx_vlans;

    taskq_t *mlg_refill_tq;

    /*
     * Flow table for separating out by protocol before hashing
     */
    mlxcx_flow_table_t *mlg_rx_hash_ft;

    /*
     * Links to flow entries on the root flow table which are pointing to
     * our rx_vlan_ft.
     */
    avl_tree_t mlg_rx_macs;
};

typedef enum mlxcx_cmd_state {
    MLXCX_CMD_S_DONE = 1 << 0,
    MLXCX_CMD_S_ERROR = 1 << 1
} mlxcx_cmd_state_t;

struct mlxcx_cmd {
    struct mlxcx *mlcmd_mlxp;
    kmutex_t mlcmd_lock;
    kcondvar_t mlcmd_cv;

    boolean_t mlcmd_poll;
    uint8_t mlcmd_token;
    mlxcx_cmd_op_t mlcmd_op;

    /*
     * Command data and extended mailboxes for responses.
     */
    const void *mlcmd_in;
    uint32_t mlcmd_inlen;
    void *mlcmd_out;
    uint32_t mlcmd_outlen;
    list_t mlcmd_mbox_in;
    uint8_t mlcmd_nboxes_in;
    list_t mlcmd_mbox_out;
    uint8_t mlcmd_nboxes_out;
    /*
     * Status information.
     */
    mlxcx_cmd_state_t mlcmd_state;
    uint8_t mlcmd_status;
};

/*
 * Our view of capabilities.
 */
typedef struct mlxcx_hca_cap {
    mlxcx_hca_cap_mode_t mhc_mode;
    mlxcx_hca_cap_type_t mhc_type;
    union {
        uint8_t mhc_bulk[MLXCX_HCA_CAP_SIZE];
        mlxcx_hca_cap_general_caps_t mhc_general;
        mlxcx_hca_cap_eth_caps_t mhc_eth;
        mlxcx_hca_cap_flow_caps_t mhc_flow;
    };
} mlxcx_hca_cap_t;

typedef struct {
    /* Cooked values */
    boolean_t mlc_checksum;
    boolean_t mlc_lso;
    boolean_t mlc_vxlan;
    boolean_t mlc_pcam;
    boolean_t mlc_ext_ptys;
    size_t mlc_max_lso_size;
    size_t mlc_max_rqt_size;

    size_t mlc_max_rx_ft_shift;
    size_t mlc_max_rx_fe_dest;
    size_t mlc_max_rx_flows;
    size_t mlc_max_rx_ft;

    size_t mlc_max_tir;

    /* Raw caps data */
    mlxcx_hca_cap_t mlc_hca_cur;
    mlxcx_hca_cap_t mlc_hca_max;
    mlxcx_hca_cap_t mlc_ether_cur;
    mlxcx_hca_cap_t mlc_ether_max;
    mlxcx_hca_cap_t mlc_nic_flow_cur;
    mlxcx_hca_cap_t mlc_nic_flow_max;
} mlxcx_caps_t;

typedef struct {
    uint_t mldp_eq_size_shift;
    uint_t mldp_cq_size_shift;
    uint_t mldp_cq_size_shift_default;
    uint_t mldp_rq_size_shift;
    uint_t mldp_rq_size_shift_default;
    uint_t mldp_sq_size_shift;
    uint_t mldp_sq_size_shift_default;
    uint_t mldp_cqemod_period_usec;
    uint_t mldp_cqemod_count;
    uint_t mldp_intrmod_period_usec;
    uint_t mldp_rx_ngroups_large;
    uint_t mldp_rx_ngroups_small;
    uint_t mldp_rx_nrings_per_large_group;
    uint_t mldp_rx_nrings_per_small_group;
    uint_t mldp_rx_per_cq;
    uint_t mldp_tx_ngroups;
    uint_t mldp_tx_nrings_per_group;
    uint_t mldp_ftbl_root_size_shift;
    size_t mldp_tx_bind_threshold;
    uint_t mldp_ftbl_vlan_size_shift;
    uint64_t mldp_eq_check_interval_sec;
    uint64_t mldp_cq_check_interval_sec;
    uint64_t mldp_wq_check_interval_sec;
    uint_t mldp_rx_p50_loan_min_size;
} mlxcx_drv_props_t;

typedef struct {
    mlxcx_t *mlts_mlx;
    uint8_t mlts_index;
    id_t mlts_ksensor;
    int16_t mlts_value;
    int16_t mlts_max_value;
    uint8_t mlts_name[MLXCX_MTMP_NAMELEN];
} mlxcx_temp_sensor_t;

/*
 * The oldest card supported by this driver is ConnectX-4. So far (at least),
 * newer models tend to just add features vs. replacing them, so it seems
 * reasonable to assume an unknown model likely supports everything the
 * ConnectX-6 cards do.
 */
typedef enum {
    MLXCX_DEV_CX4 = 0,
    MLXCX_DEV_CX5 = 1,
    MLXCX_DEV_CX6 = 2,
    MLXCX_DEV_UNKNOWN = 3,
} mlxcx_dev_type_t;

typedef enum {
    MLXCX_ATTACH_FM = 1 << 0,
    MLXCX_ATTACH_PCI_CONFIG = 1 << 1,
    MLXCX_ATTACH_REGS = 1 << 2,
    MLXCX_ATTACH_CMD = 1 << 3,
    MLXCX_ATTACH_ENABLE_HCA = 1 << 4,
    MLXCX_ATTACH_PAGE_LIST = 1 << 5,
    MLXCX_ATTACH_INIT_HCA = 1 << 6,
    MLXCX_ATTACH_UAR_PD_TD = 1 << 7,
    MLXCX_ATTACH_INTRS = 1 << 8,
    MLXCX_ATTACH_PORTS = 1 << 9,
    MLXCX_ATTACH_MAC_HDL = 1 << 10,
    MLXCX_ATTACH_CQS = 1 << 11,
    MLXCX_ATTACH_WQS = 1 << 12,
    MLXCX_ATTACH_GROUPS = 1 << 13,
    MLXCX_ATTACH_BUFS = 1 << 14,
    MLXCX_ATTACH_CAPS = 1 << 15,
    MLXCX_ATTACH_CHKTIMERS = 1 << 16,
    MLXCX_ATTACH_ASYNC_TQ = 1 << 17,
    MLXCX_ATTACH_SENSORS = 1 << 18
} mlxcx_attach_progress_t;

struct mlxcx {
    /* entry on the mlxcx_glist */
    list_node_t mlx_gentry;

    dev_info_t *mlx_dip;
    int mlx_inst;
    mlxcx_attach_progress_t mlx_attach;

    mlxcx_dev_type_t mlx_type;
    mlxcx_drv_props_t mlx_props;

    /*
     * Misc. data
     */
    uint16_t mlx_fw_maj;
    uint16_t mlx_fw_min;
    uint16_t mlx_fw_rev;
    uint16_t mlx_cmd_rev;

    /*
     * Various capabilities of hardware.
     */
    mlxcx_caps_t *mlx_caps;

    uint_t mlx_max_sdu;
    uint_t mlx_sdu;

    /*
     * FM State
     */
    int mlx_fm_caps;

    /*
     * PCI Data
     */
    ddi_acc_handle_t mlx_cfg_handle;
    ddi_acc_handle_t mlx_regs_handle;
    caddr_t mlx_regs_base;

    /*
     * MAC handle
     */
    mac_handle_t mlx_mac_hdl;

    /*
     * Main command queue for issuing general FW control commands.
     */
    mlxcx_cmd_queue_t mlx_cmd;

    /*
     * Interrupts
     */
    uint_t mlx_intr_pri;
    uint_t mlx_async_intr_pri;
    uint_t mlx_intr_type; /* always MSI-X */
    int mlx_intr_count;
    size_t mlx_intr_size; /* allocation size */
    int mlx_intr_cq0;
    ddi_intr_handle_t *mlx_intr_handles;

    /*
     * Basic firmware resources which we use for a variety of things.
     * The UAR is a reference to a page where CQ and EQ doorbells are
     * located. It also holds all the BlueFlame stuff (which we don't
     * use).
     */
    mlxcx_uar_t mlx_uar;
    /*
     * The PD (Protection Domain) and TDOM (Transport Domain) are opaque
     * entities to us (they're Infiniband constructs we don't actually care
     * about) -- we just allocate them and shove their ID numbers in
     * whenever we're asked for one.
     *
     * The "reserved" LKEY is what we should put in queue entries that
     * have references to memory to indicate that they're using linear
     * addresses (comes from the QUERY_SPECIAL_CONTEXTS cmd).
     */
    mlxcx_pd_t mlx_pd;
    mlxcx_tdom_t mlx_tdom;
    uint_t mlx_rsvd_lkey;

    /*
     * Our event queues. These are 1:1 with interrupts.
     */
    size_t mlx_eqs_size; /* allocation size */
    mlxcx_event_queue_t *mlx_eqs;

    /*
     * Page list. These represent the set of 4k pages we've given to
     * hardware.
     *
     * We can add to this list at the request of hardware from interrupt
     * context (the PAGE_REQUEST event), so it's protected by pagemtx.
     */
    kmutex_t mlx_pagemtx;
    uint_t mlx_npages;
    avl_tree_t mlx_pages;

    mlxcx_async_param_t mlx_npages_req[MLXCX_FUNC_ID_MAX + 1];

    /*
     * Taskq for processing asynchronous events which may issue
     * commands to the HCA.
     */
    taskq_t *mlx_async_tq;

    /*
     * Port state
     */
    uint_t mlx_nports;
    size_t mlx_ports_size;
    mlxcx_port_t *mlx_ports;

    /*
     * Completion queues (CQs). These are also indexed off the
     * event_queue_ts that they each report to.
     */
    list_t mlx_cqs;

    uint_t mlx_next_eq;

    /*
     * Work queues (WQs).
     */
    list_t mlx_wqs;

    /*
     * Ring groups
     */
    size_t mlx_rx_ngroups;
    size_t mlx_rx_groups_size;
    mlxcx_ring_group_t *mlx_rx_groups;

    size_t mlx_tx_ngroups;
    size_t mlx_tx_groups_size;
    mlxcx_ring_group_t *mlx_tx_groups;

    kmem_cache_t *mlx_bufs_cache;
    list_t mlx_buf_shards;

    ddi_periodic_t mlx_eq_checktimer;
    ddi_periodic_t mlx_cq_checktimer;
    ddi_periodic_t mlx_wq_checktimer;

    /*
     * Sensors
     */
    uint8_t mlx_temp_nsensors;
    mlxcx_temp_sensor_t *mlx_temp_sensors;
};

/*
 * Register access
 */
extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t);
extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t);
extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t);

extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t);
extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t);

extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t);
extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t);

/*
 * Logging functions.
 */
extern void mlxcx_warn(mlxcx_t *, const char *, ...);
extern void mlxcx_note(mlxcx_t *, const char *, ...);
extern void mlxcx_panic(mlxcx_t *, const char *, ...);

extern void mlxcx_fm_ereport(mlxcx_t *, const char *);

extern void mlxcx_check_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_check_rq(mlxcx_t *, mlxcx_work_queue_t *);

/*
 * DMA Functions
 */
extern void mlxcx_dma_free(mlxcx_dma_buffer_t *);
extern boolean_t mlxcx_dma_alloc(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, size_t, boolean_t);
extern boolean_t mlxcx_dma_init(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, boolean_t);
extern boolean_t mlxcx_dma_bind_mblk(mlxcx_t *, mlxcx_dma_buffer_t *,
    const mblk_t *, size_t, boolean_t);
extern boolean_t mlxcx_dma_alloc_offset(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t,
    size_t, size_t, boolean_t);
extern void mlxcx_dma_unbind(mlxcx_t *, mlxcx_dma_buffer_t *);
extern void mlxcx_dma_acc_attr(mlxcx_t *, ddi_device_acc_attr_t *);
extern void mlxcx_dma_page_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_queue_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_qdbell_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_buf_attr(mlxcx_t *, ddi_dma_attr_t *);

extern boolean_t mlxcx_give_pages(mlxcx_t *, int32_t, int32_t *);

static inline const ddi_dma_cookie_t *
mlxcx_dma_cookie_iter(const mlxcx_dma_buffer_t *db,
    const ddi_dma_cookie_t *prev)
{
    ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
    return (ddi_dma_cookie_iter(db->mxdb_dma_handle, prev));
}

static inline const ddi_dma_cookie_t *
mlxcx_dma_cookie_one(const mlxcx_dma_buffer_t *db)
{
    ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
    return (ddi_dma_cookie_one(db->mxdb_dma_handle));
}
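
/*
 * Example (illustrative only): walking every DMA cookie of a bound buffer
 * with the helpers above. Passing NULL as 'prev' yields the first cookie,
 * and ddi_dma_cookie_iter(9F) returns NULL once the list is exhausted:
 *
 *     const ddi_dma_cookie_t *ck;
 *     for (ck = mlxcx_dma_cookie_iter(db, NULL); ck != NULL;
 *         ck = mlxcx_dma_cookie_iter(db, ck)) {
 *         ... use ck->dmac_laddress and ck->dmac_size ...
 *     }
 */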

/*
 * From mlxcx_intr.c
 */
extern boolean_t mlxcx_intr_setup(mlxcx_t *);
extern void mlxcx_intr_disable(mlxcx_t *);
extern void mlxcx_intr_teardown(mlxcx_t *);
extern void mlxcx_arm_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern void mlxcx_arm_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern void mlxcx_update_cqci(mlxcx_t *, mlxcx_completion_queue_t *);

extern mblk_t *mlxcx_rx_poll(mlxcx_t *, mlxcx_completion_queue_t *, size_t);

/*
 * From mlxcx_gld.c
 */
extern boolean_t mlxcx_register_mac(mlxcx_t *);

/*
 * From mlxcx_ring.c
 */
extern boolean_t mlxcx_wq_alloc_dma(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_wq_rele_dma(mlxcx_t *, mlxcx_work_queue_t *);

extern boolean_t mlxcx_buf_create(mlxcx_t *, mlxcx_buf_shard_t *,
    mlxcx_buffer_t **);
extern boolean_t mlxcx_buf_create_foreign(mlxcx_t *, mlxcx_buf_shard_t *,
    mlxcx_buffer_t **);
extern mlxcx_buffer_t *mlxcx_buf_take(mlxcx_t *, mlxcx_work_queue_t *);
extern size_t mlxcx_buf_take_n(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t **, size_t);
extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);

extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
    mblk_t *, size_t, mlxcx_buffer_t **);

extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);

extern boolean_t mlxcx_rx_group_start(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
    mlxcx_work_queue_t *);
extern boolean_t mlxcx_rx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
    mlxcx_work_queue_t *);

extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t *);
extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t **, size_t);
extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
    uint8_t *, size_t, uint32_t, mlxcx_buffer_t *);
extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *);

extern void mlxcx_teardown_groups(mlxcx_t *);
extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *);
extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *);
extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *);

extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ent_t *, mlxcx_buffer_t *);

extern mlxcx_buf_shard_t *mlxcx_mlbs_create(mlxcx_t *);

/*
 * Flow mgmt
 */
extern boolean_t mlxcx_add_umcast_entry(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *, const uint8_t *);
extern boolean_t mlxcx_remove_umcast_entry(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *, const uint8_t *);
extern void mlxcx_remove_all_umcast_entries(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *);
extern boolean_t mlxcx_setup_flow_group(mlxcx_t *, mlxcx_flow_table_t *,
    mlxcx_flow_group_t *);
extern void mlxcx_teardown_flow_table(mlxcx_t *, mlxcx_flow_table_t *);

extern void mlxcx_remove_all_vlan_entries(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_remove_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
    boolean_t, uint16_t);
extern boolean_t mlxcx_add_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
    boolean_t, uint16_t);

/*
 * Command functions
 */
extern boolean_t mlxcx_cmd_queue_init(mlxcx_t *);
extern void mlxcx_cmd_queue_fini(mlxcx_t *);

extern void mlxcx_cmd_completion(mlxcx_t *, mlxcx_eventq_ent_t *);
extern void mlxcx_cmd_eq_enable(mlxcx_t *);
extern void mlxcx_cmd_eq_disable(mlxcx_t *);

extern boolean_t mlxcx_cmd_enable_hca(mlxcx_t *);
extern boolean_t mlxcx_cmd_disable_hca(mlxcx_t *);

extern boolean_t mlxcx_cmd_query_issi(mlxcx_t *, uint_t *);
extern boolean_t mlxcx_cmd_set_issi(mlxcx_t *, uint16_t);

extern boolean_t mlxcx_cmd_query_pages(mlxcx_t *, uint_t, int32_t *);
extern boolean_t mlxcx_cmd_give_pages(mlxcx_t *, uint_t, int32_t,
    mlxcx_dev_page_t **);
extern boolean_t mlxcx_cmd_return_pages(mlxcx_t *, int32_t, uint64_t *,
    int32_t *);

extern boolean_t mlxcx_cmd_query_hca_cap(mlxcx_t *, mlxcx_hca_cap_type_t,
    mlxcx_hca_cap_mode_t, mlxcx_hca_cap_t *);

extern boolean_t mlxcx_cmd_set_driver_version(mlxcx_t *, const char *);

extern boolean_t mlxcx_cmd_init_hca(mlxcx_t *);
extern boolean_t mlxcx_cmd_teardown_hca(mlxcx_t *);

extern boolean_t mlxcx_cmd_alloc_uar(mlxcx_t *, mlxcx_uar_t *);
extern boolean_t mlxcx_cmd_dealloc_uar(mlxcx_t *, mlxcx_uar_t *);

extern boolean_t mlxcx_cmd_alloc_pd(mlxcx_t *, mlxcx_pd_t *);
extern boolean_t mlxcx_cmd_dealloc_pd(mlxcx_t *, mlxcx_pd_t *);

extern boolean_t mlxcx_cmd_alloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
extern boolean_t mlxcx_cmd_dealloc_tdom(mlxcx_t *, mlxcx_tdom_t *);

extern boolean_t mlxcx_cmd_create_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern boolean_t mlxcx_cmd_destroy_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern boolean_t mlxcx_cmd_query_eq(mlxcx_t *, mlxcx_event_queue_t *,
    mlxcx_eventq_ctx_t *);

extern boolean_t mlxcx_cmd_create_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern boolean_t mlxcx_cmd_destroy_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern boolean_t mlxcx_cmd_query_cq(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ctx_t *);

extern boolean_t mlxcx_cmd_create_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_start_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_stop_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_destroy_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_query_rq(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_rq_ctx_t *);

extern boolean_t mlxcx_cmd_create_tir(mlxcx_t *, mlxcx_tir_t *);
extern boolean_t mlxcx_cmd_destroy_tir(mlxcx_t *, mlxcx_tir_t *);

extern boolean_t mlxcx_cmd_create_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_start_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_stop_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_destroy_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_query_sq(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_sq_ctx_t *);

extern boolean_t mlxcx_cmd_create_tis(mlxcx_t *, mlxcx_tis_t *);
extern boolean_t mlxcx_cmd_destroy_tis(mlxcx_t *, mlxcx_tis_t *);

extern boolean_t mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_special_ctxs(mlxcx_t *);

extern boolean_t mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *,
    mlxcx_modify_nic_vport_ctx_fields_t);

extern boolean_t mlxcx_cmd_create_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
extern boolean_t mlxcx_cmd_destroy_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
extern boolean_t mlxcx_cmd_set_flow_table_root(mlxcx_t *, mlxcx_flow_table_t *);

extern boolean_t mlxcx_cmd_create_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
extern boolean_t mlxcx_cmd_set_flow_table_entry(mlxcx_t *,
    mlxcx_flow_entry_t *);
extern boolean_t mlxcx_cmd_delete_flow_table_entry(mlxcx_t *,
    mlxcx_flow_entry_t *);
extern boolean_t mlxcx_cmd_destroy_flow_group(mlxcx_t *, mlxcx_flow_group_t *);

extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
    mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
    mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
    mlxcx_pplm_fec_caps_t);

extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);

extern boolean_t mlxcx_cmd_create_rqt(mlxcx_t *, mlxcx_rqtable_t *);
extern boolean_t mlxcx_cmd_destroy_rqt(mlxcx_t *, mlxcx_rqtable_t *);

extern boolean_t mlxcx_cmd_set_int_mod(mlxcx_t *, uint_t, uint_t);

extern boolean_t mlxcx_cmd_query_module_status(mlxcx_t *, uint_t,
    mlxcx_module_status_t *, mlxcx_module_error_type_t *);
extern boolean_t mlxcx_cmd_set_port_led(mlxcx_t *, mlxcx_port_t *, uint16_t);

/* Comparator for avl_ts */
extern int mlxcx_cq_compare(const void *, const void *);
extern int mlxcx_dmac_fe_compare(const void *, const void *);
extern int mlxcx_grmac_compare(const void *, const void *);
extern int mlxcx_page_compare(const void *, const void *);

extern void mlxcx_update_link_state(mlxcx_t *, mlxcx_port_t *);

extern void mlxcx_eth_proto_to_string(mlxcx_eth_proto_t, mlxcx_ext_eth_proto_t,
    char *, size_t);
extern const char *mlxcx_port_status_string(mlxcx_port_status_t);

extern const char *mlxcx_event_name(mlxcx_event_t);

/*
 * Sensor Functions
 */
extern boolean_t mlxcx_setup_sensors(mlxcx_t *);
extern void mlxcx_teardown_sensors(mlxcx_t *);

#ifdef __cplusplus
}
#endif

#endif /* _MLXCX_H */