xref: /illumos-gate/usr/src/uts/common/io/mlxcx/mlxcx.h (revision b0709259972397f63fd4534cac354c6a6810045d)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021, The University of Queensland
14  * Copyright (c) 2018, Joyent, Inc.
15  * Copyright 2020 RackTop Systems, Inc.
16  * Copyright 2023 MNX Cloud, Inc.
17  */
18 
19 /*
20  * Mellanox Connect-X 4/5/6 driver.
21  *
22  * More details in mlxcx.c
23  */
24 
25 #ifndef _MLXCX_H
26 #define	_MLXCX_H
27 
28 /*
 * mlxcx(4D) definitions
30  */
31 
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/ddifm.h>
35 #include <sys/id_space.h>
36 #include <sys/list.h>
37 #include <sys/taskq_impl.h>
38 #include <sys/stddef.h>
39 #include <sys/stream.h>
40 #include <sys/strsun.h>
41 #include <sys/mac_provider.h>
42 #include <sys/mac_ether.h>
43 #include <sys/cpuvar.h>
44 #include <sys/ethernet.h>
45 
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
48 
49 #include <sys/ddifm.h>
50 #include <sys/fm/protocol.h>
51 #include <sys/fm/util.h>
52 #include <sys/fm/io/ddi.h>
53 
54 #include <mlxcx_reg.h>
55 
56 #ifdef __cplusplus
57 extern "C" {
58 #endif
59 
/*
 * Get access to the first PCI BAR.
 */
#define	MLXCX_REG_NUMBER		1

/*
 * The command queue is supposed to be a page, which is 4k.
 */
#define	MLXCX_CMD_DMA_PAGE_SIZE		4096

/*
 * Queues can allocate in units of this much memory.
 */
#define	MLXCX_QUEUE_DMA_PAGE_SIZE	4096

/*
 * We advertise two sizes of groups to MAC -- a certain number of "large"
 * groups (including the default group, which is sized to at least ncpus)
 * followed by a certain number of "small" groups.
 *
 * This allows us to have a larger amount of classification resources available
 * for zones/VMs without resorting to software classification.
 */
#define	MLXCX_RX_NGROUPS_LARGE_DFLT		2
#define	MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT	16
#define	MLXCX_RX_NGROUPS_SMALL_DFLT		256
#define	MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT	4

#define	MLXCX_TX_NGROUPS_DFLT		1
#define	MLXCX_TX_NRINGS_PER_GROUP_DFLT	64

/*
 * Queues will be sized to (1 << *Q_SIZE_SHIFT) entries long.
 */
#define	MLXCX_EQ_SIZE_SHIFT_DFLT	9

/*
 * The CQ, SQ and RQ sizes can affect throughput on higher speed interfaces.
 * EQ less so, as it only takes a single EQ entry to indicate there are
 * multiple completions on the CQ.
 *
 * Particularly on the Rx side, the RQ (and corresponding CQ) would run
 * low on available entries. A symptom of this is the refill taskq running
 * frequently. A larger RQ (and CQ) alleviates this, and as there is a
 * close relationship between SQ and CQ size, the SQ is increased too.
 */
#define	MLXCX_CQ_SIZE_SHIFT_DFLT	10
#define	MLXCX_CQ_SIZE_SHIFT_25G		12

/*
 * Default to making SQs bigger than RQs for 9k MTU, since most packets will
 * spill over into more than one slot. RQ WQEs are always 1 slot.
 */
#define	MLXCX_SQ_SIZE_SHIFT_DFLT	11
#define	MLXCX_SQ_SIZE_SHIFT_25G		13

#define	MLXCX_RQ_SIZE_SHIFT_DFLT	10
#define	MLXCX_RQ_SIZE_SHIFT_25G		12

/*
 * High and low water mark gaps (in entries) used when tracking queue
 * occupancy.
 */
#define	MLXCX_CQ_HWM_GAP		16
#define	MLXCX_CQ_LWM_GAP		24

#define	MLXCX_WQ_HWM_GAP		MLXCX_CQ_HWM_GAP
#define	MLXCX_WQ_LWM_GAP		MLXCX_CQ_LWM_GAP

/* Number of buffers posted to an RQ in each refill batch. */
#define	MLXCX_RQ_REFILL_STEP		64

/*
 * CQ event moderation
 */
#define	MLXCX_CQEMOD_PERIOD_USEC_DFLT	50
#define	MLXCX_CQEMOD_COUNT_DFLT		\
	(8 * ((1 << MLXCX_CQ_SIZE_SHIFT_DFLT) / 10))

/*
 * EQ interrupt moderation
 */
#define	MLXCX_INTRMOD_PERIOD_USEC_DFLT	10

/* Size of root flow tables */
#define	MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT		12

/* Size of 2nd level flow tables for VLAN filtering */
#define	MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT		4

/*
 * How big does an mblk have to be before we dma_bind() it instead of
 * bcopying?
 */
#define	MLXCX_TX_BIND_THRESHOLD_DFLT	2048

/*
 * How often to check the status of completion queues for overflow and
 * other problems.
 */
#define	MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT		300
#define	MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT		300
#define	MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT		30

/*
 * After this many packets, the packets received so far are passed to
 * the mac layer.
 */
#define	MLXCX_RX_PER_CQ_DEFAULT			256
#define	MLXCX_RX_PER_CQ_MIN			16
#define	MLXCX_RX_PER_CQ_MAX			4096

#define	MLXCX_DOORBELL_TRIES_DFLT		3
extern uint_t mlxcx_doorbell_tries;

#define	MLXCX_STUCK_INTR_COUNT_DFLT		128
extern uint_t mlxcx_stuck_intr_count;

/*
 * Maximum number of attempts at DMA-binding a tx buffer before giving up.
 * NOTE(review): "ATTEMTPS" is a historical misspelling of "ATTEMPTS";
 * renaming it would break existing users of this header, so it is kept.
 */
#define	MLXCX_BUF_BIND_MAX_ATTEMTPS		50

/* Overhead added on top of the MTU: VLAN ethernet header plus FCS. */
#define	MLXCX_MTU_OFFSET	\
	(sizeof (struct ether_vlan_header) + ETHERFCSL)

/*
 * This is the current version of the command structure that the driver expects
 * to be found in the ISS.
 */
#define	MLXCX_CMD_REVISION	5

/*
 * Sync a mlxcx_dma_buffer_t (entire range) for the given direction flag.
 * In DEBUG builds a failed ddi_dma_sync() panics via VERIFY0.
 */
#ifdef	DEBUG
#define	MLXCX_DMA_SYNC(dma, flag)	VERIFY0(ddi_dma_sync( \
					    (dma).mxdb_dma_handle, 0, 0, \
					    (flag)))
#else
#define	MLXCX_DMA_SYNC(dma, flag)	(void) ddi_dma_sync( \
					    (dma).mxdb_dma_handle, 0, 0, \
					    (flag))
#endif

/* FM service name reported for this driver. */
#define	MLXCX_FM_SERVICE_MLXCX	"mlxcx"

/*
 * This macro defines the expected value of the 'Interface Step Sequence ID'
 * (issi) which represents the version of the start up and tear down sequence.
 * We must check that hardware supports this and tell it which version we're
 * using as well.
 */
#define	MLXCX_CURRENT_ISSI	1

/*
 * This is the size of a page that the hardware expects from us when
 * manipulating pages.
 */
#define	MLXCX_HW_PAGE_SIZE	4096

/*
 * This is a special lkey value used to terminate a list of scatter pointers.
 */
#define	MLXCX_NULL_LKEY		0x100

/*
 * The max function id we support in manage pages requests.
 * At the moment we only support/expect func 0 from manage pages, but
 * structures and code are in place to support any number.
 */
#define	MLXCX_FUNC_ID_MAX	0
221 
/*
 * Forwards
 */
struct mlxcx;
typedef struct mlxcx mlxcx_t;
typedef struct mlxcx_cmd mlxcx_cmd_t;
typedef struct mlxcx_port mlxcx_port_t;

/*
 * Parameters for a pages request from the HCA, carried across to the
 * async taskq via mlxcx_async_param_t below.
 */
typedef struct {
	mlxcx_t		*mlp_mlx;
	/* Page count from the request; sign presumably distinguishes */
	/* give vs. reclaim -- confirm against the PAGE_REQUEST handler. */
	int32_t		mlp_npages;
	uint16_t	mlp_func;	/* function id, <= MLXCX_FUNC_ID_MAX */
} mlxcx_pages_request_t;

/*
 * State for one task dispatched onto the async taskq (mlx_async_tq).
 */
typedef struct mlxcx_async_param {
	mlxcx_t		*mla_mlx;
	taskq_ent_t	mla_tqe;
	/* NOTE(review): mla_mtx presumably guards mla_pending -- confirm. */
	boolean_t	mla_pending;
	kmutex_t	mla_mtx;

	/*
	 * Parameters specific to the function dispatched.
	 */
	union {
		void			*mla_arg;
		mlxcx_pages_request_t	mla_pages;
		mlxcx_port_t		*mla_port;
	};
} mlxcx_async_param_t;
251 
/*
 * Flags tracking the lifecycle of a DMA buffer: which resources have been
 * allocated/bound, and whether the memory is foreign (loaned, not ours).
 */
typedef enum {
	MLXCX_DMABUF_HDL_ALLOC		= 1 << 0,
	MLXCX_DMABUF_MEM_ALLOC		= 1 << 1,
	MLXCX_DMABUF_BOUND		= 1 << 2,
	MLXCX_DMABUF_FOREIGN		= 1 << 3,
} mlxcx_dma_buffer_flags_t;

/*
 * A DMA-capable buffer: handles, mapping and cookie count for one
 * DDI DMA allocation/binding.
 */
typedef struct mlxcx_dma_buffer {
	mlxcx_dma_buffer_flags_t	mxdb_flags;
	caddr_t				mxdb_va;	/* Buffer VA */
	size_t				mxdb_len;	/* Buffer logical len */
	ddi_acc_handle_t		mxdb_acc_handle;
	ddi_dma_handle_t		mxdb_dma_handle;
	uint_t				mxdb_ncookies;
} mlxcx_dma_buffer_t;

/*
 * One 4k page given to the hardware; kept both on a list and in an AVL
 * tree (mlx_pages), keyed by physical address (mxdp_pa).
 */
typedef struct mlxcx_dev_page {
	list_node_t		mxdp_list;
	avl_node_t		mxdp_tree;
	uintptr_t		mxdp_pa;
	mlxcx_dma_buffer_t	mxdp_dma;
} mlxcx_dev_page_t;
274 
/*
 * Data structure to keep track of all information related to the command queue.
 */
typedef enum {
	MLXCX_CMD_QUEUE_S_IDLE = 1,
	MLXCX_CMD_QUEUE_S_BUSY,
	MLXCX_CMD_QUEUE_S_BROKEN
} mlxcx_cmd_queue_status_t;

typedef struct mlxcx_cmd_queue {
	kmutex_t		mcmd_lock;
	kcondvar_t		mcmd_cv;
	mlxcx_dma_buffer_t	mcmd_dma;	/* the command queue page */

	boolean_t		mcmd_polled;	/* poll rather than use intrs */

	/* Queue geometry: log2 sizes presumably from QUERY/init -- confirm. */
	uint8_t			mcmd_size_l2;
	uint8_t			mcmd_stride_l2;
	uint_t			mcmd_size;
	/*
	 * The mask has a bit for each command slot, there are a maximum
	 * of 32 slots. When the bit is set in the mask, it indicates
	 * the slot is available.
	 */
	uint32_t		mcmd_mask;

	/* In-flight command per slot, indexed by slot number. */
	mlxcx_cmd_t		*mcmd_active[MLXCX_CMD_MAX];

	ddi_taskq_t		*mcmd_taskq;
	id_space_t		*mcmd_tokens;	/* allocator for cmd tokens */
} mlxcx_cmd_queue_t;

/*
 * A command mailbox: DMA memory holding extended command input/output.
 * NOTE(review): the struct tag "mlxcd_cmd_mbox" looks like a typo for
 * "mlxcx_cmd_mbox"; it is left as-is since renaming the tag could break
 * any code referring to it by tag.
 */
typedef struct mlxcd_cmd_mbox {
	list_node_t		mlbox_node;
	mlxcx_dma_buffer_t	mlbox_dma;
	mlxcx_cmd_mailbox_t	*mlbox_data;
} mlxcx_cmd_mbox_t;
312 
/*
 * Lifecycle/state flags for an event queue (EQ). See also the comment
 * above mlxcx_event_queue_t for locking rules around mleq_state.
 */
typedef enum {
	MLXCX_EQ_ALLOC		= 1 << 0,	/* dma mem alloc'd, size set */
	MLXCX_EQ_CREATED	= 1 << 1,	/* CREATE_EQ sent to hw */
	MLXCX_EQ_DESTROYED	= 1 << 2,	/* DESTROY_EQ sent to hw */
	MLXCX_EQ_ARMED		= 1 << 3,	/* Armed through the UAR */
	MLXCX_EQ_POLLING	= 1 << 4,	/* Currently being polled */
	MLXCX_EQ_INTR_ENABLED	= 1 << 5,	/* ddi_intr_enable()'d */
	MLXCX_EQ_INTR_ACTIVE	= 1 << 6,	/* 'rupt handler running */
	MLXCX_EQ_INTR_QUIESCE	= 1 << 7,	/* 'rupt handler to quiesce */
	MLXCX_EQ_ATTACHING	= 1 << 8,	/* mlxcx_attach still running */
} mlxcx_eventq_state_t;

/*
 * One BlueFlame doorbell slot within a UAR page.
 */
typedef struct mlxcx_bf {
	kmutex_t		mbf_mtx;
	uint_t			mbf_cnt;
	uint_t			mbf_even;
	uint_t			mbf_odd;
} mlxcx_bf_t;

/*
 * A User Access Region (UAR): a page of doorbell registers for CQs/EQs,
 * plus its BlueFlame slots.
 */
typedef struct mlxcx_uar {
	boolean_t		mlu_allocated;
	uint_t			mlu_num;
	uint_t			mlu_base;

	volatile uint_t		mlu_bfcnt;	/* round-robin BF selector */
	mlxcx_bf_t		mlu_bf[MLXCX_BF_PER_UAR];
} mlxcx_uar_t;

/* Protection Domain -- opaque firmware object, we only track its number. */
typedef struct mlxcx_pd {
	boolean_t		mlpd_allocated;
	uint32_t		mlpd_num;
} mlxcx_pd_t;

/* Transport Domain -- opaque firmware object, we only track its number. */
typedef struct mlxcx_tdom {
	boolean_t		mltd_allocated;
	uint32_t		mltd_num;
} mlxcx_tdom_t;
350 
/* Per-port runtime flags. */
typedef enum {
	MLXCX_PORT_VPORT_PROMISC	= 1 << 0,
} mlxcx_port_flags_t;

typedef struct mlxcx_flow_table mlxcx_flow_table_t;
typedef struct mlxcx_flow_group mlxcx_flow_group_t;

/* Software-maintained per-port statistics. */
typedef struct {
	uint64_t		mlps_rx_drops;
} mlxcx_port_stats_t;

/* Port initialisation progress flags. */
typedef enum {
	MLXCX_PORT_INIT		= 1 << 0
} mlxcx_port_init_t;
365 
struct mlxcx_port {
	kmutex_t		mlp_mtx;
	mlxcx_port_init_t	mlp_init;
	mlxcx_t			*mlp_mlx;
	/*
	 * The mlp_num we have here starts at zero (it's an index), but the
	 * numbering we have to use for register access starts at 1. We
	 * currently write mlp_num into the other_vport fields in mlxcx_cmd.c
	 * (where 0 is a magic number meaning "my vport") so if we ever add
	 * support for virtualisation features and deal with more than one
	 * vport, we will probably have to change this.
	 */
	uint_t			mlp_num;
	mlxcx_port_flags_t	mlp_flags;
	uint64_t		mlp_guid;
	uint8_t			mlp_mac_address[ETHERADDRL];

	uint_t			mlp_mtu;
	uint_t			mlp_max_mtu;

	/* Administrative (requested) vs. operational (actual) link state. */
	mlxcx_port_status_t	mlp_admin_status;
	mlxcx_port_status_t	mlp_oper_status;

	/* Link speed/protocol negotiation state (PTYS and extended PTYS). */
	boolean_t		mlp_autoneg;
	mlxcx_eth_proto_t	mlp_max_proto;
	mlxcx_eth_proto_t	mlp_admin_proto;
	mlxcx_eth_proto_t	mlp_oper_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_max_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_admin_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_oper_proto;
	mlxcx_pplm_fec_active_t	mlp_fec_active;
	link_fec_t		mlp_fec_requested;

	mlxcx_eth_inline_mode_t	mlp_wqe_min_inline;

	/* Root flow tables */
	mlxcx_flow_table_t	*mlp_rx_flow;
	mlxcx_flow_table_t	*mlp_tx_flow;

	/* Flow groups within the root rx table. */
	mlxcx_flow_group_t	*mlp_promisc;
	mlxcx_flow_group_t	*mlp_bcast;
	mlxcx_flow_group_t	*mlp_umcast;

	/* Tree of unicast DMAC flow entries on this port. */
	avl_tree_t		mlp_dmac_fe;

	mlxcx_port_stats_t	mlp_stats;

	/* Last observed transceiver module state/error. */
	mlxcx_module_status_t	mlp_last_modstate;
	mlxcx_module_error_type_t	mlp_last_moderr;

	/*
	 * Async task state for port events.
	 * NOTE(review): the "mlx_" prefix is inconsistent with the "mlp_"
	 * prefix used by every other member; renaming would break existing
	 * users of this header, so it is noted rather than changed.
	 */
	mlxcx_async_param_t	mlx_port_event;
};
418 
/* What kind of completions an EQ is dedicated to (or ANY for shared use). */
typedef enum {
	MLXCX_EQ_TYPE_ANY,
	MLXCX_EQ_TYPE_RX,
	MLXCX_EQ_TYPE_TX
} mlxcx_eventq_type_t;
424 
425 /*
426  * mlxcx_event_queue_t is a representation of an event queue (EQ).
427  * There is a 1-1 tie in between an EQ and an interrupt vector, and
 * knowledge of that affects how some members of the struct are used
 * and modified.
 *
 * Most of the struct members are immutable except for during set up and
432  * teardown, for those it is safe to access them without a mutex once
433  * the driver is initialized.
434  *
435  * Members which are not immutable and are protected by mleq_mtx are:
436  *	* mleq_state - EQ state. Changes during transitions between
437  *		       polling modes.
438  *	* mleq_cq - an AVL tree of completions queues using this EQ.
439  *
440  * Another member which is not immutable is mleq_cc. This is the EQ
441  * consumer counter, it *must* only be incremented in the EQ's interrupt
442  * context. It is also fed back to the hardware during re-arming of
443  * the EQ, again this *must* only happen in the EQ's interrupt context.
444  *
445  * There are a couple of struct members (mleq_check_disarm_cc and
446  * mleq_check_disarm_cnt) which are used to help monitor the health
447  * and consistency of the EQ. They are only used and modified during health
448  * monitoring, which is both infrequent and single threaded, consequently
449  * no mutex guards are needed.
450  *
451  * Care is taken not to use the mleq_mtx when possible, both to avoid
452  * contention in what is "hot" code and avoid breaking requirements
453  * of mac(9E).
454  */
typedef struct mlxcx_event_queue {
	kmutex_t		mleq_mtx;	/* see locking notes above */
	kcondvar_t		mleq_cv;
	mlxcx_t			*mleq_mlx;
	mlxcx_eventq_state_t	mleq_state;	/* protected by mleq_mtx */
	mlxcx_eventq_type_t	mleq_type;

	mlxcx_dma_buffer_t	mleq_dma;	/* the EQ ring itself */

	size_t			mleq_entshift;	/* log2 of mleq_nents */
	size_t			mleq_nents;
	mlxcx_eventq_ent_t	*mleq_ent;
	/* Only advanced in this EQ's interrupt context (see above). */
	uint32_t		mleq_cc;	/* consumer counter */
	uint32_t		mleq_cc_armed;	/* cc at last arm */

	uint32_t		mleq_events;	/* total events seen */

	uint32_t		mleq_badintrs;	/* spurious interrupt count */

	/* Hardware eq number */
	uint_t			mleq_num;
	/* Index into the mlxcx_t's interrupts array */
	uint_t			mleq_intr_index;

	/* UAR region that has this EQ's doorbell in it */
	mlxcx_uar_t		*mleq_uar;

	/* Tree of CQn => mlxcx_completion_queue_t */
	avl_tree_t		mleq_cqs;	/* protected by mleq_mtx */

	/* Health-check state; single threaded, no lock needed (see above). */
	uint32_t		mleq_check_disarm_cc;
	uint_t			mleq_check_disarm_cnt;
} mlxcx_event_queue_t;
488 
/* Lifecycle flags for a TIS (Transport Interface Send). */
typedef enum {
	MLXCX_TIS_CREATED		= 1 << 0,
	MLXCX_TIS_DESTROYED		= 1 << 1,
} mlxcx_tis_state_t;

typedef struct mlxcx_tis {
	mlxcx_tis_state_t		mltis_state;
	list_node_t			mltis_entry;
	uint_t				mltis_num;	/* hw TIS number */
	mlxcx_tdom_t			*mltis_tdom;
} mlxcx_tis_t;

/* Where a buffer currently is in its lifecycle. */
typedef enum {
	MLXCX_BUFFER_INIT,
	MLXCX_BUFFER_FREE,
	MLXCX_BUFFER_ON_WQ,
	MLXCX_BUFFER_ON_LOAN,	/* loaned up to the mac layer */
	MLXCX_BUFFER_ON_CHAIN,	/* member of a tx chain, not the head */
} mlxcx_buffer_state_t;

/* Shard state: DRAINING means no new loans, waiting for buffers back. */
typedef enum {
	MLXCX_SHARD_READY,
	MLXCX_SHARD_DRAINING,
} mlxcx_shard_state_t;

/*
 * A shard of buffers: free/busy/loaned lists under one mutex, with a CV
 * for waiting on the free list becoming non-empty.
 */
typedef struct mlxcx_buf_shard {
	mlxcx_shard_state_t	mlbs_state;
	list_node_t		mlbs_entry;
	kmutex_t		mlbs_mtx;
	list_t			mlbs_busy;
	list_t			mlbs_free;
	list_t			mlbs_loaned;
	kcondvar_t		mlbs_free_nonempty;
} mlxcx_buf_shard_t;
523 
/*
 * A single rx or tx buffer: DMA memory plus list linkage and (for tx)
 * chaining of multi-buffer packets.
 */
typedef struct mlxcx_buffer {
	mlxcx_buf_shard_t	*mlb_shard;	/* owning shard */
	list_node_t		mlb_entry;	/* shard list linkage */
	list_node_t		mlb_cq_entry;	/* CQ buffer list linkage */

	struct mlxcx_buffer	*mlb_tx_head;	/* head of tx chain */
	list_t			mlb_tx_chain;
	list_node_t		mlb_tx_chain_entry;

	boolean_t		mlb_foreign;	/* dma_bind'd mblk memory */
	size_t			mlb_used;	/* bytes of mlb_dma in use */
	mblk_t			*mlb_tx_mp;

	/*
	 * The number of work queue basic blocks this buf uses.
	 */
	uint_t			mlb_wqebbs;

	mlxcx_t			*mlb_mlx;
	mlxcx_buffer_state_t	mlb_state;
	uint_t			mlb_wqe_index;
	mlxcx_dma_buffer_t	mlb_dma;
	mblk_t			*mlb_mp;
	frtn_t			mlb_frtn;	/* free routine for mlb_mp */
} mlxcx_buffer_t;
549 
/* Lifecycle/state flags for a completion queue (CQ). */
typedef enum {
	MLXCX_CQ_ALLOC		= 1 << 0,	/* dma mem alloc'd */
	MLXCX_CQ_CREATED	= 1 << 1,	/* CREATE_CQ sent to hw */
	MLXCX_CQ_DESTROYED	= 1 << 2,	/* DESTROY_CQ sent to hw */
	MLXCX_CQ_EQAVL		= 1 << 3,	/* on an EQ's mleq_cqs tree */
	MLXCX_CQ_BLOCKED_MAC	= 1 << 4,	/* mac flow-controlled */
	MLXCX_CQ_TEARDOWN	= 1 << 5,
	MLXCX_CQ_POLLING	= 1 << 6,	/* mac is polling, intrs off */
	MLXCX_CQ_ARMED		= 1 << 7,
} mlxcx_completionq_state_t;

typedef struct mlxcx_work_queue mlxcx_work_queue_t;

typedef struct mlxcx_completion_queue {
	kmutex_t			mlcq_mtx;
	kmutex_t			mlcq_arm_mtx;	/* serialises arming */
	mlxcx_t				*mlcq_mlx;
	mlxcx_completionq_state_t	mlcq_state;

	mlxcx_port_stats_t		*mlcq_stats;

	list_node_t			mlcq_entry;	/* mlx_cqs linkage */
	avl_node_t			mlcq_eq_entry;	/* mleq_cqs linkage */

	uint_t				mlcq_num;	/* hw CQ number */

	mlxcx_work_queue_t		*mlcq_wq;	/* WQ completing here */
	mlxcx_event_queue_t		*mlcq_eq;	/* EQ we report to */

	/* UAR region that has this CQ's UAR doorbell in it */
	mlxcx_uar_t			*mlcq_uar;

	mlxcx_dma_buffer_t		mlcq_dma;	/* the CQ ring */

	size_t				mlcq_entshift;	/* log2 of nents */
	size_t				mlcq_nents;
	mlxcx_completionq_ent_t		*mlcq_ent;
	uint32_t			mlcq_cc;	/* consumer counter */
	uint32_t			mlcq_cc_armed;	/* cc at last arm */
	uint32_t			mlcq_ec;	/* event counter */
	uint32_t			mlcq_ec_armed;	/* ec at last arm */

	mlxcx_dma_buffer_t		mlcq_doorbell_dma;
	mlxcx_completionq_doorbell_t	*mlcq_doorbell;

	/* Buffers outstanding on this CQ, with high/low water marks. */
	uint64_t			mlcq_bufcnt;
	size_t				mlcq_bufhwm;
	size_t				mlcq_buflwm;
	list_t				mlcq_buffers;
	/* Secondary buffer list, guarded by its own mutex. */
	kmutex_t			mlcq_bufbmtx;
	list_t				mlcq_buffers_b;

	/* Health-check state for detecting a stuck (disarmed) CQ. */
	uint_t				mlcq_check_disarm_cnt;
	uint64_t			mlcq_check_disarm_cc;

	/* Completion event moderation settings for this CQ. */
	uint_t				mlcq_cqemod_period_usec;
	uint_t				mlcq_cqemod_count;

	mac_ring_handle_t		mlcq_mac_hdl;
	uint64_t			mlcq_mac_gen;	/* mac ring generation */

	boolean_t			mlcq_fm_repd_qstate;
} mlxcx_completion_queue_t;
613 
/* Lifecycle/state flags for a work queue (WQ). */
typedef enum {
	MLXCX_WQ_ALLOC		= 1 << 0,	/* dma mem alloc'd */
	MLXCX_WQ_CREATED	= 1 << 1,	/* CREATE_*Q sent to hw */
	MLXCX_WQ_STARTED	= 1 << 2,	/* transitioned to RDY */
	MLXCX_WQ_DESTROYED	= 1 << 3,	/* DESTROY_*Q sent to hw */
	MLXCX_WQ_TEARDOWN	= 1 << 4,
	MLXCX_WQ_BUFFERS	= 1 << 5,	/* buffers allocated */
	MLXCX_WQ_REFILLING	= 1 << 6,	/* rx refill task active */
	MLXCX_WQ_BLOCKED_MAC	= 1 << 7
} mlxcx_workq_state_t;

/* Whether a WQ is a send queue (SQ) or receive queue (RQ). */
typedef enum {
	MLXCX_WQ_TYPE_SENDQ = 1,
	MLXCX_WQ_TYPE_RECVQ
} mlxcx_workq_type_t;

typedef struct mlxcx_ring_group mlxcx_ring_group_t;
631 
/*
 * A work queue (send or receive ring). Each WQ completes into exactly
 * one CQ (mlwq_cq) and belongs to one ring group.
 */
struct mlxcx_work_queue {
	kmutex_t			mlwq_mtx;
	mlxcx_t				*mlwq_mlx;
	mlxcx_workq_type_t		mlwq_type;
	mlxcx_workq_state_t		mlwq_state;

	list_node_t			mlwq_entry;	/* mlx_wqs linkage */
	list_node_t			mlwq_group_entry;

	mlxcx_ring_group_t		*mlwq_group;

	uint_t				mlwq_num;	/* hw queue number */

	mlxcx_completion_queue_t	*mlwq_cq;
	mlxcx_pd_t			*mlwq_pd;

	/* Required for send queues */
	mlxcx_tis_t			*mlwq_tis;

	/* UAR region that has this WQ's blueflame buffers in it */
	mlxcx_uar_t			*mlwq_uar;

	mlxcx_dma_buffer_t		mlwq_dma;	/* the WQ ring */

	mlxcx_eth_inline_mode_t		mlwq_inline_mode;
	size_t				mlwq_entshift;	/* log2 of nents */
	size_t				mlwq_nents;
	/* Discriminate based on mwq_type */
	union {
		mlxcx_sendq_ent_t	*mlwq_send_ent;
		mlxcx_sendq_extra_ent_t	*mlwq_send_extra_ent;
		mlxcx_recvq_ent_t	*mlwq_recv_ent;
		mlxcx_sendq_bf_t	*mlwq_bf_ent;
	};
	uint64_t			mlwq_pc;	/* producer counter */

	/* WQEBB occupancy and buffer high/low water marks. */
	uint64_t			mlwq_wqebb_used;
	size_t				mlwq_bufhwm;
	size_t				mlwq_buflwm;

	mlxcx_dma_buffer_t		mlwq_doorbell_dma;
	mlxcx_workq_doorbell_t		*mlwq_doorbell;

	/* Buffer shards: normal (copied) and foreign (bound mblk) buffers. */
	mlxcx_buf_shard_t		*mlwq_bufs;
	mlxcx_buf_shard_t		*mlwq_foreign_bufs;

	taskq_ent_t			mlwq_tqe;	/* e.g. rx refill */

	boolean_t			mlwq_fm_repd_qstate;
};
682 
/* Maximum number of RQs we will place in one RQ table. */
#define	MLXCX_RQT_MAX_SIZE		64

/* Lifecycle flags for an RQ table; DIRTY means hw needs an update. */
typedef enum {
	MLXCX_RQT_CREATED		= 1 << 0,
	MLXCX_RQT_DESTROYED		= 1 << 1,
	MLXCX_RQT_DIRTY			= 1 << 2,
} mlxcx_rqtable_state_t;

/*
 * An RQ table (RQT): the set of receive queues a TIR can spread
 * (e.g. RSS-hash) incoming traffic across.
 */
typedef struct mlxcx_rqtable {
	mlxcx_rqtable_state_t		mlrqt_state;
	list_node_t			mlrqt_entry;
	uint_t				mlrqt_num;	/* hw RQT number */

	size_t				mlrqt_max;	/* capacity */
	size_t				mlrqt_used;	/* entries in use */

	size_t				mlrqt_rq_size;	/* alloc size of mlrqt_rq */
	mlxcx_work_queue_t		**mlrqt_rq;
} mlxcx_rqtable_t;

/* Lifecycle flags for a TIR (Transport Interface Receive). */
typedef enum {
	MLXCX_TIR_CREATED		= 1 << 0,
	MLXCX_TIR_DESTROYED		= 1 << 1,
} mlxcx_tir_state_t;

/*
 * A TIR: receive-side firmware object which directs matched traffic
 * either to a single RQ or across an RQT via hashing.
 */
typedef struct mlxcx_tir {
	mlxcx_tir_state_t		mltir_state;
	list_node_t			mltir_entry;
	uint_t				mltir_num;	/* hw TIR number */
	mlxcx_tdom_t			*mltir_tdom;
	mlxcx_tir_type_t		mltir_type;
	/* Destination: discriminated by mltir_type. */
	union {
		mlxcx_rqtable_t			*mltir_rqtable;
		mlxcx_work_queue_t		*mltir_rq;
	};
	/* RSS hash configuration. */
	mlxcx_tir_hash_fn_t		mltir_hash_fn;
	uint8_t				mltir_toeplitz_key[40];
	mlxcx_tir_rx_hash_l3_type_t	mltir_l3_type;
	mlxcx_tir_rx_hash_l4_type_t	mltir_l4_type;
	mlxcx_tir_rx_hash_fields_t	mltir_hash_fields;
} mlxcx_tir_t;
724 
/* Lifecycle flags for a flow group. */
typedef enum {
	MLXCX_FLOW_GROUP_CREATED	= 1 << 0,
	MLXCX_FLOW_GROUP_BUSY		= 1 << 1,
	MLXCX_FLOW_GROUP_DESTROYED	= 1 << 2,
} mlxcx_flow_group_state_t;

/* Which fields the entries in a flow group match on. */
typedef enum {
	MLXCX_FLOW_MATCH_SMAC		= 1 << 0,
	MLXCX_FLOW_MATCH_DMAC		= 1 << 1,
	MLXCX_FLOW_MATCH_VLAN		= 1 << 2,
	MLXCX_FLOW_MATCH_VID		= 1 << 3,
	MLXCX_FLOW_MATCH_IP_VER		= 1 << 4,
	MLXCX_FLOW_MATCH_SRCIP		= 1 << 5,
	MLXCX_FLOW_MATCH_DSTIP		= 1 << 6,
	MLXCX_FLOW_MATCH_IP_PROTO	= 1 << 7,
	MLXCX_FLOW_MATCH_SQN		= 1 << 8,
	MLXCX_FLOW_MATCH_VXLAN		= 1 << 9,
} mlxcx_flow_mask_t;

/*
 * A flow group: a contiguous range of entries within a flow table which
 * all share the same match criteria mask (mlfg_mask).
 */
struct mlxcx_flow_group {
	list_node_t			mlfg_entry;
	list_node_t			mlfg_role_entry;
	mlxcx_flow_group_state_t	mlfg_state;
	mlxcx_flow_table_t		*mlfg_table;	/* owning table */
	uint_t				mlfg_num;	/* hw group number */
	size_t				mlfg_start_idx;	/* first entry index */
	size_t				mlfg_size;
	size_t				mlfg_avail;	/* unclaimed entries */
	list_t				mlfg_entries;
	mlxcx_flow_mask_t		mlfg_mask;
};
756 
/* Lifecycle flags for a flow entry; DIRTY means hw needs an update. */
typedef enum {
	MLXCX_FLOW_ENTRY_RESERVED	= 1 << 0,
	MLXCX_FLOW_ENTRY_CREATED	= 1 << 1,
	MLXCX_FLOW_ENTRY_DELETED	= 1 << 2,
	MLXCX_FLOW_ENTRY_DIRTY		= 1 << 3,
} mlxcx_flow_entry_state_t;

/* One destination of a flow entry: either a TIR or another flow table. */
typedef struct {
	mlxcx_tir_t			*mlfed_tir;
	mlxcx_flow_table_t		*mlfed_flow;
} mlxcx_flow_entry_dest_t;

/*
 * A single flow table entry: the match criteria values (interpreted
 * against the owning group's mask) and the forwarding destinations.
 */
typedef struct mlxcx_flow_entry {
	list_node_t			mlfe_group_entry;
	avl_node_t			mlfe_dmac_entry;
	mlxcx_flow_entry_state_t	mlfe_state;
	mlxcx_flow_table_t		*mlfe_table;
	mlxcx_flow_group_t		*mlfe_group;
	uint_t				mlfe_index;	/* index in table */

	mlxcx_flow_action_t		mlfe_action;

	/* Criteria for match */
	uint8_t				mlfe_smac[ETHERADDRL];
	uint8_t				mlfe_dmac[ETHERADDRL];

	mlxcx_vlan_type_t		mlfe_vlan_type;
	uint16_t			mlfe_vid;

	uint_t				mlfe_ip_version;
	uint8_t				mlfe_srcip[IPV6_ADDR_LEN];
	uint8_t				mlfe_dstip[IPV6_ADDR_LEN];

	uint_t				mlfe_ip_proto;
	uint16_t			mlfe_sport;
	uint16_t			mlfe_dport;

	uint32_t			mlfe_sqn;
	uint32_t			mlfe_vxlan_vni;

	/* Destinations */
	size_t				mlfe_ndest;
	mlxcx_flow_entry_dest_t		mlfe_dest[MLXCX_FLOW_MAX_DESTINATIONS];

	/*
	 * mlxcx_group_mac_ts joining this entry to N ring groups
	 * only used by FEs on the root rx flow table
	 */
	list_t				mlfe_ring_groups;
} mlxcx_flow_entry_t;
807 
/* Lifecycle flags for a flow table; ROOT means set as the NIC root. */
typedef enum {
	MLXCX_FLOW_TABLE_CREATED	= 1 << 0,
	MLXCX_FLOW_TABLE_DESTROYED	= 1 << 1,
	MLXCX_FLOW_TABLE_ROOT		= 1 << 2
} mlxcx_flow_table_state_t;

/*
 * A flow table: an array of entries, partitioned into groups, at a given
 * level of the rx or tx classification hierarchy.
 */
struct mlxcx_flow_table {
	kmutex_t			mlft_mtx;
	mlxcx_flow_table_state_t	mlft_state;
	uint_t				mlft_level;	/* hierarchy depth */
	uint_t				mlft_num;	/* hw table number */
	mlxcx_flow_table_type_t		mlft_type;

	mlxcx_port_t			*mlft_port;

	size_t				mlft_entshift;	/* log2 of nents */
	size_t				mlft_nents;

	size_t				mlft_entsize;	/* alloc size of mlft_ent */
	mlxcx_flow_entry_t		*mlft_ent;

	/* First entry not yet claimed by a group */
	size_t				mlft_next_ent;

	list_t				mlft_groups;
};
834 
/* Direction of a ring group. */
typedef enum {
	MLXCX_GROUP_RX,
	MLXCX_GROUP_TX
} mlxcx_group_type_t;

/* Setup-progress flags for a ring group. */
typedef enum {
	MLXCX_GROUP_INIT		= 1 << 0,
	MLXCX_GROUP_WQS			= 1 << 1,
	MLXCX_GROUP_TIRTIS		= 1 << 2,
	MLXCX_GROUP_FLOWS		= 1 << 3,
	MLXCX_GROUP_RUNNING		= 1 << 4,
	MLXCX_GROUP_RQT			= 1 << 5,
} mlxcx_group_state_t;

/* Size (log2) of each rx group's protocol-hash flow table. */
#define	MLXCX_RX_HASH_FT_SIZE_SHIFT	4

/*
 * One TIR per protocol class per rx group; the final enumerator doubles
 * as the per-group TIR array size.
 */
typedef enum {
	MLXCX_TIR_ROLE_IPv4 = 0,
	MLXCX_TIR_ROLE_IPv6,
	MLXCX_TIR_ROLE_TCPv4,
	MLXCX_TIR_ROLE_TCPv6,
	MLXCX_TIR_ROLE_UDPv4,
	MLXCX_TIR_ROLE_UDPv6,
	MLXCX_TIR_ROLE_OTHER,

	MLXCX_TIRS_PER_GROUP
} mlxcx_tir_role_t;
862 
863 typedef struct {
864 	avl_node_t		mlgm_group_entry;
865 	list_node_t		mlgm_fe_entry;
866 	mlxcx_ring_group_t	*mlgm_group;
867 	uint8_t			mlgm_mac[6];
868 	mlxcx_flow_entry_t	*mlgm_fe;
869 } mlxcx_group_mac_t;
870 
/*
 * Joins a ring group's VLAN filter state to its flow entry in the
 * group's VLAN flow table.
 */
typedef struct {
	list_node_t		mlgv_entry;
	boolean_t		mlgv_tagged;	/* B_FALSE: untagged traffic */
	uint16_t		mlgv_vid;
	mlxcx_flow_entry_t	*mlgv_fe;
} mlxcx_group_vlan_t;
877 
/*
 * A ring group as advertised to mac(9E): a set of WQs plus (for rx) the
 * classification resources (flow tables, TIRs, RQT) that feed them.
 */
struct mlxcx_ring_group {
	kmutex_t			mlg_mtx;
	mlxcx_t				*mlg_mlx;
	mlxcx_group_state_t		mlg_state;
	mlxcx_group_type_t		mlg_type;	/* rx or tx */

	mac_group_handle_t		mlg_mac_hdl;

	/* Discriminated by mlg_type: tx groups use a TIS, rx groups TIRs. */
	union {
		mlxcx_tis_t		mlg_tis;
		mlxcx_tir_t		mlg_tir[MLXCX_TIRS_PER_GROUP];
	};
	mlxcx_port_t			*mlg_port;

	size_t				mlg_nwqs;
	size_t				mlg_wqs_size;	/* alloc size of mlg_wqs */
	mlxcx_work_queue_t		*mlg_wqs;

	mlxcx_rqtable_t			*mlg_rqt;	/* rx only */

	/*
	 * Flow table for matching VLAN IDs
	 */
	mlxcx_flow_table_t		*mlg_rx_vlan_ft;
	mlxcx_flow_group_t		*mlg_rx_vlan_fg;
	mlxcx_flow_group_t		*mlg_rx_vlan_def_fg;
	mlxcx_flow_group_t		*mlg_rx_vlan_promisc_fg;
	list_t				mlg_rx_vlans;

	taskq_t				*mlg_refill_tq;

	/*
	 * Flow table for separating out by protocol before hashing
	 */
	mlxcx_flow_table_t		*mlg_rx_hash_ft;

	/*
	 * Links to flow entries on the root flow table which are pointing to
	 * our rx_vlan_ft.
	 */
	avl_tree_t			mlg_rx_macs;
};
920 
/* Completion state of a command; both bits may end up set on error. */
typedef enum mlxcx_cmd_state {
	MLXCX_CMD_S_DONE	= 1 << 0,
	MLXCX_CMD_S_ERROR	= 1 << 1
} mlxcx_cmd_state_t;

/*
 * A single in-flight firmware command. mlcmd_lock/mlcmd_cv are used to
 * wait for completion when not polling.
 */
struct mlxcx_cmd {
	struct mlxcx		*mlcmd_mlxp;
	kmutex_t		mlcmd_lock;
	kcondvar_t		mlcmd_cv;

	boolean_t		mlcmd_poll;	/* poll instead of cv_wait */
	uint8_t			mlcmd_token;	/* matches reply to request */
	mlxcx_cmd_op_t		mlcmd_op;

	/*
	 * Command data and extended mailboxes for responses.
	 */
	const void		*mlcmd_in;
	uint32_t		mlcmd_inlen;
	void			*mlcmd_out;
	uint32_t		mlcmd_outlen;
	list_t			mlcmd_mbox_in;
	uint8_t			mlcmd_nboxes_in;
	list_t			mlcmd_mbox_out;
	uint8_t			mlcmd_nboxes_out;
	/*
	 * Status information.
	 */
	mlxcx_cmd_state_t	mlcmd_state;
	uint8_t			mlcmd_status;	/* hw status code */
};
952 
/*
 * Our view of capabilities.
 */
typedef struct mlxcx_hca_cap {
	mlxcx_hca_cap_mode_t	mhc_mode;	/* current vs. max */
	mlxcx_hca_cap_type_t	mhc_type;
	/* Raw capability page, discriminated by mhc_type. */
	union {
		uint8_t				mhc_bulk[MLXCX_HCA_CAP_SIZE];
		mlxcx_hca_cap_general_caps_t	mhc_general;
		mlxcx_hca_cap_eth_caps_t	mhc_eth;
		mlxcx_hca_cap_flow_caps_t	mhc_flow;
	};
} mlxcx_hca_cap_t;

typedef struct {
	/* Cooked values */
	boolean_t		mlc_checksum;	/* hw checksum offload */
	boolean_t		mlc_lso;
	boolean_t		mlc_vxlan;
	boolean_t		mlc_pcam;	/* PCAM register supported */
	boolean_t		mlc_ext_ptys;	/* extended PTYS supported */
	size_t			mlc_max_lso_size;
	size_t			mlc_max_rqt_size;

	size_t			mlc_max_rx_ft_shift;
	size_t			mlc_max_rx_fe_dest;
	size_t			mlc_max_rx_flows;
	size_t			mlc_max_rx_ft;

	size_t			mlc_max_tir;

	/* Raw caps data */
	mlxcx_hca_cap_t		mlc_hca_cur;
	mlxcx_hca_cap_t		mlc_hca_max;
	mlxcx_hca_cap_t		mlc_ether_cur;
	mlxcx_hca_cap_t		mlc_ether_max;
	mlxcx_hca_cap_t		mlc_nic_flow_cur;
	mlxcx_hca_cap_t		mlc_nic_flow_max;
} mlxcx_caps_t;
992 
/*
 * Tunable driver properties, populated from driver.conf overrides or the
 * MLXCX_*_DFLT values defined at the top of this file.
 */
typedef struct {
	uint_t			mldp_eq_size_shift;
	uint_t			mldp_cq_size_shift;
	uint_t			mldp_cq_size_shift_default;
	uint_t			mldp_rq_size_shift;
	uint_t			mldp_rq_size_shift_default;
	uint_t			mldp_sq_size_shift;
	uint_t			mldp_sq_size_shift_default;
	uint_t			mldp_cqemod_period_usec;
	uint_t			mldp_cqemod_count;
	uint_t			mldp_intrmod_period_usec;
	uint_t			mldp_rx_ngroups_large;
	uint_t			mldp_rx_ngroups_small;
	uint_t			mldp_rx_nrings_per_large_group;
	uint_t			mldp_rx_nrings_per_small_group;
	uint_t			mldp_rx_per_cq;
	uint_t			mldp_tx_ngroups;
	uint_t			mldp_tx_nrings_per_group;
	uint_t			mldp_ftbl_root_size_shift;
	size_t			mldp_tx_bind_threshold;
	uint_t			mldp_ftbl_vlan_size_shift;
	uint64_t		mldp_eq_check_interval_sec;
	uint64_t		mldp_cq_check_interval_sec;
	uint64_t		mldp_wq_check_interval_sec;
} mlxcx_drv_props_t;
1018 
/*
 * State for one hardware temperature sensor exposed via the ksensor
 * framework.
 */
typedef struct {
	mlxcx_t	*mlts_mlx;
	uint8_t	mlts_index;	/* sensor index in the MTMP register */
	id_t	mlts_ksensor;
	int16_t	mlts_value;
	int16_t	mlts_max_value;
	uint8_t	mlts_name[MLXCX_MTMP_NAMELEN];
} mlxcx_temp_sensor_t;

/*
 * Progress flags set during mlxcx_attach() and consulted during
 * teardown so only completed steps are undone.
 */
typedef enum {
	MLXCX_ATTACH_FM		= 1 << 0,
	MLXCX_ATTACH_PCI_CONFIG	= 1 << 1,
	MLXCX_ATTACH_REGS	= 1 << 2,
	MLXCX_ATTACH_CMD	= 1 << 3,
	MLXCX_ATTACH_ENABLE_HCA	= 1 << 4,
	MLXCX_ATTACH_PAGE_LIST	= 1 << 5,
	MLXCX_ATTACH_INIT_HCA	= 1 << 6,
	MLXCX_ATTACH_UAR_PD_TD	= 1 << 7,
	MLXCX_ATTACH_INTRS	= 1 << 8,
	MLXCX_ATTACH_PORTS	= 1 << 9,
	MLXCX_ATTACH_MAC_HDL	= 1 << 10,
	MLXCX_ATTACH_CQS	= 1 << 11,
	MLXCX_ATTACH_WQS	= 1 << 12,
	MLXCX_ATTACH_GROUPS	= 1 << 13,
	MLXCX_ATTACH_BUFS	= 1 << 14,
	MLXCX_ATTACH_CAPS	= 1 << 15,
	MLXCX_ATTACH_CHKTIMERS	= 1 << 16,
	MLXCX_ATTACH_ASYNC_TQ	= 1 << 17,
	MLXCX_ATTACH_SENSORS	= 1 << 18
} mlxcx_attach_progress_t;
1049 
/*
 * Per-instance soft state for the driver. One of these exists for each
 * attached device.
 */
struct mlxcx {
	/* entry on the mlxcx_glist */
	list_node_t		mlx_gentry;

	dev_info_t		*mlx_dip;
	int			mlx_inst;	/* instance number */
	mlxcx_attach_progress_t	mlx_attach;

	mlxcx_drv_props_t	mlx_props;

	/*
	 * Misc. data
	 */
	uint16_t		mlx_fw_maj;	/* firmware version */
	uint16_t		mlx_fw_min;
	uint16_t		mlx_fw_rev;
	uint16_t		mlx_cmd_rev;	/* see MLXCX_CMD_REVISION */

	/*
	 * Various capabilities of hardware.
	 */
	mlxcx_caps_t		*mlx_caps;

	uint_t			mlx_max_sdu;
	uint_t			mlx_sdu;

	/*
	 * FM State
	 */
	int			mlx_fm_caps;

	/*
	 * PCI Data
	 */
	ddi_acc_handle_t	mlx_cfg_handle;
	ddi_acc_handle_t	mlx_regs_handle;
	caddr_t			mlx_regs_base;

	/*
	 * MAC handle
	 */
	mac_handle_t		mlx_mac_hdl;

	/*
	 * Main command queue for issuing general FW control commands.
	 */
	mlxcx_cmd_queue_t	mlx_cmd;

	/*
	 * Interrupts
	 */
	uint_t			mlx_intr_pri;
	uint_t			mlx_async_intr_pri;
	uint_t			mlx_intr_type;		/* always MSI-X */
	int			mlx_intr_count;
	size_t			mlx_intr_size;		/* allocation size */
	int			mlx_intr_cq0;	/* first vector used for CQs */
	ddi_intr_handle_t	*mlx_intr_handles;

	/*
	 * Basic firmware resources which we use for a variety of things.
	 * The UAR is a reference to a page where CQ and EQ doorbells are
	 * located. It also holds all the BlueFlame stuff (which we don't
	 * use).
	 */
	mlxcx_uar_t		mlx_uar;
	/*
	 * The PD (Protection Domain) and TDOM (Transport Domain) are opaque
	 * entities to us (they're Infiniband constructs we don't actually care
	 * about) -- we just allocate them and shove their ID numbers in
	 * whenever we're asked for one.
	 *
	 * The "reserved" LKEY is what we should put in queue entries that
	 * have references to memory to indicate that they're using linear
	 * addresses (comes from the QUERY_SPECIAL_CONTEXTS cmd).
	 */
	mlxcx_pd_t		mlx_pd;
	mlxcx_tdom_t		mlx_tdom;
	uint_t			mlx_rsvd_lkey;

	/*
	 * Our event queues. These are 1:1 with interrupts.
	 */
	size_t			mlx_eqs_size;		/* allocation size */
	mlxcx_event_queue_t	*mlx_eqs;

	/*
	 * Page list. These represent the set of 4k pages we've given to
	 * hardware.
	 *
	 * We can add to this list at the request of hardware from interrupt
	 * context (the PAGE_REQUEST event), so it's protected by pagemtx.
	 */
	kmutex_t		mlx_pagemtx;
	uint_t			mlx_npages;
	avl_tree_t		mlx_pages;

	/* Per-function async page-request state, indexed by function id. */
	mlxcx_async_param_t	mlx_npages_req[MLXCX_FUNC_ID_MAX + 1];

	/*
	 * Taskq for processing asynchronous events which may issue
	 * commands to the HCA.
	 */
	taskq_t			*mlx_async_tq;

	/*
	 * Port state
	 */
	uint_t			mlx_nports;
	size_t			mlx_ports_size;		/* allocation size */
	mlxcx_port_t		*mlx_ports;

	/*
	 * Completion queues (CQs). These are also indexed off the
	 * event_queue_ts that they each report to.
	 */
	list_t			mlx_cqs;

	uint_t			mlx_next_eq;	/* round-robin EQ assignment */

	/*
	 * Work queues (WQs).
	 */
	list_t			mlx_wqs;

	/*
	 * Ring groups
	 */
	size_t			mlx_rx_ngroups;
	size_t			mlx_rx_groups_size;	/* allocation size */
	mlxcx_ring_group_t	*mlx_rx_groups;

	size_t			mlx_tx_ngroups;
	size_t			mlx_tx_groups_size;	/* allocation size */
	mlxcx_ring_group_t	*mlx_tx_groups;

	kmem_cache_t		*mlx_bufs_cache;
	list_t			mlx_buf_shards;

	/* Periodic health checks for EQs, CQs and WQs. */
	ddi_periodic_t		mlx_eq_checktimer;
	ddi_periodic_t		mlx_cq_checktimer;
	ddi_periodic_t		mlx_wq_checktimer;

	/*
	 * Sensors
	 */
	uint8_t			mlx_temp_nsensors;
	mlxcx_temp_sensor_t	*mlx_temp_sensors;
};
1199 
1200 /*
1201  * Register access
1202  */
1203 extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t);
1204 extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t);
1205 extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t);
1206 
1207 extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t);
1208 extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t);
1209 
1210 extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t);
1211 extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t);
1212 
1213 /*
1214  * Logging functions.
1215  */
1216 extern void mlxcx_warn(mlxcx_t *, const char *, ...);
1217 extern void mlxcx_note(mlxcx_t *, const char *, ...);
1218 extern void mlxcx_panic(mlxcx_t *, const char *, ...);
1219 
1220 extern void mlxcx_fm_ereport(mlxcx_t *, const char *);
1221 
1222 extern void mlxcx_check_sq(mlxcx_t *, mlxcx_work_queue_t *);
1223 extern void mlxcx_check_rq(mlxcx_t *, mlxcx_work_queue_t *);
1224 
1225 /*
1226  * DMA Functions
1227  */
1228 extern void mlxcx_dma_free(mlxcx_dma_buffer_t *);
1229 extern boolean_t mlxcx_dma_alloc(mlxcx_t *, mlxcx_dma_buffer_t *,
1230     ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, size_t, boolean_t);
1231 extern boolean_t mlxcx_dma_init(mlxcx_t *, mlxcx_dma_buffer_t *,
1232     ddi_dma_attr_t *, boolean_t);
1233 extern boolean_t mlxcx_dma_bind_mblk(mlxcx_t *, mlxcx_dma_buffer_t *,
1234     const mblk_t *, size_t, boolean_t);
1235 extern boolean_t mlxcx_dma_alloc_offset(mlxcx_t *, mlxcx_dma_buffer_t *,
1236     ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t,
1237     size_t, size_t, boolean_t);
1238 extern void mlxcx_dma_unbind(mlxcx_t *, mlxcx_dma_buffer_t *);
1239 extern void mlxcx_dma_acc_attr(mlxcx_t *, ddi_device_acc_attr_t *);
1240 extern void mlxcx_dma_page_attr(mlxcx_t *, ddi_dma_attr_t *);
1241 extern void mlxcx_dma_queue_attr(mlxcx_t *, ddi_dma_attr_t *);
1242 extern void mlxcx_dma_qdbell_attr(mlxcx_t *, ddi_dma_attr_t *);
1243 extern void mlxcx_dma_buf_attr(mlxcx_t *, ddi_dma_attr_t *);
1244 
1245 extern boolean_t mlxcx_give_pages(mlxcx_t *, int32_t, int32_t *);
1246 
1247 static inline const ddi_dma_cookie_t *
1248 mlxcx_dma_cookie_iter(const mlxcx_dma_buffer_t *db,
1249     const ddi_dma_cookie_t *prev)
1250 {
1251 	ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
1252 	return (ddi_dma_cookie_iter(db->mxdb_dma_handle, prev));
1253 }
1254 
1255 static inline const ddi_dma_cookie_t *
1256 mlxcx_dma_cookie_one(const mlxcx_dma_buffer_t *db)
1257 {
1258 	ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
1259 	return (ddi_dma_cookie_one(db->mxdb_dma_handle));
1260 }
1261 
1262 /*
1263  * From mlxcx_intr.c
1264  */
1265 extern boolean_t mlxcx_intr_setup(mlxcx_t *);
1266 extern void mlxcx_intr_disable(mlxcx_t *);
1267 extern void mlxcx_intr_teardown(mlxcx_t *);
1268 extern void mlxcx_arm_eq(mlxcx_t *, mlxcx_event_queue_t *);
1269 extern void mlxcx_arm_cq(mlxcx_t *, mlxcx_completion_queue_t *);
1270 extern void mlxcx_update_cqci(mlxcx_t *, mlxcx_completion_queue_t *);
1271 
1272 extern mblk_t *mlxcx_rx_poll(mlxcx_t *, mlxcx_completion_queue_t *, size_t);
1273 
1274 /*
1275  * From mlxcx_gld.c
1276  */
1277 extern boolean_t mlxcx_register_mac(mlxcx_t *);
1278 
1279 /*
1280  * From mlxcx_ring.c
1281  */
1282 extern boolean_t mlxcx_wq_alloc_dma(mlxcx_t *, mlxcx_work_queue_t *);
1283 extern void mlxcx_wq_rele_dma(mlxcx_t *, mlxcx_work_queue_t *);
1284 
1285 extern boolean_t mlxcx_buf_create(mlxcx_t *, mlxcx_buf_shard_t *,
1286     mlxcx_buffer_t **);
1287 extern boolean_t mlxcx_buf_create_foreign(mlxcx_t *, mlxcx_buf_shard_t *,
1288     mlxcx_buffer_t **);
1289 extern mlxcx_buffer_t *mlxcx_buf_take(mlxcx_t *, mlxcx_work_queue_t *);
1290 extern size_t mlxcx_buf_take_n(mlxcx_t *, mlxcx_work_queue_t *,
1291     mlxcx_buffer_t **, size_t);
1292 extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
1293 extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
1294 extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
1295 extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
1296 extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
1297 extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);
1298 
1299 extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
1300     mblk_t *, size_t, mlxcx_buffer_t **);
1301 
1302 extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
1303 extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
1304 
1305 extern boolean_t mlxcx_rx_group_start(mlxcx_t *, mlxcx_ring_group_t *);
1306 extern boolean_t mlxcx_tx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
1307     mlxcx_work_queue_t *);
1308 extern boolean_t mlxcx_rx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
1309     mlxcx_work_queue_t *);
1310 
1311 extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
1312     mlxcx_buffer_t *);
1313 extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *,
1314     mlxcx_buffer_t **, size_t);
1315 extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
1316     uint8_t *, size_t, uint32_t, mlxcx_buffer_t *);
1317 extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *);
1318 extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *);
1319 
1320 extern void mlxcx_teardown_groups(mlxcx_t *);
1321 extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *);
1322 extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *);
1323 extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *);
1324 extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *);
1325 
1326 extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
1327     mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
1328 extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
1329     mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
1330 
1331 extern mlxcx_buf_shard_t *mlxcx_mlbs_create(mlxcx_t *);
1332 
1333 /*
1334  * Flow mgmt
1335  */
1336 extern boolean_t mlxcx_add_umcast_entry(mlxcx_t *, mlxcx_port_t *,
1337     mlxcx_ring_group_t *, const uint8_t *);
1338 extern boolean_t mlxcx_remove_umcast_entry(mlxcx_t *, mlxcx_port_t *,
1339     mlxcx_ring_group_t *, const uint8_t *);
1340 extern void mlxcx_remove_all_umcast_entries(mlxcx_t *, mlxcx_port_t *,
1341     mlxcx_ring_group_t *);
1342 extern boolean_t mlxcx_setup_flow_group(mlxcx_t *, mlxcx_flow_table_t *,
1343     mlxcx_flow_group_t *);
1344 extern void mlxcx_teardown_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
1345 
1346 extern void mlxcx_remove_all_vlan_entries(mlxcx_t *, mlxcx_ring_group_t *);
1347 extern boolean_t mlxcx_remove_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
1348     boolean_t, uint16_t);
1349 extern boolean_t mlxcx_add_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
1350     boolean_t, uint16_t);
1351 
1352 /*
1353  * Command functions
1354  */
1355 extern boolean_t mlxcx_cmd_queue_init(mlxcx_t *);
1356 extern void mlxcx_cmd_queue_fini(mlxcx_t *);
1357 
1358 extern void mlxcx_cmd_completion(mlxcx_t *, mlxcx_eventq_ent_t *);
1359 extern void mlxcx_cmd_eq_enable(mlxcx_t *);
1360 extern void mlxcx_cmd_eq_disable(mlxcx_t *);
1361 
1362 extern boolean_t mlxcx_cmd_enable_hca(mlxcx_t *);
1363 extern boolean_t mlxcx_cmd_disable_hca(mlxcx_t *);
1364 
1365 extern boolean_t mlxcx_cmd_query_issi(mlxcx_t *, uint_t *);
1366 extern boolean_t mlxcx_cmd_set_issi(mlxcx_t *, uint16_t);
1367 
1368 extern boolean_t mlxcx_cmd_query_pages(mlxcx_t *, uint_t, int32_t *);
1369 extern boolean_t mlxcx_cmd_give_pages(mlxcx_t *, uint_t, int32_t,
1370     mlxcx_dev_page_t **);
1371 extern boolean_t mlxcx_cmd_return_pages(mlxcx_t *, int32_t, uint64_t *,
1372     int32_t *);
1373 
1374 extern boolean_t mlxcx_cmd_query_hca_cap(mlxcx_t *, mlxcx_hca_cap_type_t,
1375     mlxcx_hca_cap_mode_t, mlxcx_hca_cap_t *);
1376 
1377 extern boolean_t mlxcx_cmd_set_driver_version(mlxcx_t *, const char *);
1378 
1379 extern boolean_t mlxcx_cmd_init_hca(mlxcx_t *);
1380 extern boolean_t mlxcx_cmd_teardown_hca(mlxcx_t *);
1381 
1382 extern boolean_t mlxcx_cmd_alloc_uar(mlxcx_t *, mlxcx_uar_t *);
1383 extern boolean_t mlxcx_cmd_dealloc_uar(mlxcx_t *, mlxcx_uar_t *);
1384 
1385 extern boolean_t mlxcx_cmd_alloc_pd(mlxcx_t *, mlxcx_pd_t *);
1386 extern boolean_t mlxcx_cmd_dealloc_pd(mlxcx_t *, mlxcx_pd_t *);
1387 
1388 extern boolean_t mlxcx_cmd_alloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
1389 extern boolean_t mlxcx_cmd_dealloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
1390 
1391 extern boolean_t mlxcx_cmd_create_eq(mlxcx_t *, mlxcx_event_queue_t *);
1392 extern boolean_t mlxcx_cmd_destroy_eq(mlxcx_t *, mlxcx_event_queue_t *);
1393 extern boolean_t mlxcx_cmd_query_eq(mlxcx_t *, mlxcx_event_queue_t *,
1394     mlxcx_eventq_ctx_t *);
1395 
1396 extern boolean_t mlxcx_cmd_create_cq(mlxcx_t *, mlxcx_completion_queue_t *);
1397 extern boolean_t mlxcx_cmd_destroy_cq(mlxcx_t *, mlxcx_completion_queue_t *);
1398 extern boolean_t mlxcx_cmd_query_cq(mlxcx_t *, mlxcx_completion_queue_t *,
1399     mlxcx_completionq_ctx_t *);
1400 
1401 extern boolean_t mlxcx_cmd_create_rq(mlxcx_t *, mlxcx_work_queue_t *);
1402 extern boolean_t mlxcx_cmd_start_rq(mlxcx_t *, mlxcx_work_queue_t *);
1403 extern boolean_t mlxcx_cmd_stop_rq(mlxcx_t *, mlxcx_work_queue_t *);
1404 extern boolean_t mlxcx_cmd_destroy_rq(mlxcx_t *, mlxcx_work_queue_t *);
1405 extern boolean_t mlxcx_cmd_query_rq(mlxcx_t *, mlxcx_work_queue_t *,
1406     mlxcx_rq_ctx_t *);
1407 
1408 extern boolean_t mlxcx_cmd_create_tir(mlxcx_t *, mlxcx_tir_t *);
1409 extern boolean_t mlxcx_cmd_destroy_tir(mlxcx_t *, mlxcx_tir_t *);
1410 
1411 extern boolean_t mlxcx_cmd_create_sq(mlxcx_t *, mlxcx_work_queue_t *);
1412 extern boolean_t mlxcx_cmd_start_sq(mlxcx_t *, mlxcx_work_queue_t *);
1413 extern boolean_t mlxcx_cmd_stop_sq(mlxcx_t *, mlxcx_work_queue_t *);
1414 extern boolean_t mlxcx_cmd_destroy_sq(mlxcx_t *, mlxcx_work_queue_t *);
1415 extern boolean_t mlxcx_cmd_query_sq(mlxcx_t *, mlxcx_work_queue_t *,
1416     mlxcx_sq_ctx_t *);
1417 
1418 extern boolean_t mlxcx_cmd_create_tis(mlxcx_t *, mlxcx_tis_t *);
1419 extern boolean_t mlxcx_cmd_destroy_tis(mlxcx_t *, mlxcx_tis_t *);
1420 
1421 extern boolean_t mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *);
1422 extern boolean_t mlxcx_cmd_query_special_ctxs(mlxcx_t *);
1423 
1424 extern boolean_t mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *,
1425     mlxcx_modify_nic_vport_ctx_fields_t);
1426 
1427 extern boolean_t mlxcx_cmd_create_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
1428 extern boolean_t mlxcx_cmd_destroy_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
1429 extern boolean_t mlxcx_cmd_set_flow_table_root(mlxcx_t *, mlxcx_flow_table_t *);
1430 
1431 extern boolean_t mlxcx_cmd_create_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
1432 extern boolean_t mlxcx_cmd_set_flow_table_entry(mlxcx_t *,
1433     mlxcx_flow_entry_t *);
1434 extern boolean_t mlxcx_cmd_delete_flow_table_entry(mlxcx_t *,
1435     mlxcx_flow_entry_t *);
1436 extern boolean_t mlxcx_cmd_destroy_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
1437 
1438 extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
1439     mlxcx_register_id_t, mlxcx_register_data_t *);
1440 extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
1441 extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
1442 extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
1443     mlxcx_port_status_t);
1444 extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
1445 extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
1446 extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
1447     mlxcx_pplm_fec_caps_t);
1448 
1449 extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);
1450 
1451 extern boolean_t mlxcx_cmd_create_rqt(mlxcx_t *, mlxcx_rqtable_t *);
1452 extern boolean_t mlxcx_cmd_destroy_rqt(mlxcx_t *, mlxcx_rqtable_t *);
1453 
1454 extern boolean_t mlxcx_cmd_set_int_mod(mlxcx_t *, uint_t, uint_t);
1455 
1456 extern boolean_t mlxcx_cmd_query_module_status(mlxcx_t *, uint_t,
1457     mlxcx_module_status_t *, mlxcx_module_error_type_t *);
1458 extern boolean_t mlxcx_cmd_set_port_led(mlxcx_t *, mlxcx_port_t *, uint16_t);
1459 
1460 /* Comparator for avl_ts */
1461 extern int mlxcx_cq_compare(const void *, const void *);
1462 extern int mlxcx_dmac_fe_compare(const void *, const void *);
1463 extern int mlxcx_grmac_compare(const void *, const void *);
1464 extern int mlxcx_page_compare(const void *, const void *);
1465 
1466 extern void mlxcx_update_link_state(mlxcx_t *, mlxcx_port_t *);
1467 
1468 extern void mlxcx_eth_proto_to_string(mlxcx_eth_proto_t, mlxcx_ext_eth_proto_t,
1469     char *, size_t);
1470 extern const char *mlxcx_port_status_string(mlxcx_port_status_t);
1471 
1472 extern const char *mlxcx_event_name(mlxcx_event_t);
1473 
1474 /*
1475  * Sensor Functions
1476  */
1477 extern boolean_t mlxcx_setup_sensors(mlxcx_t *);
1478 extern void mlxcx_teardown_sensors(mlxcx_t *);
1479 
1480 #ifdef __cplusplus
1481 }
1482 #endif
1483 
1484 #endif /* _MLXCX_H */
1485