/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 The University of Queensland
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2023 RackTop Systems, Inc.
 * Copyright 2023 MNX Cloud, Inc.
 */

/*
 * Mellanox ConnectX-4/5/6 driver.
 *
 * More details in mlxcx.c
 */

#ifndef _MLXCX_H
#define	_MLXCX_H

/*
 * mlxcx(4D) definitions
 */
31 
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/ddifm.h>
35 #include <sys/id_space.h>
36 #include <sys/list.h>
37 #include <sys/taskq_impl.h>
38 #include <sys/stddef.h>
39 #include <sys/stream.h>
40 #include <sys/strsun.h>
41 #include <sys/mac_provider.h>
42 #include <sys/mac_ether.h>
43 #include <sys/cpuvar.h>
44 #include <sys/ethernet.h>
45 
46 #include <inet/ip.h>
47 #include <inet/ip6.h>
48 
49 #include <sys/ddifm.h>
50 #include <sys/fm/protocol.h>
51 #include <sys/fm/util.h>
52 #include <sys/fm/io/ddi.h>
53 
54 #include <mlxcx_reg.h>
55 
56 #ifdef __cplusplus
57 extern "C" {
58 #endif
59 
60 #define	MLXCX_VENDOR_ID			0x15b3
61 
62 /*
63  * The PCI device ids for the cards we support. The device IDs correspond to
64  * the device ids in the driver manifest, and the names were obtained from
65  * the PCI id database in /usr/share/hwdata/pci.ids
66  */
67 #define	MLXCX_CX4_DEVID			0x1013
68 #define	MLXCX_CX4_VF_DEVID		0x1014
69 #define	MLXCX_CX4_LX_DEVID		0x1015
70 #define	MLXCX_CX4_LX_VF_DEVID		0x1016
71 #define	MLXCX_CX5_DEVID			0x1017
72 #define	MLXCX_CX5_VF_DEVID		0x1018
73 #define	MLXCX_CX5_EX_DEVID		0x1019
74 #define	MLXCX_CX5_EX_VF_DEVID		0x101a
75 #define	MLXCX_CX6_DEVID			0x101b
76 #define	MLXCX_CX6_VF_DEVID		0x101c
77 #define	MLXCX_CX6_DF_DEVID		0x101d
78 #define	MLXCX_CX5_GEN_VF_DEVID		0x101e
79 #define	MLXCX_CX6_LX_DEVID		0x101f
80 
81 /*
82  * Get access to the first PCI BAR.
83  */
84 #define	MLXCX_REG_NUMBER		1
85 
86 /*
87  * The command queue is supposed to be a page, which is 4k.
88  */
89 #define	MLXCX_CMD_DMA_PAGE_SIZE		4096
90 
91 /*
92  * Queues can allocate in units of this much memory.
93  */
94 #define	MLXCX_QUEUE_DMA_PAGE_SIZE	4096
95 
96 /*
97  * We advertise two sizes of groups to MAC -- a certain number of "large"
98  * groups (including the default group, which is sized to at least ncpus)
99  * followed by a certain number of "small" groups.
100  *
101  * This allows us to have a larger amount of classification resources available
102  * for zones/VMs without resorting to software classification.
103  */
104 #define	MLXCX_RX_NGROUPS_LARGE_DFLT		2
105 #define	MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT	16
106 #define	MLXCX_RX_NGROUPS_SMALL_DFLT		256
107 #define	MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT	4
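
/*
 * With the defaults above, that works out to 2 * 16 + 256 * 4 = 1056
 * rx rings advertised across all groups.
 */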

#define	MLXCX_TX_NGROUPS_DFLT		1
#define	MLXCX_TX_NRINGS_PER_GROUP_DFLT	64

/*
 * Queues will be sized to (1 << *Q_SIZE_SHIFT) entries.
 */
#define	MLXCX_EQ_SIZE_SHIFT_DFLT	9

/*
 * The CQ, SQ and RQ sizes can affect throughput on higher speed interfaces.
 * The EQ less so, as it only takes a single EQ entry to indicate there are
 * multiple completions on the CQ.
 *
 * Particularly on the Rx side, the RQ (and corresponding CQ) can run
 * low on available entries. A symptom of this is the refill taskq running
 * frequently. A larger RQ (and CQ) alleviates this, and as there is a
 * close relationship between SQ and CQ size, the SQ is increased too.
 */
#define	MLXCX_CQ_SIZE_SHIFT_DFLT	10
#define	MLXCX_CQ_SIZE_SHIFT_25G		12

/*
 * Default to making SQs bigger than RQs for 9k MTU, since most packets will
 * spill over into more than one slot. RQ WQEs are always 1 slot.
 */
#define	MLXCX_SQ_SIZE_SHIFT_DFLT	11
#define	MLXCX_SQ_SIZE_SHIFT_25G		13

#define	MLXCX_RQ_SIZE_SHIFT_DFLT	10
#define	MLXCX_RQ_SIZE_SHIFT_25G		12
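
/*
 * With these defaults the EQ has 512 entries, the CQ and RQ have 1024
 * (4096 at 25G and above), and the SQ has 2048 (8192 at 25G and above).
 */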

#define	MLXCX_CQ_HWM_GAP		16
#define	MLXCX_CQ_LWM_GAP		24

#define	MLXCX_WQ_HWM_GAP		MLXCX_CQ_HWM_GAP
#define	MLXCX_WQ_LWM_GAP		MLXCX_CQ_LWM_GAP

#define	MLXCX_RQ_REFILL_STEP		64

/*
 * CQ event moderation
 */
#define	MLXCX_CQEMOD_PERIOD_USEC_DFLT	50
#define	MLXCX_CQEMOD_COUNT_DFLT		\
	(8 * ((1 << MLXCX_CQ_SIZE_SHIFT_DFLT) / 10))
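
/*
 * For example, with the default CQ size shift of 10, this works out to
 * 8 * (1024 / 10) = 816 entries -- i.e. the count trigger fires at
 * roughly 80% of CQ capacity if the period timer has not fired first.
 */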

/*
 * EQ interrupt moderation
 */
#define	MLXCX_INTRMOD_PERIOD_USEC_DFLT	10

/* Size of root flow tables */
#define	MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT		12

/* Size of 2nd level flow tables for VLAN filtering */
#define	MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT		4

/*
 * How big does an mblk have to be before we dma_bind() it instead of
 * bcopying?
 */
#define	MLXCX_TX_BIND_THRESHOLD_DFLT	2048
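
/*
 * Illustrative sketch only (the real logic lives in the tx path in
 * mlxcx_ring.c): for each outbound fragment, the decision is roughly
 *
 *	if (MBLKL(mp) >= mlxp->mlx_props.mldp_tx_bind_threshold)
 *		use mlxcx_dma_bind_mblk() for a zero-copy DMA binding;
 *	else
 *		bcopy() the fragment into a pre-allocated tx buffer.
 */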

/*
 * How often to check the status of completion queues for overflow and
 * other problems.
 */
#define	MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT		300
#define	MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT		300
#define	MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT		30

/*
 * After this many packets, the packets received so far are passed to
 * the mac layer.
 */
#define	MLXCX_RX_PER_CQ_DEFAULT			256
#define	MLXCX_RX_PER_CQ_MIN			16
#define	MLXCX_RX_PER_CQ_MAX			4096

/*
 * Minimum size for packets loaned when >50% of a ring's buffers are already
 * on loan to MAC.
 */
#define	MLXCX_P50_LOAN_MIN_SIZE_DFLT		256

#define	MLXCX_DOORBELL_TRIES_DFLT		3
extern uint_t mlxcx_doorbell_tries;

#define	MLXCX_STUCK_INTR_COUNT_DFLT		128
extern uint_t mlxcx_stuck_intr_count;

#define	MLXCX_BUF_BIND_MAX_ATTEMTPS		50

#define	MLXCX_MTU_OFFSET	\
	(sizeof (struct ether_vlan_header) + ETHERFCSL)
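
/*
 * sizeof (struct ether_vlan_header) is 18 bytes and ETHERFCSL is 4, so
 * this offset accounts for the 22 bytes of L2 overhead (VLAN-tagged
 * header plus FCS) on top of the MTU.
 */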

/*
 * This is the current version of the command structure that the driver expects
 * to be found in the ISS.
 */
#define	MLXCX_CMD_REVISION	5

#ifdef	DEBUG
#define	MLXCX_DMA_SYNC(dma, flag)	VERIFY0(ddi_dma_sync( \
					    (dma).mxdb_dma_handle, 0, 0, \
					    (flag)))
#else
#define	MLXCX_DMA_SYNC(dma, flag)	(void) ddi_dma_sync( \
					    (dma).mxdb_dma_handle, 0, 0, \
					    (flag))
#endif
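
/*
 * Example usage (illustrative): sync a completion queue's ring so the
 * kernel sees entries the hardware has just written:
 *
 *	MLXCX_DMA_SYNC(mlcq->mlcq_dma, DDI_DMA_SYNC_FORKERNEL);
 */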

#define	MLXCX_FM_SERVICE_MLXCX	"mlxcx"

/*
 * This macro defines the expected value of the 'Interface Step Sequence ID'
 * (issi) which represents the version of the start up and tear down sequence.
 * We must check that hardware supports this and tell it which version we're
 * using as well.
 */
#define	MLXCX_CURRENT_ISSI	1

/*
 * This is the size of a page that the hardware expects from us when
 * manipulating pages.
 */
#define	MLXCX_HW_PAGE_SIZE	4096

/*
 * This is a special lkey value used to terminate a list of scatter pointers.
 */
#define	MLXCX_NULL_LKEY		0x100

/*
 * The max function id we support in manage pages requests.
 * At the moment we only support/expect func 0 from manage pages, but
 * structures and code are in place to support any number.
 */
#define	MLXCX_FUNC_ID_MAX	0

/*
 * Forwards
 */
struct mlxcx;
typedef struct mlxcx mlxcx_t;
typedef struct mlxcx_cmd mlxcx_cmd_t;
typedef struct mlxcx_port mlxcx_port_t;

typedef struct {
	mlxcx_t		*mlp_mlx;
	int32_t		mlp_npages;
	uint16_t	mlp_func;
} mlxcx_pages_request_t;

typedef struct mlxcx_async_param {
	mlxcx_t		*mla_mlx;
	taskq_ent_t	mla_tqe;
	boolean_t	mla_pending;
	kmutex_t	mla_mtx;

	/*
	 * Parameters specific to the function dispatched.
	 */
	union {
		void			*mla_arg;
		mlxcx_pages_request_t	mla_pages;
		mlxcx_port_t		*mla_port;
	};
} mlxcx_async_param_t;

typedef enum {
	MLXCX_DMABUF_HDL_ALLOC		= 1 << 0,
	MLXCX_DMABUF_MEM_ALLOC		= 1 << 1,
	MLXCX_DMABUF_BOUND		= 1 << 2,
	MLXCX_DMABUF_FOREIGN		= 1 << 3,
} mlxcx_dma_buffer_flags_t;

typedef struct mlxcx_dma_buffer {
	mlxcx_dma_buffer_flags_t	mxdb_flags;
	caddr_t				mxdb_va;	/* Buffer VA */
	size_t				mxdb_len;	/* Buffer logical len */
	ddi_acc_handle_t		mxdb_acc_handle;
	ddi_dma_handle_t		mxdb_dma_handle;
	uint_t				mxdb_ncookies;
} mlxcx_dma_buffer_t;

typedef struct mlxcx_dev_page {
	list_node_t		mxdp_list;
	avl_node_t		mxdp_tree;
	uintptr_t		mxdp_pa;
	mlxcx_dma_buffer_t	mxdp_dma;
} mlxcx_dev_page_t;

/*
 * Data structure to keep track of all information related to the command
 * queue.
 */
typedef enum {
	MLXCX_CMD_QUEUE_S_IDLE = 1,
	MLXCX_CMD_QUEUE_S_BUSY,
	MLXCX_CMD_QUEUE_S_BROKEN
} mlxcx_cmd_queue_status_t;

typedef struct mlxcx_cmd_queue {
	kmutex_t		mcmd_lock;
	kcondvar_t		mcmd_cv;
	mlxcx_dma_buffer_t	mcmd_dma;

	boolean_t		mcmd_polled;

	uint8_t			mcmd_size_l2;
	uint8_t			mcmd_stride_l2;
	uint_t			mcmd_size;
	/*
	 * The mask has a bit for each command slot; there are a maximum
	 * of 32 slots. When a bit is set in the mask, it indicates that
	 * the slot is available. (An illustrative allocation sketch
	 * follows this struct.)
	 */
	uint32_t		mcmd_mask;

	mlxcx_cmd_t		*mcmd_active[MLXCX_CMD_MAX];

	ddi_taskq_t		*mcmd_taskq;
	id_space_t		*mcmd_tokens;
} mlxcx_cmd_queue_t;
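
/*
 * Illustrative sketch (an assumption for clarity, not the driver's
 * actual implementation) of claiming a free slot from mcmd_mask:
 *
 *	mutex_enter(&cmdq->mcmd_lock);
 *	while (cmdq->mcmd_mask == 0)
 *		cv_wait(&cmdq->mcmd_cv, &cmdq->mcmd_lock);
 *	slot = ddi_ffs(cmdq->mcmd_mask) - 1;	(first set bit = free slot)
 *	cmdq->mcmd_mask &= ~(1U << slot);	(mark the slot busy)
 *	mutex_exit(&cmdq->mcmd_lock);
 */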

typedef struct mlxcx_cmd_mbox {
	list_node_t		mlbox_node;
	mlxcx_dma_buffer_t	mlbox_dma;
	mlxcx_cmd_mailbox_t	*mlbox_data;
} mlxcx_cmd_mbox_t;

typedef enum {
	MLXCX_EQ_ALLOC		= 1 << 0,	/* dma mem alloc'd, size set */
	MLXCX_EQ_CREATED	= 1 << 1,	/* CREATE_EQ sent to hw */
	MLXCX_EQ_DESTROYED	= 1 << 2,	/* DESTROY_EQ sent to hw */
	MLXCX_EQ_ARMED		= 1 << 3,	/* Armed through the UAR */
	MLXCX_EQ_POLLING	= 1 << 4,	/* Currently being polled */
	MLXCX_EQ_INTR_ENABLED	= 1 << 5,	/* ddi_intr_enable()'d */
	MLXCX_EQ_INTR_ACTIVE	= 1 << 6,	/* 'rupt handler running */
	MLXCX_EQ_INTR_QUIESCE	= 1 << 7,	/* 'rupt handler to quiesce */
	MLXCX_EQ_ATTACHING	= 1 << 8,	/* mlxcx_attach still running */
} mlxcx_eventq_state_t;

typedef struct mlxcx_bf {
	kmutex_t		mbf_mtx;
	uint_t			mbf_cnt;
	uint_t			mbf_even;
	uint_t			mbf_odd;
} mlxcx_bf_t;

typedef struct mlxcx_uar {
	boolean_t		mlu_allocated;
	uint_t			mlu_num;
	uint_t			mlu_base;

	volatile uint_t		mlu_bfcnt;
	mlxcx_bf_t		mlu_bf[MLXCX_BF_PER_UAR];
} mlxcx_uar_t;

typedef struct mlxcx_pd {
	boolean_t		mlpd_allocated;
	uint32_t		mlpd_num;
} mlxcx_pd_t;

typedef struct mlxcx_tdom {
	boolean_t		mltd_allocated;
	uint32_t		mltd_num;
} mlxcx_tdom_t;

typedef enum {
	MLXCX_PORT_VPORT_PROMISC	= 1 << 0,
} mlxcx_port_flags_t;

typedef struct mlxcx_flow_table mlxcx_flow_table_t;
typedef struct mlxcx_flow_group mlxcx_flow_group_t;

typedef struct {
	uint64_t		mlps_rx_drops;
} mlxcx_port_stats_t;

typedef enum {
	MLXCX_PORT_INIT		= 1 << 0
} mlxcx_port_init_t;

struct mlxcx_port {
	kmutex_t		mlp_mtx;
	mlxcx_port_init_t	mlp_init;
	mlxcx_t			*mlp_mlx;
	/*
	 * The mlp_num we have here starts at zero (it's an index), but the
	 * numbering we have to use for register access starts at 1. We
	 * currently write mlp_num into the other_vport fields in mlxcx_cmd.c
	 * (where 0 is a magic number meaning "my vport") so if we ever add
	 * support for virtualisation features and deal with more than one
	 * vport, we will probably have to change this.
	 */
	uint_t			mlp_num;
	mlxcx_port_flags_t	mlp_flags;
	uint64_t		mlp_guid;
	uint8_t			mlp_mac_address[ETHERADDRL];

	uint_t			mlp_mtu;
	uint_t			mlp_max_mtu;

	mlxcx_port_status_t	mlp_admin_status;
	mlxcx_port_status_t	mlp_oper_status;

	boolean_t		mlp_autoneg;
	mlxcx_eth_proto_t	mlp_max_proto;
	mlxcx_eth_proto_t	mlp_admin_proto;
	mlxcx_eth_proto_t	mlp_oper_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_max_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_admin_proto;
	mlxcx_ext_eth_proto_t	mlp_ext_oper_proto;
	mlxcx_pplm_fec_active_t	mlp_fec_active;
	link_fec_t		mlp_fec_requested;

	mlxcx_eth_inline_mode_t	mlp_wqe_min_inline;

	/* Root flow tables */
	mlxcx_flow_table_t	*mlp_rx_flow;
	mlxcx_flow_table_t	*mlp_tx_flow;

	mlxcx_flow_group_t	*mlp_promisc;
	mlxcx_flow_group_t	*mlp_bcast;
	mlxcx_flow_group_t	*mlp_umcast;

	avl_tree_t		mlp_dmac_fe;

	mlxcx_port_stats_t	mlp_stats;

	mlxcx_module_status_t	mlp_last_modstate;
	mlxcx_module_error_type_t	mlp_last_moderr;

	mlxcx_async_param_t	mlx_port_event;
};

typedef enum {
	MLXCX_EQ_TYPE_ANY,
	MLXCX_EQ_TYPE_RX,
	MLXCX_EQ_TYPE_TX
} mlxcx_eventq_type_t;

/*
 * mlxcx_event_queue_t is a representation of an event queue (EQ).
 * There is a 1:1 correspondence between an EQ and an interrupt vector, and
 * knowledge of that affects how some members of the struct are used
 * and modified.
 *
 * Most of the struct members are immutable except during setup and
 * teardown; once the driver is initialized it is safe to access them
 * without a mutex.
 *
 * Members which are not immutable and are protected by mleq_mtx are:
 *	* mleq_state - EQ state. Changes during transitions between
 *		       polling modes.
 *	* mleq_cqs - an AVL tree of completion queues using this EQ.
 *
 * Another member which is not immutable is mleq_cc. This is the EQ
 * consumer counter; it *must* only be incremented in the EQ's interrupt
 * context. It is also fed back to the hardware during re-arming of
 * the EQ, and again this *must* only happen in the EQ's interrupt
 * context.
 *
 * There are a couple of struct members (mleq_check_disarm_cc and
 * mleq_check_disarm_cnt) which are used to help monitor the health
 * and consistency of the EQ. They are only used and modified during health
 * monitoring, which is both infrequent and single threaded, so
 * no mutex guards are needed.
 *
 * Care is taken not to use the mleq_mtx when possible, both to avoid
 * contention in what is "hot" code and to avoid breaking requirements
 * of mac(9E).
 */
typedef struct mlxcx_event_queue {
	kmutex_t		mleq_mtx;
	kcondvar_t		mleq_cv;
	mlxcx_t			*mleq_mlx;
	mlxcx_eventq_state_t	mleq_state;
	mlxcx_eventq_type_t	mleq_type;

	mlxcx_dma_buffer_t	mleq_dma;

	size_t			mleq_entshift;
	size_t			mleq_nents;
	mlxcx_eventq_ent_t	*mleq_ent;
	uint32_t		mleq_cc;	/* consumer counter */
	uint32_t		mleq_cc_armed;

	uint32_t		mleq_events;

	uint32_t		mleq_badintrs;

	/* Hardware eq number */
	uint_t			mleq_num;
	/* Index into the mlxcx_t's interrupts array */
	uint_t			mleq_intr_index;

	/* UAR region that has this EQ's doorbell in it */
	mlxcx_uar_t		*mleq_uar;

	/* Tree of CQn => mlxcx_completion_queue_t */
	avl_tree_t		mleq_cqs;

	uint32_t		mleq_check_disarm_cc;
	uint_t			mleq_check_disarm_cnt;
} mlxcx_event_queue_t;

typedef enum {
	MLXCX_TIS_CREATED		= 1 << 0,
	MLXCX_TIS_DESTROYED		= 1 << 1,
} mlxcx_tis_state_t;

typedef struct mlxcx_tis {
	mlxcx_tis_state_t		mltis_state;
	list_node_t			mltis_entry;
	uint_t				mltis_num;
	mlxcx_tdom_t			*mltis_tdom;
} mlxcx_tis_t;

typedef enum {
	MLXCX_BUFFER_INIT,
	MLXCX_BUFFER_FREE,
	MLXCX_BUFFER_ON_WQ,
	MLXCX_BUFFER_ON_LOAN,
	MLXCX_BUFFER_ON_CHAIN,
} mlxcx_buffer_state_t;

typedef enum {
	MLXCX_SHARD_READY,
	MLXCX_SHARD_DRAINING,
} mlxcx_shard_state_t;

typedef struct mlxcx_buf_shard {
	mlxcx_shard_state_t	mlbs_state;
	list_node_t		mlbs_entry;
	kmutex_t		mlbs_mtx;
	uint64_t		mlbs_ntotal;
	uint64_t		mlbs_nloaned;
	uint64_t		mlbs_hiwat1;
	uint64_t		mlbs_hiwat2;
	list_t			mlbs_busy;
	list_t			mlbs_free;
	list_t			mlbs_loaned;
	kcondvar_t		mlbs_free_nonempty;
} mlxcx_buf_shard_t;

typedef struct mlxcx_buffer {
	mlxcx_buf_shard_t	*mlb_shard;
	list_node_t		mlb_entry;
	list_node_t		mlb_cq_entry;

	struct mlxcx_buffer	*mlb_tx_head;	/* head of tx chain */
	list_t			mlb_tx_chain;
	list_node_t		mlb_tx_chain_entry;

	boolean_t		mlb_foreign;
	size_t			mlb_used;
	mblk_t			*mlb_tx_mp;

	/*
	 * The number of work queue basic blocks this buf uses.
	 */
	uint_t			mlb_wqebbs;

	mlxcx_t			*mlb_mlx;
	mlxcx_buffer_state_t	mlb_state;
	uint_t			mlb_wqe_index;
	mlxcx_dma_buffer_t	mlb_dma;
	mblk_t			*mlb_mp;
	frtn_t			mlb_frtn;
} mlxcx_buffer_t;

typedef enum {
	MLXCX_CQ_ALLOC		= 1 << 0,
	MLXCX_CQ_CREATED	= 1 << 1,
	MLXCX_CQ_DESTROYED	= 1 << 2,
	MLXCX_CQ_EQAVL		= 1 << 3,
	MLXCX_CQ_BLOCKED_MAC	= 1 << 4,
	MLXCX_CQ_TEARDOWN	= 1 << 5,
	MLXCX_CQ_POLLING	= 1 << 6,
	MLXCX_CQ_ARMED		= 1 << 7,
} mlxcx_completionq_state_t;

typedef struct mlxcx_work_queue mlxcx_work_queue_t;

typedef struct mlxcx_completion_queue {
	kmutex_t			mlcq_mtx;
	kmutex_t			mlcq_arm_mtx;
	mlxcx_t				*mlcq_mlx;
	mlxcx_completionq_state_t	mlcq_state;

	mlxcx_port_stats_t		*mlcq_stats;

	list_node_t			mlcq_entry;
	avl_node_t			mlcq_eq_entry;

	uint_t				mlcq_num;

	mlxcx_work_queue_t		*mlcq_wq;
	mlxcx_event_queue_t		*mlcq_eq;

	/* UAR region that has this CQ's UAR doorbell in it */
	mlxcx_uar_t			*mlcq_uar;

	mlxcx_dma_buffer_t		mlcq_dma;

	size_t				mlcq_entshift;
	size_t				mlcq_nents;
	mlxcx_completionq_ent_t		*mlcq_ent;
	uint32_t			mlcq_cc;	/* consumer counter */
	uint32_t			mlcq_cc_armed;	/* cc at last arm */
	uint32_t			mlcq_ec;	/* event counter */
	uint32_t			mlcq_ec_armed;	/* ec at last arm */

	mlxcx_dma_buffer_t		mlcq_doorbell_dma;
	mlxcx_completionq_doorbell_t	*mlcq_doorbell;

	uint64_t			mlcq_bufcnt;
	size_t				mlcq_bufhwm;
	size_t				mlcq_buflwm;
	list_t				mlcq_buffers;
	kmutex_t			mlcq_bufbmtx;
	list_t				mlcq_buffers_b;

	uint_t				mlcq_check_disarm_cnt;
	uint64_t			mlcq_check_disarm_cc;

	uint_t				mlcq_cqemod_period_usec;
	uint_t				mlcq_cqemod_count;

	mac_ring_handle_t		mlcq_mac_hdl;
	uint64_t			mlcq_mac_gen;

	boolean_t			mlcq_fm_repd_qstate;
} mlxcx_completion_queue_t;

typedef enum {
	MLXCX_WQ_ALLOC		= 1 << 0,
	MLXCX_WQ_CREATED	= 1 << 1,
	MLXCX_WQ_STARTED	= 1 << 2,
	MLXCX_WQ_DESTROYED	= 1 << 3,
	MLXCX_WQ_TEARDOWN	= 1 << 4,
	MLXCX_WQ_BUFFERS	= 1 << 5,
	MLXCX_WQ_REFILLING	= 1 << 6,
	MLXCX_WQ_BLOCKED_MAC	= 1 << 7
} mlxcx_workq_state_t;

typedef enum {
	MLXCX_WQ_TYPE_SENDQ = 1,
	MLXCX_WQ_TYPE_RECVQ
} mlxcx_workq_type_t;

typedef struct mlxcx_ring_group mlxcx_ring_group_t;

struct mlxcx_work_queue {
	kmutex_t			mlwq_mtx;
	mlxcx_t				*mlwq_mlx;
	mlxcx_workq_type_t		mlwq_type;
	mlxcx_workq_state_t		mlwq_state;

	list_node_t			mlwq_entry;
	list_node_t			mlwq_group_entry;

	mlxcx_ring_group_t		*mlwq_group;

	uint_t				mlwq_num;

	mlxcx_completion_queue_t	*mlwq_cq;
	mlxcx_pd_t			*mlwq_pd;

	/* Required for send queues */
	mlxcx_tis_t			*mlwq_tis;

	/* UAR region that has this WQ's blueflame buffers in it */
	mlxcx_uar_t			*mlwq_uar;

	mlxcx_dma_buffer_t		mlwq_dma;

	mlxcx_eth_inline_mode_t		mlwq_inline_mode;
	size_t				mlwq_entshift;
	size_t				mlwq_nents;
	/* Discriminate based on mlwq_type */
	union {
		mlxcx_sendq_ent_t	*mlwq_send_ent;
		mlxcx_sendq_extra_ent_t	*mlwq_send_extra_ent;
		mlxcx_recvq_ent_t	*mlwq_recv_ent;
		mlxcx_sendq_bf_t	*mlwq_bf_ent;
	};
	uint64_t			mlwq_pc;	/* producer counter */

	uint64_t			mlwq_wqebb_used;
	size_t				mlwq_bufhwm;
	size_t				mlwq_buflwm;

	mlxcx_dma_buffer_t		mlwq_doorbell_dma;
	mlxcx_workq_doorbell_t		*mlwq_doorbell;

	mlxcx_buf_shard_t		*mlwq_bufs;
	mlxcx_buf_shard_t		*mlwq_foreign_bufs;

	taskq_ent_t			mlwq_tqe;

	boolean_t			mlwq_fm_repd_qstate;
};

#define	MLXCX_RQT_MAX_SIZE		64

typedef enum {
	MLXCX_RQT_CREATED		= 1 << 0,
	MLXCX_RQT_DESTROYED		= 1 << 1,
	MLXCX_RQT_DIRTY			= 1 << 2,
} mlxcx_rqtable_state_t;

typedef struct mlxcx_rqtable {
	mlxcx_rqtable_state_t		mlrqt_state;
	list_node_t			mlrqt_entry;
	uint_t				mlrqt_num;

	size_t				mlrqt_max;
	size_t				mlrqt_used;

	size_t				mlrqt_rq_size;
	mlxcx_work_queue_t		**mlrqt_rq;
} mlxcx_rqtable_t;

typedef enum {
	MLXCX_TIR_CREATED		= 1 << 0,
	MLXCX_TIR_DESTROYED		= 1 << 1,
} mlxcx_tir_state_t;

typedef struct mlxcx_tir {
	mlxcx_tir_state_t		mltir_state;
	list_node_t			mltir_entry;
	uint_t				mltir_num;
	mlxcx_tdom_t			*mltir_tdom;
	mlxcx_tir_type_t		mltir_type;
	union {
		mlxcx_rqtable_t			*mltir_rqtable;
		mlxcx_work_queue_t		*mltir_rq;
	};
	mlxcx_tir_hash_fn_t		mltir_hash_fn;
	uint8_t				mltir_toeplitz_key[40];
	mlxcx_tir_rx_hash_l3_type_t	mltir_l3_type;
	mlxcx_tir_rx_hash_l4_type_t	mltir_l4_type;
	mlxcx_tir_rx_hash_fields_t	mltir_hash_fields;
} mlxcx_tir_t;

typedef enum {
	MLXCX_FLOW_GROUP_CREATED	= 1 << 0,
	MLXCX_FLOW_GROUP_BUSY		= 1 << 1,
	MLXCX_FLOW_GROUP_DESTROYED	= 1 << 2,
} mlxcx_flow_group_state_t;

typedef enum {
	MLXCX_FLOW_MATCH_SMAC		= 1 << 0,
	MLXCX_FLOW_MATCH_DMAC		= 1 << 1,
	MLXCX_FLOW_MATCH_VLAN		= 1 << 2,
	MLXCX_FLOW_MATCH_VID		= 1 << 3,
	MLXCX_FLOW_MATCH_IP_VER		= 1 << 4,
	MLXCX_FLOW_MATCH_SRCIP		= 1 << 5,
	MLXCX_FLOW_MATCH_DSTIP		= 1 << 6,
	MLXCX_FLOW_MATCH_IP_PROTO	= 1 << 7,
	MLXCX_FLOW_MATCH_SQN		= 1 << 8,
	MLXCX_FLOW_MATCH_VXLAN		= 1 << 9,
} mlxcx_flow_mask_t;

struct mlxcx_flow_group {
	list_node_t			mlfg_entry;
	list_node_t			mlfg_role_entry;
	mlxcx_flow_group_state_t	mlfg_state;
	mlxcx_flow_table_t		*mlfg_table;
	uint_t				mlfg_num;
	size_t				mlfg_start_idx;
	size_t				mlfg_size;
	size_t				mlfg_avail;
	list_t				mlfg_entries;
	mlxcx_flow_mask_t		mlfg_mask;
};

typedef enum {
	MLXCX_FLOW_ENTRY_RESERVED	= 1 << 0,
	MLXCX_FLOW_ENTRY_CREATED	= 1 << 1,
	MLXCX_FLOW_ENTRY_DELETED	= 1 << 2,
	MLXCX_FLOW_ENTRY_DIRTY		= 1 << 3,
} mlxcx_flow_entry_state_t;

typedef struct {
	mlxcx_tir_t			*mlfed_tir;
	mlxcx_flow_table_t		*mlfed_flow;
} mlxcx_flow_entry_dest_t;

typedef struct mlxcx_flow_entry {
	list_node_t			mlfe_group_entry;
	avl_node_t			mlfe_dmac_entry;
	mlxcx_flow_entry_state_t	mlfe_state;
	mlxcx_flow_table_t		*mlfe_table;
	mlxcx_flow_group_t		*mlfe_group;
	uint_t				mlfe_index;

	mlxcx_flow_action_t		mlfe_action;

	/* Criteria for match */
	uint8_t				mlfe_smac[ETHERADDRL];
	uint8_t				mlfe_dmac[ETHERADDRL];

	mlxcx_vlan_type_t		mlfe_vlan_type;
	uint16_t			mlfe_vid;

	uint_t				mlfe_ip_version;
	uint8_t				mlfe_srcip[IPV6_ADDR_LEN];
	uint8_t				mlfe_dstip[IPV6_ADDR_LEN];

	uint_t				mlfe_ip_proto;
	uint16_t			mlfe_sport;
	uint16_t			mlfe_dport;

	uint32_t			mlfe_sqn;
	uint32_t			mlfe_vxlan_vni;

	/* Destinations */
	size_t				mlfe_ndest;
	mlxcx_flow_entry_dest_t		mlfe_dest[MLXCX_FLOW_MAX_DESTINATIONS];

	/*
	 * mlxcx_group_mac_ts joining this entry to N ring groups;
	 * only used by FEs on the root rx flow table.
	 */
	list_t				mlfe_ring_groups;
} mlxcx_flow_entry_t;

typedef enum {
	MLXCX_FLOW_TABLE_CREATED	= 1 << 0,
	MLXCX_FLOW_TABLE_DESTROYED	= 1 << 1,
	MLXCX_FLOW_TABLE_ROOT		= 1 << 2
} mlxcx_flow_table_state_t;

struct mlxcx_flow_table {
	kmutex_t			mlft_mtx;
	mlxcx_flow_table_state_t	mlft_state;
	uint_t				mlft_level;
	uint_t				mlft_num;
	mlxcx_flow_table_type_t		mlft_type;

	mlxcx_port_t			*mlft_port;

	size_t				mlft_entshift;
	size_t				mlft_nents;

	size_t				mlft_entsize;
	mlxcx_flow_entry_t		*mlft_ent;

	/* First entry not yet claimed by a group */
	size_t				mlft_next_ent;

	list_t				mlft_groups;
};

typedef enum {
	MLXCX_GROUP_RX,
	MLXCX_GROUP_TX
} mlxcx_group_type_t;

typedef enum {
	MLXCX_GROUP_INIT		= 1 << 0,
	MLXCX_GROUP_WQS			= 1 << 1,
	MLXCX_GROUP_TIRTIS		= 1 << 2,
	MLXCX_GROUP_FLOWS		= 1 << 3,
	MLXCX_GROUP_RUNNING		= 1 << 4,
	MLXCX_GROUP_RQT			= 1 << 5,
} mlxcx_group_state_t;

#define	MLXCX_RX_HASH_FT_SIZE_SHIFT	4

typedef enum {
	MLXCX_TIR_ROLE_IPv4 = 0,
	MLXCX_TIR_ROLE_IPv6,
	MLXCX_TIR_ROLE_TCPv4,
	MLXCX_TIR_ROLE_TCPv6,
	MLXCX_TIR_ROLE_UDPv4,
	MLXCX_TIR_ROLE_UDPv6,
	MLXCX_TIR_ROLE_OTHER,

	MLXCX_TIRS_PER_GROUP
} mlxcx_tir_role_t;

typedef struct {
	avl_node_t		mlgm_group_entry;
	list_node_t		mlgm_fe_entry;
	mlxcx_ring_group_t	*mlgm_group;
	uint8_t			mlgm_mac[6];
	mlxcx_flow_entry_t	*mlgm_fe;
} mlxcx_group_mac_t;

typedef struct {
	list_node_t		mlgv_entry;
	boolean_t		mlgv_tagged;
	uint16_t		mlgv_vid;
	mlxcx_flow_entry_t	*mlgv_fe;
} mlxcx_group_vlan_t;

struct mlxcx_ring_group {
	kmutex_t			mlg_mtx;
	mlxcx_t				*mlg_mlx;
	mlxcx_group_state_t		mlg_state;
	mlxcx_group_type_t		mlg_type;

	mac_group_handle_t		mlg_mac_hdl;

	union {
		mlxcx_tis_t		mlg_tis;
		mlxcx_tir_t		mlg_tir[MLXCX_TIRS_PER_GROUP];
	};
	mlxcx_port_t			*mlg_port;

	size_t				mlg_nwqs;
	size_t				mlg_wqs_size;
	mlxcx_work_queue_t		*mlg_wqs;

	mlxcx_rqtable_t			*mlg_rqt;

	/*
	 * Flow table for matching VLAN IDs
	 */
	mlxcx_flow_table_t		*mlg_rx_vlan_ft;
	mlxcx_flow_group_t		*mlg_rx_vlan_fg;
	mlxcx_flow_group_t		*mlg_rx_vlan_def_fg;
	mlxcx_flow_group_t		*mlg_rx_vlan_promisc_fg;
	list_t				mlg_rx_vlans;

	taskq_t				*mlg_refill_tq;

	/*
	 * Flow table for separating out by protocol before hashing
	 */
	mlxcx_flow_table_t		*mlg_rx_hash_ft;

	/*
	 * Links to flow entries on the root flow table which are pointing to
	 * our rx_vlan_ft.
	 */
	avl_tree_t			mlg_rx_macs;
};

typedef enum mlxcx_cmd_state {
	MLXCX_CMD_S_DONE	= 1 << 0,
	MLXCX_CMD_S_ERROR	= 1 << 1
} mlxcx_cmd_state_t;

struct mlxcx_cmd {
	struct mlxcx		*mlcmd_mlxp;
	kmutex_t		mlcmd_lock;
	kcondvar_t		mlcmd_cv;

	boolean_t		mlcmd_poll;
	uint8_t			mlcmd_token;
	mlxcx_cmd_op_t		mlcmd_op;

	/*
	 * Command data and extended mailboxes for responses.
	 */
	const void		*mlcmd_in;
	uint32_t		mlcmd_inlen;
	void			*mlcmd_out;
	uint32_t		mlcmd_outlen;
	list_t			mlcmd_mbox_in;
	uint8_t			mlcmd_nboxes_in;
	list_t			mlcmd_mbox_out;
	uint8_t			mlcmd_nboxes_out;
	/*
	 * Status information.
	 */
	mlxcx_cmd_state_t	mlcmd_state;
	uint8_t			mlcmd_status;
};

/*
 * Our view of capabilities.
 */
typedef struct mlxcx_hca_cap {
	mlxcx_hca_cap_mode_t	mhc_mode;
	mlxcx_hca_cap_type_t	mhc_type;
	union {
		uint8_t				mhc_bulk[MLXCX_HCA_CAP_SIZE];
		mlxcx_hca_cap_general_caps_t	mhc_general;
		mlxcx_hca_cap_eth_caps_t	mhc_eth;
		mlxcx_hca_cap_flow_caps_t	mhc_flow;
	};
} mlxcx_hca_cap_t;

typedef struct {
	/* Cooked values */
	boolean_t		mlc_checksum;
	boolean_t		mlc_lso;
	boolean_t		mlc_vxlan;
	boolean_t		mlc_pcam;
	boolean_t		mlc_ext_ptys;
	size_t			mlc_max_lso_size;
	size_t			mlc_max_rqt_size;

	size_t			mlc_max_rx_ft_shift;
	size_t			mlc_max_rx_fe_dest;
	size_t			mlc_max_rx_flows;
	size_t			mlc_max_rx_ft;

	size_t			mlc_max_tir;

	/* Raw caps data */
	mlxcx_hca_cap_t		mlc_hca_cur;
	mlxcx_hca_cap_t		mlc_hca_max;
	mlxcx_hca_cap_t		mlc_ether_cur;
	mlxcx_hca_cap_t		mlc_ether_max;
	mlxcx_hca_cap_t		mlc_nic_flow_cur;
	mlxcx_hca_cap_t		mlc_nic_flow_max;
} mlxcx_caps_t;

typedef struct {
	uint_t			mldp_eq_size_shift;
	uint_t			mldp_cq_size_shift;
	uint_t			mldp_cq_size_shift_default;
	uint_t			mldp_rq_size_shift;
	uint_t			mldp_rq_size_shift_default;
	uint_t			mldp_sq_size_shift;
	uint_t			mldp_sq_size_shift_default;
	uint_t			mldp_cqemod_period_usec;
	uint_t			mldp_cqemod_count;
	uint_t			mldp_intrmod_period_usec;
	uint_t			mldp_rx_ngroups_large;
	uint_t			mldp_rx_ngroups_small;
	uint_t			mldp_rx_nrings_per_large_group;
	uint_t			mldp_rx_nrings_per_small_group;
	uint_t			mldp_rx_per_cq;
	uint_t			mldp_tx_ngroups;
	uint_t			mldp_tx_nrings_per_group;
	uint_t			mldp_ftbl_root_size_shift;
	size_t			mldp_tx_bind_threshold;
	uint_t			mldp_ftbl_vlan_size_shift;
	uint64_t		mldp_eq_check_interval_sec;
	uint64_t		mldp_cq_check_interval_sec;
	uint64_t		mldp_wq_check_interval_sec;
	uint_t			mldp_rx_p50_loan_min_size;
} mlxcx_drv_props_t;
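
/*
 * Most of these can be tuned with driver properties in mlxcx.conf; the
 * property names below are assumptions for illustration -- consult
 * mlxcx(4D) for the authoritative list. For example:
 *
 *	cq_size_shift = 12;
 *	rx_per_cq = 1024;
 */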

typedef struct {
	mlxcx_t	*mlts_mlx;
	uint8_t	mlts_index;
	id_t	mlts_ksensor;
	int16_t	mlts_value;
	int16_t	mlts_max_value;
	uint8_t	mlts_name[MLXCX_MTMP_NAMELEN];
} mlxcx_temp_sensor_t;

/*
 * The oldest card supported by this driver is ConnectX-4. So far (at least),
 * newer models tend to just add features vs. replacing them, so it seems
 * reasonable to assume an unknown model likely supports everything the
 * ConnectX-6 cards do.
 */
typedef enum {
	MLXCX_DEV_CX4		= 0,
	MLXCX_DEV_CX5		= 1,
	MLXCX_DEV_CX6		= 2,
	MLXCX_DEV_UNKNOWN	= 3,
} mlxcx_dev_type_t;

typedef enum {
	MLXCX_ATTACH_FM		= 1 << 0,
	MLXCX_ATTACH_PCI_CONFIG	= 1 << 1,
	MLXCX_ATTACH_REGS	= 1 << 2,
	MLXCX_ATTACH_CMD	= 1 << 3,
	MLXCX_ATTACH_ENABLE_HCA	= 1 << 4,
	MLXCX_ATTACH_PAGE_LIST	= 1 << 5,
	MLXCX_ATTACH_INIT_HCA	= 1 << 6,
	MLXCX_ATTACH_UAR_PD_TD	= 1 << 7,
	MLXCX_ATTACH_INTRS	= 1 << 8,
	MLXCX_ATTACH_PORTS	= 1 << 9,
	MLXCX_ATTACH_MAC_HDL	= 1 << 10,
	MLXCX_ATTACH_CQS	= 1 << 11,
	MLXCX_ATTACH_WQS	= 1 << 12,
	MLXCX_ATTACH_GROUPS	= 1 << 13,
	MLXCX_ATTACH_BUFS	= 1 << 14,
	MLXCX_ATTACH_CAPS	= 1 << 15,
	MLXCX_ATTACH_CHKTIMERS	= 1 << 16,
	MLXCX_ATTACH_ASYNC_TQ	= 1 << 17,
	MLXCX_ATTACH_SENSORS	= 1 << 18
} mlxcx_attach_progress_t;

struct mlxcx {
	/* entry on the mlxcx_glist */
	list_node_t		mlx_gentry;

	dev_info_t		*mlx_dip;
	int			mlx_inst;
	mlxcx_attach_progress_t	mlx_attach;

	mlxcx_dev_type_t	mlx_type;
	mlxcx_drv_props_t	mlx_props;

	/*
	 * Misc. data
	 */
	uint16_t		mlx_fw_maj;
	uint16_t		mlx_fw_min;
	uint16_t		mlx_fw_rev;
	uint16_t		mlx_cmd_rev;

	/*
	 * Various capabilities of hardware.
	 */
	mlxcx_caps_t		*mlx_caps;

	uint_t			mlx_max_sdu;
	uint_t			mlx_sdu;

	/*
	 * FM State
	 */
	int			mlx_fm_caps;

	/*
	 * PCI Data
	 */
	ddi_acc_handle_t	mlx_cfg_handle;
	ddi_acc_handle_t	mlx_regs_handle;
	caddr_t			mlx_regs_base;

	/*
	 * MAC handle
	 */
	mac_handle_t		mlx_mac_hdl;

	/*
	 * Main command queue for issuing general FW control commands.
	 */
	mlxcx_cmd_queue_t	mlx_cmd;

	/*
	 * Interrupts
	 */
	uint_t			mlx_intr_pri;
	uint_t			mlx_async_intr_pri;
	uint_t			mlx_intr_type;		/* always MSI-X */
	int			mlx_intr_count;
	size_t			mlx_intr_size;		/* allocation size */
	int			mlx_intr_cq0;
	ddi_intr_handle_t	*mlx_intr_handles;

	/*
	 * Basic firmware resources which we use for a variety of things.
	 * The UAR is a reference to a page where CQ and EQ doorbells are
	 * located. It also holds all the BlueFlame stuff (which we don't
	 * use).
	 */
	mlxcx_uar_t		mlx_uar;
	/*
	 * The PD (Protection Domain) and TDOM (Transport Domain) are opaque
	 * entities to us (they're Infiniband constructs we don't actually care
	 * about) -- we just allocate them and shove their ID numbers in
	 * whenever we're asked for one.
	 *
	 * The "reserved" LKEY is what we should put in queue entries that
	 * have references to memory to indicate that they're using linear
	 * addresses (comes from the QUERY_SPECIAL_CONTEXTS cmd).
	 */
	mlxcx_pd_t		mlx_pd;
	mlxcx_tdom_t		mlx_tdom;
	uint_t			mlx_rsvd_lkey;

	/*
	 * Our event queues. These are 1:1 with interrupts.
	 */
	size_t			mlx_eqs_size;		/* allocation size */
	mlxcx_event_queue_t	*mlx_eqs;

	/*
	 * Page list. These represent the set of 4k pages we've given to
	 * hardware.
	 *
	 * We can add to this list at the request of hardware from interrupt
	 * context (the PAGE_REQUEST event), so it's protected by pagemtx.
	 */
	kmutex_t		mlx_pagemtx;
	uint_t			mlx_npages;
	avl_tree_t		mlx_pages;

	mlxcx_async_param_t	mlx_npages_req[MLXCX_FUNC_ID_MAX + 1];

	/*
	 * Taskq for processing asynchronous events which may issue
	 * commands to the HCA.
	 */
	taskq_t			*mlx_async_tq;

	/*
	 * Port state
	 */
	uint_t			mlx_nports;
	size_t			mlx_ports_size;
	mlxcx_port_t		*mlx_ports;

	/*
	 * Completion queues (CQs). These are also indexed off the
	 * event_queue_ts that they each report to.
	 */
	list_t			mlx_cqs;

	uint_t			mlx_next_eq;

	/*
	 * Work queues (WQs).
	 */
	list_t			mlx_wqs;

	/*
	 * Ring groups
	 */
	size_t			mlx_rx_ngroups;
	size_t			mlx_rx_groups_size;
	mlxcx_ring_group_t	*mlx_rx_groups;

	size_t			mlx_tx_ngroups;
	size_t			mlx_tx_groups_size;
	mlxcx_ring_group_t	*mlx_tx_groups;

	kmem_cache_t		*mlx_bufs_cache;
	list_t			mlx_buf_shards;

	ddi_periodic_t		mlx_eq_checktimer;
	ddi_periodic_t		mlx_cq_checktimer;
	ddi_periodic_t		mlx_wq_checktimer;

	/*
	 * Sensors
	 */
	uint8_t			mlx_temp_nsensors;
	mlxcx_temp_sensor_t	*mlx_temp_sensors;
};

/*
 * Register access
 */
extern uint16_t mlxcx_get16(mlxcx_t *, uintptr_t);
extern uint32_t mlxcx_get32(mlxcx_t *, uintptr_t);
extern uint64_t mlxcx_get64(mlxcx_t *, uintptr_t);

extern void mlxcx_put32(mlxcx_t *, uintptr_t, uint32_t);
extern void mlxcx_put64(mlxcx_t *, uintptr_t, uint64_t);

extern void mlxcx_uar_put32(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint32_t);
extern void mlxcx_uar_put64(mlxcx_t *, mlxcx_uar_t *, uintptr_t, uint64_t);

/*
 * Logging functions.
 */
extern void mlxcx_warn(mlxcx_t *, const char *, ...);
extern void mlxcx_note(mlxcx_t *, const char *, ...);
extern void mlxcx_panic(mlxcx_t *, const char *, ...);

extern void mlxcx_fm_ereport(mlxcx_t *, const char *);

extern void mlxcx_check_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_check_rq(mlxcx_t *, mlxcx_work_queue_t *);

/*
 * DMA Functions
 */
extern void mlxcx_dma_free(mlxcx_dma_buffer_t *);
extern boolean_t mlxcx_dma_alloc(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t, size_t, boolean_t);
extern boolean_t mlxcx_dma_init(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, boolean_t);
extern boolean_t mlxcx_dma_bind_mblk(mlxcx_t *, mlxcx_dma_buffer_t *,
    const mblk_t *, size_t, boolean_t);
extern boolean_t mlxcx_dma_alloc_offset(mlxcx_t *, mlxcx_dma_buffer_t *,
    ddi_dma_attr_t *, ddi_device_acc_attr_t *, boolean_t,
    size_t, size_t, boolean_t);
extern void mlxcx_dma_unbind(mlxcx_t *, mlxcx_dma_buffer_t *);
extern void mlxcx_dma_acc_attr(mlxcx_t *, ddi_device_acc_attr_t *);
extern void mlxcx_dma_page_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_queue_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_qdbell_attr(mlxcx_t *, ddi_dma_attr_t *);
extern void mlxcx_dma_buf_attr(mlxcx_t *, ddi_dma_attr_t *);

extern boolean_t mlxcx_give_pages(mlxcx_t *, int32_t, int32_t *);

static inline const ddi_dma_cookie_t *
mlxcx_dma_cookie_iter(const mlxcx_dma_buffer_t *db,
    const ddi_dma_cookie_t *prev)
{
	ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
	return (ddi_dma_cookie_iter(db->mxdb_dma_handle, prev));
}

static inline const ddi_dma_cookie_t *
mlxcx_dma_cookie_one(const mlxcx_dma_buffer_t *db)
{
	ASSERT(db->mxdb_flags & MLXCX_DMABUF_BOUND);
	return (ddi_dma_cookie_one(db->mxdb_dma_handle));
}
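
/*
 * Example usage (illustrative): walking every cookie of a bound DMA
 * buffer with the iterator above. Passing NULL as prev yields the first
 * cookie; the iterator returns NULL once all cookies have been seen.
 *
 *	const ddi_dma_cookie_t *ck;
 *	for (ck = mlxcx_dma_cookie_iter(db, NULL); ck != NULL;
 *	    ck = mlxcx_dma_cookie_iter(db, ck)) {
 *		use ck->dmac_laddress and ck->dmac_size
 *	}
 */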

/*
 * From mlxcx_intr.c
 */
extern boolean_t mlxcx_intr_setup(mlxcx_t *);
extern void mlxcx_intr_disable(mlxcx_t *);
extern void mlxcx_intr_teardown(mlxcx_t *);
extern void mlxcx_arm_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern void mlxcx_arm_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern void mlxcx_update_cqci(mlxcx_t *, mlxcx_completion_queue_t *);

extern mblk_t *mlxcx_rx_poll(mlxcx_t *, mlxcx_completion_queue_t *, size_t);

/*
 * From mlxcx_gld.c
 */
extern boolean_t mlxcx_register_mac(mlxcx_t *);

/*
 * From mlxcx_ring.c
 */
extern boolean_t mlxcx_wq_alloc_dma(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_wq_rele_dma(mlxcx_t *, mlxcx_work_queue_t *);

extern boolean_t mlxcx_buf_create(mlxcx_t *, mlxcx_buf_shard_t *,
    mlxcx_buffer_t **);
extern boolean_t mlxcx_buf_create_foreign(mlxcx_t *, mlxcx_buf_shard_t *,
    mlxcx_buffer_t **);
extern mlxcx_buffer_t *mlxcx_buf_take(mlxcx_t *, mlxcx_work_queue_t *);
extern size_t mlxcx_buf_take_n(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t **, size_t);
extern boolean_t mlxcx_buf_loan(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_buf_return_chain(mlxcx_t *, mlxcx_buffer_t *, boolean_t);
extern void mlxcx_buf_destroy(mlxcx_t *, mlxcx_buffer_t *);
extern void mlxcx_shard_ready(mlxcx_buf_shard_t *);
extern void mlxcx_shard_draining(mlxcx_buf_shard_t *);

extern uint_t mlxcx_buf_bind_or_copy(mlxcx_t *, mlxcx_work_queue_t *,
    mblk_t *, size_t, mlxcx_buffer_t **);

extern boolean_t mlxcx_rx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_group_setup(mlxcx_t *, mlxcx_ring_group_t *);

extern boolean_t mlxcx_rx_group_start(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_tx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
    mlxcx_work_queue_t *);
extern boolean_t mlxcx_rx_ring_start(mlxcx_t *, mlxcx_ring_group_t *,
    mlxcx_work_queue_t *);

extern boolean_t mlxcx_rq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t *);
extern boolean_t mlxcx_rq_add_buffers(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_buffer_t **, size_t);
extern boolean_t mlxcx_sq_add_buffer(mlxcx_t *, mlxcx_work_queue_t *,
    uint8_t *, size_t, uint32_t, mlxcx_buffer_t *);
extern boolean_t mlxcx_sq_add_nop(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_rq_refill(mlxcx_t *, mlxcx_work_queue_t *);

extern void mlxcx_teardown_groups(mlxcx_t *);
extern void mlxcx_wq_teardown(mlxcx_t *, mlxcx_work_queue_t *);
extern void mlxcx_cq_teardown(mlxcx_t *, mlxcx_completion_queue_t *);
extern void mlxcx_teardown_rx_group(mlxcx_t *, mlxcx_ring_group_t *);
extern void mlxcx_teardown_tx_group(mlxcx_t *, mlxcx_ring_group_t *);

extern void mlxcx_tx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ent_t *, mlxcx_buffer_t *);
extern mblk_t *mlxcx_rx_completion(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ent_t *, mlxcx_buffer_t *);

extern mlxcx_buf_shard_t *mlxcx_mlbs_create(mlxcx_t *);

/*
 * Flow mgmt
 */
extern boolean_t mlxcx_add_umcast_entry(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *, const uint8_t *);
extern boolean_t mlxcx_remove_umcast_entry(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *, const uint8_t *);
extern void mlxcx_remove_all_umcast_entries(mlxcx_t *, mlxcx_port_t *,
    mlxcx_ring_group_t *);
extern boolean_t mlxcx_setup_flow_group(mlxcx_t *, mlxcx_flow_table_t *,
    mlxcx_flow_group_t *);
extern void mlxcx_teardown_flow_table(mlxcx_t *, mlxcx_flow_table_t *);

extern void mlxcx_remove_all_vlan_entries(mlxcx_t *, mlxcx_ring_group_t *);
extern boolean_t mlxcx_remove_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
    boolean_t, uint16_t);
extern boolean_t mlxcx_add_vlan_entry(mlxcx_t *, mlxcx_ring_group_t *,
    boolean_t, uint16_t);

/*
 * Command functions
 */
extern boolean_t mlxcx_cmd_queue_init(mlxcx_t *);
extern void mlxcx_cmd_queue_fini(mlxcx_t *);

extern void mlxcx_cmd_completion(mlxcx_t *, mlxcx_eventq_ent_t *);
extern void mlxcx_cmd_eq_enable(mlxcx_t *);
extern void mlxcx_cmd_eq_disable(mlxcx_t *);

extern boolean_t mlxcx_cmd_enable_hca(mlxcx_t *);
extern boolean_t mlxcx_cmd_disable_hca(mlxcx_t *);

extern boolean_t mlxcx_cmd_query_issi(mlxcx_t *, uint_t *);
extern boolean_t mlxcx_cmd_set_issi(mlxcx_t *, uint16_t);

extern boolean_t mlxcx_cmd_query_pages(mlxcx_t *, uint_t, int32_t *);
extern boolean_t mlxcx_cmd_give_pages(mlxcx_t *, uint_t, int32_t,
    mlxcx_dev_page_t **);
extern boolean_t mlxcx_cmd_return_pages(mlxcx_t *, int32_t, uint64_t *,
    int32_t *);

extern boolean_t mlxcx_cmd_query_hca_cap(mlxcx_t *, mlxcx_hca_cap_type_t,
    mlxcx_hca_cap_mode_t, mlxcx_hca_cap_t *);

extern boolean_t mlxcx_cmd_set_driver_version(mlxcx_t *, const char *);

extern boolean_t mlxcx_cmd_init_hca(mlxcx_t *);
extern boolean_t mlxcx_cmd_teardown_hca(mlxcx_t *);

extern boolean_t mlxcx_cmd_alloc_uar(mlxcx_t *, mlxcx_uar_t *);
extern boolean_t mlxcx_cmd_dealloc_uar(mlxcx_t *, mlxcx_uar_t *);

extern boolean_t mlxcx_cmd_alloc_pd(mlxcx_t *, mlxcx_pd_t *);
extern boolean_t mlxcx_cmd_dealloc_pd(mlxcx_t *, mlxcx_pd_t *);

extern boolean_t mlxcx_cmd_alloc_tdom(mlxcx_t *, mlxcx_tdom_t *);
extern boolean_t mlxcx_cmd_dealloc_tdom(mlxcx_t *, mlxcx_tdom_t *);

extern boolean_t mlxcx_cmd_create_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern boolean_t mlxcx_cmd_destroy_eq(mlxcx_t *, mlxcx_event_queue_t *);
extern boolean_t mlxcx_cmd_query_eq(mlxcx_t *, mlxcx_event_queue_t *,
    mlxcx_eventq_ctx_t *);

extern boolean_t mlxcx_cmd_create_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern boolean_t mlxcx_cmd_destroy_cq(mlxcx_t *, mlxcx_completion_queue_t *);
extern boolean_t mlxcx_cmd_query_cq(mlxcx_t *, mlxcx_completion_queue_t *,
    mlxcx_completionq_ctx_t *);

extern boolean_t mlxcx_cmd_create_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_start_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_stop_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_destroy_rq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_query_rq(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_rq_ctx_t *);

extern boolean_t mlxcx_cmd_create_tir(mlxcx_t *, mlxcx_tir_t *);
extern boolean_t mlxcx_cmd_destroy_tir(mlxcx_t *, mlxcx_tir_t *);

extern boolean_t mlxcx_cmd_create_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_start_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_stop_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_destroy_sq(mlxcx_t *, mlxcx_work_queue_t *);
extern boolean_t mlxcx_cmd_query_sq(mlxcx_t *, mlxcx_work_queue_t *,
    mlxcx_sq_ctx_t *);

extern boolean_t mlxcx_cmd_create_tis(mlxcx_t *, mlxcx_tis_t *);
extern boolean_t mlxcx_cmd_destroy_tis(mlxcx_t *, mlxcx_tis_t *);

extern boolean_t mlxcx_cmd_query_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_special_ctxs(mlxcx_t *);

extern boolean_t mlxcx_cmd_modify_nic_vport_ctx(mlxcx_t *, mlxcx_port_t *,
    mlxcx_modify_nic_vport_ctx_fields_t);

extern boolean_t mlxcx_cmd_create_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
extern boolean_t mlxcx_cmd_destroy_flow_table(mlxcx_t *, mlxcx_flow_table_t *);
extern boolean_t mlxcx_cmd_set_flow_table_root(mlxcx_t *, mlxcx_flow_table_t *);

extern boolean_t mlxcx_cmd_create_flow_group(mlxcx_t *, mlxcx_flow_group_t *);
extern boolean_t mlxcx_cmd_set_flow_table_entry(mlxcx_t *,
    mlxcx_flow_entry_t *);
extern boolean_t mlxcx_cmd_delete_flow_table_entry(mlxcx_t *,
    mlxcx_flow_entry_t *);
extern boolean_t mlxcx_cmd_destroy_flow_group(mlxcx_t *, mlxcx_flow_group_t *);

extern boolean_t mlxcx_cmd_access_register(mlxcx_t *, mlxcx_cmd_reg_opmod_t,
    mlxcx_register_id_t, mlxcx_register_data_t *);
extern boolean_t mlxcx_cmd_query_port_mtu(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_status(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_modify_port_status(mlxcx_t *, mlxcx_port_t *,
    mlxcx_port_status_t);
extern boolean_t mlxcx_cmd_query_port_speed(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_query_port_fec(mlxcx_t *, mlxcx_port_t *);
extern boolean_t mlxcx_cmd_modify_port_fec(mlxcx_t *, mlxcx_port_t *,
    mlxcx_pplm_fec_caps_t);

extern boolean_t mlxcx_cmd_set_port_mtu(mlxcx_t *, mlxcx_port_t *);

extern boolean_t mlxcx_cmd_create_rqt(mlxcx_t *, mlxcx_rqtable_t *);
extern boolean_t mlxcx_cmd_destroy_rqt(mlxcx_t *, mlxcx_rqtable_t *);

extern boolean_t mlxcx_cmd_set_int_mod(mlxcx_t *, uint_t, uint_t);

extern boolean_t mlxcx_cmd_query_module_status(mlxcx_t *, uint_t,
    mlxcx_module_status_t *, mlxcx_module_error_type_t *);
extern boolean_t mlxcx_cmd_set_port_led(mlxcx_t *, mlxcx_port_t *, uint16_t);

/* Comparators for avl_ts */
extern int mlxcx_cq_compare(const void *, const void *);
extern int mlxcx_dmac_fe_compare(const void *, const void *);
extern int mlxcx_grmac_compare(const void *, const void *);
extern int mlxcx_page_compare(const void *, const void *);

extern void mlxcx_update_link_state(mlxcx_t *, mlxcx_port_t *);

extern void mlxcx_eth_proto_to_string(mlxcx_eth_proto_t, mlxcx_ext_eth_proto_t,
    char *, size_t);
extern const char *mlxcx_port_status_string(mlxcx_port_status_t);

extern const char *mlxcx_event_name(mlxcx_event_t);

/*
 * Sensor Functions
 */
extern boolean_t mlxcx_setup_sensors(mlxcx_t *);
extern void mlxcx_teardown_sensors(mlxcx_t *);

#ifdef __cplusplus
}
#endif

#endif /* _MLXCX_H */