xref: /illumos-gate/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hw.h (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_DAPL_TAVOR_HW_H
28 #define	_DAPL_TAVOR_HW_H
29 
30 /*
31  * dapl_tavor_hw.h
32  *    Contains all the structure definitions and #defines for all Tavor
33  *    hardware resources and registers.
34  *    Most of these definitions have been replicated from the tavor_hw.h
35  *    header file used by the tavor device driver.
36  */
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 
42 #include "dapl.h"
43 #include "dapl_tavor_ibtf.h"
44 
45 
46 /*
47  * Ownership flags used to define hardware or software ownership for
48  * various Tavor resources
49  */
/*
 * Both ownership values are unsigned so that they shift, mask and compare
 * consistently against the uint32_t words they are extracted from.
 */
#define	TAVOR_HW_OWNER			0x1U	/* owned by hardware */
#define	TAVOR_SW_OWNER			0x0U	/* owned by software */
52 
53 /*
54  * Tavor Completion Queue Entries (CQE)
55  *    Each CQE contains enough information for the software to associate the
56  *    completion with the Work Queue Element (WQE) to which it corresponds.
57  *
58  *    Note: The following structure is not #define'd with both little-endian
59  *    and big-endian definitions.  This is because each CQE's individual
60  *    fields are not directly accessed except through the macros defined below.
61  */
62 
63 /*
64  * The following defines are used for Tavor CQ error handling.  Note: For
65  * CQEs which correspond to error events, the Tavor device requires some
66  * special handling by software.  These defines are used to identify and
67  * extract the necessary information from each error CQE, including status
 * code (below), doorbell count, and whether an error completion is for a
69  * send or receive work request.
70  */
/* Error CQE: status code field */
#define	TAVOR_CQE_ERR_STATUS_MASK	0xFF
#define	TAVOR_CQE_ERR_STATUS_SHIFT	24
/* Error CQE: doorbell count field */
#define	TAVOR_CQE_ERR_DBDCNT_MASK	0xFFFF
/* Opcode values identifying a receive or a send error completion */
#define	TAVOR_CQE_RECV_ERR_OPCODE	0xFE
#define	TAVOR_CQE_SEND_ERR_OPCODE	0xFF
/* NOTE(review): presumably CQ error-handling dispositions; confirm in users */
#define	TAVOR_CQ_SYNC_AND_DB		0
#define	TAVOR_CQ_RECYCLE_ENTRY		1
78 
79 /*
80  * These are the defines for the Tavor CQ entry types.  They are also
81  * specified by the Tavor register specification.  They indicate what type
82  * of work request is completing (for successful completions).  Note: The
83  * "SND" or "RCV" in each define is used to indicate whether the completion
84  * work request was from the Send work queue or the Receive work queue on
85  * the associated QP.
86  */
/* Completions of work requests from the Send work queue */
#define	TAVOR_CQE_SND_RDMAWR		0x08
#define	TAVOR_CQE_SND_RDMAWR_IMM	0x09
#define	TAVOR_CQE_SND_SEND		0x0A
#define	TAVOR_CQE_SND_SEND_IMM		0x0B
#define	TAVOR_CQE_SND_RDMARD		0x10
#define	TAVOR_CQE_SND_ATOMIC_CS		0x11
#define	TAVOR_CQE_SND_ATOMIC_FA		0x12
#define	TAVOR_CQE_SND_BIND_MW		0x18

/*
 * Completions of work requests from the Receive work queue.  Note that
 * 0x09 and 0x0B are also used by Send completions above, so the numeric
 * value alone does not identify the queue.
 */
#define	TAVOR_CQE_RCV_RECV		0x02
#define	TAVOR_CQE_RCV_RECV_IMM		0x03
#define	TAVOR_CQE_RCV_RECV2		0x04
#define	TAVOR_CQE_RCV_RECV_IMM2		0x05
#define	TAVOR_CQE_RCV_RDMAWR_IMM	0x09
#define	TAVOR_CQE_RCV_RDMAWR_IMM2	0x0B
101 
102 /*
103  * These are the defines for the Tavor CQ completion statuses.  They are
104  * specified by the Tavor register specification.
105  */
/* Status values 0x00 - 0x06 */
#define	TAVOR_CQE_SUCCESS		0x00
#define	TAVOR_CQE_LOC_LEN_ERR		0x01
#define	TAVOR_CQE_LOC_OP_ERR		0x02
#define	TAVOR_CQE_LOC_EEC_ERR		0x03	/* unsupported: RD */
#define	TAVOR_CQE_LOC_PROT_ERR		0x04
#define	TAVOR_CQE_WR_FLUSHED_ERR	0x05
#define	TAVOR_CQE_MW_BIND_ERR		0x06
/* Status values 0x10 - 0x16 */
#define	TAVOR_CQE_BAD_RESPONSE_ERR	0x10
#define	TAVOR_CQE_LOCAL_ACCESS_ERR	0x11
#define	TAVOR_CQE_REM_INV_REQ_ERR	0x12
#define	TAVOR_CQE_REM_ACC_ERR		0x13
#define	TAVOR_CQE_REM_OP_ERR		0x14
#define	TAVOR_CQE_TRANS_TO_ERR		0x15
#define	TAVOR_CQE_RNRNAK_TO_ERR		0x16
/* Status values 0x20 - 0x24 (all RD-only, hence unsupported) */
#define	TAVOR_CQE_LOCAL_RDD_VIO_ERR	0x20	/* unsupported: RD */
#define	TAVOR_CQE_REM_INV_RD_REQ_ERR	0x21	/* unsupported: RD */
#define	TAVOR_CQE_EEC_REM_ABORTED_ERR	0x22	/* unsupported: RD */
#define	TAVOR_CQE_INV_EEC_NUM_ERR	0x23	/* unsupported: RD */
#define	TAVOR_CQE_INV_EEC_STATE_ERR	0x24	/* unsupported: RD */
125 
/*
 * tavor_hw_cqe_t
 *    Describes a CQE as eight 32-bit words (32 bytes); the bitfield widths
 *    within each word below sum to 32.  CQE contents are read and written
 *    through the TAVOR_CQE_* macros, which index the entry as an array of
 *    uint32_t, rather than through these bitfields.
 */
typedef struct tavor_hw_cqe_s {
	/* word 0 */
	uint32_t	ver		:4;
	uint32_t			:4;
	uint32_t	my_qpn		:24;
	/* word 1 */
	uint32_t			:8;
	uint32_t	my_ee		:24;
	/* word 2 */
	uint32_t			:8;
	uint32_t	rqpn		:24;
	/* word 3 */
	uint32_t	sl		:4;
	uint32_t			:4;
	uint32_t	grh		:1;
	uint32_t	ml_path		:7;
	uint32_t	rlid		:16;
	/* word 4 */
	uint32_t	imm_eth_pkey_cred;
	/* word 5 */
	uint32_t	byte_cnt;
	/* word 6 */
	uint32_t	wqe_addr	:26;
	uint32_t	wqe_sz		:6;
	/* word 7 */
	uint32_t	opcode		:8;
	uint32_t	send_or_recv	:1;
	uint32_t			:15;
	uint32_t	owner		:1;
	uint32_t	status		:7;
} tavor_hw_cqe_t;
/* NOTE(review): presumably the values of the CQE send_or_recv bit; confirm */
#define	TAVOR_COMPLETION_RECV		0x00
#define	TAVOR_COMPLETION_SEND		0x01

/* NOTE(review): presumably the expected CQE "ver" value; confirm */
#define	TAVOR_CQE_DEFAULT_VERSION	0x00
153 
154 /*
155  * The following macros are used for extracting (and in some cases filling in)
156  * information from CQEs
157  */
/* CQE word 0 (see TAVOR_CQE_QPNUM_GET) */
#define	TAVOR_CQE_QPNUM_SHIFT		0
#define	TAVOR_CQE_QPNUM_MASK		0x00FFFFFF

/* CQE word 2 (see TAVOR_CQE_DQPN_GET) */
#define	TAVOR_CQE_DQPN_SHIFT		0
#define	TAVOR_CQE_DQPN_MASK		0x00FFFFFF

/* CQE word 3 (see the SL, GRH, PATHBITS and DLID getters) */
#define	TAVOR_CQE_SL_SHIFT		28
#define	TAVOR_CQE_SL_MASK		0xF0000000
#define	TAVOR_CQE_GRH_SHIFT		23
#define	TAVOR_CQE_GRH_MASK		0x00800000
#define	TAVOR_CQE_PATHBITS_SHIFT	16
#define	TAVOR_CQE_PATHBITS_MASK		0x007F0000
#define	TAVOR_CQE_DLID_SHIFT		0
#define	TAVOR_CQE_DLID_MASK		0x0000FFFF

/* CQE word 7 (see the OPCODE, SENDRECV and OWNER macros) */
#define	TAVOR_CQE_OPCODE_SHIFT		24
#define	TAVOR_CQE_OPCODE_MASK		0xFF000000
#define	TAVOR_CQE_SENDRECV_SHIFT	23
#define	TAVOR_CQE_SENDRECV_MASK		0x00800000
#define	TAVOR_CQE_OWNER_SHIFT		7
#define	TAVOR_CQE_OWNER_MASK		0x00000080
176 
/*
 * Building blocks for the CQE accessors below.  A CQE is treated as an
 * array of 32-bit words; every read converts with BETOH_32() and every
 * write converts with HTOBE_32().
 *
 *    TAVOR_CQE_WORD		- lvalue for 32-bit word "idx" of the CQE
 *    TAVOR_CQE_WORD_GET	- word "idx" converted with BETOH_32()
 *    TAVOR_CQE_FIELD_GET	- converted word, masked then shifted right
 */
#define	TAVOR_CQE_WORD(cqe, idx)					\
	(((uint32_t *)(cqe))[(idx)])
#define	TAVOR_CQE_WORD_GET(cqe, idx)					\
	(BETOH_32(TAVOR_CQE_WORD((cqe), (idx))))
#define	TAVOR_CQE_FIELD_GET(cqe, idx, mask, shift)			\
	((TAVOR_CQE_WORD_GET((cqe), (idx)) & (mask)) >> (shift))

/* Word 0 */
#define	TAVOR_CQE_QPNUM_GET(cqe)					\
	TAVOR_CQE_FIELD_GET((cqe), 0, TAVOR_CQE_QPNUM_MASK,		\
	    TAVOR_CQE_QPNUM_SHIFT)

/* Word 2 */
#define	TAVOR_CQE_DQPN_GET(cqe)						\
	TAVOR_CQE_FIELD_GET((cqe), 2, TAVOR_CQE_DQPN_MASK,		\
	    TAVOR_CQE_DQPN_SHIFT)

/* Word 3 */
#define	TAVOR_CQE_SL_GET(cqe)						\
	TAVOR_CQE_FIELD_GET((cqe), 3, TAVOR_CQE_SL_MASK,		\
	    TAVOR_CQE_SL_SHIFT)
#define	TAVOR_CQE_GRH_GET(cqe)						\
	TAVOR_CQE_FIELD_GET((cqe), 3, TAVOR_CQE_GRH_MASK,		\
	    TAVOR_CQE_GRH_SHIFT)
#define	TAVOR_CQE_PATHBITS_GET(cqe)					\
	TAVOR_CQE_FIELD_GET((cqe), 3, TAVOR_CQE_PATHBITS_MASK,		\
	    TAVOR_CQE_PATHBITS_SHIFT)
#define	TAVOR_CQE_DLID_GET(cqe)						\
	TAVOR_CQE_FIELD_GET((cqe), 3, TAVOR_CQE_DLID_MASK,		\
	    TAVOR_CQE_DLID_SHIFT)

/* Words 4, 5 and 6 are accessed whole */
#define	TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe)				\
	TAVOR_CQE_WORD_GET((cqe), 4)
#define	TAVOR_CQE_IMM_ETH_PKEY_CRED_SET(cqe, arg)			\
	(TAVOR_CQE_WORD((cqe), 4) = HTOBE_32((arg)))
#define	TAVOR_CQE_BYTECNT_GET(cqe)					\
	TAVOR_CQE_WORD_GET((cqe), 5)
#define	TAVOR_CQE_WQEADDRSZ_GET(cqe)					\
	TAVOR_CQE_WORD_GET((cqe), 6)
#define	TAVOR_CQE_WQEADDRSZ_SET(cqe, arg)				\
	(TAVOR_CQE_WORD((cqe), 6) = HTOBE_32((arg)))

/* Word 7 */
#define	TAVOR_CQE_OPCODE_GET(cqe)					\
	TAVOR_CQE_FIELD_GET((cqe), 7, TAVOR_CQE_OPCODE_MASK,		\
	    TAVOR_CQE_OPCODE_SHIFT)
#define	TAVOR_CQE_SENDRECV_GET(cqe)					\
	TAVOR_CQE_FIELD_GET((cqe), 7, TAVOR_CQE_SENDRECV_MASK,		\
	    TAVOR_CQE_SENDRECV_SHIFT)
/* True when the owner bit of word 7 equals TAVOR_SW_OWNER */
#define	TAVOR_CQE_OWNER_IS_SW(cqe)					\
	(TAVOR_CQE_FIELD_GET((cqe), 7, TAVOR_CQE_OWNER_MASK,		\
	    TAVOR_CQE_OWNER_SHIFT) == TAVOR_SW_OWNER)
/*
 * TAVOR_CQE_OWNER_SET_HW
 *    Hands a CQE back to hardware.  This is a plain store, not a
 *    read-modify-write: all of CQE word 7 is replaced, leaving only the
 *    owner bit set.  The value is converted with HTOBE_32(), as in the
 *    other CQE "_SET" macros, because it is being stored into the CQE.
 */
#define	TAVOR_CQE_OWNER_SET_HW(cqe)					\
	(((uint32_t *)(cqe))[7] =					\
	    HTOBE_32((TAVOR_HW_OWNER << TAVOR_CQE_OWNER_SHIFT) &	\
	    TAVOR_CQE_OWNER_MASK))
218 
219 /*
220  * Tavor User Access Region (UAR)
221  *    Tavor doorbells are each rung by writing to the doorbell registers that
222  *    form a User Access Region (UAR).  A doorbell is a write-only hardware
223  *    register which enables passing information from software to hardware
224  *    with minimum software latency. A write operation from the host software
225  *    to these doorbell registers passes information about the HCA resources
226  *    and initiates processing of the doorbell data.  There are 6 types of
227  *    doorbells in Tavor.
228  *
229  *    "Send Doorbell" for synchronizing the attachment of a WQE (or a chain
230  *        of WQEs) to the send queue.
231  *    "RD Send Doorbell" (Same as above, except for RD QPs) is not supported.
232  *    "Receive Doorbell" for synchronizing the attachment of a WQE (or a chain
233  *        of WQEs) to the receive queue.
234  *    "CQ Doorbell" for updating the CQ consumer index and requesting
235  *        completion notifications.
236  *    "EQ Doorbell" for updating the EQ consumer index, arming interrupt
237  *        triggering, and disarming CQ notification requests.
238  *    "InfiniBlast" (which would have enabled access to the "InfiniBlast
239  *        buffer") is not supported.
240  *
241  *    Note: The tavor_hw_uar_t below is the container for all of the various
242  *    doorbell types.  Below we first define several structures which make up
243  *    the contents of those doorbell types.
244  *
245  *    Note also: The following structures are not #define'd with both little-
246  *    endian and big-endian definitions.  This is because each doorbell type
247  *    is not directly accessed except through a single ddi_put64() operation
248  *    (see tavor_qp_send_doorbell, tavor_qp_recv_doorbell, tavor_cq_doorbell,
249  *    or tavor_eq_doorbell)
250  */
/*
 * tavor_hw_uar_send_t
 *    Contents of the send doorbell: two 32-bit words (the bitfield widths
 *    of each word sum to 32).
 */
typedef struct tavor_hw_uar_send_s {
	/* first 32-bit word */
	uint32_t	nda		:26;
	uint32_t	fence		:1;
	uint32_t	nopcode		:5;
	/* second 32-bit word */
	uint32_t	qpn		:24;
	uint32_t			:2;
	uint32_t	nds		:6;
} tavor_hw_uar_send_t;
/*
 * Send doorbell mask and shift counts.  Shift counts of 32 and above
 * only make sense applied to a 64-bit doorbell value.
 */
#define	TAVOR_QPSNDDB_NDA_MASK		0xFFFFFFC0
#define	TAVOR_QPSNDDB_NDA_SHIFT		32
#define	TAVOR_QPSNDDB_F_SHIFT		37
#define	TAVOR_QPSNDDB_NOPCODE_SHIFT	32
#define	TAVOR_QPSNDDB_QPN_SHIFT		8
264 
/*
 * tavor_hw_uar_recv_t
 *    Contents of the receive doorbell: two 32-bit words (the bitfield
 *    widths of each word sum to 32).
 */
typedef struct tavor_hw_uar_recv_s {
	/* first 32-bit word */
	uint32_t	nda		:26;
	uint32_t	nds		:6;
	/* second 32-bit word */
	uint32_t	qpn		:24;
	uint32_t	credits		:8;
} tavor_hw_uar_recv_t;
/*
 * Receive doorbell mask and shift counts.  Shift counts of 32 only make
 * sense applied to a 64-bit doorbell value.
 */
#define	TAVOR_QPRCVDB_NDA_MASK		0xFFFFFFC0
#define	TAVOR_QPRCVDB_NDA_SHIFT		32
#define	TAVOR_QPRCVDB_NDS_SHIFT		32
#define	TAVOR_QPRCVDB_QPN_SHIFT		8

/* Max descriptors per Tavor doorbell */
#define	TAVOR_QP_MAXDESC_PER_DB		256
277 
/*
 * tavor_hw_uar_cq_t
 *    Contents of the CQ doorbell: a word split into an 8-bit command and a
 *    24-bit CQ number, followed by a 32-bit parameter word.
 */
typedef struct tavor_hw_uar_cq_s {
	uint32_t	cmd		:8;
	uint32_t	cqn		:24;
	uint32_t	param;
} tavor_hw_uar_cq_t;
/* Shift counts for the command and CQ number in a 64-bit CQ doorbell value */
#define	TAVOR_CQDB_CMD_SHIFT		56
#define	TAVOR_CQDB_CQN_SHIFT		32

/* CQ doorbell commands */
#define	TAVOR_CQDB_INCR_CONSINDX	0x1
#define	TAVOR_CQDB_NOTIFY_CQ		0x2
#define	TAVOR_CQDB_NOTIFY_CQ_SOLICIT	0x3
#define	TAVOR_CQDB_SET_CONSINDX		0x4
#define	TAVOR_CQDB_NOTIFY_NCQ		0x5

/* Default value for use in NOTIFY_CQ doorbell */
#define	TAVOR_CQDB_DEFAULT_PARAM	0xFFFFFFFF
293 
/*
 * tavor_hw_uar_eq_t
 *    Contents of the EQ doorbell: a word holding an 8-bit command and a
 *    6-bit EQ number (18 bits unnamed between them), followed by a 32-bit
 *    parameter word.
 */
typedef struct tavor_hw_uar_eq_s {
	uint32_t	cmd		:8;
	uint32_t			:18;
	uint32_t	eqn		:6;
	uint32_t	param;
} tavor_hw_uar_eq_t;
300 
/*
 * tavor_hw_uar_t
 *    Container for all doorbell registers of a UAR, with each supported
 *    doorbell exposed as one 64-bit member.  The members add up to 512
 *    32-bit words (2048 bytes).
 */
typedef struct tavor_hw_uar_s {
	uint32_t		rsrv0[4];	/* "RD Send" unsupported */
	uint64_t		send;		/* tavor_hw_uar_send_t */
	uint64_t		recv;		/* tavor_hw_uar_recv_t */
	uint64_t		cq;		/* tavor_hw_uar_cq_t   */
	uint64_t		eq;		/* tavor_hw_uar_eq_t   */
	uint32_t		rsrv1[244];
	uint32_t		iblast[256];	/* "InfiniBlast" unsupported */
} tavor_hw_uar_t;
310 
/*
 * tavor_hw_uar32_t
 *    Same member sequence as tavor_hw_uar_t, but with each doorbell
 *    expressed as a pair of 32-bit words instead of one 64-bit member.
 *    NOTE(review): presumably for callers that ring doorbells with two
 *    32-bit stores; confirm against the users of this type.
 */
typedef struct tavor_hw_uar32_s {
	uint32_t		rsrv0[4];	/* "RD Send" unsupported */
	uint32_t		send[2];	/* tavor_hw_uar_send_t */
	uint32_t		recv[2];	/* tavor_hw_uar_recv_t */
	uint32_t		cq[2];		/* tavor_hw_uar_cq_t   */
	uint32_t		eq[2];		/* tavor_hw_uar_eq_t   */
	uint32_t		rsrv1[244];
	uint32_t		iblast[256];	/* "InfiniBlast" unsupported */
} tavor_hw_uar32_t;
320 
321 
322 /*
323  * Tavor Send Work Queue Element (WQE)
324  *    A Tavor Send WQE is built of the following segments, each of which is a
325  *    multiple of 16 bytes.  Note: Each individual WQE may contain only a
326  *    subset of these segments described below (according to the operation type
327  *    and transport type of the QP).
328  *
 *    The first 16 bytes of every WQE are formed from the "Next/Ctrl" segment.
330  *    This segment contains the address of the next WQE to be executed and the
331  *    information required in order to allocate the resources to execute the
332  *    next WQE.  The "Ctrl" part of this segment contains the control
333  *    information required to execute the WQE, including the opcode and other
334  *    control information.
335  *    The "Datagram" segment contains address information required in order to
336  *    form a UD message.
337  *    The "Bind" segment contains the parameters required for a Bind Memory
338  *    Window operation.
339  *    The "Remote Address" segment is present only in RDMA or Atomic WQEs and
340  *    specifies remote virtual addresses and RKey, respectively.  Length of
341  *    the remote access is calculated from the scatter/gather list (for
342  *    RDMA-write/RDMA-read) or set to eight (for Atomic).
343  *    The "Atomic" segment is present only in Atomic WQEs and specifies
344  *    Swap/Add and Compare data.
345  *
346  *    Note: The following structures are not #define'd with both little-endian
347  *    and big-endian definitions.  This is because their individual fields are
348  *    not directly accessed except through macros defined below.
349  */
/*
 * tavor_hw_snd_wqe_nextctrl_t
 *    The "Next/Ctrl" segment of a Send WQE: four 32-bit words (16 bytes).
 *    The first two words form the "Next" part and the last two the "Ctrl"
 *    part; the bitfield widths within each word sum to 32.
 */
typedef struct tavor_hw_snd_wqe_nextctrl_s {
	/* "Next", first word */
	uint32_t	next_wqe_addr	:26;
	uint32_t			:1;
	uint32_t	nopcode		:5;
	/* "Next", second word */
	uint32_t	next_eec	:24;
	uint32_t	dbd		:1;
	uint32_t	fence		:1;
	uint32_t	nds		:6;

	/* "Ctrl", first word: four single-bit flags after 28 unnamed bits */
	uint32_t			:28;
	uint32_t	c		:1;
	uint32_t	e		:1;
	uint32_t	s		:1;
	uint32_t	i		:1;
	/* "Ctrl", second word */
	uint32_t	immediate	:32;
} tavor_hw_snd_wqe_nextctrl_t;
366 
/*
 * Masks for the "Next" part of a WQE.  TAVOR_WQE_NDA_MASK is written as a
 * 64-bit quantity and therefore carries the ULL suffix, so that both the
 * mask and its complement are 64 bits wide when applied to 64-bit WQE
 * words.
 */
#define	TAVOR_WQE_NDA_MASK		0x00000000FFFFFFC0ULL
#define	TAVOR_WQE_NDS_MASK		0x3F
#define	TAVOR_WQE_DBD_MASK		0x80
370 
/* Fence bit for Send WQEs */
#define	TAVOR_WQE_SEND_FENCE_MASK	0x40

/* "nopcode" values for Send WQEs */
#define	TAVOR_WQE_SEND_NOPCODE_RDMAW	0x08
#define	TAVOR_WQE_SEND_NOPCODE_RDMAWI	0x09
#define	TAVOR_WQE_SEND_NOPCODE_SEND	0x0A
#define	TAVOR_WQE_SEND_NOPCODE_SENDI	0x0B
#define	TAVOR_WQE_SEND_NOPCODE_RDMAR	0x10
#define	TAVOR_WQE_SEND_NOPCODE_ATMCS	0x11
#define	TAVOR_WQE_SEND_NOPCODE_ATMFA	0x12
#define	TAVOR_WQE_SEND_NOPCODE_BIND	0x18
380 
/* Single-bit flags (bits 35 down to 32) of a 64-bit Send WQE ctrl word */
#define	TAVOR_WQE_SEND_SIGNALED_MASK	(1ULL << 35)
#define	TAVOR_WQE_SEND_EVENT_MASK	(1ULL << 34)
#define	TAVOR_WQE_SEND_SOLICIT_MASK	(1ULL << 33)
#define	TAVOR_WQE_SEND_IMMEDIATE_MASK	(1ULL << 32)
385 
/*
 * Masks for the UD send header.  The 64-bit mask carries an explicit ULL
 * suffix, like the other 64-bit masks in this file, so that its type does
 * not depend on the data model or language level in use.
 */
#define	TAVOR_WQE_SENDHDR_UD_AV_MASK	0xFFFFFFFFFFFFFFE0ULL
#define	TAVOR_WQE_SENDHDR_UD_DQPN_MASK	0xFFFFFF
388 
/*
 * tavor_hw_snd_wqe_bind_t
 *    The "Bind" segment of a Send WQE (32 bytes): a flags word whose first
 *    three bitfields are single-bit flags, an unnamed word, two 32-bit
 *    keys, then a 64-bit address and a 64-bit length.
 *    TAVOR_WQE_BUILD_BIND fills the same 32 bytes as four 64-bit stores.
 */
typedef struct tavor_hw_snd_wqe_bind_s {
	uint32_t	ae		:1;
	uint32_t	rw		:1;
	uint32_t	rr		:1;
	uint32_t			:29;
	uint32_t			:32;
	uint32_t	new_rkey;
	uint32_t	reg_lkey;
	uint64_t	addr;
	uint64_t	len;
} tavor_hw_snd_wqe_bind_t;
/* Top three bits of the first 64-bit word of a Bind segment */
#define	TAVOR_WQE_SENDHDR_BIND_ATOM	(1ULL << 63)
#define	TAVOR_WQE_SENDHDR_BIND_WR	(1ULL << 62)
#define	TAVOR_WQE_SENDHDR_BIND_RD	(1ULL << 61)
403 
/*
 * tavor_hw_snd_wqe_remaddr_t
 *    The "Remote Address" segment of a Send WQE (16 bytes): a 64-bit
 *    virtual address, a 32-bit RKey and 32 unnamed bits.
 */
typedef struct tavor_hw_snd_wqe_remaddr_s {
	uint64_t	vaddr;
	uint32_t	rkey;
	uint32_t			:32;
} tavor_hw_snd_wqe_remaddr_t;
409 
410 /*
411  * Tavor Receive Work Queue Element (WQE)
 *    Like the Send WQE, the Receive WQE is built of 16-byte segments. The
 *    first segment is the "Next/Ctrl" segment (defined below).  It is
 *    followed by some number of scatter list entries for the incoming
 *    message.
415  *
416  *    The format of the scatter-gather list entries is also shown below.  For
417  *    Receive WQEs the "inline_data" field must be cleared (i.e. data segments
418  *    cannot contain inline data).
419  */
/*
 * tavor_hw_rcv_wqe_nextctrl_t
 *    The "Next/Ctrl" segment of a Receive WQE: four 32-bit words
 *    (16 bytes).  The bitfield widths within each word sum to 32.
 */
typedef struct tavor_hw_rcv_wqe_nextctrl_s {
	/* "Next", first word */
	uint32_t	next_wqe_addr	:26;
	uint32_t			:5;
	uint32_t	one		:1;
	/* "Next", second word */
	uint32_t			:24;
	uint32_t	dbd		:1;
	uint32_t			:1;
	uint32_t	nds		:6;

	/* "Ctrl", first word */
	uint32_t			:28;
	uint32_t	c		:1;
	uint32_t	e		:1;
	uint32_t			:2;
	/* "Ctrl", second word (entirely unnamed) */
	uint32_t			:32;
} tavor_hw_rcv_wqe_nextctrl_t;
435 
436 /*
437  * This bit must be set in the next/ctrl field of all Receive WQEs
438  * as a workaround to a Tavor hardware erratum related to having
439  * the first 32-bits in the WQE set to zero.
440  */
#define	TAVOR_RCV_WQE_NDA0_WA_MASK	(1ULL << 32)
/* Single-bit flags of a 64-bit Receive WQE ctrl word */
#define	TAVOR_WQE_RCV_SIGNALED_MASK	(1ULL << 35)
#define	TAVOR_WQE_RCV_EVENT_MASK	(1ULL << 34)
444 
/*
 * tavor_hw_wqe_sgl_t
 *    One scatter/gather list entry (16 bytes): a word holding a 1-bit
 *    inline-data flag and a 31-bit byte count, a 32-bit LKey and a 64-bit
 *    address.
 */
typedef struct tavor_hw_wqe_sgl_s {
	uint32_t	inline_data	:1;
	uint32_t	byte_cnt	:31;
	uint32_t	lkey;
	uint64_t	addr;
} tavor_hw_wqe_sgl_t;
/* Low 31 bits (byte count) and top bit (inline flag) of a 32-bit word */
#define	TAVOR_WQE_SGL_BYTE_CNT_MASK	0x7FFFFFFF
#define	TAVOR_WQE_SGL_INLINE_MASK	0x80000000
453 /*
454  * The tavor_sw_wqe_dbinfo_t structure is used internally by the Tavor
455  * driver to return information (from the tavor_wqe_mlx_build_nextctl() and
456  * tavor_wqe_send_build_nextctl() routines) regarding the type of Tavor
457  * doorbell necessary.
458  */
/*
 * tavor_sw_wqe_dbinfo_t
 *    Software-only pair of doorbell parameters.
 *    NOTE(review): presumably these feed the nopcode and fence fields of
 *    the send doorbell; confirm against the users of this type.
 */
typedef struct tavor_sw_wqe_dbinfo_s {
	uint_t  db_nopcode;
	uint_t  db_fence;
} tavor_sw_wqe_dbinfo_t;
463 
464 
465 /*
466  * The following macros are used for building each of the individual
467  * segments that can make up a Tavor WQE.  Note: We try not to use the
468  * structures (with their associated bitfields) here, instead opting to
469  * build and put 64-bit or 32-bit chunks to the WQEs as appropriate,
470  * primarily because using the bitfields appears to force more read-modify-
471  * write operations.
472  *
473  *    TAVOR_WQE_BUILD_REMADDR		- Builds Remote Address Segment using
474  *					    RDMA info from the work request
 *    TAVOR_WQE_BUILD_BIND		- Builds the Bind Memory Window
 *					    Segment using bind info from the
 *					    work request
 *    TAVOR_WQE_BUILD_DATA_SEG		- Builds a scatter/gather Data
 *					    Segment from one SGL entry of the
 *					    work request
478  *    TAVOR_WQE_LINKNEXT		- Links the current WQE to the
479  *					    previous one
480  *    TAVOR_WQE_LINKFIRST		- Links the first WQE on the current
481  *					    chain to the previous WQE
482  */
483 
/*
 * TAVOR_WQE_BUILD_REMADDR
 *    Writes a Remote Address segment at "ra" as two 64-bit stores, each
 *    converted with HTOBE_64(): first the remote address, then the RKey
 *    placed in the upper 32 bits (the lower 32 bits are zero).
 *
 *    ra	- destination segment (at least 16 bytes)
 *    wr_rdma	- pointer to an object with rdma_raddr and rdma_rkey members
 *
 *    The macro-local pointer has a reserved-looking name so that an
 *    argument spelled "tmp" is not captured by the macro's own variable.
 *    The macro expands to a brace-enclosed compound statement.
 */
#define	TAVOR_WQE_BUILD_REMADDR(ra,  wr_rdma)				\
{									\
	uint64_t		*_tavor_ra64;				\
									\
	_tavor_ra64    = (uint64_t *)(ra);				\
	_tavor_ra64[0] = HTOBE_64((wr_rdma)->rdma_raddr);		\
	_tavor_ra64[1] = HTOBE_64((uint64_t)(wr_rdma)->rdma_rkey << 32);\
}
/*
 * TAVOR_WQE_BUILD_BIND
 *    Writes a Bind segment at "bn" as four 64-bit stores, each converted
 *    with HTOBE_64():
 *	[0] access flags translated from the IBT_WR_BIND_* bits into the
 *	    TAVOR_WQE_SENDHDR_BIND_* bits
 *	[1] bind_rkey_out in the upper 32 bits, bind_lkey in the lower
 *	[2] bind_va
 *	[3] bind_len
 *
 *    bn	- destination segment (at least 32 bytes)
 *    wr_bind	- pointer to an object with bind_flags, bind_rkey_out,
 *		  bind_lkey, bind_va and bind_len members
 *
 *    Macro-local variables have reserved-looking names so that arguments
 *    spelled "tmp" or "bind_flags" are not captured by them.  The macro
 *    expands to a brace-enclosed compound statement.
 */
#define	TAVOR_WQE_BUILD_BIND(bn, wr_bind)				\
{									\
	uint64_t		*_tavor_bn64;				\
	uint64_t		_tavor_bn0;				\
	ibt_bind_flags_t	_tavor_bflags;				\
									\
	_tavor_bn64   = (uint64_t *)(bn);				\
	_tavor_bflags = (wr_bind)->bind_flags;				\
	_tavor_bn0    = (_tavor_bflags & IBT_WR_BIND_ATOMIC) ?		\
	    TAVOR_WQE_SENDHDR_BIND_ATOM : 0;				\
	_tavor_bn0   |= (_tavor_bflags & IBT_WR_BIND_WRITE) ?		\
	    TAVOR_WQE_SENDHDR_BIND_WR : 0;				\
	_tavor_bn0   |= (_tavor_bflags & IBT_WR_BIND_READ) ?		\
	    TAVOR_WQE_SENDHDR_BIND_RD : 0;				\
	_tavor_bn64[0] = HTOBE_64(_tavor_bn0);				\
	_tavor_bn64[1] = HTOBE_64(					\
	    ((uint64_t)(wr_bind)->bind_rkey_out << 32) |		\
	    (wr_bind)->bind_lkey);					\
	_tavor_bn64[2] = HTOBE_64((wr_bind)->bind_va);			\
	_tavor_bn64[3] = HTOBE_64((wr_bind)->bind_len);			\
}
512 
/*
 * TAVOR_WQE_BUILD_DATA_SEG
 *    Writes one data segment at "ds" as two 64-bit stores, each converted
 *    with HTOBE_64():
 *	[0] ds_len masked with TAVOR_WQE_SGL_BYTE_CNT_MASK in the upper
 *	    32 bits (so the inline bit is always clear), ds_key in the
 *	    lower 32 bits
 *	[1] ds_va
 *
 *    ds	- destination segment (at least 16 bytes)
 *    sgl	- pointer to an object with ds_len, ds_key and ds_va members
 *
 *    The macro-local pointer has a reserved-looking name so that an
 *    argument spelled "tmp" is not captured by the macro's own variable.
 *    The macro expands to a brace-enclosed compound statement.
 */
#define	TAVOR_WQE_BUILD_DATA_SEG(ds, sgl)				\
{									\
	uint64_t		*_tavor_ds64;				\
									\
	_tavor_ds64	= (uint64_t *)(ds);				\
	_tavor_ds64[0]	= HTOBE_64(((uint64_t)((sgl)->ds_len &		\
		TAVOR_WQE_SGL_BYTE_CNT_MASK) << 32) | (sgl)->ds_key);	\
	_tavor_ds64[1]	= HTOBE_64((sgl)->ds_va);			\
}
522 
/*
 * TAVOR_WQE_LINKNEXT
 *    Stores "ctrl" into the second 64-bit word of the WQE at "prev" and
 *    then "next" into its first 64-bit word, each converted with
 *    HTOBE_64().  The ctrl word is deliberately listed before the next
 *    word.
 *    NOTE(review): presumably so that the link is never in place before
 *    its control word; confirm before reordering these stores.
 *    The macro expands to a brace-enclosed compound statement.
 */
#define	TAVOR_WQE_LINKNEXT(prev, ctrl, next)				\
{									\
	((uint64_t *)(prev))[1] = HTOBE_64((ctrl));			\
	((uint64_t *)(prev))[0] = HTOBE_64((next));			\
}
528 
/*
 * TAVOR_WQE_LINKFIRST
 *    Stores "next", converted with HTOBE_64(), into the first 64-bit word
 *    of the WQE at "prev".  No other word of the WQE is written.
 */
#define	TAVOR_WQE_LINKFIRST(prev, next)					\
{									\
	*((uint64_t *)(prev)) = HTOBE_64((next));			\
}
533 
534 /*
535  * The following macro is used to convert WQE address and size into the
536  * "wqeaddrsz" value needed in the tavor_wrid_entry_t (see below).
537  */
/*
 * TAVOR_QP_WQEADDRSZ
 *    Combines "size" (its low six bits, per TAVOR_WQE_NDS_MASK) with
 *    "addr" (all bits except the low six) into a single value.
 */
#define	TAVOR_QP_WQEADDRSZ(addr, size)					\
	(((size) & TAVOR_WQE_NDS_MASK) |				\
	    (((uintptr_t)(addr)) & ~TAVOR_WQE_NDS_MASK))
541 
542 /*
543  * The following macros are used to calculate pointers to the Send or Receive
544  * WQEs on a given QP, respectively
545  */
/*
 * The "_ENTRY" macros yield a uint64_t pointer computed from a queue
 * buffer base; the "_DESC" macros yield a 32-bit value computed from a
 * queue descriptor base.  Both add (index * WQE size) to their base.
 */

/* QP send queue */
#define	TAVOR_QP_SQ_ENTRY(qp, tail)					\
	((uint64_t *)((uintptr_t)((qp)->qp_sq_buf) +			\
	    ((qp)->qp_sq_wqesz * (tail))))
#define	TAVOR_QP_SQ_DESC(qp, tail)					\
	((uint32_t)((qp)->qp_sq_desc_addr +				\
	    ((qp)->qp_sq_wqesz * (tail))))

/* QP receive queue */
#define	TAVOR_QP_RQ_ENTRY(qp, tail)					\
	((uint64_t *)((uintptr_t)((qp)->qp_rq_buf) +			\
	    ((qp)->qp_rq_wqesz * (tail))))
#define	TAVOR_QP_RQ_DESC(qp, tail)					\
	((uint32_t)((qp)->qp_rq_desc_addr +				\
	    ((qp)->qp_rq_wqesz * (tail))))

/* Shared receive queue */
#define	TAVOR_SRQ_RQ_ENTRY(srq, tail)					\
	((uint64_t *)((uintptr_t)((srq)->srq_wq_buf) +			\
	    ((srq)->srq_wq_wqesz * (tail))))
#define	TAVOR_SRQ_RQ_DESC(srq, tail)					\
	((uint32_t)((srq)->srq_wq_desc_addr +				\
	    ((srq)->srq_wq_wqesz * (tail))))

/* Index of the WQE at descriptor address "desc_addr" */
#define	TAVOR_SRQ_WQ_INDEX(srq_wq_desc_addr, desc_addr, wqesz)		\
	((uint32_t)(((desc_addr) - (srq_wq_desc_addr)) / (wqesz)))

/* Like TAVOR_SRQ_RQ_ENTRY, but based at srq_addr rather than srq_wq_buf */
#define	TAVOR_SRQ_WQ_ENTRY(srq, index)					\
	((uint64_t *)(((uintptr_t)(srq)->srq_addr) +			\
	    ((srq)->srq_wq_wqesz * (index))))
569 
570 /*
571  * Maximum header before the data bytes when inlining data.
572  * "Header" includes the link (nextctrl) struct, a remote address struct
573  * (only for RDMA Write, not for Send) and the 32-bit byte count field.
574  */
#define	TAVOR_INLINE_HEADER_SIZE_MAX	0x40	/* from tavor driver */

/* RDMA Write: byte count field + nextctrl segment + remote address segment */
#define	TAVOR_INLINE_HEADER_SIZE_RDMAW					\
	(sizeof (uint32_t) +						\
	    sizeof (tavor_hw_snd_wqe_nextctrl_t) +			\
	    sizeof (tavor_hw_snd_wqe_remaddr_t))

/* Send: byte count field + nextctrl segment */
#define	TAVOR_INLINE_HEADER_SIZE_SEND					\
	(sizeof (uint32_t) +						\
	    sizeof (tavor_hw_snd_wqe_nextctrl_t))
583 
584 /*
585  * Function signatures
586  */
/*
 * Defined outside this header.
 * NOTE(review): presumably returns the maximum inline data size; confirm
 * against the definition.
 */
int dapls_tavor_max_inline(void);
588 
589 #ifdef __cplusplus
590 }
591 #endif
592 
593 #endif	/* _DAPL_TAVOR_HW_H */
594