xref: /illumos-gate/usr/src/lib/udapl/udapl_tavor/tavor/dapl_arbel_hw.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_tavor_hw.h"
#include "dapl_tavor_wr.h"
#include "dapl_tavor_ibtf_impl.h"

#define	bt_debug	0

enum arbel_db_type_e {
	ARBEL_DBR_CQ_SET_CI	= 0x1 << 5,
	ARBEL_DBR_CQ_ARM	= 0x2 << 5,
	ARBEL_DBR_SQ		= 0x3 << 5,
	ARBEL_DBR_RQ		= 0x4 << 5,
	ARBEL_DBR_SRQ		= 0x5 << 5
};

#define	ARBEL_WQE_SGL_INVALID_LKEY	0x00000100
#define	ARBEL_WQE_SEND_SIGNALED_MASK	0x0000000800000000ull
#define	ARBEL_WQE_SEND_SOLICIT_MASK	0x0000000200000000ull
#define	ARBEL_WQE_CTRL_REQBIT_MASK	0x0000000100000000ull
#define	ARBEL_WQE_NEXT_REQBIT_MASK	0x80
#define	ARBEL_WQE_SETCTRL(qp, desc, ctrl) \
	((uint64_t *)(desc))[1] = HTOBE_64(ctrl)
#define	ARBEL_WQE_SETNEXT(qp, desc, nda_op, ee_nds) \
	{ \
		((uint32_t *)(desc))[0] = HTOBE_32(nda_op); \
		((uint32_t *)(desc))[1] = HTOBE_32(ee_nds); \
	}
#define	ARBEL_WQE_SEND_FENCE_MASK	0x40
#define	ARBEL_WQE_SEND_NOPCODE_RDMAW	0x8
#define	ARBEL_WQE_SEND_NOPCODE_SEND	0xA
#define	ARBEL_WQE_SEND_NOPCODE_RDMAR	0x10
#define	ARBEL_WQE_SEND_NOPCODE_BIND	0x18
#define	ARBEL_WQE_NDA_MASK		0x00000000FFFFFFC0ull
#define	ARBEL_WQE_NDS_MASK		0x3F
#define	ARBEL_QPSNDDB_WQE_CNT_SHIFT	0x38
#define	ARBEL_QPSNDDB_WQE_COUNTER_SHIFT	0x28
#define	ARBEL_QPSNDDB_F_SHIFT		0x25
#define	ARBEL_QPSNDDB_NOPCODE_SHIFT	0x20
#define	ARBEL_QPSNDDB_QPN_SHIFT		0x8
#define	ARBEL_DBR_QP_WQE_COUNTER_SHIFT	0x20
#define	ARBEL_DBR_QN_SHIFT		0x8

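/*
 * Editorial note, inferred from the shift definitions above rather
 * than quoted from the PRM: dapli_arbel_sq_dbreg() below composes
 * its 64-bit send doorbell roughly as
 *
 *	[63:56]	wqe_cnt		number of WQEs posted (always 1 here)
 *	[55:40]	wqe_counter
 *	[37]	fence		the F bit
 *	[36:32]	nopcode
 *	[31:8]	qpn
 *	[5:0]	nds		descriptor size in 16-byte chunks
 */
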
#define	ARBEL_CQDB_NOTIFY_CQ_SOLICIT	0x1
#define	ARBEL_CQDB_NOTIFY_CQ		0x2

/*
 * Function signatures
 */
extern uint64_t dapls_tavor_wrid_get_entry(ib_cq_handle_t, tavor_hw_cqe_t *,
    uint_t, uint_t, dapls_tavor_wrid_entry_t *);
extern void dapls_tavor_wrid_cq_reap(ib_cq_handle_t);
extern DAPL_OS_LOCK g_tavor_uar_lock;

#ifndef	_LP64
extern void dapls_atomic_assign_64(uint64_t, uint64_t *);
#endif

static int dapli_arbel_wqe_send_build(ib_qp_handle_t, ibt_send_wr_t *,
    uint64_t *, uint_t *);
static DAT_RETURN dapli_arbel_wqe_recv_build(ib_qp_handle_t, ibt_recv_wr_t *,
    uint64_t *, uint_t *);
static int dapli_arbel_cq_cqe_consume(ib_cq_handle_t, tavor_hw_cqe_t *,
    ibt_wc_t *);
static int dapli_arbel_cq_errcqe_consume(ib_cq_handle_t, tavor_hw_cqe_t *,
    ibt_wc_t *);
extern void dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t *, uint64_t,
    uint32_t, uint_t);
extern void dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t, uint64_t, uint32_t);

/*
 * Note: The 64-bit doorbells need to be written atomically.
 * In 32-bit libraries we need to use the special assembly routine,
 * because compiler-generated code splits the store into two word writes.
 */

/*
 * dapli_arbel_cq_doorbell()
 * Takes the specified cq cmd and cq number and rings the cq doorbell
 */
static void
dapli_arbel_cq_doorbell(dapls_hw_uar_t ia_uar, uint32_t cq_cmd, uint32_t cqn,
    uint32_t cmd_sn, uint32_t cq_param)
{
	uint64_t	doorbell;

	/* Build the doorbell from the parameters */
	doorbell = (cmd_sn << 4) | cq_cmd;
	doorbell = (doorbell << 24) | cqn;
	doorbell = (doorbell << 32) | cq_param;

	/* Write the doorbell to UAR */
#ifdef _LP64
	((tavor_hw_uar_t *)ia_uar)->cq = HTOBE_64(doorbell);
	/* 32 bit version */
#elif defined(i386)
	dapl_os_lock(&g_tavor_uar_lock);
	/*
	 * For 32 bit intel we assign the doorbell in the order
	 * prescribed by the Tavor PRM, lower to upper addresses
	 */
	((tavor_hw_uar32_t *)ia_uar)->cq[0] =
	    (uint32_t)HTOBE_32(doorbell >> 32);
	((tavor_hw_uar32_t *)ia_uar)->cq[1] =
	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
	dapl_os_unlock(&g_tavor_uar_lock);
#else
	dapls_atomic_assign_64(HTOBE_64(doorbell),
	    &((tavor_hw_uar_t *)ia_uar)->cq);
#endif
}
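
/*
 * Editorial note, inferred from the shifts in the routine above (not
 * quoted from the PRM): the CQ doorbell ends up laid out as
 *
 *	[61:60]	cmd_sn		command sequence number
 *	[58:56]	cq_cmd		e.g. ARBEL_CQDB_NOTIFY_CQ
 *	[55:32]	cqn		CQ number
 *	[31:0]	cq_param	e.g. the current consumer index
 */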

/*
 * dapli_arbel_sq_dbrec() and dapli_arbel_sq_dbreg()
 * Take the specified next descriptor information, qp number and opcode,
 * update the send queue doorbell record and ring the send doorbell
 */
static void
dapli_arbel_sq_dbrec(ib_qp_handle_t qp, uint16_t wqe_counter)
{
	qp->qp_sq_dbp[0] = HTOBE_32((wqe_counter + 1) & 0xffff);
}
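
/*
 * Editorial note: dapli_arbel_sq_dbrec() stores (wqe_counter + 1),
 * truncated to 16 bits, into the first word of the send doorbell
 * record; presumably this is the posted-WQE count the hardware reads
 * to know where to resume fetching on the send queue.
 */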

static void
dapli_arbel_sq_dbreg(dapls_hw_uar_t ia_uar, uint32_t qpn, uint32_t fence,
    uint32_t nopcode, uint16_t wqe_counter, uint32_t nds)
{
	uint64_t	doorbell;

	doorbell = ((uint64_t)1 << ARBEL_QPSNDDB_WQE_CNT_SHIFT) |
	    ((uint64_t)wqe_counter << ARBEL_QPSNDDB_WQE_COUNTER_SHIFT) |
	    ((uint64_t)fence << ARBEL_QPSNDDB_F_SHIFT) |
	    ((uint64_t)nopcode << ARBEL_QPSNDDB_NOPCODE_SHIFT) |
	    (qpn << ARBEL_QPSNDDB_QPN_SHIFT) | nds;

	/* Write the doorbell to UAR */
#ifdef _LP64
	((tavor_hw_uar_t *)ia_uar)->send = HTOBE_64(doorbell);
#else
#if defined(i386)
	dapl_os_lock(&g_tavor_uar_lock);
	/*
	 * For 32 bit intel we assign the doorbell in the order
	 * prescribed by the Tavor PRM, lower to upper addresses
	 */
	((tavor_hw_uar32_t *)ia_uar)->send[0] =
	    (uint32_t)HTOBE_32(doorbell >> 32);
	((tavor_hw_uar32_t *)ia_uar)->send[1] =
	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
	dapl_os_unlock(&g_tavor_uar_lock);
#else
	dapls_atomic_assign_64(HTOBE_64(doorbell),
	    &((tavor_hw_uar_t *)ia_uar)->send);
#endif
#endif
}

/*
 * dapli_arbel_wqe_send_build()
 * Constructs a WQE for a given ibt_send_wr_t
 */
static int
dapli_arbel_wqe_send_build(ib_qp_handle_t qp, ibt_send_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
	tavor_hw_snd_wqe_remaddr_t	*rc;
	tavor_hw_snd_wqe_bind_t		*bn;
	tavor_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	uint32_t			len, total_len;
	uint32_t			new_rkey;
	uint32_t			old_rkey;
	int				i, num_ds;
	int				max_inline_bytes = -1;
	uint64_t			ctrl;

	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	ctrl = ((wr->wr_flags & IBT_WR_SEND_SIGNAL) ?
	    ARBEL_WQE_SEND_SIGNALED_MASK : 0) |
	    ((wr->wr_flags & IBT_WR_SEND_SOLICIT) ?
	    ARBEL_WQE_SEND_SOLICIT_MASK : 0) |
	    ARBEL_WQE_CTRL_REQBIT_MASK;

	/*
	 * RC is the only supported transport in UDAPL.
	 * For RC requests, we allow "Send", "RDMA Read", "RDMA Write"
	 */
	switch (wr->wr_opcode) {
	case IBT_WRC_SEND:
		/*
		 * If this is a Send request, then all we need is
		 * the Data Segment processing below.
		 * Initialize the information for the Data Segments
		 */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
		if (qp->qp_sq_inline != 0)
			max_inline_bytes =
			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_SEND;
		break;
	case IBT_WRC_RDMAW:
		if (qp->qp_sq_inline != 0)
			max_inline_bytes =
			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_RDMAW;
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (qp->qp_sq_inline < 0 && wr->wr_opcode == IBT_WRC_RDMAR)
			qp->qp_sq_inline = 0;
		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		TAVOR_WQE_BUILD_REMADDR(rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (tavor_hw_snd_wqe_remaddr_t));
		break;
	case IBT_WRC_BIND:
		/*
		 * Generate a new R_key.  Increment the "unconstrained"
		 * key bits and keep the "constrained" bits the same,
		 * since the latter represent the MPT index.
		 */
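		/*
		 * Editorial worked example (hypothetical value): for
		 * bind_rkey 0x123456ff the index part is 0x123456, the
		 * 8-bit key part 0xff increments (mod 256) to 0x00, and
		 * bind_rkey_out below becomes 0x12345600.
		 */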
#if 0
	/* XXX - need equiv of "arbel_wr_bind_check(state, wr);" */
	/* XXX - uses arbel_mr_keycalc - what about Sinai vs. Arbel??? */
#endif
		old_rkey = wr->wr.rc.rcwr.bind->bind_rkey;
		new_rkey = old_rkey >> 8;	/* index */
		old_rkey = ((old_rkey & 0xff) + 1) & 0xff; /* incremented key */
		new_rkey = (new_rkey << 8) | old_rkey;

		wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;

		bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		TAVOR_WQE_BUILD_BIND(bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (tavor_hw_snd_wqe_bind_t));
		nds = 0;
		break;
	default:
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "dapli_arbel_wqe_send_build: invalid wr_opcode=%d\n",
		    wr->wr_opcode);
		return (DAT_INTERNAL_ERROR);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	if (max_inline_bytes != -1) {		/* compute total_len */
		total_len = 0;
		for (i = 0; i < nds; i++)
			total_len += sgl[i].ds_len;
		if (total_len > max_inline_bytes)
			max_inline_bytes = -1;	/* too big, do not "inline" */
	}
	if (max_inline_bytes != -1) {		/* do "inline" */
		uint8_t *dst = (uint8_t *)((uint32_t *)ds + 1);
		*(uint32_t *)ds =
		    HTOBE_32(total_len | TAVOR_WQE_SGL_INLINE_MASK);
		for (i = 0; i < nds; i++) {
			if ((len = sgl[i].ds_len) == 0) {
				continue;
			}
			(void) dapl_os_memcpy(dst,
			    (void *)(uintptr_t)sgl[i].ds_va, len);
			dst += len;
		}
		/* Return the size of descriptor (in 16-byte chunks) */
		*size = ((uintptr_t)dst - (uintptr_t)addr + 15) >> 4;
	} else {
		for (i = 0; i < nds; i++) {
			if (sgl[i].ds_len == 0) {
				continue;
			}

			/*
			 * Fill in the Data Segment(s) for the current WQE,
			 * using the information contained in the
			 * scatter-gather list of the work request.
			 */
			TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl[i]);
			num_ds++;
		}

		/* Return the size of descriptor (in 16-byte chunks) */
		*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)addr) >> 4;
	}
	ARBEL_WQE_SETCTRL(qp, addr, ctrl);

	return (DAT_SUCCESS);
}
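
/*
 * Editorial worked example for the "inline" path above (hypothetical
 * values, assuming a 16-byte next/ctrl header): a 29-byte inline send
 * places its 4-byte inline header word at addr + 16 and its payload
 * ends 49 bytes past addr, so *size = (49 + 15) >> 4 = 4 sixteen-byte
 * chunks.
 */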

/*
 * dapli_arbel_wqe_send_linknext()
 * Takes a WQE and links it to the prev WQE chain
 */
static void
dapli_arbel_wqe_send_linknext(ibt_send_wr_t *curr_wr,
    uint32_t curr_desc, uint_t curr_descsz, uint64_t *prev_addr,
    tavor_sw_wqe_dbinfo_t *dbinfo)
{
	uint32_t	nopcode, fence, nda_op, ee_nds;

	/*
	 * Calculate the "next" field of the prev descriptor.  This amounts
	 * to setting up the "next_wqe_addr", "nopcode", "fence", and "nds"
	 * fields (see tavor_hw.h for more).
	 */

	/*
	 * Determine the value for the Tavor WQE "nopcode" field
	 * by using the IBTF opcode from the work request
	 */
	switch (curr_wr->wr_opcode) {
	case IBT_WRC_RDMAW:
		nopcode = ARBEL_WQE_SEND_NOPCODE_RDMAW;
		break;

	case IBT_WRC_SEND:
		nopcode = ARBEL_WQE_SEND_NOPCODE_SEND;
		break;

	case IBT_WRC_RDMAR:
		nopcode = ARBEL_WQE_SEND_NOPCODE_RDMAR;
		break;

	case IBT_WRC_BIND:
		nopcode = ARBEL_WQE_SEND_NOPCODE_BIND;
		break;
	default:
		/* Unsupported opcodes in UDAPL */
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "dapli_arbel_wqe_send_linknext: invalid wr_opcode=%d\n",
		    curr_wr->wr_opcode);
		return;
	}

	fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
	nda_op = ((uintptr_t)curr_desc & ARBEL_WQE_NDA_MASK) | nopcode;
	ee_nds = ((fence == 1) ? ARBEL_WQE_SEND_FENCE_MASK : 0) |
	    (curr_descsz & ARBEL_WQE_NDS_MASK) |
	    ARBEL_WQE_NEXT_REQBIT_MASK;

	/*
	 * A send queue doorbell will be rung for the next
	 * WQE on the chain, so set the current WQE's "dbd" bit.
	 * Note: We also update the "dbinfo" structure here to pass
	 * back information about what should (later) be included
	 * in the send queue doorbell.
	 */
	dbinfo->db_nopcode = nopcode;
	dbinfo->db_fence   = fence;

	ARBEL_WQE_SETNEXT(qp, prev_addr, nda_op, ee_nds);
}


/*
 * dapli_arbel_wqe_recv_build()
 * Builds the recv WQE for a given ibt_recv_wr_t
 */
static DAT_RETURN
dapli_arbel_wqe_recv_build(ib_qp_handle_t qp, ibt_recv_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
	tavor_hw_wqe_sgl_t	*ds;
	int			i;
	int			num_ds;

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}
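
	/*
	 * If fewer data segments were built than the maximum the queue
	 * was created with, terminate the descriptor with a zero-length
	 * sentinel segment carrying the special "invalid" lkey, mirroring
	 * the SRQ variant below.
	 */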
	if (num_ds < qp->qp_rq_sgl) {
		ibt_wr_ds_t sgl;
		sgl.ds_va  = (ib_vaddr_t)0;
		sgl.ds_len = (ib_msglen_t)0;
		sgl.ds_key = (ibt_lkey_t)ARBEL_WQE_SGL_INVALID_LKEY;
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl);
	}

	/* Return the size of descriptor (in 16-byte chunks) */
	*size = qp->qp_rq_wqesz >> 4;

	return (DAT_SUCCESS);
}

/*
 * dapli_arbel_wqe_srq_build()
 * Builds the recv WQE for a given ibt_recv_wr_t
 */
static DAT_RETURN
dapli_arbel_wqe_srq_build(ib_srq_handle_t srq, ibt_recv_wr_t *wr,
    uint64_t *addr)
{
	tavor_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		end_sgl;
	int			i;
	int			num_ds;

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * For SRQ, if the number of data segments is less than the maximum
	 * specified at alloc, then we have to fill in a special "key" entry in
	 * the sgl entry after the last valid one in this post request.  We do
	 * that here.
	 */
	if (num_ds < srq->srq_wq_sgl) {
		end_sgl.ds_va  = (ib_vaddr_t)0;
		end_sgl.ds_len = (ib_msglen_t)0;
		end_sgl.ds_key = (ibt_lkey_t)ARBEL_WQE_SGL_INVALID_LKEY;
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &end_sgl);
	}

	return (DAT_SUCCESS);
}

/*
 * dapli_arbel_cq_peek()
 * Peeks into a given CQ to check if there are any events that can be
 * polled.  It returns the number of CQEs that can be polled.
 */
static void
dapli_arbel_cq_peek(ib_cq_handle_t cq, int *num_cqe)
{
	tavor_hw_cqe_t		*cqe;
	uint32_t		imm_eth_pkey_cred;
	uint32_t		cons_indx;
	uint32_t		wrap_around_mask;
	uint32_t		polled_cnt;
	uint_t			doorbell_cnt;
	uint_t			opcode;

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_size - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_addr[cons_indx];

	/*
	 * Count entries in the CQ until we find an entry owned by
	 * the hardware.
	 */
	polled_cnt = 0;
	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
		opcode = TAVOR_CQE_OPCODE_GET(cqe);
		/* Error CQEs map to multiple work completions */
		if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
		    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
			imm_eth_pkey_cred =
			    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
			doorbell_cnt =
			    imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;
			polled_cnt += (doorbell_cnt + 1);
		} else {
			polled_cnt++;
		}
		/* Increment the consumer index */
		cons_indx = (cons_indx + 1) & wrap_around_mask;

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_addr[cons_indx];
	}

	*num_cqe = polled_cnt;
}
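
/*
 * Editorial example (hypothetical value): an error CQE whose doorbell
 * count field holds 2 contributes doorbell_cnt + 1 = 3 pollable
 * completions to the count in the loop above.
 */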

#define	dapli_arbel_cq_update_ci(cq, dbp) \
	(dbp)[0] = HTOBE_32(cq->cq_consindx)

/*
 * dapli_arbel_cq_poll()
 * This routine polls CQEs out of a CQ and puts them into the ibt_wc_t
 * array that is passed in.
 */
static DAT_RETURN
dapli_arbel_cq_poll(ib_cq_handle_t cq, ibt_wc_t *wc_p, uint_t num_wc,
    uint_t *num_polled)
{
	tavor_hw_cqe_t		*cqe;
	uint32_t		cons_indx;
	uint32_t		wrap_around_mask;
	uint32_t		polled_cnt;
	DAT_RETURN		dat_status;
	int			status;

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_size - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_addr[cons_indx];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as the CQEs are owned by SW, process
	 * each entry by calling dapli_arbel_cq_cqe_consume() and updating the
	 * CQ consumer index.  Note:  We only update the consumer index if
	 * dapli_arbel_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
	 * Otherwise, it indicates that we are going to "recycle" the CQE
	 * (probably because it is an error CQE and corresponds to more than
	 * one completion).
	 */
	polled_cnt = 0;
	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
		status = dapli_arbel_cq_cqe_consume(cq, cqe,
		    &wc_p[polled_cnt++]);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cqe);

			/* Increment the consumer index */
			cons_indx = (cons_indx + 1) & wrap_around_mask;

			/* Update the pointer to the next CQ entry */
			cqe = &cq->cq_addr[cons_indx];
		}

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled off the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	dat_status = DAT_SUCCESS;
	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we have, for example,
	 * pulled from a CQE that we are still in the process of "recycling"
	 * for error purposes, then we would not update the consumer index.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		dapli_arbel_cq_update_ci(cq, cq->cq_poll_dbp);
	} else if (polled_cnt == 0) {
		/*
		 * If the CQ is empty, we can try to free up some of the WRID
		 * list containers.
		 */
		if (cq->cq_wrid_reap_head)	/* look before leaping */
			dapls_tavor_wrid_cq_reap(cq);
		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
	}

	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	return (dat_status);
}
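
#if 0
/*
 * Editorial usage sketch (not built): one plausible way a caller could
 * drain a CQ with dapli_arbel_cq_poll().  The "example_" function and
 * the batch size are hypothetical, not part of this file.
 */
static void
example_drain_cq(ib_cq_handle_t cq)
{
	ibt_wc_t	wc[8];
	uint_t		n, i;

	while (dapli_arbel_cq_poll(cq, wc, 8, &n) == DAT_SUCCESS) {
		for (i = 0; i < n; i++) {
			/* examine wc[i].wc_id, wc[i].wc_status, ... */
		}
	}
}
#endif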

/*
 * dapli_arbel_cq_poll_one()
 * This routine polls one CQE out of a CQ and puts it into the ibt_wc_t
 * that is passed in.  See above for more comments/details.
 */
static DAT_RETURN
dapli_arbel_cq_poll_one(ib_cq_handle_t cq, ibt_wc_t *wc_p)
{
	tavor_hw_cqe_t		*cqe;
	uint32_t		cons_indx;
	DAT_RETURN		dat_status;
	int			status;

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_addr[cons_indx];

	/*
	 * If the entry is owned by SW, process it by calling
	 * dapli_arbel_cq_cqe_consume() and update the CQ consumer index.
	 * Note:  We only update the consumer index if
	 * dapli_arbel_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
	 * Otherwise, it indicates that we are going to "recycle" the CQE
	 * (probably because it is an error CQE and corresponds to more than
	 * one completion).
	 */
	if (TAVOR_CQE_OWNER_IS_SW(cqe)) {
		status = dapli_arbel_cq_cqe_consume(cq, cqe, wc_p);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cqe);

			/* Increment the consumer index */
			cq->cq_consindx =
			    (cons_indx + 1) & (cq->cq_size - 1);
			dapli_arbel_cq_update_ci(cq, cq->cq_poll_dbp);
		}
		dat_status = DAT_SUCCESS;
	} else {
		if (cq->cq_wrid_reap_head)	/* look before leaping */
			dapls_tavor_wrid_cq_reap(cq);
		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
	}
	return (dat_status);
}

/*
 * dapli_arbel_cq_cqe_consume()
 * Converts a given CQE into an ibt_wc_t object
 */
static int
dapli_arbel_cq_cqe_consume(ib_cq_handle_t cqhdl, tavor_hw_cqe_t *cqe,
    ibt_wc_t *wc)
{
	uint_t		flags;
	uint_t		type;
	uint_t		opcode;
	int		status;

	/* strip off the size in wqeaddrsz */
	TAVOR_CQE_WQEADDRSZ_SET(cqe, TAVOR_CQE_WQEADDRSZ_GET(cqe) &
	    ~ARBEL_WQE_NDS_MASK);

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call dapli_arbel_cq_errcqe_consume() and
	 * return whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = TAVOR_CQE_OPCODE_GET(cqe);
	if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
	    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
		status = dapli_arbel_cq_errcqe_consume(cqhdl, cqe, wc);
		return (status);
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, cqe,
	    TAVOR_CQE_SENDRECV_GET(cqe), 0, NULL);
	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (TAVOR_CQE_SENDRECV_GET(cqe) != TAVOR_COMPLETION_RECV) {

		/*
		 * Send CQE
		 *
		 * The following opcodes will not be generated in uDAPL
		 * case TAVOR_CQE_SND_RDMAWR_IMM:
		 * case TAVOR_CQE_SND_SEND_IMM:
		 * case TAVOR_CQE_SND_ATOMIC_CS:
		 * case TAVOR_CQE_SND_ATOMIC_FA:
		 */
		switch (opcode) {
		case TAVOR_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case TAVOR_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case TAVOR_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
			break;

		case TAVOR_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		default:
			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	} else {

		/*
		 * Receive CQE
		 *
		 * The following opcodes will not be generated in uDAPL
		 *
		 * case TAVOR_CQE_RCV_RECV_IMM:
		 * case TAVOR_CQE_RCV_RECV_IMM2:
		 * case TAVOR_CQE_RCV_RDMAWR_IMM:
		 * case TAVOR_CQE_RCV_RDMAWR_IMM2:
		 */
		switch (opcode & 0x1F) {
		case TAVOR_CQE_RCV_RECV:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV2:
			type = IBT_WRC_RECV;
			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
			break;
		default:
			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	}
	wc->wc_type = type;
	wc->wc_flags = flags;
	/* If we got here, completion status must be success */
	wc->wc_status = IBT_WC_SUCCESS;

	return (TAVOR_CQ_SYNC_AND_DB);
}


/*
 * dapli_arbel_cq_errcqe_consume()
 */
static int
dapli_arbel_cq_errcqe_consume(ib_cq_handle_t cqhdl, tavor_hw_cqe_t *cqe,
    ibt_wc_t *wc)
{
	dapls_tavor_wrid_entry_t	wre;
	uint32_t		imm_eth_pkey_cred;
	uint_t			status;
	uint_t			opcode = TAVOR_CQE_OPCODE_GET(cqe);

	dapl_dbg_log(DAPL_DBG_TYPE_EVD, "errcqe_consume:cqe.eth=%x, wqe=%x\n",
	    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe),
	    TAVOR_CQE_WQEADDRSZ_GET(cqe));

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, cqe,
	    (opcode == TAVOR_CQE_SEND_ERR_OPCODE) ? TAVOR_COMPLETION_SEND :
	    TAVOR_COMPLETION_RECV, 1, &wre);
	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status here.
	 */
	imm_eth_pkey_cred = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
	status = imm_eth_pkey_cred >> TAVOR_CQE_ERR_STATUS_SHIFT;
	switch (status) {
	case TAVOR_CQE_LOC_LEN_ERR:
		status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case TAVOR_CQE_LOC_OP_ERR:
		status = IBT_WC_LOCAL_CHAN_OP_ERR;
		break;

	case TAVOR_CQE_LOC_PROT_ERR:
		status = IBT_WC_LOCAL_PROTECT_ERR;
		break;

	case TAVOR_CQE_WR_FLUSHED_ERR:
		status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case TAVOR_CQE_MW_BIND_ERR:
		status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case TAVOR_CQE_BAD_RESPONSE_ERR:
		status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case TAVOR_CQE_LOCAL_ACCESS_ERR:
		status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_INV_REQ_ERR:
		status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case TAVOR_CQE_REM_ACC_ERR:
		status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_OP_ERR:
		status = IBT_WC_REMOTE_OP_ERR;
		break;

	case TAVOR_CQE_TRANS_TO_ERR:
		status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case TAVOR_CQE_RNRNAK_TO_ERR:
		status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Tavor driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
	 *    case TAVOR_CQE_REM_INV_RD_REQ_ERR:
	 *    case TAVOR_CQE_EEC_REM_ABORTED_ERR:
	 *    case TAVOR_CQE_INV_EEC_NUM_ERR:
	 *    case TAVOR_CQE_INV_EEC_STATE_ERR:
	 *    case TAVOR_CQE_LOC_EEC_ERR:
	 */

	default:
		status = IBT_WC_LOCAL_CHAN_OP_ERR;
		break;
	}
	wc->wc_status = status;
	wc->wc_type = 0;

	/*
	 * Consume the CQE
	 *    Return status to indicate that doorbell and sync may be
	 *    necessary.
	 */
	return (TAVOR_CQ_SYNC_AND_DB);
}

/*
 * dapli_arbel_cq_notify()
 * This function is used for arming the CQ by ringing the CQ doorbell.
 *
 * Note: there is something very subtle here.  This code assumes a very
 * specific behavior of the kernel driver.  The cmd_sn field of the
 * arm_dbr is updated by the kernel driver whenever a notification
 * event for the cq is received.  This code extracts the cmd_sn field
 * from the arm_dbr to know the right value to use.  The arm_dbr is
 * always updated atomically so that neither the kernel driver nor this
 * library will get confused about what the other is doing.
 *
 * Note: param is not used here.  It is necessary for arming a CQ for
 * N completions (param is N), but no uDAPL API supports this for now.
 * Thus, we declare ARGSUSED to make lint happy.
 */
/*ARGSUSED*/
static DAT_RETURN
dapli_arbel_cq_notify(ib_cq_handle_t cq, int flags, uint32_t param)
{
	uint32_t	cqnum;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;

	/*
	 * Determine if we are trying to get the next completion or the next
	 * "solicited" completion.  Then hit the appropriate doorbell.
	 */
	dapli_arbel_cq_update_ci(cq, cq->cq_arm_dbp);
	cqnum = cq->cq_num;
	target = cq->cq_arm_dbp + 1;
retry:
	cmp = *target;
	tmp = HTOBE_32(cmp);
	old_cmd = tmp & 0x7;
	cmd_sn = (tmp & 0x18) >> 3;

	if (flags == IB_NOTIFY_ON_NEXT_COMP) {
		if (old_cmd != ARBEL_CQDB_NOTIFY_CQ) {
			new = HTOBE_32((tmp & ~0x7) | ARBEL_CQDB_NOTIFY_CQ);
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			dapli_arbel_cq_doorbell(cq->cq_iauar,
			    ARBEL_CQDB_NOTIFY_CQ, cqnum,
			    cmd_sn, cq->cq_consindx);
		} /* else it's already armed */
	} else if (flags == IB_NOTIFY_ON_NEXT_SOLICITED) {
		if (old_cmd != ARBEL_CQDB_NOTIFY_CQ &&
		    old_cmd != ARBEL_CQDB_NOTIFY_CQ_SOLICIT) {
			new = HTOBE_32((tmp & ~0x7) |
			    ARBEL_CQDB_NOTIFY_CQ_SOLICIT);
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			dapli_arbel_cq_doorbell(cq->cq_iauar,
			    ARBEL_CQDB_NOTIFY_CQ_SOLICIT, cqnum,
			    cmd_sn, cq->cq_consindx);
		} /* else it's already armed */
	} else {
		return (DAT_INVALID_PARAMETER);
	}

	return (DAT_SUCCESS);
}
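
/*
 * Editorial worked example (hypothetical value): if the big-endian arm
 * doorbell word decodes to 0x0A, then bits [2:0] give old_cmd = 0x2
 * (ARBEL_CQDB_NOTIFY_CQ) and bits [4:3] give cmd_sn = 1, so an
 * IB_NOTIFY_ON_NEXT_COMP request above finds the CQ already armed and
 * skips the doorbell.
 */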

/*
 * dapli_arbel_post_send()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_arbel_post_send(DAPL_EP *ep, ibt_send_wr_t *wr, boolean_t ns)
{
	tavor_sw_wqe_dbinfo_t		dbinfo;
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*wre_last;
	uint32_t			desc;
	uint64_t			*wqe_addr;
	uint32_t			desc_sz;
	uint32_t			wqeaddrsz, signaled_dbd;
	uint32_t			head, tail, next_tail, qsize_msk;
	int				status;
	ib_qp_handle_t			qp;

	if ((ep->qp_state == IBT_STATE_RESET) ||
	    (ep->qp_state == IBT_STATE_INIT) ||
	    (ep->qp_state == IBT_STATE_RTR)) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "post_send: invalid qp_state %d\n", ep->qp_state);
		return (DAT_INVALID_STATE);
	}

	qp = ep->qp_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
	wridlist  = qp->qp_sq_wqhdr->wq_wrid_post;

	/* Save away some initial QP state */
	qsize_msk = qp->qp_sq_wqhdr->wq_size - 1;
	tail	  = qp->qp_sq_wqhdr->wq_tail;
	head	  = qp->qp_sq_wqhdr->wq_head;

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * then no more WQEs can be posted; return an error
	 */
	if (qp->qp_sq_wqhdr->wq_full != 0) {
		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/*
	 * Increment the "tail index" and check for "queue full" condition.
	 * If we detect that the current work request is going to fill the
	 * work queue, then we mark this condition and continue.
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (next_tail == head) {
		qp->qp_sq_wqhdr->wq_full = 1;
	}

	/*
	 * Get the user virtual address of the location where the next
	 * Send WQE should be built
	 */
	wqe_addr = TAVOR_QP_SQ_ENTRY(qp, tail);

	/*
	 * Call dapli_arbel_wqe_send_build() to build the WQE at the given
	 * address.  This routine uses the information in the ibt_send_wr_t
	 * and returns the size of the WQE when it returns.
	 */
	status = dapli_arbel_wqe_send_build(qp, wr, wqe_addr, &desc_sz);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
		return (status);
	}

	/*
	 * Get the descriptor (io address) corresponding to the location
	 * where the Send WQE was built.
	 */
	desc = TAVOR_QP_SQ_DESC(qp, tail);

	dapl_os_assert(desc >= qp->qp_sq_desc_addr &&
	    desc <= (qp->qp_sq_desc_addr +
	    qp->qp_sq_numwqe * qp->qp_sq_wqesz));

	/*
	 * Add a WRID entry to the WRID list.  Need to calculate the
	 * "wqeaddr" to pass to dapli_tavor_wrid_add_entry().
	 * signaled_dbd is still calculated, but ignored.
	 */
	wqeaddrsz = TAVOR_QP_WQEADDRSZ(desc, 0);

	signaled_dbd = (wr->wr_flags & IBT_WR_SEND_SIGNAL) ?
	    TAVOR_WRID_ENTRY_SIGNALED : 0;

	dapli_tavor_wrid_add_entry(qp->qp_sq_wqhdr, wr->wr_id, wqeaddrsz,
	    signaled_dbd);

	/*
	 * Now link the wqe to the old chain (if there was one)
	 */
	dapli_arbel_wqe_send_linknext(wr, desc, desc_sz,
	    qp->qp_sq_lastwqeaddr, &dbinfo);

	/*
	 * Now if the WRID tail entry is non-NULL, then this
	 * represents the entry to which we are chaining the
	 * new entries.  Since we are going to ring the
	 * doorbell for this WQE, we want to set its "dbd" bit.
	 *
	 * On the other hand, if the tail is NULL, even though
	 * we will have rung the doorbell for the previous WQE
	 * (for the hardware's sake) it is irrelevant to our
	 * purposes (for tracking WRIDs) because we know the
	 * request must have already completed.
	 */
	wre_last = wridlist->wl_wre_old_tail;
	if (wre_last != NULL) {
		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
	}

	/* Update some of the state in the QP */
	qp->qp_sq_lastwqeaddr	 = wqe_addr;
	qp->qp_sq_wqhdr->wq_tail = next_tail;

	/* Update the doorbell record */
	dapli_arbel_sq_dbrec(qp, qp->qp_sq_counter);

	/* Ring the doorbell */
	dapli_arbel_sq_dbreg(qp->qp_iauar, qp->qp_num, dbinfo.db_fence,
	    dbinfo.db_nopcode, qp->qp_sq_counter, desc_sz);
	qp->qp_sq_counter++;

	dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_arbel_post_recv()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_arbel_post_recv(DAPL_EP *ep, ibt_recv_wr_t *wr, boolean_t ns)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*wre_last;
	ib_qp_handle_t			qp;
	DAT_RETURN			status;
	uint32_t			desc;
	uint64_t			*wqe_addr;
	uint32_t			desc_sz;
	uint32_t			wqeaddrsz;
	uint32_t			head, tail, next_tail, qsize_msk;

	if (ep->qp_state == IBT_STATE_RESET) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "post_recv: invalid qp_state %d\n", ep->qp_state);
		return (DAT_INVALID_STATE);
	}
	qp = ep->qp_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
	wridlist  = qp->qp_rq_wqhdr->wq_wrid_post;

	/* Save away some initial QP state */
	qsize_msk = qp->qp_rq_wqhdr->wq_size - 1;
	tail	  = qp->qp_rq_wqhdr->wq_tail;
	head	  = qp->qp_rq_wqhdr->wq_head;

	/*
	 * For the ibt_recv_wr_t passed in, parse the request and build a
	 * Recv WQE.  Link the WQE with the previous WQE and ring the
	 * doorbell.
	 */

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * then no more WQEs can be posted.  So return an error.
	 */
	if (qp->qp_rq_wqhdr->wq_full != 0) {
		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/*
	 * Increment the "tail index" and check for "queue
	 * full" condition.  If we detect that the current
	 * work request is going to fill the work queue, then
	 * we mark this condition and continue.
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (next_tail == head) {
		qp->qp_rq_wqhdr->wq_full = 1;
	}

	/* Get the descriptor (IO Address) of the WQE to be built */
	desc = TAVOR_QP_RQ_DESC(qp, tail);
	/* The user virtual address of the WQE to be built */
	wqe_addr = TAVOR_QP_RQ_ENTRY(qp, tail);

	/*
	 * Call dapli_arbel_wqe_recv_build() to build the WQE at the given
	 * address.  This routine uses the information in the
	 * ibt_recv_wr_t and returns the size of the WQE.
	 */
	status = dapli_arbel_wqe_recv_build(qp, wr, wqe_addr, &desc_sz);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INTERNAL_ERROR);
	}

	/*
	 * Add a WRID entry to the WRID list.  Need to calculate the
	 * "wqeaddr" and "signaled_dbd" values to pass to
	 * dapli_tavor_wrid_add_entry().
	 * Note: all Recv WQEs are essentially "signaled"
	 */
	wqeaddrsz = TAVOR_QP_WQEADDRSZ(desc, 0);
	dapli_tavor_wrid_add_entry(qp->qp_rq_wqhdr, wr->wr_id, wqeaddrsz,
	    (uint32_t)TAVOR_WRID_ENTRY_SIGNALED);

	/*
	 * Now if the WRID tail entry is non-NULL, then this
	 * represents the entry to which we are chaining the
	 * new entries.  Since we are going to ring the
	 * doorbell for this WQE, we want to set its "dbd" bit.
	 *
	 * On the other hand, if the tail is NULL, even though
	 * we will have rung the doorbell for the previous WQE
	 * (for the hardware's sake) it is irrelevant to our
	 * purposes (for tracking WRIDs) because we know the
	 * request must have already completed.
	 */
	wre_last = wridlist->wl_wre_old_tail;
	if (wre_last != NULL) {
		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
	}

	/* Update some of the state in the QP */
	qp->qp_rq_lastwqeaddr	 = wqe_addr;
	qp->qp_rq_wqhdr->wq_tail = next_tail;

	/* Update the doorbell record */
	qp->qp_rq_counter++;
	(qp->qp_rq_dbp)[0] = HTOBE_32(qp->qp_rq_counter);

	dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_arbel_post_srq()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_arbel_post_srq(DAPL_SRQ *srqp, ibt_recv_wr_t *wr, boolean_t ns)
{
	ib_srq_handle_t			srq;
	DAT_RETURN			status;
	uint32_t			desc;
	uint64_t			*wqe_addr;
	uint32_t			head, next_head, qsize_msk;
	uint32_t			wqe_index;


	srq = srqp->srq_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&srq->srq_wridlist->wl_lock->wrl_lock);

	/*
	 * For the ibt_recv_wr_t passed in, parse the request and build a
	 * Recv WQE.  Link the WQE with the previous WQE and ring the
	 * doorbell.
	 */

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * ie. there are no free entries, then no more WQEs can be posted.
	 * So return an error.
	 */
	if (srq->srq_wridlist->wl_freel_entries == 0) {
		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/* Save away some initial SRQ state */
	qsize_msk = srq->srq_wridlist->wl_size - 1;
	head	  = srq->srq_wridlist->wl_freel_head;

	next_head = (head + 1) & qsize_msk;

	/* Get the descriptor (IO Address) of the WQE to be built */
	desc = srq->srq_wridlist->wl_free_list[head];

	wqe_index = TAVOR_SRQ_WQ_INDEX(srq->srq_wq_desc_addr, desc,
	    srq->srq_wq_wqesz);

	/* The user virtual address of the WQE to be built */
	wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq, wqe_index);

	/*
	 * Call dapli_arbel_wqe_srq_build() to build the WQE at the given
	 * address.  This routine uses the information in the
	 * ibt_recv_wr_t and returns the size of the WQE.
	 */
	status = dapli_arbel_wqe_srq_build(srq, wr, wqe_addr);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
		return (status);
	}

	/*
	 * Add a WRID entry to the WRID list.
	 */
	dapli_tavor_wrid_add_entry_srq(srq, wr->wr_id, wqe_index);

#if 0
	if (srq->srq_wq_lastwqeindex == -1) {
		last_wqe_addr = NULL;
	} else {
		last_wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq,
		    srq->srq_wq_lastwqeindex);
	}
	/*
	 * Now link the chain to the old chain (if there was one)
	 * and update the wqe_counter in the doorbell record.
	 */
XXX
	dapli_tavor_wqe_srq_linknext(wqe_addr, ns, desc, last_wqe_addr);
#endif

	/* Update some of the state in the SRQ */
	srq->srq_wq_lastwqeindex	 = wqe_index;
	srq->srq_wridlist->wl_freel_head = next_head;
	srq->srq_wridlist->wl_freel_entries--;
	dapl_os_assert(srq->srq_wridlist->wl_freel_entries <=
	    srq->srq_wridlist->wl_size);

	/* Update the doorbell record */
	srq->srq_counter++;
	(srq->srq_dbp)[0] = HTOBE_32(srq->srq_counter);

	dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_arbel_cq_srq_entries_flush()
 */
static void
dapli_arbel_cq_srq_entries_flush(ib_qp_handle_t qp)
{
	ib_cq_handle_t		cq;
	dapls_tavor_workq_hdr_t	*wqhdr;
	tavor_hw_cqe_t		*cqe;
	tavor_hw_cqe_t		*next_cqe;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, indx;
	int			cqe_qpnum, cqe_type;
	int			outstanding_cqes, removed_cqes;
	int			i;

	/* ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock)); */

	cq = qp->qp_rq_cqhdl;
	wqhdr = qp->qp_rq_wqhdr;

	dapl_os_assert(wqhdr->wq_wrid_post != NULL);
	dapl_os_assert(wqhdr->wq_wrid_post->wl_srq_en != 0);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_size - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_addr[cons_indx];

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx = (tail_cons_indx + 1) & wrap_around_mask;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_addr[tail_cons_indx];
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1) & wrap_around_mask;

	for (i = 0; i < outstanding_cqes; i++) {
		cqe = &cq->cq_addr[check_indx];

		/* Grab QP number from CQE */
		cqe_qpnum = TAVOR_CQE_QPNUM_GET(cqe);
		cqe_type = TAVOR_CQE_SENDRECV_GET(cqe);

		/*
		 * If the QP number is the same in the CQE as the QP that we
		 * have on this SRQ, then we must free up the entry off the
		 * SRQ.  We also make sure that the completion type is of the
		 * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
		 * this CQ will be left as-is.  The handling of returning
		 * entries back to HW ownership happens further down.
		 */
		if (cqe_qpnum == qp->qp_num &&
		    cqe_type == TAVOR_COMPLETION_RECV) {
			/* Add back to SRQ free list */
			(void) dapli_tavor_wrid_find_match_srq(
			    wqhdr->wq_wrid_post, cqe);
		} else {
			/* Do Copy */
			if (check_indx != new_indx) {
				next_cqe = &cq->cq_addr[new_indx];
				/*
				 * Copy the CQE into the "next_cqe"
				 * pointer.
				 */
				(void) dapl_os_memcpy(next_cqe, cqe,
				    sizeof (tavor_hw_cqe_t));
			}
			new_indx = (new_indx - 1) & wrap_around_mask;
		}
		/* Move index to next CQE to check */
		check_indx = (check_indx - 1) & wrap_around_mask;
	}

	/* Initialize removed cqes count */
	removed_cqes = 0;

	/* If an entry was removed */
	if (check_indx != new_indx) {

		/*
		 * Set current pointer back to the beginning consumer index.
		 * At this point, all unclaimed entries have been copied to the
		 * index specified by 'new_indx'.  This 'new_indx' will be used
		 * as the new consumer index after we mark all freed entries as
		 * having HW ownership.  We do that here.
		 */

		/* Loop through all entries until we reach our new pointer */
		for (indx = cons_indx; indx <= new_indx;
		    indx = (indx + 1) & wrap_around_mask) {
			removed_cqes++;
			cqe = &cq->cq_addr[indx];

			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cqe);
		}
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1) & wrap_around_mask;

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		dapli_arbel_cq_update_ci(cq, cq->cq_poll_dbp);
	}
}

static void
dapli_arbel_rq_prelink(caddr_t first, uint32_t desc_off, uint32_t wqesz,
    uint32_t numwqe, uint32_t nds)
{
	int i;
	uint32_t *p = (uint32_t *)(uintptr_t)first;
	uint32_t off = desc_off;
	uint32_t pincr = wqesz / sizeof (uint32_t);
	ibt_wr_ds_t sgl;

	sgl.ds_va = (ib_vaddr_t)0;
	sgl.ds_key = ARBEL_WQE_SGL_INVALID_LKEY;
	sgl.ds_len = (ib_msglen_t)0;

	for (i = 0; i < numwqe - 1; i++, p += pincr) {
		off += wqesz;
		p[0] = HTOBE_32(off);	/* link curr to next */
		p[1] = nds;		/* nds is 0 for SRQ */
		TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
	}
	p[0] = HTOBE_32(desc_off); /* link last to first */
	p[1] = nds;
	TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
}
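
/*
 * Editorial worked example (hypothetical values): with desc_off 0x1000,
 * wqesz 64 and numwqe 4, the loop above links descriptors
 * 0x1000 -> 0x1040 -> 0x1080 -> 0x10c0, and the final stores link
 * 0x10c0 back to 0x1000, leaving a circular ring in which every WQE
 * carries an invalid-lkey sentinel data segment.
 */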

static void
dapli_arbel_sq_prelink(caddr_t first, uint32_t desc_off, uint32_t wqesz,
    uint32_t numwqe)
{
	int i;
	uint32_t *p = (uint32_t *)(uintptr_t)first;
	uint32_t off = desc_off;
	uint32_t pincr = wqesz / sizeof (uint32_t);

	for (i = 0; i < numwqe - 1; i++, p += pincr) {
		off += wqesz;
		p[0] = HTOBE_32(off);	/* link curr to next */
	}
	p[0] = HTOBE_32(desc_off); /* link last to first */
}

static void
dapli_arbel_qp_init(ib_qp_handle_t qp)
{
	(qp->qp_sq_dbp)[1] = HTOBE_32((qp->qp_num << 8) | ARBEL_DBR_SQ);
	if (qp->qp_srq_enabled == 0) {
		(qp->qp_rq_dbp)[1] = HTOBE_32((qp->qp_num << 8) | ARBEL_DBR_RQ);

		/* pre-link the whole receive queue */
		dapli_arbel_rq_prelink(qp->qp_rq_buf, qp->qp_rq_desc_addr,
		    qp->qp_rq_wqesz, qp->qp_rq_numwqe,
		    HTOBE_32(qp->qp_rq_wqesz >> 4));
	}
	dapli_arbel_sq_prelink(qp->qp_sq_buf, qp->qp_sq_desc_addr,
	    qp->qp_sq_wqesz, qp->qp_sq_numwqe);
	qp->qp_sq_lastwqeaddr = (uint64_t *)((uintptr_t)qp->qp_sq_buf +
	    ((qp->qp_sq_numwqe - 1) * qp->qp_sq_wqesz));
	qp->qp_rq_counter = 0;
	qp->qp_sq_counter = 0;
}

static void
dapli_arbel_cq_init(ib_cq_handle_t cq)
{
	(cq->cq_poll_dbp)[1] =
	    HTOBE_32((cq->cq_num << 8) | ARBEL_DBR_CQ_SET_CI);
	(cq->cq_arm_dbp)[1] =
	    HTOBE_32((cq->cq_num << 8) | ARBEL_DBR_CQ_ARM | 0x8);
	/* cq_resize -- needs testing */
}

static void
dapli_arbel_srq_init(ib_srq_handle_t srq)
{
	(srq->srq_dbp)[1] =
	    HTOBE_32((srq->srq_num << 8) | ARBEL_DBR_SRQ);

	/* pre-link the whole shared receive queue */
	dapli_arbel_rq_prelink(srq->srq_addr, srq->srq_wq_desc_addr,
	    srq->srq_wq_wqesz, srq->srq_wq_numwqe, 0);
	srq->srq_counter = 0;

	/* needs testing */
}

void
dapls_init_funcs_arbel(DAPL_HCA *hca_ptr)
{
	hca_ptr->post_send = dapli_arbel_post_send;
	hca_ptr->post_recv = dapli_arbel_post_recv;
	hca_ptr->post_srq = dapli_arbel_post_srq;
	hca_ptr->cq_peek = dapli_arbel_cq_peek;
	hca_ptr->cq_poll = dapli_arbel_cq_poll;
	hca_ptr->cq_poll_one = dapli_arbel_cq_poll_one;
	hca_ptr->cq_notify = dapli_arbel_cq_notify;
	hca_ptr->srq_flush = dapli_arbel_cq_srq_entries_flush;
	hca_ptr->qp_init = dapli_arbel_qp_init;
	hca_ptr->cq_init = dapli_arbel_cq_init;
	hca_ptr->srq_init = dapli_arbel_srq_init;
	hca_ptr->hermon_resize_cq = 0;
}
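
#if 0
/*
 * Editorial usage sketch (not built): once dapls_init_funcs_arbel()
 * has filled in the jump table, a caller might post a one-segment
 * signaled RDMA write through it roughly as follows.  The "example_"
 * function and all values are hypothetical; a real caller must also
 * fill in wr.wr.rc.rcwr.rdma with the remote address and R_key.
 */
static DAT_RETURN
example_post_rdmaw(DAPL_HCA *hca_ptr, DAPL_EP *ep, ib_vaddr_t va,
    ib_msglen_t len, ibt_lkey_t lkey)
{
	ibt_wr_ds_t	sgl;
	ibt_send_wr_t	wr;

	sgl.ds_va = va;		/* local buffer */
	sgl.ds_len = len;
	sgl.ds_key = lkey;

	wr.wr_id = 0x1234;	/* caller-chosen work request id */
	wr.wr_flags = IBT_WR_SEND_SIGNAL;
	wr.wr_opcode = IBT_WRC_RDMAW;
	wr.wr_nds = 1;
	wr.wr_sgl = &sgl;

	return (hca_ptr->post_send(ep, &wr, B_TRUE));
}
#endif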