/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_wr.c
 *    Hermon Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Hermon WRID tracking mechanism.
 */
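
/*
 * For reference, kernel clients reach these routines through the IBTF CI
 * interface.  A minimal sketch of posting a single-SGL send on a UD QP
 * follows (the "buf", "len", "lkey", "dest", "state", and "qp" handles are
 * hypothetical and assumed to have been set up and registered already):
 *
 *    ibt_send_wr_t wr;
 *    ibt_wr_ds_t sgl;
 *    uint_t nposted;
 *    int status;
 *
 *    sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
 *    sgl.ds_key = lkey;
 *    sgl.ds_len = len;
 *    wr.wr_id = (ibt_wrid_t)(uintptr_t)buf;
 *    wr.wr_flags = IBT_WR_SEND_SIGNAL;
 *    wr.wr_trans = IBT_UD_SRV;
 *    wr.wr_opcode = IBT_WRC_SEND;
 *    wr.wr.ud.udwr_dest = dest;
 *    wr.wr_nds = 1;
 *    wr.wr_sgl = &sgl;
 *    status = hermon_post_send(state, qp, &wr, 1, &nposted);
 */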

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

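/*
 * A "null" scatter entry used to terminate receive descriptors below.
 * The key 0x00000100 is, by all appearances, the special "invalid lkey"
 * value recognized by the hardware; given the ibt_wr_ds_t layout
 * (ds_va, ds_key, ds_len), this entry is { va = 0, key = invalid, len = 0 }.
 */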
static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };

/*
 * Add ability to try to debug RDMA_READ/RDMA_WRITE failures.
 *
 *    0x1 - print rkey used during post_send
 *    0x2 - print sgls used during post_send
 *    0x4 - print FMR comings and goings
 */
int hermon_rdma_debug = 0x0;
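/*
 * Note: as a module-global tunable, this presumably can be set without a
 * rebuild in the usual way, e.g. via /etc/system:
 *
 *    set hermon:hermon_rdma_debug = 0x3
 */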

static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    hermon_hw_snd_wqe_ud_t *ud;
    hermon_workq_hdr_t *wq;
    hermon_ahhdl_t ah;
    ibt_wr_rfci_send_t *rfci;
    ibt_wr_init_send_t *is;
    ibt_ud_dest_t *dest;
    uint64_t *desc;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    uint32_t nopcode, fence, immed_data = 0;
    hermon_hw_wqe_sgl_t *ds, *old_ds;
    ibt_wr_ds_t *sgl;
    int nds;
    int i, j, last_ds, num_ds, status;
    uint32_t *wqe_start;
    int sectperwqe;
    uint_t posted_cnt = 0;
    int total_len, strong_order, fc_bits, cksum;


    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
    _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

    /* "Grab the lock" for the WRID list, i.e., membar_consumer() */
    membar_consumer();

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
    sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);
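    /*
     * qp_sq_log_wqesz is log2 of the WQE size in bytes, so the shift by
     * (log_wqesz - 2) above apparently yields the WQE size in 32-bit
     * words; the headroom stamping loops below step through it 16 words
     * (64 bytes) at a time.
     */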

    tail = wq->wq_tail;
    head = wq->wq_head;
    status = DDI_SUCCESS;

post_next:
    /*
     * Check for "queue full" condition.  If the queue
     * is already full, then no more WQEs can be posted.
     * So break out, ring a doorbell (if necessary) and
     * return an error
     */
    if (wq->wq_full != 0) {
        status = IBT_QP_FULL;
        goto done;
    }

    next_tail = (tail + 1) & qsize_msk;
    if (((tail + hdrmwqes) & qsize_msk) == head) {
        wq->wq_full = 1;
    }

    desc = HERMON_QP_SQ_ENTRY(qp, tail);

    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    strong_order = 0;
    fc_bits = 0;
    cksum = 0;

    /*
     * Build a Send or Send_LSO WQE
     */
    switch (wr->wr_opcode) {
    case IBT_WRC_SEND_LSO:
        if (wr->wr_trans != IBT_UD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
        if (wr->wr_flags & IBT_WR_SEND_CKSUM)
            cksum = 0x30;
        if (wr->wr.ud_lso.lso_hdr_sz > 60) {
            nopcode |= (1 << 6);	/* ReRead bit must be set */
        }
        dest = wr->wr.ud_lso.lso_ud_dest;
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

        total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
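        /*
         * The arithmetic above rounds (4 bytes of inline control word
         * plus the LSO header) up to the next 16-byte boundary,
         * seemingly because inline segments are sized in 16-byte units.
         */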
        if ((uintptr_t)ds + total_len + (nds * 16) >
            (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
            status = IBT_QP_SGL_LEN_INVALID;
            goto done;
        }
        old_ds = ds;
        bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
            wr->wr.ud_lso.lso_hdr_sz);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
        i = 0;
        break;

    case IBT_WRC_SEND:
        nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
        if (qp->qp_serv_type == HERMON_QP_UD) {
            if (wr->wr_trans != IBT_UD_SRV) {
                status = IBT_QP_SRV_TYPE_INVALID;
                goto done;
            }
            if (wr->wr_flags & IBT_WR_SEND_CKSUM)
                cksum = 0x30;
            dest = wr->wr.ud.udwr_dest;
        } else if (qp->qp_serv_type == HERMON_QP_RFCI) {
            if (wr->wr_trans != IBT_RFCI_SRV) {
                status = IBT_QP_SRV_TYPE_INVALID;
                goto done;
            }
            rfci = &wr->wr.fc.rfci_send;
            if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) {
                nopcode |= (rfci->rfci_eof << 16);
                fc_bits = 0x40;	/* set FCRC */
            }
            dest = rfci->rfci_dest;
        } else {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            /* "|=" changes 0xa to 0xb without touching FCEOF */
            nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI;
            immed_data = wr->wr.ud.udwr_immed;
        }
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
        i = 0;
        break;

    case IBT_WRC_INIT_SEND_FCMD:
        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND;
        is = wr->wr.fc.fc_is;
        dest = is->is_ctl.fc_dest;
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
        old_ds = ds;
        /* move ds beyond the FCP-3 Init Segment */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10);
        i = 0;
        break;

    case IBT_WRC_FAST_REG_PMR:
    {
        hermon_hw_snd_wqe_frwr_t *frwr;

        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
        frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
            sizeof (hermon_hw_snd_wqe_frwr_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }

#if 0
/* firmware does not support this */
    case IBT_WRC_LOCAL_INVALIDATE:
    {
        hermon_hw_snd_wqe_local_inv_t *li;

        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
        li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
            sizeof (hermon_hw_snd_wqe_local_inv_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }
#endif
    default:
        status = IBT_QP_OP_TYPE_INVALID;
        goto done;
    }

    if (nds > qp->qp_sq_sgl) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }
    for (last_ds = num_ds, j = i; j < nds; j++) {
        if (sgl[j].ds_len != 0)
            last_ds++;	/* real last ds of wqe to fill */
    }
    desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
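    /*
     * The loop above only counts the non-empty SGL entries, so the
     * descriptor size can be computed before any Data Segment is
     * written; the ">> 0x4" expresses that size in the 16-byte units
     * the control segment expects.  The loop below then fills the
     * segments in reverse, leaving "last_ds" back at "num_ds" when
     * it finishes.
     */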
    for (j = nds; --j >= i; ) {
        if (sgl[j].ds_len == 0) {
            continue;
        }

        /*
         * Fill in the Data Segment(s) for the current WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        last_ds--;
        HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
    }

    membar_producer();

    if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
        HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
            wr->wr.ud_lso.lso_hdr_sz);
    } else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) {
        /* This sits in the STAMP, so must be set after setting SGL */
        HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl,
            is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu,
            is->is_dest_id, is->is_op, is->is_rem_exch,
            is->is_exch_qp_idx);

        /* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */
        /* SIT bit in FCP-3 ctrl segment */
        desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0;
        /* LS bit in FCP-3 ctrl segment */
        fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ?
            0x10000 : 0;
        fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) |
            (is->is_ctl.fc_seq_id << 24);
        immed_data = is->is_ctl.fc_parameter;
    }

    fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

    signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
        (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

    solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;
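    /*
     * The magic numbers above evidently match the ConnectX control
     * segment flag encodings used by other drivers for this family:
     * 0xC is the two-bit "CQ update" (signaled completion) field and
     * 0x2 is the "solicited event" bit.
     */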

    HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
        solicited, signaled_dbd, cksum, qp, strong_order, fc_bits);

    wq->wq_wrid[tail] = wr->wr_id;

    tail = next_tail;

    /* Update some of the state in the QP */
    wq->wq_tail = tail;

    membar_producer();

    /* Now set the ownership bit and opcode (first dword). */
    HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

    posted_cnt++;
    if (--num_wr > 0) {
        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
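        /*
         * Stamp the leading dword of every 64-byte section of the
         * headroom WQE with 0xFFFFFFFF, apparently so that hardware
         * prefetching beyond the tail never sees a stale descriptor
         * as valid.
         */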
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }

        wr++;
        goto post_next;
    }
done:
    if (posted_cnt != 0) {
        ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

        membar_producer();

        /* the FMA retry loop starts for the Hermon doorbell register. */
        hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        HERMON_UAR_DOORBELL(state, uarhdl,
            (uint64_t *)(void *)&state->hs_uar->send,
            (uint64_t)qp->qp_ring);

        /* the FMA retry loop ends. */
        hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }
    }
    if (num_posted != NULL)
        *num_posted = posted_cnt;

    mutex_exit(&qp->qp_sq_lock);

    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}

static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    int status;
    uint32_t nopcode, fence, immed_data = 0;
    hermon_hw_snd_wqe_remaddr_t *rc;
    hermon_hw_snd_wqe_atomic_t *at;
    hermon_hw_snd_wqe_bind_t *bn;
    hermon_hw_snd_wqe_frwr_t *frwr;
    hermon_hw_snd_wqe_local_inv_t *li;
    hermon_hw_wqe_sgl_t *ds;
    ibt_wr_ds_t *sgl;
    int nds;
    int i, last_ds, num_ds;
    uint32_t *wqe_start;
    int sectperwqe;
    uint_t posted_cnt = 0;
    int strong_order;
    int print_rdma;
    int rlen;
    uint32_t rkey;
    uint64_t raddr;

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
    _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
    sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

    tail = wq->wq_tail;
    head = wq->wq_head;
    status = DDI_SUCCESS;

post_next:
    print_rdma = 0;
    rlen = 0;
    strong_order = 0;

    /*
     * Check for "queue full" condition.  If the queue
     * is already full, then no more WQEs can be posted.
     * So break out, ring a doorbell (if necessary) and
     * return an error
     */
    if (wq->wq_full != 0) {
        status = IBT_QP_FULL;
        goto done;
    }
    next_tail = (tail + 1) & qsize_msk;
    if (((tail + hdrmwqes) & qsize_msk) == head) {
        wq->wq_full = 1;
    }

    desc = HERMON_QP_SQ_ENTRY(qp, tail);

    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
        sizeof (hermon_hw_snd_wqe_ctrl_t));
    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    if (wr->wr_trans != IBT_RC_SRV) {
        status = IBT_QP_SRV_TYPE_INVALID;
        goto done;
    }

    /*
     * Validate the operation type.  For RC requests, we allow
     * "Send", "RDMA Read", "RDMA Write", various "Atomic"
     * operations, and memory window "Bind"
     */
    switch (wr->wr_opcode) {
    default:
        status = IBT_QP_OP_TYPE_INVALID;
        goto done;

    case IBT_WRC_SEND:
        if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) {
            nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV;
            immed_data = wr->wr.rc.rcwr.send_inval;
        } else if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
            immed_data = wr->wr.rc.rcwr.send_immed;
        } else {
            nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
        }
        break;

    /*
     * If this is an RDMA Read or RDMA Write request, then fill
     * in the "Remote Address" header fields.
     */
    case IBT_WRC_RDMAW:
        if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
            immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
        } else {
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
        }
        /* FALLTHROUGH */
    case IBT_WRC_RDMAR:
        if (wr->wr_opcode == IBT_WRC_RDMAR)
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
        rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));

        /*
         * Build the Remote Address Segment for the WQE, using
         * the information from the RC work request.
         */
        HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

        if (hermon_rdma_debug) {
            print_rdma = hermon_rdma_debug;
            rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
            raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
        }

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
            sizeof (hermon_hw_snd_wqe_remaddr_t));
        break;

    /*
     * If this is one of the Atomic type operations (i.e
     * Compare-Swap or Fetch-Add), then fill in both the "Remote
     * Address" header fields and the "Atomic" header fields.
     */
    case IBT_WRC_CSWAP:
        nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
        /* FALLTHROUGH */
    case IBT_WRC_FADD:
        if (wr->wr_opcode == IBT_WRC_FADD)
            nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
        rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
            sizeof (hermon_hw_snd_wqe_remaddr_t));

        /*
         * Build the Remote Address and Atomic Segments for
         * the WQE, using the information from the RC Atomic
         * work request.
         */
        HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
        HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
            sizeof (hermon_hw_snd_wqe_atomic_t));

        /*
         * Update "nds" and "sgl" because Atomic requests have
         * only a single Data Segment.
         */
        nds = 1;
        sgl = wr->wr_sgl;
        break;

    /*
     * If this is memory window Bind operation, then we call the
     * hermon_wr_bind_check() routine to validate the request and
     * to generate the updated RKey.  If this is successful, then
     * we fill in the WQE's "Bind" header fields.
     */
    case IBT_WRC_BIND:
        nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
        status = hermon_wr_bind_check(state, wr);
        if (status != DDI_SUCCESS)
            goto done;

        bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));

        /*
         * Build the Bind Memory Window Segments for the WQE,
         * using the information from the RC Bind memory
         * window work request.
         */
        HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

        /*
         * Update the "ds" pointer.  Even though the "bind"
         * operation requires no SGLs, this is necessary to
         * facilitate the correct descriptor size calculations
         * (below).
         */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
            sizeof (hermon_hw_snd_wqe_bind_t));
        nds = 0;
        break;

    case IBT_WRC_FAST_REG_PMR:
        nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
        frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
            sizeof (hermon_hw_snd_wqe_frwr_t));
        nds = 0;
        strong_order = 0x80;
        break;

    case IBT_WRC_LOCAL_INVALIDATE:
        nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
        li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
            sizeof (hermon_hw_snd_wqe_local_inv_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }

    /*
     * Now fill in the Data Segments (SGL) for the Send WQE based
     * on the values setup above (i.e. "sgl", "nds", and the "ds"
     * pointer).  Start by checking for a valid number of SGL entries
     */
    if (nds > qp->qp_sq_sgl) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }

    for (last_ds = num_ds, i = 0; i < nds; i++) {
        if (sgl[i].ds_len != 0)
            last_ds++;	/* real last ds of wqe to fill */
    }
    desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
    for (i = nds; --i >= 0; ) {
        if (sgl[i].ds_len == 0) {
            continue;
        }
        rlen += sgl[i].ds_len;
        if (print_rdma & 0x2)
            IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
                "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);

        /*
         * Fill in the Data Segment(s) for the current WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        last_ds--;
        HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
    }
    /* ensure RDMA READ does not exceed HCA limit */
    if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz >
        state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }

    if (print_rdma & 0x1) {
        IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
            "total len %x", tail, rkey, raddr, rlen);
    }

    fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

    signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
        (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

    solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

    HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
        signaled_dbd, 0, qp, strong_order, 0);

    wq->wq_wrid[tail] = wr->wr_id;

    tail = next_tail;

    /* Update some of the state in the QP */
    wq->wq_tail = tail;

    membar_producer();

    /* Now set the ownership bit of the first one in the chain. */
    HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

    posted_cnt++;
    if (--num_wr > 0) {
        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }

        wr++;
        goto post_next;
    }
done:

    if (posted_cnt != 0) {
        ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

        membar_producer();

        /* the FMA retry loop starts for the Hermon doorbell register. */
        hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* Ring the doorbell */
        HERMON_UAR_DOORBELL(state, uarhdl,
            (uint64_t *)(void *)&state->hs_uar->send,
            (uint64_t)qp->qp_ring);

        /* the FMA retry loop ends. */
        hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }
    }
    /*
     * Update the "num_posted" return value (if necessary).
     * Then drop the lock and return success.
     */
    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }

    mutex_exit(&qp->qp_sq_lock);
    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    ibt_send_wr_t *curr_wr;
    hermon_workq_hdr_t *wq;
    hermon_ahhdl_t ah;
    uint64_t *desc, *prev;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    uint_t currindx, wrindx, numremain;
    uint_t chainlen;
    uint_t posted_cnt, maxstat;
    uint_t total_posted;
    int status;
    uint32_t nopcode, fence, immed_data = 0;
    uint32_t prev_nopcode;
    uint_t qp_state;

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

    /*
     * Check for user-mappable QP memory.  Note:  We do not allow kernel
     * clients to post to QP memory that is accessible directly by the
     * user.  If the QP memory is user accessible, then return an error.
     */
    if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
        return (IBT_QP_HDL_INVALID);
    }

    mutex_enter(&qp->qp_sq_lock);

    /*
     * Check QP state.  Can not post Send requests from the "Reset",
     * "Init", or "RTR" states
     */
    qp_state = qp->qp_state_for_post_send;
    if ((qp_state == HERMON_QP_RESET) ||
        (qp_state == HERMON_QP_INIT) ||
        (qp_state == HERMON_QP_RTR)) {
        mutex_exit(&qp->qp_sq_lock);
        return (IBT_QP_STATE_INVALID);
    }

    if (qp->qp_is_special)
        goto post_many;

    /* Use these optimized functions most of the time */
    if (qp->qp_type == IBT_UD_RQP) {
        return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
    }

    if (qp->qp_serv_type == HERMON_QP_RC) {
        return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
    }

    if (qp->qp_serv_type == HERMON_QP_UC)
        goto post_many;

    mutex_exit(&qp->qp_sq_lock);
    return (IBT_QP_SRV_TYPE_INVALID);

post_many:
    /* general loop for non-optimized posting */

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    tail = wq->wq_tail;
    head = wq->wq_head;
    hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */

    /* Initialize posted_cnt */
    posted_cnt = 0;
    total_posted = 0;

    /*
     * For each ibt_send_wr_t in the wr[] list passed in, parse the
     * request and build a Send WQE.  NOTE:  Because we are potentially
     * building a chain of WQEs to post, we want to build them all first,
     * and set the valid (HW Ownership) bit on all but the first.
     * However, we do not want to validate the first one until the
     * entire chain of WQEs has been built.  Then in the final step we
     * set the valid bit in the first, flush if needed, and as a last
     * step ring the appropriate doorbell.  NOTE: the doorbell ring may
     * NOT be needed if the HCA is already processing, but the doorbell
     * ring will be done regardless.  NOTE ALSO:  It is possible for
     * more Work Requests to be posted than the HW will support at one
     * shot.  If this happens, we need to be able to post and ring
     * several chains here until the entire request is complete.
     * NOTE ALSO:  the term "chain" is used to differentiate it from
     * the Work Request List passed in; and because that's the terminology
     * from the previous generations of HCA - but the WQEs are not, in
     * fact, chained together for Hermon
     */

    wrindx = 0;
    numremain = num_wr;
    status = DDI_SUCCESS;
    while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
        /*
         * For the first WQE on a new chain we need "prev" to point
         * to the current descriptor.
         */
        prev = HERMON_QP_SQ_ENTRY(qp, tail);

        /*
         * Break the request up into lists that are less than or
         * equal to the maximum number of WQEs that can be posted
         * per doorbell ring - 256 currently
         */
        chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
            HERMON_QP_MAXDESC_PER_DB : numremain;
        numremain -= chainlen;

        for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
            /*
             * Check for "queue full" condition.  If the queue
             * is already full, then no more WQEs can be posted.
             * So break out, ring a doorbell (if necessary) and
             * return an error
             */
            if (wq->wq_full != 0) {
                status = IBT_QP_FULL;
                break;
            }

            /*
             * Increment the "tail index".  Check for "queue
             * full" condition incl. headroom.  If we detect that
             * the current work request is going to fill the work
             * queue, then we mark this condition and continue.
             * Don't need >=, because going one-by-one we have to
             * hit it exactly sooner or later
             */

            next_tail = (tail + 1) & qsize_msk;
            if (((tail + hdrmwqes) & qsize_msk) == head) {
                wq->wq_full = 1;
            }

            /*
             * Get the address of the location where the next
             * Send WQE should be built
             */
            desc = HERMON_QP_SQ_ENTRY(qp, tail);
            /*
             * Call hermon_wqe_send_build() to build the WQE
             * at the given address.  This routine uses the
             * information in the ibt_send_wr_t list (wr[]) and
             * returns the size of the WQE when it returns.
             */
            status = hermon_wqe_send_build(state, qp,
                &wr[wrindx], desc, &desc_sz);
            if (status != DDI_SUCCESS) {
                break;
            }

            /*
             * Now, build the Ctrl Segment based on
             * what was just done
             */
            curr_wr = &wr[wrindx];

            switch (curr_wr->wr_opcode) {
            case IBT_WRC_RDMAW:
                if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
                    nopcode =
                        HERMON_WQE_SEND_NOPCODE_RDMAWI;
                    immed_data =
                        hermon_wr_get_immediate(curr_wr);
                } else {
                    nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
                }
                break;

            case IBT_WRC_SEND:
                if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
                    nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
                    immed_data =
                        hermon_wr_get_immediate(curr_wr);
                } else {
                    nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
                }
                break;

            case IBT_WRC_SEND_LSO:
                nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
                break;

            case IBT_WRC_RDMAR:
                nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
                break;

            case IBT_WRC_CSWAP:
                nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
                break;

            case IBT_WRC_FADD:
                nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
                break;

            case IBT_WRC_BIND:
                nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
                break;
            }
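            /*
             * No default case is needed above:
             * hermon_wqe_send_build() has already rejected any
             * opcode not handled here.
             */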

            fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

            /*
             * now, build up the control segment, leaving the
             * owner bit as it is
             */

            if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
                (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
                signaled_dbd = 0xC;
            } else {
                signaled_dbd = 0;
            }
            if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
                solicited = 0x2;
            else
                solicited = 0;

            if (qp->qp_is_special) {
                /* Ensure correctness, set the ReRead bit */
                nopcode |= (1 << 6);
                ah = (hermon_ahhdl_t)
                    curr_wr->wr.ud.udwr_dest->ud_ah;
                mutex_enter(&ah->ah_lock);
                maxstat = ah->ah_udav->max_stat_rate;
                HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
                    signaled_dbd, maxstat, ah->ah_udav->rlid,
                    qp, ah->ah_udav->sl);
                mutex_exit(&ah->ah_lock);
            } else {
                HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
                    fence, immed_data, solicited,
                    signaled_dbd, 0, qp, 0, 0);
            }
            wq->wq_wrid[tail] = curr_wr->wr_id;

            /*
             * If this is not the first descriptor on the current
             * chain, then set the ownership bit.
             */
            if (currindx != 0) {		/* not the first */
                membar_producer();
                HERMON_SET_SEND_WQE_OWNER(qp,
                    (uint32_t *)desc, nopcode);
            } else
                prev_nopcode = nopcode;

            /*
             * Update the current "tail index" and increment
             * "posted_cnt"
             */
            tail = next_tail;
            posted_cnt++;
        }

        /*
         * If we reach here and there are one or more WQEs which have
         * been successfully built as a chain, we have to finish up
         * and prepare them for writing to the HW.
         * The steps are:
         *    1. do the headroom fixup
         *    2. add in the size of the headroom for the sync
         *    3. write the owner bit for the first WQE
         *    4. sync them
         *    5. fix up the structures
         *    6. hit the doorbell in UAR
         */
        if (posted_cnt != 0) {
            ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

            /* do the invalidate of the headroom */

            hermon_wqe_headroom(tail, qp);

            /* Update some of the state in the QP */
            wq->wq_tail = tail;
            total_posted += posted_cnt;
            posted_cnt = 0;

            membar_producer();

            /*
             * Now set the ownership bit of the first
             * one in the chain
             */
            HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
                prev_nopcode);

            /* the FMA retry loop starts for the Hermon doorbell. */
            hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
                fm_status, fm_test);

            HERMON_UAR_DOORBELL(state, uarhdl,
                (uint64_t *)(void *)&state->hs_uar->send,
                (uint64_t)qp->qp_ring);

            /* the FMA retry loop ends. */
            hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
                fm_status, fm_test);
        }
    }

    /*
     * Update the "num_posted" return value (if necessary).
     * Then drop the lock and return success.
     */
    if (num_posted != NULL) {
        *num_posted = total_posted;
    }
    mutex_exit(&qp->qp_sq_lock);
    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint32_t head, tail, next_tail, qsize_msk;
    uint_t wrindx;
    uint_t posted_cnt;
    int status;

    /*
     * Check for user-mappable QP memory.  Note:  We do not allow kernel
     * clients to post to QP memory that is accessible directly by the
     * user.  If the QP memory is user accessible, then return an error.
     */
    if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
        return (IBT_QP_HDL_INVALID);
    }

    /* Initialize posted_cnt */
    posted_cnt = 0;

    mutex_enter(&qp->qp_lock);

    /*
     * Check if QP is associated with an SRQ
     */
    if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
        mutex_exit(&qp->qp_lock);
        return (IBT_SRQ_IN_USE);
    }

    /*
     * Check QP state.  Can not post Recv requests from the "Reset" state
     */
    if (qp->qp_state == HERMON_QP_RESET) {
        mutex_exit(&qp->qp_lock);
        return (IBT_QP_STATE_INVALID);
    }

    /* Check that work request transport type is valid */
    if ((qp->qp_type != IBT_UD_RQP) &&
        (qp->qp_serv_type != HERMON_QP_RC) &&
        (qp->qp_serv_type != HERMON_QP_UC)) {
        mutex_exit(&qp->qp_lock);
        return (IBT_QP_SRV_TYPE_INVALID);
    }

    /*
     * Grab the lock for the WRID list, i.e., membar_consumer().
     * This is not needed because the mutex_enter() above has
     * the same effect.
     */

    /* Save away some initial QP state */
    wq = qp->qp_rq_wqhdr;
    qsize_msk = wq->wq_mask;
    tail = wq->wq_tail;
    head = wq->wq_head;

    wrindx = 0;
    status = DDI_SUCCESS;

    for (wrindx = 0; wrindx < num_wr; wrindx++) {
        if (wq->wq_full != 0) {
            status = IBT_QP_FULL;
            break;
        }
        next_tail = (tail + 1) & qsize_msk;
        if (next_tail == head) {
            wq->wq_full = 1;
        }
        desc = HERMON_QP_RQ_ENTRY(qp, tail);
        status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
        if (status != DDI_SUCCESS) {
            break;
        }

        wq->wq_wrid[tail] = wr[wrindx].wr_id;
        qp->qp_rq_wqecntr++;

        tail = next_tail;
        posted_cnt++;
    }

    if (posted_cnt != 0) {

        wq->wq_tail = tail;

        membar_producer();	/* ensure wrids are visible */

        /* Update the doorbell record with the low 16 bits of wqecntr */
        HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
            qp->qp_rq_wqecntr & 0xFFFF);
    }

    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }


    mutex_exit(&qp->qp_lock);
    return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint_t indx, wrindx;
    uint_t posted_cnt;
    int status;

    mutex_enter(&srq->srq_lock);

    /*
     * Check for user-mappable SRQ memory.  Note:  We do not allow kernel
     * clients to post to SRQ memory that is accessible directly by the
     * user.  If the SRQ memory is user accessible, then return an error.
     */
    if (srq->srq_is_umap) {
        mutex_exit(&srq->srq_lock);
        return (IBT_SRQ_HDL_INVALID);
    }

    /*
     * Check SRQ state.  Can not post Recv requests when SRQ is in error
     */
    if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
        mutex_exit(&srq->srq_lock);
        return (IBT_QP_STATE_INVALID);
    }

    status = DDI_SUCCESS;
    posted_cnt = 0;
    wq = srq->srq_wq_wqhdr;
    indx = wq->wq_head;

    for (wrindx = 0; wrindx < num_wr; wrindx++) {

        if (indx == wq->wq_tail) {
            status = IBT_QP_FULL;
            break;
        }
        desc = HERMON_SRQ_WQE_ADDR(srq, indx);

        wq->wq_wrid[indx] = wr[wrindx].wr_id;

        status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
        if (status != DDI_SUCCESS) {
            break;
        }

        posted_cnt++;
        indx = htons(((uint16_t *)desc)[1]);
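        /*
         * The free WQEs on an SRQ are chained together: the index of
         * the next free WQE evidently lives in the second 16-bit word
         * of the descriptor, stored big-endian, hence the htons()
         * fetch above.
         */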
        wq->wq_head = indx;
    }

    if (posted_cnt != 0) {

        srq->srq_wq_wqecntr += posted_cnt;

        membar_producer();	/* ensure wrids are visible */

        /* Ring the doorbell w/ wqecntr */
        HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
            srq->srq_wq_wqecntr & 0xFFFF);
    }

    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }

    mutex_exit(&srq->srq_lock);
    return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
    hermon_hw_snd_wqe_ud_t *ud;
    hermon_hw_snd_wqe_remaddr_t *rc;
    hermon_hw_snd_wqe_atomic_t *at;
    hermon_hw_snd_wqe_remaddr_t *uc;
    hermon_hw_snd_wqe_bind_t *bn;
    hermon_hw_wqe_sgl_t *ds, *old_ds;
    ibt_ud_dest_t *dest;
    ibt_wr_ds_t *sgl;
    hermon_ahhdl_t ah;
    uint32_t nds;
    int i, j, last_ds, num_ds, status;
    int tmpsize;

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

    /* Initialize the information for the Data Segments */
    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
        sizeof (hermon_hw_snd_wqe_ctrl_t));
    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    i = 0;

    /*
     * Building a Send WQE depends first and foremost on the transport
     * type of the Work Request (i.e. UD, RC, or UC)
     */
    switch (wr->wr_trans) {
    case IBT_UD_SRV:
        /* Ensure that work request transport type matches QP type */
        if (qp->qp_serv_type != HERMON_QP_UD) {
            return (IBT_QP_SRV_TYPE_INVALID);
        }

        /*
         * Validate the operation type.  For UD requests, only the
         * "Send" and "Send LSO" operations are valid.
         */
        if (wr->wr_opcode != IBT_WRC_SEND &&
            wr->wr_opcode != IBT_WRC_SEND_LSO) {
            return (IBT_QP_OP_TYPE_INVALID);
        }

        /*
         * If this is a Special QP (QP0 or QP1), then we need to
         * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
         * and return whatever status it returns
         */
        if (qp->qp_is_special) {
            if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
                return (IBT_QP_OP_TYPE_INVALID);
            }
            status = hermon_wqe_mlx_build(state, qp,
                wr, desc, size);
            return (status);
        }

        /*
         * Otherwise, if this is a normal UD Send request, then fill
         * all the fields in the Hermon UD header for the WQE.  Note:
         * to do this we'll need to extract some information from the
         * Address Handle passed with the work request.
         */
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        if (wr->wr_opcode == IBT_WRC_SEND) {
            dest = wr->wr.ud.udwr_dest;
        } else {
            dest = wr->wr.ud_lso.lso_ud_dest;
        }
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            return (IBT_AH_HDL_INVALID);
        }

        /*
         * Build the Unreliable Datagram Segment for the WQE, using
         * the information from the address handle and the work
         * request.
         */
        /* mutex_enter(&ah->ah_lock); */
        if (wr->wr_opcode == IBT_WRC_SEND) {
            HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
        } else {	/* IBT_WRC_SEND_LSO */
            HERMON_WQE_BUILD_UD(qp, ud, ah,
                wr->wr.ud_lso.lso_ud_dest);
        }
        /* mutex_exit(&ah->ah_lock); */

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));

        if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
            int total_len;

            total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
            if ((uintptr_t)ds + total_len + (nds * 16) >
                (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
                return (IBT_QP_SGL_LEN_INVALID);

            bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
                wr->wr.ud_lso.lso_hdr_sz);
            old_ds = ds;
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
            for (; i < nds; i++) {
                if (sgl[i].ds_len == 0)
                    continue;
                HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
                    &sgl[i]);
                num_ds++;
                i++;
                break;
            }
            membar_producer();
            HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
                wr->wr.ud_lso.lso_hdr_sz);
        }

        break;

    case IBT_RC_SRV:
        /* Ensure that work request transport type matches QP type */
        if (qp->qp_serv_type != HERMON_QP_RC) {
            return (IBT_QP_SRV_TYPE_INVALID);
        }

        /*
         * Validate the operation type.  For RC requests, we allow
         * "Send", "RDMA Read", "RDMA Write", various "Atomic"
         * operations, and memory window "Bind"
         */
        if ((wr->wr_opcode != IBT_WRC_SEND) &&
            (wr->wr_opcode != IBT_WRC_RDMAR) &&
            (wr->wr_opcode != IBT_WRC_RDMAW) &&
            (wr->wr_opcode != IBT_WRC_CSWAP) &&
            (wr->wr_opcode != IBT_WRC_FADD) &&
            (wr->wr_opcode != IBT_WRC_BIND)) {
            return (IBT_QP_OP_TYPE_INVALID);
        }

        /*
         * If this is a Send request, then all we need to do is break
         * out here and begin the Data Segment processing below
         */
        if (wr->wr_opcode == IBT_WRC_SEND) {
            break;
        }

        /*
         * If this is an RDMA Read or RDMA Write request, then fill
         * in the "Remote Address" header fields.
         */
        if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
            (wr->wr_opcode == IBT_WRC_RDMAW)) {
            rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Remote Address Segment for the WQE, using
             * the information from the RC work request.
             */
            HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

            /* Update "ds" for filling in Data Segments (below) */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
                sizeof (hermon_hw_snd_wqe_remaddr_t));
            break;
        }

        /*
         * If this is one of the Atomic type operations (i.e
         * Compare-Swap or Fetch-Add), then fill in both the "Remote
         * Address" header fields and the "Atomic" header fields.
         */
        if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
            (wr->wr_opcode == IBT_WRC_FADD)) {
            rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));
            at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
                sizeof (hermon_hw_snd_wqe_remaddr_t));

            /*
             * Build the Remote Address and Atomic Segments for
             * the WQE, using the information from the RC Atomic
             * work request.
             */
            HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
            HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

            /* Update "ds" for filling in Data Segments (below) */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
                sizeof (hermon_hw_snd_wqe_atomic_t));

            /*
             * Update "nds" and "sgl" because Atomic requests have
             * only a single Data Segment (and they are encoded
             * somewhat differently in the work request).
             */
            nds = 1;
            sgl = wr->wr_sgl;
            break;
        }

        /*
         * If this is memory window Bind operation, then we call the
         * hermon_wr_bind_check() routine to validate the request and
         * to generate the updated RKey.  If this is successful, then
         * we fill in the WQE's "Bind" header fields.
         */
        if (wr->wr_opcode == IBT_WRC_BIND) {
            status = hermon_wr_bind_check(state, wr);
            if (status != DDI_SUCCESS) {
                return (status);
            }

            bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Bind Memory Window Segments for the WQE,
             * using the information from the RC Bind memory
             * window work request.
             */
            HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

            /*
             * Update the "ds" pointer.  Even though the "bind"
             * operation requires no SGLs, this is necessary to
             * facilitate the correct descriptor size calculations
             * (below).
             */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
                sizeof (hermon_hw_snd_wqe_bind_t));
            nds = 0;
        }
        break;

    case IBT_UC_SRV:
        /* Ensure that work request transport type matches QP type */
        if (qp->qp_serv_type != HERMON_QP_UC) {
            return (IBT_QP_SRV_TYPE_INVALID);
        }

        /*
         * Validate the operation type.  For UC requests, we only
         * allow "Send", "RDMA Write", and memory window "Bind".
         * Note:  Unlike RC, UC does not allow "RDMA Read" or "Atomic"
         * operations
         */
        if ((wr->wr_opcode != IBT_WRC_SEND) &&
            (wr->wr_opcode != IBT_WRC_RDMAW) &&
            (wr->wr_opcode != IBT_WRC_BIND)) {
            return (IBT_QP_OP_TYPE_INVALID);
        }

        /*
         * If this is a Send request, then all we need to do is break
         * out here and begin the Data Segment processing below
         */
        if (wr->wr_opcode == IBT_WRC_SEND) {
            break;
        }

        /*
         * If this is an RDMA Write request, then fill in the "Remote
         * Address" header fields.
         */
        if (wr->wr_opcode == IBT_WRC_RDMAW) {
            uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Remote Address Segment for the WQE, using
             * the information from the UC work request.
             */
            HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

            /* Update "ds" for filling in Data Segments (below) */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
                sizeof (hermon_hw_snd_wqe_remaddr_t));
            break;
        }

        /*
         * If this is memory window Bind operation, then we call the
         * hermon_wr_bind_check() routine to validate the request and
         * to generate the updated RKey.  If this is successful, then
         * we fill in the WQE's "Bind" header fields.
         */
        if (wr->wr_opcode == IBT_WRC_BIND) {
            status = hermon_wr_bind_check(state, wr);
            if (status != DDI_SUCCESS) {
                return (status);
            }

            bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Bind Memory Window Segments for the WQE,
             * using the information from the UC Bind memory
             * window work request.
             */
            HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

            /*
             * Update the "ds" pointer.  Even though the "bind"
             * operation requires no SGLs, this is necessary to
             * facilitate the correct descriptor size calculations
             * (below).
             */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
                sizeof (hermon_hw_snd_wqe_bind_t));
            nds = 0;
        }
        break;

    default:
        return (IBT_QP_SRV_TYPE_INVALID);
    }

    /*
     * Now fill in the Data Segments (SGL) for the Send WQE based on
     * the values setup above (i.e. "sgl", "nds", and the "ds" pointer).
     * Start by checking for a valid number of SGL entries
     */
    if (nds > qp->qp_sq_sgl) {
        return (IBT_QP_SGL_LEN_INVALID);
    }

    /*
     * For each SGL in the Send Work Request, fill in the Send WQE's data
     * segments.  Note: We skip any SGL with zero size because Hermon
     * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
     * the encoding for zero means a 2GB transfer.
     */
    for (last_ds = num_ds, j = i; j < nds; j++) {
        if (sgl[j].ds_len != 0)
            last_ds++;	/* real last ds of wqe to fill */
    }
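    /*
     * As in the posting routines above, this first pass only counts the
     * non-empty SGL entries; the segments themselves are written by the
     * reverse-order loop below, after the descriptor size is known.
     */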

    /*
     * Return the size of descriptor (in 16-byte chunks)
     * For Hermon, we want them (for now) to be on stride size
     * boundaries, which was implicit in Tavor/Arbel
     *
     */
    tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

    *size = tmpsize >> 0x4;

    for (j = nds; --j >= i; ) {
        if (sgl[j].ds_len == 0) {
            continue;
        }

        /*
         * Fill in the Data Segment(s) for the current WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        last_ds--;
        HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
    }

    return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
    hermon_ahhdl_t ah;
    hermon_hw_udav_t *udav;
    ib_lrh_hdr_t *lrh;
    ib_grh_t *grh;
    ib_bth_hdr_t *bth;
    ib_deth_hdr_t *deth;
    hermon_hw_wqe_sgl_t *ds;
    ibt_wr_ds_t *sgl;
    uint8_t *mgmtclass, *hpoint, *hcount;
    uint32_t nds, offset, pktlen;
    uint32_t desc_sz;
    int i, num_ds;
    int tmpsize;

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

    /* Initialize the information for the Data Segments */
    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
        sizeof (hermon_hw_mlx_wqe_nextctrl_t));

    /*
     * Pull the address handle from the work request.  The UDAV will
     * be used to answer some questions about the request.
     */
    ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
    if (ah == NULL) {
        return (IBT_AH_HDL_INVALID);
    }
    mutex_enter(&ah->ah_lock);
    udav = ah->ah_udav;

    /*
     * If the request is for QP1 and the destination LID is equal to
     * the Permissive LID, then return an error.  This combination is
     * not allowed
     */
    if ((udav->rlid == IB_LID_PERMISSIVE) &&
        (qp->qp_is_special == HERMON_QP_GSI)) {
        mutex_exit(&ah->ah_lock);
        return (IBT_AH_HDL_INVALID);
    }

    /*
     * Calculate the size of the packet headers, including the GRH
     * (if necessary)
     */
    desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
        sizeof (ib_deth_hdr_t);
    if (udav->grh) {
        desc_sz += sizeof (ib_grh_t);
    }

    /*
     * Begin to build the first "inline" data segment for the packet
     * headers.  Note:  By specifying "inline" we can build the contents
     * of the MAD packet headers directly into the work queue (as part
     * of the descriptor).  This has the advantage of both speeding
     * things up and of not requiring the driver to allocate/register
     * any additional memory for the packet headers.
     */
    HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
    desc_sz += 4;

    /*
     * Build Local Route Header (LRH)
     *    We start here by building the LRH into a temporary location.
     *    When we have finished we copy the LRH data into the descriptor.
     *
     *    Notice that the VL values are hardcoded.  This is not a problem
     *    because VL15 is decided later based on the value in the MLX
     *    transport "next/ctrl" header (see the "vl15" bit below), and it
     *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
     *    values.  This rule does not hold for loopback packets however
     *    (all of which bypass the SL-to-VL tables) and it is the reason
     *    that non-QP0 MADs are setup with VL hardcoded to zero below.
     *
     *    Notice also that Source LID is hardcoded to the Permissive LID
     *    (0xFFFF).  This is also not a problem because if the Destination
     *    LID is not the Permissive LID, then the "slr" value in the MLX
     *    transport "next/ctrl" header will be set to zero and the hardware
     *    will pull the LID from value in the port.
     */
    lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
    pktlen = (desc_sz + 0x100) >> 2;
    HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

    /*
     * Build Global Route Header (GRH)
     *    This is only built if necessary as defined by the "grh" bit in
     *    the address vector.  Note:  We also calculate the offset to the
     *    next header (BTH) based on whether or not the "grh" bit is set.
     */
    if (udav->grh) {
        /*
         * If the request is for QP0, then return an error.  The
         * combination of global routing (GRH) and QP0 is not allowed.
         */
        if (qp->qp_is_special == HERMON_QP_SMI) {
            mutex_exit(&ah->ah_lock);
            return (IBT_AH_HDL_INVALID);
        }
        grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
        HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

        bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
    } else {
        bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
    }
    mutex_exit(&ah->ah_lock);


    /*
     * Build Base Transport Header (BTH)
     *    Notice that the M, PadCnt, and TVer fields are all set
     *    to zero implicitly.  This is true for all Management
     *    Datagrams (MADs), whether GSI or SMI.
     */
    HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

    /*
     * Build Datagram Extended Transport Header (DETH)
     */
    deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
    HERMON_WQE_BUILD_MLX_DETH(deth, qp);

    /* Ensure that the Data Segment is aligned on a 16-byte boundary */
    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
    ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;

    /*
     * Now fill in the Data Segments (SGL) for the MLX WQE based on the
     * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
     * Start by checking for a valid number of SGL entries
     */
    if (nds > qp->qp_sq_sgl) {
        return (IBT_QP_SGL_LEN_INVALID);
    }
1823
1824 /*
1825 * For each SGL in the Send Work Request, fill in the MLX WQE's data
1826 * segments. Note: We skip any SGL with zero size because Hermon
1827 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1828 * the encoding for zero means a 2GB transfer. Because of this special
1829 * encoding in the hardware, we mask the requested length with
1830 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1831 * zero.)
1832 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be. Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment. This data segment is inlined
	 * just like the packet headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate
	 * the ICRC.)
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of the descriptor (in 16-byte chunks).
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel.
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_recv_build()
 * Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t *ds;
	int i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE. Note: no
	 * space needs to be reserved for a control segment (there is
	 * none on the Hermon receive queue), but we will need to append
	 * an invalid (null) scatter pointer, per the PRM.
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments. Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
	 * the encoding for zero means a 2GB transfer. Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/* Append the invalid (null) scatter pointer, if needed */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_srq_build()
 * Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t *ds;
	int i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments. Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
	 * the encoding for zero means a 2GB transfer. Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 * Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request. Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else { /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else { /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 * Context: Can be called from interrupt or base context; currently
 * called only from base context.
 * Routine that fills in the headroom for the Send Queue.
 */

static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t *wqe_start, *wqe_top, *wqe_base, qsize;
	int hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t invalue;
	int i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6; /* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

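	/*
	 * Stamp the "headroom" WQEs ahead of the current position as
	 * invalid, preserving only the ownership (high) bit of the first
	 * word of each WQE. A hedged note: this is presumably done so
	 * that the hardware's descriptor prefetch never executes a stale
	 * WQE left over from an earlier trip around the ring.
	 */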
	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) { /* 1st section of wqe */
				/* preserve ownership bit */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16; /* move 64 bytes */
		}
		if (wqe_start == wqe_top) /* hit the end of the queue */
			wqe_start = wqe_base; /* wrap to start */
	}
}

/*
 * hermon_wr_bind_check()
 * Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t bind_flags;
	uint64_t vaddr, len;
	uint64_t reg_start_addr, reg_end_addr;
	hermon_mwhdl_t mw;
	hermon_mrhdl_t mr;
	hermon_rsrc_t *mpt;
	uint32_t new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len". Note: We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
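		/* Also check that the last byte of the bind range is valid */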
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags. Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 * Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t *swq, *rwq;

	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif
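	/*
	 * Lock-ordering note: the receive CQ's lock is always taken first,
	 * and the send CQ's lock is taken only when it is a different CQ,
	 * so a QP whose send and receive work queues share one CQ takes
	 * that CQ's lock exactly once.
	 */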

	/* Chain the newly allocated work queue header to the CQ's list */
	if (qp->qp_sq_cqhdl)
		hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_to_reset_handling()
 * Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	if (qp->qp_sq_cqhdl != NULL)
		hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif

	return (IBT_SUCCESS);
}


/*
 * hermon_wrid_get_entry()
 * Context: Can be called from interrupt or base context.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t *wqa;
	hermon_workq_hdr_t *wq;
	uint64_t wrid;
	uint_t send_or_recv, qpnum;
	uint32_t indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size). Once we find it, we pull out
	 * the "wrid" field and return it (see below). XXX Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * entry on success.
	 */
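	/*
	 * Note: wq_wrid[] is sized to the work queue depth and indexed by
	 * masking the CQE's WQE address/size field with wq_mask (which
	 * assumes, as elsewhere in this file, a power-of-two queue size).
	 */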
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s *srq;
		uint64_t *desc;

		/* put wqe back on the srq free list */
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
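		/*
		 * Hedged note: the second 16-bit word of the descriptor is
		 * taken here to be the "next wqe index" field of the SRQ
		 * WQE's next-segment (per the PRM layout), so this write
		 * links the just-completed WQE back onto the free list.
		 */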
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		wq->wq_head = (indx + 1) & wq->wq_mask;
		wq->wq_full = 0;
	}

	return (wrid);
}


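/*
 * hermon_wrid_workq_compare()
 * Context: Can be called from interrupt or base context.
 * AVL comparator for the CQ's work queue tree; orders entries by QP
 * number first, then by work queue type (send vs. recv).
 */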
int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t *cmpp;
	hermon_workq_avl_t *curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;

	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}


/*
 * hermon_wrid_wqavl_find()
 * Context: Can be called from interrupt or base context.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t *curr;
	hermon_workq_compare_t cmp;

	/*
	 * Search the CQ's AVL tree of work queue entries for a send or
	 * recv queue with the matching QP number and work queue type.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}


/*
 * hermon_wrid_wqhdr_create()
 * Context: Can be called from base context.
 */
/* ARGSUSED */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t *wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
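	/*
	 * Note: bufsz is expected to be a power of two, so that
	 * (bufsz - 1) can serve as an index mask.
	 */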
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;

	return (wqhdr);
}

void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}


/*
 * hermon_cq_workq_add()
 * Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t cmp;
	avl_index_t where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
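	/*
	 * avl_find() is used here only to compute the insertion point
	 * ("where"); no duplicate entry is expected to be found.
	 */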
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}


/*
 * hermon_cq_workq_remove()
 * Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}