/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_wr.c
 *    Hermon Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Hermon WRID tracking mechanism.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };

/*
 * Tunable added to help debug RDMA_READ/RDMA_WRITE failures.
 *
 *  0x1 - print rkey used during post_send
 *  0x2 - print sgls used during post_send
 *  0x4 - print FMR comings and goings
 */
int hermon_rdma_debug = 0x0;
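/*
 * These flag values may be OR-ed together.  As a (hypothetical) usage
 * example, an administrator chasing RDMA failures could enable both of
 * the post_send printouts by adding the following line to /etc/system
 * and rebooting:
 *
 *	set hermon:hermon_rdma_debug = 0x3
 */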

static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_wr_rfci_send_t		*rfci;
	ibt_wr_init_send_t		*is;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_wr_ds_t			*sgl;
	int				nds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;
	int				total_len, strong_order, fc_bits, cksum;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Grab the lock for the WRID list */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	strong_order = 0;
	fc_bits = 0;
	cksum = 0;

	/*
	 * Build a Send or Send_LSO WQE
	 */
	switch (wr->wr_opcode) {
	case IBT_WRC_SEND_LSO:
		if (wr->wr_trans != IBT_UD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		if (wr->wr_flags & IBT_WR_SEND_CKSUM)
			cksum = 0x30;
		if (wr->wr.ud_lso.lso_hdr_sz > 60) {
			nopcode |= (1 << 6);	/* ReRead bit must be set */
		}
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		old_ds = ds;
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		i = 0;
		break;
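	/*
	 * A worked example of the inline header sizing above: the LSO
	 * header is copied in just past a 4-byte inline control word,
	 * and the result is rounded up to a 16-byte boundary.  Assuming
	 * (hypothetically) a 54-byte Ethernet/IP/TCP header,
	 * total_len = (4 + 0xf + 54) & ~0xf = 64, so the data segments
	 * ("ds") begin 64 bytes beyond the UD segment.
	 */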
	case IBT_WRC_SEND:
		nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		if (qp->qp_serv_type == HERMON_QP_UD) {
			if (wr->wr_trans != IBT_UD_SRV) {
				status = IBT_QP_SRV_TYPE_INVALID;
				goto done;
			}
			if (wr->wr_flags & IBT_WR_SEND_CKSUM)
				cksum = 0x30;
			dest = wr->wr.ud.udwr_dest;
		} else if (qp->qp_serv_type == HERMON_QP_RFCI) {
			if (wr->wr_trans != IBT_RFCI_SRV) {
				status = IBT_QP_SRV_TYPE_INVALID;
				goto done;
			}
			rfci = &wr->wr.fc.rfci_send;
			if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) {
				nopcode |= (rfci->rfci_eof << 16);
				fc_bits = 0x40;	/* set FCRC */
			}
			dest = rfci->rfci_dest;
		} else {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			/* "|=" changes 0xa to 0xb without touching FCEOF */
			nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
		break;

	case IBT_WRC_INIT_SEND_FCMD:
		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND;
		is = wr->wr.fc.fc_is;
		dest = is->is_ctl.fc_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		old_ds = ds;
		/* move ds beyond the FCP-3 Init Segment */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10);
		i = 0;
		break;

	case IBT_WRC_FAST_REG_PMR:
	{
		hermon_hw_snd_wqe_frwr_t	*frwr;

		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
		frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
		    sizeof (hermon_hw_snd_wqe_frwr_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}

#if 0
	/* firmware does not support this */
	case IBT_WRC_LOCAL_INVALIDATE:
	{
		hermon_hw_snd_wqe_local_inv_t	*li;

		if (qp->qp_serv_type != HERMON_QP_FCMND) {
			status = IBT_QP_OP_TYPE_INVALID;
			goto done;
		}
		if (wr->wr_trans != IBT_FCMD_SRV) {
			status = IBT_QP_SRV_TYPE_INVALID;
			goto done;
		}
		nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
		li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
		    sizeof (hermon_hw_snd_wqe_local_inv_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}
#endif
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
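	/*
	 * Example of the descriptor sizing just computed, assuming the
	 * usual ConnectX segment sizes (16-byte control segment, 48-byte
	 * UD address segment, 16-byte data segments): a UD send with two
	 * non-zero SGEs spans 16 + 48 + 2 * 16 = 96 bytes, so
	 * desc_sz = 96 >> 4 = 6 sixteen-byte chunks.
	 */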
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	membar_producer();

	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	} else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) {
		/* This sits in the STAMP, so must be set after setting SGL */
		HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl,
		    is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu,
		    is->is_dest_id, is->is_op, is->is_rem_exch,
		    is->is_exch_qp_idx);

		/* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */
		/* SIT bit in FCP-3 ctrl segment */
		desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0;
		/* LS bit in FCP-3 ctrl segment */
		fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ?
		    0x10000 : 0;
		fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) |
		    (is->is_ctl.fc_seq_id << 24);
		immed_data = is->is_ctl.fc_parameter;
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, cksum, qp, strong_order, fc_bits);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
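	/*
	 * Note on the "headroom" stamping done above (and below, after the
	 * doorbell): the driver keeps qp_sq_hdrmwqes WQEs beyond the tail
	 * stamped with 0xFFFFFFFF in the first word of each 64-byte
	 * section.  The intent, as with other ConnectX-family drivers, is
	 * that the hardware's descriptor prefetch always runs into a
	 * software-owned entry and stops, rather than executing stale WQEs
	 * left over from an earlier trip around the ring.
	 */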
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
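/*
 * Like hermon_post_send_ud() above, hermon_post_send_rc() below is a fast
 * path entered from hermon_post_send() with qp_sq_lock already held; both
 * routines drop that lock before returning (hence the
 * LOCK_RELEASED_AS_SIDE_EFFECT() annotations).
 */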
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_snd_wqe_frwr_t	*frwr;
	hermon_hw_snd_wqe_local_inv_t	*li;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	int				nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;
	int				strong_order;
	int				print_rdma;
	int				rlen;
	uint32_t			rkey;
	uint64_t			raddr;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	print_rdma = 0;
	rlen = 0;
	strong_order = 0;

	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	if (wr->wr_trans != IBT_RC_SRV) {
		status = IBT_QP_SRV_TYPE_INVALID;
		goto done;
	}

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV;
			immed_data = wr->wr.rc.rcwr.send_inval;
		} else if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		if (hermon_rdma_debug) {
			print_rdma = hermon_rdma_debug;
			rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
			raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
		}

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e.
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;
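	/*
	 * InfiniBand atomic operations always act on a single 64-bit
	 * quantity at the remote address, which is why exactly one data
	 * segment (the local buffer that receives the original remote
	 * value) is set up above.
	 */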
	/*
	 * If this is a memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
		break;

	case IBT_WRC_FAST_REG_PMR:
		nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
		frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
		    sizeof (hermon_hw_snd_wqe_frwr_t));
		nds = 0;
		strong_order = 0x80;
		break;

	case IBT_WRC_LOCAL_INVALIDATE:
		nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
		li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
		    sizeof (hermon_hw_snd_wqe_local_inv_t));
		nds = 0;
		strong_order = 0x80;
		break;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values set up above (i.e. "sgl", "nds", and the "ds"
	 * pointer).  Start by checking for a valid number of SGL entries.
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}
		rlen += sgl[i].ds_len;
		if (print_rdma & 0x2)
			IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
			    "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}
	/* ensure that the RDMA Read does not exceed the HCA limit */
	if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz >
	    state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
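	/*
	 * In the check above, "desc_sz" counts 16-byte chunks for the
	 * whole WQE; the control segment and the remote address segment
	 * presumably account for the two extra chunks beyond the
	 * advertised RDMA Read SGL limit (hence the "+ 2").
	 */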
	if (print_rdma & 0x1) {
		IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
		    "total len %x", tail, rkey, raddr, rlen);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, 0, qp, strong_order, 0);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit of the first one in the chain. */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
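/*
 * hermon_post_send() below is the entry point for all Send work requests.
 * It dispatches ordinary UD and RC QPs to the optimized routines above;
 * special QPs (QP0/QP1) and UC QPs fall through to the generic
 * chain-building loop at the "post_many" label.
 */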
/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t	*curr_wr;
	hermon_workq_hdr_t	*wq;
	hermon_ahhdl_t	ah;
	uint64_t	*desc, *prev;
	uint32_t	desc_sz;
	uint32_t	signaled_dbd, solicited;
	uint32_t	head, tail, next_tail, qsize_msk;
	uint32_t	hdrmwqes;
	uint_t		currindx, wrindx, numremain;
	uint_t		chainlen;
	uint_t		posted_cnt, maxstat;
	uint_t		total_posted;
	int		status;
	uint32_t	nopcode, fence, immed_data = 0;
	uint32_t	prev_nopcode;
	uint_t		qp_state;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_sq_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	qp_state = qp->qp_state_for_post_send;
	if ((qp_state == HERMON_QP_RESET) ||
	    (qp_state == HERMON_QP_INIT) ||
	    (qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_sq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	if (qp->qp_is_special)
		goto post_many;

	/* Use these optimized functions most of the time */
	if (qp->qp_type == IBT_UD_RQP) {
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_RC) {
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE: Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then, in the final step,
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO: It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 * NOTE ALSO: the term "chain" is used to differentiate it from the
	 * Work Request List passed in, and because that is the terminology
	 * from the previous generations of HCA - but the WQEs are not, in
	 * fact, chained together for Hermon.
	 */

	wrindx = 0;
	numremain = num_wr;
	status = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE in "desc_sz".
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 0xC;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 0x2;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* Ensure correctness, set the ReRead bit */
				nopcode |= (1 << 6);
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, 0, qp, 0, 0);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}
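		/*
		 * A short illustration of the deferred-ownership scheme in
		 * the loop above: for a 3-WQE chain the loop builds WQE0,
		 * WQE1 and WQE2, sets the HW ownership bit on WQE1 and WQE2
		 * as it goes, but only records WQE0's opcode in
		 * "prev_nopcode".  The block below then sets WQE0's
		 * ownership bit last, so the hardware cannot begin fetching
		 * the chain until every WQE in it is complete.
		 */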
		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW.
		 * The steps are:
		 *	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
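/*
 * Unlike the send paths above, which ring the UAR doorbell register, the
 * receive paths below only update the QP's doorbell record in memory with
 * the low 16 bits of the WQE counter; the hardware samples that record
 * whenever it needs more receive WQEs.
 */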

/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t	*desc;
	hermon_workq_hdr_t	*wq;
	uint32_t	head, tail, next_tail, qsize_msk;
	uint_t		wrindx;
	uint_t		posted_cnt;
	int		status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_type != IBT_UD_RQP) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Grab the "lock" for the WRID list (i.e., membar_consumer()).
	 * This is not needed here because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;

	wrindx = 0;
	status = DDI_SUCCESS;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t	*desc;
	hermon_workq_hdr_t	*wq;
	uint_t		indx, wrindx;
	uint_t		posted_cnt;
	int		status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable SRQ memory.  Note:  We do not allow kernel
	 * clients to post to SRQ memory that is accessible directly by the
	 * user.  If the SRQ memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		posted_cnt++;
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}
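/*
 * Note on the SRQ posting loop above: free SRQ WQEs are kept on a list
 * linked through the "next" field of each descriptor, so the next free
 * index is read back out of the just-built WQE (the
 * htons(((uint16_t *)desc)[1]) expression) rather than being computed
 * from a simple ring counter as in hermon_post_recv().
 */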

/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of the Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e.
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request).
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is a memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is a memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of the descriptor (in 16-byte chunks).
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}
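/*
 * hermon_wqe_mlx_build() below handles the special QPs (QP0/QP1).  For
 * those, the hardware expects a raw IB packet rather than a normal
 * transport-level WQE, so the routine assembles the LRH, the optional
 * GRH, the BTH and the DETH headers inline in the descriptor itself and
 * then asks the hardware to append the ICRC.
 */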


/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request.  The UDAV will
	 * be used to answer some questions about the request.
	 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed.
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note: By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * of the descriptor).  This has the advantage of both speeding
	 * things up and of not requiring the driver to allocate/register
	 * any additional memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
	desc_sz += 4;

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are setup with VL hardcoded to zero below.
	 *
	 *    Notice also that the Source LID is hardcoded to the Permissive
	 *    LID (0xFFFF).  This is also not a problem because if the
	 *    Destination LID is not the Permissive LID, then the "slr"
	 *    value in the MLX transport "next/ctrl" header will be set to
	 *    zero and the hardware will pull the LID from the value in the
	 *    port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note: We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not
		 * allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	mutex_exit(&ah->ah_lock);


	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management
	 *    Datagrams (MADs), whether GSI or SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packet headers above, but it is only four bytes and
	 * is set to zero (to indicate that we wish the hardware to generate
	 * the ICRC).
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of the descriptor (in 16-byte chunks).
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}


/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE.  We don't
	 * need to reserve space for a ctrl segment - there is none on
	 * the recv queue for Hermon - but we will need to put an invalid
	 * (null) scatter pointer, per the PRM.
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/* put the null sgl pointer as well if needed */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}
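/*
 * The null_sgl terminator used above (and in hermon_wqe_srq_build() below)
 * is a zero-length data segment whose L_Key is the reserved invalid key
 * (0x100, per the PRM); writing it after the real segments tells the
 * hardware where the scatter list ends when fewer than the maximum number
 * of SGL entries are used.
 */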
/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * For any other type of request, the immediate is undefined
	 */
	return (0);
}


/*
 * hermon_wqe_headroom()
 *    Context: Can be called from interrupt or base context, though
 *    currently it is called only from base context.
 *    Fills in the headroom WQEs for the Send Queue.
 */
static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6;	/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {	/* 1st section of wqe */
				/* preserve the ownership bit */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}
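
/*
 * Illustrative sketch (not driver code): why the first word of each
 * headroom WQE is OR'ed with 0x7FFFFFFF rather than simply
 * overwritten.  Bit 31 of a WQE's first dword is the hardware
 * ownership bit, and it is the only bit the OR can leave unchanged:
 *
 *	uint32_t first = ddi_get32(hdl, wqe);	(current first dword)
 *	first |= 0x7FFFFFFF;			(bit 31 preserved)
 *	ddi_put32(hdl, wqe, first);
 *
 * The remaining 64-byte sections of each WQE carry no ownership bit,
 * so they are stamped with all ones (0xFFFFFFFF) directly.
 */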
/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note: We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}
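
/*
 * Worked example (illustrative only) for the range check above.
 * Suppose the underlying region was registered with bi_addr = 0x1000
 * and bi_len = 0x2000, so reg_end_addr = 0x2FFF.  A bind request with
 * bind_va = 0x2800 and bind_len = 0x1000 passes the first test (0x2800
 * lies within [0x1000, 0x2FFF]) but fails the second, because its last
 * byte, 0x2800 + 0x1000 - 1 = 0x37FF, lies beyond 0x2FFF; the bind is
 * rejected with IBT_MR_LEN_INVALID.  Computing end addresses as
 * "start + (len - 1)" rather than "start + len" also avoids overflow
 * for a region that ends at the very top of the address space.
 */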
/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;

	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	if (qp->qp_sq_cqhdl)
		hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif
	return (DDI_SUCCESS);
}
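
/*
 * Note (illustrative): the CQ locks above are taken in a fixed order
 * (receive CQ first, then send CQ when the two are distinct) and are
 * dropped in the reverse order.  hermon_wrid_to_reset_handling() below
 * follows the same discipline, so the wqavl add here and the remove on
 * the to-reset path can never deadlock against each other.
 */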
/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
#ifdef __lock_lint
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	if (qp->qp_sq_cqhdl != NULL)
		hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
#endif

	return (IBT_SUCCESS);
}


/*
 * hermon_wrid_get_entry()
 *    Context: Can be called from interrupt or base context.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t	*wqa;
	hermon_workq_hdr_t	*wq;
	uint64_t		wrid;
	uint_t			send_or_recv, qpnum;
	uint32_t		indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size).  Once we find it, we pull out
	 * the "wrid" field and return it (see below).  XXX Note: One
	 * possible future enhancement would be to enable this routine to
	 * skip over any "unsignaled" completions to go directly to the next
	 * "signaled" entry on success.
	 */
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s	*srq;
		uint64_t		*desc;

		/*
		 * Put the WQE back on the SRQ free list: write the
		 * just-completed index (big-endian) into the "next wqe
		 * index" field of the WQE at the current tail, then
		 * make it the new tail.
		 */
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		wq->wq_head = (indx + 1) & wq->wq_mask;
		wq->wq_full = 0;
	}

	return (wrid);
}


/*
 * hermon_wrid_workq_compare()
 *    Compare routine for the CQ's wqavl AVL tree; compares first on QP
 *    number, then on work queue type (send or recv).
 */
int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t	*cmpp;
	hermon_workq_avl_t	*curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;

	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}
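
/*
 * Illustrative sketch (assumptions flagged): how a comparator like
 * hermon_wrid_workq_compare() is typically wired into a Solaris AVL
 * tree.  The tree itself is created elsewhere in the driver; the link
 * field name "wqa_link" below is hypothetical:
 *
 *	avl_create(&cq->cq_wrid_wqhdr_avl_tree,
 *	    hermon_wrid_workq_compare,
 *	    sizeof (hermon_workq_avl_t),
 *	    offsetof(hermon_workq_avl_t, wqa_link));
 *
 * avl(9F) requires exactly the -1/0/+1 return values used above.  Note
 * also that avl_find() passes the search key as the first comparator
 * argument, which is why "p1" is cast to the hermon_workq_compare_t
 * key type while "p2" is a node already in the tree.
 */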
/*
 * hermon_wrid_wqavl_find()
 *    Context: Can be called from interrupt or base context.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t	*curr;
	hermon_workq_compare_t	cmp;

	/*
	 * Search the CQ's work queue tree for the send or recv work queue
	 * entry with the matching QP number.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}


/*
 * hermon_wrid_wqhdr_create()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t	*wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;	/* bufsz is a power of two */

	return (wqhdr);
}

void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}


/*
 * hermon_cq_workq_add()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t	cmp;
	avl_index_t		where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}


/*
 * hermon_cq_workq_remove()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}
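
/*
 * Note (illustrative): hermon_cq_workq_add() uses the standard avl(9F)
 * insertion idiom: avl_find() is called only for its "where"
 * out-parameter, which records the insertion point, and avl_insert()
 * then links the node in at that point.  A minimal standalone sketch
 * of the same idiom, with hypothetical names:
 *
 *	avl_index_t	where;
 *
 *	if (avl_find(&tree, &key, &where) == NULL)
 *		avl_insert(&tree, new_node, where);
 *
 * The driver omits the NULL check, presumably because a given
 * QPN/work-queue-type pair is never added twice to the same CQ's tree
 * (hermon_wrid_to_reset_handling() removes it before it can be added
 * again).
 */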