/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

#include "mlx5.h"
#include "doorbell.h"
#include "wqe.h"

#define MLX5_ATOMIC_SIZE 8

static const uint32_t mlx5_ib_opcode[] = {
	[IBV_WR_SEND]			= MLX5_OPCODE_SEND,
	[IBV_WR_SEND_WITH_INV]		= MLX5_OPCODE_SEND_INVAL,
	[IBV_WR_SEND_WITH_IMM]		= MLX5_OPCODE_SEND_IMM,
	[IBV_WR_RDMA_WRITE]		= MLX5_OPCODE_RDMA_WRITE,
	[IBV_WR_RDMA_WRITE_WITH_IMM]	= MLX5_OPCODE_RDMA_WRITE_IMM,
	[IBV_WR_RDMA_READ]		= MLX5_OPCODE_RDMA_READ,
	[IBV_WR_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_CS,
	[IBV_WR_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_FA,
	[IBV_WR_BIND_MW]		= MLX5_OPCODE_UMR,
	[IBV_WR_LOCAL_INV]		= MLX5_OPCODE_UMR,
	[IBV_WR_TSO]			= MLX5_OPCODE_TSO,
};

static void *get_recv_wqe(struct mlx5_qp *qp, int n)
{
	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
}

static void *get_wq_recv_wqe(struct mlx5_rwq *rwq, int n)
{
	return rwq->pbuff + (n << rwq->rq.wqe_shift);
}

static int copy_to_scat(struct mlx5_wqe_data_seg *scat, void *buf, int *size,
			int max)
{
	int copy;
	int i;

	if (unlikely(!(*size)))
		return IBV_WC_SUCCESS;

	for (i = 0; i < max; ++i) {
		copy = min_t(long, *size, be32toh(scat->byte_count));
		memcpy((void *)(unsigned long)be64toh(scat->addr), buf, copy);
		*size -= copy;
		if (*size == 0)
			return IBV_WC_SUCCESS;

		buf += copy;
		++scat;
	}
	return IBV_WC_LOC_LEN_ERR;
}

int mlx5_copy_to_recv_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
{
	struct mlx5_wqe_data_seg *scat;
	int max = 1 << (qp->rq.wqe_shift - 4);

	scat = get_recv_wqe(qp, idx);
	if (unlikely(qp->wq_sig))
		++scat;

	return copy_to_scat(scat, buf, &size, max);
}

int mlx5_copy_to_send_wqe(struct mlx5_qp *qp, int idx, void *buf, int size)
{
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct mlx5_wqe_data_seg *scat;
	void *p;
	int max;

	idx &= (qp->sq.wqe_cnt - 1);
	ctrl = mlx5_get_send_wqe(qp, idx);
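	/*
	 * Scatter-to-CQE copy back: the payload carried in the CQE is written
	 * into the buffers described by this send WQE's scatter list.  Only
	 * RC send WQEs (RDMA READ and atomic responses) are handled here.
	 */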
	if (qp->ibv_qp->qp_type != IBV_QPT_RC) {
		fprintf(stderr, "scatter to CQE is supported only for RC QPs\n");
		return IBV_WC_GENERAL_ERR;
	}
	p = ctrl + 1;

	switch (be32toh(ctrl->opmod_idx_opcode) & 0xff) {
	case MLX5_OPCODE_RDMA_READ:
		p = p + sizeof(struct mlx5_wqe_raddr_seg);
		break;

	case MLX5_OPCODE_ATOMIC_CS:
	case MLX5_OPCODE_ATOMIC_FA:
		p = p + sizeof(struct mlx5_wqe_raddr_seg) +
			sizeof(struct mlx5_wqe_atomic_seg);
		break;

	default:
		fprintf(stderr, "scatter to CQE for opcode %d\n",
			be32toh(ctrl->opmod_idx_opcode) & 0xff);
		return IBV_WC_REM_INV_REQ_ERR;
	}

	scat = p;
	max = (be32toh(ctrl->qpn_ds) & 0x3F) - (((void *)scat - (void *)ctrl) >> 4);
	if (unlikely((void *)(scat + max) > qp->sq.qend)) {
		int tmp = ((void *)qp->sq.qend - (void *)scat) >> 4;
		int orig_size = size;

		if (copy_to_scat(scat, buf, &size, tmp) == IBV_WC_SUCCESS)
			return IBV_WC_SUCCESS;
		max = max - tmp;
		buf += orig_size - size;
		scat = mlx5_get_send_wqe(qp, 0);
	}

	return copy_to_scat(scat, buf, &size, max);
}

void *mlx5_get_send_wqe(struct mlx5_qp *qp, int n)
{
	return qp->sq_start + (n << MLX5_SEND_WQE_SHIFT);
}

void mlx5_init_rwq_indices(struct mlx5_rwq *rwq)
{
	rwq->rq.head = 0;
	rwq->rq.tail = 0;
}

void mlx5_init_qp_indices(struct mlx5_qp *qp)
{
	qp->sq.head = 0;
	qp->sq.tail = 0;
	qp->rq.head = 0;
	qp->rq.tail = 0;
	qp->sq.cur_post = 0;
}

static int mlx5_wq_overflow(struct mlx5_wq *wq, int nreq, struct mlx5_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if (cur + nreq < wq->max_post)
		return 0;

	mlx5_spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	mlx5_spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}

static inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
				 uint64_t remote_addr, uint32_t rkey)
{
	rseg->raddr = htobe64(remote_addr);
	rseg->rkey = htobe32(rkey);
	rseg->reserved = 0;
}

static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg,
			   enum ibv_wr_opcode opcode,
			   uint64_t swap,
			   uint64_t compare_add)
{
	if (opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
		aseg->swap_add = htobe64(swap);
		aseg->compare = htobe64(compare_add);
	} else {
		aseg->swap_add = htobe64(compare_add);
	}
}

static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
			     struct ibv_send_wr *wr)
{
	memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof dseg->av);
	dseg->av.dqp_dct = htobe32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
	dseg->av.key.qkey.qkey = htobe32(wr->wr.ud.remote_qkey);
}

static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ibv_sge *sg,
			     int offset)
{
	dseg->byte_count = htobe32(sg->length - offset);
	dseg->lkey = htobe32(sg->lkey);
	dseg->addr = htobe64(sg->addr + offset);
}

static void set_data_ptr_seg_atomic(struct mlx5_wqe_data_seg *dseg,
				    struct ibv_sge *sg)
{
	dseg->byte_count = htobe32(MLX5_ATOMIC_SIZE);
	dseg->lkey = htobe32(sg->lkey);
	dseg->addr = htobe64(sg->addr);
}

/*
 * Avoid using memcpy() to copy to BlueFlame page, since memcpy()
 * implementations may use move-string-buffer assembler instructions,
 * which do not guarantee order of copying.
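 * The loop below instead issues plain 64-bit stores, eight at a time
 * (one 64-byte chunk per iteration), and wraps back to sq_start when
 * the end of the send queue buffer is reached.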
 */
static void mlx5_bf_copy(unsigned long long *dst, unsigned long long *src,
			 unsigned bytecnt, struct mlx5_qp *qp)
{
	while (bytecnt > 0) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		bytecnt -= 8 * sizeof(unsigned long long);
		if (unlikely(src == qp->sq.qend))
			src = qp->sq_start;
	}
}

static uint32_t send_ieth(struct ibv_send_wr *wr)
{
	switch (wr->opcode) {
	case IBV_WR_SEND_WITH_IMM:
	case IBV_WR_RDMA_WRITE_WITH_IMM:
		return wr->imm_data;
	case IBV_WR_SEND_WITH_INV:
		return htobe32(wr->imm_data);
	default:
		return 0;
	}
}

static int set_data_inl_seg(struct mlx5_qp *qp, struct ibv_send_wr *wr,
			    void *wqe, int *sz,
			    struct mlx5_sg_copy_ptr *sg_copy_ptr)
{
	struct mlx5_wqe_inline_seg *seg;
	void *addr;
	int len;
	int i;
	int inl = 0;
	void *qend = qp->sq.qend;
	int copy;
	int offset = sg_copy_ptr->offset;

	seg = wqe;
	wqe += sizeof *seg;
	for (i = sg_copy_ptr->index; i < wr->num_sge; ++i) {
		addr = (void *) (unsigned long)(wr->sg_list[i].addr + offset);
		len = wr->sg_list[i].length - offset;
		inl += len;
		offset = 0;

		if (unlikely(inl > qp->max_inline_data))
			return ENOMEM;

		if (unlikely(wqe + len > qend)) {
			copy = qend - wqe;
			memcpy(wqe, addr, copy);
			addr += copy;
			len -= copy;
			wqe = mlx5_get_send_wqe(qp, 0);
		}
		memcpy(wqe, addr, len);
		wqe += len;
	}

	if (likely(inl)) {
		seg->byte_count = htobe32(inl | MLX5_INLINE_SEG);
		*sz = align(inl + sizeof seg->byte_count, 16) / 16;
	} else
		*sz = 0;

	return 0;
}

static uint8_t wq_sig(struct mlx5_wqe_ctrl_seg *ctrl)
{
	return calc_sig(ctrl, be32toh(ctrl->qpn_ds));
}

#ifdef MLX5_DEBUG
static void dump_wqe(FILE *fp, int idx, int size_16, struct mlx5_qp *qp)
{
	uint32_t *p = NULL;
	int i, j;
	int tidx = idx;

	fprintf(fp, "dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
		if ((i & 0xf) == 0) {
			void *buf = mlx5_get_send_wqe(qp, tidx);
			tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
			p = buf;
			j = 0;
		}
		fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[j]), be32toh(p[j + 1]),
			be32toh(p[j + 2]), be32toh(p[j + 3]));
	}
}
#endif /* MLX5_DEBUG */

void *mlx5_get_atomic_laddr(struct mlx5_qp *qp, uint16_t idx, int *byte_count)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64toh(dpseg->addr);

	/*
	 * Currently the byte count is always 8 bytes.
	 * Fix this when we support variable-size atomics.
	 */
	*byte_count = 8;
	return addr;
}

static inline int copy_eth_inline_headers(struct ibv_qp *ibqp,
					  struct ibv_send_wr *wr,
					  struct mlx5_wqe_eth_seg *eseg,
					  struct mlx5_sg_copy_ptr *sg_copy_ptr)
{
	uint32_t inl_hdr_size = MLX5_ETH_L2_INLINE_HEADER_SIZE;
	int inl_hdr_copy_size = 0;
	int j = 0;
	FILE *fp = to_mctx(ibqp->context)->dbg_fp;

	if (unlikely(wr->num_sge < 1)) {
		mlx5_dbg(fp, MLX5_DBG_QP_SEND, "illegal num_sge: %d, minimum is 1\n",
			 wr->num_sge);
		return EINVAL;
	}

	if (likely(wr->sg_list[0].length >= MLX5_ETH_L2_INLINE_HEADER_SIZE)) {
		inl_hdr_copy_size = MLX5_ETH_L2_INLINE_HEADER_SIZE;
		memcpy(eseg->inline_hdr_start,
		       (void *)(uintptr_t)wr->sg_list[0].addr,
		       inl_hdr_copy_size);
	} else {
		for (j = 0; j < wr->num_sge && inl_hdr_size > 0; ++j) {
			inl_hdr_copy_size = min(wr->sg_list[j].length,
						inl_hdr_size);
			memcpy(eseg->inline_hdr_start +
			       (MLX5_ETH_L2_INLINE_HEADER_SIZE - inl_hdr_size),
			       (void *)(uintptr_t)wr->sg_list[j].addr,
			       inl_hdr_copy_size);
			inl_hdr_size -= inl_hdr_copy_size;
		}
		if (unlikely(inl_hdr_size)) {
			mlx5_dbg(fp, MLX5_DBG_QP_SEND, "Ethernet headers < 16 bytes\n");
			return EINVAL;
		}
		--j;
	}

	eseg->inline_hdr_sz = htobe16(MLX5_ETH_L2_INLINE_HEADER_SIZE);

	/* If we copied all the sge into the inline-headers, then we need to
	 * start copying from the next sge into the data-segment.
	 */
	if (unlikely(wr->sg_list[j].length == inl_hdr_copy_size)) {
		++j;
		inl_hdr_copy_size = 0;
	}

	sg_copy_ptr->index = j;
	sg_copy_ptr->offset = inl_hdr_copy_size;

	return 0;
}

#undef ALIGN
#define ALIGN(x, log_a) ((((x) + (1 << (log_a)) - 1)) & ~((1 << (log_a)) - 1))

static inline uint16_t get_klm_octo(int nentries)
{
	return htobe16(ALIGN(nentries, 3) / 2);
}

static void set_umr_data_seg(struct mlx5_qp *qp, enum ibv_mw_type type,
			     int32_t rkey, struct ibv_mw_bind_info *bind_info,
			     uint32_t qpn, void **seg, int *size)
{
	union {
		struct mlx5_wqe_umr_klm_seg	klm;
		uint8_t				reserved[64];
	} *data = *seg;

	data->klm.byte_count = htobe32(bind_info->length);
	data->klm.mkey = htobe32(bind_info->mr->lkey);
	data->klm.address = htobe64(bind_info->addr);

	memset(&data->klm + 1, 0, sizeof(data->reserved) -
	       sizeof(data->klm));

	*seg += sizeof(*data);
	*size += (sizeof(*data) / 16);
}

static void set_umr_mkey_seg(struct mlx5_qp *qp, enum ibv_mw_type type,
			     int32_t rkey, struct ibv_mw_bind_info *bind_info,
			     uint32_t qpn, void **seg, int *size)
{
	struct mlx5_wqe_mkey_context_seg *mkey = *seg;

	mkey->qpn_mkey = htobe32((rkey & 0xFF) |
				 ((type == IBV_MW_TYPE_1 || !bind_info->length) ?
				  0xFFFFFF00 : qpn << 8));
	if (bind_info->length) {
		/* Local read is set in kernel */
		mkey->access_flags = 0;
		mkey->free = 0;
		if (bind_info->mw_access_flags & IBV_ACCESS_LOCAL_WRITE)
			mkey->access_flags |=
				MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_LOCAL_WRITE;
		if (bind_info->mw_access_flags & IBV_ACCESS_REMOTE_WRITE)
			mkey->access_flags |=
				MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_WRITE;
		if (bind_info->mw_access_flags & IBV_ACCESS_REMOTE_READ)
			mkey->access_flags |=
				MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_REMOTE_READ;
		if (bind_info->mw_access_flags & IBV_ACCESS_REMOTE_ATOMIC)
			mkey->access_flags |=
				MLX5_WQE_MKEY_CONTEXT_ACCESS_FLAGS_ATOMIC;
		if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED)
			mkey->start_addr = 0;
		else
			mkey->start_addr = htobe64(bind_info->addr);
		mkey->len = htobe64(bind_info->length);
	} else {
		mkey->free = MLX5_WQE_MKEY_CONTEXT_FREE;
	}

	*seg += sizeof(struct mlx5_wqe_mkey_context_seg);
	*size += (sizeof(struct mlx5_wqe_mkey_context_seg) / 16);
}

static inline void set_umr_control_seg(struct mlx5_qp *qp, enum ibv_mw_type type,
				       int32_t rkey, struct ibv_mw_bind_info *bind_info,
				       uint32_t qpn, void **seg, int *size)
{
	struct mlx5_wqe_umr_ctrl_seg *ctrl = *seg;

	ctrl->flags = MLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET |
		      MLX5_WQE_UMR_CTRL_FLAG_INLINE;
	ctrl->mkey_mask = htobe64(MLX5_WQE_UMR_CTRL_MKEY_MASK_FREE |
				  MLX5_WQE_UMR_CTRL_MKEY_MASK_MKEY);
	ctrl->translation_offset = 0;
	memset(ctrl->rsvd0, 0, sizeof(ctrl->rsvd0));
	memset(ctrl->rsvd1, 0, sizeof(ctrl->rsvd1));

	if (type == IBV_MW_TYPE_2)
		ctrl->mkey_mask |= htobe64(MLX5_WQE_UMR_CTRL_MKEY_MASK_QPN);

	if (bind_info->length) {
		ctrl->klm_octowords = get_klm_octo(1);
		if (type == IBV_MW_TYPE_2)
			ctrl->flags |= MLX5_WQE_UMR_CTRL_FLAG_CHECK_FREE;
		ctrl->mkey_mask |= htobe64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN |
					   MLX5_WQE_UMR_CTRL_MKEY_MASK_START_ADDR |
					   MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE |
					   MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_READ |
					   MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_REMOTE_WRITE |
					   MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_ATOMIC);
	} else {
		ctrl->klm_octowords = get_klm_octo(0);
		if (type == IBV_MW_TYPE_2)
			ctrl->flags |= MLX5_WQE_UMR_CTRL_FLAG_CHECK_QPN;
	}

	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
}

static inline int set_bind_wr(struct mlx5_qp *qp, enum ibv_mw_type type,
			      int32_t rkey, struct ibv_mw_bind_info *bind_info,
			      uint32_t qpn, void **seg, int *size)
{
	void *qend = qp->sq.qend;

#ifdef MW_DEBUG
	if (bind_info->mw_access_flags &
	    ~(IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_READ |
	      IBV_ACCESS_REMOTE_WRITE))
		return EINVAL;

	if (bind_info->mr &&
	    (bind_info->mr->addr > (void *)bind_info->addr ||
	     bind_info->mr->addr + bind_info->mr->length <
	     (void *)bind_info->addr + bind_info->length ||
	     !(to_mmr(bind_info->mr)->alloc_flags & IBV_ACCESS_MW_BIND) ||
	     (bind_info->mw_access_flags &
	      (IBV_ACCESS_REMOTE_ATOMIC | IBV_ACCESS_REMOTE_WRITE) &&
	      !(to_mmr(bind_info->mr)->alloc_flags & IBV_ACCESS_LOCAL_WRITE))))
		return EINVAL;

#endif

	/* Reject lengths above 2GB; a KLM entry supports at most 2GB */
	if (bind_info->length > 1UL << 31)
		return EOPNOTSUPP;

	set_umr_control_seg(qp, type, rkey, bind_info, qpn, seg, size);
	if (unlikely((*seg == qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	set_umr_mkey_seg(qp, type, rkey, bind_info, qpn, seg, size);
	if (!bind_info->length)
		return 0;

	if (unlikely((*seg == qend)))
		*seg = mlx5_get_send_wqe(qp, 0);

	set_umr_data_seg(qp, type, rkey, bind_info, qpn, seg, size);
	return 0;
}

/* Copy the TSO header into the eth segment, taking care of padding and of
 * WQE wrap-around in the WQ buffer.
 */
static inline int set_tso_eth_seg(void **seg, struct ibv_send_wr *wr,
				  void *qend, struct mlx5_qp *qp, int *size)
{
	struct mlx5_wqe_eth_seg *eseg = *seg;
	int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
	uint64_t left, left_len, copy_sz;
	void *pdata = wr->tso.hdr;
	FILE *fp = to_mctx(qp->ibv_qp->context)->dbg_fp;

	if (unlikely(wr->tso.hdr_sz < MLX5_ETH_L2_MIN_HEADER_SIZE ||
		     wr->tso.hdr_sz > qp->max_tso_header)) {
		mlx5_dbg(fp, MLX5_DBG_QP_SEND,
			 "TSO header size should be at least %d and at most %d\n",
			 MLX5_ETH_L2_MIN_HEADER_SIZE,
			 qp->max_tso_header);
		return EINVAL;
	}

	left = wr->tso.hdr_sz;
	eseg->mss = htobe16(wr->tso.mss);
	eseg->inline_hdr_sz = htobe16(wr->tso.hdr_sz);

	/* If there is room up to the end of the queue, copy the whole header
	 * in one shot; otherwise copy up to the end of the queue, wrap around
	 * and then copy the rest.
	 */
	left_len = qend - (void *)eseg->inline_hdr_start;
	copy_sz = min(left_len, left);

	memcpy(eseg->inline_hdr_start, pdata, copy_sz);

	/* The -1 is because there are already 16 bytes included in
	 * eseg->inline_hdr[16].
	 */
	*seg += align(copy_sz - size_of_inl_hdr_start, 16) - 16;
	*size += align(copy_sz - size_of_inl_hdr_start, 16) / 16 - 1;

	/* The last wqe in the queue */
	if (unlikely(copy_sz < left)) {
		*seg = mlx5_get_send_wqe(qp, 0);
		left -= copy_sz;
		pdata += copy_sz;
		memcpy(*seg, pdata, left);
		*seg += align(left, 16);
		*size += align(left, 16) / 16;
	}

	return 0;
}

static inline int _mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
				  struct ibv_send_wr **bad_wr)
{
	struct mlx5_context *ctx;
	struct mlx5_qp *qp = to_mqp(ibqp);
	void *seg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5_wqe_ctrl_seg *ctrl = NULL;
	struct mlx5_wqe_data_seg *dpseg;
	struct mlx5_sg_copy_ptr sg_copy_ptr = {.index = 0, .offset = 0};
	int nreq;
	int inl = 0;
	int err = 0;
	int size = 0;
	int i;
	unsigned idx;
	uint8_t opmod = 0;
	struct mlx5_bf *bf = qp->bf;
	void *qend = qp->sq.qend;
	uint32_t mlx5_opcode;
	struct mlx5_wqe_xrc_seg *xrc;
	uint8_t fence;
	uint8_t next_fence;
	uint32_t max_tso = 0;
	FILE *fp = to_mctx(ibqp->context)->dbg_fp; /* unused unless MLX5_DEBUG is defined */

	mlx5_spin_lock(&qp->sq.lock);

	next_fence = qp->fm_cache;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(wr->opcode < 0 ||
			     wr->opcode >= sizeof mlx5_ib_opcode / sizeof mlx5_ib_opcode[0])) {
			mlx5_dbg(fp, MLX5_DBG_QP_SEND, "bad opcode %d\n", wr->opcode);
			err = EINVAL;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(mlx5_wq_overflow(&qp->sq, nreq,
					      to_mcq(qp->ibv_qp->send_cq)))) {
			mlx5_dbg(fp, MLX5_DBG_QP_SEND, "work queue overflow\n");
			err = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
			mlx5_dbg(fp, MLX5_DBG_QP_SEND, "max gs exceeded %d (max = %d)\n",
				 wr->num_sge, qp->sq.max_gs);
			err = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->send_flags & IBV_SEND_FENCE)
			fence = MLX5_WQE_CTRL_FENCE;
		else
			fence = next_fence;
		next_fence = 0;
		idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
		ctrl = seg = mlx5_get_send_wqe(qp, idx);
		*(uint32_t *)(seg + 8) = 0;
		ctrl->imm = send_ieth(wr);
		ctrl->fm_ce_se = qp->sq_signal_bits | fence |
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 MLX5_WQE_CTRL_SOLICITED : 0);

		seg += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC_SEND:
			if (unlikely(wr->opcode != IBV_WR_BIND_MW &&
				     wr->opcode != IBV_WR_LOCAL_INV)) {
				xrc = seg;
				xrc->xrc_srqn = htobe32(wr->qp_type.xrc.remote_srqn);
				seg += sizeof(*xrc);
				size += sizeof(*xrc) / 16;
			}
			/* fall through */
		case IBV_QPT_RC:
			switch (wr->opcode) {
			case IBV_WR_RDMA_READ:
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				seg += sizeof(struct mlx5_wqe_raddr_seg);
				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
				break;

			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				if (unlikely(!qp->atomics_enabled)) {
					mlx5_dbg(fp, MLX5_DBG_QP_SEND, "atomic operations are not supported\n");
					err = ENOSYS;
					*bad_wr = wr;
					goto out;
				}
				set_raddr_seg(seg, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				seg += sizeof(struct mlx5_wqe_raddr_seg);

				set_atomic_seg(seg, wr->opcode,
					       wr->wr.atomic.swap,
					       wr->wr.atomic.compare_add);
				seg += sizeof(struct mlx5_wqe_atomic_seg);

				size += (sizeof(struct mlx5_wqe_raddr_seg) +
					 sizeof(struct mlx5_wqe_atomic_seg)) / 16;
				break;

			case IBV_WR_BIND_MW:
				next_fence = MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
				ctrl->imm = htobe32(wr->bind_mw.mw->rkey);
				err = set_bind_wr(qp, wr->bind_mw.mw->type,
						  wr->bind_mw.rkey,
						  &wr->bind_mw.bind_info,
						  ibqp->qp_num, &seg, &size);
				if (err) {
					*bad_wr = wr;
					goto out;
				}

				qp->sq.wr_data[idx] = IBV_WC_BIND_MW;
				break;
			case IBV_WR_LOCAL_INV: {
				struct ibv_mw_bind_info bind_info = {};

				next_fence = MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
				ctrl->imm = htobe32(wr->imm_data);
				err = set_bind_wr(qp, IBV_MW_TYPE_2, 0,
						  &bind_info, ibqp->qp_num,
						  &seg, &size);
				if (err) {
					*bad_wr = wr;
					goto out;
				}

				qp->sq.wr_data[idx] = IBV_WC_LOCAL_INV;
				break;
			}

			default:
				break;
			}
			break;

		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				seg += sizeof(struct mlx5_wqe_raddr_seg);
				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
				break;
			case IBV_WR_BIND_MW:
				next_fence = MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
				ctrl->imm = htobe32(wr->bind_mw.mw->rkey);
				err = set_bind_wr(qp, wr->bind_mw.mw->type,
						  wr->bind_mw.rkey,
						  &wr->bind_mw.bind_info,
						  ibqp->qp_num, &seg, &size);
				if (err) {
					*bad_wr = wr;
					goto out;
				}

				qp->sq.wr_data[idx] = IBV_WC_BIND_MW;
				break;
			case IBV_WR_LOCAL_INV: {
				struct ibv_mw_bind_info bind_info = {};

				next_fence = MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
				ctrl->imm = htobe32(wr->imm_data);
				err = set_bind_wr(qp, IBV_MW_TYPE_2, 0,
						  &bind_info, ibqp->qp_num,
						  &seg, &size);
				if (err) {
					*bad_wr = wr;
					goto out;
				}

				qp->sq.wr_data[idx] = IBV_WC_LOCAL_INV;
				break;
			}

			default:
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(seg, wr);
			seg += sizeof(struct mlx5_wqe_datagram_seg);
			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
			if (unlikely((seg == qend)))
				seg = mlx5_get_send_wqe(qp, 0);
			break;

		case IBV_QPT_RAW_PACKET:
			memset(seg, 0, sizeof(struct mlx5_wqe_eth_seg));
			eseg = seg;

			if (wr->send_flags & IBV_SEND_IP_CSUM) {
				if (!(qp->qp_cap_cache & MLX5_CSUM_SUPPORT_RAW_OVER_ETH)) {
					err = EINVAL;
					*bad_wr = wr;
					goto out;
				}

				eseg->cs_flags |= MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
			}

			if (wr->opcode == IBV_WR_TSO) {
				max_tso = qp->max_tso;
				err = set_tso_eth_seg(&seg, wr, qend, qp, &size);
				if (unlikely(err)) {
					*bad_wr = wr;
					goto out;
				}
			} else {
				err = copy_eth_inline_headers(ibqp, wr, seg, &sg_copy_ptr);
				if (unlikely(err)) {
					*bad_wr = wr;
					mlx5_dbg(fp, MLX5_DBG_QP_SEND,
						 "copy_eth_inline_headers failed, err: %d\n",
						 err);
					goto out;
				}
			}

			seg += sizeof(struct mlx5_wqe_eth_seg);
			size += sizeof(struct mlx5_wqe_eth_seg) / 16;
			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			int sz = 0;

			err = set_data_inl_seg(qp, wr, seg, &sz, &sg_copy_ptr);
			if (unlikely(err)) {
				*bad_wr = wr;
				mlx5_dbg(fp, MLX5_DBG_QP_SEND,
					 "inline layout failed, err %d\n", err);
				goto out;
			}
			inl = 1;
			size += sz;
		} else {
			dpseg = seg;
			for (i = sg_copy_ptr.index; i < wr->num_sge; ++i) {
				if (unlikely(dpseg == qend)) {
					seg = mlx5_get_send_wqe(qp, 0);
					dpseg = seg;
				}
				if (likely(wr->sg_list[i].length)) {
					if (unlikely(wr->opcode ==
						     IBV_WR_ATOMIC_CMP_AND_SWP ||
						     wr->opcode ==
						     IBV_WR_ATOMIC_FETCH_AND_ADD))
						set_data_ptr_seg_atomic(dpseg, wr->sg_list + i);
					else {
						if (unlikely(wr->opcode == IBV_WR_TSO)) {
							if (max_tso < wr->sg_list[i].length) {
								err = EINVAL;
								*bad_wr = wr;
								goto out;
							}
							max_tso -= wr->sg_list[i].length;
						}
						set_data_ptr_seg(dpseg, wr->sg_list + i,
								 sg_copy_ptr.offset);
					}
					sg_copy_ptr.offset = 0;
					++dpseg;
					size += sizeof(struct mlx5_wqe_data_seg) / 16;
				}
			}
		}

		mlx5_opcode = mlx5_ib_opcode[wr->opcode];
		ctrl->opmod_idx_opcode = htobe32(((qp->sq.cur_post & 0xffff) << 8) |
						 mlx5_opcode |
						 (opmod << 24));
		ctrl->qpn_ds = htobe32(size | (ibqp->qp_num << 8));

		if (unlikely(qp->wq_sig))
			ctrl->signature = wq_sig(ctrl);

		qp->sq.wrid[idx] = wr->wr_id;
		qp->sq.wqe_head[idx] = qp->sq.head + nreq;
		qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);

#ifdef MLX5_DEBUG
		if (mlx5_debug_mask & MLX5_DBG_QP_SEND)
			dump_wqe(to_mctx(ibqp->context)->dbg_fp, idx, size, qp);
#endif
	}

out:
	if (likely(nreq)) {
		qp->sq.head += nreq;
		qp->fm_cache = next_fence;

		/*
		 * Make sure that descriptors are written before
		 * updating doorbell record and ringing the doorbell
		 */
		udma_to_device_barrier();
		qp->db[MLX5_SND_DBR] = htobe32(qp->sq.cur_post & 0xffff);

		/* Make sure that the doorbell write happens before the memcpy
		 * to WC memory below */
		ctx = to_mctx(ibqp->context);
		if (bf->need_lock)
			mmio_wc_spinlock(&bf->lock.lock);
		else
			mmio_wc_start();

		if (!ctx->shut_up_bf && nreq == 1 && bf->uuarn &&
		    (inl || ctx->prefer_bf) && size > 1 &&
		    size <= bf->buf_size / 16)
			mlx5_bf_copy(bf->reg + bf->offset, (unsigned long long *)ctrl,
				     align(size * 16, 64), qp);
		else
			mlx5_write64((__be32 *)ctrl, bf->reg + bf->offset,
				     &ctx->lock32);

		/*
		 * Use mmio_flush_writes() to ensure write combining buffers are
		 * flushed out of the running CPU. This must be carried inside
		 * the spinlock. Otherwise, there is a potential race. In the
		 * race, CPU A writes doorbell 1, which is waiting in the WC
		 * buffer. CPU B writes doorbell 2, and its write is flushed
		 * earlier. Since the mmio_flush_writes is CPU local, this will
		 * result in the HCA seeing doorbell 2, followed by doorbell 1.
		 * Flush before toggling bf_offset to be latency oriented.
		 */
		mmio_flush_writes();
		bf->offset ^= bf->buf_size;
		if (bf->need_lock)
			mlx5_spin_unlock(&bf->lock);
	}

	mlx5_spin_unlock(&qp->sq.lock);

	return err;
}

int mlx5_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		   struct ibv_send_wr **bad_wr)
{
#ifdef MW_DEBUG
	if (wr->opcode == IBV_WR_BIND_MW) {
		if (wr->bind_mw.mw->type == IBV_MW_TYPE_1)
			return EINVAL;

		if (!wr->bind_mw.bind_info.mr ||
		    !wr->bind_mw.bind_info.addr ||
		    !wr->bind_mw.bind_info.length)
			return EINVAL;

		if (wr->bind_mw.bind_info.mr->pd != wr->bind_mw.mw->pd)
			return EINVAL;
	}
#endif

	return _mlx5_post_send(ibqp, wr, bad_wr);
}

int mlx5_bind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
		 struct ibv_mw_bind *mw_bind)
{
	struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
	struct ibv_send_wr wr = {};
	struct ibv_send_wr *bad_wr = NULL;
	int ret;

	if (!bind_info->mr && (bind_info->addr || bind_info->length)) {
		errno = EINVAL;
		return errno;
	}

	if (bind_info->mw_access_flags & IBV_ACCESS_ZERO_BASED) {
		errno = EINVAL;
		return errno;
	}

	if (bind_info->mr) {
		if (to_mmr(bind_info->mr)->alloc_flags & IBV_ACCESS_ZERO_BASED) {
			errno = EINVAL;
			return errno;
		}

		if (mw->pd != bind_info->mr->pd) {
			errno = EPERM;
			return errno;
		}
	}

	wr.opcode = IBV_WR_BIND_MW;
	wr.next = NULL;
	wr.wr_id = mw_bind->wr_id;
	wr.send_flags = mw_bind->send_flags;
	wr.bind_mw.bind_info = mw_bind->bind_info;
	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);

	ret = _mlx5_post_send(qp, &wr, &bad_wr);
	if (ret)
		return ret;

	mw->rkey = wr.bind_mw.rkey;

	return 0;
}

static void set_sig_seg(struct mlx5_qp *qp, struct mlx5_rwqe_sig *sig,
			int size, uint16_t idx)
{
	uint8_t sign;
	uint32_t qpn = qp->ibv_qp->qp_num;

	sign = calc_sig(sig, size);
	sign ^= calc_sig(&qpn, 4);
	sign ^= calc_sig(&idx, 2);
	sig->signature = sign;
}

static void set_wq_sig_seg(struct mlx5_rwq *rwq, struct mlx5_rwqe_sig *sig,
			   int size, uint16_t idx)
{
	uint8_t sign;
	uint32_t qpn = rwq->wq.wq_num;

	sign = calc_sig(sig, size);
	sign ^= calc_sig(&qpn, 4);
	sign ^= calc_sig(&idx, 2);
	sig->signature = sign;
}

int mlx5_post_wq_recv(struct ibv_wq *ibwq, struct ibv_recv_wr *wr,
		      struct ibv_recv_wr **bad_wr)
{
	struct mlx5_rwq *rwq = to_mrwq(ibwq);
	struct mlx5_wqe_data_seg *scat;
	int err = 0;
	int nreq;
	int ind;
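	/* ind: next slot in the receive ring, i.e. rq.head masked by (wqe_cnt - 1) */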
	int i, j;
	struct mlx5_rwqe_sig *sig;

	mlx5_spin_lock(&rwq->rq.lock);

	ind = rwq->rq.head & (rwq->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(mlx5_wq_overflow(&rwq->rq, nreq,
					      to_mcq(rwq->wq.cq)))) {
			err = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > rwq->rq.max_gs)) {
			err = EINVAL;
			*bad_wr = wr;
			goto out;
		}

		scat = get_wq_recv_wqe(rwq, ind);
		sig = (struct mlx5_rwqe_sig *)scat;
		if (unlikely(rwq->wq_sig)) {
			memset(sig, 0, 1 << rwq->rq.wqe_shift);
			++scat;
		}

		for (i = 0, j = 0; i < wr->num_sge; ++i) {
			if (unlikely(!wr->sg_list[i].length))
				continue;
			set_data_ptr_seg(scat + j++, wr->sg_list + i, 0);
		}

		if (j < rwq->rq.max_gs) {
			scat[j].byte_count = 0;
			scat[j].lkey = htobe32(MLX5_INVALID_LKEY);
			scat[j].addr = 0;
		}

		if (unlikely(rwq->wq_sig))
			set_wq_sig_seg(rwq, sig, (wr->num_sge + 1) << 4,
				       rwq->rq.head & 0xffff);

		rwq->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (rwq->rq.wqe_cnt - 1);
	}

out:
	if (likely(nreq)) {
		rwq->rq.head += nreq;
		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();
		*(rwq->recv_db) = htobe32(rwq->rq.head & 0xffff);
	}

	mlx5_spin_unlock(&rwq->rq.lock);

	return err;
}

int mlx5_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		   struct ibv_recv_wr **bad_wr)
{
	struct mlx5_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_data_seg *scat;
	int err = 0;
	int nreq;
	int ind;
	int i, j;
	struct mlx5_rwqe_sig *sig;

	mlx5_spin_lock(&qp->rq.lock);

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(mlx5_wq_overflow(&qp->rq, nreq,
					      to_mcq(qp->ibv_qp->recv_cq)))) {
			err = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = EINVAL;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);
		sig = (struct mlx5_rwqe_sig *)scat;
		if (unlikely(qp->wq_sig)) {
			memset(sig, 0, 1 << qp->rq.wqe_shift);
			++scat;
		}

		for (i = 0, j = 0; i < wr->num_sge; ++i) {
			if (unlikely(!wr->sg_list[i].length))
				continue;
			set_data_ptr_seg(scat + j++, wr->sg_list + i, 0);
		}

		if (j < qp->rq.max_gs) {
			scat[j].byte_count = 0;
			scat[j].lkey = htobe32(MLX5_INVALID_LKEY);
			scat[j].addr = 0;
		}

		if (unlikely(qp->wq_sig))
			set_sig_seg(qp, sig, (wr->num_sge + 1) << 4,
				    qp->rq.head & 0xffff);

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();

		/*
		 * For Raw Packet QP, avoid updating the doorbell record
		 * as long as the QP isn't in RTR state, to avoid receiving
		 * packets in illegal states.
		 * This is only for Raw Packet QPs since they are represented
		 * differently in the hardware.
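		 * The ring head is still advanced above; only the doorbell
		 * record write is skipped while the QP is in RESET or INIT.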
		 */
		if (likely(!(ibqp->qp_type == IBV_QPT_RAW_PACKET &&
			     ibqp->state < IBV_QPS_RTR)))
			qp->db[MLX5_RCV_DBR] = htobe32(qp->rq.head & 0xffff);
	}

	mlx5_spin_unlock(&qp->rq.lock);

	return err;
}

int mlx5_use_huge(const char *key)
{
	char *e;
	e = getenv(key);
	if (e && !strcmp(e, "y"))
		return 1;

	return 0;
}

struct mlx5_qp *mlx5_find_qp(struct mlx5_context *ctx, uint32_t qpn)
{
	int tind = qpn >> MLX5_QP_TABLE_SHIFT;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & MLX5_QP_TABLE_MASK];
	else
		return NULL;
}

int mlx5_store_qp(struct mlx5_context *ctx, uint32_t qpn, struct mlx5_qp *qp)
{
	int tind = qpn >> MLX5_QP_TABLE_SHIFT;

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = calloc(MLX5_QP_TABLE_MASK + 1,
						   sizeof(struct mlx5_qp *));
		if (!ctx->qp_table[tind].table)
			return -1;
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & MLX5_QP_TABLE_MASK] = qp;
	return 0;
}

void mlx5_clear_qp(struct mlx5_context *ctx, uint32_t qpn)
{
	int tind = qpn >> MLX5_QP_TABLE_SHIFT;

	if (!--ctx->qp_table[tind].refcnt)
		free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & MLX5_QP_TABLE_MASK] = NULL;
}