/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>

#include <infiniband/opcode.h>

#include "mlx4.h"
#include "doorbell.h"

enum {
	MLX4_CQ_DOORBELL			= 0x20
};

enum {
	CQ_OK					=  0,
	CQ_EMPTY				= -1,
	CQ_POLL_ERR				= -2
};

#define MLX4_CQ_DB_REQ_NOT_SOL			(1 << 24)
#define MLX4_CQ_DB_REQ_NOT			(2 << 24)

enum {
	MLX4_CQE_VLAN_PRESENT_MASK		= 1 << 29,
	MLX4_CQE_QPN_MASK			= 0xffffff,
};

enum {
	MLX4_CQE_OWNER_MASK			= 0x80,
	MLX4_CQE_IS_SEND_MASK			= 0x40,
	MLX4_CQE_OPCODE_MASK			= 0x1f
};

enum {
	MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR		= 0x01,
	MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR		= 0x02,
	MLX4_CQE_SYNDROME_LOCAL_PROT_ERR		= 0x04,
	MLX4_CQE_SYNDROME_WR_FLUSH_ERR			= 0x05,
	MLX4_CQE_SYNDROME_MW_BIND_ERR			= 0x06,
	MLX4_CQE_SYNDROME_BAD_RESP_ERR			= 0x10,
	MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR		= 0x11,
	MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR		= 0x12,
	MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR		= 0x13,
	MLX4_CQE_SYNDROME_REMOTE_OP_ERR			= 0x14,
	MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR	= 0x15,
	MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR		= 0x16,
	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
};

struct mlx4_err_cqe {
	uint32_t	vlan_my_qpn;
	uint32_t	reserved1[5];
	uint16_t	wqe_index;
	uint8_t		vendor_err;
	uint8_t		syndrome;
	uint8_t		reserved2[3];
	uint8_t		owner_sr_opcode;
};

static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry)
{
	return cq->buf.buf + entry * cq->cqe_size;
}

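/*
 * A CQE is owned by software when its ownership bit matches the parity
 * of the consumer index's pass over the (power-of-two sized) CQ ring.
 * For 64-byte CQEs the ownership byte and the reported fields live in
 * the second 32-byte half of the slot, so the check is done on cqe + 1
 * while the returned pointer still addresses the start of the slot.
 */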
static void *get_sw_cqe(struct mlx4_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
	struct mlx4_cqe *tcqe = cq->cqe_size == 64 ? cqe + 1 : cqe;

	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
		!!(n & (cq->ibv_cq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
{
	return get_sw_cqe(cq, cq->cons_index);
}

static enum ibv_wc_status mlx4_handle_error_cqe(struct mlx4_err_cqe *cqe)
{
	if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR)
		printf(PFX "local QP operation err "
		       "(QPN %06x, WQE index %x, vendor syndrome %02x, "
		       "opcode = %02x)\n",
		       htobe32(cqe->vlan_my_qpn), htobe32(cqe->wqe_index),
		       cqe->vendor_err,
		       cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);

	switch (cqe->syndrome) {
	case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		return IBV_WC_LOC_LEN_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		return IBV_WC_LOC_QP_OP_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
		return IBV_WC_LOC_PROT_ERR;
	case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
		return IBV_WC_WR_FLUSH_ERR;
	case MLX4_CQE_SYNDROME_MW_BIND_ERR:
		return IBV_WC_MW_BIND_ERR;
	case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
		return IBV_WC_BAD_RESP_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		return IBV_WC_LOC_ACCESS_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		return IBV_WC_REM_INV_REQ_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		return IBV_WC_REM_ACCESS_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
		return IBV_WC_REM_OP_ERR;
	case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		return IBV_WC_RETRY_EXC_ERR;
	case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		return IBV_WC_RNR_RETRY_EXC_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		return IBV_WC_REM_ABORT_ERR;
	default:
		return IBV_WC_GENERAL_ERR;
	}
}

static inline void handle_good_req(struct ibv_wc *wc, struct mlx4_cqe *cqe)
{
	wc->wc_flags = 0;
	switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
	case MLX4_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX4_OPCODE_RDMA_WRITE:
		wc->opcode = IBV_WC_RDMA_WRITE;
		break;
	case MLX4_OPCODE_SEND_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX4_OPCODE_SEND:
	case MLX4_OPCODE_SEND_INVAL:
		wc->opcode = IBV_WC_SEND;
		break;
	case MLX4_OPCODE_RDMA_READ:
		wc->opcode = IBV_WC_RDMA_READ;
		wc->byte_len = be32toh(cqe->byte_cnt);
		break;
	case MLX4_OPCODE_ATOMIC_CS:
		wc->opcode = IBV_WC_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX4_OPCODE_ATOMIC_FA:
		wc->opcode = IBV_WC_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX4_OPCODE_LOCAL_INVAL:
		wc->opcode = IBV_WC_LOCAL_INV;
		break;
	case MLX4_OPCODE_BIND_MW:
		wc->opcode = IBV_WC_BIND_MW;
		break;
	default:
		/* assume it's a send completion */
		wc->opcode = IBV_WC_SEND;
		break;
	}
}

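/*
 * Fetch the next software-owned CQE, if any, and advance the consumer
 * index.  A read barrier separates the ownership check from reading the
 * rest of the entry; with 64-byte CQEs the valid fields are in the
 * second 32-byte half, so the returned pointer is advanced past the
 * first half.
 */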
static inline int mlx4_get_next_cqe(struct mlx4_cq *cq,
				    struct mlx4_cqe **pcqe)
				    ALWAYS_INLINE;
static inline int mlx4_get_next_cqe(struct mlx4_cq *cq,
				    struct mlx4_cqe **pcqe)
{
	struct mlx4_cqe *cqe;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	if (cq->cqe_size == 64)
		++cqe;

	++cq->cons_index;

	VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	udma_from_device_barrier();

	*pcqe = cqe;

	return CQ_OK;
}

static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
				 struct mlx4_cqe *cqe,
				 struct mlx4_qp **cur_qp,
				 struct ibv_wc *wc, int lazy)
				 ALWAYS_INLINE;
static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
				 struct mlx4_cqe *cqe,
				 struct mlx4_qp **cur_qp,
				 struct ibv_wc *wc, int lazy)
{
	struct mlx4_wq *wq;
	struct mlx4_srq *srq;
	uint32_t qpn;
	uint32_t g_mlpath_rqpn;
	uint64_t *pwr_id;
	uint16_t wqe_index;
	struct mlx4_err_cqe *ecqe;
	struct mlx4_context *mctx;
	int is_error;
	int is_send;
	enum ibv_wc_status *pstatus;

	mctx = to_mctx(cq->ibv_cq.context);
	qpn = be32toh(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
	if (lazy) {
		cq->cqe = cqe;
		cq->flags &= (~MLX4_CQ_FLAGS_RX_CSUM_VALID);
	} else
		wc->qp_num = qpn;

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
		MLX4_CQE_OPCODE_ERROR;

	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
		/*
		 * We do not have to take the XSRQ table lock here,
		 * because CQs will be locked while SRQs are removed
		 * from the table.
		 */
		srq = mlx4_find_xsrq(&mctx->xsrq_table,
				     be32toh(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
		if (!srq)
			return CQ_POLL_ERR;
	} else {
		if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
			/*
			 * We do not have to take the QP table lock here,
			 * because CQs will be locked while QPs are removed
			 * from the table.
			 */
			*cur_qp = mlx4_find_qp(mctx, qpn);
			if (!*cur_qp)
				return CQ_POLL_ERR;
		}
		srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
	}

	pwr_id = lazy ? &cq->ibv_cq.wr_id : &wc->wr_id;
	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_index = be16toh(cqe->wqe_index);
		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
		*pwr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if (srq) {
		wqe_index = be16toh(cqe->wqe_index);
		*pwr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else {
		wq = &(*cur_qp)->rq;
		*pwr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

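	/*
	 * Translate the completion status.  An error CQE overlays
	 * struct mlx4_err_cqe, so only the syndrome and vendor error
	 * are meaningful and parsing stops here in that case.
	 */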
	pstatus = lazy ? &cq->ibv_cq.status : &wc->status;
	if (is_error) {
		ecqe = (struct mlx4_err_cqe *)cqe;
		*pstatus = mlx4_handle_error_cqe(ecqe);
		if (!lazy)
			wc->vendor_err = ecqe->vendor_err;
		return CQ_OK;
	}

	*pstatus = IBV_WC_SUCCESS;
	if (lazy) {
		if (!is_send)
			if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_VALID))
				cq->flags |= MLX4_CQ_FLAGS_RX_CSUM_VALID;
	} else if (is_send) {
		handle_good_req(wc, cqe);
	} else {
		wc->byte_len = be32toh(cqe->byte_cnt);

		switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc->opcode = IBV_WC_RECV;
			wc->wc_flags |= IBV_WC_WITH_INV;
			wc->imm_data = be32toh(cqe->immed_rss_invalid);
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode = IBV_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode = IBV_WC_RECV;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid = be16toh(cqe->rlid);
		g_mlpath_rqpn = be32toh(cqe->g_mlpath_rqpn);
		wc->src_qp = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
		wc->pkey_index = be32toh(cqe->immed_rss_invalid) & 0x7f;
		/*
		 * When working with XRC SRQs we have no QP to check the
		 * link layer from, so fall back to the IB SL field;
		 * RoCE handling here is still TBD.
		 */
		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
			wc->sl = be16toh(cqe->sl_vid) >> 13;
		else
			wc->sl = be16toh(cqe->sl_vid) >> 12;

		if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_VALID)) {
			wc->wc_flags |= ((cqe->status & htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) ==
					 htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) <<
					IBV_WC_IP_CSUM_OK_SHIFT;
		}
	}

	return CQ_OK;
}

static inline int mlx4_parse_lazy_cqe(struct mlx4_cq *cq,
				      struct mlx4_cqe *cqe)
				      ALWAYS_INLINE;
static inline int mlx4_parse_lazy_cqe(struct mlx4_cq *cq,
				      struct mlx4_cqe *cqe)
{
	return mlx4_parse_cqe(cq, cqe, &cq->cur_qp, NULL, 1);
}

static inline int mlx4_poll_one(struct mlx4_cq *cq,
				struct mlx4_qp **cur_qp,
				struct ibv_wc *wc)
				ALWAYS_INLINE;
static inline int mlx4_poll_one(struct mlx4_cq *cq,
				struct mlx4_qp **cur_qp,
				struct ibv_wc *wc)
{
	struct mlx4_cqe *cqe;
	int err;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY)
		return err;

	return mlx4_parse_cqe(cq, cqe, cur_qp, wc, 0);
}

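/*
 * Classic completion polling: drain up to 'ne' CQEs into the wc array
 * under the CQ lock and publish the new consumer index via the
 * doorbell record.
 */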
int mlx4_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	struct mlx4_cq *cq = to_mcq(ibcq);
	struct mlx4_qp *qp = NULL;
	int npolled;
	int err = CQ_OK;

	pthread_spin_lock(&cq->lock);

	for (npolled = 0; npolled < ne; ++npolled) {
		err = mlx4_poll_one(cq, &qp, wc + npolled);
		if (err != CQ_OK)
			break;
	}

	if (npolled || err == CQ_POLL_ERR)
		mlx4_update_cons_index(cq);

	pthread_spin_unlock(&cq->lock);

	return err == CQ_POLL_ERR ? err : npolled;
}

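/*
 * The functions below implement the extended "lazy" polling interface
 * (ibv_cq_ex): start_poll pins the current CQE in cq->cqe, the read_*
 * callbacks decode individual fields on demand, and end_poll updates
 * the consumer index.  A minimal consumer loop, as a sketch only (the
 * process_one() helper is hypothetical, not part of this file):
 *
 *	struct ibv_poll_cq_attr attr = {};
 *
 *	if (ibv_start_poll(cq_ex, &attr) == 0) {
 *		do {
 *			if (cq_ex->status == IBV_WC_SUCCESS)
 *				process_one(cq_ex->wr_id,
 *					    ibv_wc_read_opcode(cq_ex));
 *		} while (ibv_next_poll(cq_ex) == 0);
 *		ibv_end_poll(cq_ex);
 *	}
 */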
static inline void _mlx4_end_poll(struct ibv_cq_ex *ibcq, int lock)
				  ALWAYS_INLINE;
static inline void _mlx4_end_poll(struct ibv_cq_ex *ibcq, int lock)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	mlx4_update_cons_index(cq);

	if (lock)
		pthread_spin_unlock(&cq->lock);
}

static inline int _mlx4_start_poll(struct ibv_cq_ex *ibcq,
				   struct ibv_poll_cq_attr *attr,
				   int lock)
				   ALWAYS_INLINE;
static inline int _mlx4_start_poll(struct ibv_cq_ex *ibcq,
				   struct ibv_poll_cq_attr *attr,
				   int lock)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_cqe *cqe;
	int err;

	if (unlikely(attr->comp_mask))
		return EINVAL;

	if (lock)
		pthread_spin_lock(&cq->lock);

	cq->cur_qp = NULL;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY) {
		if (lock)
			pthread_spin_unlock(&cq->lock);
		return ENOENT;
	}

	err = mlx4_parse_lazy_cqe(cq, cqe);
	if (lock && err)
		pthread_spin_unlock(&cq->lock);

	return err;
}

static int mlx4_next_poll(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_cqe *cqe;
	int err;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY)
		return ENOENT;

	return mlx4_parse_lazy_cqe(cq, cqe);
}

static void mlx4_end_poll(struct ibv_cq_ex *ibcq)
{
	_mlx4_end_poll(ibcq, 0);
}

static void mlx4_end_poll_lock(struct ibv_cq_ex *ibcq)
{
	_mlx4_end_poll(ibcq, 1);
}

static int mlx4_start_poll(struct ibv_cq_ex *ibcq,
			   struct ibv_poll_cq_attr *attr)
{
	return _mlx4_start_poll(ibcq, attr, 0);
}

static int mlx4_start_poll_lock(struct ibv_cq_ex *ibcq,
				struct ibv_poll_cq_attr *attr)
{
	return _mlx4_start_poll(ibcq, attr, 1);
}

static enum ibv_wc_opcode mlx4_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	if (cq->cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK) {
		switch (cq->cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
		case MLX4_OPCODE_RDMA_WRITE:
			return IBV_WC_RDMA_WRITE;
		case MLX4_OPCODE_SEND_INVAL:
		case MLX4_OPCODE_SEND_IMM:
		case MLX4_OPCODE_SEND:
			return IBV_WC_SEND;
		case MLX4_OPCODE_RDMA_READ:
			return IBV_WC_RDMA_READ;
		case MLX4_OPCODE_ATOMIC_CS:
			return IBV_WC_COMP_SWAP;
		case MLX4_OPCODE_ATOMIC_FA:
			return IBV_WC_FETCH_ADD;
		case MLX4_OPCODE_LOCAL_INVAL:
			return IBV_WC_LOCAL_INV;
		case MLX4_OPCODE_BIND_MW:
			return IBV_WC_BIND_MW;
		}
	} else {
		switch (cq->cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			return IBV_WC_RECV_RDMA_WITH_IMM;
		case MLX4_RECV_OPCODE_SEND_INVAL:
		case MLX4_RECV_OPCODE_SEND_IMM:
		case MLX4_RECV_OPCODE_SEND:
			return IBV_WC_RECV;
		}
	}

	return 0;
}

static uint32_t mlx4_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
}

static int mlx4_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	int is_send = cq->cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	int wc_flags = 0;

	if (is_send) {
		switch (cq->cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
		case MLX4_OPCODE_SEND_IMM:
			wc_flags |= IBV_WC_WITH_IMM;
			break;
		}
	} else {
		if (cq->flags & MLX4_CQ_FLAGS_RX_CSUM_VALID)
			wc_flags |= ((cq->cqe->status &
				      htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) ==
				     htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) <<
				    IBV_WC_IP_CSUM_OK_SHIFT;

		switch (cq->cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc_flags |= IBV_WC_WITH_IMM;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc_flags |= IBV_WC_WITH_INV;
			break;
		}
		wc_flags |= (be32toh(cq->cqe->g_mlpath_rqpn) & 0x80000000) ? IBV_WC_GRH : 0;
	}

	return wc_flags;
}

static uint32_t mlx4_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->byte_cnt);
}

static uint32_t mlx4_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_err_cqe *ecqe = (struct mlx4_err_cqe *)cq->cqe;

	return ecqe->vendor_err;
}

static uint32_t mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	switch (cq->cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
	case MLX4_RECV_OPCODE_SEND_INVAL:
		return be32toh(cq->cqe->immed_rss_invalid);
	default:
		return cq->cqe->immed_rss_invalid;
	}
}

static uint32_t mlx4_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (uint32_t)be16toh(cq->cqe->rlid);
}

static uint8_t mlx4_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	if ((cq->cur_qp) && (cq->cur_qp->link_layer == IBV_LINK_LAYER_ETHERNET))
		return be16toh(cq->cqe->sl_vid) >> 13;
	else
		return be16toh(cq->cqe->sl_vid) >> 12;
}

static uint32_t mlx4_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->g_mlpath_rqpn) & 0xffffff;
}

static uint8_t mlx4_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (be32toh(cq->cqe->g_mlpath_rqpn) >> 24) & 0x7f;
}

static uint64_t mlx4_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return ((uint64_t)be32toh(cq->cqe->ts_47_16) << 16) |
	       (cq->cqe->ts_15_8 << 8) |
	       (cq->cqe->ts_7_0);
}

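/*
 * Install the extended-poll entry points: the locking variants are used
 * unless the CQ was created as single-threaded, and only the read_*
 * callbacks requested through cq_attr->wc_flags are wired up.
 */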
void mlx4_cq_fill_pfns(struct mlx4_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
{
	if (cq->flags & MLX4_CQ_FLAGS_SINGLE_THREADED) {
		cq->ibv_cq.start_poll = mlx4_start_poll;
		cq->ibv_cq.end_poll = mlx4_end_poll;
	} else {
		cq->ibv_cq.start_poll = mlx4_start_poll_lock;
		cq->ibv_cq.end_poll = mlx4_end_poll_lock;
	}
	cq->ibv_cq.next_poll = mlx4_next_poll;

	cq->ibv_cq.read_opcode = mlx4_cq_read_wc_opcode;
	cq->ibv_cq.read_vendor_err = mlx4_cq_read_wc_vendor_err;
	cq->ibv_cq.read_wc_flags = mlx4_cq_read_wc_flags;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
		cq->ibv_cq.read_byte_len = mlx4_cq_read_wc_byte_len;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
		cq->ibv_cq.read_imm_data = mlx4_cq_read_wc_imm_data;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
		cq->ibv_cq.read_qp_num = mlx4_cq_read_wc_qp_num;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
		cq->ibv_cq.read_src_qp = mlx4_cq_read_wc_src_qp;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
		cq->ibv_cq.read_slid = mlx4_cq_read_wc_slid;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
		cq->ibv_cq.read_sl = mlx4_cq_read_wc_sl;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
		cq->ibv_cq.read_dlid_path_bits = mlx4_cq_read_wc_dlid_path_bits;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
		cq->ibv_cq.read_completion_ts = mlx4_cq_read_wc_completion_ts;
}

int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
	struct mlx4_cq *cq = to_mcq(ibvcq);
	uint32_t doorbell[2];
	uint32_t sn;
	uint32_t ci;
	uint32_t cmd;

	sn  = cq->arm_sn & 3;
	ci  = cq->cons_index & 0xffffff;
	cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT;

	*cq->arm_db = htobe32(sn << 28 | cmd | ci);

	/*
	 * Make sure that the doorbell record in host memory is
	 * written before ringing the doorbell via PCI MMIO.
	 */
	udma_to_device_barrier();

	doorbell[0] = htobe32(sn << 28 | cmd | cq->cqn);
	doorbell[1] = htobe32(ci);

	mlx4_write64(doorbell, to_mctx(ibvcq->context), MLX4_CQ_DOORBELL);

	return 0;
}

void mlx4_cq_event(struct ibv_cq *cq)
{
	to_mcq(cq)->arm_sn++;
}

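/*
 * Remove all CQEs belonging to the given QP (and, for SRQ receives,
 * return their WQEs to the SRQ free list) by copying unrelated older
 * entries over them.  mlx4_cq_clean() wraps this with the CQ lock.
 */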
void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
{
	struct mlx4_cqe *cqe, *dest;
	uint32_t prod_index;
	uint8_t owner_bit;
	int nfreed = 0;
	int cqe_inc = cq->cqe_size == 64 ? 1 : 0;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
		cqe += cqe_inc;
		if (srq && srq->ext_srq &&
		    (be32toh(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
			mlx4_free_srq_wqe(srq, be16toh(cqe->wqe_index));
			++nfreed;
		} else if ((be32toh(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
				mlx4_free_srq_wqe(srq, be16toh(cqe->wqe_index));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
			dest += cqe_inc;
			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
			memcpy(dest, cqe, sizeof *cqe);
			dest->owner_sr_opcode = owner_bit |
				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		udma_to_device_barrier();
		mlx4_update_cons_index(cq);
	}
}

void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
{
	pthread_spin_lock(&cq->lock);
	__mlx4_cq_clean(cq, qpn, srq);
	pthread_spin_unlock(&cq->lock);
}

int mlx4_get_outstanding_cqes(struct mlx4_cq *cq)
{
	uint32_t i;

	for (i = cq->cons_index; get_sw_cqe(cq, i); ++i)
		;

	return i - cq->cons_index;
}

void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe)
{
	struct mlx4_cqe *cqe;
	int i;
	int cqe_inc = cq->cqe_size == 64 ? 1 : 0;

	i = cq->cons_index;
	cqe = get_cqe(cq, (i & old_cqe));
	cqe += cqe_inc;

	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
		cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
			(((i + 1) & (cq->ibv_cq.cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
		memcpy(buf + ((i + 1) & cq->ibv_cq.cqe) * cq->cqe_size,
		       cqe - cqe_inc, cq->cqe_size);
		++i;
		cqe = get_cqe(cq, (i & old_cqe));
		cqe += cqe_inc;
	}

	++cq->cons_index;
}

int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_buf *buf, int nent,
		      int entry_size)
{
	if (mlx4_alloc_buf(buf, align(nent * entry_size, dev->page_size),
			   dev->page_size))
		return -1;
	memset(buf->buf, 0, nent * entry_size);

	return 0;
}