/*
 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "iw_cxgb4.h"

static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
		      struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int ret;

	wr_len = sizeof *res_wr + sizeof *res;
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(&(rdev->lldi.pdev->dev),
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return ret;
}

static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		     struct c4iw_dev_ucontext *uctx,
		     struct c4iw_wr_wait *wr_waitp)
{
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	int ret;
	struct sk_buff *skb;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!skb) {
		ret = -ENOMEM;
		goto err4;
	}
	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);

	res_wr = __skb_put_zero(skb, wr_len);
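	/*
	 * A single resource command is carried in this WR (NRES = 1), and
	 * FW_WR_COMPL_F asks the firmware to post a completion, which
	 * c4iw_ref_send_wait() waits on below.
	 */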
	res_wr->op_nres = cpu_to_be32(
			FW_WR_OP_V(FW_RI_RES_WR) |
			FW_RI_RES_WR_NRES_V(1) |
			FW_WR_COMPL_F);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (uintptr_t)wr_waitp;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			FW_RI_RES_WR_IQANUS_V(0) |
			FW_RI_RES_WR_IQANUD_V(1) |
			FW_RI_RES_WR_IQANDST_F |
			FW_RI_RES_WR_IQANDSTINDEX_V(
				rdev->lldi.ciq_ids[cq->vector]));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			FW_RI_RES_WR_IQDROPRSS_F |
			FW_RI_RES_WR_IQPCIECH_V(2) |
			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
			FW_RI_RES_WR_IQO_F |
			FW_RI_RES_WR_IQESIZE_V(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(wr_waitp);
	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = rdev->lldi.gts_reg;
	cq->rdev = rdev;

	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
				      &cq->bar2_qid,
				      user ? &cq->bar2_pa : NULL);
	if (user && !cq->bar2_pa) {
		pr_warn("%s: cqid %u not in BAR2 range\n",
			pci_name(rdev->lldi.pdev), cq->cqid);
		ret = -EINVAL;
		goto err4;
	}
	return 0;
err4:
	dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_TYPE_V(0) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
		 wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
		 wq, cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
				 CQE_OPCODE_V(swcqe->opcode) |
				 CQE_TYPE_V(1) |
				 CQE_SWCQE_V(1) |
				 CQE_QPID_V(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

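/*
 * c4iw_flush_sq - flush pending SQ work requests to the SW CQ.
 *
 * Walk the SW SQ from flush_cidx to pidx, marking each pending WR as
 * flushed and inserting a T4_ERR_SWFLUSH completion for it into the SW CQ.
 * Returns the number of WRs flushed.
 */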
int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
				 cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}

static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(CQE_QPID_V(CQE_QPID(hw_cqe)) |
				 CQE_SWCQE_V(SW_CQE(hw_cqe)) |
				 CQE_OPCODE_V(FW_RI_READ_REQ) |
				 CQE_TYPE_V(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{

	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and with completions that
 * complete prior unsignaled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	pr_debug("cqid 0x%x\n", chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (flush_qhp != qhp) {
			spin_lock(&qhp->lock);

			if (qhp->wq.flushed == 1)
				goto next_cqe;
		}

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/*
			 * If we have reached here because of an async event
			 * or other error and have an egress error, then drop
			 * the CQE.
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/* drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
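			 * They produce no work completion for the ULP, so
			 * just advance oldest_read past them and drop the
			 * hardware CQE.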
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/* if it's a SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(CQE_SWCQE_V(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
		if (qhp && flush_qhp != qhp)
			spin_unlock(&qhp->lock);
	}
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (DRAIN_CQE(cqe)) {
		WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
		return 0;
	}

	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	pr_debug("count zero %d\n", *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	pr_debug("cq %p count %d\n", cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *	check the validity of the first CQE,
 *	supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *	0		CQE returned ok.
 *	-EAGAIN		CQE skipped, try again.
 *	-EOVERFLOW	CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
		 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
		 CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
		 CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
		 CQE_WRID_LOW(hw_cqe));

	/*
	 * skip CQEs not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip HW CQEs if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
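	 * These complete drain work requests generated by the driver rather
	 * than ordinary hardware work; hand back the stored cookie and
	 * consume the CQE without further processing.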
	 */
	if (DRAIN_CQE(hw_cqe)) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/*
		 * If we have reached here because of an async event or
		 * other error and have an egress error, then drop the CQE.
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */

		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely(!CQE_STATUS(hw_cqe) &&
			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		pr_debug("out of order completion going in sw_sq at idx %u\n",
			 CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
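		 *
		 * For example, with size = 16, cidx = 14 and idx = 2 the
		 * delta is 16 + 2 - 14 = 4: the four unsignaled WRs in
		 * slots 14, 15, 0 and 1 are retired here, and the signaled
		 * WR in slot 2 is then consumed by t4_sq_consume() below.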
		 */
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;

		wq->sq.cidx = (uint16_t)idx;
		pr_debug("completing sq idx %u\n", wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_sq_consume(wq);
	} else {
		pr_debug("completing rq idx %u\n", wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		if (c4iw_wr_log)
			c4iw_log_wr_stats(wq, hw_cqe);
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
			 cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
			 cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		CQ empty
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
		 CQE_QPID(&cqe),
		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
		 CQE_STATUS(&cqe), CQE_LEN(&cqe),
		 CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
		 (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;

		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		default:
			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

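	/*
	 * Flushed WRs complete with IB_WC_WR_FLUSH_ERR; otherwise map the
	 * T4 hardware status onto an ib_wc_status.
	 */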
	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}

int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	pr_debug("ib_cq %p\n", ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
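	/*
	 * Reset the HW CQ context in the firmware, then free the host-side
	 * queue memory and release the CQID.
	 */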
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
		   chp->destroy_skb, chp->wr_waitp);
	c4iw_put_wr_wait(chp->wr_waitp);
	kfree(chp);
	return 0;
}

struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_context,
			     struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret, wr_len;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;

	pr_debug("ib_dev %p entries %d\n", ibdev, entries);
	if (attr->flags)
		return ERR_PTR(-EINVAL);

	rhp = to_c4iw_dev(ibdev);

	if (vector >= rhp->rdev.lldi.nciq)
		return ERR_PTR(-EINVAL);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);
	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
	if (!chp->wr_waitp) {
		ret = -ENOMEM;
		goto err_free_chp;
	}
	c4iw_init_wr_wait(chp->wr_waitp);

	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
	chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
	if (!chp->destroy_skb) {
		ret = -ENOMEM;
		goto err_free_wr_wait;
	}

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext)
		memsize = roundup(memsize, PAGE_SIZE);
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

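	/*
	 * Create the HW CQ with the padded geometry; the extra status-page
	 * and full/empty entries are subtracted back out of what is
	 * reported to the ULP below.
	 */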
	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
			chp->wr_waitp);
	if (ret)
		goto err_free_skb;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err_destroy_cq;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err_remove_handle;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err_free_mm;

		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err_free_mm2;

		mm->key = uresp.key;
		mm->addr = virt_to_phys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
		 chp->cq.cqid, chp, chp->cq.size,
		 chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
	return &chp->ibcq;
err_free_mm2:
	kfree(mm2);
err_free_mm:
	kfree(mm);
err_remove_handle:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err_destroy_cq:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
		   chp->destroy_skb, chp->wr_waitp);
err_free_skb:
	kfree_skb(chp->destroy_skb);
err_free_wr_wait:
	c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}