/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>

#include "iw_cxgbe.h"
#include "user.h"

static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	struct c4iw_wr_wait wr_wait;
	struct wrqe *wr;

	wr_len = sizeof *res_wr + sizeof *res;
	wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]);
	if (wr == NULL)
		return (0);
	res_wr = wrtod(wr);
	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);

	kfree(cq->sw_queue);
	dma_free_coherent(rhp->ibdev.dma_device,
			  cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return 0;
}

static int
create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
	  struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	struct c4iw_wr_wait wr_wait;
	int ret;
	struct wrqe *wr;
	u64 cq_bar2_qoffset = 0;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

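	/*
	 * Only kernel CQs carry a software queue: the kernel-side flush
	 * paths (insert_recv_cqe(), insert_sq_cqe(), c4iw_flush_hw_cq())
	 * stage synthesized and out-of-order completions in cq->sw_queue.
	 * User CQs are polled entirely from userspace, so no sw_queue is
	 * allocated for them.
	 */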
	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}
	cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize,
				       &cq->dma_addr, GFP_KERNEL);
	if (!cq->queue) {
		ret = -ENOMEM;
		goto err3;
	}
	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]);
	if (wr == NULL) {
		ret = -ENOMEM;
		goto err4;
	}
	res_wr = wrtod(wr);

	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	/*
	 * FIXME: Always use the first queue id for IQANDSTINDEX.  Linux does
	 * the same.
	 */
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			V_FW_RI_RES_WR_IQANUS(0) |
			V_FW_RI_RES_WR_IQANUD(1) |
			F_FW_RI_RES_WR_IQANDST |
			V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			F_FW_RI_RES_WR_IQDROPRSS |
			V_FW_RI_RES_WR_IQPCIECH(2) |
			V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
			F_FW_RI_RES_WR_IQO |
			V_FW_RI_RES_WR_IQESIZE(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->rdev = rdev;

	/* Determine the BAR2 queue offset and qid. */
	t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user,
			  &cq_bar2_qoffset, &cq->bar2_qid);

	/*
	 * If this is a user mapping, compute the page-aligned physical
	 * address for mapping.
	 */
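	/*
	 * For a user CQ only the page-aligned bus address is recorded here;
	 * c4iw_create_cq() later hands it to userspace through the gts_key
	 * mmap entry.  For a kernel CQ a kernel virtual address into BAR2 is
	 * kept so the driver can ring the CQ doorbell itself.
	 */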
	if (user)
		cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK;
	else
		cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva +
				cq_bar2_qoffset);

	return 0;
err4:
	dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue,
			  dma_unmap_addr(cq, mapping));
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(FW_RI_SEND) |
				 V_CQE_TYPE(0) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	BUG_ON(in_use < 0);
	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
	    __func__, wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(swcqe->opcode) |
				 V_CQE_TYPE(1) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	BUG_ON(idx >= wq->sq.size);
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		BUG_ON(swsqe->flushed);
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;
	BUG_ON(cidx > wq->sq.size);

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			BUG_ON(swsqe->flushed);

			/*
			 * Insert this completed cqe into the swcq.
			 */
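			/*
			 * The SWCQE bit set below lets poll_cq() recognize
			 * this entry as a software CQE when it is later
			 * reaped from cq->sw_queue.
			 */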
			CTR3(KTR_IW_CXGBE,
			    "%s moving cqe into swcq sq idx %u cq idx %u\n",
			    __func__, cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}

static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
		struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
				 V_CQE_SWCQE(SW_CQE(hw_cqe)) |
				 V_CQE_OPCODE(FW_RI_READ_REQ) |
				 V_CQE_TYPE(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

static void advance_oldest_read(struct t4_wq *wq)
{

	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and with completions that
 * complete prior unsignaled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq,
	    chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/*
			 * If we have reached here because of an async
			 * event or other error, and have an egress error,
			 * then drop it.
			 */
			if (CQE_TYPE(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ; create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/*
		 * If it's an SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
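		/*
		 * SQ completions are parked in the matching sw_sq slot and
		 * later emitted in submission order by flush_completed_wrs();
		 * RQ completions are copied straight into the software CQ
		 * with the SWCQE bit set.
		 */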
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
	}
}

static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *     0		CQE returned ok.
 *     -EAGAIN		CQE skipped, try again.
 *     -EOVERFLOW	CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	CTR6(KTR_IW_CXGBE,
	    "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
	    CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
	    CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
	CTR5(KTR_IW_CXGBE,
	    "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
	    __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
	    CQE_WRID_LOW(hw_cqe));

	/*
	 * skip CQEs not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip HW CQEs if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
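	/*
	 * create_read_req_cqe() below rebuilds the completion in local memory
	 * from the oldest outstanding read request (its sw_sq index and
	 * read_len), so the consumer sees an ordinary SQ read completion and
	 * the HW CQE itself is never modified.
	 */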
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/*
		 * If we have reached here because of an async event or other
		 * error, and have an egress error, then drop it.
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */

		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
			goto proc_cqe;
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion.  These get stuffed
	 * in the SW SQ.  Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		CTR2(KTR_IW_CXGBE,
		    "%s out of order completion going in sw_sq at idx %u",
		    __func__, CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);
		BUG_ON(idx >= wq->sq.size);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
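		/*
		 * Worked example (illustrative numbers): with sq.size = 64,
		 * cidx = 62 and idx = 1, the adjustment below subtracts
		 * 64 + 1 - 62 = 3, covering the unsignaled WRs in slots 62,
		 * 63 and 0; the signaled WR at slot 1 is then accounted for
		 * by t4_sq_consume() further down.
		 */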
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;
		BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);

		wq->sq.cidx = (uint16_t)idx;
		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u",
		     __func__, wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		t4_sq_consume(wq);
	} else {
		CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
		     __func__, wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		BUG_ON(t4_rq_empty(wq));
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0		cqe returned
 *	-ENODATA	EMPTY;
 *	-EAGAIN		caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe cqe = {0, 0}, *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
	    __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
	    CQE_STATUS(&cqe));
	CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
	    __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
	    (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;
		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_REG_MR;

			/* Invalidate the MR if the fastreg failed */
			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
				c4iw_invalidate_mr(qhp->rhp,
						   CQE_WRID_FR_STAG(&cqe));
			break;
		case C4IW_DRAIN_OPCODE:
			wc->opcode = IB_WC_SEND;
			break;
		default:
			printf("Unexpected opcode %d "
			       "in the CQE received for QPID = 0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}

int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
	    ibucontext);
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
}

int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		   struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;

	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
	if (attr->flags)
		return -EINVAL;

	rhp = to_c4iw_dev(ibdev);

	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
	    ibucontext);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
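	/*
	 * Sizing illustration (hypothetical request, assuming the adapter's
	 * t4_max_iq_size limit is not hit): a consumer asking for 125 CQEs
	 * becomes 127 after the two entries++ adjustments (status page above,
	 * full/empty below), rounds up to 128, and is doubled to 256 hardware
	 * entries; the consumer is later reported ibcq->cqe = 128 - 2 = 126.
	 */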
	entries++;

	/*
	 * entries must be a multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make the actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);

	/*
	 * Make the HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext)
		memsize = roundup(memsize, PAGE_SIZE);
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;
	chp->cq.vector = vector;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
	if (ret)
		goto err1;

	chp->rhp = rhp;
	chp->cq.size--;			/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err2;

	if (ucontext) {
		ret = -ENOMEM;
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err3;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err4;

		memset(&uresp, 0, sizeof(uresp));
		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
				       sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err5;

		mm->key = uresp.key;
		mm->addr = vtophys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.bar2_pa;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	CTR6(KTR_IW_CXGBE,
	    "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
	    __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
	    (unsigned long long)chp->cq.dma_addr);
	return 0;
err5:
	kfree(mm2);
err4:
	kfree(mm);
err3:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
	return ret;
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret = 0;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	t4_arm_cq(&chp->cq,
		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
		ret = t4_cq_notempty(&chp->cq);
	spin_unlock_irqrestore(&chp->lock, flag);
	return ret;
}
#endif