1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_inet.h" 38 39 #ifdef TCP_OFFLOAD 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/ktr.h> 44 #include <sys/bus.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/rwlock.h> 48 #include <sys/socket.h> 49 #include <sys/sbuf.h> 50 51 #include "iw_cxgbe.h" 52 #include "user.h" 53 54 static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, 55 struct c4iw_dev_ucontext *uctx) 56 { 57 struct adapter *sc = rdev->adap; 58 struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); 59 struct fw_ri_res_wr *res_wr; 60 struct fw_ri_res *res; 61 int wr_len; 62 struct c4iw_wr_wait wr_wait; 63 struct wrqe *wr; 64 65 wr_len = sizeof *res_wr + sizeof *res; 66 wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]); 67 if (wr == NULL) 68 return (0); 69 res_wr = wrtod(wr); 70 memset(res_wr, 0, wr_len); 71 res_wr->op_nres = cpu_to_be32( 72 V_FW_WR_OP(FW_RI_RES_WR) | 73 V_FW_RI_RES_WR_NRES(1) | 74 F_FW_WR_COMPL); 75 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); 76 res_wr->cookie = (unsigned long) &wr_wait; 77 res = res_wr->res; 78 res->u.cq.restype = FW_RI_RES_TYPE_CQ; 79 res->u.cq.op = FW_RI_RES_OP_RESET; 80 res->u.cq.iqid = cpu_to_be32(cq->cqid); 81 82 c4iw_init_wr_wait(&wr_wait); 83 84 t4_wrq_tx(sc, wr); 85 86 c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__); 87 88 kfree(cq->sw_queue); 89 dma_free_coherent(rhp->ibdev.dma_device, 90 cq->memsize, cq->queue, 91 dma_unmap_addr(cq, mapping)); 92 c4iw_put_cqid(rdev, cq->cqid, uctx); 93 return 0; 94 } 95 96 static int 97 create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, 98 struct c4iw_dev_ucontext *uctx) 99 { 100 struct adapter *sc = rdev->adap; 101 struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev); 102 struct fw_ri_res_wr *res_wr; 103 struct fw_ri_res *res; 104 int wr_len; 105 int user = (uctx != &rdev->uctx); 106 struct c4iw_wr_wait wr_wait; 107 int ret; 108 struct wrqe *wr; 109 u64 cq_bar2_qoffset = 0; 110 111 cq->cqid = c4iw_get_cqid(rdev, uctx); 112 if (!cq->cqid) { 113 ret = -ENOMEM; 114 goto err1; 115 } 116 117 if (!user) { 118 cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL); 119 if (!cq->sw_queue) { 120 ret = -ENOMEM; 121 goto err2; 122 } 123 } 124 cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize, 125 &cq->dma_addr, GFP_KERNEL); 126 if (!cq->queue) { 127 ret = -ENOMEM; 128 goto err3; 129 } 130 dma_unmap_addr_set(cq, mapping, cq->dma_addr); 131 memset(cq->queue, 0, cq->memsize); 132 133 /* build fw_ri_res_wr */ 134 wr_len = sizeof *res_wr + sizeof *res; 135 136 wr = alloc_wrqe(wr_len, &sc->sge.ctrlq[0]); 137 if (wr == NULL) 138 return (0); 139 res_wr = wrtod(wr); 140 141 memset(res_wr, 0, wr_len); 142 res_wr->op_nres = cpu_to_be32( 143 V_FW_WR_OP(FW_RI_RES_WR) | 144 V_FW_RI_RES_WR_NRES(1) | 145 F_FW_WR_COMPL); 146 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); 147 res_wr->cookie = (unsigned long) &wr_wait; 148 res = res_wr->res; 149 res->u.cq.restype = FW_RI_RES_TYPE_CQ; 150 res->u.cq.op = FW_RI_RES_OP_WRITE; 151 res->u.cq.iqid = cpu_to_be32(cq->cqid); 152 //Fixme: Always use first queue id for IQANDSTINDEX. Linux does the same. 153 res->u.cq.iqandst_to_iqandstindex = cpu_to_be32( 154 V_FW_RI_RES_WR_IQANUS(0) | 155 V_FW_RI_RES_WR_IQANUD(1) | 156 F_FW_RI_RES_WR_IQANDST | 157 V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id)); 158 res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( 159 F_FW_RI_RES_WR_IQDROPRSS | 160 V_FW_RI_RES_WR_IQPCIECH(2) | 161 V_FW_RI_RES_WR_IQINTCNTTHRESH(0) | 162 F_FW_RI_RES_WR_IQO | 163 V_FW_RI_RES_WR_IQESIZE(1)); 164 res->u.cq.iqsize = cpu_to_be16(cq->size); 165 res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); 166 167 c4iw_init_wr_wait(&wr_wait); 168 169 t4_wrq_tx(sc, wr); 170 171 CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait); 172 ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__); 173 if (ret) 174 goto err4; 175 176 cq->gen = 1; 177 cq->rdev = rdev; 178 179 /* Determine the BAR2 queue offset and qid. */ 180 t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user, 181 &cq_bar2_qoffset, &cq->bar2_qid); 182 183 /* If user mapping then compute the page-aligned physical 184 * address for mapping. 185 */ 186 if (user) 187 cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK; 188 else 189 cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva + 190 cq_bar2_qoffset); 191 192 return 0; 193 err4: 194 dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue, 195 dma_unmap_addr(cq, mapping)); 196 err3: 197 kfree(cq->sw_queue); 198 err2: 199 c4iw_put_cqid(rdev, cq->cqid, uctx); 200 err1: 201 return ret; 202 } 203 204 static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) 205 { 206 struct t4_cqe cqe; 207 208 CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq, 209 cq, cq->sw_cidx, cq->sw_pidx); 210 memset(&cqe, 0, sizeof(cqe)); 211 cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | 212 V_CQE_OPCODE(FW_RI_SEND) | 213 V_CQE_TYPE(0) | 214 V_CQE_SWCQE(1) | 215 V_CQE_QPID(wq->sq.qid)); 216 cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); 217 cq->sw_queue[cq->sw_pidx] = cqe; 218 t4_swcq_produce(cq); 219 } 220 221 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) 222 { 223 int flushed = 0; 224 int in_use = wq->rq.in_use - count; 225 226 BUG_ON(in_use < 0); 227 CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u", 228 __func__, wq, cq, wq->rq.in_use, count); 229 while (in_use--) { 230 insert_recv_cqe(wq, cq); 231 flushed++; 232 } 233 return flushed; 234 } 235 236 static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, 237 struct t4_swsqe *swcqe) 238 { 239 struct t4_cqe cqe; 240 241 CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq, 242 cq, cq->sw_cidx, cq->sw_pidx); 243 memset(&cqe, 0, sizeof(cqe)); 244 cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | 245 V_CQE_OPCODE(swcqe->opcode) | 246 V_CQE_TYPE(1) | 247 V_CQE_SWCQE(1) | 248 V_CQE_QPID(wq->sq.qid)); 249 CQE_WRID_SQ_IDX(&cqe) = swcqe->idx; 250 cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); 251 cq->sw_queue[cq->sw_pidx] = cqe; 252 t4_swcq_produce(cq); 253 } 254 255 static void advance_oldest_read(struct t4_wq *wq); 256 257 int c4iw_flush_sq(struct c4iw_qp *qhp) 258 { 259 int flushed = 0; 260 struct t4_wq *wq = &qhp->wq; 261 struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); 262 struct t4_cq *cq = &chp->cq; 263 int idx; 264 struct t4_swsqe *swsqe; 265 266 if (wq->sq.flush_cidx == -1) 267 wq->sq.flush_cidx = wq->sq.cidx; 268 idx = wq->sq.flush_cidx; 269 BUG_ON(idx >= wq->sq.size); 270 while (idx != wq->sq.pidx) { 271 swsqe = &wq->sq.sw_sq[idx]; 272 BUG_ON(swsqe->flushed); 273 swsqe->flushed = 1; 274 insert_sq_cqe(wq, cq, swsqe); 275 if (wq->sq.oldest_read == swsqe) { 276 BUG_ON(swsqe->opcode != FW_RI_READ_REQ); 277 advance_oldest_read(wq); 278 } 279 flushed++; 280 if (++idx == wq->sq.size) 281 idx = 0; 282 } 283 wq->sq.flush_cidx += flushed; 284 if (wq->sq.flush_cidx >= wq->sq.size) 285 wq->sq.flush_cidx -= wq->sq.size; 286 return flushed; 287 } 288 289 static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) 290 { 291 struct t4_swsqe *swsqe; 292 int cidx; 293 294 if (wq->sq.flush_cidx == -1) 295 wq->sq.flush_cidx = wq->sq.cidx; 296 cidx = wq->sq.flush_cidx; 297 BUG_ON(cidx > wq->sq.size); 298 299 while (cidx != wq->sq.pidx) { 300 swsqe = &wq->sq.sw_sq[cidx]; 301 if (!swsqe->signaled) { 302 if (++cidx == wq->sq.size) 303 cidx = 0; 304 } else if (swsqe->complete) { 305 306 BUG_ON(swsqe->flushed); 307 308 /* 309 * Insert this completed cqe into the swcq. 310 */ 311 CTR3(KTR_IW_CXGBE, 312 "%s moving cqe into swcq sq idx %u cq idx %u\n", 313 __func__, cidx, cq->sw_pidx); 314 swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); 315 cq->sw_queue[cq->sw_pidx] = swsqe->cqe; 316 t4_swcq_produce(cq); 317 swsqe->flushed = 1; 318 if (++cidx == wq->sq.size) 319 cidx = 0; 320 wq->sq.flush_cidx = cidx; 321 } else 322 break; 323 } 324 } 325 326 static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, 327 struct t4_cqe *read_cqe) 328 { 329 read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; 330 read_cqe->len = htonl(wq->sq.oldest_read->read_len); 331 read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | 332 V_CQE_SWCQE(SW_CQE(hw_cqe)) | 333 V_CQE_OPCODE(FW_RI_READ_REQ) | 334 V_CQE_TYPE(1)); 335 read_cqe->bits_type_ts = hw_cqe->bits_type_ts; 336 } 337 338 static void advance_oldest_read(struct t4_wq *wq) 339 { 340 341 u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; 342 343 if (rptr == wq->sq.size) 344 rptr = 0; 345 while (rptr != wq->sq.pidx) { 346 wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; 347 348 if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) 349 return; 350 if (++rptr == wq->sq.size) 351 rptr = 0; 352 } 353 wq->sq.oldest_read = NULL; 354 } 355 356 /* 357 * Move all CQEs from the HWCQ into the SWCQ. 358 * Deal with out-of-order and/or completions that complete 359 * prior unsignalled WRs. 360 */ 361 void c4iw_flush_hw_cq(struct c4iw_cq *chp) 362 { 363 struct t4_cqe *hw_cqe, *swcqe, read_cqe; 364 struct c4iw_qp *qhp; 365 struct t4_swsqe *swsqe; 366 int ret; 367 368 CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq, 369 chp->cq.cqid); 370 ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); 371 372 /* 373 * This logic is similar to poll_cq(), but not quite the same 374 * unfortunately. Need to move pertinent HW CQEs to the SW CQ but 375 * also do any translation magic that poll_cq() normally does. 376 */ 377 while (!ret) { 378 qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); 379 380 /* 381 * drop CQEs with no associated QP 382 */ 383 if (qhp == NULL) 384 goto next_cqe; 385 386 if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) 387 goto next_cqe; 388 389 if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { 390 391 /* If we have reached here because of async 392 * event or other error, and have egress error 393 * then drop 394 */ 395 if (CQE_TYPE(hw_cqe) == 1) 396 goto next_cqe; 397 398 /* drop peer2peer RTR reads. 399 */ 400 if (CQE_WRID_STAG(hw_cqe) == 1) 401 goto next_cqe; 402 403 /* 404 * Eat completions for unsignaled read WRs. 405 */ 406 if (!qhp->wq.sq.oldest_read->signaled) { 407 advance_oldest_read(&qhp->wq); 408 goto next_cqe; 409 } 410 411 /* 412 * Don't write to the HWCQ, create a new read req CQE 413 * in local memory and move it into the swcq. 414 */ 415 create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); 416 hw_cqe = &read_cqe; 417 advance_oldest_read(&qhp->wq); 418 } 419 420 /* if its a SQ completion, then do the magic to move all the 421 * unsignaled and now in-order completions into the swcq. 422 */ 423 if (SQ_TYPE(hw_cqe)) { 424 swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; 425 swsqe->cqe = *hw_cqe; 426 swsqe->complete = 1; 427 flush_completed_wrs(&qhp->wq, &chp->cq); 428 } else { 429 swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; 430 *swcqe = *hw_cqe; 431 swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); 432 t4_swcq_produce(&chp->cq); 433 } 434 next_cqe: 435 t4_hwcq_consume(&chp->cq); 436 ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); 437 } 438 } 439 440 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) 441 { 442 if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) 443 return 0; 444 445 if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe)) 446 return 0; 447 448 if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe)) 449 return 0; 450 451 if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq)) 452 return 0; 453 return 1; 454 } 455 456 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) 457 { 458 struct t4_cqe *cqe; 459 u32 ptr; 460 461 *count = 0; 462 CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count); 463 ptr = cq->sw_cidx; 464 while (ptr != cq->sw_pidx) { 465 cqe = &cq->sw_queue[ptr]; 466 if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && 467 (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) 468 (*count)++; 469 if (++ptr == cq->size) 470 ptr = 0; 471 } 472 CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); 473 } 474 475 /* 476 * poll_cq 477 * 478 * Caller must: 479 * check the validity of the first CQE, 480 * supply the wq assicated with the qpid. 481 * 482 * credit: cq credit to return to sge. 483 * cqe_flushed: 1 iff the CQE is flushed. 484 * cqe: copy of the polled CQE. 485 * 486 * return value: 487 * 0 CQE returned ok. 488 * -EAGAIN CQE skipped, try again. 489 * -EOVERFLOW CQ overflow detected. 490 */ 491 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, 492 u8 *cqe_flushed, u64 *cookie, u32 *credit) 493 { 494 int ret = 0; 495 struct t4_cqe *hw_cqe, read_cqe; 496 497 *cqe_flushed = 0; 498 *credit = 0; 499 ret = t4_next_cqe(cq, &hw_cqe); 500 if (ret) 501 return ret; 502 503 CTR6(KTR_IW_CXGBE, 504 "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__, 505 CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe), 506 CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe)); 507 CTR5(KTR_IW_CXGBE, 508 "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x", 509 __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe), 510 CQE_WRID_LOW(hw_cqe)); 511 512 /* 513 * skip cqe's not affiliated with a QP. 514 */ 515 if (wq == NULL) { 516 ret = -EAGAIN; 517 goto skip_cqe; 518 } 519 520 /* 521 * skip hw cqe's if the wq is flushed. 522 */ 523 if (wq->flushed && !SW_CQE(hw_cqe)) { 524 ret = -EAGAIN; 525 goto skip_cqe; 526 } 527 528 /* 529 * skip TERMINATE cqes... 530 */ 531 if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { 532 ret = -EAGAIN; 533 goto skip_cqe; 534 } 535 536 /* 537 * Special cqe for drain WR completions... 538 */ 539 if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { 540 *cookie = CQE_DRAIN_COOKIE(hw_cqe); 541 *cqe = *hw_cqe; 542 goto skip_cqe; 543 } 544 545 /* 546 * Gotta tweak READ completions: 547 * 1) the cqe doesn't contain the sq_wptr from the wr. 548 * 2) opcode not reflected from the wr. 549 * 3) read_len not reflected from the wr. 550 * 4) cq_type is RQ_TYPE not SQ_TYPE. 551 */ 552 if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { 553 554 /* If we have reached here because of async 555 * event or other error, and have egress error 556 * then drop 557 */ 558 if (CQE_TYPE(hw_cqe) == 1) { 559 if (CQE_STATUS(hw_cqe)) 560 t4_set_wq_in_error(wq); 561 ret = -EAGAIN; 562 goto skip_cqe; 563 } 564 565 /* If this is an unsolicited read response, then the read 566 * was generated by the kernel driver as part of peer-2-peer 567 * connection setup. So ignore the completion. 568 */ 569 if (CQE_WRID_STAG(hw_cqe) == 1) { 570 if (CQE_STATUS(hw_cqe)) 571 t4_set_wq_in_error(wq); 572 ret = -EAGAIN; 573 goto skip_cqe; 574 } 575 576 /* 577 * Eat completions for unsignaled read WRs. 578 */ 579 if (!wq->sq.oldest_read->signaled) { 580 advance_oldest_read(wq); 581 ret = -EAGAIN; 582 goto skip_cqe; 583 } 584 585 /* 586 * Don't write to the HWCQ, so create a new read req CQE 587 * in local memory. 588 */ 589 create_read_req_cqe(wq, hw_cqe, &read_cqe); 590 hw_cqe = &read_cqe; 591 advance_oldest_read(wq); 592 } 593 594 if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { 595 *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); 596 t4_set_wq_in_error(wq); 597 } 598 599 /* 600 * RECV completion. 601 */ 602 if (RQ_TYPE(hw_cqe)) { 603 604 /* 605 * HW only validates 4 bits of MSN. So we must validate that 606 * the MSN in the SEND is the next expected MSN. If its not, 607 * then we complete this with T4_ERR_MSN and mark the wq in 608 * error. 609 */ 610 611 if (t4_rq_empty(wq)) { 612 t4_set_wq_in_error(wq); 613 ret = -EAGAIN; 614 goto skip_cqe; 615 } 616 if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { 617 t4_set_wq_in_error(wq); 618 hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN)); 619 goto proc_cqe; 620 } 621 goto proc_cqe; 622 } 623 624 /* 625 * If we get here its a send completion. 626 * 627 * Handle out of order completion. These get stuffed 628 * in the SW SQ. Then the SW SQ is walked to move any 629 * now in-order completions into the SW CQ. This handles 630 * 2 cases: 631 * 1) reaping unsignaled WRs when the first subsequent 632 * signaled WR is completed. 633 * 2) out of order read completions. 634 */ 635 if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) { 636 struct t4_swsqe *swsqe; 637 638 CTR2(KTR_IW_CXGBE, 639 "%s out of order completion going in sw_sq at idx %u", 640 __func__, CQE_WRID_SQ_IDX(hw_cqe)); 641 swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; 642 swsqe->cqe = *hw_cqe; 643 swsqe->complete = 1; 644 ret = -EAGAIN; 645 goto flush_wq; 646 } 647 648 proc_cqe: 649 *cqe = *hw_cqe; 650 651 /* 652 * Reap the associated WR(s) that are freed up with this 653 * completion. 654 */ 655 if (SQ_TYPE(hw_cqe)) { 656 int idx = CQE_WRID_SQ_IDX(hw_cqe); 657 BUG_ON(idx >= wq->sq.size); 658 659 /* 660 * Account for any unsignaled completions completed by 661 * this signaled completion. In this case, cidx points 662 * to the first unsignaled one, and idx points to the 663 * signaled one. So adjust in_use based on this delta. 664 * if this is not completing any unsigned wrs, then the 665 * delta will be 0. Handle wrapping also! 666 */ 667 if (idx < wq->sq.cidx) 668 wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; 669 else 670 wq->sq.in_use -= idx - wq->sq.cidx; 671 BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); 672 673 wq->sq.cidx = (uint16_t)idx; 674 CTR2(KTR_IW_CXGBE, "%s completing sq idx %u", 675 __func__, wq->sq.cidx); 676 *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; 677 t4_sq_consume(wq); 678 } else { 679 CTR2(KTR_IW_CXGBE, "%s completing rq idx %u", 680 __func__, wq->rq.cidx); 681 *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; 682 BUG_ON(t4_rq_empty(wq)); 683 t4_rq_consume(wq); 684 goto skip_cqe; 685 } 686 687 flush_wq: 688 /* 689 * Flush any completed cqes that are now in-order. 690 */ 691 flush_completed_wrs(wq, cq); 692 693 skip_cqe: 694 if (SW_CQE(hw_cqe)) { 695 CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u", 696 __func__, cq, cq->cqid, cq->sw_cidx); 697 t4_swcq_consume(cq); 698 } else { 699 CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u", 700 __func__, cq, cq->cqid, cq->cidx); 701 t4_hwcq_consume(cq); 702 } 703 return ret; 704 } 705 706 /* 707 * Get one cq entry from c4iw and map it to openib. 708 * 709 * Returns: 710 * 0 cqe returned 711 * -ENODATA EMPTY; 712 * -EAGAIN caller must try again 713 * any other -errno fatal error 714 */ 715 static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) 716 { 717 struct c4iw_qp *qhp = NULL; 718 struct t4_cqe cqe = {0, 0}, *rd_cqe; 719 struct t4_wq *wq; 720 u32 credit = 0; 721 u8 cqe_flushed; 722 u64 cookie = 0; 723 int ret; 724 725 ret = t4_next_cqe(&chp->cq, &rd_cqe); 726 727 if (ret) 728 return ret; 729 730 qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); 731 if (!qhp) 732 wq = NULL; 733 else { 734 spin_lock(&qhp->lock); 735 wq = &(qhp->wq); 736 } 737 ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); 738 if (ret) 739 goto out; 740 741 wc->wr_id = cookie; 742 wc->qp = &qhp->ibqp; 743 wc->vendor_err = CQE_STATUS(&cqe); 744 wc->wc_flags = 0; 745 746 CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x", 747 __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), 748 CQE_STATUS(&cqe)); 749 CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx", 750 __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), 751 (unsigned long long)cookie); 752 753 if (CQE_TYPE(&cqe) == 0) { 754 if (!CQE_STATUS(&cqe)) 755 wc->byte_len = CQE_LEN(&cqe); 756 else 757 wc->byte_len = 0; 758 wc->opcode = IB_WC_RECV; 759 if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || 760 CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { 761 wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); 762 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 763 c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); 764 } 765 } else { 766 switch (CQE_OPCODE(&cqe)) { 767 case FW_RI_RDMA_WRITE: 768 wc->opcode = IB_WC_RDMA_WRITE; 769 break; 770 case FW_RI_READ_REQ: 771 wc->opcode = IB_WC_RDMA_READ; 772 wc->byte_len = CQE_LEN(&cqe); 773 break; 774 case FW_RI_SEND_WITH_INV: 775 case FW_RI_SEND_WITH_SE_INV: 776 wc->opcode = IB_WC_SEND; 777 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 778 break; 779 case FW_RI_SEND: 780 case FW_RI_SEND_WITH_SE: 781 wc->opcode = IB_WC_SEND; 782 break; 783 case FW_RI_LOCAL_INV: 784 wc->opcode = IB_WC_LOCAL_INV; 785 break; 786 case FW_RI_FAST_REGISTER: 787 wc->opcode = IB_WC_REG_MR; 788 789 /* Invalidate the MR if the fastreg failed */ 790 if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) 791 c4iw_invalidate_mr(qhp->rhp, 792 CQE_WRID_FR_STAG(&cqe)); 793 break; 794 case C4IW_DRAIN_OPCODE: 795 wc->opcode = IB_WC_SEND; 796 break; 797 default: 798 printf("Unexpected opcode %d " 799 "in the CQE received for QPID = 0x%0x\n", 800 CQE_OPCODE(&cqe), CQE_QPID(&cqe)); 801 ret = -EINVAL; 802 goto out; 803 } 804 } 805 806 if (cqe_flushed) 807 wc->status = IB_WC_WR_FLUSH_ERR; 808 else { 809 810 switch (CQE_STATUS(&cqe)) { 811 case T4_ERR_SUCCESS: 812 wc->status = IB_WC_SUCCESS; 813 break; 814 case T4_ERR_STAG: 815 wc->status = IB_WC_LOC_ACCESS_ERR; 816 break; 817 case T4_ERR_PDID: 818 wc->status = IB_WC_LOC_PROT_ERR; 819 break; 820 case T4_ERR_QPID: 821 case T4_ERR_ACCESS: 822 wc->status = IB_WC_LOC_ACCESS_ERR; 823 break; 824 case T4_ERR_WRAP: 825 wc->status = IB_WC_GENERAL_ERR; 826 break; 827 case T4_ERR_BOUND: 828 wc->status = IB_WC_LOC_LEN_ERR; 829 break; 830 case T4_ERR_INVALIDATE_SHARED_MR: 831 case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: 832 wc->status = IB_WC_MW_BIND_ERR; 833 break; 834 case T4_ERR_CRC: 835 case T4_ERR_MARKER: 836 case T4_ERR_PDU_LEN_ERR: 837 case T4_ERR_OUT_OF_RQE: 838 case T4_ERR_DDP_VERSION: 839 case T4_ERR_RDMA_VERSION: 840 case T4_ERR_DDP_QUEUE_NUM: 841 case T4_ERR_MSN: 842 case T4_ERR_TBIT: 843 case T4_ERR_MO: 844 case T4_ERR_MSN_RANGE: 845 case T4_ERR_IRD_OVERFLOW: 846 case T4_ERR_OPCODE: 847 case T4_ERR_INTERNAL_ERR: 848 wc->status = IB_WC_FATAL_ERR; 849 break; 850 case T4_ERR_SWFLUSH: 851 wc->status = IB_WC_WR_FLUSH_ERR; 852 break; 853 default: 854 printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n", 855 CQE_STATUS(&cqe), CQE_QPID(&cqe)); 856 wc->status = IB_WC_FATAL_ERR; 857 } 858 } 859 out: 860 if (wq) 861 spin_unlock(&qhp->lock); 862 return ret; 863 } 864 865 int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 866 { 867 struct c4iw_cq *chp; 868 unsigned long flags; 869 int npolled; 870 int err = 0; 871 872 chp = to_c4iw_cq(ibcq); 873 874 spin_lock_irqsave(&chp->lock, flags); 875 for (npolled = 0; npolled < num_entries; ++npolled) { 876 do { 877 err = c4iw_poll_cq_one(chp, wc + npolled); 878 } while (err == -EAGAIN); 879 if (err) 880 break; 881 } 882 spin_unlock_irqrestore(&chp->lock, flags); 883 return !err || err == -ENODATA ? npolled : err; 884 } 885 886 void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) 887 { 888 struct c4iw_cq *chp; 889 struct c4iw_ucontext *ucontext; 890 891 CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq); 892 chp = to_c4iw_cq(ib_cq); 893 894 remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); 895 atomic_dec(&chp->refcnt); 896 wait_event(chp->wait, !atomic_read(&chp->refcnt)); 897 898 ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, 899 ibucontext); 900 destroy_cq(&chp->rhp->rdev, &chp->cq, 901 ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx); 902 } 903 904 int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, 905 struct ib_udata *udata) 906 { 907 struct ib_device *ibdev = ibcq->device; 908 int entries = attr->cqe; 909 int vector = attr->comp_vector; 910 struct c4iw_dev *rhp; 911 struct c4iw_cq *chp = to_c4iw_cq(ibcq); 912 struct c4iw_create_cq_resp uresp; 913 struct c4iw_ucontext *ucontext = NULL; 914 int ret; 915 size_t memsize, hwentries; 916 struct c4iw_mm_entry *mm, *mm2; 917 918 CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries); 919 if (attr->flags) 920 return -EINVAL; 921 922 rhp = to_c4iw_dev(ibdev); 923 924 ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, 925 ibucontext); 926 927 /* account for the status page. */ 928 entries++; 929 930 /* IQ needs one extra entry to differentiate full vs empty. */ 931 entries++; 932 933 /* 934 * entries must be multiple of 16 for HW. 935 */ 936 entries = roundup(entries, 16); 937 938 /* 939 * Make actual HW queue 2x to avoid cdix_inc overflows. 940 */ 941 hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size); 942 943 /* 944 * Make HW queue at least 64 entries so GTS updates aren't too 945 * frequent. 946 */ 947 if (hwentries < 64) 948 hwentries = 64; 949 950 memsize = hwentries * sizeof *chp->cq.queue; 951 952 /* 953 * memsize must be a multiple of the page size if its a user cq. 954 */ 955 if (ucontext) 956 memsize = roundup(memsize, PAGE_SIZE); 957 chp->cq.size = hwentries; 958 chp->cq.memsize = memsize; 959 chp->cq.vector = vector; 960 961 ret = create_cq(&rhp->rdev, &chp->cq, 962 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 963 if (ret) 964 goto err1; 965 966 chp->rhp = rhp; 967 chp->cq.size--; /* status page */ 968 chp->ibcq.cqe = entries - 2; 969 spin_lock_init(&chp->lock); 970 spin_lock_init(&chp->comp_handler_lock); 971 atomic_set(&chp->refcnt, 1); 972 init_waitqueue_head(&chp->wait); 973 ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); 974 if (ret) 975 goto err2; 976 977 if (ucontext) { 978 ret = -ENOMEM; 979 mm = kmalloc(sizeof *mm, GFP_KERNEL); 980 if (!mm) 981 goto err3; 982 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); 983 if (!mm2) 984 goto err4; 985 986 memset(&uresp, 0, sizeof(uresp)); 987 uresp.qid_mask = rhp->rdev.cqmask; 988 uresp.cqid = chp->cq.cqid; 989 uresp.size = chp->cq.size; 990 uresp.memsize = chp->cq.memsize; 991 spin_lock(&ucontext->mmap_lock); 992 uresp.key = ucontext->key; 993 ucontext->key += PAGE_SIZE; 994 uresp.gts_key = ucontext->key; 995 ucontext->key += PAGE_SIZE; 996 spin_unlock(&ucontext->mmap_lock); 997 ret = ib_copy_to_udata(udata, &uresp, 998 sizeof(uresp) - sizeof(uresp.reserved)); 999 if (ret) 1000 goto err5; 1001 1002 mm->key = uresp.key; 1003 mm->addr = vtophys(chp->cq.queue); 1004 mm->len = chp->cq.memsize; 1005 insert_mmap(ucontext, mm); 1006 1007 mm2->key = uresp.gts_key; 1008 mm2->addr = chp->cq.bar2_pa; 1009 mm2->len = PAGE_SIZE; 1010 insert_mmap(ucontext, mm2); 1011 } 1012 CTR6(KTR_IW_CXGBE, 1013 "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx", 1014 __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize, 1015 (unsigned long long) chp->cq.dma_addr); 1016 return 0; 1017 err5: 1018 kfree(mm2); 1019 err4: 1020 kfree(mm); 1021 err3: 1022 remove_handle(rhp, &rhp->cqidr, chp->cq.cqid); 1023 err2: 1024 destroy_cq(&chp->rhp->rdev, &chp->cq, 1025 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 1026 err1: 1027 return ret; 1028 } 1029 1030 int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) 1031 { 1032 return -ENOSYS; 1033 } 1034 1035 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 1036 { 1037 struct c4iw_cq *chp; 1038 int ret = 0; 1039 unsigned long flag; 1040 1041 chp = to_c4iw_cq(ibcq); 1042 spin_lock_irqsave(&chp->lock, flag); 1043 t4_arm_cq(&chp->cq, 1044 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); 1045 if (flags & IB_CQ_REPORT_MISSED_EVENTS) 1046 ret = t4_cq_notempty(&chp->cq); 1047 spin_unlock_irqrestore(&chp->lock, flag); 1048 return ret; 1049 } 1050 #endif 1051