/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#include <sys/types.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "common/t4fw_interface.h"

struct fl_buf_info {
    int size;
    int type;
    uma_zone_t zone;
};

/* Filled up by t4_sge_modload */
static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];

#define FL_BUF_SIZE(x)  (fl_buf_info[x].size)
#define FL_BUF_TYPE(x)  (fl_buf_info[x].type)
#define FL_BUF_ZONE(x)  (fl_buf_info[x].zone)

enum {
    FL_PKTSHIFT = 2
};

#define FL_ALIGN    min(CACHE_LINE_SIZE, 32)
#if CACHE_LINE_SIZE > 64
#define SPG_LEN     128
#else
#define SPG_LEN     64
#endif

/* Used to track coalesced tx work request */
struct txpkts {
    uint64_t *flitp;    /* ptr to flit where next pkt should start */
    uint8_t npkt;       /* # of packets in this work request */
    uint8_t nflits;     /* # of flits used by this work request */
    uint16_t plen;      /* total payload (sum of all packets) */
};

/* A packet's SGL. This + m_pkthdr has all info needed for tx */
struct sgl {
    int nsegs;          /* # of segments in the SGL, 0 means imm. tx */
    int nflits;         /* # of flits needed for the SGL */
    bus_dma_segment_t seg[TX_SGL_SEGS];
};
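
/*
 * Editorial note: a "flit" throughout this file is an 8-byte unit, and a
 * hardware descriptor holds 8 flits; this is why flit counts are converted
 * to descriptor counts with howmany(nflits, 8) (see write_txpkt_wr) and why
 * the TXPKT*_WR_HDR macros divide their sizeof() totals by 8.
 */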

static void t4_evt_rx(void *);
static void t4_eth_rx(void *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int, iq_intr_handler_t *, char *);
static inline void init_fl(struct sge_fl *, int, char *);
static inline void init_eq(struct sge_eq *, int, char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *,
    bus_dmamap_t *, bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_intrq(struct adapter *, int, int, int);
static int free_intrq(struct sge_iq *);
static int alloc_fwq(struct adapter *, int);
static int free_fwq(struct sge_iq *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
static int free_rxq(struct port_info *, struct sge_rxq *);
static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static void refill_fl(struct adapter *, struct sge_fl *, int, int);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
static int alloc_tx_maps(struct sge_txq *);
static void free_tx_maps(struct sge_txq *);
static void set_fl_tag_idx(struct sge_fl *, int);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct adapter *,
    const struct cpl_sge_egr_update *);
static void handle_cpl(struct adapter *, struct sge_iq *);

static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);

extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *);

/*
 * Called on MOD_LOAD and fills up fl_buf_info[].
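 * Each entry maps a buffer size to the matching mbuf cluster type and UMA
 * zone, so that freelist buffers can be handed to and reclaimed from the
 * right zone (see the uma_zfree calls in refill_fl and free_fl_sdesc).
 */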
void
t4_sge_modload(void)
{
    int i;
    int bufsize[FL_BUF_SIZES] = {
        MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
        MJUMPAGESIZE,
#endif
        MJUM9BYTES,
        MJUM16BYTES
    };

    for (i = 0; i < FL_BUF_SIZES; i++) {
        FL_BUF_SIZE(i) = bufsize[i];
        FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
        FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
    }
}

/**
 * t4_sge_init - initialize SGE
 * @sc: the adapter
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queues here; instead the driver
 * top-level must request them individually.
 */
void
t4_sge_init(struct adapter *sc)
{
    struct sge *s = &sc->sge;
    int i;

    t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
        V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
        F_EGRSTATUSPAGESIZE,
        V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
        V_PKTSHIFT(FL_PKTSHIFT) |
        F_RXPKTCPLMODE |
        V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
    t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
        V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
        V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));

    for (i = 0; i < FL_BUF_SIZES; i++) {
        t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
            FL_BUF_SIZE(i));
    }

    t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
        V_THRESHOLD_0(s->counter_val[0]) |
        V_THRESHOLD_1(s->counter_val[1]) |
        V_THRESHOLD_2(s->counter_val[2]) |
        V_THRESHOLD_3(s->counter_val[3]));

    t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
        V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
        V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
    t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
        V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
        V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
    t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
        V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
        V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
}

int
t4_create_dma_tag(struct adapter *sc)
{
    int rc;

    rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
        BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
        BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
        NULL, &sc->dmat);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create main DMA tag: %d\n", rc);
    }

    return (rc);
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
    if (sc->dmat)
        bus_dma_tag_destroy(sc->dmat);

    return (0);
}

/*
 * Allocate and initialize the firmware event queue, control queues, and the
 * interrupt queues. The adapter owns all of these queues.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
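 *
 * Note: with INTR_SHARED set, the NINTRQ(sc) interrupt queues are spread
 * across the ports round-robin; otherwise every rx queue gets a dedicated
 * interrupt queue (see the two branches below).
 */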
int
t4_setup_adapter_queues(struct adapter *sc)
{
    int i, j, rc, intr_idx, qsize;
    struct sge_iq *iq;
    struct sge_ctrlq *ctrlq;
    iq_intr_handler_t *handler;
    char name[16];

    ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

    if (sysctl_ctx_init(&sc->ctx) == 0) {
        struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
        struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

        sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
            "fwq", CTLFLAG_RD, NULL, "firmware event queue");
        sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
            "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
        sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
            "intrq", CTLFLAG_RD, NULL, "interrupt queues");
    }

    /*
     * Interrupt queues
     */
    intr_idx = sc->intr_count - NINTRQ(sc);
    if (sc->flags & INTR_SHARED) {
        qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE);
        for (i = 0; i < NINTRQ(sc); i++, intr_idx++) {
            snprintf(name, sizeof(name), "%s intrq%d",
                device_get_nameunit(sc->dev), i);

            iq = &sc->sge.intrq[i];
            init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name);
            rc = alloc_intrq(sc, i % sc->params.nports, i,
                intr_idx);

            if (rc != 0) {
                device_printf(sc->dev,
                    "failed to create %s: %d\n", name, rc);
                return (rc);
            }
        }
    } else {
        int qidx = 0;
        struct port_info *pi;

        for (i = 0; i < sc->params.nports; i++) {
            pi = sc->port[i];
            qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE);
            for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) {
                snprintf(name, sizeof(name), "%s intrq%d",
                    device_get_nameunit(pi->dev), j);

                iq = &sc->sge.intrq[qidx];
                init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE,
                    NULL, name);
                rc = alloc_intrq(sc, i, qidx, intr_idx);

                if (rc != 0) {
                    device_printf(sc->dev,
                        "failed to create %s: %d\n",
                        name, rc);
                    return (rc);
                }
            }
        }
    }

    /*
     * Firmware event queue
     */
    snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
    if (sc->intr_count > T4_EXTRA_INTR) {
        handler = NULL;
        intr_idx = 1;
    } else {
        handler = t4_evt_rx;
        intr_idx = 0;
    }

    iq = &sc->sge.fwq;
    init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
    rc = alloc_fwq(sc, intr_idx);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create firmware event queue: %d\n", rc);

        return (rc);
    }

    /*
     * Control queues - one per port.
     */
    ctrlq = &sc->sge.ctrlq[0];
    for (i = 0; i < sc->params.nports; i++, ctrlq++) {
        snprintf(name, sizeof(name), "%s ctrlq%d",
            device_get_nameunit(sc->dev), i);
        init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);

        rc = alloc_ctrlq(sc, ctrlq, i);
        if (rc != 0) {
            device_printf(sc->dev,
                "failed to create control queue %d: %d\n", i, rc);
            return (rc);
        }
    }

    return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{
    int i;
    struct sge_iq *iq;

    ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

    /* Do this before freeing the queues */
    if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) {
        sysctl_ctx_free(&sc->ctx);
        sc->oid_fwq = NULL;
        sc->oid_ctrlq = NULL;
        sc->oid_intrq = NULL;
    }

    for (i = 0; i < sc->params.nports; i++)
        free_ctrlq(sc, &sc->sge.ctrlq[i]);

    iq = &sc->sge.fwq;
    free_fwq(iq);

    for (i = 0; i < NINTRQ(sc); i++) {
        iq = &sc->sge.intrq[i];
        free_intrq(iq);
    }

    return (0);
}

int
t4_setup_eth_queues(struct port_info *pi)
{
    int rc = 0, i, intr_idx;
    struct sge_rxq *rxq;
    struct sge_txq *txq;
    char name[16];
    struct adapter *sc = pi->adapter;

    if (sysctl_ctx_init(&pi->ctx) == 0) {
        struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
        struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

        pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
            "rxq", CTLFLAG_RD, NULL, "rx queues");
        pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
            "txq", CTLFLAG_RD, NULL, "tx queues");
    }

    for_each_rxq(pi, i, rxq) {

        snprintf(name, sizeof(name), "%s rxq%d-iq",
            device_get_nameunit(pi->dev), i);
        init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
            pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name);

        snprintf(name, sizeof(name), "%s rxq%d-fl",
            device_get_nameunit(pi->dev), i);
        init_fl(&rxq->fl, pi->qsize_rxq / 8, name);

        intr_idx = pi->first_rxq + i;
        if (sc->flags & INTR_SHARED)
            intr_idx %= NINTRQ(sc);

        rc = alloc_rxq(pi, rxq, intr_idx, i);
        if (rc != 0)
            goto done;
    }

    for_each_txq(pi, i, txq) {

        snprintf(name, sizeof(name), "%s txq%d",
            device_get_nameunit(pi->dev), i);
        init_eq(&txq->eq, pi->qsize_txq, name);

        rc = alloc_txq(pi, txq, i);
        if (rc != 0)
            goto done;
    }

done:
    if (rc)
        t4_teardown_eth_queues(pi);

    return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_eth_queues(struct port_info *pi)
{
    int i;
    struct sge_rxq *rxq;
    struct sge_txq *txq;

    /* Do this before freeing the queues */
    if (pi->oid_txq || pi->oid_rxq) {
        sysctl_ctx_free(&pi->ctx);
        pi->oid_txq = pi->oid_rxq = NULL;
    }

    for_each_txq(pi, i, txq) {
        free_txq(pi, txq);
    }

    for_each_rxq(pi, i, rxq) {
        free_rxq(pi, rxq);
    }

    return (0);
}

/* Deals with errors and the first (and only) interrupt queue */
void
t4_intr_all(void *arg)
{
    struct adapter *sc = arg;

    t4_intr_err(arg);
    t4_intr(&sc->sge.intrq[0]);
}

/* Deals with interrupts, and a few CPLs, on the given interrupt queue */
void
t4_intr(void *arg)
{
    struct sge_iq *iq = arg, *q;
    struct adapter *sc = iq->adapter;
    struct rsp_ctrl *ctrl;
    const struct rss_header *rss;
    int ndesc_pending = 0, ndesc_total = 0;
    int qid, rsp_type;

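    /*
     * Take ownership of the queue. If it is already busy (another thread
     * is servicing it) or has been disabled, leave it alone; free_iq_fl
     * relies on this IQS_IDLE/IQS_BUSY/IQS_DISABLED state machine to
     * synchronize with the handler during teardown.
     */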
    if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
        return;

    while (is_new_response(iq, &ctrl)) {

        rmb();

        rss = (const void *)iq->cdesc;
        rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);

        if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) {
            handle_cpl(sc, iq);
            goto nextdesc;
        }

        qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
        q = sc->sge.iqmap[qid];

        if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) {
            q->handler(q);
            atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE);
        }

nextdesc:       ndesc_total++;
        if (++ndesc_pending >= iq->qsize / 4) {
            t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
                V_CIDXINC(ndesc_pending) |
                V_INGRESSQID(iq->cntxt_id) |
                V_SEINTARM(
                V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
            ndesc_pending = 0;
        }

        iq_next(iq);
    }

    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
        V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

    atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
    struct adapter *sc = arg;

    t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
    t4_slow_intr_handler(sc);
}

/* Deals with the firmware event queue */
void
t4_intr_evt(void *arg)
{
    struct sge_iq *iq = arg;

    if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) {
        t4_evt_rx(arg);
        atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
    }
}

static void
t4_evt_rx(void *arg)
{
    struct sge_iq *iq = arg;
    struct adapter *sc = iq->adapter;
    struct rsp_ctrl *ctrl;
    int ndesc_pending = 0, ndesc_total = 0;

    KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));

    while (is_new_response(iq, &ctrl)) {
        int rsp_type;

        rmb();

        rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
        if (__predict_false(rsp_type != X_RSPD_TYPE_CPL))
            panic("%s: unexpected rsp_type %d", __func__, rsp_type);

        handle_cpl(sc, iq);

        ndesc_total++;
        if (++ndesc_pending >= iq->qsize / 4) {
            t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
                V_CIDXINC(ndesc_pending) |
                V_INGRESSQID(iq->cntxt_id) |
                V_SEINTARM(
                V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
            ndesc_pending = 0;
        }

        iq_next(iq);
    }

    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
        V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
}

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

static void
t4_eth_rx(void *arg)
{
    struct sge_rxq *rxq = arg;
    struct sge_iq *iq = arg;
    struct adapter *sc = iq->adapter;
    struct rsp_ctrl *ctrl;
    struct ifnet *ifp = rxq->ifp;
    struct sge_fl *fl = &rxq->fl;
    struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
    const struct rss_header *rss;
    const struct cpl_rx_pkt *cpl;
    uint32_t len;
    int ndescs = 0, i;
    struct mbuf *m0, *m;
#ifdef INET
    struct lro_ctrl *lro = &rxq->lro;
    struct lro_entry *l;
#endif

    prefetch(sd->m);
    prefetch(sd->cl);

    iq->intr_next = iq->intr_params;
    while (is_new_response(iq, &ctrl)) {

        rmb();

        rss = (const void *)iq->cdesc;
        i = G_RSPD_TYPE(ctrl->u.type_gen);

        KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
            ("%s: unexpected type %d CPL opcode 0x%x",
            __func__, i, rss->opcode));

        sd_next = sd + 1;
        if (__predict_false(fl->cidx + 1 == fl->cap))
            sd_next = fl->sdesc;
        prefetch(sd_next->m);
        prefetch(sd_next->cl);

        cpl = (const void *)(rss + 1);

        m0 = sd->m;
        sd->m = NULL;   /* consumed */

        len = be32toh(ctrl->pldbuflen_qid);
        if (__predict_false((len & F_RSPD_NEWBUF) == 0))
            panic("%s: cannot handle packed frames", __func__);
        len = G_RSPD_LEN(len);

        bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
            BUS_DMASYNC_POSTREAD);

        m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);

#ifdef T4_PKT_TIMESTAMP
        *mtod(m0, uint64_t *) =
            be64toh(ctrl->u.last_flit & 0xfffffffffffffff);
        m0->m_data += 8;

        /*
         * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat. Note
         * that it is in the leading free-space (see M_LEADINGSPACE) in
         * the mbuf. The kernel can clobber it during a pullup,
         * m_copymdata, etc. You need to make sure that the mbuf
         * reaches you unmolested if you care about the timestamp.
         */
#endif

        if (len < RX_COPY_THRESHOLD) {
            /* copy data to mbuf, buffer will be recycled */
            bcopy(sd->cl, mtod(m0, caddr_t), len);
            m0->m_len = len;
        } else {
            bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
            m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
            sd->cl = NULL;  /* consumed */
            m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
        }
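
        /*
         * The chip placed FL_PKTSHIFT bytes of padding in front of the
         * Ethernet header (programmed via V_PKTSHIFT in t4_sge_init);
         * strip it before handing the frame up the stack.
         */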
        len -= FL_PKTSHIFT;
        m0->m_len -= FL_PKTSHIFT;
        m0->m_data += FL_PKTSHIFT;

        m0->m_pkthdr.len = len;
        m0->m_pkthdr.rcvif = ifp;
        m0->m_flags |= M_FLOWID;
        m0->m_pkthdr.flowid = rss->hash_val;

        if (cpl->csum_calc && !cpl->err_vec &&
            ifp->if_capenable & IFCAP_RXCSUM) {
            m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
                CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
            if (cpl->ip_frag)
                m0->m_pkthdr.csum_data = be16toh(cpl->csum);
            else
                m0->m_pkthdr.csum_data = 0xffff;
            rxq->rxcsum++;
        }

        if (cpl->vlan_ex) {
            m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
            m0->m_flags |= M_VLANTAG;
            rxq->vlan_extraction++;
        }

        i = 1;  /* # of fl sdesc used */
        sd = sd_next;
        if (__predict_false(++fl->cidx == fl->cap))
            fl->cidx = 0;

        len -= m0->m_len;
        m = m0;
        while (len) {
            i++;

            sd_next = sd + 1;
            if (__predict_false(fl->cidx + 1 == fl->cap))
                sd_next = fl->sdesc;
            prefetch(sd_next->m);
            prefetch(sd_next->cl);

            m->m_next = sd->m;
            sd->m = NULL;   /* consumed */
            m = m->m_next;

            bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
                BUS_DMASYNC_POSTREAD);

            m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
            if (len <= MLEN) {
                bcopy(sd->cl, mtod(m, caddr_t), len);
                m->m_len = len;
            } else {
                bus_dmamap_unload(fl->tag[sd->tag_idx],
                    sd->map);
                m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
                sd->cl = NULL;  /* consumed */
                m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
            }

            sd = sd_next;
            if (__predict_false(++fl->cidx == fl->cap))
                fl->cidx = 0;

            len -= m->m_len;
        }

#ifdef INET
        if (cpl->l2info & htobe32(F_RXF_LRO) &&
            rxq->flags & RXQ_LRO_ENABLED &&
            tcp_lro_rx(lro, m0, 0) == 0) {
            /* queued for LRO */
        } else
#endif
            ifp->if_input(ifp, m0);

        FL_LOCK(fl);
        fl->needed += i;
        if (fl->needed >= 32)
            refill_fl(sc, fl, 64, 32);
        FL_UNLOCK(fl);

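        /*
         * Hand the SGE a consumer-index update (without rearming the
         * interrupt) roughly every 32 descriptors, so a long burst does
         * not leave the queue looking un-serviced.
         */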
        if (++ndescs > 32) {
            t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
                V_CIDXINC(ndescs) |
                V_INGRESSQID((u32)iq->cntxt_id) |
                V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
            ndescs = 0;
        }

        iq_next(iq);
    }

#ifdef INET
    while (!SLIST_EMPTY(&lro->lro_active)) {
        l = SLIST_FIRST(&lro->lro_active);
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, l);
    }
#endif

    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
        V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));

    FL_LOCK(fl);
    if (fl->needed >= 32)
        refill_fl(sc, fl, 128, 8);
    FL_UNLOCK(fl);
}

int
t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
{
    return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
    struct port_info *pi = (void *)ifp->if_softc;
    struct adapter *sc = pi->adapter;
    struct sge_eq *eq = &txq->eq;
    struct buf_ring *br = txq->br;
    struct mbuf *next;
    int rc, coalescing, can_reclaim;
    struct txpkts txpkts;
    struct sgl sgl;

    TXQ_LOCK_ASSERT_OWNED(txq);
    KASSERT(m, ("%s: called with nothing to do.", __func__));

    prefetch(&eq->desc[eq->pidx]);
    prefetch(&txq->sdesc[eq->pidx]);

    txpkts.npkt = 0;    /* indicates there's nothing in txpkts */
    coalescing = 0;

    if (eq->avail < 8)
        reclaim_tx_descs(txq, 0, 8);

    for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

        if (eq->avail < 8)
            break;

        next = m->m_nextpkt;
        m->m_nextpkt = NULL;

        if (next || buf_ring_peek(br))
            coalescing = 1;

        rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
        if (rc != 0) {
            if (rc == ENOMEM) {

                /* Short of resources, suspend tx */

                m->m_nextpkt = next;
                break;
            }

            /*
             * Unrecoverable error for this packet, throw it away
             * and move on to the next. get_pkt_sgl may already
             * have freed m (it will be NULL in that case and the
             * m_freem here is still safe).
             */

            m_freem(m);
            continue;
        }

        if (coalescing &&
            add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

            /* Successfully absorbed into txpkts */

            write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
            goto doorbell;
        }

        /*
         * We weren't coalescing to begin with, or current frame could
         * not be coalesced (add_to_txpkts flushes txpkts if a frame
         * given to it can't be coalesced). Either way there should be
         * nothing in txpkts.
         */
        KASSERT(txpkts.npkt == 0,
            ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

        /* We're sending out individual packets now */
        coalescing = 0;

        if (eq->avail < 8)
            reclaim_tx_descs(txq, 0, 8);
        rc = write_txpkt_wr(pi, txq, m, &sgl);
        if (rc != 0) {

            /* Short of hardware descriptors, suspend tx */

            /*
             * This is an unlikely but expensive failure. We've
             * done all the hard work (DMA mappings etc.) and now we
             * can't send out the packet. What's worse, we have to
             * spend even more time freeing up everything in sgl.
             */
            txq->no_desc++;
            free_pkt_sgl(txq, &sgl);

            m->m_nextpkt = next;
            break;
        }

        ETHER_BPF_MTAP(ifp, m);
        if (sgl.nsegs == 0)
            m_freem(m);

doorbell:
        /* Fewer and fewer doorbells as the queue fills up */
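        /*
         * Illustrative arithmetic: with 64 descriptors outstanding,
         * fls(64) == 7, so the doorbell is rung only once at least
         * 1 << (7 / 2) == 8 descriptors are pending.
         */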
        if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
            ring_eq_db(sc, eq);

        can_reclaim = reclaimable(eq);
        if (can_reclaim >= 32)
            reclaim_tx_descs(txq, can_reclaim, 32);
    }

    if (txpkts.npkt > 0)
        write_txpkts_wr(txq, &txpkts);

    /*
     * m not NULL means there was an error but we haven't thrown it away.
     * This can happen when we're short of tx descriptors (no_desc) or
     * maybe even DMA maps (no_dmamap). Either way, a credit flush and
     * reclaim will get things going again.
     *
     * If eq->avail is already 0 we know a credit flush was requested in
     * the WR that reduced it to 0 so we don't need another flush (we don't
     * have any descriptor for a flush WR anyway, duh).
     */
    if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
        struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];

        txsd->desc_used = 1;
        txsd->credits = 0;
        write_eqflush_wr(eq);
    }
    txq->m = m;

    if (eq->pending)
        ring_eq_db(sc, eq);

    can_reclaim = reclaimable(eq);
    if (can_reclaim >= 32)
        reclaim_tx_descs(txq, can_reclaim, 128);

    return (0);
}

void
t4_update_fl_bufsize(struct ifnet *ifp)
{
    struct port_info *pi = ifp->if_softc;
    struct sge_rxq *rxq;
    struct sge_fl *fl;
    int i;

    for_each_rxq(pi, i, rxq) {
        fl = &rxq->fl;

        FL_LOCK(fl);
        set_fl_tag_idx(fl, ifp->if_mtu);
        FL_UNLOCK(fl);
    }
}

/*
 * A non-NULL handler indicates this iq will not receive direct interrupts;
 * the handler will be invoked by an interrupt queue.
 */
static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
    int qsize, int esize, iq_intr_handler_t *handler, char *name)
{
    KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
        ("%s: bad tmr_idx %d", __func__, tmr_idx));
    KASSERT(pktc_idx < SGE_NCOUNTERS,   /* -ve is ok, means don't use */
        ("%s: bad pktc_idx %d", __func__, pktc_idx));

    iq->flags = 0;
    iq->adapter = sc;
    iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
        V_QINTR_CNT_EN(pktc_idx >= 0);
    iq->intr_pktc_idx = pktc_idx;
    iq->qsize = roundup(qsize, 16);     /* See FW_IQ_CMD/iqsize */
    iq->esize = max(esize, 16);         /* See FW_IQ_CMD/iqesize */
    iq->handler = handler;
    strlcpy(iq->lockname, name, sizeof(iq->lockname));
}

static inline void
init_fl(struct sge_fl *fl, int qsize, char *name)
{
    fl->qsize = qsize;
    strlcpy(fl->lockname, name, sizeof(fl->lockname));
}

static inline void
init_eq(struct sge_eq *eq, int qsize, char *name)
{
    eq->qsize = qsize;
    strlcpy(eq->lockname, name, sizeof(eq->lockname));
}

static int
alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_addr_t *pa, void **va)
{
    int rc;

    rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
        BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
    if (rc != 0) {
        device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
        goto done;
    }

    rc = bus_dmamem_alloc(*tag, va,
        BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
    if (rc != 0) {
        device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
        goto done;
    }

    rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
    if (rc != 0) {
        device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
        goto done;
    }
done:
    if (rc)
        free_ring(sc, *tag, *map, *pa, *va);

    return (rc);
}

static int
free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
    bus_addr_t pa, void *va)
{
    if (pa)
        bus_dmamap_unload(tag, map);
    if (va)
        bus_dmamem_free(tag, va, map);
    if (tag)
        bus_dma_tag_destroy(tag);

    return (0);
}

/*
 * Allocates the ring for an ingress queue and an optional freelist. If the
 * freelist is specified it will be allocated and then associated with the
 * ingress queue.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
 *
 * If the ingress queue will take interrupts directly (iq->handler == NULL)
 * then intr_idx specifies the vector, starting from 0. Otherwise it specifies
 * the index of the interrupt queue to which its interrupts will be forwarded.
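 *
 * A negative cong leaves the firmware's congestion management alone; a
 * non-negative value is used as the congestion channel map for the freelist
 * (see the FL0CNGCHMAP/FL0CONGEN handling below).
 */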
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
    int intr_idx, int cong)
{
    int rc, i, cntxt_id;
    size_t len;
    struct fw_iq_cmd c;
    struct adapter *sc = iq->adapter;
    __be32 v = 0;

    len = iq->qsize * iq->esize;
    rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
        (void **)&iq->desc);
    if (rc != 0)
        return (rc);

    bzero(&c, sizeof(c));
    c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
        F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
        V_FW_IQ_CMD_VFN(0));

    c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
        FW_LEN16(c));

    /* Special handling for firmware event queue */
    if (iq == &sc->sge.fwq)
        v |= F_FW_IQ_CMD_IQASYNCH;

    if (iq->handler) {
        KASSERT(intr_idx < NINTRQ(sc),
            ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
        v |= F_FW_IQ_CMD_IQANDST;
        v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id);
    } else {
        KASSERT(intr_idx < sc->intr_count,
            ("%s: invalid direct intr_idx %d", __func__, intr_idx));
        v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
    }

    c.type_to_iqandstindex = htobe32(v |
        V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
        V_FW_IQ_CMD_VIID(pi->viid) |
        V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
    c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
        F_FW_IQ_CMD_IQGTSMODE |
        V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
        V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
    c.iqsize = htobe16(iq->qsize);
    c.iqaddr = htobe64(iq->ba);
    if (cong >= 0)
        c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);

    if (fl) {
        mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);

        for (i = 0; i < FL_BUF_SIZES; i++) {

            /*
             * A freelist buffer must be 16 byte aligned as the SGE
             * uses the low 4 bits of the bus addr to figure out the
             * buffer size.
             */
            rc = bus_dma_tag_create(sc->dmat, 16, 0,
                BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
                FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
                NULL, NULL, &fl->tag[i]);
            if (rc != 0) {
                device_printf(sc->dev,
                    "failed to create fl DMA tag[%d]: %d\n",
                    i, rc);
                return (rc);
            }
        }
        len = fl->qsize * RX_FL_ESIZE;
        rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
            &fl->ba, (void **)&fl->desc);
        if (rc)
            return (rc);

        /* Allocate space for one software descriptor per buffer. */
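        /*
         * Capacity arithmetic, assuming RX_FL_ESIZE is 64 (one 64-byte
         * hardware freelist descriptor holds eight 8-byte buffer
         * addresses): the status page occupies the last
         * SPG_LEN / RX_FL_ESIZE descriptors of the ring and every
         * remaining descriptor covers 8 buffers, hence the
         * multiplication by 8 below (see also FL_HW_IDX).
         */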
        fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
        FL_LOCK(fl);
        set_fl_tag_idx(fl, pi->ifp->if_mtu);
        rc = alloc_fl_sdesc(fl);
        FL_UNLOCK(fl);
        if (rc != 0) {
            device_printf(sc->dev,
                "failed to setup fl software descriptors: %d\n",
                rc);
            return (rc);
        }
        fl->needed = fl->cap;

        c.iqns_to_fl0congen =
            htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
            F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
            F_FW_IQ_CMD_FL0PADEN);
        if (cong >= 0) {
            c.iqns_to_fl0congen |=
                htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
                F_FW_IQ_CMD_FL0CONGCIF |
                F_FW_IQ_CMD_FL0CONGEN);
        }
        c.fl0dcaen_to_fl0cidxfthresh =
            htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
            V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
        c.fl0size = htobe16(fl->qsize);
        c.fl0addr = htobe64(fl->ba);
    }

    rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create ingress queue: %d\n", rc);
        return (rc);
    }

    iq->cdesc = iq->desc;
    iq->cidx = 0;
    iq->gen = 1;
    iq->intr_next = iq->intr_params;
    iq->cntxt_id = be16toh(c.iqid);
    iq->abs_id = be16toh(c.physiqid);
    iq->flags |= (IQ_ALLOCATED | IQ_STARTED);

    cntxt_id = iq->cntxt_id - sc->sge.iq_start;
    KASSERT(cntxt_id < sc->sge.niq,
        ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
        cntxt_id, sc->sge.niq - 1));
    sc->sge.iqmap[cntxt_id] = iq;

    if (fl) {
        fl->cntxt_id = be16toh(c.fl0id);
        fl->pidx = fl->cidx = 0;

        cntxt_id = fl->cntxt_id - sc->sge.eq_start;
        KASSERT(cntxt_id < sc->sge.neq,
            ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
            cntxt_id, sc->sge.neq - 1));
        sc->sge.eqmap[cntxt_id] = (void *)fl;

        FL_LOCK(fl);
        refill_fl(sc, fl, -1, 8);
        FL_UNLOCK(fl);
    }

    /* Enable IQ interrupts */
    atomic_store_rel_32(&iq->state, IQS_IDLE);
    t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
        V_INGRESSQID(iq->cntxt_id));

    return (0);
}

/*
 * This can be called with the iq/fl in any state - fully allocated and
 * functional, partially allocated, even all-zeroed out.
 */
static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
    int i, rc;
    struct adapter *sc = iq->adapter;
    device_t dev;

    if (sc == NULL)
        return (0); /* nothing to do */

    dev = pi ? pi->dev : sc->dev;

    if (iq->flags & IQ_STARTED) {
        rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
            iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
        if (rc != 0) {
            device_printf(dev,
                "failed to stop queue %p: %d\n", iq, rc);
            return (rc);
        }
        iq->flags &= ~IQ_STARTED;

        /* Synchronize with the interrupt handler */
        while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
            pause("iqfree", hz / 1000);
    }

    if (iq->flags & IQ_ALLOCATED) {

        rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
            FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
            fl ? fl->cntxt_id : 0xffff, 0xffff);
        if (rc != 0) {
            device_printf(dev,
                "failed to free queue %p: %d\n", iq, rc);
            return (rc);
        }
        iq->flags &= ~IQ_ALLOCATED;
    }

    free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);

    bzero(iq, sizeof(*iq));

    if (fl) {
        free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
            fl->desc);

        if (fl->sdesc) {
            FL_LOCK(fl);
            free_fl_sdesc(fl);
            FL_UNLOCK(fl);
        }

        if (mtx_initialized(&fl->fl_lock))
            mtx_destroy(&fl->fl_lock);

        for (i = 0; i < FL_BUF_SIZES; i++) {
            if (fl->tag[i])
                bus_dma_tag_destroy(fl->tag[i]);
        }

        bzero(fl, sizeof(*fl));
    }

    return (0);
}

static int
alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx)
{
    int rc;
    struct sysctl_oid *oid;
    struct sysctl_oid_list *children;
    char name[16];
    struct sge_iq *intrq = &sc->sge.intrq[intrq_idx];

    rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1);
    if (rc != 0)
        return (rc);

    children = SYSCTL_CHILDREN(sc->oid_intrq);

    snprintf(name, sizeof(name), "%d", intrq_idx);
    oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "interrupt queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
        CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I",
        "consumer index");

    return (rc);
}

static int
free_intrq(struct sge_iq *iq)
{
    return free_iq_fl(NULL, iq, NULL);
}

static int
alloc_fwq(struct adapter *sc, int intr_idx)
{
    int rc;
    struct sysctl_oid_list *children;
    struct sge_iq *fwq = &sc->sge.fwq;

    rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
    if (rc != 0)
        return (rc);

    children = SYSCTL_CHILDREN(sc->oid_fwq);

    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
        CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
        "consumer index");

    return (rc);
}

static int
free_fwq(struct sge_iq *iq)
{
    return free_iq_fl(NULL, iq, NULL);
}

static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
{
    int rc;
    struct sysctl_oid *oid;
    struct sysctl_oid_list *children;
    char name[16];

    rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
    if (rc != 0)
        return (rc);

#ifdef INET
    rc = tcp_lro_init(&rxq->lro);
    if (rc != 0)
        return (rc);
    rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */

    if (pi->ifp->if_capenable & IFCAP_LRO)
        rxq->flags |= RXQ_LRO_ENABLED;
#endif
    rxq->ifp = pi->ifp;

    children = SYSCTL_CHILDREN(pi->oid_rxq);

    snprintf(name, sizeof(name), "%d", idx);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "rx queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
        CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
        "absolute id of the queue");
#ifdef INET
    SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
        &rxq->lro.lro_queued, 0, NULL);
    SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
        &rxq->lro.lro_flushed, 0, NULL);
#endif
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
        &rxq->rxcsum, "# of times hardware assisted with checksum");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
        CTLFLAG_RD, &rxq->vlan_extraction,
        "# of times hardware extracted 802.1Q tag");

    return (rc);
}

static int
free_rxq(struct port_info *pi, struct sge_rxq *rxq)
{
    int rc;

#ifdef INET
    if (rxq->lro.ifp) {
        tcp_lro_free(&rxq->lro);
        rxq->lro.ifp = NULL;
    }
#endif

    rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
    if (rc == 0)
        bzero(rxq, sizeof(*rxq));

    return (rc);
}

static int
alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
{
    int rc, cntxt_id;
    size_t len;
    struct fw_eq_ctrl_cmd c;
    struct sge_eq *eq = &ctrlq->eq;
    char name[16];
    struct sysctl_oid *oid;
    struct sysctl_oid_list *children;

    mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

    len = eq->qsize * CTRL_EQ_ESIZE;
    rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
        &eq->ba, (void **)&eq->desc);
    if (rc)
        return (rc);

    eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
    eq->spg = (void *)&eq->desc[eq->cap];
    eq->avail = eq->cap - 1;    /* one less to avoid cidx = pidx */
    if (sc->flags & INTR_SHARED)
        eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id;
    else
        eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id;

    bzero(&c, sizeof(c));

    c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
        F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
        V_FW_EQ_CTRL_CMD_VFN(0));
    c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
        F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
    c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
    c.physeqid_pkd = htobe32(0);
    c.fetchszm_to_iqid =
        htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
        V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) |
        F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
    c.dcaen_to_eqsize =
        htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
        V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
        V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
        V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
    c.eqaddr = htobe64(eq->ba);

    rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create control queue %d: %d\n", idx, rc);
        return (rc);
    }

    eq->pidx = eq->cidx = 0;
    eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
    eq->flags |= (EQ_ALLOCATED | EQ_STARTED);

    cntxt_id = eq->cntxt_id - sc->sge.eq_start;
    KASSERT(cntxt_id < sc->sge.neq,
        ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
        cntxt_id, sc->sge.neq - 1));
    sc->sge.eqmap[cntxt_id] = eq;

    children = SYSCTL_CHILDREN(sc->oid_ctrlq);

    snprintf(name, sizeof(name), "%d", idx);
    oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "ctrl queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx",
        CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I",
        "producer index");
    SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
        &ctrlq->no_desc, 0,
        "# of times ctrlq ran out of hardware descriptors");

    return (rc);
}

static int
free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
{
    int rc;
    struct sge_eq *eq = &ctrlq->eq;

    if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
        rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
        if (rc != 0) {
            device_printf(sc->dev,
                "failed to free ctrl queue %p: %d\n", eq, rc);
            return (rc);
        }
        eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
    }

    free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

    if (mtx_initialized(&eq->eq_lock))
        mtx_destroy(&eq->eq_lock);

    bzero(ctrlq, sizeof(*ctrlq));
    return (0);
}

static int
alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
{
    int rc, cntxt_id;
    size_t len;
    struct adapter *sc = pi->adapter;
    struct fw_eq_eth_cmd c;
    struct sge_eq *eq = &txq->eq;
    char name[16];
    struct sysctl_oid *oid;
    struct sysctl_oid_list *children;
    struct sge_iq *intrq;

    txq->ifp = pi->ifp;
    TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);

    mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);

    len = eq->qsize * TX_EQ_ESIZE;
    rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
        &eq->ba, (void **)&eq->desc);
    if (rc)
        return (rc);

    eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
    eq->spg = (void *)&eq->desc[eq->cap];
    eq->avail = eq->cap - 1;    /* one less to avoid cidx = pidx */
    txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
        M_ZERO | M_WAITOK);
    txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);

    intrq = &sc->sge.intrq[0];
    if (sc->flags & INTR_SHARED)
        eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id;
    else
        eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id;

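    /*
     * Tag for tx DMA mappings. The 64KB maximum mapping size should be
     * enough for any frame the stack hands us, since TSO payloads are
     * bounded by the 16-bit IP total-length field.
     */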
    rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
        BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
        BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
    if (rc != 0) {
        device_printf(sc->dev,
            "failed to create tx DMA tag: %d\n", rc);
        return (rc);
    }

    rc = alloc_tx_maps(txq);
    if (rc != 0) {
        device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
        return (rc);
    }

    bzero(&c, sizeof(c));

    c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
        F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
        V_FW_EQ_ETH_CMD_VFN(0));
    c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
        F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
    c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
    c.fetchszm_to_iqid =
        htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
        V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
        V_FW_EQ_ETH_CMD_IQID(eq->iqid));
    c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
        V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
        V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
        V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
    c.eqaddr = htobe64(eq->ba);

    rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
    if (rc != 0) {
        device_printf(pi->dev,
            "failed to create egress queue: %d\n", rc);
        return (rc);
    }

    eq->pidx = eq->cidx = 0;
    eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
    eq->flags |= (EQ_ALLOCATED | EQ_STARTED);

    cntxt_id = eq->cntxt_id - sc->sge.eq_start;
    KASSERT(cntxt_id < sc->sge.neq,
        ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
        cntxt_id, sc->sge.neq - 1));
    sc->sge.eqmap[cntxt_id] = eq;

    children = SYSCTL_CHILDREN(pi->oid_txq);

    snprintf(name, sizeof(name), "%d", idx);
    oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
        NULL, "tx queue");
    children = SYSCTL_CHILDREN(oid);

    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
        &txq->txcsum, "# of times hardware assisted with checksum");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
        CTLFLAG_RD, &txq->vlan_insertion,
        "# of times hardware inserted 802.1Q tag");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
        &txq->tso_wrs, "# of IPv4 TSO work requests");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
        &txq->imm_wrs, "# of work requests with immediate data");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
        &txq->sgl_wrs, "# of work requests with direct SGL");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
        &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
        &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
    SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts",
        CTLFLAG_RD, &txq->txpkts_pkts,
        "# of frames tx'd using txpkts work requests");

    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
        &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
        &txq->no_desc, 0,
        "# of times txq ran out of hardware descriptors");
    SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
        &txq->egr_update, 0, "egress update notifications from the SGE");

    return (rc);
}

static int
free_txq(struct port_info *pi, struct sge_txq *txq)
{
    int rc;
    struct adapter *sc = pi->adapter;
    struct sge_eq *eq = &txq->eq;

    if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {

        /*
         * Wait for the response to a credit flush if there's one
         * pending. Clearing the flag tells handle_sge_egr_update or
         * cxgbe_txq_start (depending on how far the response has made
         * it) that they should ignore the response and wake up free_txq
         * instead.
         *
         * The interface has been marked down by the time we get here
         * (both IFF_UP and IFF_DRV_RUNNING cleared). qflush has
         * emptied the tx buf_rings and we know nothing new is being
         * queued for tx so we don't have to worry about a new credit
         * flush request.
         */
        TXQ_LOCK(txq);
        if (eq->flags & EQ_CRFLUSHED) {
            eq->flags &= ~EQ_CRFLUSHED;
            msleep(txq, &eq->eq_lock, 0, "crflush", 0);
        }
        TXQ_UNLOCK(txq);

        rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
        if (rc != 0) {
            device_printf(pi->dev,
                "failed to free egress queue %p: %d\n", eq, rc);
            return (rc);
        }
        eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
    }

    free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);

    free(txq->sdesc, M_CXGBE);

    if (txq->maps)
        free_tx_maps(txq);

    buf_ring_free(txq->br, M_CXGBE);

    if (txq->tx_tag)
        bus_dma_tag_destroy(txq->tx_tag);

    if (mtx_initialized(&eq->eq_lock))
        mtx_destroy(&eq->eq_lock);

    bzero(txq, sizeof(*txq));
    return (0);
}

static void
oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
    bus_addr_t *ba = arg;

    KASSERT(nseg == 1,
        ("%s meant for single segment mappings only.", __func__));

    *ba = error ? 0 : segs->ds_addr;
}

static inline bool
is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
{
    *ctrl = (void *)((uintptr_t)iq->cdesc +
        (iq->esize - sizeof(struct rsp_ctrl)));

    return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
}

static inline void
iq_next(struct sge_iq *iq)
{
    iq->cdesc = (void *)((uintptr_t)iq->cdesc + iq->esize);
    if (__predict_false(++iq->cidx == iq->qsize - 1)) {
        iq->cidx = 0;
        iq->gen ^= 1;
        iq->cdesc = iq->desc;
    }
}

#define FL_HW_IDX(x) ((x) >> 3)
static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)
{
    int ndesc = fl->pending / 8;

    if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
        ndesc--;    /* hold back one credit */

    if (ndesc <= 0)
        return;     /* nothing to do */

    wmb();

    t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
        V_QID(fl->cntxt_id) | V_PIDX(ndesc));
    fl->pending -= ndesc * 8;
}

/*
 * Fill up the freelist by up to nbufs and ring its doorbell if the number of
 * buffers ready to be handed to the hardware >= dbthresh.
 */
static void
refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
{
    __be64 *d = &fl->desc[fl->pidx];
    struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
    bus_dma_tag_t tag;
    bus_addr_t pa;
    caddr_t cl;
    int rc;

    FL_LOCK_ASSERT_OWNED(fl);

    if (nbufs < 0 || nbufs > fl->needed)
        nbufs = fl->needed;

    while (nbufs--) {

        if (sd->cl != NULL) {

            /*
             * This happens when a frame small enough to fit
             * entirely in an mbuf was received in cl last time.
             * We'd held on to cl and can reuse it now. Note that
             * we reuse a cluster of the old size if fl->tag_idx is
             * no longer the same as sd->tag_idx.
             */

            KASSERT(*d == sd->ba_tag,
                ("%s: recycling problem at pidx %d",
                __func__, fl->pidx));

            d++;
            goto recycled;
        }

        if (fl->tag_idx != sd->tag_idx) {
            bus_dmamap_t map;
            bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
            bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];

            /*
             * An MTU change can get us here. Discard the old map
             * which was created with the old tag, but only if
             * we're able to get a new one.
             */
            rc = bus_dmamap_create(newtag, 0, &map);
            if (rc == 0) {
                bus_dmamap_destroy(oldtag, sd->map);
                sd->map = map;
                sd->tag_idx = fl->tag_idx;
            }
        }

        tag = fl->tag[sd->tag_idx];

        cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
        if (cl == NULL)
            break;

        rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
            oneseg_dma_callback, &pa, 0);
        if (rc != 0 || pa == 0) {
            fl->dmamap_failed++;
            uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
            break;
        }

        sd->cl = cl;
        *d++ = htobe64(pa | sd->tag_idx);

#ifdef INVARIANTS
        sd->ba_tag = htobe64(pa | sd->tag_idx);
#endif

recycled:
        /* sd->m is never recycled, should always be NULL */
        KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));

        sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
        if (sd->m == NULL)
            break;

        fl->pending++;
        fl->needed--;
        sd++;
        if (++fl->pidx == fl->cap) {
            fl->pidx = 0;
            sd = fl->sdesc;
            d = fl->desc;
        }
    }

    if (fl->pending >= dbthresh)
        ring_fl_db(sc, fl);
}

static int
alloc_fl_sdesc(struct sge_fl *fl)
{
    struct fl_sdesc *sd;
    bus_dma_tag_t tag;
    int i, rc;

    FL_LOCK_ASSERT_OWNED(fl);

    fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
        M_ZERO | M_WAITOK);

    tag = fl->tag[fl->tag_idx];
    sd = fl->sdesc;
    for (i = 0; i < fl->cap; i++, sd++) {

        sd->tag_idx = fl->tag_idx;
        rc = bus_dmamap_create(tag, 0, &sd->map);
        if (rc != 0)
            goto failed;
    }

    return (0);
failed:
    while (--i >= 0) {
        sd--;
        bus_dmamap_destroy(tag, sd->map);
        if (sd->m) {
            m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
            m_free(sd->m);
            sd->m = NULL;
        }
    }
    KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));

    free(fl->sdesc, M_CXGBE);
    fl->sdesc = NULL;

    return (rc);
}

static void
free_fl_sdesc(struct sge_fl *fl)
{
    struct fl_sdesc *sd;
    int i;

    FL_LOCK_ASSERT_OWNED(fl);

    sd = fl->sdesc;
    for (i = 0; i < fl->cap; i++, sd++) {

        if (sd->m) {
            m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
            m_free(sd->m);
            sd->m = NULL;
        }

        if (sd->cl) {
            bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
            uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
            sd->cl = NULL;
        }

        bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
    }

    free(fl->sdesc, M_CXGBE);
    fl->sdesc = NULL;
}

static int
alloc_tx_maps(struct sge_txq *txq)
{
    struct tx_map *txm;
    int i, rc, count;

    /*
     * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
     * limit for any WR). txq->no_dmamap events shouldn't occur if maps is
     * sized for the worst case.
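     *
     * Illustrative: a txq with qsize 1024 gets 1024 * 10 / 8 = 1280 DMA
     * maps from the loop below.
     */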
	 */
	count = txq->eq.qsize * 10 / 8;
	txq->map_total = txq->map_avail = count;
	txq->map_cidx = txq->map_pidx = 0;

	txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
	    M_ZERO | M_WAITOK);

	txm = txq->maps;
	for (i = 0; i < count; i++, txm++) {
		rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
		if (rc != 0)
			goto failed;
	}

	return (0);
failed:
	while (--i >= 0) {
		txm--;
		bus_dmamap_destroy(txq->tx_tag, txm->map);
	}
	KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));

	free(txq->maps, M_CXGBE);
	txq->maps = NULL;

	return (rc);
}

static void
free_tx_maps(struct sge_txq *txq)
{
	struct tx_map *txm;
	int i;

	txm = txq->maps;
	for (i = 0; i < txq->map_total; i++, txm++) {

		if (txm->m) {
			bus_dmamap_unload(txq->tx_tag, txm->map);
			m_freem(txm->m);
			txm->m = NULL;
		}

		bus_dmamap_destroy(txq->tx_tag, txm->map);
	}

	free(txq->maps, M_CXGBE);
	txq->maps = NULL;
}

/*
 * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
 * willing to use up to 2 hardware descriptors which means a maximum of 96
 * bytes of immediate data.
 */
#define IMM_LEN ( \
      2 * TX_EQ_ESIZE \
    - sizeof(struct fw_eth_tx_pkt_wr) \
    - sizeof(struct cpl_tx_pkt_core))

/*
 * Returns non-zero on failure, no need to clean up anything in that case.
 *
 * Note 1: We always try to defrag the mbuf if required and return EFBIG only
 * if the resulting chain still won't fit in a tx descriptor.
 *
 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
 * does not have the TCP header in it.
 */
static int
get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
    int sgl_only)
{
	struct mbuf *m = *fp;
	struct tx_map *txm;
	int rc, defragged = 0, n;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (m->m_pkthdr.tso_segsz)
		sgl_only = 1;	/* Do not allow immediate data with LSO */

start:	sgl->nsegs = 0;

	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
		return (0);	/* nsegs = 0 tells caller to use imm. tx */

	if (txq->map_avail == 0) {
		txq->no_dmamap++;
		return (ENOMEM);
	}
	txm = &txq->maps[txq->map_pidx];

	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
		*fp = m_pullup(m, 50);
		m = *fp;
		if (m == NULL)
			return (ENOBUFS);
	}

	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
	    &sgl->nsegs, BUS_DMA_NOWAIT);
	if (rc == EFBIG && defragged == 0) {
		m = m_defrag(m, M_DONTWAIT);
		if (m == NULL)
			return (EFBIG);

		defragged = 1;
		*fp = m;
		goto start;
	}
	if (rc != 0)
		return (rc);

	txm->m = m;
	txq->map_avail--;
	if (++txq->map_pidx == txq->map_total)
		txq->map_pidx = 0;

	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));

	/*
	 * Store the # of flits required to hold this frame's SGL in nflits.
	 * An SGL has a (ULPTX header + len0, addr0) tuple optionally followed
	 * by multiple (len0 + len1, addr0, addr1) tuples.  If addr1 is not
	 * used then len1 must be set to 0.
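	 * The arithmetic below therefore charges 2 flits for the header
	 * tuple, 3 flits for every full pair of extra segments, and 2 flits
	 * for an odd trailing segment.  E.g., nsegs = 3 gives n = 2 and
	 * nflits = 3 * 2 / 2 + 0 + 2 = 5, while nsegs = 4 gives n = 3 and
	 * nflits = 3 * 3 / 2 + 1 + 2 = 7.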
	 */
	n = sgl->nsegs - 1;
	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;

	return (0);
}

/*
 * Releases all the txq resources used up in the specified sgl.
 */
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
	struct tx_map *txm;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (sgl->nsegs == 0)
		return (0);	/* didn't use any map */

	/* 1 pkt uses exactly 1 map, back it out */

	txq->map_avail++;
	if (txq->map_pidx > 0)
		txq->map_pidx--;
	else
		txq->map_pidx = txq->map_total - 1;

	txm = &txq->maps[txq->map_pidx];
	bus_dmamap_unload(txq->tx_tag, txm->map);
	txm->m = NULL;

	return (0);
}

static int
write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
    struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkt_wr *wr;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;		/* used in many unrelated places */
	uint64_t ctrl1;
	int nflits, ndesc, pktlen;
	struct tx_sdesc *txsd;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);

	pktlen = m->m_pkthdr.len;

	/*
	 * Do we have enough flits to send this frame out?
	 */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (m->m_pkthdr.tso_segsz) {
		nflits = TXPKT_LSO_WR_HDR;
		ctrl += sizeof(struct cpl_tx_pkt_lso);
	} else
		nflits = TXPKT_WR_HDR;
	if (sgl->nsegs > 0)
		nflits += sgl->nflits;
	else {
		nflits += howmany(pktlen, 8);
		ctrl += pktlen;
	}
	ndesc = howmany(nflits, 8);
	if (ndesc > eq->avail)
		return (ENOMEM);

	/* Firmware work request header */
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_WR_IMMDLEN(ctrl));
	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
		eq->flags |= EQ_CRFLUSHED;
	}

	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	if (m->m_pkthdr.tso_segsz) {
		struct cpl_tx_pkt_lso *lso = (void *)(wr + 1);
		struct ether_header *eh;
		struct ip *ip;
		struct tcphdr *tcp;

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE;

		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			ctrl |= V_LSO_ETHHDR_LEN(1);
			ip = (void *)((struct ether_vlan_header *)eh + 1);
		} else
			ip = (void *)(eh + 1);

		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
		    V_LSO_TCPHDR_LEN(tcp->th_off);

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	ctrl1 = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
		ctrl1 |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}
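	/*
	 * Note that the checksum flags above work in the negative: the
	 * hardware inserts IP and L4 checksums unless the *_DIS bits are
	 * set, so ctrl1 only switches off the offloads that the stack did
	 * not request via csum_flags.
	 */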
	/* CPL header */
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Software descriptor */
	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	/* SGL */
	dst = (void *)(cpl + 1);
	if (sgl->nsegs > 0) {
		txsd->credits = 1;
		txq->sgl_wrs++;
		write_sgl_to_txd(eq, sgl, &dst);
	} else {
		txsd->credits = 0;
		txq->imm_wrs++;
		for (; m; m = m->m_next) {
			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
			pktlen -= m->m_len;
#endif
		}
#ifdef INVARIANTS
		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif
	}

	txq->txpkt_wrs++;
	return (0);
}

/*
 * Returns 0 to indicate that m has been accepted into a coalesced tx work
 * request.  It has either been folded into txpkts or txpkts was flushed and m
 * has started a new coalesced work request (as the first frame in a fresh
 * txpkts).
 *
 * Returns non-zero to indicate a failure - caller is responsible for
 * transmitting m, if there was anything in txpkts it has been flushed.
 */
static int
add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
    struct mbuf *m, struct sgl *sgl)
{
	struct sge_eq *eq = &txq->eq;
	int can_coalesce;
	struct tx_sdesc *txsd;
	int flits;

	TXQ_LOCK_ASSERT_OWNED(txq);

	if (txpkts->npkt > 0) {
		flits = TXPKTS_PKT_HDR + sgl->nflits;
		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
		    txpkts->nflits + flits <= TX_WR_FLITS &&
		    txpkts->nflits + flits <= eq->avail * 8 &&
		    txpkts->plen + m->m_pkthdr.len < 65536;

		if (can_coalesce) {
			txpkts->npkt++;
			txpkts->nflits += flits;
			txpkts->plen += m->m_pkthdr.len;

			txsd = &txq->sdesc[eq->pidx];
			txsd->credits++;

			return (0);
		}

		/*
		 * Couldn't coalesce m into txpkts.  The first order of
		 * business is to send txpkts on its way.  Then we'll revisit
		 * m.
		 */
		write_txpkts_wr(txq, txpkts);
	}

	/*
	 * Check if we can start a new coalesced tx work request with m as
	 * the first packet in it.
	 */

	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));

	flits = TXPKTS_WR_HDR + sgl->nflits;
	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;

	if (can_coalesce == 0)
		return (EINVAL);

	/*
	 * Start a fresh coalesced tx WR with m as the first frame in it.
	 */
	txpkts->npkt = 1;
	txpkts->nflits = flits;
	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
	txpkts->plen = m->m_pkthdr.len;

	txsd = &txq->sdesc[eq->pidx];
	txsd->credits = 1;

	return (0);
}

/*
 * Note that write_txpkts_wr can never run out of hardware descriptors (but
 * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
 * coalescing only if sufficient hardware descriptors are available.
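 * Specifically, add_to_txpkts only admits a frame while
 * txpkts->nflits + flits <= eq->avail * 8, so the howmany(nflits, 8)
 * descriptors consumed below are covered by eq->avail by construction.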
 */
static void
write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
{
	struct sge_eq *eq = &txq->eq;
	struct fw_eth_tx_pkts_wr *wr;
	struct tx_sdesc *txsd;
	uint32_t ctrl;
	int ndesc;

	TXQ_LOCK_ASSERT_OWNED(txq);

	ndesc = howmany(txpkts->nflits, 8);

	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
	    V_FW_WR_IMMDLEN(0));	/* immdlen does not matter in this WR */
	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
		eq->flags |= EQ_CRFLUSHED;
	}
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->plen = htobe16(txpkts->plen);
	wr->npkt = txpkts->npkt;
	wr->r3 = wr->type = 0;

	/* Everything else already written */

	txsd = &txq->sdesc[eq->pidx];
	txsd->desc_used = ndesc;

	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));

	eq->pending += ndesc;
	eq->avail -= ndesc;
	eq->pidx += ndesc;
	if (eq->pidx >= eq->cap)
		eq->pidx -= eq->cap;

	txq->txpkts_pkts += txpkts->npkt;
	txq->txpkts_wrs++;
	txpkts->npkt = 0;	/* emptied */
}

static inline void
write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
    struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
{
	struct ulp_txpkt *ulpmc;
	struct ulptx_idata *ulpsc;
	struct cpl_tx_pkt_core *cpl;
	struct sge_eq *eq = &txq->eq;
	uintptr_t flitp, start, end;
	uint64_t ctrl;
	caddr_t dst;

	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));

	start = (uintptr_t)eq->desc;
	end = (uintptr_t)eq->spg;

	/* Checksum offload */
	ctrl = 0;
	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
		ctrl |= F_TXPKT_IPCSUM_DIS;
	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
		ctrl |= F_TXPKT_L4CSUM_DIS;
	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (m->m_flags & M_VLANTAG) {
		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/*
	 * The previous packet's SGL must have ended at a 16 byte boundary
	 * (this is required by the firmware/hardware).  It follows that flitp
	 * cannot wrap around between the ULPTX master command and ULPTX
	 * subcommand (8 bytes each), and that it cannot wrap around in the
	 * middle of the cpl_tx_pkt_core either.
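	 * Since flitp starts out 16-byte aligned and the master command +
	 * subcommand pair (16 bytes) and the cpl_tx_pkt_core (16 bytes) are
	 * each written as a whole, the wrap checks against end below only
	 * need to run between those units, never inside one.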
	 */
	flitp = (uintptr_t)txpkts->flitp;
	KASSERT((flitp & 0xf) == 0,
	    ("%s: last SGL did not end at 16 byte boundary: %p",
	    __func__, txpkts->flitp));

	/* ULP master command */
	ulpmc = (void *)flitp;
	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
	    V_ULP_TXPKT_FID(eq->iqid));
	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
	    sizeof(*cpl) + 8 * sgl->nflits, 16));

	/* ULP subcommand */
	ulpsc = (void *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
	    F_ULP_TX_SC_MORE);
	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));

	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
	if (flitp == end)
		flitp = start;

	/* CPL_TX_PKT */
	cpl = (void *)flitp;
	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
	cpl->pack = 0;
	cpl->len = htobe16(m->m_pkthdr.len);
	cpl->ctrl1 = htobe64(ctrl);

	flitp += sizeof(*cpl);
	if (flitp == end)
		flitp = start;

	/* SGL for this frame */
	dst = (caddr_t)flitp;
	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
	txpkts->flitp = (void *)dst;

	KASSERT(((uintptr_t)dst & 0xf) == 0,
	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
}

/*
 * If the SGL ends on an address that is not 16 byte aligned, this function
 * will add a zero-filled flit at the end.  It returns 1 in that case.
 */
static int
write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
{
	__be64 *flitp, *end;
	struct ulptx_sgl *usgl;
	bus_dma_segment_t *seg;
	int i, padded;

	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
	    ("%s: bad SGL - nsegs=%d, nflits=%d",
	    __func__, sgl->nsegs, sgl->nflits));

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));

	flitp = (__be64 *)(*to);
	end = flitp + sgl->nflits;
	seg = &sgl->seg[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There
	 * is no chance of a wrap around in the middle of usgl (which is 16
	 * bytes).
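	 * (usgl is exactly the cmd_nsge + len0 flit followed by the addr0
	 * flit; anything past those two flits is written by one of the two
	 * cases below, which do check for a wrap.)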
	 */

	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(sgl->nsegs));
	usgl->len0 = htobe32(seg->ds_len);
	usgl->addr0 = htobe64(seg->ds_addr);
	seg++;

	if ((uintptr_t)end <= (uintptr_t)eq->spg) {

		/* Won't wrap around at all */

		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < sgl->nflits - 2; i++) {
			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
		}
		end = flitp;
	}

	if ((uintptr_t)end & 0xf) {
		*(uint64_t *)end = 0;
		end++;
		padded = 1;
	} else
		padded = 0;

	if ((uintptr_t)end == (uintptr_t)eq->spg)
		*to = (void *)eq->desc;
	else
		*to = (void *)end;

	return (padded);
}

static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
	if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
		bcopy(from, *to, len);
		(*to) += len;
	} else {
		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}

static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq)
{
	wmb();
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
	eq->pending = 0;
}

static inline int
reclaimable(struct sge_eq *eq)
{
	unsigned int cidx;

	cidx = eq->spg->cidx;	/* stable snapshot */
	cidx = be16_to_cpu(cidx);

	if (cidx >= eq->cidx)
		return (cidx - eq->cidx);
	else
		return (cidx + eq->cap - eq->cidx);
}

/*
 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 * many as possible but stop when there are around "n" mbufs to free.
 *
 * The actual number reclaimed is provided as the return value.
 */
static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
	struct tx_sdesc *txsd;
	struct tx_map *txm;
	unsigned int reclaimed, maps;
	struct sge_eq *eq = &txq->eq;

	EQ_LOCK_ASSERT_OWNED(eq);

	if (can_reclaim == 0)
		can_reclaim = reclaimable(eq);

	maps = reclaimed = 0;
	while (can_reclaim && maps < n) {
		int ndesc;

		txsd = &txq->sdesc[eq->cidx];
		ndesc = txsd->desc_used;

		/*
		 * Firmware doesn't return "partial" credits.
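		 * The cidx it reports always lands on a work request
		 * boundary, so the whole WR at eq->cidx is covered by
		 * can_reclaim; the KASSERT below relies on this.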
		 */
		KASSERT(can_reclaim >= ndesc,
		    ("%s: unexpected number of credits: %d, %d",
		    __func__, can_reclaim, ndesc));

		maps += txsd->credits;

		reclaimed += ndesc;
		can_reclaim -= ndesc;

		eq->cidx += ndesc;
		if (__predict_false(eq->cidx >= eq->cap))
			eq->cidx -= eq->cap;
	}

	txm = &txq->maps[txq->map_cidx];
	if (maps)
		prefetch(txm->m);

	eq->avail += reclaimed;
	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
	    ("%s: too many descriptors available", __func__));

	txq->map_avail += maps;
	KASSERT(txq->map_avail <= txq->map_total,
	    ("%s: too many maps available", __func__));

	while (maps--) {
		struct tx_map *next;

		next = txm + 1;
		if (__predict_false(txq->map_cidx + 1 == txq->map_total))
			next = txq->maps;
		prefetch(next->m);

		bus_dmamap_unload(txq->tx_tag, txm->map);
		m_freem(txm->m);
		txm->m = NULL;

		txm = next;
		if (__predict_false(++txq->map_cidx == txq->map_total))
			txq->map_cidx = 0;
	}

	return (reclaimed);
}

static void
write_eqflush_wr(struct sge_eq *eq)
{
	struct fw_eq_flush_wr *wr;

	EQ_LOCK_ASSERT_OWNED(eq);
	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));

	wr = (void *)&eq->desc[eq->pidx];
	bzero(wr, sizeof(*wr));
	wr->opcode = FW_EQ_FLUSH_WR;
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);

	eq->flags |= EQ_CRFLUSHED;
	eq->pending++;
	eq->avail--;
	if (++eq->pidx == eq->cap)
		eq->pidx = 0;
}

static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0: {
		__be64 rc;

		rc = htobe32(sgl[i].ds_len);
		if (i + 1 < nsegs)
			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

		return (rc);
	}
	case 1:
		return (htobe64(sgl[i].ds_addr));
	case 2:
		return (htobe64(sgl[i + 1].ds_addr));
	}

	return (0);
}

static void
set_fl_tag_idx(struct sge_fl *fl, int mtu)
{
	int i;

	FL_LOCK_ASSERT_OWNED(fl);

	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
		if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
			break;
	}

	fl->tag_idx = i;
}

static int
handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
{
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct sge *s = &sc->sge;
	struct sge_txq *txq;
	struct port_info *pi;

	txq = (void *)s->eqmap[qid - s->eq_start];
	TXQ_LOCK(txq);
	if (txq->eq.flags & EQ_CRFLUSHED) {
		pi = txq->ifp->if_softc;
		taskqueue_enqueue(pi->tq, &txq->resume_tx);
		txq->egr_update++;
	} else
		wakeup_one(txq);	/* txq is going away, wakeup free_txq */
	TXQ_UNLOCK(txq);

	return (0);
}

static void
handle_cpl(struct adapter *sc, struct sge_iq *iq)
{
	const struct rss_header *rss = (const void *)iq->cdesc;
	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

	switch (rss->opcode) {
	case CPL_FW4_MSG:
	case CPL_FW6_MSG:
		if (cpl->type == FW6_TYPE_CMD_RPL)
			t4_handle_fw_rpl(sc, cpl->data);
		break;

	case CPL_SGE_EGR_UPDATE:
		handle_sge_egr_update(sc, (const void *)cpl);
		break;

	case CPL_SET_TCB_RPL:
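		/* Replies to filter work requests are handled elsewhere. */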
		filter_rpl(sc, (const void *)cpl);
		break;

	default:
		panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode);
	}
}

/*
 * m0 is freed on successful transmission.
 */
static int
ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
{
	struct sge_eq *eq = &ctrlq->eq;
	int rc = 0, ndesc;
	int can_reclaim;
	caddr_t dst;
	struct mbuf *m;

	M_ASSERTPKTHDR(m0);

	if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
		log(LOG_ERR, "%s: %s work request too long (%d)",
		    device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len);
		return (EMSGSIZE);
	}
	ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);

	EQ_LOCK(eq);

	can_reclaim = reclaimable(eq);
	eq->cidx += can_reclaim;
	eq->avail += can_reclaim;
	if (__predict_false(eq->cidx >= eq->cap))
		eq->cidx -= eq->cap;

	if (eq->avail < ndesc) {
		rc = EAGAIN;
		ctrlq->no_desc++;
		goto failed;
	}

	dst = (void *)&eq->desc[eq->pidx];
	for (m = m0; m; m = m->m_next)
		copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);

	eq->pidx += ndesc;
	if (__predict_false(eq->pidx >= eq->cap))
		eq->pidx -= eq->cap;

	eq->pending += ndesc;
	ring_eq_db(sc, eq);
failed:
	EQ_UNLOCK(eq);
	if (rc == 0)
		m_freem(m0);

	return (rc);
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
	uint16_t *id = arg1;
	int i = *id;

	return (sysctl_handle_int(oidp, &i, 0, req));
}
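/*
 * sysctl_uint16 widens a uint16_t (e.g., a queue index) so it can be
 * exported through the int-based sysctl(9) machinery as a read-only value.
 * A sketch of how such an oid might be registered follows; the context,
 * oid list, and name here are illustrative, not necessarily what the
 * driver uses:
 *
 *	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
 *	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
 *	    "consumer index");
 */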