1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 #define DEBUG_BUFRING 30 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/conf.h> 41 #include <machine/bus.h> 42 #include <machine/resource.h> 43 #include <sys/bus_dma.h> 44 #include <sys/rman.h> 45 #include <sys/queue.h> 46 #include <sys/sysctl.h> 47 #include <sys/taskqueue.h> 48 49 #include <sys/proc.h> 50 #include <sys/sbuf.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/systm.h> 54 #include <sys/syslog.h> 55 56 #include <netinet/in_systm.h> 57 #include <netinet/in.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 61 #include <dev/pci/pcireg.h> 62 #include <dev/pci/pcivar.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 67 #ifdef CONFIG_DEFINED 68 #include <cxgb_include.h> 69 #include <sys/mvec.h> 70 #else 71 #include <dev/cxgb/cxgb_include.h> 72 #include <dev/cxgb/sys/mvec.h> 73 #endif 74 75 int txq_fills = 0; 76 /* 77 * XXX don't re-enable this until TOE stops assuming 78 * we have an m_ext 79 */ 80 static int recycle_enable = 0; 81 extern int cxgb_txq_buf_ring_size; 82 int cxgb_cached_allocations; 83 int cxgb_cached; 84 int cxgb_ext_freed = 0; 85 int cxgb_ext_inited = 0; 86 int fl_q_size = 0; 87 int jumbo_q_size = 0; 88 89 extern int cxgb_use_16k_clusters; 90 extern int cxgb_pcpu_cache_enable; 91 extern int nmbjumbo4; 92 extern int nmbjumbo9; 93 extern int nmbjumbo16; 94 95 96 97 98 #define USE_GTS 0 99 100 #define SGE_RX_SM_BUF_SIZE 1536 101 #define SGE_RX_DROP_THRES 16 102 #define SGE_RX_COPY_THRES 128 103 104 /* 105 * Period of the Tx buffer reclaim timer. This timer does not need to run 106 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
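 *
 * Note: (hz >> 1) ticks works out to roughly half a second between reclaim
 * passes, independent of the configured value of hz.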
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf_iovec mi;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	caddr_t		data;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @q: the Tx queue to reclaim completed descriptors from
 * @reclaim_min: do nothing unless at least this many descriptors can be reclaimed
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible. Called with the Tx
 * queue's lock held.
 */
static __inline int
reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
{
	int reclaim = desc_reclaimable(q);

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(q, reclaim);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	return (reclaim);
}

/**
 * should_restart_tx - are there enough resources to restart a Tx queue?
 * @q: the Tx queue
 *
 * Checks if there are enough descriptors to restart a suspended Tx queue.
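 * A queue is considered restartable once the descriptors still in use,
 * less those already processed by the hardware but not yet reclaimed,
 * drop below half of the queue size.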
235 */ 236 static __inline int 237 should_restart_tx(const struct sge_txq *q) 238 { 239 unsigned int r = q->processed - q->cleaned; 240 241 return q->in_use - r < (q->size >> 1); 242 } 243 244 /** 245 * t3_sge_init - initialize SGE 246 * @adap: the adapter 247 * @p: the SGE parameters 248 * 249 * Performs SGE initialization needed every time after a chip reset. 250 * We do not initialize any of the queue sets here, instead the driver 251 * top-level must request those individually. We also do not enable DMA 252 * here, that should be done after the queues have been set up. 253 */ 254 void 255 t3_sge_init(adapter_t *adap, struct sge_params *p) 256 { 257 u_int ctrl, ups; 258 259 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 260 261 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 262 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 263 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 264 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 265 #if SGE_NUM_GENBITS == 1 266 ctrl |= F_EGRGENCTRL; 267 #endif 268 if (adap->params.rev > 0) { 269 if (!(adap->flags & (USING_MSIX | USING_MSI))) 270 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 271 } 272 t3_write_reg(adap, A_SG_CONTROL, ctrl); 273 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 274 V_LORCQDRBTHRSH(512)); 275 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 276 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 277 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 278 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 279 adap->params.rev < T3_REV_C ? 1000 : 500); 280 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 281 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 282 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 283 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 284 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 285 } 286 287 288 /** 289 * sgl_len - calculates the size of an SGL of the given capacity 290 * @n: the number of SGL entries 291 * 292 * Calculates the number of flits needed for a scatter/gather list that 293 * can hold the given number of entries. 294 */ 295 static __inline unsigned int 296 sgl_len(unsigned int n) 297 { 298 return ((3 * n) / 2 + (n & 1)); 299 } 300 301 /** 302 * get_imm_packet - return the next ingress packet buffer from a response 303 * @resp: the response descriptor containing the packet data 304 * 305 * Return a packet containing the immediate data of the given response. 306 */ 307 static int 308 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 309 { 310 311 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 312 m->m_ext.ext_buf = NULL; 313 m->m_ext.ext_type = 0; 314 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 315 return (0); 316 } 317 318 static __inline u_int 319 flits_to_desc(u_int n) 320 { 321 return (flit_desc_map[n]); 322 } 323 324 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 325 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 326 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 327 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 328 F_HIRCQPARITYERROR) 329 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 330 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 331 F_RSPQDISABLED) 332 333 /** 334 * t3_sge_err_intr_handler - SGE async event interrupt handler 335 * @adapter: the adapter 336 * 337 * Interrupt handler for SGE asynchronous (non-data) events. 
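 * Parity errors, framing errors, response queue credit overflow, and a
 * disabled response queue (see SGE_FATALERR) are all treated as fatal and
 * escalate to t3_fatal_err().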
338 */ 339 void 340 t3_sge_err_intr_handler(adapter_t *adapter) 341 { 342 unsigned int v, status; 343 344 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 345 if (status & SGE_PARERR) 346 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 347 status & SGE_PARERR); 348 if (status & SGE_FRAMINGERR) 349 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 350 status & SGE_FRAMINGERR); 351 if (status & F_RSPQCREDITOVERFOW) 352 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 353 354 if (status & F_RSPQDISABLED) { 355 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 356 357 CH_ALERT(adapter, 358 "packet delivered to disabled response queue (0x%x)\n", 359 (v >> S_RSPQ0DISABLED) & 0xff); 360 } 361 362 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 363 if (status & SGE_FATALERR) 364 t3_fatal_err(adapter); 365 } 366 367 void 368 t3_sge_prep(adapter_t *adap, struct sge_params *p) 369 { 370 int i, nqsets; 371 372 nqsets = min(SGE_QSETS, mp_ncpus*4); 373 374 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 375 376 while (!powerof2(fl_q_size)) 377 fl_q_size--; 378 #if __FreeBSD_version > 800000 379 if (cxgb_use_16k_clusters) 380 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 381 else 382 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 383 #else 384 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 385 #endif 386 while (!powerof2(jumbo_q_size)) 387 jumbo_q_size--; 388 389 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 390 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 391 392 for (i = 0; i < SGE_QSETS; ++i) { 393 struct qset_params *q = p->qset + i; 394 395 if (adap->params.nports > 2) { 396 q->coalesce_usecs = 50; 397 } else { 398 #ifdef INVARIANTS 399 q->coalesce_usecs = 10; 400 #else 401 q->coalesce_usecs = 5; 402 #endif 403 } 404 q->polling = adap->params.rev > 0; 405 q->rspq_size = RSPQ_Q_SIZE; 406 q->fl_size = fl_q_size; 407 q->jumbo_size = jumbo_q_size; 408 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 409 q->txq_size[TXQ_OFLD] = 1024; 410 q->txq_size[TXQ_CTRL] = 256; 411 q->cong_thres = 0; 412 } 413 } 414 415 int 416 t3_sge_alloc(adapter_t *sc) 417 { 418 419 /* The parent tag. */ 420 if (bus_dma_tag_create( NULL, /* parent */ 421 1, 0, /* algnmnt, boundary */ 422 BUS_SPACE_MAXADDR, /* lowaddr */ 423 BUS_SPACE_MAXADDR, /* highaddr */ 424 NULL, NULL, /* filter, filterarg */ 425 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 426 BUS_SPACE_UNRESTRICTED, /* nsegments */ 427 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 428 0, /* flags */ 429 NULL, NULL, /* lock, lockarg */ 430 &sc->parent_dmat)) { 431 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 432 return (ENOMEM); 433 } 434 435 /* 436 * DMA tag for normal sized RX frames 437 */ 438 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 439 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 440 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 441 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 442 return (ENOMEM); 443 } 444 445 /* 446 * DMA tag for jumbo sized RX frames. 447 */ 448 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 449 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 450 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 451 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 452 return (ENOMEM); 453 } 454 455 /* 456 * DMA tag for TX frames. 
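	 * The tag created below uses byte alignment and allows up to
	 * TX_MAX_SEGS segments per mapping, with TX_MAX_SIZE as both the
	 * total and the per-segment size limit.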
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
/**
 * refill_fl - refill an SGE free-buffer list
 * @sc: the controller softc
 * @q: the free-list to refill
 * @n: the number of new buffers to allocate
 *
 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	caddr_t cl;
	int err, count = 0;
	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
	    sizeof(struct m_ext_) + sizeof(uint32_t);

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}

		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl + header_size, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->data = cl + header_size;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		count++;
	}

done:
	if (count)
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE
free list to clean up 591 * 592 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 593 * this queue should be stopped before calling this function. 594 */ 595 static void 596 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 597 { 598 u_int cidx = q->cidx; 599 600 while (q->credits--) { 601 struct rx_sw_desc *d = &q->sdesc[cidx]; 602 603 if (d->flags & RX_SW_DESC_INUSE) { 604 bus_dmamap_unload(q->entry_tag, d->map); 605 bus_dmamap_destroy(q->entry_tag, d->map); 606 uma_zfree(q->zone, d->rxsd_cl); 607 } 608 d->rxsd_cl = NULL; 609 if (++cidx == q->size) 610 cidx = 0; 611 } 612 } 613 614 static __inline void 615 __refill_fl(adapter_t *adap, struct sge_fl *fl) 616 { 617 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 618 } 619 620 static __inline void 621 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 622 { 623 if ((fl->size - fl->credits) < max) 624 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 625 } 626 627 void 628 refill_fl_service(adapter_t *adap, struct sge_fl *fl) 629 { 630 __refill_fl_lt(adap, fl, 512); 631 } 632 633 /** 634 * recycle_rx_buf - recycle a receive buffer 635 * @adapter: the adapter 636 * @q: the SGE free list 637 * @idx: index of buffer to recycle 638 * 639 * Recycles the specified buffer on the given free list by adding it at 640 * the next available slot on the list. 641 */ 642 static void 643 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 644 { 645 struct rx_desc *from = &q->desc[idx]; 646 struct rx_desc *to = &q->desc[q->pidx]; 647 648 q->sdesc[q->pidx] = q->sdesc[idx]; 649 to->addr_lo = from->addr_lo; // already big endian 650 to->addr_hi = from->addr_hi; // likewise 651 wmb(); 652 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 653 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 654 q->credits++; 655 656 if (++q->pidx == q->size) { 657 q->pidx = 0; 658 q->gen ^= 1; 659 } 660 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 661 } 662 663 static void 664 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 665 { 666 uint32_t *addr; 667 668 addr = arg; 669 *addr = segs[0].ds_addr; 670 } 671 672 static int 673 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 674 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 675 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 676 { 677 size_t len = nelem * elem_size; 678 void *s = NULL; 679 void *p = NULL; 680 int err; 681 682 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 683 BUS_SPACE_MAXADDR_32BIT, 684 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 685 len, 0, NULL, NULL, tag)) != 0) { 686 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 687 return (ENOMEM); 688 } 689 690 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 691 map)) != 0) { 692 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 693 return (ENOMEM); 694 } 695 696 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 697 bzero(p, len); 698 *(void **)desc = p; 699 700 if (sw_size) { 701 len = nelem * sw_size; 702 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 703 *(void **)sdesc = s; 704 } 705 if (parent_entry_tag == NULL) 706 return (0); 707 708 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 709 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 710 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 711 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 712 NULL, NULL, entry_tag)) != 0) { 713 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 714 return (ENOMEM); 715 } 716 return (0); 717 } 718 719 
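/*
 * Illustration only (not part of the driver): allocating a free-list ring of
 * "nelem" descriptors together with its software state could look roughly
 * like the call below, where fl is a struct sge_fl and busaddr a bus_addr_t:
 *
 *	err = alloc_ring(sc, nelem, sizeof(struct rx_desc),
 *	    sizeof(struct rx_sw_desc), &busaddr, &fl->desc, &fl->sdesc,
 *	    &fl->desc_tag, &fl->desc_map, sc->rx_dmat, &fl->entry_tag);
 */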
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 * sge_timer_cb - perform periodic maintenance of an SGE qset
 * @arg: the adapter whose queue sets need maintenance
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set. It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while. We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up). Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty. We try to add only a few
 * buffers here, the queue will be replenished fully as these new buffers
 * are used up if memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
 *
 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 * fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
#ifndef IFNET_MULTIQUEUE
	struct port_info *pi;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int i, j;
	int reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_ofl || refill_rx) {
				pi = &sc->port[i];
				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
				break;
			}
		}
#endif
	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
			    F_SELEGRCNTX |
			    (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 *
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	mi_init();
	cxgb_cache_init();
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{

	mi_deinit();
}

/**
 * refill_rspq - replenish an SGE response queue
 * @adapter: the
adapter 836 * @q: the response queue to replenish 837 * @credits: how many new responses to make available 838 * 839 * Replenishes a response queue by making the supplied number of responses 840 * available to HW. 841 */ 842 static __inline void 843 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 844 { 845 846 /* mbufs are allocated on demand when a rspq entry is processed. */ 847 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 848 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 849 } 850 851 static __inline void 852 sge_txq_reclaim_(struct sge_txq *txq, int force) 853 { 854 855 if (desc_reclaimable(txq) < 16) 856 return; 857 if (mtx_trylock(&txq->lock) == 0) 858 return; 859 reclaim_completed_tx_(txq, 16); 860 mtx_unlock(&txq->lock); 861 862 } 863 864 static void 865 sge_txq_reclaim_handler(void *arg, int ncount) 866 { 867 struct sge_txq *q = arg; 868 869 sge_txq_reclaim_(q, TRUE); 870 } 871 872 873 874 static void 875 sge_timer_reclaim(void *arg, int ncount) 876 { 877 struct port_info *pi = arg; 878 int i, nqsets = pi->nqsets; 879 adapter_t *sc = pi->adapter; 880 struct sge_qset *qs; 881 struct sge_txq *txq; 882 struct mtx *lock; 883 884 #ifdef IFNET_MULTIQUEUE 885 panic("%s should not be called with multiqueue support\n", __FUNCTION__); 886 #endif 887 for (i = 0; i < nqsets; i++) { 888 qs = &sc->sge.qs[i]; 889 890 txq = &qs->txq[TXQ_OFLD]; 891 sge_txq_reclaim_(txq, FALSE); 892 893 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 894 &sc->sge.qs[0].rspq.lock; 895 896 if (mtx_trylock(lock)) { 897 /* XXX currently assume that we are *NOT* polling */ 898 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 899 900 if (qs->fl[0].credits < qs->fl[0].size - 16) 901 __refill_fl(sc, &qs->fl[0]); 902 if (qs->fl[1].credits < qs->fl[1].size - 16) 903 __refill_fl(sc, &qs->fl[1]); 904 905 if (status & (1 << qs->rspq.cntxt_id)) { 906 if (qs->rspq.credits) { 907 refill_rspq(sc, &qs->rspq, 1); 908 qs->rspq.credits--; 909 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 910 1 << qs->rspq.cntxt_id); 911 } 912 } 913 mtx_unlock(lock); 914 } 915 } 916 } 917 918 /** 919 * init_qset_cntxt - initialize an SGE queue set context info 920 * @qs: the queue set 921 * @id: the queue set id 922 * 923 * Initializes the TIDs and context ids for the queues of a queue set. 
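 * Queue set N uses response queue context N and free-list contexts 2N and
 * 2N + 1, while its Tx contexts and TIDs are carved out of the
 * FW_*_SGEEC_START and FW_*_TID_START ranges, each offset by the id.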
924 */ 925 static void 926 init_qset_cntxt(struct sge_qset *qs, u_int id) 927 { 928 929 qs->rspq.cntxt_id = id; 930 qs->fl[0].cntxt_id = 2 * id; 931 qs->fl[1].cntxt_id = 2 * id + 1; 932 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 933 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 934 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 935 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 936 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 937 938 mbufq_init(&qs->txq[TXQ_ETH].sendq); 939 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 940 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 941 } 942 943 944 static void 945 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 946 { 947 txq->in_use += ndesc; 948 /* 949 * XXX we don't handle stopping of queue 950 * presumably start handles this when we bump against the end 951 */ 952 txqs->gen = txq->gen; 953 txq->unacked += ndesc; 954 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 955 txq->unacked &= 31; 956 txqs->pidx = txq->pidx; 957 txq->pidx += ndesc; 958 #ifdef INVARIANTS 959 if (((txqs->pidx > txq->cidx) && 960 (txq->pidx < txqs->pidx) && 961 (txq->pidx >= txq->cidx)) || 962 ((txqs->pidx < txq->cidx) && 963 (txq->pidx >= txq-> cidx)) || 964 ((txqs->pidx < txq->cidx) && 965 (txq->cidx < txqs->pidx))) 966 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 967 txqs->pidx, txq->pidx, txq->cidx); 968 #endif 969 if (txq->pidx >= txq->size) { 970 txq->pidx -= txq->size; 971 txq->gen ^= 1; 972 } 973 974 } 975 976 /** 977 * calc_tx_descs - calculate the number of Tx descriptors for a packet 978 * @m: the packet mbufs 979 * @nsegs: the number of segments 980 * 981 * Returns the number of Tx descriptors needed for the given Ethernet 982 * packet. Ethernet packets require addition of WR and CPL headers. 
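 * A packet small enough to be sent as immediate data (at most
 * WR_LEN - sizeof(struct cpl_tx_pkt) bytes) always takes a single
 * descriptor; otherwise the count is derived from the SGL size plus two
 * header flits, plus one more flit when TSO is in use.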
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;
#endif
	return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
	err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	} else if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet.
 * The SGL size in 8-byte words is given by sgl_len(); the caller must
 * size the SGL appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep. There is a natural race
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
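 *
 * With USE_GTS defined to 0, as it is in this file, the GTS path is
 * compiled out and every call issues a doorbell write to A_SG_KDOORBELL
 * after a write memory barrier.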
1096 */ 1097 static __inline void 1098 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1099 { 1100 #if USE_GTS 1101 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1102 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1103 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1104 #ifdef T3_TRACE 1105 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1106 q->cntxt_id); 1107 #endif 1108 t3_write_reg(adap, A_SG_KDOORBELL, 1109 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1110 } 1111 #else 1112 wmb(); /* write descriptors before telling HW */ 1113 t3_write_reg(adap, A_SG_KDOORBELL, 1114 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1115 #endif 1116 } 1117 1118 static __inline void 1119 wr_gen2(struct tx_desc *d, unsigned int gen) 1120 { 1121 #if SGE_NUM_GENBITS == 2 1122 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1123 #endif 1124 } 1125 1126 /** 1127 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1128 * @ndesc: number of Tx descriptors spanned by the SGL 1129 * @txd: first Tx descriptor to be written 1130 * @txqs: txq state (generation and producer index) 1131 * @txq: the SGE Tx queue 1132 * @sgl: the SGL 1133 * @flits: number of flits to the start of the SGL in the first descriptor 1134 * @sgl_flits: the SGL size in flits 1135 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1136 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1137 * 1138 * Write a work request header and an associated SGL. If the SGL is 1139 * small enough to fit into one Tx descriptor it has already been written 1140 * and we just need to write the WR header. Otherwise we distribute the 1141 * SGL across the number of descriptors it spans. 1142 */ 1143 static void 1144 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1145 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1146 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1147 { 1148 1149 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1150 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1151 1152 if (__predict_true(ndesc == 1)) { 1153 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1154 V_WR_SGLSFLT(flits)) | wr_hi; 1155 wmb(); 1156 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1157 V_WR_GEN(txqs->gen)) | wr_lo; 1158 /* XXX gen? 
*/ 1159 wr_gen2(txd, txqs->gen); 1160 1161 } else { 1162 unsigned int ogen = txqs->gen; 1163 const uint64_t *fp = (const uint64_t *)sgl; 1164 struct work_request_hdr *wp = wrp; 1165 1166 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1167 V_WR_SGLSFLT(flits)) | wr_hi; 1168 1169 while (sgl_flits) { 1170 unsigned int avail = WR_FLITS - flits; 1171 1172 if (avail > sgl_flits) 1173 avail = sgl_flits; 1174 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1175 sgl_flits -= avail; 1176 ndesc--; 1177 if (!sgl_flits) 1178 break; 1179 1180 fp += avail; 1181 txd++; 1182 txsd++; 1183 if (++txqs->pidx == txq->size) { 1184 txqs->pidx = 0; 1185 txqs->gen ^= 1; 1186 txd = txq->desc; 1187 txsd = txq->sdesc; 1188 } 1189 1190 /* 1191 * when the head of the mbuf chain 1192 * is freed all clusters will be freed 1193 * with it 1194 */ 1195 KASSERT(txsd->mi.mi_base == NULL, 1196 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); 1197 wrp = (struct work_request_hdr *)txd; 1198 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1199 V_WR_SGLSFLT(1)) | wr_hi; 1200 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1201 sgl_flits + 1)) | 1202 V_WR_GEN(txqs->gen)) | wr_lo; 1203 wr_gen2(txd, txqs->gen); 1204 flits = 1; 1205 } 1206 wrp->wr_hi |= htonl(F_WR_EOP); 1207 wmb(); 1208 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1209 wr_gen2((struct tx_desc *)wp, ogen); 1210 } 1211 } 1212 1213 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1214 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1215 1216 #ifdef VLAN_SUPPORTED 1217 #define GET_VTAG(cntrl, m) \ 1218 do { \ 1219 if ((m)->m_flags & M_VLANTAG) \ 1220 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1221 } while (0) 1222 1223 #define GET_VTAG_MI(cntrl, mi) \ 1224 do { \ 1225 if ((mi)->mi_flags & M_VLANTAG) \ 1226 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \ 1227 } while (0) 1228 #else 1229 #define GET_VTAG(cntrl, m) 1230 #define GET_VTAG_MI(cntrl, m) 1231 #endif 1232 1233 int 1234 t3_encap(struct sge_qset *qs, struct mbuf **m, int count) 1235 { 1236 adapter_t *sc; 1237 struct mbuf *m0; 1238 struct sge_txq *txq; 1239 struct txq_state txqs; 1240 struct port_info *pi; 1241 unsigned int ndesc, flits, cntrl, mlen; 1242 int err, nsegs, tso_info = 0; 1243 1244 struct work_request_hdr *wrp; 1245 struct tx_sw_desc *txsd; 1246 struct sg_ent *sgp, *sgl; 1247 uint32_t wr_hi, wr_lo, sgl_flits; 1248 bus_dma_segment_t segs[TX_MAX_SEGS]; 1249 1250 struct tx_desc *txd; 1251 struct mbuf_vec *mv; 1252 struct mbuf_iovec *mi; 1253 1254 DPRINTF("t3_encap cpu=%d ", curcpu); 1255 1256 mi = NULL; 1257 pi = qs->port; 1258 sc = pi->adapter; 1259 txq = &qs->txq[TXQ_ETH]; 1260 txd = &txq->desc[txq->pidx]; 1261 txsd = &txq->sdesc[txq->pidx]; 1262 sgl = txq->txq_sgl; 1263 m0 = *m; 1264 1265 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1266 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1267 if (cxgb_debug) 1268 printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx); 1269 1270 mtx_assert(&txq->lock, MA_OWNED); 1271 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1272 /* 1273 * XXX need to add VLAN support for 6.x 1274 */ 1275 #ifdef VLAN_SUPPORTED 1276 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1277 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1278 #endif 1279 KASSERT(txsd->mi.mi_base == NULL, 1280 ("overwriting valid entry mi_base==%p", txsd->mi.mi_base)); 1281 if (count > 1) { 1282 panic("count > 1 not support in 
CVS\n"); 1283 if ((err = busdma_map_sg_vec(m, &m0, segs, count))) 1284 return (err); 1285 nsegs = count; 1286 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { 1287 if (cxgb_debug) 1288 printf("failed ... err=%d\n", err); 1289 return (err); 1290 } 1291 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); 1292 1293 if (!(m0->m_pkthdr.len <= PIO_LEN)) { 1294 mi_collapse_mbuf(&txsd->mi, m0); 1295 mi = &txsd->mi; 1296 } 1297 if (count > 1) { 1298 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1299 int i, fidx; 1300 struct mbuf_iovec *batchmi; 1301 1302 mv = mtomv(m0); 1303 batchmi = mv->mv_vec; 1304 1305 wrp = (struct work_request_hdr *)txd; 1306 1307 flits = count*2 + 1; 1308 txq_prod(txq, 1, &txqs); 1309 1310 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { 1311 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; 1312 1313 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1314 GET_VTAG_MI(cntrl, batchmi); 1315 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1316 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1317 cntrl |= F_TXPKT_IPCSUM_DIS; 1318 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1319 cntrl |= F_TXPKT_L4CSUM_DIS; 1320 cbe->cntrl = htonl(cntrl); 1321 cbe->len = htonl(batchmi->mi_len | 0x80000000); 1322 cbe->addr = htobe64(segs[i].ds_addr); 1323 txd->flit[fidx] |= htobe64(1 << 24); 1324 } 1325 1326 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1327 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1328 wmb(); 1329 wrp->wr_lo = htonl(V_WR_LEN(flits) | 1330 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1331 /* XXX gen? */ 1332 wr_gen2(txd, txqs.gen); 1333 check_ring_tx_db(sc, txq); 1334 1335 return (0); 1336 } else if (tso_info) { 1337 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1338 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1339 struct ip *ip; 1340 struct tcphdr *tcp; 1341 char *pkthdr; 1342 1343 txd->flit[2] = 0; 1344 GET_VTAG(cntrl, m0); 1345 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1346 hdr->cntrl = htonl(cntrl); 1347 mlen = m0->m_pkthdr.len; 1348 hdr->len = htonl(mlen | 0x80000000); 1349 1350 DPRINTF("tso buf len=%d\n", mlen); 1351 1352 tagged = m0->m_flags & M_VLANTAG; 1353 if (!tagged) 1354 min_size -= ETHER_VLAN_ENCAP_LEN; 1355 1356 if (__predict_false(mlen < min_size)) { 1357 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1358 m0, mlen, m0->m_pkthdr.tso_segsz, 1359 m0->m_pkthdr.csum_flags, m0->m_flags); 1360 panic("tx tso packet too small"); 1361 } 1362 1363 /* Make sure that ether, ip, tcp headers are all in m0 */ 1364 if (__predict_false(m0->m_len < min_size)) { 1365 m0 = m_pullup(m0, min_size); 1366 if (__predict_false(m0 == NULL)) { 1367 /* XXX panic probably an overreaction */ 1368 panic("couldn't fit header into mbuf"); 1369 } 1370 } 1371 pkthdr = m0->m_data; 1372 1373 if (tagged) { 1374 eth_type = CPL_ETH_II_VLAN; 1375 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1376 ETHER_VLAN_ENCAP_LEN); 1377 } else { 1378 eth_type = CPL_ETH_II; 1379 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1380 } 1381 tcp = (struct tcphdr *)((uint8_t *)ip + 1382 sizeof(*ip)); 1383 1384 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1385 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1386 V_LSO_TCPHDR_WORDS(tcp->th_off); 1387 hdr->lso_info = htonl(tso_info); 1388 1389 if (__predict_false(mlen <= PIO_LEN)) { 1390 /* pkt not undersized but fits in PIO_LEN */ 1391 printf("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 
1392 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); 1393 txq_prod(txq, 1, &txqs); 1394 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1395 m_freem(m0); 1396 m0 = NULL; 1397 flits = (mlen + 7) / 8 + 3; 1398 hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1399 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1400 F_WR_SOP | F_WR_EOP | txqs.compl); 1401 wmb(); 1402 hdr->wr.wr_lo = htonl(V_WR_LEN(flits) | 1403 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1404 1405 wr_gen2(txd, txqs.gen); 1406 check_ring_tx_db(sc, txq); 1407 return (0); 1408 } 1409 flits = 3; 1410 } else { 1411 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1412 1413 GET_VTAG(cntrl, m0); 1414 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1415 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1416 cntrl |= F_TXPKT_IPCSUM_DIS; 1417 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1418 cntrl |= F_TXPKT_L4CSUM_DIS; 1419 cpl->cntrl = htonl(cntrl); 1420 mlen = m0->m_pkthdr.len; 1421 cpl->len = htonl(mlen | 0x80000000); 1422 1423 if (mlen <= PIO_LEN) { 1424 txq_prod(txq, 1, &txqs); 1425 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1426 m_freem(m0); 1427 m0 = NULL; 1428 flits = (mlen + 7) / 8 + 2; 1429 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1430 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1431 F_WR_SOP | F_WR_EOP | txqs.compl); 1432 wmb(); 1433 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1434 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1435 1436 wr_gen2(txd, txqs.gen); 1437 check_ring_tx_db(sc, txq); 1438 DPRINTF("pio buf\n"); 1439 return (0); 1440 } 1441 DPRINTF("regular buf\n"); 1442 flits = 2; 1443 } 1444 wrp = (struct work_request_hdr *)txd; 1445 1446 #ifdef nomore 1447 /* 1448 * XXX need to move into one of the helper routines above 1449 * 1450 */ 1451 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) 1452 return (err); 1453 m0 = *m; 1454 #endif 1455 ndesc = calc_tx_descs(m0, nsegs); 1456 1457 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1458 make_sgl(sgp, segs, nsegs); 1459 1460 sgl_flits = sgl_len(nsegs); 1461 1462 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1463 txq_prod(txq, ndesc, &txqs); 1464 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1465 wr_lo = htonl(V_WR_TID(txq->token)); 1466 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1467 check_ring_tx_db(pi->adapter, txq); 1468 1469 if ((m0->m_type == MT_DATA) && 1470 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && 1471 (m0->m_ext.ext_type != EXT_PACKET)) { 1472 m0->m_flags &= ~M_EXT ; 1473 cxgb_mbufs_outstanding--; 1474 m_free(m0); 1475 } 1476 1477 return (0); 1478 } 1479 1480 1481 /** 1482 * write_imm - write a packet into a Tx descriptor as immediate data 1483 * @d: the Tx descriptor to write 1484 * @m: the packet 1485 * @len: the length of packet data to write as immediate data 1486 * @gen: the generation bit value to write 1487 * 1488 * Writes a packet as immediate data into a Tx descriptor. The packet 1489 * contains a work request at its beginning. We must write the packet 1490 * carefully so the SGE doesn't read accidentally before it's written in 1491 * its entirety. 
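 * The write memory barrier between copying the body and the final store
 * to wr_lo (which carries the generation bit) is what provides that
 * ordering guarantee.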
1492 */ 1493 static __inline void 1494 write_imm(struct tx_desc *d, struct mbuf *m, 1495 unsigned int len, unsigned int gen) 1496 { 1497 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1498 struct work_request_hdr *to = (struct work_request_hdr *)d; 1499 1500 if (len > WR_LEN) 1501 panic("len too big %d\n", len); 1502 if (len < sizeof(*from)) 1503 panic("len too small %d", len); 1504 1505 memcpy(&to[1], &from[1], len - sizeof(*from)); 1506 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1507 V_WR_BCNTLFLT(len & 7)); 1508 wmb(); 1509 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1510 V_WR_LEN((len + 7) / 8)); 1511 wr_gen2(d, gen); 1512 1513 /* 1514 * This check is a hack we should really fix the logic so 1515 * that this can't happen 1516 */ 1517 if (m->m_type != MT_DONTFREE) 1518 m_freem(m); 1519 1520 } 1521 1522 /** 1523 * check_desc_avail - check descriptor availability on a send queue 1524 * @adap: the adapter 1525 * @q: the TX queue 1526 * @m: the packet needing the descriptors 1527 * @ndesc: the number of Tx descriptors needed 1528 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1529 * 1530 * Checks if the requested number of Tx descriptors is available on an 1531 * SGE send queue. If the queue is already suspended or not enough 1532 * descriptors are available the packet is queued for later transmission. 1533 * Must be called with the Tx queue locked. 1534 * 1535 * Returns 0 if enough descriptors are available, 1 if there aren't 1536 * enough descriptors and the packet has been queued, and 2 if the caller 1537 * needs to retry because there weren't enough descriptors at the 1538 * beginning of the call but some freed up in the mean time. 1539 */ 1540 static __inline int 1541 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1542 struct mbuf *m, unsigned int ndesc, 1543 unsigned int qid) 1544 { 1545 /* 1546 * XXX We currently only use this for checking the control queue 1547 * the control queue is only used for binding qsets which happens 1548 * at init time so we are guaranteed enough descriptors 1549 */ 1550 if (__predict_false(!mbufq_empty(&q->sendq))) { 1551 addq_exit: mbufq_tail(&q->sendq, m); 1552 return 1; 1553 } 1554 if (__predict_false(q->size - q->in_use < ndesc)) { 1555 1556 struct sge_qset *qs = txq_to_qset(q, qid); 1557 1558 printf("stopping q\n"); 1559 1560 setbit(&qs->txq_stopped, qid); 1561 smp_mb(); 1562 1563 if (should_restart_tx(q) && 1564 test_and_clear_bit(qid, &qs->txq_stopped)) 1565 return 2; 1566 1567 q->stops++; 1568 goto addq_exit; 1569 } 1570 return 0; 1571 } 1572 1573 1574 /** 1575 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1576 * @q: the SGE control Tx queue 1577 * 1578 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1579 * that send only immediate data (presently just the control queues) and 1580 * thus do not have any mbufs 1581 */ 1582 static __inline void 1583 reclaim_completed_tx_imm(struct sge_txq *q) 1584 { 1585 unsigned int reclaim = q->processed - q->cleaned; 1586 1587 mtx_assert(&q->lock, MA_OWNED); 1588 1589 q->in_use -= reclaim; 1590 q->cleaned += reclaim; 1591 } 1592 1593 static __inline int 1594 immediate(const struct mbuf *m) 1595 { 1596 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1597 } 1598 1599 /** 1600 * ctrl_xmit - send a packet through an SGE control Tx queue 1601 * @adap: the adapter 1602 * @q: the control queue 1603 * @m: the packet 1604 * 1605 * Send a packet through an SGE control Tx queue. 
 * Packets sent through a control queue must fit entirely as immediate
 * data in a single Tx descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			log(LOG_ERR, "no desc available\n");
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	    (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}


/**
 * t3_free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
 * queue set must be quiesced prior to calling this.
 */
void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);

	for (i = 0; i < SGE_TXQ_PER_SET; i++)
		if (q->txq[i].txq_mr.br_ring != NULL) {
			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
			mtx_destroy(&q->txq[i].txq_mr.br_lock);
		}
	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
			MTX_DESTROY(&q->txq[i].lock);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

	tcp_lro_free(&q->lro.ctrl);

	bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
#endif
	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs. This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine. This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context. In the latter
 * case it also disables any pending queue restart tasklets. Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasklets as it cannot wait; however, the tasklets will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasklets will be stopped
 * if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 *
	 * XXX
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}

/**
 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
 * @q: the Tx queue to reclaim descriptors from
 * @reclaimable: the number of descriptors to reclaim
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers. Called with the Tx queue lock held.
 */
void
t3_free_tx_desc(struct sge_txq *q, int reclaimable)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	txsd = &q->sdesc[cidx];
	DPRINTF("reclaiming %d WR\n", reclaimable);
	mtx_assert(&q->lock, MA_OWNED);
	while (reclaimable--) {
		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
			txsd->mi.mi_base = NULL;

#if defined(DIAGNOSTIC) && 0
			if (m_get_priority(txsd->m[0]) != cidx)
				printf("pri=%d cidx=%d\n",
				    (int)m_get_priority(txsd->m[0]), cidx);
#endif

		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;

}

void
t3_free_tx_desc_all(struct sge_txq *q)
{
	int i;
	struct tx_sw_desc *txsd;

	for (i = 0; i < q->size; i++) {
		txsd = &q->sdesc[i];
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			bzero(&txsd->mi, sizeof(txsd->mi));
		}
	}
}

/**
 * is_new_response - check if a response is newly written
 * @r: the response descriptor
 * @q: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us.
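 * A value of 2500 in these units corresponds to a 250 microsecond holdoff.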
*/ 1950 #define NOMEM_INTR_DELAY 2500 1951 1952 /** 1953 * write_ofld_wr - write an offload work request 1954 * @adap: the adapter 1955 * @m: the packet to send 1956 * @q: the Tx queue 1957 * @pidx: index of the first Tx descriptor to write 1958 * @gen: the generation value to use 1959 * @ndesc: number of descriptors the packet will occupy 1960 * 1961 * Write an offload work request to send the supplied packet. The packet 1962 * data already carry the work request with most fields populated. 1963 */ 1964 static void 1965 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1966 struct sge_txq *q, unsigned int pidx, 1967 unsigned int gen, unsigned int ndesc, 1968 bus_dma_segment_t *segs, unsigned int nsegs) 1969 { 1970 unsigned int sgl_flits, flits; 1971 struct work_request_hdr *from; 1972 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1973 struct tx_desc *d = &q->desc[pidx]; 1974 struct txq_state txqs; 1975 1976 if (immediate(m) && nsegs == 0) { 1977 write_imm(d, m, m->m_len, gen); 1978 return; 1979 } 1980 1981 /* Only TX_DATA builds SGLs */ 1982 from = mtod(m, struct work_request_hdr *); 1983 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 1984 1985 flits = m->m_len / 8; 1986 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1987 1988 make_sgl(sgp, segs, nsegs); 1989 sgl_flits = sgl_len(nsegs); 1990 1991 txqs.gen = gen; 1992 txqs.pidx = pidx; 1993 txqs.compl = 0; 1994 1995 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1996 from->wr_hi, from->wr_lo); 1997 } 1998 1999 /** 2000 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2001 * @m: the packet 2002 * 2003 * Returns the number of Tx descriptors needed for the given offload 2004 * packet. These packets are already fully constructed. 2005 */ 2006 static __inline unsigned int 2007 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2008 { 2009 unsigned int flits, cnt = 0; 2010 int ndescs; 2011 2012 if (m->m_len <= WR_LEN && nsegs == 0) 2013 return (1); /* packet fits as immediate data */ 2014 2015 if (m->m_flags & M_IOVEC) 2016 cnt = mtomv(m)->mv_count; 2017 else 2018 cnt = nsegs; 2019 2020 /* headers */ 2021 flits = m->m_len / 8; 2022 2023 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2024 2025 CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d", 2026 flits, sgl_len(cnt), nsegs, ndescs); 2027 2028 return (ndescs); 2029 } 2030 2031 /** 2032 * ofld_xmit - send a packet through an offload queue 2033 * @adap: the adapter 2034 * @q: the Tx offload queue 2035 * @m: the packet 2036 * 2037 * Send an offload packet through an SGE offload queue. 
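 *
 * Descriptor accounting used below (mirrors the code): the producer index
 * advances by the number of descriptors consumed and the generation bit
 * flips on wrap-around, e.g.
 *
 *	pidx = q->pidx;
 *	q->pidx += ndesc;
 *	if (q->pidx >= q->size) {
 *		q->pidx -= q->size;
 *		q->gen ^= 1;
 *	}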
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret, nsegs;
	unsigned int ndesc;
	unsigned int pidx, gen;
	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
	struct tx_sw_desc *stx;

	nsegs = m_get_sgllen(m);
	vsegs = m_get_sgl(m);
	ndesc = calc_tx_descs_ofld(m, nsegs);
	busdma_map_sgl(vsegs, segs, nsegs);

	stx = &q->sdesc[q->pidx];
	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_(q, 16);
	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			printf("no ofld desc avail\n");

			m_set_priority(m, ndesc);     /* save for restart */
			mtx_unlock(&q->lock);
			return (EINTR);
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, skb->len, skb->len - skb->data_len,
	    skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);
	return (0);
}

/**
 * restart_offloadq - restart a suspended offload queue
 * @qs: the queue set containing the offload queue
 *
 * Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	bus_dma_segment_t segs[TX_MAX_SEGS];
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
	int nsegs, cleaned;

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx_(q, 16);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/**
 * queue_set - return the queue set a packet should use
 * @m: the packet
 *
 * Maps a packet to the SGE queue set it should use.  The desired queue
 * set is carried in bits 1-3 in the packet's priority.
2158 */ 2159 static __inline int 2160 queue_set(const struct mbuf *m) 2161 { 2162 return m_get_priority(m) >> 1; 2163 } 2164 2165 /** 2166 * is_ctrl_pkt - return whether an offload packet is a control packet 2167 * @m: the packet 2168 * 2169 * Determines whether an offload packet should use an OFLD or a CTRL 2170 * Tx queue. This is indicated by bit 0 in the packet's priority. 2171 */ 2172 static __inline int 2173 is_ctrl_pkt(const struct mbuf *m) 2174 { 2175 return m_get_priority(m) & 1; 2176 } 2177 2178 /** 2179 * t3_offload_tx - send an offload packet 2180 * @tdev: the offload device to send to 2181 * @m: the packet 2182 * 2183 * Sends an offload packet. We use the packet priority to select the 2184 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2185 * should be sent as regular or control, bits 1-3 select the queue set. 2186 */ 2187 int 2188 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2189 { 2190 adapter_t *adap = tdev2adap(tdev); 2191 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2192 2193 if (__predict_false(is_ctrl_pkt(m))) 2194 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 2195 2196 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 2197 } 2198 2199 /** 2200 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2201 * @tdev: the offload device that will be receiving the packets 2202 * @q: the SGE response queue that assembled the bundle 2203 * @m: the partial bundle 2204 * @n: the number of packets in the bundle 2205 * 2206 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2207 */ 2208 static __inline void 2209 deliver_partial_bundle(struct t3cdev *tdev, 2210 struct sge_rspq *q, 2211 struct mbuf *mbufs[], int n) 2212 { 2213 if (n) { 2214 q->offload_bundles++; 2215 cxgb_ofld_recv(tdev, mbufs, n); 2216 } 2217 } 2218 2219 static __inline int 2220 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2221 struct mbuf *m, struct mbuf *rx_gather[], 2222 unsigned int gather_idx) 2223 { 2224 2225 rq->offload_pkts++; 2226 m->m_pkthdr.header = mtod(m, void *); 2227 rx_gather[gather_idx++] = m; 2228 if (gather_idx == RX_BUNDLE_SIZE) { 2229 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2230 gather_idx = 0; 2231 rq->offload_bundles++; 2232 } 2233 return (gather_idx); 2234 } 2235 2236 static void 2237 restart_tx(struct sge_qset *qs) 2238 { 2239 struct adapter *sc = qs->port->adapter; 2240 2241 2242 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2243 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2244 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2245 qs->txq[TXQ_OFLD].restarts++; 2246 DPRINTF("restarting TXQ_OFLD\n"); 2247 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2248 } 2249 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2250 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2251 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2252 qs->txq[TXQ_CTRL].in_use); 2253 2254 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2255 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2256 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2257 qs->txq[TXQ_CTRL].restarts++; 2258 DPRINTF("restarting TXQ_CTRL\n"); 2259 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2260 } 2261 } 2262 2263 /** 2264 * t3_sge_alloc_qset - initialize an SGE queue set 2265 * @sc: the controller softc 2266 * @id: the queue set id 2267 * @nports: how many Ethernet ports will be using this queue set 2268 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2269 * @p: configuration parameters for this 
queue set 2270 * @ntxq: number of Tx queues for the queue set 2271 * @pi: port info for queue set 2272 * 2273 * Allocate resources and initialize an SGE queue set. A queue set 2274 * comprises a response queue, two Rx free-buffer queues, and up to 3 2275 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2276 * queue, offload queue, and control queue. 2277 */ 2278 int 2279 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2280 const struct qset_params *p, int ntxq, struct port_info *pi) 2281 { 2282 struct sge_qset *q = &sc->sge.qs[id]; 2283 int i, header_size, ret = 0; 2284 2285 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2286 if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *), 2287 M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { 2288 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2289 goto err; 2290 } 2291 q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0; 2292 q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size; 2293 mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF); 2294 } 2295 2296 init_qset_cntxt(q, id); 2297 q->idx = id; 2298 2299 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2300 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2301 &q->fl[0].desc, &q->fl[0].sdesc, 2302 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2303 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2304 printf("error %d from alloc ring fl0\n", ret); 2305 goto err; 2306 } 2307 2308 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2309 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2310 &q->fl[1].desc, &q->fl[1].sdesc, 2311 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2312 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2313 printf("error %d from alloc ring fl1\n", ret); 2314 goto err; 2315 } 2316 2317 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2318 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2319 &q->rspq.desc_tag, &q->rspq.desc_map, 2320 NULL, NULL)) != 0) { 2321 printf("error %d from alloc ring rspq\n", ret); 2322 goto err; 2323 } 2324 2325 for (i = 0; i < ntxq; ++i) { 2326 /* 2327 * The control queue always uses immediate data so does not 2328 * need to keep track of any mbufs. 2329 * XXX Placeholder for future TOE support. 2330 */ 2331 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2332 2333 if ((ret = alloc_ring(sc, p->txq_size[i], 2334 sizeof(struct tx_desc), sz, 2335 &q->txq[i].phys_addr, &q->txq[i].desc, 2336 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2337 &q->txq[i].desc_map, 2338 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2339 printf("error %d from alloc ring tx %i\n", ret, i); 2340 goto err; 2341 } 2342 mbufq_init(&q->txq[i].sendq); 2343 q->txq[i].gen = 1; 2344 q->txq[i].size = p->txq_size[i]; 2345 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2346 device_get_unit(sc->dev), irq_vec_idx, i); 2347 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2348 } 2349 2350 q->txq[TXQ_ETH].port = pi; 2351 2352 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2353 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2354 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2355 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2356 2357 q->fl[0].gen = q->fl[1].gen = 1; 2358 q->fl[0].size = p->fl_size; 2359 q->fl[1].size = p->jumbo_size; 2360 2361 q->rspq.gen = 1; 2362 q->rspq.cidx = 0; 2363 q->rspq.size = p->rspq_size; 2364 2365 2366 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 2367 q->txq[TXQ_ETH].stop_thres = nports * 2368 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2369 2370 q->fl[0].buf_size = (MCLBYTES - header_size); 2371 q->fl[0].zone = zone_clust; 2372 q->fl[0].type = EXT_CLUSTER; 2373 #if __FreeBSD_version > 800000 2374 if (cxgb_use_16k_clusters) { 2375 q->fl[1].buf_size = MJUM16BYTES - header_size; 2376 q->fl[1].zone = zone_jumbo16; 2377 q->fl[1].type = EXT_JUMBO16; 2378 } else { 2379 q->fl[1].buf_size = MJUM9BYTES - header_size; 2380 q->fl[1].zone = zone_jumbo9; 2381 q->fl[1].type = EXT_JUMBO9; 2382 } 2383 #else 2384 q->fl[1].buf_size = MJUMPAGESIZE - header_size; 2385 q->fl[1].zone = zone_jumbop; 2386 q->fl[1].type = EXT_JUMBOP; 2387 #endif 2388 2389 /* 2390 * We allocate and setup the lro_ctrl structure irrespective of whether 2391 * lro is available and/or enabled. 
2392 */ 2393 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2394 ret = tcp_lro_init(&q->lro.ctrl); 2395 if (ret) { 2396 printf("error %d from tcp_lro_init\n", ret); 2397 goto err; 2398 } 2399 q->lro.ctrl.ifp = pi->ifp; 2400 2401 mtx_lock_spin(&sc->sge.reg_lock); 2402 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2403 q->rspq.phys_addr, q->rspq.size, 2404 q->fl[0].buf_size, 1, 0); 2405 if (ret) { 2406 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2407 goto err_unlock; 2408 } 2409 2410 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2411 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2412 q->fl[i].phys_addr, q->fl[i].size, 2413 q->fl[i].buf_size, p->cong_thres, 1, 2414 0); 2415 if (ret) { 2416 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2417 goto err_unlock; 2418 } 2419 } 2420 2421 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2422 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2423 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2424 1, 0); 2425 if (ret) { 2426 printf("error %d from t3_sge_init_ecntxt\n", ret); 2427 goto err_unlock; 2428 } 2429 2430 if (ntxq > 1) { 2431 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2432 USE_GTS, SGE_CNTXT_OFLD, id, 2433 q->txq[TXQ_OFLD].phys_addr, 2434 q->txq[TXQ_OFLD].size, 0, 1, 0); 2435 if (ret) { 2436 printf("error %d from t3_sge_init_ecntxt\n", ret); 2437 goto err_unlock; 2438 } 2439 } 2440 2441 if (ntxq > 2) { 2442 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2443 SGE_CNTXT_CTRL, id, 2444 q->txq[TXQ_CTRL].phys_addr, 2445 q->txq[TXQ_CTRL].size, 2446 q->txq[TXQ_CTRL].token, 1, 0); 2447 if (ret) { 2448 printf("error %d from t3_sge_init_ecntxt\n", ret); 2449 goto err_unlock; 2450 } 2451 } 2452 2453 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2454 device_get_unit(sc->dev), irq_vec_idx); 2455 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2456 2457 mtx_unlock_spin(&sc->sge.reg_lock); 2458 t3_update_qset_coalesce(q, p); 2459 q->port = pi; 2460 2461 refill_fl(sc, &q->fl[0], q->fl[0].size); 2462 refill_fl(sc, &q->fl[1], q->fl[1].size); 2463 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2464 2465 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2466 V_NEWTIMER(q->rspq.holdoff_tmr)); 2467 2468 return (0); 2469 2470 err_unlock: 2471 mtx_unlock_spin(&sc->sge.reg_lock); 2472 err: 2473 t3_free_qset(sc, q); 2474 2475 return (ret); 2476 } 2477 2478 /* 2479 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2480 * ethernet data. Hardware assistance with various checksums and any vlan tag 2481 * will also be taken into account here. 
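 *
 * The final adjustment below is plain pointer arithmetic: with ethpad == 2
 * the mbuf data pointer advances by sizeof(*cpl) + 2 bytes and m_pkthdr.len
 * and m_len shrink by the same amount.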
2482 */ 2483 void 2484 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2485 { 2486 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2487 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2488 struct ifnet *ifp = pi->ifp; 2489 2490 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2491 2492 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2493 cpl->csum_valid && cpl->csum == 0xffff) { 2494 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2495 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2496 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2497 m->m_pkthdr.csum_data = 0xffff; 2498 } 2499 /* 2500 * XXX need to add VLAN support for 6.x 2501 */ 2502 #ifdef VLAN_SUPPORTED 2503 if (__predict_false(cpl->vlan_valid)) { 2504 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2505 m->m_flags |= M_VLANTAG; 2506 } 2507 #endif 2508 2509 m->m_pkthdr.rcvif = ifp; 2510 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2511 #ifndef DISABLE_MBUF_IOVEC 2512 m_explode(m); 2513 #endif 2514 /* 2515 * adjust after conversion to mbuf chain 2516 */ 2517 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2518 m->m_len -= (sizeof(*cpl) + ethpad); 2519 m->m_data += (sizeof(*cpl) + ethpad); 2520 } 2521 2522 static void 2523 ext_free_handler(void *arg1, void * arg2) 2524 { 2525 uintptr_t type = (uintptr_t)arg2; 2526 uma_zone_t zone; 2527 struct mbuf *m; 2528 2529 m = arg1; 2530 zone = m_getzonefromtype(type); 2531 m->m_ext.ext_type = (int)type; 2532 cxgb_ext_freed++; 2533 cxgb_cache_put(zone, m); 2534 } 2535 2536 static void 2537 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) 2538 { 2539 struct mbuf *m; 2540 int header_size; 2541 2542 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + 2543 sizeof(struct m_ext_) + sizeof(uint32_t); 2544 2545 bzero(cl, header_size); 2546 m = (struct mbuf *)cl; 2547 2548 cxgb_ext_inited++; 2549 SLIST_INIT(&m->m_pkthdr.tags); 2550 m->m_type = MT_DATA; 2551 m->m_flags = flags | M_NOFREE | M_EXT; 2552 m->m_data = cl + header_size; 2553 m->m_ext.ext_buf = cl; 2554 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); 2555 m->m_ext.ext_size = m_getsizefromtype(type); 2556 m->m_ext.ext_free = ext_free_handler; 2557 m->m_ext.ext_arg1 = cl; 2558 m->m_ext.ext_arg2 = (void *)(uintptr_t)type; 2559 m->m_ext.ext_type = EXT_EXTREF; 2560 *(m->m_ext.ref_cnt) = 1; 2561 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 2562 } 2563 2564 2565 /** 2566 * get_packet - return the next ingress packet buffer from a free list 2567 * @adap: the adapter that received the packet 2568 * @drop_thres: # of remaining buffers before we start dropping packets 2569 * @qs: the qset that the SGE free list holding the packet belongs to 2570 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2571 * @r: response descriptor 2572 * 2573 * Get the next packet from a free list and complete setup of the 2574 * sk_buff. If the packet is small we make a copy and recycle the 2575 * original buffer, otherwise we use the original buffer itself. If a 2576 * positive drop threshold is supplied packets are dropped and their 2577 * buffers recycled if (a) the number of remaining buffers is under the 2578 * threshold and the packet is too big to copy, or (b) the packet should 2579 * be copied but there is no memory for the copy. 
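 *
 * For example, with SGE_RX_COPY_THRES at 128, a 64-byte single-descriptor
 * packet is copied into a freshly allocated mbuf and its cluster is
 * recycled back onto the free list (when recycle_enable is set), whereas
 * a 1500-byte packet hands the receive cluster itself up the stack.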
2580 */ 2581 #ifdef DISABLE_MBUF_IOVEC 2582 2583 static int 2584 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2585 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2586 { 2587 2588 unsigned int len_cq = ntohl(r->len_cq); 2589 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2590 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2591 uint32_t len = G_RSPD_LEN(len_cq); 2592 uint32_t flags = ntohl(r->flags); 2593 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2594 caddr_t cl; 2595 struct mbuf *m, *m0; 2596 int ret = 0; 2597 2598 prefetch(sd->rxsd_cl); 2599 2600 fl->credits--; 2601 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2602 2603 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2604 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2605 goto skip_recycle; 2606 cl = mtod(m0, void *); 2607 memcpy(cl, sd->data, len); 2608 recycle_rx_buf(adap, fl, fl->cidx); 2609 m = m0; 2610 m0->m_len = len; 2611 } else { 2612 skip_recycle: 2613 2614 bus_dmamap_unload(fl->entry_tag, sd->map); 2615 cl = sd->rxsd_cl; 2616 m = m0 = (struct mbuf *)cl; 2617 2618 if ((sopeop == RSPQ_SOP_EOP) || 2619 (sopeop == RSPQ_SOP)) 2620 flags = M_PKTHDR; 2621 init_cluster_mbuf(cl, flags, fl->type, fl->zone); 2622 m0->m_len = len; 2623 } 2624 switch(sopeop) { 2625 case RSPQ_SOP_EOP: 2626 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2627 mh->mh_head = mh->mh_tail = m; 2628 m->m_pkthdr.len = len; 2629 ret = 1; 2630 break; 2631 case RSPQ_NSOP_NEOP: 2632 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2633 if (mh->mh_tail == NULL) { 2634 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2635 m_freem(m); 2636 break; 2637 } 2638 mh->mh_tail->m_next = m; 2639 mh->mh_tail = m; 2640 mh->mh_head->m_pkthdr.len += len; 2641 ret = 0; 2642 break; 2643 case RSPQ_SOP: 2644 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2645 m->m_pkthdr.len = len; 2646 mh->mh_head = mh->mh_tail = m; 2647 ret = 0; 2648 break; 2649 case RSPQ_EOP: 2650 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2651 mh->mh_head->m_pkthdr.len += len; 2652 mh->mh_tail->m_next = m; 2653 mh->mh_tail = m; 2654 ret = 1; 2655 break; 2656 } 2657 if (++fl->cidx == fl->size) 2658 fl->cidx = 0; 2659 2660 return (ret); 2661 } 2662 2663 #else 2664 2665 static int 2666 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2667 struct mbuf **m, struct rsp_desc *r) 2668 { 2669 2670 unsigned int len_cq = ntohl(r->len_cq); 2671 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2672 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2673 uint32_t len = G_RSPD_LEN(len_cq); 2674 uint32_t flags = ntohl(r->flags); 2675 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2676 void *cl; 2677 int ret = 0; 2678 struct mbuf *m0; 2679 #if 0 2680 if ((sd + 1 )->rxsd_cl) 2681 prefetch((sd + 1)->rxsd_cl); 2682 if ((sd + 2)->rxsd_cl) 2683 prefetch((sd + 2)->rxsd_cl); 2684 #endif 2685 DPRINTF("rx cpu=%d\n", curcpu); 2686 fl->credits--; 2687 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2688 2689 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2690 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2691 goto skip_recycle; 2692 cl = mtod(m0, void *); 2693 memcpy(cl, sd->data, len); 2694 recycle_rx_buf(adap, fl, fl->cidx); 2695 *m = m0; 2696 } else { 2697 skip_recycle: 2698 bus_dmamap_unload(fl->entry_tag, sd->map); 2699 cl = sd->rxsd_cl; 2700 *m = m0 = (struct mbuf *)cl; 2701 } 2702 2703 switch(sopeop) { 2704 case RSPQ_SOP_EOP: 2705 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2706 if (cl == sd->rxsd_cl) 2707 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); 2708 m0->m_len = m0->m_pkthdr.len = len; 2709 ret = 1; 2710 goto done; 2711 break; 2712 case RSPQ_NSOP_NEOP: 2713 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2714 panic("chaining unsupported"); 2715 ret = 0; 2716 break; 2717 case RSPQ_SOP: 2718 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2719 panic("chaining unsupported"); 2720 m_iovinit(m0); 2721 ret = 0; 2722 break; 2723 case RSPQ_EOP: 2724 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2725 panic("chaining unsupported"); 2726 ret = 1; 2727 break; 2728 } 2729 panic("append not supported"); 2730 #if 0 2731 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); 2732 #endif 2733 done: 2734 if (++fl->cidx == fl->size) 2735 fl->cidx = 0; 2736 2737 return (ret); 2738 } 2739 #endif 2740 /** 2741 * handle_rsp_cntrl_info - handles control information in a response 2742 * @qs: the queue set corresponding to the response 2743 * @flags: the response control flags 2744 * 2745 * Handles the control information of an SGE response, such as GTS 2746 * indications and completion credits for the queue set's Tx queues. 2747 * HW coalesces credits, we don't do any extra SW coalescing. 2748 */ 2749 static __inline void 2750 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2751 { 2752 unsigned int credits; 2753 2754 #if USE_GTS 2755 if (flags & F_RSPD_TXQ0_GTS) 2756 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2757 #endif 2758 credits = G_RSPD_TXQ0_CR(flags); 2759 if (credits) 2760 qs->txq[TXQ_ETH].processed += credits; 2761 2762 credits = G_RSPD_TXQ2_CR(flags); 2763 if (credits) 2764 qs->txq[TXQ_CTRL].processed += credits; 2765 2766 # if USE_GTS 2767 if (flags & F_RSPD_TXQ1_GTS) 2768 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2769 # endif 2770 credits = G_RSPD_TXQ1_CR(flags); 2771 if (credits) 2772 qs->txq[TXQ_OFLD].processed += credits; 2773 2774 } 2775 2776 static void 2777 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2778 unsigned int sleeping) 2779 { 2780 ; 2781 } 2782 2783 /** 2784 * process_responses - process responses from an SGE response queue 2785 * @adap: the adapter 2786 * @qs: the queue set to which the response queue belongs 2787 * @budget: how many responses can be processed in this round 2788 * 2789 * Process responses from an SGE response queue up to the supplied budget. 
2790 * Responses include received packets as well as credits and other events 2791 * for the queues that belong to the response queue's queue set. 2792 * A negative budget is effectively unlimited. 2793 * 2794 * Additionally choose the interrupt holdoff time for the next interrupt 2795 * on this queue. If the system is under memory shortage use a fairly 2796 * long delay to help recovery. 2797 */ 2798 int 2799 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2800 { 2801 struct sge_rspq *rspq = &qs->rspq; 2802 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2803 int budget_left = budget; 2804 unsigned int sleeping = 0; 2805 int lro_enabled = qs->lro.enabled; 2806 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2807 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2808 int ngathered = 0; 2809 #ifdef DEBUG 2810 static int last_holdoff = 0; 2811 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2812 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2813 last_holdoff = rspq->holdoff_tmr; 2814 } 2815 #endif 2816 rspq->next_holdoff = rspq->holdoff_tmr; 2817 2818 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2819 int eth, eop = 0, ethpad = 0; 2820 uint32_t flags = ntohl(r->flags); 2821 uint32_t rss_csum = *(const uint32_t *)r; 2822 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2823 2824 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2825 2826 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2827 struct mbuf *m; 2828 2829 if (cxgb_debug) 2830 printf("async notification\n"); 2831 2832 if (rspq->rspq_mh.mh_head == NULL) { 2833 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2834 m = rspq->rspq_mh.mh_head; 2835 } else { 2836 m = m_gethdr(M_DONTWAIT, MT_DATA); 2837 } 2838 2839 /* XXX m is lost here if rspq->rspq_mbuf is not NULL */ 2840 2841 if (m == NULL) 2842 goto no_mem; 2843 2844 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2845 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2846 *mtod(m, char *) = CPL_ASYNC_NOTIF; 2847 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 2848 eop = 1; 2849 rspq->async_notif++; 2850 goto skip; 2851 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2852 struct mbuf *m = NULL; 2853 2854 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2855 r->rss_hdr.opcode, rspq->cidx); 2856 if (rspq->rspq_mh.mh_head == NULL) 2857 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2858 else 2859 m = m_gethdr(M_DONTWAIT, MT_DATA); 2860 2861 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 2862 no_mem: 2863 rspq->next_holdoff = NOMEM_INTR_DELAY; 2864 budget_left--; 2865 break; 2866 } 2867 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 2868 eop = 1; 2869 rspq->imm_data++; 2870 } else if (r->len_cq) { 2871 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2872 2873 #ifdef DISABLE_MBUF_IOVEC 2874 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 2875 #else 2876 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); 2877 #endif 2878 #ifdef IFNET_MULTIQUEUE 2879 rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash; 2880 #endif 2881 ethpad = 2; 2882 } else { 2883 DPRINTF("pure response\n"); 2884 rspq->pure_rsps++; 2885 } 2886 skip: 2887 if (flags & RSPD_CTRL_MASK) { 2888 sleeping |= flags & RSPD_GTS_MASK; 2889 handle_rsp_cntrl_info(qs, flags); 2890 } 2891 2892 r++; 2893 if (__predict_false(++rspq->cidx == rspq->size)) { 2894 rspq->cidx = 0; 2895 rspq->gen ^= 1; 2896 r = rspq->desc; 2897 } 2898 prefetch(r); 2899 if (++rspq->credits >= (rspq->size / 4)) { 2900 refill_rspq(adap, rspq, rspq->credits); 2901 rspq->credits = 0; 2902 } 2903 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); 2904 2905 if (!eth && eop) { 2906 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2907 /* 2908 * XXX size mismatch 2909 */ 2910 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2911 2912 2913 ngathered = rx_offload(&adap->tdev, rspq, 2914 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2915 rspq->rspq_mh.mh_head = NULL; 2916 DPRINTF("received offload packet\n"); 2917 2918 } else if (eth && eop) { 2919 struct mbuf *m = rspq->rspq_mh.mh_head; 2920 prefetch(mtod(m, uint8_t *)); 2921 prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES); 2922 2923 t3_rx_eth(adap, rspq, m, ethpad); 2924 if (lro_enabled && lro_ctrl->lro_cnt && 2925 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 2926 /* successfully queue'd for LRO */ 2927 } else { 2928 /* 2929 * LRO not enabled, packet unsuitable for LRO, 2930 * or unable to queue. Pass it up right now in 2931 * either case. 2932 */ 2933 struct ifnet *ifp = m->m_pkthdr.rcvif; 2934 (*ifp->if_input)(ifp, m); 2935 } 2936 DPRINTF("received tunnel packet\n"); 2937 rspq->rspq_mh.mh_head = NULL; 2938 2939 } 2940 __refill_fl_lt(adap, &qs->fl[0], 32); 2941 __refill_fl_lt(adap, &qs->fl[1], 32); 2942 --budget_left; 2943 } 2944 2945 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2946 2947 /* Flush LRO */ 2948 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 2949 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 2950 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 2951 tcp_lro_flush(lro_ctrl, queued); 2952 } 2953 2954 if (sleeping) 2955 check_ring_db(adap, qs, sleeping); 2956 2957 smp_mb(); /* commit Tx queue processed updates */ 2958 if (__predict_false(qs->txq_stopped > 1)) { 2959 printf("restarting tx on %p\n", qs); 2960 2961 restart_tx(qs); 2962 } 2963 2964 __refill_fl_lt(adap, &qs->fl[0], 512); 2965 __refill_fl_lt(adap, &qs->fl[1], 512); 2966 budget -= budget_left; 2967 return (budget); 2968 } 2969 2970 /* 2971 * A helper function that processes responses and issues GTS. 2972 */ 2973 static __inline int 2974 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2975 { 2976 int work; 2977 static int last_holdoff = 0; 2978 2979 work = process_responses(adap, rspq_to_qset(rq), -1); 2980 2981 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2982 printf("next_holdoff=%d\n", rq->next_holdoff); 2983 last_holdoff = rq->next_holdoff; 2984 } 2985 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2986 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2987 2988 return (work); 2989 } 2990 2991 2992 /* 2993 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 
2994 * Handles data events from SGE response queues as well as error and other 2995 * async events as they all use the same interrupt pin. We use one SGE 2996 * response queue per port in this mode and protect all response queues with 2997 * queue 0's lock. 2998 */ 2999 void 3000 t3b_intr(void *data) 3001 { 3002 uint32_t i, map; 3003 adapter_t *adap = data; 3004 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3005 3006 t3_write_reg(adap, A_PL_CLI, 0); 3007 map = t3_read_reg(adap, A_SG_DATA_INTR); 3008 3009 if (!map) 3010 return; 3011 3012 if (__predict_false(map & F_ERRINTR)) 3013 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3014 3015 mtx_lock(&q0->lock); 3016 for_each_port(adap, i) 3017 if (map & (1 << i)) 3018 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3019 mtx_unlock(&q0->lock); 3020 } 3021 3022 /* 3023 * The MSI interrupt handler. This needs to handle data events from SGE 3024 * response queues as well as error and other async events as they all use 3025 * the same MSI vector. We use one SGE response queue per port in this mode 3026 * and protect all response queues with queue 0's lock. 3027 */ 3028 void 3029 t3_intr_msi(void *data) 3030 { 3031 adapter_t *adap = data; 3032 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3033 int i, new_packets = 0; 3034 3035 mtx_lock(&q0->lock); 3036 3037 for_each_port(adap, i) 3038 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3039 new_packets = 1; 3040 mtx_unlock(&q0->lock); 3041 if (new_packets == 0) 3042 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3043 } 3044 3045 void 3046 t3_intr_msix(void *data) 3047 { 3048 struct sge_qset *qs = data; 3049 adapter_t *adap = qs->port->adapter; 3050 struct sge_rspq *rspq = &qs->rspq; 3051 #ifndef IFNET_MULTIQUEUE 3052 mtx_lock(&rspq->lock); 3053 #else 3054 if (mtx_trylock(&rspq->lock)) 3055 #endif 3056 { 3057 3058 if (process_responses_gts(adap, rspq) == 0) 3059 rspq->unhandled_irqs++; 3060 mtx_unlock(&rspq->lock); 3061 } 3062 } 3063 3064 #define QDUMP_SBUF_SIZE 32 * 400 3065 static int 3066 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3067 { 3068 struct sge_rspq *rspq; 3069 struct sge_qset *qs; 3070 int i, err, dump_end, idx; 3071 static int multiplier = 1; 3072 struct sbuf *sb; 3073 struct rsp_desc *rspd; 3074 uint32_t data[4]; 3075 3076 rspq = arg1; 3077 qs = rspq_to_qset(rspq); 3078 if (rspq->rspq_dump_count == 0) 3079 return (0); 3080 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3081 log(LOG_WARNING, 3082 "dump count is too large %d\n", rspq->rspq_dump_count); 3083 rspq->rspq_dump_count = 0; 3084 return (EINVAL); 3085 } 3086 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3087 log(LOG_WARNING, 3088 "dump start of %d is greater than queue size\n", 3089 rspq->rspq_dump_start); 3090 rspq->rspq_dump_start = 0; 3091 return (EINVAL); 3092 } 3093 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3094 if (err) 3095 return (err); 3096 retry_sbufops: 3097 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3098 3099 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3100 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3101 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3102 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3103 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3104 3105 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3106 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3107 3108 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3109 for (i = 
rspq->rspq_dump_start; i < dump_end; i++) { 3110 idx = i & (RSPQ_Q_SIZE-1); 3111 3112 rspd = &rspq->desc[idx]; 3113 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3114 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3115 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3116 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3117 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3118 be32toh(rspd->len_cq), rspd->intr_gen); 3119 } 3120 if (sbuf_overflowed(sb)) { 3121 sbuf_delete(sb); 3122 multiplier++; 3123 goto retry_sbufops; 3124 } 3125 sbuf_finish(sb); 3126 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3127 sbuf_delete(sb); 3128 return (err); 3129 } 3130 3131 static int 3132 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3133 { 3134 struct sge_txq *txq; 3135 struct sge_qset *qs; 3136 int i, j, err, dump_end; 3137 static int multiplier = 1; 3138 struct sbuf *sb; 3139 struct tx_desc *txd; 3140 uint32_t *WR, wr_hi, wr_lo, gen; 3141 uint32_t data[4]; 3142 3143 txq = arg1; 3144 qs = txq_to_qset(txq, TXQ_ETH); 3145 if (txq->txq_dump_count == 0) { 3146 return (0); 3147 } 3148 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3149 log(LOG_WARNING, 3150 "dump count is too large %d\n", txq->txq_dump_count); 3151 txq->txq_dump_count = 1; 3152 return (EINVAL); 3153 } 3154 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3155 log(LOG_WARNING, 3156 "dump start of %d is greater than queue size\n", 3157 txq->txq_dump_start); 3158 txq->txq_dump_start = 0; 3159 return (EINVAL); 3160 } 3161 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3162 if (err) 3163 return (err); 3164 3165 3166 retry_sbufops: 3167 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3168 3169 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3170 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3171 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3172 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3173 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3174 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3175 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3176 txq->txq_dump_start, 3177 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3178 3179 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3180 for (i = txq->txq_dump_start; i < dump_end; i++) { 3181 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3182 WR = (uint32_t *)txd->flit; 3183 wr_hi = ntohl(WR[0]); 3184 wr_lo = ntohl(WR[1]); 3185 gen = G_WR_GEN(wr_lo); 3186 3187 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3188 wr_hi, wr_lo, gen); 3189 for (j = 2; j < 30; j += 4) 3190 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3191 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3192 3193 } 3194 if (sbuf_overflowed(sb)) { 3195 sbuf_delete(sb); 3196 multiplier++; 3197 goto retry_sbufops; 3198 } 3199 sbuf_finish(sb); 3200 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3201 sbuf_delete(sb); 3202 return (err); 3203 } 3204 3205 static int 3206 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3207 { 3208 struct sge_txq *txq; 3209 struct sge_qset *qs; 3210 int i, j, err, dump_end; 3211 static int multiplier = 1; 3212 struct sbuf *sb; 3213 struct tx_desc *txd; 3214 uint32_t *WR, wr_hi, wr_lo, gen; 3215 3216 txq = arg1; 3217 qs = txq_to_qset(txq, TXQ_CTRL); 3218 if (txq->txq_dump_count == 0) { 3219 return (0); 3220 } 3221 if (txq->txq_dump_count > 256) { 3222 log(LOG_WARNING, 3223 "dump 
count is too large %d\n", txq->txq_dump_count); 3224 txq->txq_dump_count = 1; 3225 return (EINVAL); 3226 } 3227 if (txq->txq_dump_start > 255) { 3228 log(LOG_WARNING, 3229 "dump start of %d is greater than queue size\n", 3230 txq->txq_dump_start); 3231 txq->txq_dump_start = 0; 3232 return (EINVAL); 3233 } 3234 3235 retry_sbufops: 3236 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3237 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3238 txq->txq_dump_start, 3239 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3240 3241 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3242 for (i = txq->txq_dump_start; i < dump_end; i++) { 3243 txd = &txq->desc[i & (255)]; 3244 WR = (uint32_t *)txd->flit; 3245 wr_hi = ntohl(WR[0]); 3246 wr_lo = ntohl(WR[1]); 3247 gen = G_WR_GEN(wr_lo); 3248 3249 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3250 wr_hi, wr_lo, gen); 3251 for (j = 2; j < 30; j += 4) 3252 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3253 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3254 3255 } 3256 if (sbuf_overflowed(sb)) { 3257 sbuf_delete(sb); 3258 multiplier++; 3259 goto retry_sbufops; 3260 } 3261 sbuf_finish(sb); 3262 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3263 sbuf_delete(sb); 3264 return (err); 3265 } 3266 3267 static int 3268 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3269 { 3270 adapter_t *sc = arg1; 3271 struct qset_params *qsp = &sc->params.sge.qset[0]; 3272 int coalesce_usecs; 3273 struct sge_qset *qs; 3274 int i, j, err, nqsets = 0; 3275 struct mtx *lock; 3276 3277 if ((sc->flags & FULL_INIT_DONE) == 0) 3278 return (ENXIO); 3279 3280 coalesce_usecs = qsp->coalesce_usecs; 3281 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3282 3283 if (err != 0) { 3284 return (err); 3285 } 3286 if (coalesce_usecs == qsp->coalesce_usecs) 3287 return (0); 3288 3289 for (i = 0; i < sc->params.nports; i++) 3290 for (j = 0; j < sc->port[i].nqsets; j++) 3291 nqsets++; 3292 3293 coalesce_usecs = max(1, coalesce_usecs); 3294 3295 for (i = 0; i < nqsets; i++) { 3296 qs = &sc->sge.qs[i]; 3297 qsp = &sc->params.sge.qset[i]; 3298 qsp->coalesce_usecs = coalesce_usecs; 3299 3300 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3301 &sc->sge.qs[0].rspq.lock; 3302 3303 mtx_lock(lock); 3304 t3_update_qset_coalesce(qs, qsp); 3305 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3306 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3307 mtx_unlock(lock); 3308 } 3309 3310 return (0); 3311 } 3312 3313 3314 void 3315 t3_add_attach_sysctls(adapter_t *sc) 3316 { 3317 struct sysctl_ctx_list *ctx; 3318 struct sysctl_oid_list *children; 3319 3320 ctx = device_get_sysctl_ctx(sc->dev); 3321 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3322 3323 /* random information */ 3324 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3325 "firmware_version", 3326 CTLFLAG_RD, &sc->fw_version, 3327 0, "firmware version"); 3328 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3329 "hw_revision", 3330 CTLFLAG_RD, &sc->params.rev, 3331 0, "chip model"); 3332 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3333 "enable_debug", 3334 CTLFLAG_RW, &cxgb_debug, 3335 0, "enable verbose debugging output"); 3336 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", 3337 CTLFLAG_RD, &sc->tunq_coalesce, 3338 "#tunneled packets freed"); 3339 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3340 "txq_overrun", 3341 CTLFLAG_RD, &txq_fills, 3342 0, "#times txq overrun"); 3343 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3344 "pcpu_cache_enable", 3345 CTLFLAG_RW, &cxgb_pcpu_cache_enable, 3346 0, "#enable driver local pcpu caches"); 3347 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3348 "cache_alloc", 3349 CTLFLAG_RD, &cxgb_cached_allocations, 3350 0, "#times a cluster was allocated from cache"); 3351 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3352 "cached", 3353 CTLFLAG_RD, &cxgb_cached, 3354 0, "#times a cluster was cached"); 3355 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3356 "ext_freed", 3357 CTLFLAG_RD, &cxgb_ext_freed, 3358 0, "#times a cluster was freed through ext_free"); 3359 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3360 "ext_inited", 3361 CTLFLAG_RD, &cxgb_ext_inited, 3362 0, "#times a cluster was initialized for ext_free"); 3363 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3364 "mbufs_outstanding", 3365 CTLFLAG_RD, &cxgb_mbufs_outstanding, 3366 0, "#mbufs in flight in the driver"); 3367 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3368 "pack_outstanding", 3369 CTLFLAG_RD, &cxgb_pack_outstanding, 3370 0, "#packet in flight in the driver"); 3371 } 3372 3373 3374 static const char *rspq_name = "rspq"; 3375 static const char *txq_names[] = 3376 { 3377 "txq_eth", 3378 "txq_ofld", 3379 "txq_ctrl" 3380 }; 3381 3382 static int 3383 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3384 { 3385 struct port_info *p = arg1; 3386 uint64_t *parg; 3387 3388 if (!p) 3389 return (EINVAL); 3390 3391 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3392 3393 PORT_LOCK(p); 3394 t3_mac_update_stats(&p->mac); 3395 PORT_UNLOCK(p); 3396 3397 return (sysctl_handle_quad(oidp, parg, 0, req)); 3398 } 3399 3400 void 3401 t3_add_configured_sysctls(adapter_t *sc) 3402 { 3403 struct sysctl_ctx_list *ctx; 3404 struct sysctl_oid_list *children; 3405 int i, j; 3406 3407 ctx = device_get_sysctl_ctx(sc->dev); 3408 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3409 3410 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3411 "intr_coal", 3412 CTLTYPE_INT|CTLFLAG_RW, sc, 3413 0, t3_set_coalesce_usecs, 3414 "I", "interrupt coalescing timer (us)"); 3415 3416 for (i = 0; i < sc->params.nports; i++) { 3417 struct port_info *pi = &sc->port[i]; 3418 struct sysctl_oid *poid; 3419 struct sysctl_oid_list *poidlist; 3420 struct mac_stats *mstats = &pi->mac.stats; 3421 3422 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", 
i); 3423 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3424 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3425 poidlist = SYSCTL_CHILDREN(poid); 3426 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3427 "nqsets", CTLFLAG_RD, &pi->nqsets, 3428 0, "#queue sets"); 3429 3430 for (j = 0; j < pi->nqsets; j++) { 3431 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3432 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid; 3433 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist; 3434 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3435 3436 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3437 3438 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3439 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3440 qspoidlist = SYSCTL_CHILDREN(qspoid); 3441 3442 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3443 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3444 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3445 3446 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3447 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3448 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3449 3450 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3451 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3452 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3453 3454 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3455 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3456 lropoidlist = SYSCTL_CHILDREN(lropoid); 3457 3458 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3459 CTLFLAG_RD, &qs->rspq.size, 3460 0, "#entries in response queue"); 3461 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3462 CTLFLAG_RD, &qs->rspq.cidx, 3463 0, "consumer index"); 3464 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3465 CTLFLAG_RD, &qs->rspq.credits, 3466 0, "#credits"); 3467 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3468 CTLFLAG_RD, &qs->rspq.phys_addr, 3469 "physical_address_of the queue"); 3470 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3471 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3472 0, "start rspq dump entry"); 3473 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3474 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3475 0, "#rspq entries to dump"); 3476 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3477 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3478 0, t3_dump_rspq, "A", "dump of the response queue"); 3479 3480 3481 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3482 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3483 0, "#tunneled packets dropped"); 3484 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3485 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3486 0, "#tunneled packets waiting to be sent"); 3487 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3488 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3489 0, "#tunneled packets queue producer index"); 3490 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3491 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3492 0, "#tunneled packets queue consumer index"); 3493 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3494 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3495 0, "#tunneled packets processed by the card"); 3496 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3497 CTLFLAG_RD, &txq->cleaned, 3498 0, "#tunneled packets cleaned"); 3499 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3500 CTLFLAG_RD, &txq->in_use, 3501 0, "#tunneled packet slots in use"); 3502 
SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3503 CTLFLAG_RD, &txq->txq_frees, 3504 "#tunneled packets freed"); 3505 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3506 CTLFLAG_RD, &txq->txq_skipped, 3507 0, "#tunneled packet descriptors skipped"); 3508 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced", 3509 CTLFLAG_RD, &txq->txq_coalesced, 3510 0, "#tunneled packets coalesced"); 3511 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3512 CTLFLAG_RD, &txq->txq_enqueued, 3513 0, "#tunneled packets enqueued to hardware"); 3514 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3515 CTLFLAG_RD, &qs->txq_stopped, 3516 0, "tx queues stopped"); 3517 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3518 CTLFLAG_RD, &txq->phys_addr, 3519 "physical_address_of the queue"); 3520 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3521 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3522 0, "txq generation"); 3523 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3524 CTLFLAG_RD, &txq->cidx, 3525 0, "hardware queue cidx"); 3526 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3527 CTLFLAG_RD, &txq->pidx, 3528 0, "hardware queue pidx"); 3529 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3530 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3531 0, "txq start idx for dump"); 3532 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3533 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3534 0, "txq #entries to dump"); 3535 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3536 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3537 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3538 3539 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3540 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3541 0, "ctrlq start idx for dump"); 3542 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3543 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3544 0, "ctrl #entries to dump"); 3545 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3546 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3547 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3548 3549 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3550 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3551 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3552 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3553 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3554 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3555 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3556 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3557 } 3558 3559 /* Now add a node for mac stats. */ 3560 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3561 CTLFLAG_RD, NULL, "MAC statistics"); 3562 poidlist = SYSCTL_CHILDREN(poid); 3563 3564 /* 3565 * We (ab)use the length argument (arg2) to pass on the offset 3566 * of the data that we are interested in. This is only required 3567 * for the quad counters that are updated from the hardware (we 3568 * make sure that we return the latest value). 3569 * sysctl_handle_macstat first updates *all* the counters from 3570 * the hardware, and then returns the latest value of the 3571 * requested counter. Best would be to update only the 3572 * requested counter from hardware, but t3_mac_update_stats() 3573 * hides all the register details and we don't want to dive into 3574 * all that here. 
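 *
 * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) below passes
 * offsetof(struct mac_stats, tx_octets) in arg2, and
 * sysctl_handle_macstat() turns that back into a counter pointer with
 * (uint64_t *)((uint8_t *)&p->mac.stats + arg2).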
3575 */ 3576 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3577 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3578 sysctl_handle_macstat, "QU", 0) 3579 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3580 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3581 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3582 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3583 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3584 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3585 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3586 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3587 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3588 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3589 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3590 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3591 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3592 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3593 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3594 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3595 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3596 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3597 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3598 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3599 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3600 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3601 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3602 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3603 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3604 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3605 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3606 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3607 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3608 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3609 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3610 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3611 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3612 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3613 CXGB_SYSCTL_ADD_QUAD(rx_short); 3614 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3615 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3616 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3617 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3618 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3619 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3620 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3621 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3622 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3623 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3624 #undef CXGB_SYSCTL_ADD_QUAD 3625 3626 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3627 CTLFLAG_RD, &mstats->a, 0) 3628 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3629 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3630 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3631 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3632 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3633 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3634 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3635 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3636 CXGB_SYSCTL_ADD_ULONG(num_resets); 3637 #undef CXGB_SYSCTL_ADD_ULONG 3638 } 3639 } 3640 3641 /** 3642 * t3_get_desc - dump an SGE descriptor for debugging purposes 3643 * @qs: the queue set 3644 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3645 * @idx: the descriptor index in the queue 3646 * @data: where to dump the descriptor contents 3647 * 3648 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3649 * size of the descriptor. 
3650 */ 3651 int 3652 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3653 unsigned char *data) 3654 { 3655 if (qnum >= 6) 3656 return (EINVAL); 3657 3658 if (qnum < 3) { 3659 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3660 return -EINVAL; 3661 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3662 return sizeof(struct tx_desc); 3663 } 3664 3665 if (qnum == 3) { 3666 if (!qs->rspq.desc || idx >= qs->rspq.size) 3667 return (EINVAL); 3668 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3669 return sizeof(struct rsp_desc); 3670 } 3671 3672 qnum -= 4; 3673 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3674 return (EINVAL); 3675 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3676 return sizeof(struct rx_desc); 3677 } 3678