1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 #define DEBUG_BUFRING 30 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/conf.h> 41 #include <machine/bus.h> 42 #include <machine/resource.h> 43 #include <sys/bus_dma.h> 44 #include <sys/rman.h> 45 #include <sys/queue.h> 46 #include <sys/sysctl.h> 47 #include <sys/taskqueue.h> 48 49 #include <sys/proc.h> 50 #include <sys/sbuf.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/systm.h> 54 #include <sys/syslog.h> 55 56 #include <netinet/in_systm.h> 57 #include <netinet/in.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 61 #include <dev/pci/pcireg.h> 62 #include <dev/pci/pcivar.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 67 #ifdef CONFIG_DEFINED 68 #include <cxgb_include.h> 69 #include <sys/mvec.h> 70 #else 71 #include <dev/cxgb/cxgb_include.h> 72 #include <dev/cxgb/sys/mvec.h> 73 #endif 74 75 int txq_fills = 0; 76 static int bogus_imm = 0; 77 static int recycle_enable = 0; 78 extern int cxgb_txq_buf_ring_size; 79 int cxgb_cached_allocations; 80 int cxgb_cached; 81 int cxgb_ext_freed; 82 extern int cxgb_use_16k_clusters; 83 84 85 #define USE_GTS 0 86 87 #define SGE_RX_SM_BUF_SIZE 1536 88 #define SGE_RX_DROP_THRES 16 89 #define SGE_RX_COPY_THRES 128 90 91 /* 92 * Period of the Tx buffer reclaim timer. This timer does not need to run 93 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
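 * A period of (hz >> 1) ticks is roughly half a second between runs,
 * independent of the configured hz value.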
94 */ 95 #define TX_RECLAIM_PERIOD (hz >> 1) 96 97 98 /* 99 * Values for sge_txq.flags 100 */ 101 enum { 102 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 103 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 104 }; 105 106 struct tx_desc { 107 uint64_t flit[TX_DESC_FLITS]; 108 } __packed; 109 110 struct rx_desc { 111 uint32_t addr_lo; 112 uint32_t len_gen; 113 uint32_t gen2; 114 uint32_t addr_hi; 115 } __packed;; 116 117 struct rsp_desc { /* response queue descriptor */ 118 struct rss_header rss_hdr; 119 uint32_t flags; 120 uint32_t len_cq; 121 uint8_t imm_data[47]; 122 uint8_t intr_gen; 123 } __packed; 124 125 #define RX_SW_DESC_MAP_CREATED (1 << 0) 126 #define TX_SW_DESC_MAP_CREATED (1 << 1) 127 #define RX_SW_DESC_INUSE (1 << 3) 128 #define TX_SW_DESC_MAPPED (1 << 4) 129 130 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 131 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 132 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 133 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 134 135 struct tx_sw_desc { /* SW state per Tx descriptor */ 136 struct mbuf_iovec mi; 137 bus_dmamap_t map; 138 int flags; 139 }; 140 141 struct rx_sw_desc { /* SW state per Rx descriptor */ 142 caddr_t rxsd_cl; 143 caddr_t data; 144 bus_dmamap_t map; 145 int flags; 146 }; 147 148 struct txq_state { 149 unsigned int compl; 150 unsigned int gen; 151 unsigned int pidx; 152 }; 153 154 struct refill_fl_cb_arg { 155 int error; 156 bus_dma_segment_t seg; 157 int nseg; 158 }; 159 160 /* 161 * Maps a number of flits to the number of Tx descriptors that can hold them. 162 * The formula is 163 * 164 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 165 * 166 * HW allows up to 4 descriptors to be combined into a WR. 167 */ 168 static uint8_t flit_desc_map[] = { 169 0, 170 #if SGE_NUM_GENBITS == 1 171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 173 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 174 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 175 #elif SGE_NUM_GENBITS == 2 176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 177 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 178 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 179 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 180 #else 181 # error "SGE_NUM_GENBITS must be 1 or 2" 182 #endif 183 }; 184 185 186 static int lro_default = 0; 187 int cxgb_debug = 0; 188 189 static void sge_timer_cb(void *arg); 190 static void sge_timer_reclaim(void *arg, int ncount); 191 static void sge_txq_reclaim_handler(void *arg, int ncount); 192 193 /** 194 * reclaim_completed_tx - reclaims completed Tx descriptors 195 * @adapter: the adapter 196 * @q: the Tx queue to reclaim completed descriptors from 197 * 198 * Reclaims Tx descriptors that the SGE has indicated it has processed, 199 * and frees the associated buffers if possible. Called with the Tx 200 * queue's lock held. 201 */ 202 static __inline int 203 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min) 204 { 205 int reclaim = desc_reclaimable(q); 206 207 if (reclaim < reclaim_min) 208 return (0); 209 210 mtx_assert(&q->lock, MA_OWNED); 211 if (reclaim > 0) { 212 t3_free_tx_desc(q, reclaim); 213 q->cleaned += reclaim; 214 q->in_use -= reclaim; 215 } 216 return (reclaim); 217 } 218 219 /** 220 * should_restart_tx - are there enough resources to restart a Tx queue? 221 * @q: the Tx queue 222 * 223 * Checks if there are enough descriptors to restart a suspended Tx queue. 
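 * A suspended queue may be restarted once the descriptors still outstanding
 * (in_use minus those already processed but not yet reclaimed) fall below
 * half the queue size.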
224 */ 225 static __inline int 226 should_restart_tx(const struct sge_txq *q) 227 { 228 unsigned int r = q->processed - q->cleaned; 229 230 return q->in_use - r < (q->size >> 1); 231 } 232 233 /** 234 * t3_sge_init - initialize SGE 235 * @adap: the adapter 236 * @p: the SGE parameters 237 * 238 * Performs SGE initialization needed every time after a chip reset. 239 * We do not initialize any of the queue sets here, instead the driver 240 * top-level must request those individually. We also do not enable DMA 241 * here, that should be done after the queues have been set up. 242 */ 243 void 244 t3_sge_init(adapter_t *adap, struct sge_params *p) 245 { 246 u_int ctrl, ups; 247 248 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 249 250 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 251 F_CQCRDTCTRL | 252 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 253 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 254 #if SGE_NUM_GENBITS == 1 255 ctrl |= F_EGRGENCTRL; 256 #endif 257 if (adap->params.rev > 0) { 258 if (!(adap->flags & (USING_MSIX | USING_MSI))) 259 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 260 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 261 } 262 t3_write_reg(adap, A_SG_CONTROL, ctrl); 263 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 264 V_LORCQDRBTHRSH(512)); 265 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 266 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 267 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 268 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 269 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 270 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 271 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 272 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 273 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 274 } 275 276 277 /** 278 * sgl_len - calculates the size of an SGL of the given capacity 279 * @n: the number of SGL entries 280 * 281 * Calculates the number of flits needed for a scatter/gather list that 282 * can hold the given number of entries. 283 */ 284 static __inline unsigned int 285 sgl_len(unsigned int n) 286 { 287 return ((3 * n) / 2 + (n & 1)); 288 } 289 290 /** 291 * get_imm_packet - return the next ingress packet buffer from a response 292 * @resp: the response descriptor containing the packet data 293 * 294 * Return a packet containing the immediate data of the given response. 
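 * The payload is copied straight out of the descriptor's imm_data field
 * (IMMED_PKT_SIZE bytes), so no free-list buffer is consumed.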
 */
#ifdef DISABLE_MBUF_IOVEC
static __inline int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
{
	struct mbuf *m = mh->m_head;

	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	m->m_pkthdr.len = m->m_len = IMMED_PKT_SIZE;
	return (0);
}

#else
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);

}
#endif

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;

		if (adap->params.nports > 2) {
			q->coalesce_nsecs = 50000;
		} else {
#ifdef INVARIANTS
			q->coalesce_nsecs = 10000;
#else
			q->coalesce_nsecs = 5000;
#endif
		}
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = FL_Q_SIZE;
		q->jumbo_size = JUMBO_Q_SIZE;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
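	 * (sized for mappings of up to TX_MAX_SIZE bytes split across at
	 * most TX_MAX_SEGS segments)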
423 */ 424 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 425 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 426 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 427 NULL, NULL, &sc->tx_dmat)) { 428 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 429 return (ENOMEM); 430 } 431 432 return (0); 433 } 434 435 int 436 t3_sge_free(struct adapter * sc) 437 { 438 439 if (sc->tx_dmat != NULL) 440 bus_dma_tag_destroy(sc->tx_dmat); 441 442 if (sc->rx_jumbo_dmat != NULL) 443 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 444 445 if (sc->rx_dmat != NULL) 446 bus_dma_tag_destroy(sc->rx_dmat); 447 448 if (sc->parent_dmat != NULL) 449 bus_dma_tag_destroy(sc->parent_dmat); 450 451 return (0); 452 } 453 454 void 455 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 456 { 457 458 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 459 qs->rspq.polling = 0 /* p->polling */; 460 } 461 462 #if !defined(__i386__) && !defined(__amd64__) 463 static void 464 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 465 { 466 struct refill_fl_cb_arg *cb_arg = arg; 467 468 cb_arg->error = error; 469 cb_arg->seg = segs[0]; 470 cb_arg->nseg = nseg; 471 472 } 473 #endif 474 /** 475 * refill_fl - refill an SGE free-buffer list 476 * @sc: the controller softc 477 * @q: the free-list to refill 478 * @n: the number of new buffers to allocate 479 * 480 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 481 * The caller must assure that @n does not exceed the queue's capacity. 482 */ 483 static void 484 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 485 { 486 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 487 struct rx_desc *d = &q->desc[q->pidx]; 488 struct refill_fl_cb_arg cb_arg; 489 caddr_t cl; 490 int err; 491 int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 492 493 cb_arg.error = 0; 494 while (n--) { 495 /* 496 * We only allocate a cluster, mbuf allocation happens after rx 497 */ 498 if ((cl = cxgb_cache_get(q->zone)) == NULL) { 499 log(LOG_WARNING, "Failed to allocate cluster\n"); 500 goto done; 501 } 502 503 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 504 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 505 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 506 uma_zfree(q->zone, cl); 507 goto done; 508 } 509 sd->flags |= RX_SW_DESC_MAP_CREATED; 510 } 511 #if !defined(__i386__) && !defined(__amd64__) 512 err = bus_dmamap_load(q->entry_tag, sd->map, 513 cl + header_size, q->buf_size, 514 refill_fl_cb, &cb_arg, 0); 515 516 if (err != 0 || cb_arg.error) { 517 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); 518 /* 519 * XXX free cluster 520 */ 521 return; 522 } 523 #else 524 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size)); 525 #endif 526 sd->flags |= RX_SW_DESC_INUSE; 527 sd->rxsd_cl = cl; 528 sd->data = cl + header_size; 529 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 530 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 531 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 532 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 533 534 d++; 535 sd++; 536 537 if (++q->pidx == q->size) { 538 q->pidx = 0; 539 q->gen ^= 1; 540 sd = q->sdesc; 541 d = q->desc; 542 } 543 q->credits++; 544 } 545 546 done: 547 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 548 } 549 550 551 /** 552 * free_rx_bufs - free the Rx buffers on an SGE free list 553 * @sc: the controle softc 554 * @q: the SGE free list to clean up 555 * 556 * 
Release the buffers on an SGE free-buffer Rx queue. HW fetching from 557 * this queue should be stopped before calling this function. 558 */ 559 static void 560 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 561 { 562 u_int cidx = q->cidx; 563 564 while (q->credits--) { 565 struct rx_sw_desc *d = &q->sdesc[cidx]; 566 567 if (d->flags & RX_SW_DESC_INUSE) { 568 bus_dmamap_unload(q->entry_tag, d->map); 569 bus_dmamap_destroy(q->entry_tag, d->map); 570 uma_zfree(q->zone, d->rxsd_cl); 571 } 572 d->rxsd_cl = NULL; 573 if (++cidx == q->size) 574 cidx = 0; 575 } 576 } 577 578 static __inline void 579 __refill_fl(adapter_t *adap, struct sge_fl *fl) 580 { 581 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 582 } 583 584 static __inline void 585 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 586 { 587 if ((fl->size - fl->credits) < max) 588 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 589 } 590 591 void 592 refill_fl_service(adapter_t *adap, struct sge_fl *fl) 593 { 594 __refill_fl_lt(adap, fl, 512); 595 } 596 597 /** 598 * recycle_rx_buf - recycle a receive buffer 599 * @adapter: the adapter 600 * @q: the SGE free list 601 * @idx: index of buffer to recycle 602 * 603 * Recycles the specified buffer on the given free list by adding it at 604 * the next available slot on the list. 605 */ 606 static void 607 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 608 { 609 struct rx_desc *from = &q->desc[idx]; 610 struct rx_desc *to = &q->desc[q->pidx]; 611 612 q->sdesc[q->pidx] = q->sdesc[idx]; 613 to->addr_lo = from->addr_lo; // already big endian 614 to->addr_hi = from->addr_hi; // likewise 615 wmb(); 616 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 617 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 618 q->credits++; 619 620 if (++q->pidx == q->size) { 621 q->pidx = 0; 622 q->gen ^= 1; 623 } 624 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 625 } 626 627 static void 628 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 629 { 630 uint32_t *addr; 631 632 addr = arg; 633 *addr = segs[0].ds_addr; 634 } 635 636 static int 637 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 638 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 639 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 640 { 641 size_t len = nelem * elem_size; 642 void *s = NULL; 643 void *p = NULL; 644 int err; 645 646 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 647 BUS_SPACE_MAXADDR_32BIT, 648 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 649 len, 0, NULL, NULL, tag)) != 0) { 650 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 651 return (ENOMEM); 652 } 653 654 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 655 map)) != 0) { 656 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 657 return (ENOMEM); 658 } 659 660 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 661 bzero(p, len); 662 *(void **)desc = p; 663 664 if (sw_size) { 665 len = nelem * sw_size; 666 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 667 *(void **)sdesc = s; 668 } 669 if (parent_entry_tag == NULL) 670 return (0); 671 672 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 673 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 674 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 675 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 676 NULL, NULL, entry_tag)) != 0) { 677 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 678 return (ENOMEM); 679 } 680 return (0); 681 } 682 683 static void 684 
sge_slow_intr_handler(void *arg, int ncount) 685 { 686 adapter_t *sc = arg; 687 688 t3_slow_intr_handler(sc); 689 } 690 691 /** 692 * sge_timer_cb - perform periodic maintenance of an SGE qset 693 * @data: the SGE queue set to maintain 694 * 695 * Runs periodically from a timer to perform maintenance of an SGE queue 696 * set. It performs two tasks: 697 * 698 * a) Cleans up any completed Tx descriptors that may still be pending. 699 * Normal descriptor cleanup happens when new packets are added to a Tx 700 * queue so this timer is relatively infrequent and does any cleanup only 701 * if the Tx queue has not seen any new packets in a while. We make a 702 * best effort attempt to reclaim descriptors, in that we don't wait 703 * around if we cannot get a queue's lock (which most likely is because 704 * someone else is queueing new packets and so will also handle the clean 705 * up). Since control queues use immediate data exclusively we don't 706 * bother cleaning them up here. 707 * 708 * b) Replenishes Rx queues that have run out due to memory shortage. 709 * Normally new Rx buffers are added when existing ones are consumed but 710 * when out of memory a queue can become empty. We try to add only a few 711 * buffers here, the queue will be replenished fully as these new buffers 712 * are used up if memory shortage has subsided. 713 * 714 * c) Return coalesced response queue credits in case a response queue is 715 * starved. 716 * 717 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 718 * fifo overflows and the FW doesn't implement any recovery scheme yet. 719 */ 720 static void 721 sge_timer_cb(void *arg) 722 { 723 adapter_t *sc = arg; 724 #ifndef IFNET_MULTIQUEUE 725 struct port_info *pi; 726 struct sge_qset *qs; 727 struct sge_txq *txq; 728 int i, j; 729 int reclaim_ofl, refill_rx; 730 731 for (i = 0; i < sc->params.nports; i++) 732 for (j = 0; j < sc->port[i].nqsets; j++) { 733 qs = &sc->sge.qs[i + j]; 734 txq = &qs->txq[0]; 735 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 736 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 737 (qs->fl[1].credits < qs->fl[1].size)); 738 if (reclaim_ofl || refill_rx) { 739 pi = &sc->port[i]; 740 taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task); 741 break; 742 } 743 } 744 #endif 745 if (sc->params.nports > 2) { 746 int i; 747 748 for_each_port(sc, i) { 749 struct port_info *pi = &sc->port[i]; 750 751 t3_write_reg(sc, A_SG_KDOORBELL, 752 F_SELEGRCNTX | 753 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 754 } 755 } 756 if (sc->open_device_map != 0) 757 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 758 } 759 760 /* 761 * This is meant to be a catch-all function to keep sge state private 762 * to sge.c 763 * 764 */ 765 int 766 t3_sge_init_adapter(adapter_t *sc) 767 { 768 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 769 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 770 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 771 mi_init(); 772 cxgb_cache_init(); 773 return (0); 774 } 775 776 int 777 t3_sge_reset_adapter(adapter_t *sc) 778 { 779 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 780 return (0); 781 } 782 783 int 784 t3_sge_init_port(struct port_info *pi) 785 { 786 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 787 return (0); 788 } 789 790 void 791 t3_sge_deinit_sw(adapter_t *sc) 792 { 793 int i; 794 795 callout_drain(&sc->sge_timer_ch); 796 if (sc->tq) 797 taskqueue_drain(sc->tq, &sc->slow_intr_task); 798 for 
(i = 0; i < sc->params.nports; i++) 799 if (sc->port[i].tq != NULL) 800 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 801 802 mi_deinit(); 803 } 804 805 /** 806 * refill_rspq - replenish an SGE response queue 807 * @adapter: the adapter 808 * @q: the response queue to replenish 809 * @credits: how many new responses to make available 810 * 811 * Replenishes a response queue by making the supplied number of responses 812 * available to HW. 813 */ 814 static __inline void 815 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 816 { 817 818 /* mbufs are allocated on demand when a rspq entry is processed. */ 819 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 820 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 821 } 822 823 static __inline void 824 sge_txq_reclaim_(struct sge_txq *txq, int force) 825 { 826 827 if (desc_reclaimable(txq) < 16) 828 return; 829 if (mtx_trylock(&txq->lock) == 0) 830 return; 831 reclaim_completed_tx_(txq, 16); 832 mtx_unlock(&txq->lock); 833 834 } 835 836 static void 837 sge_txq_reclaim_handler(void *arg, int ncount) 838 { 839 struct sge_txq *q = arg; 840 841 sge_txq_reclaim_(q, TRUE); 842 } 843 844 845 846 static void 847 sge_timer_reclaim(void *arg, int ncount) 848 { 849 struct port_info *pi = arg; 850 int i, nqsets = pi->nqsets; 851 adapter_t *sc = pi->adapter; 852 struct sge_qset *qs; 853 struct sge_txq *txq; 854 struct mtx *lock; 855 856 #ifdef IFNET_MULTIQUEUE 857 panic("%s should not be called with multiqueue support\n", __FUNCTION__); 858 #endif 859 for (i = 0; i < nqsets; i++) { 860 qs = &sc->sge.qs[i]; 861 862 txq = &qs->txq[TXQ_OFLD]; 863 sge_txq_reclaim_(txq, FALSE); 864 865 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 866 &sc->sge.qs[0].rspq.lock; 867 868 if (mtx_trylock(lock)) { 869 /* XXX currently assume that we are *NOT* polling */ 870 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 871 872 if (qs->fl[0].credits < qs->fl[0].size - 16) 873 __refill_fl(sc, &qs->fl[0]); 874 if (qs->fl[1].credits < qs->fl[1].size - 16) 875 __refill_fl(sc, &qs->fl[1]); 876 877 if (status & (1 << qs->rspq.cntxt_id)) { 878 if (qs->rspq.credits) { 879 refill_rspq(sc, &qs->rspq, 1); 880 qs->rspq.credits--; 881 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 882 1 << qs->rspq.cntxt_id); 883 } 884 } 885 mtx_unlock(lock); 886 } 887 } 888 } 889 890 /** 891 * init_qset_cntxt - initialize an SGE queue set context info 892 * @qs: the queue set 893 * @id: the queue set id 894 * 895 * Initializes the TIDs and context ids for the queues of a queue set. 
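 * Queue set @id uses response queue context @id, free-list contexts 2*id and
 * 2*id + 1, and per-Tx-queue egress contexts offset by @id into the
 * firmware's tunnel, offload, and control ranges.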
896 */ 897 static void 898 init_qset_cntxt(struct sge_qset *qs, u_int id) 899 { 900 901 qs->rspq.cntxt_id = id; 902 qs->fl[0].cntxt_id = 2 * id; 903 qs->fl[1].cntxt_id = 2 * id + 1; 904 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 905 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 906 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 907 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 908 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 909 910 mbufq_init(&qs->txq[TXQ_ETH].sendq); 911 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 912 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 913 } 914 915 916 static void 917 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 918 { 919 txq->in_use += ndesc; 920 /* 921 * XXX we don't handle stopping of queue 922 * presumably start handles this when we bump against the end 923 */ 924 txqs->gen = txq->gen; 925 txq->unacked += ndesc; 926 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 927 txq->unacked &= 7; 928 txqs->pidx = txq->pidx; 929 txq->pidx += ndesc; 930 #ifdef INVARIANTS 931 if (((txqs->pidx > txq->cidx) && 932 (txq->pidx < txqs->pidx) && 933 (txq->pidx >= txq->cidx)) || 934 ((txqs->pidx < txq->cidx) && 935 (txq->pidx >= txq-> cidx)) || 936 ((txqs->pidx < txq->cidx) && 937 (txq->cidx < txqs->pidx))) 938 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 939 txqs->pidx, txq->pidx, txq->cidx); 940 #endif 941 if (txq->pidx >= txq->size) { 942 txq->pidx -= txq->size; 943 txq->gen ^= 1; 944 } 945 946 } 947 948 /** 949 * calc_tx_descs - calculate the number of Tx descriptors for a packet 950 * @m: the packet mbufs 951 * @nsegs: the number of segments 952 * 953 * Returns the number of Tx descriptors needed for the given Ethernet 954 * packet. Ethernet packets require addition of WR and CPL headers. 
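 * A packet that fits as immediate data (up to WR_LEN - sizeof(struct
 * cpl_tx_pkt) bytes) takes a single descriptor.  Otherwise the count follows
 * from the SGL: e.g. a 3-segment packet needs sgl_len(3) = 5 flits plus 2 for
 * the WR/CPL headers, and 7 flits still map to one descriptor via
 * flit_desc_map.  When TSO is supported, CSUM_TSO adds one more flit for the
 * LSO header.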
955 */ 956 static __inline unsigned int 957 calc_tx_descs(const struct mbuf *m, int nsegs) 958 { 959 unsigned int flits; 960 961 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 962 return 1; 963 964 flits = sgl_len(nsegs) + 2; 965 #ifdef TSO_SUPPORTED 966 if (m->m_pkthdr.csum_flags & CSUM_TSO) 967 flits++; 968 #endif 969 return flits_to_desc(flits); 970 } 971 972 static unsigned int 973 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 974 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 975 { 976 struct mbuf *m0; 977 int err, pktlen, pass = 0; 978 979 retry: 980 err = 0; 981 m0 = *m; 982 pktlen = m0->m_pkthdr.len; 983 #if defined(__i386__) || defined(__amd64__) 984 if (busdma_map_sg_collapse(m, segs, nsegs) == 0) { 985 goto done; 986 } else 987 #endif 988 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0); 989 990 if (err == 0) { 991 goto done; 992 } 993 if (err == EFBIG && pass == 0) { 994 pass = 1; 995 /* Too many segments, try to defrag */ 996 m0 = m_defrag(m0, M_DONTWAIT); 997 if (m0 == NULL) { 998 m_freem(*m); 999 *m = NULL; 1000 return (ENOBUFS); 1001 } 1002 *m = m0; 1003 goto retry; 1004 } else if (err == ENOMEM) { 1005 return (err); 1006 } if (err) { 1007 if (cxgb_debug) 1008 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1009 m_freem(m0); 1010 *m = NULL; 1011 return (err); 1012 } 1013 done: 1014 #if !defined(__i386__) && !defined(__amd64__) 1015 bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE); 1016 #endif 1017 txsd->flags |= TX_SW_DESC_MAPPED; 1018 1019 return (0); 1020 } 1021 1022 /** 1023 * make_sgl - populate a scatter/gather list for a packet 1024 * @sgp: the SGL to populate 1025 * @segs: the packet dma segments 1026 * @nsegs: the number of segments 1027 * 1028 * Generates a scatter/gather list for the buffers that make up a packet 1029 * and returns the SGL size in 8-byte words. The caller must size the SGL 1030 * appropriately. 1031 */ 1032 static __inline void 1033 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1034 { 1035 int i, idx; 1036 1037 for (idx = 0, i = 0; i < nsegs; i++) { 1038 /* 1039 * firmware doesn't like empty segments 1040 */ 1041 if (segs[i].ds_len == 0) 1042 continue; 1043 if (i && idx == 0) 1044 ++sgp; 1045 1046 sgp->len[idx] = htobe32(segs[i].ds_len); 1047 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1048 idx ^= 1; 1049 } 1050 1051 if (idx) { 1052 sgp->len[idx] = 0; 1053 sgp->addr[idx] = 0; 1054 } 1055 } 1056 1057 /** 1058 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1059 * @adap: the adapter 1060 * @q: the Tx queue 1061 * 1062 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 1063 * where the HW is going to sleep just after we checked, however, 1064 * then the interrupt handler will detect the outstanding TX packet 1065 * and ring the doorbell for us. 1066 * 1067 * When GTS is disabled we unconditionally ring the doorbell. 
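 * In the non-GTS path the wmb() below ensures the descriptor writes are
 * visible before the doorbell register write is issued.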
1068 */ 1069 static __inline void 1070 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1071 { 1072 #if USE_GTS 1073 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1074 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1075 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1076 #ifdef T3_TRACE 1077 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1078 q->cntxt_id); 1079 #endif 1080 t3_write_reg(adap, A_SG_KDOORBELL, 1081 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1082 } 1083 #else 1084 wmb(); /* write descriptors before telling HW */ 1085 t3_write_reg(adap, A_SG_KDOORBELL, 1086 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1087 #endif 1088 } 1089 1090 static __inline void 1091 wr_gen2(struct tx_desc *d, unsigned int gen) 1092 { 1093 #if SGE_NUM_GENBITS == 2 1094 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1095 #endif 1096 } 1097 1098 /** 1099 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1100 * @ndesc: number of Tx descriptors spanned by the SGL 1101 * @txd: first Tx descriptor to be written 1102 * @txqs: txq state (generation and producer index) 1103 * @txq: the SGE Tx queue 1104 * @sgl: the SGL 1105 * @flits: number of flits to the start of the SGL in the first descriptor 1106 * @sgl_flits: the SGL size in flits 1107 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1108 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1109 * 1110 * Write a work request header and an associated SGL. If the SGL is 1111 * small enough to fit into one Tx descriptor it has already been written 1112 * and we just need to write the WR header. Otherwise we distribute the 1113 * SGL across the number of descriptors it spans. 1114 */ 1115 static void 1116 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1117 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1118 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1119 { 1120 1121 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1122 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1123 1124 if (__predict_true(ndesc == 1)) { 1125 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1126 V_WR_SGLSFLT(flits)) | wr_hi; 1127 wmb(); 1128 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1129 V_WR_GEN(txqs->gen)) | wr_lo; 1130 /* XXX gen? 
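 * (wr_gen2() stamps the generation bit into the descriptor's last flit only
 * when SGE_NUM_GENBITS == 2; otherwise it is a no-op)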
*/ 1131 wr_gen2(txd, txqs->gen); 1132 1133 } else { 1134 unsigned int ogen = txqs->gen; 1135 const uint64_t *fp = (const uint64_t *)sgl; 1136 struct work_request_hdr *wp = wrp; 1137 1138 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1139 V_WR_SGLSFLT(flits)) | wr_hi; 1140 1141 while (sgl_flits) { 1142 unsigned int avail = WR_FLITS - flits; 1143 1144 if (avail > sgl_flits) 1145 avail = sgl_flits; 1146 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1147 sgl_flits -= avail; 1148 ndesc--; 1149 if (!sgl_flits) 1150 break; 1151 1152 fp += avail; 1153 txd++; 1154 txsd++; 1155 if (++txqs->pidx == txq->size) { 1156 txqs->pidx = 0; 1157 txqs->gen ^= 1; 1158 txd = txq->desc; 1159 txsd = txq->sdesc; 1160 } 1161 1162 /* 1163 * when the head of the mbuf chain 1164 * is freed all clusters will be freed 1165 * with it 1166 */ 1167 KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", txsd->mi.mi_base)); 1168 wrp = (struct work_request_hdr *)txd; 1169 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1170 V_WR_SGLSFLT(1)) | wr_hi; 1171 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1172 sgl_flits + 1)) | 1173 V_WR_GEN(txqs->gen)) | wr_lo; 1174 wr_gen2(txd, txqs->gen); 1175 flits = 1; 1176 } 1177 wrp->wr_hi |= htonl(F_WR_EOP); 1178 wmb(); 1179 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1180 wr_gen2((struct tx_desc *)wp, ogen); 1181 } 1182 } 1183 1184 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1185 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1186 1187 #ifdef VLAN_SUPPORTED 1188 #define GET_VTAG(cntrl, m) \ 1189 do { \ 1190 if ((m)->m_flags & M_VLANTAG) \ 1191 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1192 } while (0) 1193 1194 #define GET_VTAG_MI(cntrl, mi) \ 1195 do { \ 1196 if ((mi)->mi_flags & M_VLANTAG) \ 1197 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \ 1198 } while (0) 1199 #else 1200 #define GET_VTAG(cntrl, m) 1201 #define GET_VTAG_MI(cntrl, m) 1202 #endif 1203 1204 int 1205 t3_encap(struct sge_qset *qs, struct mbuf **m, int count) 1206 { 1207 adapter_t *sc; 1208 struct mbuf *m0; 1209 struct sge_txq *txq; 1210 struct txq_state txqs; 1211 struct port_info *pi; 1212 unsigned int ndesc, flits, cntrl, mlen; 1213 int err, nsegs, tso_info = 0; 1214 1215 struct work_request_hdr *wrp; 1216 struct tx_sw_desc *txsd; 1217 struct sg_ent *sgp, *sgl; 1218 uint32_t wr_hi, wr_lo, sgl_flits; 1219 bus_dma_segment_t segs[TX_MAX_SEGS]; 1220 1221 struct tx_desc *txd; 1222 struct mbuf_vec *mv; 1223 struct mbuf_iovec *mi; 1224 1225 DPRINTF("t3_encap cpu=%d ", curcpu); 1226 KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx)); 1227 1228 mi = NULL; 1229 pi = qs->port; 1230 sc = pi->adapter; 1231 txq = &qs->txq[TXQ_ETH]; 1232 txd = &txq->desc[txq->pidx]; 1233 txsd = &txq->sdesc[txq->pidx]; 1234 sgl = txq->txq_sgl; 1235 m0 = *m; 1236 1237 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1238 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1239 if (cxgb_debug) 1240 printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx); 1241 1242 mtx_assert(&txq->lock, MA_OWNED); 1243 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1244 /* 1245 * XXX need to add VLAN support for 6.x 1246 */ 1247 #ifdef VLAN_SUPPORTED 1248 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1249 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1250 #endif 1251 KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", 1252 txsd->mi.mi_base)); 1253 if 
(cxgb_debug) 1254 printf("uipc_mvec PIO_LEN=%ld\n", PIO_LEN); 1255 1256 if (count > 1) { 1257 panic("count > 1 not support in CVS\n"); 1258 if ((err = busdma_map_sg_vec(m, &m0, segs, count))) 1259 return (err); 1260 nsegs = count; 1261 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { 1262 if (cxgb_debug) 1263 printf("failed ... err=%d\n", err); 1264 return (err); 1265 } 1266 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); 1267 1268 if (!(m0->m_pkthdr.len <= PIO_LEN)) { 1269 mi_collapse_mbuf(&txsd->mi, m0); 1270 mi = &txsd->mi; 1271 } 1272 if (count > 1) { 1273 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1274 int i, fidx; 1275 struct mbuf_iovec *batchmi; 1276 1277 mv = mtomv(m0); 1278 batchmi = mv->mv_vec; 1279 1280 wrp = (struct work_request_hdr *)txd; 1281 1282 flits = count*2 + 1; 1283 txq_prod(txq, 1, &txqs); 1284 1285 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { 1286 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; 1287 1288 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1289 GET_VTAG_MI(cntrl, batchmi); 1290 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1291 cbe->cntrl = htonl(cntrl); 1292 cbe->len = htonl(batchmi->mi_len | 0x80000000); 1293 cbe->addr = htobe64(segs[i].ds_addr); 1294 txd->flit[fidx] |= htobe64(1 << 24); 1295 } 1296 1297 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1298 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1299 wmb(); 1300 wrp->wr_lo = htonl(V_WR_LEN(flits) | 1301 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1302 /* XXX gen? */ 1303 wr_gen2(txd, txqs.gen); 1304 check_ring_tx_db(sc, txq); 1305 1306 return (0); 1307 } else if (tso_info) { 1308 int undersized, eth_type; 1309 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1310 struct ip *ip; 1311 struct tcphdr *tcp; 1312 char *pkthdr, tmp[TCPPKTHDRSIZE]; 1313 struct mbuf_vec *mv; 1314 struct mbuf_iovec *tmpmi; 1315 1316 mv = mtomv(m0); 1317 tmpmi = mv->mv_vec; 1318 1319 txd->flit[2] = 0; 1320 GET_VTAG_MI(cntrl, mi); 1321 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1322 hdr->cntrl = htonl(cntrl); 1323 mlen = m0->m_pkthdr.len; 1324 hdr->len = htonl(mlen | 0x80000000); 1325 1326 DPRINTF("tso buf len=%d\n", mlen); 1327 undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) && 1328 (m0->m_flags & M_VLANTAG)) || 1329 (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN)); 1330 if (__predict_false(undersized)) { 1331 pkthdr = tmp; 1332 dump_mi(mi); 1333 panic("discontig packet - fixxorz"); 1334 } else 1335 pkthdr = m0->m_data; 1336 1337 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1338 eth_type = CPL_ETH_II_VLAN; 1339 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1340 ETHER_VLAN_ENCAP_LEN); 1341 } else { 1342 eth_type = CPL_ETH_II; 1343 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1344 } 1345 tcp = (struct tcphdr *)((uint8_t *)ip + 1346 sizeof(*ip)); 1347 1348 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1349 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1350 V_LSO_TCPHDR_WORDS(tcp->th_off); 1351 hdr->lso_info = htonl(tso_info); 1352 flits = 3; 1353 } else { 1354 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1355 1356 GET_VTAG(cntrl, m0); 1357 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1358 cpl->cntrl = htonl(cntrl); 1359 mlen = m0->m_pkthdr.len; 1360 cpl->len = htonl(mlen | 0x80000000); 1361 1362 if (mlen <= PIO_LEN) { 1363 txq_prod(txq, 1, &txqs); 1364 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1365 m_freem(m0); 1366 m0 = NULL; 1367 flits = (mlen + 7) / 8 + 2; 1368 cpl->wr.wr_hi = 
htonl(V_WR_BCNTLFLT(mlen & 7) | 1369 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1370 F_WR_SOP | F_WR_EOP | txqs.compl); 1371 wmb(); 1372 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1373 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1374 1375 wr_gen2(txd, txqs.gen); 1376 check_ring_tx_db(sc, txq); 1377 DPRINTF("pio buf\n"); 1378 return (0); 1379 } 1380 DPRINTF("regular buf\n"); 1381 flits = 2; 1382 } 1383 wrp = (struct work_request_hdr *)txd; 1384 1385 #ifdef nomore 1386 /* 1387 * XXX need to move into one of the helper routines above 1388 * 1389 */ 1390 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) 1391 return (err); 1392 m0 = *m; 1393 #endif 1394 ndesc = calc_tx_descs(m0, nsegs); 1395 1396 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1397 make_sgl(sgp, segs, nsegs); 1398 1399 sgl_flits = sgl_len(nsegs); 1400 1401 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1402 txq_prod(txq, ndesc, &txqs); 1403 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1404 wr_lo = htonl(V_WR_TID(txq->token)); 1405 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1406 check_ring_tx_db(pi->adapter, txq); 1407 1408 if ((m0->m_type == MT_DATA) && 1409 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && 1410 (m0->m_ext.ext_type != EXT_PACKET)) { 1411 m0->m_flags &= ~M_EXT ; 1412 mbufs_outstanding--; 1413 m_free(m0); 1414 } 1415 1416 return (0); 1417 } 1418 1419 1420 /** 1421 * write_imm - write a packet into a Tx descriptor as immediate data 1422 * @d: the Tx descriptor to write 1423 * @m: the packet 1424 * @len: the length of packet data to write as immediate data 1425 * @gen: the generation bit value to write 1426 * 1427 * Writes a packet as immediate data into a Tx descriptor. The packet 1428 * contains a work request at its beginning. We must write the packet 1429 * carefully so the SGE doesn't read accidentally before it's written in 1430 * its entirety. 1431 */ 1432 static __inline void 1433 write_imm(struct tx_desc *d, struct mbuf *m, 1434 unsigned int len, unsigned int gen) 1435 { 1436 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1437 struct work_request_hdr *to = (struct work_request_hdr *)d; 1438 1439 if (len > WR_LEN) 1440 panic("len too big %d\n", len); 1441 if (len < sizeof(*from)) 1442 panic("len too small %d", len); 1443 1444 memcpy(&to[1], &from[1], len - sizeof(*from)); 1445 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1446 V_WR_BCNTLFLT(len & 7)); 1447 wmb(); 1448 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1449 V_WR_LEN((len + 7) / 8)); 1450 wr_gen2(d, gen); 1451 1452 /* 1453 * This check is a hack we should really fix the logic so 1454 * that this can't happen 1455 */ 1456 if (m->m_type != MT_DONTFREE) 1457 m_freem(m); 1458 1459 } 1460 1461 /** 1462 * check_desc_avail - check descriptor availability on a send queue 1463 * @adap: the adapter 1464 * @q: the TX queue 1465 * @m: the packet needing the descriptors 1466 * @ndesc: the number of Tx descriptors needed 1467 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1468 * 1469 * Checks if the requested number of Tx descriptors is available on an 1470 * SGE send queue. If the queue is already suspended or not enough 1471 * descriptors are available the packet is queued for later transmission. 1472 * Must be called with the Tx queue locked. 
1473 * 1474 * Returns 0 if enough descriptors are available, 1 if there aren't 1475 * enough descriptors and the packet has been queued, and 2 if the caller 1476 * needs to retry because there weren't enough descriptors at the 1477 * beginning of the call but some freed up in the mean time. 1478 */ 1479 static __inline int 1480 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1481 struct mbuf *m, unsigned int ndesc, 1482 unsigned int qid) 1483 { 1484 /* 1485 * XXX We currently only use this for checking the control queue 1486 * the control queue is only used for binding qsets which happens 1487 * at init time so we are guaranteed enough descriptors 1488 */ 1489 if (__predict_false(!mbufq_empty(&q->sendq))) { 1490 addq_exit: mbufq_tail(&q->sendq, m); 1491 return 1; 1492 } 1493 if (__predict_false(q->size - q->in_use < ndesc)) { 1494 1495 struct sge_qset *qs = txq_to_qset(q, qid); 1496 1497 printf("stopping q\n"); 1498 1499 setbit(&qs->txq_stopped, qid); 1500 smp_mb(); 1501 1502 if (should_restart_tx(q) && 1503 test_and_clear_bit(qid, &qs->txq_stopped)) 1504 return 2; 1505 1506 q->stops++; 1507 goto addq_exit; 1508 } 1509 return 0; 1510 } 1511 1512 1513 /** 1514 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1515 * @q: the SGE control Tx queue 1516 * 1517 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1518 * that send only immediate data (presently just the control queues) and 1519 * thus do not have any mbufs 1520 */ 1521 static __inline void 1522 reclaim_completed_tx_imm(struct sge_txq *q) 1523 { 1524 unsigned int reclaim = q->processed - q->cleaned; 1525 1526 mtx_assert(&q->lock, MA_OWNED); 1527 1528 q->in_use -= reclaim; 1529 q->cleaned += reclaim; 1530 } 1531 1532 static __inline int 1533 immediate(const struct mbuf *m) 1534 { 1535 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1536 } 1537 1538 /** 1539 * ctrl_xmit - send a packet through an SGE control Tx queue 1540 * @adap: the adapter 1541 * @q: the control queue 1542 * @m: the packet 1543 * 1544 * Send a packet through an SGE control Tx queue. Packets sent through 1545 * a control queue must fit entirely as immediate data in a single Tx 1546 * descriptor and have no page fragments. 1547 */ 1548 static int 1549 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1550 { 1551 int ret; 1552 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1553 1554 if (__predict_false(!immediate(m))) { 1555 m_freem(m); 1556 return 0; 1557 } 1558 1559 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1560 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1561 1562 mtx_lock(&q->lock); 1563 again: reclaim_completed_tx_imm(q); 1564 1565 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1566 if (__predict_false(ret)) { 1567 if (ret == 1) { 1568 mtx_unlock(&q->lock); 1569 log(LOG_ERR, "no desc available\n"); 1570 1571 return (ENOSPC); 1572 } 1573 goto again; 1574 } 1575 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1576 1577 q->in_use++; 1578 if (++q->pidx >= q->size) { 1579 q->pidx = 0; 1580 q->gen ^= 1; 1581 } 1582 mtx_unlock(&q->lock); 1583 wmb(); 1584 t3_write_reg(adap, A_SG_KDOORBELL, 1585 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1586 return (0); 1587 } 1588 1589 1590 /** 1591 * restart_ctrlq - restart a suspended control queue 1592 * @qs: the queue set cotaining the control queue 1593 * 1594 * Resumes transmission on a suspended Tx control queue. 
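 * Invoked via the queue's resume task after the control queue was stopped
 * for lack of descriptors: it reclaims completed immediate-data descriptors,
 * drains the pending-send list into new descriptors, and rings the doorbell.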
1595 */ 1596 static void 1597 restart_ctrlq(void *data, int npending) 1598 { 1599 struct mbuf *m; 1600 struct sge_qset *qs = (struct sge_qset *)data; 1601 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1602 adapter_t *adap = qs->port->adapter; 1603 1604 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use); 1605 1606 mtx_lock(&q->lock); 1607 again: reclaim_completed_tx_imm(q); 1608 1609 while (q->in_use < q->size && 1610 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1611 1612 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1613 1614 if (++q->pidx >= q->size) { 1615 q->pidx = 0; 1616 q->gen ^= 1; 1617 } 1618 q->in_use++; 1619 } 1620 if (!mbufq_empty(&q->sendq)) { 1621 setbit(&qs->txq_stopped, TXQ_CTRL); 1622 smp_mb(); 1623 1624 if (should_restart_tx(q) && 1625 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1626 goto again; 1627 q->stops++; 1628 } 1629 mtx_unlock(&q->lock); 1630 t3_write_reg(adap, A_SG_KDOORBELL, 1631 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1632 } 1633 1634 1635 /* 1636 * Send a management message through control queue 0 1637 */ 1638 int 1639 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1640 { 1641 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1642 } 1643 1644 1645 /** 1646 * free_qset - free the resources of an SGE queue set 1647 * @sc: the controller owning the queue set 1648 * @q: the queue set 1649 * 1650 * Release the HW and SW resources associated with an SGE queue set, such 1651 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1652 * queue set must be quiesced prior to calling this. 1653 */ 1654 void 1655 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1656 { 1657 int i; 1658 1659 t3_free_tx_desc_all(&q->txq[TXQ_ETH]); 1660 1661 for (i = 0; i < SGE_TXQ_PER_SET; i++) 1662 if (q->txq[i].txq_mr.br_ring != NULL) { 1663 free(q->txq[i].txq_mr.br_ring, M_DEVBUF); 1664 mtx_destroy(&q->txq[i].txq_mr.br_lock); 1665 } 1666 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1667 if (q->fl[i].desc) { 1668 mtx_lock(&sc->sge.reg_lock); 1669 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1670 mtx_unlock(&sc->sge.reg_lock); 1671 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1672 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1673 q->fl[i].desc_map); 1674 bus_dma_tag_destroy(q->fl[i].desc_tag); 1675 bus_dma_tag_destroy(q->fl[i].entry_tag); 1676 } 1677 if (q->fl[i].sdesc) { 1678 free_rx_bufs(sc, &q->fl[i]); 1679 free(q->fl[i].sdesc, M_DEVBUF); 1680 } 1681 } 1682 1683 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 1684 if (q->txq[i].desc) { 1685 mtx_lock(&sc->sge.reg_lock); 1686 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1687 mtx_unlock(&sc->sge.reg_lock); 1688 bus_dmamap_unload(q->txq[i].desc_tag, 1689 q->txq[i].desc_map); 1690 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1691 q->txq[i].desc_map); 1692 bus_dma_tag_destroy(q->txq[i].desc_tag); 1693 bus_dma_tag_destroy(q->txq[i].entry_tag); 1694 MTX_DESTROY(&q->txq[i].lock); 1695 } 1696 if (q->txq[i].sdesc) { 1697 free(q->txq[i].sdesc, M_DEVBUF); 1698 } 1699 } 1700 1701 if (q->rspq.desc) { 1702 mtx_lock(&sc->sge.reg_lock); 1703 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1704 mtx_unlock(&sc->sge.reg_lock); 1705 1706 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1707 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1708 q->rspq.desc_map); 1709 bus_dma_tag_destroy(q->rspq.desc_tag); 1710 MTX_DESTROY(&q->rspq.lock); 1711 } 1712 1713 bzero(q, sizeof(*q)); 1714 } 1715 1716 /** 1717 * t3_free_sge_resources - free SGE resources 1718 * @sc: the adapter softc 1719 * 1720 * Frees resources used by 
the SGE queue sets. 1721 */ 1722 void 1723 t3_free_sge_resources(adapter_t *sc) 1724 { 1725 int i, nqsets; 1726 1727 #ifdef IFNET_MULTIQUEUE 1728 panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__); 1729 #endif 1730 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1731 nqsets += sc->port[i].nqsets; 1732 1733 for (i = 0; i < nqsets; ++i) 1734 t3_free_qset(sc, &sc->sge.qs[i]); 1735 } 1736 1737 /** 1738 * t3_sge_start - enable SGE 1739 * @sc: the controller softc 1740 * 1741 * Enables the SGE for DMAs. This is the last step in starting packet 1742 * transfers. 1743 */ 1744 void 1745 t3_sge_start(adapter_t *sc) 1746 { 1747 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1748 } 1749 1750 /** 1751 * t3_sge_stop - disable SGE operation 1752 * @sc: the adapter 1753 * 1754 * Disables the DMA engine. This can be called in emeregencies (e.g., 1755 * from error interrupts) or from normal process context. In the latter 1756 * case it also disables any pending queue restart tasklets. Note that 1757 * if it is called in interrupt context it cannot disable the restart 1758 * tasklets as it cannot wait, however the tasklets will have no effect 1759 * since the doorbells are disabled and the driver will call this again 1760 * later from process context, at which time the tasklets will be stopped 1761 * if they are still running. 1762 */ 1763 void 1764 t3_sge_stop(adapter_t *sc) 1765 { 1766 int i, nqsets; 1767 1768 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1769 1770 if (sc->tq == NULL) 1771 return; 1772 1773 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1774 nqsets += sc->port[i].nqsets; 1775 #ifdef notyet 1776 /* 1777 * 1778 * XXX 1779 */ 1780 for (i = 0; i < nqsets; ++i) { 1781 struct sge_qset *qs = &sc->sge.qs[i]; 1782 1783 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 1784 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 1785 } 1786 #endif 1787 } 1788 1789 /** 1790 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 1791 * @adapter: the adapter 1792 * @q: the Tx queue to reclaim descriptors from 1793 * @reclaimable: the number of descriptors to reclaim 1794 * @m_vec_size: maximum number of buffers to reclaim 1795 * @desc_reclaimed: returns the number of descriptors reclaimed 1796 * 1797 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1798 * Tx buffers. Called with the Tx queue lock held. 
1799 * 1800 * Returns number of buffers of reclaimed 1801 */ 1802 void 1803 t3_free_tx_desc(struct sge_txq *q, int reclaimable) 1804 { 1805 struct tx_sw_desc *txsd; 1806 unsigned int cidx; 1807 1808 #ifdef T3_TRACE 1809 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1810 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 1811 #endif 1812 cidx = q->cidx; 1813 txsd = &q->sdesc[cidx]; 1814 DPRINTF("reclaiming %d WR\n", reclaimable); 1815 mtx_assert(&q->lock, MA_OWNED); 1816 while (reclaimable--) { 1817 DPRINTF("cidx=%d d=%p\n", cidx, txsd); 1818 if (txsd->mi.mi_base != NULL) { 1819 if (txsd->flags & TX_SW_DESC_MAPPED) { 1820 bus_dmamap_unload(q->entry_tag, txsd->map); 1821 txsd->flags &= ~TX_SW_DESC_MAPPED; 1822 } 1823 m_freem_iovec(&txsd->mi); 1824 buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__); 1825 txsd->mi.mi_base = NULL; 1826 1827 #if defined(DIAGNOSTIC) && 0 1828 if (m_get_priority(txsd->m[0]) != cidx) 1829 printf("pri=%d cidx=%d\n", 1830 (int)m_get_priority(txsd->m[0]), cidx); 1831 #endif 1832 1833 } else 1834 q->txq_skipped++; 1835 1836 ++txsd; 1837 if (++cidx == q->size) { 1838 cidx = 0; 1839 txsd = q->sdesc; 1840 } 1841 } 1842 q->cidx = cidx; 1843 1844 } 1845 1846 void 1847 t3_free_tx_desc_all(struct sge_txq *q) 1848 { 1849 int i; 1850 struct tx_sw_desc *txsd; 1851 1852 for (i = 0; i < q->size; i++) { 1853 txsd = &q->sdesc[i]; 1854 if (txsd->mi.mi_base != NULL) { 1855 if (txsd->flags & TX_SW_DESC_MAPPED) { 1856 bus_dmamap_unload(q->entry_tag, txsd->map); 1857 txsd->flags &= ~TX_SW_DESC_MAPPED; 1858 } 1859 m_freem_iovec(&txsd->mi); 1860 bzero(&txsd->mi, sizeof(txsd->mi)); 1861 } 1862 } 1863 } 1864 1865 /** 1866 * is_new_response - check if a response is newly written 1867 * @r: the response descriptor 1868 * @q: the response queue 1869 * 1870 * Returns true if a response descriptor contains a yet unprocessed 1871 * response. 1872 */ 1873 static __inline int 1874 is_new_response(const struct rsp_desc *r, 1875 const struct sge_rspq *q) 1876 { 1877 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1878 } 1879 1880 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1881 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1882 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1883 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1884 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1885 1886 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1887 #define NOMEM_INTR_DELAY 2500 1888 1889 /** 1890 * write_ofld_wr - write an offload work request 1891 * @adap: the adapter 1892 * @m: the packet to send 1893 * @q: the Tx queue 1894 * @pidx: index of the first Tx descriptor to write 1895 * @gen: the generation value to use 1896 * @ndesc: number of descriptors the packet will occupy 1897 * 1898 * Write an offload work request to send the supplied packet. The packet 1899 * data already carry the work request with most fields populated. 
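 * Packets that qualify as immediate data (and have no segment list) are
 * written with write_imm(); otherwise the WR header is copied from the mbuf
 * and an SGL built from @segs is appended.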
1900 */ 1901 static void 1902 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1903 struct sge_txq *q, unsigned int pidx, 1904 unsigned int gen, unsigned int ndesc, 1905 bus_dma_segment_t *segs, unsigned int nsegs) 1906 { 1907 unsigned int sgl_flits, flits; 1908 struct work_request_hdr *from; 1909 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1910 struct tx_desc *d = &q->desc[pidx]; 1911 struct txq_state txqs; 1912 1913 if (immediate(m) && segs == NULL) { 1914 write_imm(d, m, m->m_len, gen); 1915 return; 1916 } 1917 1918 /* Only TX_DATA builds SGLs */ 1919 from = mtod(m, struct work_request_hdr *); 1920 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 1921 1922 flits = m->m_len / 8; 1923 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1924 1925 make_sgl(sgp, segs, nsegs); 1926 sgl_flits = sgl_len(nsegs); 1927 1928 txqs.gen = gen; 1929 txqs.pidx = pidx; 1930 txqs.compl = 0; 1931 1932 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1933 from->wr_hi, from->wr_lo); 1934 } 1935 1936 /** 1937 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1938 * @m: the packet 1939 * 1940 * Returns the number of Tx descriptors needed for the given offload 1941 * packet. These packets are already fully constructed. 1942 */ 1943 static __inline unsigned int 1944 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1945 { 1946 unsigned int flits, cnt = 0; 1947 1948 1949 if (m->m_len <= WR_LEN) 1950 return 1; /* packet fits as immediate data */ 1951 1952 if (m->m_flags & M_IOVEC) 1953 cnt = mtomv(m)->mv_count; 1954 1955 /* headers */ 1956 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1957 1958 return flits_to_desc(flits + sgl_len(cnt)); 1959 } 1960 1961 /** 1962 * ofld_xmit - send a packet through an offload queue 1963 * @adap: the adapter 1964 * @q: the Tx offload queue 1965 * @m: the packet 1966 * 1967 * Send an offload packet through an SGE offload queue. 1968 */ 1969 static int 1970 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1971 { 1972 int ret, nsegs; 1973 unsigned int ndesc; 1974 unsigned int pidx, gen; 1975 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 1976 struct tx_sw_desc *stx; 1977 1978 nsegs = m_get_sgllen(m); 1979 vsegs = m_get_sgl(m); 1980 ndesc = calc_tx_descs_ofld(m, nsegs); 1981 busdma_map_sgl(vsegs, segs, nsegs); 1982 1983 stx = &q->sdesc[q->pidx]; 1984 KASSERT(stx->mi.mi_base == NULL, ("mi_base set")); 1985 1986 mtx_lock(&q->lock); 1987 again: reclaim_completed_tx_(q, 16); 1988 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1989 if (__predict_false(ret)) { 1990 if (ret == 1) { 1991 printf("no ofld desc avail\n"); 1992 1993 m_set_priority(m, ndesc); /* save for restart */ 1994 mtx_unlock(&q->lock); 1995 return (EINTR); 1996 } 1997 goto again; 1998 } 1999 2000 gen = q->gen; 2001 q->in_use += ndesc; 2002 pidx = q->pidx; 2003 q->pidx += ndesc; 2004 if (q->pidx >= q->size) { 2005 q->pidx -= q->size; 2006 q->gen ^= 1; 2007 } 2008 #ifdef T3_TRACE 2009 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2010 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2011 ndesc, pidx, skb->len, skb->len - skb->data_len, 2012 skb_shinfo(skb)->nr_frags); 2013 #endif 2014 mtx_unlock(&q->lock); 2015 2016 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2017 check_ring_tx_db(adap, q); 2018 2019 return (0); 2020 } 2021 2022 /** 2023 * restart_offloadq - restart a suspended offload queue 2024 * @qs: the queue set cotaining the offload queue 2025 * 2026 * Resumes transmission on a suspended Tx offload queue. 
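 * The descriptor count for each queued packet was saved in its priority
 * field by ofld_xmit(), so it is recovered here with m_get_priority() rather
 * than recomputed.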
2027 */ 2028 static void 2029 restart_offloadq(void *data, int npending) 2030 { 2031 struct mbuf *m; 2032 struct sge_qset *qs = data; 2033 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2034 adapter_t *adap = qs->port->adapter; 2035 bus_dma_segment_t segs[TX_MAX_SEGS]; 2036 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2037 int nsegs, cleaned; 2038 2039 mtx_lock(&q->lock); 2040 again: cleaned = reclaim_completed_tx_(q, 16); 2041 2042 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2043 unsigned int gen, pidx; 2044 unsigned int ndesc = m_get_priority(m); 2045 2046 if (__predict_false(q->size - q->in_use < ndesc)) { 2047 setbit(&qs->txq_stopped, TXQ_OFLD); 2048 smp_mb(); 2049 2050 if (should_restart_tx(q) && 2051 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2052 goto again; 2053 q->stops++; 2054 break; 2055 } 2056 2057 gen = q->gen; 2058 q->in_use += ndesc; 2059 pidx = q->pidx; 2060 q->pidx += ndesc; 2061 if (q->pidx >= q->size) { 2062 q->pidx -= q->size; 2063 q->gen ^= 1; 2064 } 2065 2066 (void)mbufq_dequeue(&q->sendq); 2067 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2068 mtx_unlock(&q->lock); 2069 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2070 mtx_lock(&q->lock); 2071 } 2072 mtx_unlock(&q->lock); 2073 2074 #if USE_GTS 2075 set_bit(TXQ_RUNNING, &q->flags); 2076 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2077 #endif 2078 t3_write_reg(adap, A_SG_KDOORBELL, 2079 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2080 } 2081 2082 /** 2083 * queue_set - return the queue set a packet should use 2084 * @m: the packet 2085 * 2086 * Maps a packet to the SGE queue set it should use. The desired queue 2087 * set is carried in bits 1-3 in the packet's priority. 2088 */ 2089 static __inline int 2090 queue_set(const struct mbuf *m) 2091 { 2092 return m_get_priority(m) >> 1; 2093 } 2094 2095 /** 2096 * is_ctrl_pkt - return whether an offload packet is a control packet 2097 * @m: the packet 2098 * 2099 * Determines whether an offload packet should use an OFLD or a CTRL 2100 * Tx queue. This is indicated by bit 0 in the packet's priority. 2101 */ 2102 static __inline int 2103 is_ctrl_pkt(const struct mbuf *m) 2104 { 2105 return m_get_priority(m) & 1; 2106 } 2107 2108 /** 2109 * t3_offload_tx - send an offload packet 2110 * @tdev: the offload device to send to 2111 * @m: the packet 2112 * 2113 * Sends an offload packet. We use the packet priority to select the 2114 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2115 * should be sent as regular or control, bits 1-3 select the queue set. 2116 */ 2117 int 2118 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2119 { 2120 adapter_t *adap = tdev2adap(tdev); 2121 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2122 2123 if (__predict_false(is_ctrl_pkt(m))) 2124 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 2125 2126 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 2127 } 2128 2129 /** 2130 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2131 * @tdev: the offload device that will be receiving the packets 2132 * @q: the SGE response queue that assembled the bundle 2133 * @m: the partial bundle 2134 * @n: the number of packets in the bundle 2135 * 2136 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
2137 */ 2138 static __inline void 2139 deliver_partial_bundle(struct t3cdev *tdev, 2140 struct sge_rspq *q, 2141 struct mbuf *mbufs[], int n) 2142 { 2143 if (n) { 2144 q->offload_bundles++; 2145 cxgb_ofld_recv(tdev, mbufs, n); 2146 } 2147 } 2148 2149 static __inline int 2150 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2151 struct mbuf *m, struct mbuf *rx_gather[], 2152 unsigned int gather_idx) 2153 { 2154 2155 rq->offload_pkts++; 2156 m->m_pkthdr.header = mtod(m, void *); 2157 rx_gather[gather_idx++] = m; 2158 if (gather_idx == RX_BUNDLE_SIZE) { 2159 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2160 gather_idx = 0; 2161 rq->offload_bundles++; 2162 } 2163 return (gather_idx); 2164 } 2165 2166 static void 2167 restart_tx(struct sge_qset *qs) 2168 { 2169 struct adapter *sc = qs->port->adapter; 2170 2171 2172 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2173 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2174 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2175 qs->txq[TXQ_OFLD].restarts++; 2176 DPRINTF("restarting TXQ_OFLD\n"); 2177 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2178 } 2179 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2180 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2181 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2182 qs->txq[TXQ_CTRL].in_use); 2183 2184 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2185 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2186 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2187 qs->txq[TXQ_CTRL].restarts++; 2188 DPRINTF("restarting TXQ_CTRL\n"); 2189 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2190 } 2191 } 2192 2193 /** 2194 * t3_sge_alloc_qset - initialize an SGE queue set 2195 * @sc: the controller softc 2196 * @id: the queue set id 2197 * @nports: how many Ethernet ports will be using this queue set 2198 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2199 * @p: configuration parameters for this queue set 2200 * @ntxq: number of Tx queues for the queue set 2201 * @pi: port info for queue set 2202 * 2203 * Allocate resources and initialize an SGE queue set. A queue set 2204 * comprises a response queue, two Rx free-buffer queues, and up to 3 2205 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2206 * queue, offload queue, and control queue. 
2207 */ 2208 int 2209 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2210 const struct qset_params *p, int ntxq, struct port_info *pi) 2211 { 2212 struct sge_qset *q = &sc->sge.qs[id]; 2213 int i, header_size, ret = 0; 2214 2215 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2216 if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *), 2217 M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { 2218 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2219 goto err; 2220 } 2221 q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0; 2222 q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size; 2223 mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF); 2224 } 2225 2226 init_qset_cntxt(q, id); 2227 q->idx = id; 2228 2229 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2230 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2231 &q->fl[0].desc, &q->fl[0].sdesc, 2232 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2233 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2234 printf("error %d from alloc ring fl0\n", ret); 2235 goto err; 2236 } 2237 2238 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2239 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2240 &q->fl[1].desc, &q->fl[1].sdesc, 2241 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2242 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2243 printf("error %d from alloc ring fl1\n", ret); 2244 goto err; 2245 } 2246 2247 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2248 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2249 &q->rspq.desc_tag, &q->rspq.desc_map, 2250 NULL, NULL)) != 0) { 2251 printf("error %d from alloc ring rspq\n", ret); 2252 goto err; 2253 } 2254 2255 for (i = 0; i < ntxq; ++i) { 2256 /* 2257 * The control queue always uses immediate data so does not 2258 * need to keep track of any mbufs. 2259 * XXX Placeholder for future TOE support. 2260 */ 2261 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2262 2263 if ((ret = alloc_ring(sc, p->txq_size[i], 2264 sizeof(struct tx_desc), sz, 2265 &q->txq[i].phys_addr, &q->txq[i].desc, 2266 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2267 &q->txq[i].desc_map, 2268 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2269 printf("error %d from alloc ring tx %i\n", ret, i); 2270 goto err; 2271 } 2272 mbufq_init(&q->txq[i].sendq); 2273 q->txq[i].gen = 1; 2274 q->txq[i].size = p->txq_size[i]; 2275 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2276 device_get_unit(sc->dev), irq_vec_idx, i); 2277 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2278 } 2279 2280 q->txq[TXQ_ETH].port = pi; 2281 2282 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2283 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2284 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2285 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2286 2287 q->fl[0].gen = q->fl[1].gen = 1; 2288 q->fl[0].size = p->fl_size; 2289 q->fl[1].size = p->jumbo_size; 2290 2291 q->rspq.gen = 1; 2292 q->rspq.cidx = 0; 2293 q->rspq.size = p->rspq_size; 2294 2295 2296 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 2297 q->txq[TXQ_ETH].stop_thres = nports * 2298 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2299 2300 q->fl[0].buf_size = (MCLBYTES - header_size); 2301 q->fl[0].zone = zone_clust; 2302 q->fl[0].type = EXT_CLUSTER; 2303 #if __FreeBSD_version > 800000 2304 if (cxgb_use_16k_clusters) { 2305 q->fl[1].buf_size = MJUM16BYTES - header_size; 2306 q->fl[1].zone = zone_jumbo16; 2307 q->fl[1].type = EXT_JUMBO16; 2308 } else { 2309 q->fl[1].buf_size = MJUM9BYTES - header_size; 2310 q->fl[1].zone = zone_jumbo9; 2311 q->fl[1].type = EXT_JUMBO9; 2312 } 2313 #else 2314 q->fl[1].buf_size = MJUMPAGESIZE - header_size; 2315 q->fl[1].zone = zone_jumbop; 2316 q->fl[1].type = EXT_JUMBOP; 2317 #endif 2318 q->lro.enabled = lro_default; 2319 2320 mtx_lock(&sc->sge.reg_lock); 2321 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2322 q->rspq.phys_addr, q->rspq.size, 2323 q->fl[0].buf_size, 1, 0); 2324 if (ret) { 2325 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2326 goto err_unlock; 2327 } 2328 2329 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2330 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2331 q->fl[i].phys_addr, q->fl[i].size, 2332 q->fl[i].buf_size, p->cong_thres, 1, 2333 0); 2334 if (ret) { 2335 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2336 goto err_unlock; 2337 } 2338 } 2339 2340 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2341 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2342 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2343 1, 0); 2344 if (ret) { 2345 printf("error %d from t3_sge_init_ecntxt\n", ret); 2346 goto err_unlock; 2347 } 2348 2349 if (ntxq > 1) { 2350 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2351 USE_GTS, SGE_CNTXT_OFLD, id, 2352 q->txq[TXQ_OFLD].phys_addr, 2353 q->txq[TXQ_OFLD].size, 0, 1, 0); 2354 if (ret) { 2355 printf("error %d from t3_sge_init_ecntxt\n", ret); 2356 goto err_unlock; 2357 } 2358 } 2359 2360 if (ntxq > 2) { 2361 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2362 SGE_CNTXT_CTRL, id, 2363 q->txq[TXQ_CTRL].phys_addr, 2364 q->txq[TXQ_CTRL].size, 2365 q->txq[TXQ_CTRL].token, 1, 0); 2366 if (ret) { 2367 printf("error %d from t3_sge_init_ecntxt\n", ret); 2368 goto 
err_unlock; 2369 } 2370 } 2371 2372 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2373 device_get_unit(sc->dev), irq_vec_idx); 2374 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2375 2376 mtx_unlock(&sc->sge.reg_lock); 2377 t3_update_qset_coalesce(q, p); 2378 q->port = pi; 2379 2380 refill_fl(sc, &q->fl[0], q->fl[0].size); 2381 refill_fl(sc, &q->fl[1], q->fl[1].size); 2382 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2383 2384 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2385 V_NEWTIMER(q->rspq.holdoff_tmr)); 2386 2387 return (0); 2388 2389 err_unlock: 2390 mtx_unlock(&sc->sge.reg_lock); 2391 err: 2392 t3_free_qset(sc, q); 2393 2394 return (ret); 2395 } 2396 2397 void 2398 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2399 { 2400 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2401 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2402 struct ifnet *ifp = pi->ifp; 2403 2404 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2405 2406 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2407 cpl->csum_valid && cpl->csum == 0xffff) { 2408 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2409 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2410 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2411 m->m_pkthdr.csum_data = 0xffff; 2412 } 2413 /* 2414 * XXX need to add VLAN support for 6.x 2415 */ 2416 #ifdef VLAN_SUPPORTED 2417 if (__predict_false(cpl->vlan_valid)) { 2418 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2419 m->m_flags |= M_VLANTAG; 2420 } 2421 #endif 2422 2423 m->m_pkthdr.rcvif = ifp; 2424 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2425 #ifndef DISABLE_MBUF_IOVEC 2426 m_explode(m); 2427 #endif 2428 /* 2429 * adjust after conversion to mbuf chain 2430 */ 2431 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2432 m->m_len -= (sizeof(*cpl) + ethpad); 2433 m->m_data += (sizeof(*cpl) + ethpad); 2434 2435 (*ifp->if_input)(ifp, m); 2436 } 2437 2438 static void 2439 ext_free_handler(void *cl, void * arg) 2440 { 2441 uintptr_t type = (uintptr_t)arg; 2442 uma_zone_t zone; 2443 struct mbuf *m; 2444 2445 m = cl; 2446 zone = m_getzonefromtype(type); 2447 m->m_ext.ext_type = (int)type; 2448 cxgb_ext_freed++; 2449 cxgb_cache_put(zone, cl); 2450 } 2451 2452 static void 2453 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) 2454 { 2455 struct mbuf *m; 2456 int header_size; 2457 2458 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + 2459 sizeof(struct m_ext_) + sizeof(uint32_t); 2460 2461 bzero(cl, header_size); 2462 m = (struct mbuf *)cl; 2463 2464 SLIST_INIT(&m->m_pkthdr.tags); 2465 m->m_type = MT_DATA; 2466 m->m_flags = flags | M_NOFREE | M_EXT; 2467 m->m_data = cl + header_size; 2468 m->m_ext.ext_buf = cl; 2469 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); 2470 m->m_ext.ext_size = m_getsizefromtype(type); 2471 m->m_ext.ext_free = ext_free_handler; 2472 m->m_ext.ext_args = (void *)(uintptr_t)type; 2473 m->m_ext.ext_type = EXT_EXTREF; 2474 *(m->m_ext.ref_cnt) = 1; 2475 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 2476 } 2477 2478 2479 /** 2480 * get_packet - return the next ingress packet buffer from a free list 2481 * @adap: the adapter that received the packet 2482 * @drop_thres: # of remaining buffers before we start dropping packets 2483 * @qs: the qset that the SGE free list holding the packet belongs 
to
 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 * @r: response descriptor
 *
 * Get the next packet from a free list and complete setup of the
 * mbuf.  If the packet is small we make a copy and recycle the
 * original buffer, otherwise we use the original buffer itself.  If a
 * positive drop threshold is supplied packets are dropped and their
 * buffers recycled if (a) the number of remaining buffers is under the
 * threshold and the packet is too big to copy, or (b) the packet should
 * be copied but there is no memory for the copy.
 */
#ifdef DISABLE_MBUF_IOVEC

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	struct mbuf *m, *m0;
	caddr_t cl;
	uint32_t *ref;
	int ret = 0;

	prefetch(sd->rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m0, void *);
		memcpy(cl, sd->data, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m = m0;
	} else {
	skip_recycle:
		int flags = 0;
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = m0 = (struct mbuf *)cl;

		m0->m_len = len;
		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags = M_PKTHDR;
		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
	}

	switch(sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		ret = 1;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		if (mh->mh_tail == NULL) {
			printf("discarding intermediate descriptor entry\n");
			m_freem(m);
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m->m_pkthdr.len = len;
		mh->mh_head = mh->mh_tail = m;
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		mh->mh_head->m_pkthdr.len += len;
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		ret = 1;
		break;
	}
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

#else

static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf **m, struct rsp_desc *r)
{

	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ?
&qs->fl[1] : &qs->fl[0]; 2586 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2587 uint32_t len = G_RSPD_LEN(len_cq); 2588 uint32_t flags = ntohl(r->flags); 2589 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2590 void *cl; 2591 int ret = 0; 2592 struct mbuf *m0; 2593 #if 0 2594 if ((sd + 1 )->rxsd_cl) 2595 prefetch((sd + 1)->rxsd_cl); 2596 if ((sd + 2)->rxsd_cl) 2597 prefetch((sd + 2)->rxsd_cl); 2598 #endif 2599 DPRINTF("rx cpu=%d\n", curcpu); 2600 fl->credits--; 2601 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2602 2603 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2604 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2605 goto skip_recycle; 2606 cl = mtod(m0, void *); 2607 memcpy(cl, sd->data, len); 2608 recycle_rx_buf(adap, fl, fl->cidx); 2609 *m = m0; 2610 } else { 2611 skip_recycle: 2612 bus_dmamap_unload(fl->entry_tag, sd->map); 2613 cl = sd->rxsd_cl; 2614 *m = m0 = (struct mbuf *)cl; 2615 } 2616 2617 switch(sopeop) { 2618 case RSPQ_SOP_EOP: 2619 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2620 if (cl == sd->rxsd_cl) 2621 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); 2622 m0->m_len = m0->m_pkthdr.len = len; 2623 ret = 1; 2624 goto done; 2625 break; 2626 case RSPQ_NSOP_NEOP: 2627 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2628 panic("chaining unsupported"); 2629 ret = 0; 2630 break; 2631 case RSPQ_SOP: 2632 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2633 panic("chaining unsupported"); 2634 m_iovinit(m0); 2635 ret = 0; 2636 break; 2637 case RSPQ_EOP: 2638 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2639 panic("chaining unsupported"); 2640 ret = 1; 2641 break; 2642 } 2643 panic("append not supported"); 2644 #if 0 2645 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); 2646 #endif 2647 done: 2648 if (++fl->cidx == fl->size) 2649 fl->cidx = 0; 2650 2651 return (ret); 2652 } 2653 #endif 2654 /** 2655 * handle_rsp_cntrl_info - handles control information in a response 2656 * @qs: the queue set corresponding to the response 2657 * @flags: the response control flags 2658 * 2659 * Handles the control information of an SGE response, such as GTS 2660 * indications and completion credits for the queue set's Tx queues. 2661 * HW coalesces credits, we don't do any extra SW coalescing. 2662 */ 2663 static __inline void 2664 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2665 { 2666 unsigned int credits; 2667 2668 #if USE_GTS 2669 if (flags & F_RSPD_TXQ0_GTS) 2670 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2671 #endif 2672 credits = G_RSPD_TXQ0_CR(flags); 2673 if (credits) 2674 qs->txq[TXQ_ETH].processed += credits; 2675 2676 credits = G_RSPD_TXQ2_CR(flags); 2677 if (credits) 2678 qs->txq[TXQ_CTRL].processed += credits; 2679 2680 # if USE_GTS 2681 if (flags & F_RSPD_TXQ1_GTS) 2682 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2683 # endif 2684 credits = G_RSPD_TXQ1_CR(flags); 2685 if (credits) 2686 qs->txq[TXQ_OFLD].processed += credits; 2687 2688 } 2689 2690 static void 2691 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2692 unsigned int sleeping) 2693 { 2694 ; 2695 } 2696 2697 /** 2698 * process_responses - process responses from an SGE response queue 2699 * @adap: the adapter 2700 * @qs: the queue set to which the response queue belongs 2701 * @budget: how many responses can be processed in this round 2702 * 2703 * Process responses from an SGE response queue up to the supplied budget. 
2704 * Responses include received packets as well as credits and other events 2705 * for the queues that belong to the response queue's queue set. 2706 * A negative budget is effectively unlimited. 2707 * 2708 * Additionally choose the interrupt holdoff time for the next interrupt 2709 * on this queue. If the system is under memory shortage use a fairly 2710 * long delay to help recovery. 2711 */ 2712 int 2713 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2714 { 2715 struct sge_rspq *rspq = &qs->rspq; 2716 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2717 int budget_left = budget; 2718 unsigned int sleeping = 0; 2719 int lro = qs->lro.enabled; 2720 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2721 int ngathered = 0; 2722 #ifdef DEBUG 2723 static int last_holdoff = 0; 2724 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2725 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2726 last_holdoff = rspq->holdoff_tmr; 2727 } 2728 #endif 2729 rspq->next_holdoff = rspq->holdoff_tmr; 2730 2731 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2732 int eth, eop = 0, ethpad = 0; 2733 uint32_t flags = ntohl(r->flags); 2734 uint32_t rss_csum = *(const uint32_t *)r; 2735 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2736 2737 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2738 2739 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2740 /* XXX */ 2741 printf("async notification\n"); 2742 2743 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2744 struct mbuf *m = NULL; 2745 2746 #ifdef DISABLE_MBUF_IOVEC 2747 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2748 r->rss_hdr.opcode, rspq->cidx); 2749 2750 m = rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2751 if (m == NULL) { 2752 rspq->next_holdoff = NOMEM_INTR_DELAY; 2753 budget_left--; 2754 break; 2755 } 2756 2757 get_imm_packet(adap, r, &rspq->rspq_mh); 2758 eop = 1; 2759 #else 2760 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2761 r->rss_hdr.opcode, rspq->cidx); 2762 if (rspq->rspq_mbuf == NULL) 2763 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2764 else 2765 m = m_gethdr(M_DONTWAIT, MT_DATA); 2766 2767 /* 2768 * XXX revisit me 2769 */ 2770 if (rspq->rspq_mbuf == NULL && m == NULL) { 2771 rspq->next_holdoff = NOMEM_INTR_DELAY; 2772 budget_left--; 2773 break; 2774 } 2775 get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags); 2776 2777 eop = 1; 2778 rspq->imm_data++; 2779 #endif 2780 } else if (r->len_cq) { 2781 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2782 2783 #ifdef DISABLE_MBUF_IOVEC 2784 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 2785 #else 2786 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); 2787 #ifdef IFNET_MULTIQUEUE 2788 rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash; 2789 #endif 2790 #endif 2791 ethpad = 2; 2792 } else { 2793 DPRINTF("pure response\n"); 2794 rspq->pure_rsps++; 2795 } 2796 2797 if (flags & RSPD_CTRL_MASK) { 2798 sleeping |= flags & RSPD_GTS_MASK; 2799 handle_rsp_cntrl_info(qs, flags); 2800 } 2801 2802 r++; 2803 if (__predict_false(++rspq->cidx == rspq->size)) { 2804 rspq->cidx = 0; 2805 rspq->gen ^= 1; 2806 r = rspq->desc; 2807 } 2808 prefetch(r); 2809 if (++rspq->credits >= (rspq->size / 4)) { 2810 refill_rspq(adap, rspq, rspq->credits); 2811 rspq->credits = 0; 2812 } 2813 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); 2814 2815 if (!eth && eop) { 2816 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2817 /* 2818 * XXX size mismatch 2819 */ 2820 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2821 2822 ngathered = rx_offload(&adap->tdev, rspq, 2823 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2824 rspq->rspq_mh.mh_head = NULL; 2825 DPRINTF("received offload packet\n"); 2826 2827 } else if (eth && eop) { 2828 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 2829 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 2830 2831 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad, 2832 rss_hash, rss_csum, lro); 2833 DPRINTF("received tunnel packet\n"); 2834 rspq->rspq_mh.mh_head = NULL; 2835 2836 } 2837 __refill_fl_lt(adap, &qs->fl[0], 32); 2838 __refill_fl_lt(adap, &qs->fl[1], 32); 2839 --budget_left; 2840 } 2841 2842 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2843 t3_lro_flush(adap, qs, &qs->lro); 2844 2845 if (sleeping) 2846 check_ring_db(adap, qs, sleeping); 2847 2848 smp_mb(); /* commit Tx queue processed updates */ 2849 if (__predict_false(qs->txq_stopped > 1)) { 2850 printf("restarting tx on %p\n", qs); 2851 2852 restart_tx(qs); 2853 } 2854 2855 __refill_fl_lt(adap, &qs->fl[0], 512); 2856 __refill_fl_lt(adap, &qs->fl[1], 512); 2857 budget -= budget_left; 2858 return (budget); 2859 } 2860 2861 /* 2862 * A helper function that processes responses and issues GTS. 2863 */ 2864 static __inline int 2865 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2866 { 2867 int work; 2868 static int last_holdoff = 0; 2869 2870 work = process_responses(adap, rspq_to_qset(rq), -1); 2871 2872 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2873 printf("next_holdoff=%d\n", rq->next_holdoff); 2874 last_holdoff = rq->next_holdoff; 2875 } 2876 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2877 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2878 2879 return (work); 2880 } 2881 2882 2883 /* 2884 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2885 * Handles data events from SGE response queues as well as error and other 2886 * async events as they all use the same interrupt pin. We use one SGE 2887 * response queue per port in this mode and protect all response queues with 2888 * queue 0's lock. 
2889 */ 2890 void 2891 t3b_intr(void *data) 2892 { 2893 uint32_t i, map; 2894 adapter_t *adap = data; 2895 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2896 2897 t3_write_reg(adap, A_PL_CLI, 0); 2898 map = t3_read_reg(adap, A_SG_DATA_INTR); 2899 2900 if (!map) 2901 return; 2902 2903 if (__predict_false(map & F_ERRINTR)) 2904 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2905 2906 mtx_lock(&q0->lock); 2907 for_each_port(adap, i) 2908 if (map & (1 << i)) 2909 process_responses_gts(adap, &adap->sge.qs[i].rspq); 2910 mtx_unlock(&q0->lock); 2911 } 2912 2913 /* 2914 * The MSI interrupt handler. This needs to handle data events from SGE 2915 * response queues as well as error and other async events as they all use 2916 * the same MSI vector. We use one SGE response queue per port in this mode 2917 * and protect all response queues with queue 0's lock. 2918 */ 2919 void 2920 t3_intr_msi(void *data) 2921 { 2922 adapter_t *adap = data; 2923 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2924 int i, new_packets = 0; 2925 2926 mtx_lock(&q0->lock); 2927 2928 for_each_port(adap, i) 2929 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2930 new_packets = 1; 2931 mtx_unlock(&q0->lock); 2932 if (new_packets == 0) 2933 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2934 } 2935 2936 void 2937 t3_intr_msix(void *data) 2938 { 2939 struct sge_qset *qs = data; 2940 adapter_t *adap = qs->port->adapter; 2941 struct sge_rspq *rspq = &qs->rspq; 2942 #ifndef IFNET_MULTIQUEUE 2943 mtx_lock(&rspq->lock); 2944 #else 2945 if (mtx_trylock(&rspq->lock)) 2946 #endif 2947 { 2948 2949 if (process_responses_gts(adap, rspq) == 0) 2950 rspq->unhandled_irqs++; 2951 mtx_unlock(&rspq->lock); 2952 } 2953 } 2954 2955 #define QDUMP_SBUF_SIZE 32 * 400 2956 static int 2957 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 2958 { 2959 struct sge_rspq *rspq; 2960 struct sge_qset *qs; 2961 int i, err, dump_end, idx; 2962 static int multiplier = 1; 2963 struct sbuf *sb; 2964 struct rsp_desc *rspd; 2965 uint32_t data[4]; 2966 2967 rspq = arg1; 2968 qs = rspq_to_qset(rspq); 2969 if (rspq->rspq_dump_count == 0) 2970 return (0); 2971 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 2972 log(LOG_WARNING, 2973 "dump count is too large %d\n", rspq->rspq_dump_count); 2974 rspq->rspq_dump_count = 0; 2975 return (EINVAL); 2976 } 2977 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 2978 log(LOG_WARNING, 2979 "dump start of %d is greater than queue size\n", 2980 rspq->rspq_dump_start); 2981 rspq->rspq_dump_start = 0; 2982 return (EINVAL); 2983 } 2984 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 2985 if (err) 2986 return (err); 2987 retry_sbufops: 2988 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 2989 2990 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 2991 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 2992 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 2993 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 2994 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 2995 2996 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 2997 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 2998 2999 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3000 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3001 idx = i & (RSPQ_Q_SIZE-1); 3002 3003 rspd = &rspq->desc[idx]; 3004 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3005 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3006 
rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3007 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3008 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3009 be32toh(rspd->len_cq), rspd->intr_gen); 3010 } 3011 if (sbuf_overflowed(sb)) { 3012 sbuf_delete(sb); 3013 multiplier++; 3014 goto retry_sbufops; 3015 } 3016 sbuf_finish(sb); 3017 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3018 sbuf_delete(sb); 3019 return (err); 3020 } 3021 3022 3023 /* 3024 * broken by recent mbuf changes 3025 */ 3026 static int 3027 t3_dump_txq(SYSCTL_HANDLER_ARGS) 3028 { 3029 struct sge_txq *txq; 3030 struct sge_qset *qs; 3031 int i, j, err, dump_end; 3032 static int multiplier = 1; 3033 struct sbuf *sb; 3034 struct tx_desc *txd; 3035 uint32_t *WR, wr_hi, wr_lo, gen; 3036 uint32_t data[4]; 3037 3038 txq = arg1; 3039 qs = txq_to_qset(txq, TXQ_ETH); 3040 if (txq->txq_dump_count == 0) { 3041 return (0); 3042 } 3043 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3044 log(LOG_WARNING, 3045 "dump count is too large %d\n", txq->txq_dump_count); 3046 txq->txq_dump_count = 1; 3047 return (EINVAL); 3048 } 3049 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3050 log(LOG_WARNING, 3051 "dump start of %d is greater than queue size\n", 3052 txq->txq_dump_start); 3053 txq->txq_dump_start = 0; 3054 return (EINVAL); 3055 } 3056 err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data); 3057 if (err) 3058 return (err); 3059 3060 3061 retry_sbufops: 3062 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3063 3064 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3065 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3066 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3067 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3068 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3069 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3070 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3071 txq->txq_dump_start, 3072 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3073 3074 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3075 for (i = txq->txq_dump_start; i < dump_end; i++) { 3076 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3077 WR = (uint32_t *)txd->flit; 3078 wr_hi = ntohl(WR[0]); 3079 wr_lo = ntohl(WR[1]); 3080 gen = G_WR_GEN(wr_lo); 3081 3082 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3083 wr_hi, wr_lo, gen); 3084 for (j = 2; j < 30; j += 4) 3085 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3086 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3087 3088 } 3089 if (sbuf_overflowed(sb)) { 3090 sbuf_delete(sb); 3091 multiplier++; 3092 goto retry_sbufops; 3093 } 3094 sbuf_finish(sb); 3095 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3096 sbuf_delete(sb); 3097 return (err); 3098 } 3099 3100 3101 static int 3102 t3_lro_enable(SYSCTL_HANDLER_ARGS) 3103 { 3104 adapter_t *sc; 3105 int i, j, enabled, err, nqsets = 0; 3106 3107 #ifndef LRO_WORKING 3108 return (0); 3109 #endif 3110 sc = arg1; 3111 enabled = sc->sge.qs[0].lro.enabled; 3112 err = sysctl_handle_int(oidp, &enabled, arg2, req); 3113 3114 if (err != 0) 3115 return (err); 3116 if (enabled == sc->sge.qs[0].lro.enabled) 3117 return (0); 3118 3119 for (i = 0; i < sc->params.nports; i++) 3120 for (j = 0; j < sc->port[i].nqsets; j++) 3121 nqsets++; 3122 3123 for (i = 0; i < nqsets; i++) 3124 sc->sge.qs[i].lro.enabled = enabled; 3125 3126 return (0); 3127 } 3128 3129 static int 3130 
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 3131 { 3132 adapter_t *sc = arg1; 3133 struct qset_params *qsp = &sc->params.sge.qset[0]; 3134 int coalesce_nsecs; 3135 struct sge_qset *qs; 3136 int i, j, err, nqsets = 0; 3137 struct mtx *lock; 3138 3139 if ((sc->flags & FULL_INIT_DONE) == 0) 3140 return (ENXIO); 3141 3142 coalesce_nsecs = qsp->coalesce_nsecs; 3143 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 3144 3145 if (err != 0) { 3146 return (err); 3147 } 3148 if (coalesce_nsecs == qsp->coalesce_nsecs) 3149 return (0); 3150 3151 for (i = 0; i < sc->params.nports; i++) 3152 for (j = 0; j < sc->port[i].nqsets; j++) 3153 nqsets++; 3154 3155 coalesce_nsecs = max(100, coalesce_nsecs); 3156 3157 for (i = 0; i < nqsets; i++) { 3158 qs = &sc->sge.qs[i]; 3159 qsp = &sc->params.sge.qset[i]; 3160 qsp->coalesce_nsecs = coalesce_nsecs; 3161 3162 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3163 &sc->sge.qs[0].rspq.lock; 3164 3165 mtx_lock(lock); 3166 t3_update_qset_coalesce(qs, qsp); 3167 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3168 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3169 mtx_unlock(lock); 3170 } 3171 3172 return (0); 3173 } 3174 3175 3176 void 3177 t3_add_attach_sysctls(adapter_t *sc) 3178 { 3179 struct sysctl_ctx_list *ctx; 3180 struct sysctl_oid_list *children; 3181 3182 ctx = device_get_sysctl_ctx(sc->dev); 3183 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3184 3185 /* random information */ 3186 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3187 "firmware_version", 3188 CTLFLAG_RD, &sc->fw_version, 3189 0, "firmware version"); 3190 3191 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3192 "enable_lro", 3193 CTLTYPE_INT|CTLFLAG_RW, sc, 3194 0, t3_lro_enable, 3195 "I", "enable large receive offload"); 3196 3197 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3198 "enable_debug", 3199 CTLFLAG_RW, &cxgb_debug, 3200 0, "enable verbose debugging output"); 3201 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", 3202 CTLFLAG_RD, &sc->tunq_coalesce, 3203 "#tunneled packets freed"); 3204 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3205 "txq_overrun", 3206 CTLFLAG_RD, &txq_fills, 3207 0, "#times txq overrun"); 3208 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3209 "bogus_imm", 3210 CTLFLAG_RD, &bogus_imm, 3211 0, "#times a bogus immediate response was seen"); 3212 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3213 "cache_alloc", 3214 CTLFLAG_RD, &cxgb_cached_allocations, 3215 0, "#times a cluster was allocated from cache"); 3216 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3217 "cached", 3218 CTLFLAG_RD, &cxgb_cached, 3219 0, "#times a cluster was cached"); 3220 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3221 "ext_freed", 3222 CTLFLAG_RD, &cxgb_ext_freed, 3223 0, "#times a cluster was freed through ext_free"); 3224 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3225 "mbufs_outstanding", 3226 CTLFLAG_RD, &mbufs_outstanding, 3227 0, "#mbufs in flight in the driver"); 3228 } 3229 3230 3231 static const char *rspq_name = "rspq"; 3232 static const char *txq_names[] = 3233 { 3234 "txq_eth", 3235 "txq_ofld", 3236 "txq_ctrl" 3237 }; 3238 3239 void 3240 t3_add_configured_sysctls(adapter_t *sc) 3241 { 3242 struct sysctl_ctx_list *ctx; 3243 struct sysctl_oid_list *children; 3244 int i, j; 3245 3246 ctx = device_get_sysctl_ctx(sc->dev); 3247 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3248 3249 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3250 "intr_coal", 3251 CTLTYPE_INT|CTLFLAG_RW, sc, 3252 0, t3_set_coalesce_nsecs, 3253 "I", "interrupt coalescing timer (ns)"); 3254 3255 for (i = 0; i < 
sc->params.nports; i++) { 3256 struct port_info *pi = &sc->port[i]; 3257 struct sysctl_oid *poid; 3258 struct sysctl_oid_list *poidlist; 3259 3260 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3261 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3262 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3263 poidlist = SYSCTL_CHILDREN(poid); 3264 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3265 "nqsets", CTLFLAG_RD, &pi->nqsets, 3266 0, "#queue sets"); 3267 3268 for (j = 0; j < pi->nqsets; j++) { 3269 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3270 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid; 3271 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist; 3272 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3273 3274 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3275 3276 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3277 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3278 qspoidlist = SYSCTL_CHILDREN(qspoid); 3279 3280 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3281 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3282 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3283 3284 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3285 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3286 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3287 3288 3289 3290 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3291 CTLFLAG_RD, &qs->rspq.size, 3292 0, "#entries in response queue"); 3293 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3294 CTLFLAG_RD, &qs->rspq.cidx, 3295 0, "consumer index"); 3296 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3297 CTLFLAG_RD, &qs->rspq.credits, 3298 0, "#credits"); 3299 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3300 CTLFLAG_RD, &qs->rspq.phys_addr, 3301 "physical_address_of the queue"); 3302 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3303 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3304 0, "start rspq dump entry"); 3305 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3306 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3307 0, "#rspq entries to dump"); 3308 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3309 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3310 0, t3_dump_rspq, "A", "dump of the response queue"); 3311 3312 3313 3314 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3315 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3316 0, "#tunneled packets dropped"); 3317 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3318 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3319 0, "#tunneled packets waiting to be sent"); 3320 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3321 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3322 0, "#tunneled packets queue producer index"); 3323 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3324 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3325 0, "#tunneled packets queue consumer index"); 3326 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3327 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3328 0, "#tunneled packets processed by the card"); 3329 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3330 CTLFLAG_RD, &txq->cleaned, 3331 0, "#tunneled packets cleaned"); 3332 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3333 CTLFLAG_RD, &txq->in_use, 3334 0, "#tunneled packet slots in use"); 3335 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3336 CTLFLAG_RD, &txq->txq_frees, 3337 "#tunneled packets freed"); 3338 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3339 
			    CTLFLAG_RD, &txq->txq_skipped,
			    0, "#tunneled packet descriptors skipped");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
			    CTLFLAG_RD, &txq->txq_coalesced,
			    0, "#tunneled packets coalesced");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
			    CTLFLAG_RD, &txq->txq_enqueued,
			    0, "#tunneled packets enqueued to hardware");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
			    CTLFLAG_RD, &qs->txq_stopped,
			    0, "tx queues stopped");
			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
			    CTLFLAG_RD, &txq->phys_addr,
			    "physical_address_of the queue");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
			    0, "txq generation");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
			    CTLFLAG_RD, &txq->cidx,
			    0, "hardware queue cidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
			    CTLFLAG_RD, &txq->pidx,
			    0, "hardware queue pidx");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
			    0, "txq start idx for dump");
			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
			    0, "txq #entries to dump");
			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
			    0, t3_dump_txq, "A", "dump of the transmit queue");
		}
	}
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
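
/*
 * Illustrative sketch only, kept under "#if 0" so it is never compiled: one
 * way a debugging path might use t3_get_desc() above to hex-dump a single
 * Ethernet Tx descriptor.  The helper name dump_eth_txd() and its use are
 * hypothetical and not part of the driver; it assumes only what the comment
 * above states, namely that qnum 0 (TXQ_ETH) selects the Ethernet Tx queue
 * and that the descriptor size is returned on success.
 */
#if 0
static void
dump_eth_txd(const struct sge_qset *qs, unsigned int idx)
{
	unsigned char buf[sizeof(struct tx_desc)];
	int len, i;

	/* qnum 0..2 are the Tx queues; TXQ_ETH is queue 0. */
	len = t3_get_desc(qs, TXQ_ETH, idx, buf);
	if (len != (int)sizeof(struct tx_desc))
		return;		/* invalid index or unallocated queue */
	for (i = 0; i < len; i++)
		printf("%02x%c", buf[i], ((i & 15) == 15) ? '\n' : ' ');
	printf("\n");
}
#endif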