1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 #define DEBUG_BUFRING 30 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/conf.h> 41 #include <machine/bus.h> 42 #include <machine/resource.h> 43 #include <sys/bus_dma.h> 44 #include <sys/rman.h> 45 #include <sys/queue.h> 46 #include <sys/sysctl.h> 47 #include <sys/taskqueue.h> 48 49 #include <sys/proc.h> 50 #include <sys/sbuf.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/systm.h> 54 #include <sys/syslog.h> 55 56 #include <netinet/in_systm.h> 57 #include <netinet/in.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 61 #include <dev/pci/pcireg.h> 62 #include <dev/pci/pcivar.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 67 #ifdef CONFIG_DEFINED 68 #include <cxgb_include.h> 69 #include <sys/mvec.h> 70 #else 71 #include <dev/cxgb/cxgb_include.h> 72 #include <dev/cxgb/sys/mvec.h> 73 #endif 74 75 int txq_fills = 0; 76 static int recycle_enable = 1; 77 extern int cxgb_txq_buf_ring_size; 78 int cxgb_cached_allocations; 79 int cxgb_cached; 80 int cxgb_ext_freed; 81 extern int cxgb_use_16k_clusters; 82 extern int cxgb_pcpu_cache_enable; 83 84 85 #define USE_GTS 0 86 87 #define SGE_RX_SM_BUF_SIZE 1536 88 #define SGE_RX_DROP_THRES 16 89 #define SGE_RX_COPY_THRES 128 90 91 /* 92 * Period of the Tx buffer reclaim timer. This timer does not need to run 93 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
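 *
 * With the system clock running at hz ticks per second, the (hz >> 1)
 * period below works out to roughly half a second between reclaim runs.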
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf_iovec mi;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	caddr_t		data;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @q: the Tx queue to reclaim completed descriptors from
 * @reclaim_min: do nothing unless at least this many descriptors are reclaimable
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
 */
static __inline int
reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
{
	int reclaim = desc_reclaimable(q);

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(q, reclaim);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	return (reclaim);
}

/**
 * should_restart_tx - are there enough resources to restart a Tx queue?
 * @q: the Tx queue
 *
 * Checks if there are enough descriptors to restart a suspended Tx queue.
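 *
 * For example, with a 1024-entry ring the queue is only restarted once the
 * number of descriptors still genuinely in use (in_use minus those already
 * processed but not yet cleaned) has dropped below 512, i.e. half the ring.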
223 */ 224 static __inline int 225 should_restart_tx(const struct sge_txq *q) 226 { 227 unsigned int r = q->processed - q->cleaned; 228 229 return q->in_use - r < (q->size >> 1); 230 } 231 232 /** 233 * t3_sge_init - initialize SGE 234 * @adap: the adapter 235 * @p: the SGE parameters 236 * 237 * Performs SGE initialization needed every time after a chip reset. 238 * We do not initialize any of the queue sets here, instead the driver 239 * top-level must request those individually. We also do not enable DMA 240 * here, that should be done after the queues have been set up. 241 */ 242 void 243 t3_sge_init(adapter_t *adap, struct sge_params *p) 244 { 245 u_int ctrl, ups; 246 247 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 248 249 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 250 F_CQCRDTCTRL | 251 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 252 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 253 #if SGE_NUM_GENBITS == 1 254 ctrl |= F_EGRGENCTRL; 255 #endif 256 if (adap->params.rev > 0) { 257 if (!(adap->flags & (USING_MSIX | USING_MSI))) 258 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 259 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 260 } 261 t3_write_reg(adap, A_SG_CONTROL, ctrl); 262 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 263 V_LORCQDRBTHRSH(512)); 264 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 265 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 266 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 267 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 268 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 269 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 270 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 271 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 272 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 273 } 274 275 276 /** 277 * sgl_len - calculates the size of an SGL of the given capacity 278 * @n: the number of SGL entries 279 * 280 * Calculates the number of flits needed for a scatter/gather list that 281 * can hold the given number of entries. 282 */ 283 static __inline unsigned int 284 sgl_len(unsigned int n) 285 { 286 return ((3 * n) / 2 + (n & 1)); 287 } 288 289 /** 290 * get_imm_packet - return the next ingress packet buffer from a response 291 * @resp: the response descriptor containing the packet data 292 * 293 * Return a packet containing the immediate data of the given response. 
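 *
 * The immediate data always spans IMMED_PKT_SIZE bytes of the response
 * descriptor, so the mbuf's m_len and m_pkthdr.len are both set to that
 * size before the copy.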
294 */ 295 static int 296 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags) 297 { 298 299 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 300 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 301 return (0); 302 303 } 304 305 static __inline u_int 306 flits_to_desc(u_int n) 307 { 308 return (flit_desc_map[n]); 309 } 310 311 void 312 t3_sge_err_intr_handler(adapter_t *adapter) 313 { 314 unsigned int v, status; 315 316 317 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 318 319 if (status & F_RSPQCREDITOVERFOW) 320 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 321 322 if (status & F_RSPQDISABLED) { 323 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 324 325 CH_ALERT(adapter, 326 "packet delivered to disabled response queue (0x%x)\n", 327 (v >> S_RSPQ0DISABLED) & 0xff); 328 } 329 330 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 331 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 332 t3_fatal_err(adapter); 333 } 334 335 void 336 t3_sge_prep(adapter_t *adap, struct sge_params *p) 337 { 338 int i; 339 340 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 341 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 342 343 for (i = 0; i < SGE_QSETS; ++i) { 344 struct qset_params *q = p->qset + i; 345 346 q->polling = adap->params.rev > 0; 347 348 if (adap->params.nports > 2) { 349 q->coalesce_nsecs = 50000; 350 } else { 351 #ifdef INVARIANTS 352 q->coalesce_nsecs = 10000; 353 #else 354 q->coalesce_nsecs = 5000; 355 #endif 356 } 357 q->rspq_size = RSPQ_Q_SIZE; 358 q->fl_size = FL_Q_SIZE; 359 q->jumbo_size = JUMBO_Q_SIZE; 360 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 361 q->txq_size[TXQ_OFLD] = 1024; 362 q->txq_size[TXQ_CTRL] = 256; 363 q->cong_thres = 0; 364 } 365 } 366 367 int 368 t3_sge_alloc(adapter_t *sc) 369 { 370 371 /* The parent tag. */ 372 if (bus_dma_tag_create( NULL, /* parent */ 373 1, 0, /* algnmnt, boundary */ 374 BUS_SPACE_MAXADDR, /* lowaddr */ 375 BUS_SPACE_MAXADDR, /* highaddr */ 376 NULL, NULL, /* filter, filterarg */ 377 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 378 BUS_SPACE_UNRESTRICTED, /* nsegments */ 379 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 380 0, /* flags */ 381 NULL, NULL, /* lock, lockarg */ 382 &sc->parent_dmat)) { 383 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 384 return (ENOMEM); 385 } 386 387 /* 388 * DMA tag for normal sized RX frames 389 */ 390 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 391 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 392 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 393 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 394 return (ENOMEM); 395 } 396 397 /* 398 * DMA tag for jumbo sized RX frames. 399 */ 400 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 401 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 402 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 403 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 404 return (ENOMEM); 405 } 406 407 /* 408 * DMA tag for TX frames. 
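 * A transmit mapping may span up to TX_MAX_SEGS segments and at most
 * TX_MAX_SIZE bytes in total, matching the per-queue entry tags created
 * later in alloc_ring().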
409 */ 410 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 411 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 412 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 413 NULL, NULL, &sc->tx_dmat)) { 414 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 415 return (ENOMEM); 416 } 417 418 return (0); 419 } 420 421 int 422 t3_sge_free(struct adapter * sc) 423 { 424 425 if (sc->tx_dmat != NULL) 426 bus_dma_tag_destroy(sc->tx_dmat); 427 428 if (sc->rx_jumbo_dmat != NULL) 429 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 430 431 if (sc->rx_dmat != NULL) 432 bus_dma_tag_destroy(sc->rx_dmat); 433 434 if (sc->parent_dmat != NULL) 435 bus_dma_tag_destroy(sc->parent_dmat); 436 437 return (0); 438 } 439 440 void 441 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 442 { 443 444 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 445 qs->rspq.polling = 0 /* p->polling */; 446 } 447 448 #if !defined(__i386__) && !defined(__amd64__) 449 static void 450 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 451 { 452 struct refill_fl_cb_arg *cb_arg = arg; 453 454 cb_arg->error = error; 455 cb_arg->seg = segs[0]; 456 cb_arg->nseg = nseg; 457 458 } 459 #endif 460 /** 461 * refill_fl - refill an SGE free-buffer list 462 * @sc: the controller softc 463 * @q: the free-list to refill 464 * @n: the number of new buffers to allocate 465 * 466 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 467 * The caller must assure that @n does not exceed the queue's capacity. 468 */ 469 static void 470 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 471 { 472 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 473 struct rx_desc *d = &q->desc[q->pidx]; 474 struct refill_fl_cb_arg cb_arg; 475 caddr_t cl; 476 int err; 477 int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 478 479 cb_arg.error = 0; 480 while (n--) { 481 /* 482 * We only allocate a cluster, mbuf allocation happens after rx 483 */ 484 if ((cl = cxgb_cache_get(q->zone)) == NULL) { 485 log(LOG_WARNING, "Failed to allocate cluster\n"); 486 goto done; 487 } 488 489 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 490 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 491 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 492 uma_zfree(q->zone, cl); 493 goto done; 494 } 495 sd->flags |= RX_SW_DESC_MAP_CREATED; 496 } 497 #if !defined(__i386__) && !defined(__amd64__) 498 err = bus_dmamap_load(q->entry_tag, sd->map, 499 cl + header_size, q->buf_size, 500 refill_fl_cb, &cb_arg, 0); 501 502 if (err != 0 || cb_arg.error) { 503 log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error); 504 /* 505 * XXX free cluster 506 */ 507 return; 508 } 509 #else 510 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size)); 511 #endif 512 sd->flags |= RX_SW_DESC_INUSE; 513 sd->rxsd_cl = cl; 514 sd->data = cl + header_size; 515 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 516 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 517 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 518 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 519 520 d++; 521 sd++; 522 523 if (++q->pidx == q->size) { 524 q->pidx = 0; 525 q->gen ^= 1; 526 sd = q->sdesc; 527 d = q->desc; 528 } 529 q->credits++; 530 } 531 532 done: 533 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 534 } 535 536 537 /** 538 * free_rx_bufs - free the Rx buffers on an SGE free list 539 * @sc: the controle softc 540 * @q: the SGE free list to clean up 541 * 542 * 
Release the buffers on an SGE free-buffer Rx queue. HW fetching from 543 * this queue should be stopped before calling this function. 544 */ 545 static void 546 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 547 { 548 u_int cidx = q->cidx; 549 550 while (q->credits--) { 551 struct rx_sw_desc *d = &q->sdesc[cidx]; 552 553 if (d->flags & RX_SW_DESC_INUSE) { 554 bus_dmamap_unload(q->entry_tag, d->map); 555 bus_dmamap_destroy(q->entry_tag, d->map); 556 uma_zfree(q->zone, d->rxsd_cl); 557 } 558 d->rxsd_cl = NULL; 559 if (++cidx == q->size) 560 cidx = 0; 561 } 562 } 563 564 static __inline void 565 __refill_fl(adapter_t *adap, struct sge_fl *fl) 566 { 567 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 568 } 569 570 static __inline void 571 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 572 { 573 if ((fl->size - fl->credits) < max) 574 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 575 } 576 577 void 578 refill_fl_service(adapter_t *adap, struct sge_fl *fl) 579 { 580 __refill_fl_lt(adap, fl, 512); 581 } 582 583 /** 584 * recycle_rx_buf - recycle a receive buffer 585 * @adapter: the adapter 586 * @q: the SGE free list 587 * @idx: index of buffer to recycle 588 * 589 * Recycles the specified buffer on the given free list by adding it at 590 * the next available slot on the list. 591 */ 592 static void 593 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 594 { 595 struct rx_desc *from = &q->desc[idx]; 596 struct rx_desc *to = &q->desc[q->pidx]; 597 598 q->sdesc[q->pidx] = q->sdesc[idx]; 599 to->addr_lo = from->addr_lo; // already big endian 600 to->addr_hi = from->addr_hi; // likewise 601 wmb(); 602 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 603 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 604 q->credits++; 605 606 if (++q->pidx == q->size) { 607 q->pidx = 0; 608 q->gen ^= 1; 609 } 610 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 611 } 612 613 static void 614 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 615 { 616 uint32_t *addr; 617 618 addr = arg; 619 *addr = segs[0].ds_addr; 620 } 621 622 static int 623 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 624 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 625 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 626 { 627 size_t len = nelem * elem_size; 628 void *s = NULL; 629 void *p = NULL; 630 int err; 631 632 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 633 BUS_SPACE_MAXADDR_32BIT, 634 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 635 len, 0, NULL, NULL, tag)) != 0) { 636 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 637 return (ENOMEM); 638 } 639 640 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 641 map)) != 0) { 642 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 643 return (ENOMEM); 644 } 645 646 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 647 bzero(p, len); 648 *(void **)desc = p; 649 650 if (sw_size) { 651 len = nelem * sw_size; 652 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 653 *(void **)sdesc = s; 654 } 655 if (parent_entry_tag == NULL) 656 return (0); 657 658 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 659 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 660 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 661 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 662 NULL, NULL, entry_tag)) != 0) { 663 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 664 return (ENOMEM); 665 } 666 return (0); 667 } 668 669 static void 670 
sge_slow_intr_handler(void *arg, int ncount) 671 { 672 adapter_t *sc = arg; 673 674 t3_slow_intr_handler(sc); 675 } 676 677 /** 678 * sge_timer_cb - perform periodic maintenance of an SGE qset 679 * @data: the SGE queue set to maintain 680 * 681 * Runs periodically from a timer to perform maintenance of an SGE queue 682 * set. It performs two tasks: 683 * 684 * a) Cleans up any completed Tx descriptors that may still be pending. 685 * Normal descriptor cleanup happens when new packets are added to a Tx 686 * queue so this timer is relatively infrequent and does any cleanup only 687 * if the Tx queue has not seen any new packets in a while. We make a 688 * best effort attempt to reclaim descriptors, in that we don't wait 689 * around if we cannot get a queue's lock (which most likely is because 690 * someone else is queueing new packets and so will also handle the clean 691 * up). Since control queues use immediate data exclusively we don't 692 * bother cleaning them up here. 693 * 694 * b) Replenishes Rx queues that have run out due to memory shortage. 695 * Normally new Rx buffers are added when existing ones are consumed but 696 * when out of memory a queue can become empty. We try to add only a few 697 * buffers here, the queue will be replenished fully as these new buffers 698 * are used up if memory shortage has subsided. 699 * 700 * c) Return coalesced response queue credits in case a response queue is 701 * starved. 702 * 703 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 704 * fifo overflows and the FW doesn't implement any recovery scheme yet. 705 */ 706 static void 707 sge_timer_cb(void *arg) 708 { 709 adapter_t *sc = arg; 710 #ifndef IFNET_MULTIQUEUE 711 struct port_info *pi; 712 struct sge_qset *qs; 713 struct sge_txq *txq; 714 int i, j; 715 int reclaim_ofl, refill_rx; 716 717 for (i = 0; i < sc->params.nports; i++) 718 for (j = 0; j < sc->port[i].nqsets; j++) { 719 qs = &sc->sge.qs[i + j]; 720 txq = &qs->txq[0]; 721 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 722 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 723 (qs->fl[1].credits < qs->fl[1].size)); 724 if (reclaim_ofl || refill_rx) { 725 pi = &sc->port[i]; 726 taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task); 727 break; 728 } 729 } 730 #endif 731 if (sc->params.nports > 2) { 732 int i; 733 734 for_each_port(sc, i) { 735 struct port_info *pi = &sc->port[i]; 736 737 t3_write_reg(sc, A_SG_KDOORBELL, 738 F_SELEGRCNTX | 739 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 740 } 741 } 742 if (sc->open_device_map != 0) 743 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 744 } 745 746 /* 747 * This is meant to be a catch-all function to keep sge state private 748 * to sge.c 749 * 750 */ 751 int 752 t3_sge_init_adapter(adapter_t *sc) 753 { 754 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 755 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 756 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 757 mi_init(); 758 cxgb_cache_init(); 759 return (0); 760 } 761 762 int 763 t3_sge_reset_adapter(adapter_t *sc) 764 { 765 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 766 return (0); 767 } 768 769 int 770 t3_sge_init_port(struct port_info *pi) 771 { 772 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 773 return (0); 774 } 775 776 void 777 t3_sge_deinit_sw(adapter_t *sc) 778 { 779 int i; 780 781 callout_drain(&sc->sge_timer_ch); 782 if (sc->tq) 783 taskqueue_drain(sc->tq, &sc->slow_intr_task); 784 for 
(i = 0; i < sc->params.nports; i++) 785 if (sc->port[i].tq != NULL) 786 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 787 788 mi_deinit(); 789 } 790 791 /** 792 * refill_rspq - replenish an SGE response queue 793 * @adapter: the adapter 794 * @q: the response queue to replenish 795 * @credits: how many new responses to make available 796 * 797 * Replenishes a response queue by making the supplied number of responses 798 * available to HW. 799 */ 800 static __inline void 801 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 802 { 803 804 /* mbufs are allocated on demand when a rspq entry is processed. */ 805 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 806 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 807 } 808 809 static __inline void 810 sge_txq_reclaim_(struct sge_txq *txq, int force) 811 { 812 813 if (desc_reclaimable(txq) < 16) 814 return; 815 if (mtx_trylock(&txq->lock) == 0) 816 return; 817 reclaim_completed_tx_(txq, 16); 818 mtx_unlock(&txq->lock); 819 820 } 821 822 static void 823 sge_txq_reclaim_handler(void *arg, int ncount) 824 { 825 struct sge_txq *q = arg; 826 827 sge_txq_reclaim_(q, TRUE); 828 } 829 830 831 832 static void 833 sge_timer_reclaim(void *arg, int ncount) 834 { 835 struct port_info *pi = arg; 836 int i, nqsets = pi->nqsets; 837 adapter_t *sc = pi->adapter; 838 struct sge_qset *qs; 839 struct sge_txq *txq; 840 struct mtx *lock; 841 842 #ifdef IFNET_MULTIQUEUE 843 panic("%s should not be called with multiqueue support\n", __FUNCTION__); 844 #endif 845 for (i = 0; i < nqsets; i++) { 846 qs = &sc->sge.qs[i]; 847 848 txq = &qs->txq[TXQ_OFLD]; 849 sge_txq_reclaim_(txq, FALSE); 850 851 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 852 &sc->sge.qs[0].rspq.lock; 853 854 if (mtx_trylock(lock)) { 855 /* XXX currently assume that we are *NOT* polling */ 856 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 857 858 if (qs->fl[0].credits < qs->fl[0].size - 16) 859 __refill_fl(sc, &qs->fl[0]); 860 if (qs->fl[1].credits < qs->fl[1].size - 16) 861 __refill_fl(sc, &qs->fl[1]); 862 863 if (status & (1 << qs->rspq.cntxt_id)) { 864 if (qs->rspq.credits) { 865 refill_rspq(sc, &qs->rspq, 1); 866 qs->rspq.credits--; 867 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 868 1 << qs->rspq.cntxt_id); 869 } 870 } 871 mtx_unlock(lock); 872 } 873 } 874 } 875 876 /** 877 * init_qset_cntxt - initialize an SGE queue set context info 878 * @qs: the queue set 879 * @id: the queue set id 880 * 881 * Initializes the TIDs and context ids for the queues of a queue set. 
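 *
 * Free-list contexts are handed out in pairs: queue set 0 uses contexts
 * 0 and 1, queue set 1 uses 2 and 3, and so on, while the egress context
 * ids and TIDs are simply the firmware base values offset by the id.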
882 */ 883 static void 884 init_qset_cntxt(struct sge_qset *qs, u_int id) 885 { 886 887 qs->rspq.cntxt_id = id; 888 qs->fl[0].cntxt_id = 2 * id; 889 qs->fl[1].cntxt_id = 2 * id + 1; 890 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 891 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 892 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 893 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 894 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 895 896 mbufq_init(&qs->txq[TXQ_ETH].sendq); 897 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 898 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 899 } 900 901 902 static void 903 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 904 { 905 txq->in_use += ndesc; 906 /* 907 * XXX we don't handle stopping of queue 908 * presumably start handles this when we bump against the end 909 */ 910 txqs->gen = txq->gen; 911 txq->unacked += ndesc; 912 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 913 txq->unacked &= 7; 914 txqs->pidx = txq->pidx; 915 txq->pidx += ndesc; 916 #ifdef INVARIANTS 917 if (((txqs->pidx > txq->cidx) && 918 (txq->pidx < txqs->pidx) && 919 (txq->pidx >= txq->cidx)) || 920 ((txqs->pidx < txq->cidx) && 921 (txq->pidx >= txq-> cidx)) || 922 ((txqs->pidx < txq->cidx) && 923 (txq->cidx < txqs->pidx))) 924 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 925 txqs->pidx, txq->pidx, txq->cidx); 926 #endif 927 if (txq->pidx >= txq->size) { 928 txq->pidx -= txq->size; 929 txq->gen ^= 1; 930 } 931 932 } 933 934 /** 935 * calc_tx_descs - calculate the number of Tx descriptors for a packet 936 * @m: the packet mbufs 937 * @nsegs: the number of segments 938 * 939 * Returns the number of Tx descriptors needed for the given Ethernet 940 * packet. Ethernet packets require addition of WR and CPL headers. 
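 *
 * As a worked example, a packet too large for immediate data that maps to
 * three DMA segments needs sgl_len(3) = 5 flits for its SGL plus 2 flits
 * of WR/CPL header, and flits_to_desc(7) places those 7 flits in a single
 * descriptor.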
941 */ 942 static __inline unsigned int 943 calc_tx_descs(const struct mbuf *m, int nsegs) 944 { 945 unsigned int flits; 946 947 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 948 return 1; 949 950 flits = sgl_len(nsegs) + 2; 951 #ifdef TSO_SUPPORTED 952 if (m->m_pkthdr.csum_flags & CSUM_TSO) 953 flits++; 954 #endif 955 return flits_to_desc(flits); 956 } 957 958 static unsigned int 959 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 960 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 961 { 962 struct mbuf *m0; 963 int err, pktlen, pass = 0; 964 965 retry: 966 err = 0; 967 m0 = *m; 968 pktlen = m0->m_pkthdr.len; 969 #if defined(__i386__) || defined(__amd64__) 970 if (busdma_map_sg_collapse(m, segs, nsegs) == 0) { 971 goto done; 972 } else 973 #endif 974 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0); 975 976 if (err == 0) { 977 goto done; 978 } 979 if (err == EFBIG && pass == 0) { 980 pass = 1; 981 /* Too many segments, try to defrag */ 982 m0 = m_defrag(m0, M_DONTWAIT); 983 if (m0 == NULL) { 984 m_freem(*m); 985 *m = NULL; 986 return (ENOBUFS); 987 } 988 *m = m0; 989 goto retry; 990 } else if (err == ENOMEM) { 991 return (err); 992 } if (err) { 993 if (cxgb_debug) 994 printf("map failure err=%d pktlen=%d\n", err, pktlen); 995 m_freem(m0); 996 *m = NULL; 997 return (err); 998 } 999 done: 1000 #if !defined(__i386__) && !defined(__amd64__) 1001 bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE); 1002 #endif 1003 txsd->flags |= TX_SW_DESC_MAPPED; 1004 1005 return (0); 1006 } 1007 1008 /** 1009 * make_sgl - populate a scatter/gather list for a packet 1010 * @sgp: the SGL to populate 1011 * @segs: the packet dma segments 1012 * @nsegs: the number of segments 1013 * 1014 * Generates a scatter/gather list for the buffers that make up a packet 1015 * and returns the SGL size in 8-byte words. The caller must size the SGL 1016 * appropriately. 1017 */ 1018 static __inline void 1019 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1020 { 1021 int i, idx; 1022 1023 for (idx = 0, i = 0; i < nsegs; i++) { 1024 /* 1025 * firmware doesn't like empty segments 1026 */ 1027 if (segs[i].ds_len == 0) 1028 continue; 1029 if (i && idx == 0) 1030 ++sgp; 1031 1032 sgp->len[idx] = htobe32(segs[i].ds_len); 1033 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1034 idx ^= 1; 1035 } 1036 1037 if (idx) { 1038 sgp->len[idx] = 0; 1039 sgp->addr[idx] = 0; 1040 } 1041 } 1042 1043 /** 1044 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1045 * @adap: the adapter 1046 * @q: the Tx queue 1047 * 1048 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 1049 * where the HW is going to sleep just after we checked, however, 1050 * then the interrupt handler will detect the outstanding TX packet 1051 * and ring the doorbell for us. 1052 * 1053 * When GTS is disabled we unconditionally ring the doorbell. 
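 *
 * USE_GTS is defined to 0 earlier in this file, so the unconditional
 * doorbell write below is the path that is normally compiled in.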
1054 */ 1055 static __inline void 1056 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1057 { 1058 #if USE_GTS 1059 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1060 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1061 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1062 #ifdef T3_TRACE 1063 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1064 q->cntxt_id); 1065 #endif 1066 t3_write_reg(adap, A_SG_KDOORBELL, 1067 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1068 } 1069 #else 1070 wmb(); /* write descriptors before telling HW */ 1071 t3_write_reg(adap, A_SG_KDOORBELL, 1072 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1073 #endif 1074 } 1075 1076 static __inline void 1077 wr_gen2(struct tx_desc *d, unsigned int gen) 1078 { 1079 #if SGE_NUM_GENBITS == 2 1080 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1081 #endif 1082 } 1083 1084 /** 1085 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1086 * @ndesc: number of Tx descriptors spanned by the SGL 1087 * @txd: first Tx descriptor to be written 1088 * @txqs: txq state (generation and producer index) 1089 * @txq: the SGE Tx queue 1090 * @sgl: the SGL 1091 * @flits: number of flits to the start of the SGL in the first descriptor 1092 * @sgl_flits: the SGL size in flits 1093 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1094 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1095 * 1096 * Write a work request header and an associated SGL. If the SGL is 1097 * small enough to fit into one Tx descriptor it has already been written 1098 * and we just need to write the WR header. Otherwise we distribute the 1099 * SGL across the number of descriptors it spans. 1100 */ 1101 static void 1102 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1103 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1104 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1105 { 1106 1107 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1108 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1109 1110 if (__predict_true(ndesc == 1)) { 1111 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1112 V_WR_SGLSFLT(flits)) | wr_hi; 1113 wmb(); 1114 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1115 V_WR_GEN(txqs->gen)) | wr_lo; 1116 /* XXX gen? 
*/ 1117 wr_gen2(txd, txqs->gen); 1118 1119 } else { 1120 unsigned int ogen = txqs->gen; 1121 const uint64_t *fp = (const uint64_t *)sgl; 1122 struct work_request_hdr *wp = wrp; 1123 1124 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1125 V_WR_SGLSFLT(flits)) | wr_hi; 1126 1127 while (sgl_flits) { 1128 unsigned int avail = WR_FLITS - flits; 1129 1130 if (avail > sgl_flits) 1131 avail = sgl_flits; 1132 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1133 sgl_flits -= avail; 1134 ndesc--; 1135 if (!sgl_flits) 1136 break; 1137 1138 fp += avail; 1139 txd++; 1140 txsd++; 1141 if (++txqs->pidx == txq->size) { 1142 txqs->pidx = 0; 1143 txqs->gen ^= 1; 1144 txd = txq->desc; 1145 txsd = txq->sdesc; 1146 } 1147 1148 /* 1149 * when the head of the mbuf chain 1150 * is freed all clusters will be freed 1151 * with it 1152 */ 1153 KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", txsd->mi.mi_base)); 1154 wrp = (struct work_request_hdr *)txd; 1155 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1156 V_WR_SGLSFLT(1)) | wr_hi; 1157 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1158 sgl_flits + 1)) | 1159 V_WR_GEN(txqs->gen)) | wr_lo; 1160 wr_gen2(txd, txqs->gen); 1161 flits = 1; 1162 } 1163 wrp->wr_hi |= htonl(F_WR_EOP); 1164 wmb(); 1165 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1166 wr_gen2((struct tx_desc *)wp, ogen); 1167 } 1168 } 1169 1170 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1171 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1172 1173 #ifdef VLAN_SUPPORTED 1174 #define GET_VTAG(cntrl, m) \ 1175 do { \ 1176 if ((m)->m_flags & M_VLANTAG) \ 1177 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1178 } while (0) 1179 1180 #define GET_VTAG_MI(cntrl, mi) \ 1181 do { \ 1182 if ((mi)->mi_flags & M_VLANTAG) \ 1183 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \ 1184 } while (0) 1185 #else 1186 #define GET_VTAG(cntrl, m) 1187 #define GET_VTAG_MI(cntrl, m) 1188 #endif 1189 1190 int 1191 t3_encap(struct sge_qset *qs, struct mbuf **m, int count) 1192 { 1193 adapter_t *sc; 1194 struct mbuf *m0; 1195 struct sge_txq *txq; 1196 struct txq_state txqs; 1197 struct port_info *pi; 1198 unsigned int ndesc, flits, cntrl, mlen; 1199 int err, nsegs, tso_info = 0; 1200 1201 struct work_request_hdr *wrp; 1202 struct tx_sw_desc *txsd; 1203 struct sg_ent *sgp, *sgl; 1204 uint32_t wr_hi, wr_lo, sgl_flits; 1205 bus_dma_segment_t segs[TX_MAX_SEGS]; 1206 1207 struct tx_desc *txd; 1208 struct mbuf_vec *mv; 1209 struct mbuf_iovec *mi; 1210 1211 DPRINTF("t3_encap cpu=%d ", curcpu); 1212 KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx)); 1213 1214 mi = NULL; 1215 pi = qs->port; 1216 sc = pi->adapter; 1217 txq = &qs->txq[TXQ_ETH]; 1218 txd = &txq->desc[txq->pidx]; 1219 txsd = &txq->sdesc[txq->pidx]; 1220 sgl = txq->txq_sgl; 1221 m0 = *m; 1222 1223 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1224 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1225 if (cxgb_debug) 1226 printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx); 1227 1228 mtx_assert(&txq->lock, MA_OWNED); 1229 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1230 /* 1231 * XXX need to add VLAN support for 6.x 1232 */ 1233 #ifdef VLAN_SUPPORTED 1234 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1235 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1236 #endif 1237 KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", 1238 txsd->mi.mi_base)); 1239 if (count 
> 1) { 1240 panic("count > 1 not support in CVS\n"); 1241 if ((err = busdma_map_sg_vec(m, &m0, segs, count))) 1242 return (err); 1243 nsegs = count; 1244 } else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) { 1245 if (cxgb_debug) 1246 printf("failed ... err=%d\n", err); 1247 return (err); 1248 } 1249 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count)); 1250 1251 if (!(m0->m_pkthdr.len <= PIO_LEN)) { 1252 mi_collapse_mbuf(&txsd->mi, m0); 1253 mi = &txsd->mi; 1254 } 1255 if (count > 1) { 1256 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1257 int i, fidx; 1258 struct mbuf_iovec *batchmi; 1259 1260 mv = mtomv(m0); 1261 batchmi = mv->mv_vec; 1262 1263 wrp = (struct work_request_hdr *)txd; 1264 1265 flits = count*2 + 1; 1266 txq_prod(txq, 1, &txqs); 1267 1268 for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) { 1269 struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i]; 1270 1271 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1272 GET_VTAG_MI(cntrl, batchmi); 1273 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1274 cbe->cntrl = htonl(cntrl); 1275 cbe->len = htonl(batchmi->mi_len | 0x80000000); 1276 cbe->addr = htobe64(segs[i].ds_addr); 1277 txd->flit[fidx] |= htobe64(1 << 24); 1278 } 1279 1280 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1281 V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1282 wmb(); 1283 wrp->wr_lo = htonl(V_WR_LEN(flits) | 1284 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1285 /* XXX gen? */ 1286 wr_gen2(txd, txqs.gen); 1287 check_ring_tx_db(sc, txq); 1288 1289 return (0); 1290 } else if (tso_info) { 1291 int undersized, eth_type; 1292 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1293 struct ip *ip; 1294 struct tcphdr *tcp; 1295 char *pkthdr, tmp[TCPPKTHDRSIZE]; 1296 struct mbuf_vec *mv; 1297 struct mbuf_iovec *tmpmi; 1298 1299 mv = mtomv(m0); 1300 tmpmi = mv->mv_vec; 1301 1302 txd->flit[2] = 0; 1303 GET_VTAG_MI(cntrl, mi); 1304 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1305 hdr->cntrl = htonl(cntrl); 1306 mlen = m0->m_pkthdr.len; 1307 hdr->len = htonl(mlen | 0x80000000); 1308 1309 DPRINTF("tso buf len=%d\n", mlen); 1310 undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) && 1311 (m0->m_flags & M_VLANTAG)) || 1312 (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN)); 1313 if (__predict_false(undersized)) { 1314 pkthdr = tmp; 1315 dump_mi(mi); 1316 panic("discontig packet - fixxorz"); 1317 } else 1318 pkthdr = m0->m_data; 1319 1320 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1321 eth_type = CPL_ETH_II_VLAN; 1322 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1323 ETHER_VLAN_ENCAP_LEN); 1324 } else { 1325 eth_type = CPL_ETH_II; 1326 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1327 } 1328 tcp = (struct tcphdr *)((uint8_t *)ip + 1329 sizeof(*ip)); 1330 1331 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1332 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1333 V_LSO_TCPHDR_WORDS(tcp->th_off); 1334 hdr->lso_info = htonl(tso_info); 1335 flits = 3; 1336 } else { 1337 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1338 1339 GET_VTAG(cntrl, m0); 1340 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1341 cpl->cntrl = htonl(cntrl); 1342 mlen = m0->m_pkthdr.len; 1343 cpl->len = htonl(mlen | 0x80000000); 1344 1345 if (mlen <= PIO_LEN) { 1346 txq_prod(txq, 1, &txqs); 1347 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1348 m_freem(m0); 1349 m0 = NULL; 1350 flits = (mlen + 7) / 8 + 2; 1351 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1352 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1353 F_WR_SOP | F_WR_EOP | 
txqs.compl); 1354 wmb(); 1355 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1356 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1357 1358 wr_gen2(txd, txqs.gen); 1359 check_ring_tx_db(sc, txq); 1360 DPRINTF("pio buf\n"); 1361 return (0); 1362 } 1363 DPRINTF("regular buf\n"); 1364 flits = 2; 1365 } 1366 wrp = (struct work_request_hdr *)txd; 1367 1368 #ifdef nomore 1369 /* 1370 * XXX need to move into one of the helper routines above 1371 * 1372 */ 1373 if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0) 1374 return (err); 1375 m0 = *m; 1376 #endif 1377 ndesc = calc_tx_descs(m0, nsegs); 1378 1379 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1380 make_sgl(sgp, segs, nsegs); 1381 1382 sgl_flits = sgl_len(nsegs); 1383 1384 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1385 txq_prod(txq, ndesc, &txqs); 1386 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1387 wr_lo = htonl(V_WR_TID(txq->token)); 1388 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1389 check_ring_tx_db(pi->adapter, txq); 1390 1391 if ((m0->m_type == MT_DATA) && 1392 ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) && 1393 (m0->m_ext.ext_type != EXT_PACKET)) { 1394 m0->m_flags &= ~M_EXT ; 1395 cxgb_mbufs_outstanding--; 1396 m_free(m0); 1397 } 1398 1399 return (0); 1400 } 1401 1402 1403 /** 1404 * write_imm - write a packet into a Tx descriptor as immediate data 1405 * @d: the Tx descriptor to write 1406 * @m: the packet 1407 * @len: the length of packet data to write as immediate data 1408 * @gen: the generation bit value to write 1409 * 1410 * Writes a packet as immediate data into a Tx descriptor. The packet 1411 * contains a work request at its beginning. We must write the packet 1412 * carefully so the SGE doesn't read accidentally before it's written in 1413 * its entirety. 1414 */ 1415 static __inline void 1416 write_imm(struct tx_desc *d, struct mbuf *m, 1417 unsigned int len, unsigned int gen) 1418 { 1419 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1420 struct work_request_hdr *to = (struct work_request_hdr *)d; 1421 1422 if (len > WR_LEN) 1423 panic("len too big %d\n", len); 1424 if (len < sizeof(*from)) 1425 panic("len too small %d", len); 1426 1427 memcpy(&to[1], &from[1], len - sizeof(*from)); 1428 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1429 V_WR_BCNTLFLT(len & 7)); 1430 wmb(); 1431 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1432 V_WR_LEN((len + 7) / 8)); 1433 wr_gen2(d, gen); 1434 1435 /* 1436 * This check is a hack we should really fix the logic so 1437 * that this can't happen 1438 */ 1439 if (m->m_type != MT_DONTFREE) 1440 m_freem(m); 1441 1442 } 1443 1444 /** 1445 * check_desc_avail - check descriptor availability on a send queue 1446 * @adap: the adapter 1447 * @q: the TX queue 1448 * @m: the packet needing the descriptors 1449 * @ndesc: the number of Tx descriptors needed 1450 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1451 * 1452 * Checks if the requested number of Tx descriptors is available on an 1453 * SGE send queue. If the queue is already suspended or not enough 1454 * descriptors are available the packet is queued for later transmission. 1455 * Must be called with the Tx queue locked. 
1456 * 1457 * Returns 0 if enough descriptors are available, 1 if there aren't 1458 * enough descriptors and the packet has been queued, and 2 if the caller 1459 * needs to retry because there weren't enough descriptors at the 1460 * beginning of the call but some freed up in the mean time. 1461 */ 1462 static __inline int 1463 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1464 struct mbuf *m, unsigned int ndesc, 1465 unsigned int qid) 1466 { 1467 /* 1468 * XXX We currently only use this for checking the control queue 1469 * the control queue is only used for binding qsets which happens 1470 * at init time so we are guaranteed enough descriptors 1471 */ 1472 if (__predict_false(!mbufq_empty(&q->sendq))) { 1473 addq_exit: mbufq_tail(&q->sendq, m); 1474 return 1; 1475 } 1476 if (__predict_false(q->size - q->in_use < ndesc)) { 1477 1478 struct sge_qset *qs = txq_to_qset(q, qid); 1479 1480 printf("stopping q\n"); 1481 1482 setbit(&qs->txq_stopped, qid); 1483 smp_mb(); 1484 1485 if (should_restart_tx(q) && 1486 test_and_clear_bit(qid, &qs->txq_stopped)) 1487 return 2; 1488 1489 q->stops++; 1490 goto addq_exit; 1491 } 1492 return 0; 1493 } 1494 1495 1496 /** 1497 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1498 * @q: the SGE control Tx queue 1499 * 1500 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1501 * that send only immediate data (presently just the control queues) and 1502 * thus do not have any mbufs 1503 */ 1504 static __inline void 1505 reclaim_completed_tx_imm(struct sge_txq *q) 1506 { 1507 unsigned int reclaim = q->processed - q->cleaned; 1508 1509 mtx_assert(&q->lock, MA_OWNED); 1510 1511 q->in_use -= reclaim; 1512 q->cleaned += reclaim; 1513 } 1514 1515 static __inline int 1516 immediate(const struct mbuf *m) 1517 { 1518 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1519 } 1520 1521 /** 1522 * ctrl_xmit - send a packet through an SGE control Tx queue 1523 * @adap: the adapter 1524 * @q: the control queue 1525 * @m: the packet 1526 * 1527 * Send a packet through an SGE control Tx queue. Packets sent through 1528 * a control queue must fit entirely as immediate data in a single Tx 1529 * descriptor and have no page fragments. 1530 */ 1531 static int 1532 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1533 { 1534 int ret; 1535 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1536 1537 if (__predict_false(!immediate(m))) { 1538 m_freem(m); 1539 return 0; 1540 } 1541 1542 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1543 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1544 1545 mtx_lock(&q->lock); 1546 again: reclaim_completed_tx_imm(q); 1547 1548 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1549 if (__predict_false(ret)) { 1550 if (ret == 1) { 1551 mtx_unlock(&q->lock); 1552 log(LOG_ERR, "no desc available\n"); 1553 1554 return (ENOSPC); 1555 } 1556 goto again; 1557 } 1558 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1559 1560 q->in_use++; 1561 if (++q->pidx >= q->size) { 1562 q->pidx = 0; 1563 q->gen ^= 1; 1564 } 1565 mtx_unlock(&q->lock); 1566 wmb(); 1567 t3_write_reg(adap, A_SG_KDOORBELL, 1568 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1569 return (0); 1570 } 1571 1572 1573 /** 1574 * restart_ctrlq - restart a suspended control queue 1575 * @qs: the queue set cotaining the control queue 1576 * 1577 * Resumes transmission on a suspended Tx control queue. 
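 *
 * This is a task handler (note the npending argument); it is expected to
 * run from the adapter's taskqueue once check_desc_avail() has stopped
 * the queue and descriptors have since freed up, see restart_tx() below.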
1578 */ 1579 static void 1580 restart_ctrlq(void *data, int npending) 1581 { 1582 struct mbuf *m; 1583 struct sge_qset *qs = (struct sge_qset *)data; 1584 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1585 adapter_t *adap = qs->port->adapter; 1586 1587 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use); 1588 1589 mtx_lock(&q->lock); 1590 again: reclaim_completed_tx_imm(q); 1591 1592 while (q->in_use < q->size && 1593 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1594 1595 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1596 1597 if (++q->pidx >= q->size) { 1598 q->pidx = 0; 1599 q->gen ^= 1; 1600 } 1601 q->in_use++; 1602 } 1603 if (!mbufq_empty(&q->sendq)) { 1604 setbit(&qs->txq_stopped, TXQ_CTRL); 1605 smp_mb(); 1606 1607 if (should_restart_tx(q) && 1608 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1609 goto again; 1610 q->stops++; 1611 } 1612 mtx_unlock(&q->lock); 1613 t3_write_reg(adap, A_SG_KDOORBELL, 1614 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1615 } 1616 1617 1618 /* 1619 * Send a management message through control queue 0 1620 */ 1621 int 1622 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1623 { 1624 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1625 } 1626 1627 1628 /** 1629 * free_qset - free the resources of an SGE queue set 1630 * @sc: the controller owning the queue set 1631 * @q: the queue set 1632 * 1633 * Release the HW and SW resources associated with an SGE queue set, such 1634 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1635 * queue set must be quiesced prior to calling this. 1636 */ 1637 void 1638 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1639 { 1640 int i; 1641 1642 t3_free_tx_desc_all(&q->txq[TXQ_ETH]); 1643 1644 for (i = 0; i < SGE_TXQ_PER_SET; i++) 1645 if (q->txq[i].txq_mr.br_ring != NULL) { 1646 free(q->txq[i].txq_mr.br_ring, M_DEVBUF); 1647 mtx_destroy(&q->txq[i].txq_mr.br_lock); 1648 } 1649 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1650 if (q->fl[i].desc) { 1651 mtx_lock(&sc->sge.reg_lock); 1652 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1653 mtx_unlock(&sc->sge.reg_lock); 1654 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1655 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1656 q->fl[i].desc_map); 1657 bus_dma_tag_destroy(q->fl[i].desc_tag); 1658 bus_dma_tag_destroy(q->fl[i].entry_tag); 1659 } 1660 if (q->fl[i].sdesc) { 1661 free_rx_bufs(sc, &q->fl[i]); 1662 free(q->fl[i].sdesc, M_DEVBUF); 1663 } 1664 } 1665 1666 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 1667 if (q->txq[i].desc) { 1668 mtx_lock(&sc->sge.reg_lock); 1669 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1670 mtx_unlock(&sc->sge.reg_lock); 1671 bus_dmamap_unload(q->txq[i].desc_tag, 1672 q->txq[i].desc_map); 1673 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1674 q->txq[i].desc_map); 1675 bus_dma_tag_destroy(q->txq[i].desc_tag); 1676 bus_dma_tag_destroy(q->txq[i].entry_tag); 1677 MTX_DESTROY(&q->txq[i].lock); 1678 } 1679 if (q->txq[i].sdesc) { 1680 free(q->txq[i].sdesc, M_DEVBUF); 1681 } 1682 } 1683 1684 if (q->rspq.desc) { 1685 mtx_lock(&sc->sge.reg_lock); 1686 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1687 mtx_unlock(&sc->sge.reg_lock); 1688 1689 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1690 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1691 q->rspq.desc_map); 1692 bus_dma_tag_destroy(q->rspq.desc_tag); 1693 MTX_DESTROY(&q->rspq.lock); 1694 } 1695 1696 bzero(q, sizeof(*q)); 1697 } 1698 1699 /** 1700 * t3_free_sge_resources - free SGE resources 1701 * @sc: the adapter softc 1702 * 1703 * Frees resources used by 
the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

#ifdef IFNET_MULTIQUEUE
	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
#endif
	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context.  In the latter
 * case it also disables any pending queue restart tasklets.  Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasklets as it cannot wait; however, the tasklets will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasklets will be stopped
 * if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 *
	 * XXX
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}

/**
 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
 * @q: the Tx queue to reclaim descriptors from
 * @reclaimable: the number of descriptors to reclaim
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers.  Called with the Tx queue lock held.
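 *
 * Descriptors whose software state carries no mbuf (e.g. immediate-data
 * work requests) are simply counted in txq_skipped rather than freed.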
 */
void
t3_free_tx_desc(struct sge_txq *q, int reclaimable)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	txsd = &q->sdesc[cidx];
	DPRINTF("reclaiming %d WR\n", reclaimable);
	mtx_assert(&q->lock, MA_OWNED);
	while (reclaimable--) {
		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
			txsd->mi.mi_base = NULL;

#if defined(DIAGNOSTIC) && 0
			if (m_get_priority(txsd->m[0]) != cidx)
				printf("pri=%d cidx=%d\n",
				    (int)m_get_priority(txsd->m[0]), cidx);
#endif

		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;

}

void
t3_free_tx_desc_all(struct sge_txq *q)
{
	int i;
	struct tx_sw_desc *txsd;

	for (i = 0; i < q->size; i++) {
		txsd = &q->sdesc[i];
		if (txsd->mi.mi_base != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_iovec(&txsd->mi);
			bzero(&txsd->mi, sizeof(txsd->mi));
		}
	}
}

/**
 * is_new_response - check if a response is newly written
 * @r: the response descriptor
 * @q: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY    2500

/**
 * write_ofld_wr - write an offload work request
 * @adap: the adapter
 * @m: the packet to send
 * @q: the Tx queue
 * @pidx: index of the first Tx descriptor to write
 * @gen: the generation value to use
 * @ndesc: number of descriptors the packet will occupy
 *
 * Write an offload work request to send the supplied packet.  The packet
 * data already carry the work request with most fields populated.
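 *
 * Packets that fit as immediate data are handed directly to write_imm();
 * for larger packets the header data following the work request header is
 * copied into the first descriptor and the payload is described by an SGL
 * built with make_sgl().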
1883 */ 1884 static void 1885 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1886 struct sge_txq *q, unsigned int pidx, 1887 unsigned int gen, unsigned int ndesc, 1888 bus_dma_segment_t *segs, unsigned int nsegs) 1889 { 1890 unsigned int sgl_flits, flits; 1891 struct work_request_hdr *from; 1892 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1893 struct tx_desc *d = &q->desc[pidx]; 1894 struct txq_state txqs; 1895 1896 if (immediate(m) && segs == NULL) { 1897 write_imm(d, m, m->m_len, gen); 1898 return; 1899 } 1900 1901 /* Only TX_DATA builds SGLs */ 1902 from = mtod(m, struct work_request_hdr *); 1903 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 1904 1905 flits = m->m_len / 8; 1906 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1907 1908 make_sgl(sgp, segs, nsegs); 1909 sgl_flits = sgl_len(nsegs); 1910 1911 txqs.gen = gen; 1912 txqs.pidx = pidx; 1913 txqs.compl = 0; 1914 1915 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1916 from->wr_hi, from->wr_lo); 1917 } 1918 1919 /** 1920 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1921 * @m: the packet 1922 * 1923 * Returns the number of Tx descriptors needed for the given offload 1924 * packet. These packets are already fully constructed. 1925 */ 1926 static __inline unsigned int 1927 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1928 { 1929 unsigned int flits, cnt = 0; 1930 1931 1932 if (m->m_len <= WR_LEN) 1933 return 1; /* packet fits as immediate data */ 1934 1935 if (m->m_flags & M_IOVEC) 1936 cnt = mtomv(m)->mv_count; 1937 1938 /* headers */ 1939 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1940 1941 return flits_to_desc(flits + sgl_len(cnt)); 1942 } 1943 1944 /** 1945 * ofld_xmit - send a packet through an offload queue 1946 * @adap: the adapter 1947 * @q: the Tx offload queue 1948 * @m: the packet 1949 * 1950 * Send an offload packet through an SGE offload queue. 1951 */ 1952 static int 1953 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1954 { 1955 int ret, nsegs; 1956 unsigned int ndesc; 1957 unsigned int pidx, gen; 1958 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 1959 struct tx_sw_desc *stx; 1960 1961 nsegs = m_get_sgllen(m); 1962 vsegs = m_get_sgl(m); 1963 ndesc = calc_tx_descs_ofld(m, nsegs); 1964 busdma_map_sgl(vsegs, segs, nsegs); 1965 1966 stx = &q->sdesc[q->pidx]; 1967 KASSERT(stx->mi.mi_base == NULL, ("mi_base set")); 1968 1969 mtx_lock(&q->lock); 1970 again: reclaim_completed_tx_(q, 16); 1971 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 1972 if (__predict_false(ret)) { 1973 if (ret == 1) { 1974 printf("no ofld desc avail\n"); 1975 1976 m_set_priority(m, ndesc); /* save for restart */ 1977 mtx_unlock(&q->lock); 1978 return (EINTR); 1979 } 1980 goto again; 1981 } 1982 1983 gen = q->gen; 1984 q->in_use += ndesc; 1985 pidx = q->pidx; 1986 q->pidx += ndesc; 1987 if (q->pidx >= q->size) { 1988 q->pidx -= q->size; 1989 q->gen ^= 1; 1990 } 1991 #ifdef T3_TRACE 1992 T3_TRACE5(adap->tb[q->cntxt_id & 7], 1993 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 1994 ndesc, pidx, skb->len, skb->len - skb->data_len, 1995 skb_shinfo(skb)->nr_frags); 1996 #endif 1997 mtx_unlock(&q->lock); 1998 1999 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2000 check_ring_tx_db(adap, q); 2001 2002 return (0); 2003 } 2004 2005 /** 2006 * restart_offloadq - restart a suspended offload queue 2007 * @qs: the queue set cotaining the offload queue 2008 * 2009 * Resumes transmission on a suspended Tx offload queue. 
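 *
 * Queued packets carry the descriptor count computed at ofld_xmit() time
 * in their priority field (see m_set_priority() there), which is read back
 * here via m_get_priority().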
2010 */ 2011 static void 2012 restart_offloadq(void *data, int npending) 2013 { 2014 struct mbuf *m; 2015 struct sge_qset *qs = data; 2016 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2017 adapter_t *adap = qs->port->adapter; 2018 bus_dma_segment_t segs[TX_MAX_SEGS]; 2019 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2020 int nsegs, cleaned; 2021 2022 mtx_lock(&q->lock); 2023 again: cleaned = reclaim_completed_tx_(q, 16); 2024 2025 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2026 unsigned int gen, pidx; 2027 unsigned int ndesc = m_get_priority(m); 2028 2029 if (__predict_false(q->size - q->in_use < ndesc)) { 2030 setbit(&qs->txq_stopped, TXQ_OFLD); 2031 smp_mb(); 2032 2033 if (should_restart_tx(q) && 2034 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2035 goto again; 2036 q->stops++; 2037 break; 2038 } 2039 2040 gen = q->gen; 2041 q->in_use += ndesc; 2042 pidx = q->pidx; 2043 q->pidx += ndesc; 2044 if (q->pidx >= q->size) { 2045 q->pidx -= q->size; 2046 q->gen ^= 1; 2047 } 2048 2049 (void)mbufq_dequeue(&q->sendq); 2050 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2051 mtx_unlock(&q->lock); 2052 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2053 mtx_lock(&q->lock); 2054 } 2055 mtx_unlock(&q->lock); 2056 2057 #if USE_GTS 2058 set_bit(TXQ_RUNNING, &q->flags); 2059 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2060 #endif 2061 t3_write_reg(adap, A_SG_KDOORBELL, 2062 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2063 } 2064 2065 /** 2066 * queue_set - return the queue set a packet should use 2067 * @m: the packet 2068 * 2069 * Maps a packet to the SGE queue set it should use. The desired queue 2070 * set is carried in bits 1-3 in the packet's priority. 2071 */ 2072 static __inline int 2073 queue_set(const struct mbuf *m) 2074 { 2075 return m_get_priority(m) >> 1; 2076 } 2077 2078 /** 2079 * is_ctrl_pkt - return whether an offload packet is a control packet 2080 * @m: the packet 2081 * 2082 * Determines whether an offload packet should use an OFLD or a CTRL 2083 * Tx queue. This is indicated by bit 0 in the packet's priority. 2084 */ 2085 static __inline int 2086 is_ctrl_pkt(const struct mbuf *m) 2087 { 2088 return m_get_priority(m) & 1; 2089 } 2090 2091 /** 2092 * t3_offload_tx - send an offload packet 2093 * @tdev: the offload device to send to 2094 * @m: the packet 2095 * 2096 * Sends an offload packet. We use the packet priority to select the 2097 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2098 * should be sent as regular or control, bits 1-3 select the queue set. 2099 */ 2100 int 2101 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2102 { 2103 adapter_t *adap = tdev2adap(tdev); 2104 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2105 2106 if (__predict_false(is_ctrl_pkt(m))) 2107 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 2108 2109 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 2110 } 2111 2112 /** 2113 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2114 * @tdev: the offload device that will be receiving the packets 2115 * @q: the SGE response queue that assembled the bundle 2116 * @m: the partial bundle 2117 * @n: the number of packets in the bundle 2118 * 2119 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
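 *
 * Full bundles of RX_BUNDLE_SIZE packets are handed to cxgb_ofld_recv()
 * directly by rx_offload(); this helper only flushes whatever is left
 * over at the end of a response-processing pass (see process_responses()).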
2120 */ 2121 static __inline void 2122 deliver_partial_bundle(struct t3cdev *tdev, 2123 struct sge_rspq *q, 2124 struct mbuf *mbufs[], int n) 2125 { 2126 if (n) { 2127 q->offload_bundles++; 2128 cxgb_ofld_recv(tdev, mbufs, n); 2129 } 2130 } 2131 2132 static __inline int 2133 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2134 struct mbuf *m, struct mbuf *rx_gather[], 2135 unsigned int gather_idx) 2136 { 2137 2138 rq->offload_pkts++; 2139 m->m_pkthdr.header = mtod(m, void *); 2140 rx_gather[gather_idx++] = m; 2141 if (gather_idx == RX_BUNDLE_SIZE) { 2142 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2143 gather_idx = 0; 2144 rq->offload_bundles++; 2145 } 2146 return (gather_idx); 2147 } 2148 2149 static void 2150 restart_tx(struct sge_qset *qs) 2151 { 2152 struct adapter *sc = qs->port->adapter; 2153 2154 2155 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2156 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2157 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2158 qs->txq[TXQ_OFLD].restarts++; 2159 DPRINTF("restarting TXQ_OFLD\n"); 2160 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2161 } 2162 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2163 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2164 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2165 qs->txq[TXQ_CTRL].in_use); 2166 2167 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2168 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2169 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2170 qs->txq[TXQ_CTRL].restarts++; 2171 DPRINTF("restarting TXQ_CTRL\n"); 2172 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2173 } 2174 } 2175 2176 /** 2177 * t3_sge_alloc_qset - initialize an SGE queue set 2178 * @sc: the controller softc 2179 * @id: the queue set id 2180 * @nports: how many Ethernet ports will be using this queue set 2181 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2182 * @p: configuration parameters for this queue set 2183 * @ntxq: number of Tx queues for the queue set 2184 * @pi: port info for queue set 2185 * 2186 * Allocate resources and initialize an SGE queue set. A queue set 2187 * comprises a response queue, two Rx free-buffer queues, and up to 3 2188 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2189 * queue, offload queue, and control queue. 
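 *
 * Illustrative call (hypothetical values; the real arguments come from the
 * attach path and the adapter's per-qset parameters):
 *
 *	err = t3_sge_alloc_qset(sc, qset_idx, 1, irq_vec_idx,
 *	    &sc->params.sge.qset[qset_idx], SGE_TXQ_PER_SET, pi);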
2190 */ 2191 int 2192 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2193 const struct qset_params *p, int ntxq, struct port_info *pi) 2194 { 2195 struct sge_qset *q = &sc->sge.qs[id]; 2196 int i, header_size, ret = 0; 2197 2198 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2199 if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *), 2200 M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { 2201 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2202 goto err; 2203 } 2204 q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0; 2205 q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size; 2206 mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF); 2207 } 2208 2209 init_qset_cntxt(q, id); 2210 q->idx = id; 2211 2212 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2213 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2214 &q->fl[0].desc, &q->fl[0].sdesc, 2215 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2216 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2217 printf("error %d from alloc ring fl0\n", ret); 2218 goto err; 2219 } 2220 2221 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2222 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2223 &q->fl[1].desc, &q->fl[1].sdesc, 2224 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2225 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2226 printf("error %d from alloc ring fl1\n", ret); 2227 goto err; 2228 } 2229 2230 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2231 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2232 &q->rspq.desc_tag, &q->rspq.desc_map, 2233 NULL, NULL)) != 0) { 2234 printf("error %d from alloc ring rspq\n", ret); 2235 goto err; 2236 } 2237 2238 for (i = 0; i < ntxq; ++i) { 2239 /* 2240 * The control queue always uses immediate data so does not 2241 * need to keep track of any mbufs. 2242 * XXX Placeholder for future TOE support. 2243 */ 2244 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2245 2246 if ((ret = alloc_ring(sc, p->txq_size[i], 2247 sizeof(struct tx_desc), sz, 2248 &q->txq[i].phys_addr, &q->txq[i].desc, 2249 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2250 &q->txq[i].desc_map, 2251 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2252 printf("error %d from alloc ring tx %i\n", ret, i); 2253 goto err; 2254 } 2255 mbufq_init(&q->txq[i].sendq); 2256 q->txq[i].gen = 1; 2257 q->txq[i].size = p->txq_size[i]; 2258 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2259 device_get_unit(sc->dev), irq_vec_idx, i); 2260 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2261 } 2262 2263 q->txq[TXQ_ETH].port = pi; 2264 2265 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2266 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2267 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2268 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2269 2270 q->fl[0].gen = q->fl[1].gen = 1; 2271 q->fl[0].size = p->fl_size; 2272 q->fl[1].size = p->jumbo_size; 2273 2274 q->rspq.gen = 1; 2275 q->rspq.cidx = 0; 2276 q->rspq.size = p->rspq_size; 2277 2278 2279 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t); 2280 q->txq[TXQ_ETH].stop_thres = nports * 2281 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2282 2283 q->fl[0].buf_size = (MCLBYTES - header_size); 2284 q->fl[0].zone = zone_clust; 2285 q->fl[0].type = EXT_CLUSTER; 2286 #if __FreeBSD_version > 800000 2287 if (cxgb_use_16k_clusters) { 2288 q->fl[1].buf_size = MJUM16BYTES - header_size; 2289 q->fl[1].zone = zone_jumbo16; 2290 q->fl[1].type = EXT_JUMBO16; 2291 } else { 2292 q->fl[1].buf_size = MJUM9BYTES - header_size; 2293 q->fl[1].zone = zone_jumbo9; 2294 q->fl[1].type = EXT_JUMBO9; 2295 } 2296 #else 2297 q->fl[1].buf_size = MJUMPAGESIZE - header_size; 2298 q->fl[1].zone = zone_jumbop; 2299 q->fl[1].type = EXT_JUMBOP; 2300 #endif 2301 q->lro.enabled = lro_default; 2302 2303 mtx_lock(&sc->sge.reg_lock); 2304 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2305 q->rspq.phys_addr, q->rspq.size, 2306 q->fl[0].buf_size, 1, 0); 2307 if (ret) { 2308 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2309 goto err_unlock; 2310 } 2311 2312 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2313 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2314 q->fl[i].phys_addr, q->fl[i].size, 2315 q->fl[i].buf_size, p->cong_thres, 1, 2316 0); 2317 if (ret) { 2318 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2319 goto err_unlock; 2320 } 2321 } 2322 2323 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2324 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2325 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2326 1, 0); 2327 if (ret) { 2328 printf("error %d from t3_sge_init_ecntxt\n", ret); 2329 goto err_unlock; 2330 } 2331 2332 if (ntxq > 1) { 2333 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2334 USE_GTS, SGE_CNTXT_OFLD, id, 2335 q->txq[TXQ_OFLD].phys_addr, 2336 q->txq[TXQ_OFLD].size, 0, 1, 0); 2337 if (ret) { 2338 printf("error %d from t3_sge_init_ecntxt\n", ret); 2339 goto err_unlock; 2340 } 2341 } 2342 2343 if (ntxq > 2) { 2344 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2345 SGE_CNTXT_CTRL, id, 2346 q->txq[TXQ_CTRL].phys_addr, 2347 q->txq[TXQ_CTRL].size, 2348 q->txq[TXQ_CTRL].token, 1, 0); 2349 if (ret) { 2350 printf("error %d from t3_sge_init_ecntxt\n", ret); 2351 goto 
err_unlock; 2352 } 2353 } 2354 2355 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2356 device_get_unit(sc->dev), irq_vec_idx); 2357 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2358 2359 mtx_unlock(&sc->sge.reg_lock); 2360 t3_update_qset_coalesce(q, p); 2361 q->port = pi; 2362 2363 refill_fl(sc, &q->fl[0], q->fl[0].size); 2364 refill_fl(sc, &q->fl[1], q->fl[1].size); 2365 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2366 2367 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2368 V_NEWTIMER(q->rspq.holdoff_tmr)); 2369 2370 return (0); 2371 2372 err_unlock: 2373 mtx_unlock(&sc->sge.reg_lock); 2374 err: 2375 t3_free_qset(sc, q); 2376 2377 return (ret); 2378 } 2379 2380 void 2381 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2382 { 2383 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2384 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2385 struct ifnet *ifp = pi->ifp; 2386 2387 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2388 2389 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2390 cpl->csum_valid && cpl->csum == 0xffff) { 2391 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2392 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2393 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2394 m->m_pkthdr.csum_data = 0xffff; 2395 } 2396 /* 2397 * XXX need to add VLAN support for 6.x 2398 */ 2399 #ifdef VLAN_SUPPORTED 2400 if (__predict_false(cpl->vlan_valid)) { 2401 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2402 m->m_flags |= M_VLANTAG; 2403 } 2404 #endif 2405 2406 m->m_pkthdr.rcvif = ifp; 2407 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2408 #ifndef DISABLE_MBUF_IOVEC 2409 m_explode(m); 2410 #endif 2411 /* 2412 * adjust after conversion to mbuf chain 2413 */ 2414 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2415 m->m_len -= (sizeof(*cpl) + ethpad); 2416 m->m_data += (sizeof(*cpl) + ethpad); 2417 2418 (*ifp->if_input)(ifp, m); 2419 } 2420 2421 static void 2422 ext_free_handler(void *cl, void * arg) 2423 { 2424 uintptr_t type = (uintptr_t)arg; 2425 uma_zone_t zone; 2426 struct mbuf *m; 2427 2428 m = cl; 2429 zone = m_getzonefromtype(type); 2430 m->m_ext.ext_type = (int)type; 2431 cxgb_ext_freed++; 2432 cxgb_cache_put(zone, cl); 2433 } 2434 2435 static void 2436 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone) 2437 { 2438 struct mbuf *m; 2439 int header_size; 2440 2441 header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + 2442 sizeof(struct m_ext_) + sizeof(uint32_t); 2443 2444 bzero(cl, header_size); 2445 m = (struct mbuf *)cl; 2446 2447 SLIST_INIT(&m->m_pkthdr.tags); 2448 m->m_type = MT_DATA; 2449 m->m_flags = flags | M_NOFREE | M_EXT; 2450 m->m_data = cl + header_size; 2451 m->m_ext.ext_buf = cl; 2452 m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t)); 2453 m->m_ext.ext_size = m_getsizefromtype(type); 2454 m->m_ext.ext_free = ext_free_handler; 2455 m->m_ext.ext_arg1 = cl; 2456 m->m_ext.ext_arg2 = (void *)(uintptr_t)type; 2457 m->m_ext.ext_type = EXT_EXTREF; 2458 *(m->m_ext.ref_cnt) = 1; 2459 DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt); 2460 } 2461 2462 2463 /** 2464 * get_packet - return the next ingress packet buffer from a free list 2465 * @adap: the adapter that received the packet 2466 * @drop_thres: # of remaining buffers before we start dropping packets 2467 * @qs: the qset that the SGE free 
list holding the packet belongs to 2468 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2469 * @r: response descriptor 2470 * 2471 * Get the next packet from a free list and complete setup of the 2472 * sk_buff. If the packet is small we make a copy and recycle the 2473 * original buffer, otherwise we use the original buffer itself. If a 2474 * positive drop threshold is supplied packets are dropped and their 2475 * buffers recycled if (a) the number of remaining buffers is under the 2476 * threshold and the packet is too big to copy, or (b) the packet should 2477 * be copied but there is no memory for the copy. 2478 */ 2479 #ifdef DISABLE_MBUF_IOVEC 2480 2481 static int 2482 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2483 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2484 { 2485 2486 unsigned int len_cq = ntohl(r->len_cq); 2487 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2488 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2489 uint32_t len = G_RSPD_LEN(len_cq); 2490 uint32_t flags = ntohl(r->flags); 2491 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2492 caddr_t cl; 2493 struct mbuf *m, *m0; 2494 int ret = 0; 2495 2496 prefetch(sd->rxsd_cl); 2497 2498 fl->credits--; 2499 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2500 2501 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2502 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2503 goto skip_recycle; 2504 cl = mtod(m0, void *); 2505 memcpy(cl, sd->data, len); 2506 recycle_rx_buf(adap, fl, fl->cidx); 2507 m = m0; 2508 m0->m_len = len; 2509 } else { 2510 skip_recycle: 2511 2512 bus_dmamap_unload(fl->entry_tag, sd->map); 2513 cl = sd->rxsd_cl; 2514 m = m0 = (struct mbuf *)cl; 2515 2516 if ((sopeop == RSPQ_SOP_EOP) || 2517 (sopeop == RSPQ_SOP)) 2518 flags = M_PKTHDR; 2519 init_cluster_mbuf(cl, flags, fl->type, fl->zone); 2520 m0->m_len = len; 2521 } 2522 switch(sopeop) { 2523 case RSPQ_SOP_EOP: 2524 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2525 mh->mh_head = mh->mh_tail = m; 2526 m->m_pkthdr.len = len; 2527 ret = 1; 2528 break; 2529 case RSPQ_NSOP_NEOP: 2530 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2531 if (mh->mh_tail == NULL) { 2532 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2533 m_freem(m); 2534 break; 2535 } 2536 mh->mh_tail->m_next = m; 2537 mh->mh_tail = m; 2538 mh->mh_head->m_pkthdr.len += len; 2539 ret = 0; 2540 break; 2541 case RSPQ_SOP: 2542 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2543 m->m_pkthdr.len = len; 2544 mh->mh_head = mh->mh_tail = m; 2545 ret = 0; 2546 break; 2547 case RSPQ_EOP: 2548 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2549 mh->mh_head->m_pkthdr.len += len; 2550 mh->mh_tail->m_next = m; 2551 mh->mh_tail = m; 2552 ret = 1; 2553 break; 2554 } 2555 if (++fl->cidx == fl->size) 2556 fl->cidx = 0; 2557 2558 return (ret); 2559 } 2560 2561 #else 2562 2563 static int 2564 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2565 struct mbuf **m, struct rsp_desc *r) 2566 { 2567 2568 unsigned int len_cq = ntohl(r->len_cq); 2569 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2570 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2571 uint32_t len = G_RSPD_LEN(len_cq); 2572 uint32_t flags = ntohl(r->flags); 2573 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2574 void *cl; 2575 int ret = 0; 2576 struct mbuf *m0; 2577 #if 0 2578 if ((sd + 1 )->rxsd_cl) 2579 prefetch((sd + 1)->rxsd_cl); 2580 if ((sd + 2)->rxsd_cl) 2581 prefetch((sd + 2)->rxsd_cl); 2582 #endif 2583 DPRINTF("rx cpu=%d\n", curcpu); 2584 fl->credits--; 2585 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2586 2587 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2588 if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2589 goto skip_recycle; 2590 cl = mtod(m0, void *); 2591 memcpy(cl, sd->data, len); 2592 recycle_rx_buf(adap, fl, fl->cidx); 2593 *m = m0; 2594 } else { 2595 skip_recycle: 2596 bus_dmamap_unload(fl->entry_tag, sd->map); 2597 cl = sd->rxsd_cl; 2598 *m = m0 = (struct mbuf *)cl; 2599 } 2600 2601 switch(sopeop) { 2602 case RSPQ_SOP_EOP: 2603 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2604 if (cl == sd->rxsd_cl) 2605 init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone); 2606 m0->m_len = m0->m_pkthdr.len = len; 2607 ret = 1; 2608 goto done; 2609 break; 2610 case RSPQ_NSOP_NEOP: 2611 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2612 panic("chaining unsupported"); 2613 ret = 0; 2614 break; 2615 case RSPQ_SOP: 2616 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2617 panic("chaining unsupported"); 2618 m_iovinit(m0); 2619 ret = 0; 2620 break; 2621 case RSPQ_EOP: 2622 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2623 panic("chaining unsupported"); 2624 ret = 1; 2625 break; 2626 } 2627 panic("append not supported"); 2628 #if 0 2629 m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref); 2630 #endif 2631 done: 2632 if (++fl->cidx == fl->size) 2633 fl->cidx = 0; 2634 2635 return (ret); 2636 } 2637 #endif 2638 /** 2639 * handle_rsp_cntrl_info - handles control information in a response 2640 * @qs: the queue set corresponding to the response 2641 * @flags: the response control flags 2642 * 2643 * Handles the control information of an SGE response, such as GTS 2644 * indications and completion credits for the queue set's Tx queues. 2645 * HW coalesces credits, we don't do any extra SW coalescing. 2646 */ 2647 static __inline void 2648 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2649 { 2650 unsigned int credits; 2651 2652 #if USE_GTS 2653 if (flags & F_RSPD_TXQ0_GTS) 2654 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2655 #endif 2656 credits = G_RSPD_TXQ0_CR(flags); 2657 if (credits) 2658 qs->txq[TXQ_ETH].processed += credits; 2659 2660 credits = G_RSPD_TXQ2_CR(flags); 2661 if (credits) 2662 qs->txq[TXQ_CTRL].processed += credits; 2663 2664 # if USE_GTS 2665 if (flags & F_RSPD_TXQ1_GTS) 2666 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2667 # endif 2668 credits = G_RSPD_TXQ1_CR(flags); 2669 if (credits) 2670 qs->txq[TXQ_OFLD].processed += credits; 2671 2672 } 2673 2674 static void 2675 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2676 unsigned int sleeping) 2677 { 2678 ; 2679 } 2680 2681 /** 2682 * process_responses - process responses from an SGE response queue 2683 * @adap: the adapter 2684 * @qs: the queue set to which the response queue belongs 2685 * @budget: how many responses can be processed in this round 2686 * 2687 * Process responses from an SGE response queue up to the supplied budget. 
2688 * Responses include received packets as well as credits and other events 2689 * for the queues that belong to the response queue's queue set. 2690 * A negative budget is effectively unlimited. 2691 * 2692 * Additionally choose the interrupt holdoff time for the next interrupt 2693 * on this queue. If the system is under memory shortage use a fairly 2694 * long delay to help recovery. 2695 */ 2696 int 2697 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2698 { 2699 struct sge_rspq *rspq = &qs->rspq; 2700 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2701 int budget_left = budget; 2702 unsigned int sleeping = 0; 2703 int lro = qs->lro.enabled; 2704 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2705 int ngathered = 0; 2706 #ifdef DEBUG 2707 static int last_holdoff = 0; 2708 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2709 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2710 last_holdoff = rspq->holdoff_tmr; 2711 } 2712 #endif 2713 rspq->next_holdoff = rspq->holdoff_tmr; 2714 2715 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2716 int eth, eop = 0, ethpad = 0; 2717 uint32_t flags = ntohl(r->flags); 2718 uint32_t rss_csum = *(const uint32_t *)r; 2719 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2720 2721 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2722 2723 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2724 /* XXX */ 2725 printf("async notification\n"); 2726 2727 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2728 struct mbuf *m = NULL; 2729 2730 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2731 r->rss_hdr.opcode, rspq->cidx); 2732 if (rspq->rspq_mh.mh_head == NULL) 2733 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2734 else 2735 m = m_gethdr(M_DONTWAIT, MT_DATA); 2736 2737 /* 2738 * XXX revisit me 2739 */ 2740 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 2741 rspq->next_holdoff = NOMEM_INTR_DELAY; 2742 budget_left--; 2743 break; 2744 } 2745 get_imm_packet(adap, r, rspq->rspq_mh.mh_head, m, flags); 2746 2747 eop = 1; 2748 rspq->imm_data++; 2749 } else if (r->len_cq) { 2750 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2751 2752 #ifdef DISABLE_MBUF_IOVEC 2753 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 2754 #else 2755 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r); 2756 #ifdef IFNET_MULTIQUEUE 2757 rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash; 2758 #endif 2759 #endif 2760 ethpad = 2; 2761 } else { 2762 DPRINTF("pure response\n"); 2763 rspq->pure_rsps++; 2764 } 2765 2766 if (flags & RSPD_CTRL_MASK) { 2767 sleeping |= flags & RSPD_GTS_MASK; 2768 handle_rsp_cntrl_info(qs, flags); 2769 } 2770 2771 r++; 2772 if (__predict_false(++rspq->cidx == rspq->size)) { 2773 rspq->cidx = 0; 2774 rspq->gen ^= 1; 2775 r = rspq->desc; 2776 } 2777 prefetch(r); 2778 if (++rspq->credits >= (rspq->size / 4)) { 2779 refill_rspq(adap, rspq, rspq->credits); 2780 rspq->credits = 0; 2781 } 2782 DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags); 2783 2784 if (!eth && eop) { 2785 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2786 /* 2787 * XXX size mismatch 2788 */ 2789 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2790 2791 ngathered = rx_offload(&adap->tdev, rspq, 2792 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2793 rspq->rspq_mh.mh_head = NULL; 2794 DPRINTF("received offload packet\n"); 2795 2796 } else if (eth && eop) { 2797 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 2798 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 2799 2800 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad, 2801 rss_hash, rss_csum, lro); 2802 DPRINTF("received tunnel packet\n"); 2803 rspq->rspq_mh.mh_head = NULL; 2804 2805 } 2806 __refill_fl_lt(adap, &qs->fl[0], 32); 2807 __refill_fl_lt(adap, &qs->fl[1], 32); 2808 --budget_left; 2809 } 2810 2811 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2812 t3_lro_flush(adap, qs, &qs->lro); 2813 2814 if (sleeping) 2815 check_ring_db(adap, qs, sleeping); 2816 2817 smp_mb(); /* commit Tx queue processed updates */ 2818 if (__predict_false(qs->txq_stopped > 1)) { 2819 printf("restarting tx on %p\n", qs); 2820 2821 restart_tx(qs); 2822 } 2823 2824 __refill_fl_lt(adap, &qs->fl[0], 512); 2825 __refill_fl_lt(adap, &qs->fl[1], 512); 2826 budget -= budget_left; 2827 return (budget); 2828 } 2829 2830 /* 2831 * A helper function that processes responses and issues GTS. 2832 */ 2833 static __inline int 2834 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2835 { 2836 int work; 2837 static int last_holdoff = 0; 2838 2839 work = process_responses(adap, rspq_to_qset(rq), -1); 2840 2841 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2842 printf("next_holdoff=%d\n", rq->next_holdoff); 2843 last_holdoff = rq->next_holdoff; 2844 } 2845 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2846 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2847 2848 return (work); 2849 } 2850 2851 2852 /* 2853 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2854 * Handles data events from SGE response queues as well as error and other 2855 * async events as they all use the same interrupt pin. We use one SGE 2856 * response queue per port in this mode and protect all response queues with 2857 * queue 0's lock. 
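 *
 * Note: which of t3b_intr(), t3_intr_msi() and t3_intr_msix() gets installed
 * presumably depends on the interrupt mode negotiated at attach time; of the
 * three, only the MSI-X handler takes the per-qset response queue lock rather
 * than queue 0's lock.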
2858 */ 2859 void 2860 t3b_intr(void *data) 2861 { 2862 uint32_t i, map; 2863 adapter_t *adap = data; 2864 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2865 2866 t3_write_reg(adap, A_PL_CLI, 0); 2867 map = t3_read_reg(adap, A_SG_DATA_INTR); 2868 2869 if (!map) 2870 return; 2871 2872 if (__predict_false(map & F_ERRINTR)) 2873 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2874 2875 mtx_lock(&q0->lock); 2876 for_each_port(adap, i) 2877 if (map & (1 << i)) 2878 process_responses_gts(adap, &adap->sge.qs[i].rspq); 2879 mtx_unlock(&q0->lock); 2880 } 2881 2882 /* 2883 * The MSI interrupt handler. This needs to handle data events from SGE 2884 * response queues as well as error and other async events as they all use 2885 * the same MSI vector. We use one SGE response queue per port in this mode 2886 * and protect all response queues with queue 0's lock. 2887 */ 2888 void 2889 t3_intr_msi(void *data) 2890 { 2891 adapter_t *adap = data; 2892 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2893 int i, new_packets = 0; 2894 2895 mtx_lock(&q0->lock); 2896 2897 for_each_port(adap, i) 2898 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2899 new_packets = 1; 2900 mtx_unlock(&q0->lock); 2901 if (new_packets == 0) 2902 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2903 } 2904 2905 void 2906 t3_intr_msix(void *data) 2907 { 2908 struct sge_qset *qs = data; 2909 adapter_t *adap = qs->port->adapter; 2910 struct sge_rspq *rspq = &qs->rspq; 2911 #ifndef IFNET_MULTIQUEUE 2912 mtx_lock(&rspq->lock); 2913 #else 2914 if (mtx_trylock(&rspq->lock)) 2915 #endif 2916 { 2917 2918 if (process_responses_gts(adap, rspq) == 0) 2919 rspq->unhandled_irqs++; 2920 mtx_unlock(&rspq->lock); 2921 } 2922 } 2923 2924 #define QDUMP_SBUF_SIZE 32 * 400 2925 static int 2926 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 2927 { 2928 struct sge_rspq *rspq; 2929 struct sge_qset *qs; 2930 int i, err, dump_end, idx; 2931 static int multiplier = 1; 2932 struct sbuf *sb; 2933 struct rsp_desc *rspd; 2934 uint32_t data[4]; 2935 2936 rspq = arg1; 2937 qs = rspq_to_qset(rspq); 2938 if (rspq->rspq_dump_count == 0) 2939 return (0); 2940 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 2941 log(LOG_WARNING, 2942 "dump count is too large %d\n", rspq->rspq_dump_count); 2943 rspq->rspq_dump_count = 0; 2944 return (EINVAL); 2945 } 2946 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 2947 log(LOG_WARNING, 2948 "dump start of %d is greater than queue size\n", 2949 rspq->rspq_dump_start); 2950 rspq->rspq_dump_start = 0; 2951 return (EINVAL); 2952 } 2953 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 2954 if (err) 2955 return (err); 2956 retry_sbufops: 2957 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 2958 2959 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 2960 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 2961 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 2962 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 2963 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 2964 2965 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 2966 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 2967 2968 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 2969 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 2970 idx = i & (RSPQ_Q_SIZE-1); 2971 2972 rspd = &rspq->desc[idx]; 2973 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 2974 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 2975 
rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 2976 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 2977 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 2978 be32toh(rspd->len_cq), rspd->intr_gen); 2979 } 2980 if (sbuf_overflowed(sb)) { 2981 sbuf_delete(sb); 2982 multiplier++; 2983 goto retry_sbufops; 2984 } 2985 sbuf_finish(sb); 2986 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 2987 sbuf_delete(sb); 2988 return (err); 2989 } 2990 2991 2992 /* 2993 * broken by recent mbuf changes 2994 */ 2995 static int 2996 t3_dump_txq(SYSCTL_HANDLER_ARGS) 2997 { 2998 struct sge_txq *txq; 2999 struct sge_qset *qs; 3000 int i, j, err, dump_end; 3001 static int multiplier = 1; 3002 struct sbuf *sb; 3003 struct tx_desc *txd; 3004 uint32_t *WR, wr_hi, wr_lo, gen; 3005 uint32_t data[4]; 3006 3007 txq = arg1; 3008 qs = txq_to_qset(txq, TXQ_ETH); 3009 if (txq->txq_dump_count == 0) { 3010 return (0); 3011 } 3012 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3013 log(LOG_WARNING, 3014 "dump count is too large %d\n", txq->txq_dump_count); 3015 txq->txq_dump_count = 1; 3016 return (EINVAL); 3017 } 3018 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3019 log(LOG_WARNING, 3020 "dump start of %d is greater than queue size\n", 3021 txq->txq_dump_start); 3022 txq->txq_dump_start = 0; 3023 return (EINVAL); 3024 } 3025 err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data); 3026 if (err) 3027 return (err); 3028 3029 3030 retry_sbufops: 3031 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3032 3033 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3034 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3035 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3036 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3037 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3038 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3039 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3040 txq->txq_dump_start, 3041 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3042 3043 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3044 for (i = txq->txq_dump_start; i < dump_end; i++) { 3045 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3046 WR = (uint32_t *)txd->flit; 3047 wr_hi = ntohl(WR[0]); 3048 wr_lo = ntohl(WR[1]); 3049 gen = G_WR_GEN(wr_lo); 3050 3051 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3052 wr_hi, wr_lo, gen); 3053 for (j = 2; j < 30; j += 4) 3054 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3055 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3056 3057 } 3058 if (sbuf_overflowed(sb)) { 3059 sbuf_delete(sb); 3060 multiplier++; 3061 goto retry_sbufops; 3062 } 3063 sbuf_finish(sb); 3064 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3065 sbuf_delete(sb); 3066 return (err); 3067 } 3068 3069 3070 static int 3071 t3_lro_enable(SYSCTL_HANDLER_ARGS) 3072 { 3073 adapter_t *sc; 3074 int i, j, enabled, err, nqsets = 0; 3075 3076 #ifndef LRO_WORKING 3077 return (0); 3078 #endif 3079 sc = arg1; 3080 enabled = sc->sge.qs[0].lro.enabled; 3081 err = sysctl_handle_int(oidp, &enabled, arg2, req); 3082 3083 if (err != 0) 3084 return (err); 3085 if (enabled == sc->sge.qs[0].lro.enabled) 3086 return (0); 3087 3088 for (i = 0; i < sc->params.nports; i++) 3089 for (j = 0; j < sc->port[i].nqsets; j++) 3090 nqsets++; 3091 3092 for (i = 0; i < nqsets; i++) 3093 sc->sge.qs[i].lro.enabled = enabled; 3094 3095 return (0); 3096 } 3097 3098 static int 3099 
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 3100 { 3101 adapter_t *sc = arg1; 3102 struct qset_params *qsp = &sc->params.sge.qset[0]; 3103 int coalesce_nsecs; 3104 struct sge_qset *qs; 3105 int i, j, err, nqsets = 0; 3106 struct mtx *lock; 3107 3108 if ((sc->flags & FULL_INIT_DONE) == 0) 3109 return (ENXIO); 3110 3111 coalesce_nsecs = qsp->coalesce_nsecs; 3112 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 3113 3114 if (err != 0) { 3115 return (err); 3116 } 3117 if (coalesce_nsecs == qsp->coalesce_nsecs) 3118 return (0); 3119 3120 for (i = 0; i < sc->params.nports; i++) 3121 for (j = 0; j < sc->port[i].nqsets; j++) 3122 nqsets++; 3123 3124 coalesce_nsecs = max(100, coalesce_nsecs); 3125 3126 for (i = 0; i < nqsets; i++) { 3127 qs = &sc->sge.qs[i]; 3128 qsp = &sc->params.sge.qset[i]; 3129 qsp->coalesce_nsecs = coalesce_nsecs; 3130 3131 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3132 &sc->sge.qs[0].rspq.lock; 3133 3134 mtx_lock(lock); 3135 t3_update_qset_coalesce(qs, qsp); 3136 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3137 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3138 mtx_unlock(lock); 3139 } 3140 3141 return (0); 3142 } 3143 3144 3145 void 3146 t3_add_attach_sysctls(adapter_t *sc) 3147 { 3148 struct sysctl_ctx_list *ctx; 3149 struct sysctl_oid_list *children; 3150 3151 ctx = device_get_sysctl_ctx(sc->dev); 3152 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3153 3154 /* random information */ 3155 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3156 "firmware_version", 3157 CTLFLAG_RD, &sc->fw_version, 3158 0, "firmware version"); 3159 3160 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3161 "enable_lro", 3162 CTLTYPE_INT|CTLFLAG_RW, sc, 3163 0, t3_lro_enable, 3164 "I", "enable large receive offload"); 3165 3166 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3167 "enable_debug", 3168 CTLFLAG_RW, &cxgb_debug, 3169 0, "enable verbose debugging output"); 3170 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce", 3171 CTLFLAG_RD, &sc->tunq_coalesce, 3172 "#tunneled packets freed"); 3173 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3174 "txq_overrun", 3175 CTLFLAG_RD, &txq_fills, 3176 0, "#times txq overrun"); 3177 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3178 "pcpu_cache_enable", 3179 CTLFLAG_RW, &cxgb_pcpu_cache_enable, 3180 0, "#enable driver local pcpu caches"); 3181 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3182 "cache_alloc", 3183 CTLFLAG_RD, &cxgb_cached_allocations, 3184 0, "#times a cluster was allocated from cache"); 3185 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3186 "cached", 3187 CTLFLAG_RD, &cxgb_cached, 3188 0, "#times a cluster was cached"); 3189 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3190 "ext_freed", 3191 CTLFLAG_RD, &cxgb_ext_freed, 3192 0, "#times a cluster was freed through ext_free"); 3193 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3194 "mbufs_outstanding", 3195 CTLFLAG_RD, &cxgb_mbufs_outstanding, 3196 0, "#mbufs in flight in the driver"); 3197 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3198 "pack_outstanding", 3199 CTLFLAG_RD, &cxgb_pack_outstanding, 3200 0, "#packet in flight in the driver"); 3201 } 3202 3203 3204 static const char *rspq_name = "rspq"; 3205 static const char *txq_names[] = 3206 { 3207 "txq_eth", 3208 "txq_ofld", 3209 "txq_ctrl" 3210 }; 3211 3212 void 3213 t3_add_configured_sysctls(adapter_t *sc) 3214 { 3215 struct sysctl_ctx_list *ctx; 3216 struct sysctl_oid_list *children; 3217 int i, j; 3218 3219 ctx = device_get_sysctl_ctx(sc->dev); 3220 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3221 3222 SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, 3223 "intr_coal", 3224 CTLTYPE_INT|CTLFLAG_RW, sc, 3225 0, t3_set_coalesce_nsecs, 3226 "I", "interrupt coalescing timer (ns)"); 3227 3228 for (i = 0; i < sc->params.nports; i++) { 3229 struct port_info *pi = &sc->port[i]; 3230 struct sysctl_oid *poid; 3231 struct sysctl_oid_list *poidlist; 3232 3233 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3234 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3235 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3236 poidlist = SYSCTL_CHILDREN(poid); 3237 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3238 "nqsets", CTLFLAG_RD, &pi->nqsets, 3239 0, "#queue sets"); 3240 3241 for (j = 0; j < pi->nqsets; j++) { 3242 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3243 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid; 3244 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist; 3245 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3246 3247 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3248 3249 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3250 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3251 qspoidlist = SYSCTL_CHILDREN(qspoid); 3252 3253 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3254 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3255 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3256 3257 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3258 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3259 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3260 3261 3262 3263 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3264 CTLFLAG_RD, &qs->rspq.size, 3265 0, "#entries in response queue"); 3266 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3267 CTLFLAG_RD, &qs->rspq.cidx, 3268 0, "consumer index"); 3269 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3270 CTLFLAG_RD, &qs->rspq.credits, 3271 0, "#credits"); 3272 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3273 CTLFLAG_RD, &qs->rspq.phys_addr, 3274 "physical_address_of the queue"); 3275 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3276 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3277 0, "start rspq dump entry"); 3278 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3279 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3280 0, "#rspq entries to dump"); 3281 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3282 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3283 0, t3_dump_rspq, "A", "dump of the response queue"); 3284 3285 3286 3287 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3288 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3289 0, "#tunneled packets dropped"); 3290 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3291 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3292 0, "#tunneled packets waiting to be sent"); 3293 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3294 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3295 0, "#tunneled packets queue producer index"); 3296 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3297 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3298 0, "#tunneled packets queue consumer index"); 3299 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3300 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3301 0, "#tunneled packets processed by the card"); 3302 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3303 CTLFLAG_RD, &txq->cleaned, 3304 0, "#tunneled packets cleaned"); 3305 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3306 CTLFLAG_RD, &txq->in_use, 3307 0, "#tunneled packet slots in use"); 3308 SYSCTL_ADD_ULONG(ctx, 
txqpoidlist, OID_AUTO, "frees", 3309 CTLFLAG_RD, &txq->txq_frees, 3310 "#tunneled packets freed"); 3311 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3312 CTLFLAG_RD, &txq->txq_skipped, 3313 0, "#tunneled packet descriptors skipped"); 3314 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced", 3315 CTLFLAG_RD, &txq->txq_coalesced, 3316 0, "#tunneled packets coalesced"); 3317 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3318 CTLFLAG_RD, &txq->txq_enqueued, 3319 0, "#tunneled packets enqueued to hardware"); 3320 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3321 CTLFLAG_RD, &qs->txq_stopped, 3322 0, "tx queues stopped"); 3323 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3324 CTLFLAG_RD, &txq->phys_addr, 3325 "physical_address_of the queue"); 3326 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3327 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3328 0, "txq generation"); 3329 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3330 CTLFLAG_RD, &txq->cidx, 3331 0, "hardware queue cidx"); 3332 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3333 CTLFLAG_RD, &txq->pidx, 3334 0, "hardware queue pidx"); 3335 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3336 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3337 0, "txq start idx for dump"); 3338 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3339 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3340 0, "txq #entries to dump"); 3341 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3342 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3343 0, t3_dump_txq, "A", "dump of the transmit queue"); 3344 } 3345 } 3346 } 3347 3348 /** 3349 * t3_get_desc - dump an SGE descriptor for debugging purposes 3350 * @qs: the queue set 3351 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3352 * @idx: the descriptor index in the queue 3353 * @data: where to dump the descriptor contents 3354 * 3355 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3356 * size of the descriptor. 3357 */ 3358 int 3359 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3360 unsigned char *data) 3361 { 3362 if (qnum >= 6) 3363 return (EINVAL); 3364 3365 if (qnum < 3) { 3366 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3367 return -EINVAL; 3368 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3369 return sizeof(struct tx_desc); 3370 } 3371 3372 if (qnum == 3) { 3373 if (!qs->rspq.desc || idx >= qs->rspq.size) 3374 return (EINVAL); 3375 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3376 return sizeof(struct rsp_desc); 3377 } 3378 3379 qnum -= 4; 3380 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3381 return (EINVAL); 3382 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3383 return sizeof(struct rx_desc); 3384 } 3385