/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_sge_defs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#include <dev/cxgb/sys/mvec.h>

uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
int      collapse_mbufs = 1;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 2)
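/*
 * Note: hz >> 2 ticks is one quarter of a second, so the reclaim timer
 * fires roughly every 250 ms regardless of the configured hz value.
 */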
/*
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	void		*cl;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
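/*
 * flits_to_desc() below is just an index into this precomputed table, which
 * keeps the division in the formula above out of the transmit fast path.
 */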
static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @adapter: the adapter
 * @q: the Tx queue to reclaim completed descriptors from
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
 */
static __inline int
reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
	int reclaimed, reclaim = desc_reclaimable(q);
	int n = 0;

	mtx_assert(&q->lock, MA_OWNED);

	if (reclaim > 0) {
		n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
		reclaimed = min(reclaim, nbufs);
		q->cleaned += reclaimed;
		q->in_use -= reclaimed;
	}

	return (n);
}

/**
 * t3_sge_init - initialize SGE
 * @adap: the adapter
 * @p: the SGE parameters
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here, instead the driver
 * top-level must request those individually.  We also do not enable DMA
 * here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 * sgl_len - calculates the size of an SGL of the given capacity
 * @n: the number of SGL entries
 *
 * Calculates the number of flits needed for a scatter/gather list that
 * can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
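/*
 * Each sg_ent written by make_sgl() packs up to two segments into three
 * flits (one flit holding the two 32-bit lengths, two flits holding the
 * 64-bit addresses).  For example sgl_len(3) = 3 * 3 / 2 + 1 = 5 flits:
 * one full pair plus a length flit and an address flit for the odd segment.
 */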
/**
 * get_imm_packet - return the next ingress packet buffer from a response
 * @resp: the response descriptor containing the packet data
 *
 * Return a packet containing the immediate data of the given response.
 */
static __inline void
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl)
{
	int len;
	uint32_t flags = ntohl(resp->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);

	/*
	 * would be a firmware bug
	 */
	if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
		return;

	len = G_RSPD_LEN(ntohl(resp->len_cq));
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		m->m_len = m->m_pkthdr.len = len;
		memcpy(m->m_data, resp->imm_data, len);
		break;
	case RSPQ_EOP:
		memcpy(cl, resp->imm_data, len);
		m_iovappend(m, cl, MSIZE, len, 0);
		break;
	}
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;

		if (adap->flags & USING_MSIX)
			q->coalesce_nsecs = 6000;
		else
			q->coalesce_nsecs = 3500;

		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = FL_Q_SIZE;
		q->jumbo_size = JUMBO_Q_SIZE;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}
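/*
 * The coalesce_nsecs defaults above (3.5 us with INTx/MSI, 6 us with MSI-X)
 * become the response queue's interrupt holdoff once t3_update_qset_coalesce()
 * converts them into the SGE's 0.1 us timer units.
 */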
int
t3_sge_alloc(adapter_t *sc)
{
	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{
	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{
	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;
}

/**
 * refill_fl - refill an SGE free-buffer list
 * @sc: the controller softc
 * @q: the free-list to refill
 * @n: the number of new buffers to allocate
 *
 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				/*
				 * XXX free cluster
				 */
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
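/*
 * The doorbell write at 'done' tells the SGE how far the free list has been
 * replenished; the cluster-allocation failure paths branch there so that any
 * buffers added before the failure are still made available to the hardware.
 */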
/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK);
		bzero(s, len);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	struct sge_qset *qs;
	struct sge_txq  *txq;
	int i, j;
	int reclaim_eth, reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_eth || reclaim_ofl || refill_rx) {
				taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task);
				goto done;
			}
		}
done:
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}
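/*
 * The callout above only detects that there is work to do; the actual Tx
 * reclaim and free-list refill are deferred to sge_timer_reclaim(), which
 * runs from the adapter taskqueue.
 */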
/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_sw(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
	callout_drain(&sc->sge_timer_ch);
	if (sc->tq) {
		taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	}
}

/**
 * refill_rspq - replenish an SGE response queue
 * @adapter: the adapter
 * @q: the response queue to replenish
 * @credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{
	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}


static void
sge_timer_reclaim(void *arg, int ncount)
{
	adapter_t *sc = arg;
	int i, j, nqsets = 0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	int n, reclaimable;
	/*
	 * XXX assuming these quantities are allowed to change during operation
	 */
	for (i = 0; i < sc->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		txq = &qs->txq[TXQ_ETH];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem_vec(m_vec[j]);
			}
		}

		txq = &qs->txq[TXQ_OFLD];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem_vec(m_vec[j]);
			}
		}

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{
	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
	txq->unacked &= 7;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;

	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}
}
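/*
 * The unacked/compl logic above requests a completion from the SGE roughly
 * once every 8 descriptors: bit 3 of the running unacked count is shifted
 * into the work request's COMPL field and the counter is then folded back
 * below 8.
 */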
/**
 * calc_tx_descs - calculate the number of Tx descriptors for a packet
 * @m: the packet mbufs
 * @nsegs: the number of segments
 *
 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & (CSUM_TSO))
		flits++;
#endif
	return flits_to_desc(flits);
}
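/*
 * The "+ 2" above accounts for the work request and CPL_TX_PKT headers that
 * precede the SGL, and TSO adds one more flit for the LSO information; this
 * mirrors the flits = 2 / flits = 3 cases in t3_encap().
 */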
static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen;

	m0 = *m;
	pktlen = m0->m_pkthdr.len;

	err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
#ifdef DEBUG
	if (err) {
		int n = 0;
		struct mbuf *mtmp = m0;
		while(mtmp) {
			n++;
			mtmp = mtmp->m_next;
		}
		printf("map_mbufs: bus_dmamap_load_mvec_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
		    err, m0->m_pkthdr.len, n);
	}
#endif
	if (err == EFBIG) {
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_NOWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
	}

	if (err == ENOMEM) {
		return (err);
	}

	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem_vec(m0);
		*m = NULL;
		return (err);
	}

	bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
	stx->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet
 * and returns the SGL size in 8-byte words.  The caller must size the SGL
 * appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	if (idx)
		sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 * where the HW may go to sleep just after we checked; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();            /* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}
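/*
 * With two generation bits the last flit of every Tx descriptor carries the
 * generation value; wr_gen2() fills it in after the rest of the descriptor
 * has been written, so the SGE does not treat a half-written descriptor as
 * valid.
 */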
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

int
t3_encap(struct port_info *p, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct tx_sw_desc *stx;
	struct txq_state txqs;
	unsigned int nsegs, ndesc, flits, cntrl, mlen;
	int err, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	uint32_t wr_hi, wr_lo, sgl_flits;

	struct tx_desc *txd;
	struct cpl_tx_pkt *cpl;

	DPRINTF("t3_encap ");
	m0 = *m;
	sc = p->adapter;
	qs = &sc->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	stx = &txq->sdesc[txq->pidx];
	txd = &txq->desc[txq->pidx];
	cpl = (struct cpl_tx_pkt *)txd;
	mlen = m0->m_pkthdr.len;
	cpl->len = htonl(mlen | 0x80000000);

	DPRINTF("mlen=%d\n", mlen);
	/*
	 * XXX handle checksum, TSO, and VLAN here
	 */
	cntrl = V_TXPKT_INTF(p->port);

	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_flags & M_VLANTAG)
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (tso_info) {
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
		struct ip *ip;
		struct tcphdr *tcp;
		uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */

		txd->flit[2] = 0;
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);

		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			pkthdr = &tmp[0];
			m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
		} else {
			pkthdr = m0->m_data;
		}

		if (__predict_false(m0->m_flags & M_VLANTAG)) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		tcp = (struct tcphdr *)((uint8_t *)ip +
		    sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
			    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);
		flits = 3;
	} else {
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		cpl->cntrl = htonl(cntrl);

		if (mlen <= WR_LEN - sizeof(*cpl)) {
			txq_prod(txq, 1, &txqs);
			txq->sdesc[txqs.pidx].m = m0;

			if (m0->m_len == m0->m_pkthdr.len)
				memcpy(&txd->flit[2], m0->m_data, mlen);
			else
				m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);

			flits = (mlen + 7) / 8 + 2;
			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wmb();
			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;

	if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
		return (err);
	}
	m0 = *m;
	ndesc = calc_tx_descs(m0, nsegs);

	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0];
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
	txq_prod(txq, ndesc, &txqs);
	txsd = &txq->sdesc[txqs.pidx];
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	txsd->m = m0;

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs.gen)) | wr_lo;
		/* XXX gen? */
		wr_gen2(txd, txqs.gen);
	} else {
		unsigned int ogen = txqs.gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		/* XXX - CHECK ME */
		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs.pidx == txq->size) {
				txqs.pidx = 0;
				txqs.gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			txsd->m = NULL;
			wrp = (struct work_request_hdr *)txd;
			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs.gen)) | wr_lo;
			wr_gen2(txd, txqs.gen);
			flits = 1;
		}
#ifdef WHY
		skb->priority = pidx;
#endif
		wrp->wr_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
	check_ring_tx_db(p->adapter, txq);

	return (0);
}


/**
 * write_imm - write a packet into a Tx descriptor as immediate data
 * @d: the Tx descriptor to write
 * @m: the packet
 * @len: the length of packet data to write as immediate data
 * @gen: the generation bit value to write
 *
 * Writes a packet as immediate data into a Tx descriptor.  The packet
 * contains a work request at its beginning.  We must write the packet
 * carefully so the SGE doesn't read accidentally before it's written in
 * its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = (struct work_request_hdr *)m->m_data;
	struct work_request_hdr *to = (struct work_request_hdr *)d;

	memcpy(&to[1], &from[1], len - sizeof(*from));
	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
					V_WR_BCNTLFLT(len & 7));
	wmb();
	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
					V_WR_LEN((len + 7) / 8));
	wr_gen2(d, gen);
	m_freem(m);
}
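/*
 * Note that write_imm() consumes the mbuf (the m_freem() above); callers
 * such as ctrl_xmit() must not touch @m once it has been handed over.
 */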
/**
 * check_desc_avail - check descriptor availability on a send queue
 * @adap: the adapter
 * @q: the TX queue
 * @m: the packet needing the descriptors
 * @ndesc: the number of Tx descriptors needed
 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 * Checks if the requested number of Tx descriptors is available on an
 * SGE send queue.  If the queue is already suspended or not enough
 * descriptors are available the packet is queued for later transmission.
 * Must be called with the Tx queue locked.
 *
 * Returns 0 if enough descriptors are available, 1 if there aren't
 * enough descriptors and the packet has been queued, and 2 if the caller
 * needs to retry because there weren't enough descriptors at the
 * beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue.
	 * The control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
#if 0
	if (__predict_false(!skb_queue_empty(&q->sendq))) {
addq_exit:	__skb_queue_tail(&q->sendq, skb);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		set_bit(qid, &qs->txq_stopped);
		smp_mb__after_clear_bit();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
#endif
	return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs to release.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
}

/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data;

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}
#ifdef RESTART_CTRLQ
/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(unsigned long data)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (skb = __skb_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!skb_queue_empty(&q->sendq)) {
		set_bit(TXQ_CTRL, &qs->txq_stopped);
		smp_mb__after_clear_bit();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}

	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
#endif

/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 * t3_sge_alloc_qset - initialize an SGE queue set
 * @sc: the controller softc
 * @id: the queue set id
 * @nports: how many Ethernet ports will be using this queue set
 * @irq_vec_idx: the IRQ vector index for response queue interrupts
 * @p: configuration parameters for this queue set
 * @ntxq: number of Tx queues for the queue set
 * @pi: port info for queue set
 *
 * Allocate resources and initialize an SGE queue set.  A queue set
 * comprises a response queue, two Rx free-buffer queues, and up to 3
 * Tx queues.  The Tx queues are assigned roles in the order Ethernet
 * queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, ret = 0;

	init_qset_cntxt(q, id);

	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
		    &q->fl[0].desc, &q->fl[0].sdesc,
		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
		    &q->fl[1].desc, &q->fl[1].sdesc,
		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
		    &q->rspq.desc_tag, &q->rspq.desc_map,
		    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}

	for (i = 0; i < ntxq; ++i) {
		/*
		 * The control queue always uses immediate data so does not
		 * need to keep track of any mbufs.
		 * XXX Placeholder for future TOE support.
		 */
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		if ((ret = alloc_ring(sc, p->txq_size[i],
			    sizeof(struct tx_desc), sz,
			    &q->txq[i].phys_addr, &q->txq[i].desc,
			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
			    &q->txq[i].desc_map,
			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			printf("error %d from alloc ring tx %i\n", ret, i);
			goto err;
		}
		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
		mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF);
	}
	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.size = p->rspq_size;
	mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF);

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

	q->fl[0].buf_size = MCLBYTES;
	q->fl[0].zone = zone_clust;
	q->fl[0].type = EXT_CLUSTER;
	q->fl[1].buf_size = MJUMPAGESIZE;
	q->fl[1].zone = zone_jumbop;
	q->fl[1].type = EXT_JUMBOP;

	q->lro.enabled = lro_default;

	mtx_lock(&sc->sge.reg_lock);
	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
				   q->rspq.phys_addr, q->rspq.size,
				   q->fl[0].buf_size, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_rspcntxt\n", ret);
		goto err_unlock;
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
					  q->fl[i].phys_addr, q->fl[i].size,
					  q->fl[i].buf_size, p->cong_thres, 1,
					  0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
			goto err_unlock;
		}
	}

	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
				 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
					 USE_GTS, SGE_CNTXT_OFLD, id,
					 q->txq[TXQ_OFLD].phys_addr,
					 q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
					 SGE_CNTXT_CTRL, id,
					 q->txq[TXQ_CTRL].phys_addr,
					 q->txq[TXQ_CTRL].size,
					 q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	mtx_unlock(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
		     V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock(&sc->sge.reg_lock);
err:
	t3_free_qset(sc, q);

	return (ret);
}
/**
 * free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 * queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; ++i) {
		if (q->txq[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
		if (mtx_initialized(&q->txq[i].lock)) {
			mtx_destroy(&q->txq[i].lock);
		}
	}

	if (q->rspq.desc) {
		mtx_lock(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
			        q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
	}

	if (mtx_initialized(&q->rspq.lock))
		mtx_destroy(&q->rspq.lock);

	bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i;

	for (i = 0; i < SGE_QSETS; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}


/**
 * free_tx_desc - reclaims Tx descriptors and their buffers
 * @adapter: the adapter
 * @q: the Tx queue to reclaim descriptors from
 * @n: the number of descriptors to reclaim
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers.  Called with the Tx queue lock held.
 */
int
free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
{
	struct tx_sw_desc *d;
	unsigned int cidx = q->cidx;
	int nbufs = 0;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
		  "reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
	d = &q->sdesc[cidx];

	while (n-- > 0) {
		DPRINTF("cidx=%d d=%p\n", cidx, d);
		if (d->m) {
			if (d->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, d->map);
				bus_dmamap_destroy(q->entry_tag, d->map);
				d->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_vec[nbufs] = d->m;
			d->m = NULL;
			nbufs++;
		}
		++d;
		if (++cidx == q->size) {
			cidx = 0;
			d = q->sdesc;
		}
	}
	q->cidx = cidx;

	return (nbufs);
}

/**
 * is_new_response - check if a response is newly written
 * @r: the response descriptor
 * @q: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}
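/*
 * The generation-bit scheme: the SGE writes the current generation into each
 * response it posts, and the driver flips q->gen every time its cidx wraps
 * (see process_responses()), so a matching generation bit identifies a
 * descriptor written since the driver last passed this slot.
 */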
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

static __inline void
deliver_partial_bundle(struct t3cdev *tdev, struct sge_rspq *q)
{
	;
}

static __inline void
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *m)
{
#ifdef notyet
	if (rq->polling) {
		rq->offload_skbs[rq->offload_skbs_idx++] = skb;
		if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) {
			cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE);
			rq->offload_skbs_idx = 0;
			rq->offload_bundles++;
		}
	} else
#endif
	{
		/* XXX */
		panic("implement offload enqueue\n");
	}
}

static void
restart_tx(struct sge_qset *qs)
{
	;
}

void
t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad);
	struct ifnet *ifp = pi->ifp;

	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff);
	if (&pi->adapter->port[cpl->iff] != pi)
		panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data);

	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
	    cpl->csum_valid && cpl->csum == 0xffff) {
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}
	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (__predict_false(cpl->vlan_valid)) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}
#endif
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.header = m->m_data + sizeof(*cpl) + ethpad;
	m_explode(m);
	/*
	 * adjust after conversion to mbuf chain
	 */
	m_adj(m, sizeof(*cpl) + ethpad);

	(*ifp->if_input)(ifp, m);
}

/**
 * get_packet - return the next ingress packet buffer from a free list
 * @adap: the adapter that received the packet
 * @drop_thres: # of remaining buffers before we start dropping packets
 * @qs: the qset that the SGE free list holding the packet belongs to
 * @m: the mbuf to fill in
 * @r: response descriptor
 *
 * Get the next packet from a free list and complete setup of the
 * mbuf.  If the packet is small we make a copy and recycle the
 * original buffer, otherwise we use the original buffer itself.  If a
 * positive drop threshold is supplied packets are dropped and their
 * buffers recycled if (a) the number of remaining buffers is under the
 * threshold and the packet is too big to copy, or (b) the packet should
 * be copied but there is no memory for the copy.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(fl->entry_tag, sd->map);

	switch(sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		m_cljset(m, sd->cl, fl->type);
		m->m_len = m->m_pkthdr.len = len;
		ret = 1;
		goto done;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m_iovinit(m);
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		ret = 1;
		break;
	}
	m_iovappend(m, sd->cl, fl->buf_size, len, 0);

done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
/**
 * handle_rsp_cntrl_info - handles control information in a response
 * @qs: the queue set corresponding to the response
 * @flags: the response control flags
 *
 * Handles the control information of an SGE response, such as GTS
 * indications and completion credits for the queue set's Tx queues.
 * HW coalesces credits, we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits) {
		qs->txq[TXQ_ETH].processed += credits;
		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
			taskqueue_enqueue(qs->port->adapter->tq,
			    &qs->port->adapter->timer_reclaim_task);
	}

	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;
}

static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;
}

/*
 * This is an awful hack to bind the ithread to CPU 1
 * to work around lack of ithread affinity
 */
static void
bind_ithread(int cpu)
{
#if 0
	KASSERT(cpu < mp_ncpus, ("invalid cpu identifier"));
	if (mp_ncpus > 1) {
		mtx_lock_spin(&sched_lock);
		sched_bind(curthread, cpu);
		mtx_unlock_spin(&sched_lock);
	}
#endif
}

/**
 * process_responses - process responses from an SGE response queue
 * @adap: the adapter
 * @qs: the queue set to which the response queue belongs
 * @budget: how many responses can be processed in this round
 *
 * Process responses from an SGE response queue up to the supplied budget.
 * Responses include received packets as well as credits and other events
 * for the queues that belong to the response queue's queue set.
 * A negative budget is effectively unlimited.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue.  If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;

	static uint8_t pinned[MAXCPU];

#ifdef DEBUG
	static int last_holdoff = 0;
	if (rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) {
		/*
		 * Assumes that cntxt_id < mp_ncpus
		 */
		bind_ithread(qs->rspq.cntxt_id);
		pinned[qs->rspq.cntxt_id * adap->params.nports] = 1;
	}
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
			printf("async notification\n");

		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;
			if (cxgb_debug)
				printf("IMM DATA VALID\n");
			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			else
				m = m_gethdr(M_NOWAIT, MT_DATA);

			if (rspq->m == NULL || m == NULL) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->m, m);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			if (rspq->m == NULL) {
				log(LOG_WARNING, "failed to get mbuf for packet\n");
				break;
			}

			ethpad = 2;
			eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
		} else {
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(rspq->m->m_data);
			prefetch(rspq->m->m_data + L1_CACHE_BYTES);

			if (eth) {
				t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
				    rss_hash, rss_csum, lro);

				rspq->m = NULL;
			} else {
#ifdef notyet
				if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT))
					m_adj(m, 2);

				rx_offload(&adap->tdev, rspq, m);
#endif
			}
#ifdef notyet
			taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
#else
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);
#endif
		}
		--budget_left;
	}
	t3_sge_lro_flush_all(adap, qs);
	deliver_partial_bundle(&adap->tdev, rspq);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	budget -= budget_left;
	return (budget);
}
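/*
 * process_responses() only computes the next holdoff value and returns the
 * number of responses it handled; the A_SG_GTS write that programs the new
 * holdoff timer and index (re-arming the interrupt) is done by
 * process_responses_gts() below.
 */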
/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
        int work;
        static int last_holdoff = 0;

        work = process_responses(adap, rspq_to_qset(rq), -1);

        if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
                printf("next_holdoff=%d\n", rq->next_holdoff);
                last_holdoff = rq->next_holdoff;
        }

        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
            V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
        return (work);
}

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
        uint32_t map;
        adapter_t *adap = data;
        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

        t3_write_reg(adap, A_PL_CLI, 0);
        map = t3_read_reg(adap, A_SG_DATA_INTR);

        if (!map)
                return;

        if (__predict_false(map & F_ERRINTR))
                taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

        mtx_lock(&q0->lock);

        if (__predict_true(map & 1))
                process_responses_gts(adap, q0);

        if (map & 2)
                process_responses_gts(adap, q1);

        mtx_unlock(&q0->lock);
}
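/*
 * Illustrative sketch only (not compiled): generalizing the dispatch in
 * t3b_intr() to an arbitrary number of queue sets.  Bit i of the
 * A_SG_DATA_INTR map is assumed to correspond to response queue i, as it
 * does for the two-queue case above, and 'nqsets' is an assumed count of
 * active queue sets; the function name is hypothetical.  All queues are
 * still protected by queue 0's lock, matching the INTx/MSI convention.
 */
#if 0
static void
example_intr_dispatch(adapter_t *adap, uint32_t map, int nqsets)
{
        int i;

        mtx_lock(&adap->sge.qs[0].rspq.lock);
        for (i = 0; i < nqsets; i++)
                if (map & (1 << i))
                        process_responses_gts(adap, &adap->sge.qs[i].rspq);
        mtx_unlock(&adap->sge.qs[0].rspq.lock);
}
#endif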
/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
        adapter_t *adap = data;
        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
        int new_packets = 0;

        mtx_lock(&q0->lock);
        if (process_responses_gts(adap, q0)) {
                new_packets = 1;
        }

        if (adap->params.nports == 2 &&
            process_responses_gts(adap, q1)) {
                new_packets = 1;
        }

        mtx_unlock(&q0->lock);
        if (new_packets == 0)
                taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

/*
 * The MSI-X interrupt handler for a single queue set.  Each queue set has
 * its own response queue and lock.
 */
void
t3_intr_msix(void *data)
{
        struct sge_qset *qs = data;
        adapter_t *adap = qs->port->adapter;
        struct sge_rspq *rspq = &qs->rspq;

        mtx_lock(&rspq->lock);
        if (process_responses_gts(adap, rspq) == 0) {
#ifdef notyet
                rspq->unhandled_irqs++;
#endif
        }
        mtx_unlock(&rspq->lock);
}

/*
 * Sysctl handler for enabling LRO.  Currently a no-op because LRO is
 * broken by recent mbuf changes.
 */
static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
        adapter_t *sc;
        int i, j, enabled, err, nqsets = 0;

#ifndef LRO_WORKING
        return (0);
#endif

        sc = arg1;
        enabled = sc->sge.qs[0].lro.enabled;
        err = sysctl_handle_int(oidp, &enabled, arg2, req);

        if (err != 0) {
                return (err);
        }
        if (enabled == sc->sge.qs[0].lro.enabled)
                return (0);

        for (i = 0; i < sc->params.nports; i++)
                for (j = 0; j < sc->port[i].nqsets; j++)
                        nqsets++;

        for (i = 0; i < nqsets; i++) {
                sc->sge.qs[i].lro.enabled = enabled;
        }

        return (0);
}

/*
 * Sysctl handler for the interrupt coalescing timer (in ns).  Applies the
 * new value to every queue set and updates the holdoff timer in hardware.
 */
static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
        adapter_t *sc = arg1;
        struct qset_params *qsp = &sc->params.sge.qset[0];
        int coalesce_nsecs;
        struct sge_qset *qs;
        int i, j, err, nqsets = 0;
        struct mtx *lock;

        coalesce_nsecs = qsp->coalesce_nsecs;
        err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);

        if (err != 0) {
                return (err);
        }
        if (coalesce_nsecs == qsp->coalesce_nsecs)
                return (0);

        for (i = 0; i < sc->params.nports; i++)
                for (j = 0; j < sc->port[i].nqsets; j++)
                        nqsets++;

        coalesce_nsecs = max(100, coalesce_nsecs);

        for (i = 0; i < nqsets; i++) {
                qs = &sc->sge.qs[i];
                qsp = &sc->params.sge.qset[i];
                qsp->coalesce_nsecs = coalesce_nsecs;

                lock = (sc->flags & USING_MSIX) ?
                    &qs->rspq.lock : &sc->sge.qs[0].rspq.lock;

                mtx_lock(lock);
                t3_update_qset_coalesce(qs, qsp);
                t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
                    V_NEWTIMER(qs->rspq.holdoff_tmr));
                mtx_unlock(lock);
        }

        return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid_list *children;

        ctx = device_get_sysctl_ctx(sc->dev);
        children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

        /* random information */
        SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
            "firmware_version",
            CTLFLAG_RD, &sc->fw_version,
            0, "firmware version");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
            "enable_lro",
            CTLTYPE_INT|CTLFLAG_RW, sc,
            0, t3_lro_enable,
            "I", "enable large receive offload");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
            "intr_coal",
            CTLTYPE_INT|CTLFLAG_RW, sc,
            0, t3_set_coalesce_nsecs,
            "I", "interrupt coalescing timer (ns)");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
            "enable_debug",
            CTLFLAG_RW, &cxgb_debug,
            0, "enable verbose debugging output");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
            "collapse_free",
            CTLFLAG_RD, &collapse_free,
            0, "frees during collapse");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
            "mb_free_vec_free",
            CTLFLAG_RD, &mb_free_vec_free,
            0, "frees during mb_free_vec");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
            "collapse_mbufs",
            CTLFLAG_RW, &collapse_mbufs,
            0, "collapse mbuf chains into iovecs");
}

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor, or EINVAL if the queue or index is invalid.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
        if (qnum >= 6)
                return (EINVAL);

        if (qnum < 3) {
                if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
                        return (EINVAL);
                memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
                return (sizeof(struct tx_desc));
        }

        if (qnum == 3) {
                if (!qs->rspq.desc || idx >= qs->rspq.size)
                        return (EINVAL);
                memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
                return (sizeof(struct rsp_desc));
        }

        qnum -= 4;
        if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
                return (EINVAL);
        memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
        return (sizeof(struct rx_desc));
}
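/*
 * Illustrative sketch only (not compiled): dumping the first Tx descriptor
 * of queue set 0 with t3_get_desc().  The destination buffer is sized for
 * the descriptor type being requested (a struct tx_desc here); the function
 * name is hypothetical and exists only to show the calling convention.
 */
#if 0
static void
example_dump_tx_desc(adapter_t *sc)
{
        unsigned char buf[sizeof(struct tx_desc)];
        int len;

        /* queue set 0, Tx queue 0 (TXQ_ETH), descriptor index 0 */
        len = t3_get_desc(&sc->sge.qs[0], 0, 0, buf);
        if (len == sizeof(struct tx_desc) && cxgb_debug)
                printf("copied %d descriptor bytes\n", len);
}
#endif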