1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Chelsio Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/bus_dma.h> 46 #include <sys/rman.h> 47 #include <sys/queue.h> 48 #include <sys/sysctl.h> 49 #include <sys/taskqueue.h> 50 51 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/systm.h> 56 57 #include <netinet/in_systm.h> 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/pci/pcireg.h> 63 #include <dev/pci/pcivar.h> 64 #include <dev/cxgb/common/cxgb_common.h> 65 #include <dev/cxgb/common/cxgb_regs.h> 66 #include <dev/cxgb/common/cxgb_sge_defs.h> 67 #include <dev/cxgb/common/cxgb_t3_cpl.h> 68 #include <dev/cxgb/common/cxgb_firmware_exports.h> 69 70 #include <dev/cxgb/sys/mvec.h> 71 72 uint32_t collapse_free = 0; 73 uint32_t mb_free_vec_free = 0; 74 int collapse_mbufs = 1; 75 76 #define USE_GTS 0 77 78 #define SGE_RX_SM_BUF_SIZE 1536 79 #define SGE_RX_DROP_THRES 16 80 81 /* 82 * Period of the Tx buffer reclaim timer. This timer does not need to run 83 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
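 * A period of hz >> 2 ticks works out to roughly a quarter of a second,
 * independent of the configured value of hz.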
 */
#define TX_RECLAIM_PERIOD	(hz >> 2)

/*
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {		/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {		/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {		/* SW state per Rx descriptor */
	void		*cl;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @adapter: the adapter
 * @q: the Tx queue to reclaim completed descriptors from
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
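 *
 * The mbuf chains taken off the reclaimed descriptors are returned in
 * @mvec (at most @nbufs of them) so the caller can free them after
 * dropping the queue lock.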
195 */ 196 static __inline int 197 reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) 198 { 199 int reclaimed, reclaim = desc_reclaimable(q); 200 int n = 0; 201 202 mtx_assert(&q->lock, MA_OWNED); 203 204 if (reclaim > 0) { 205 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); 206 reclaimed = min(reclaim, nbufs); 207 q->cleaned += reclaimed; 208 q->in_use -= reclaimed; 209 } 210 211 return (n); 212 } 213 214 /** 215 * t3_sge_init - initialize SGE 216 * @adap: the adapter 217 * @p: the SGE parameters 218 * 219 * Performs SGE initialization needed every time after a chip reset. 220 * We do not initialize any of the queue sets here, instead the driver 221 * top-level must request those individually. We also do not enable DMA 222 * here, that should be done after the queues have been set up. 223 */ 224 void 225 t3_sge_init(adapter_t *adap, struct sge_params *p) 226 { 227 u_int ctrl, ups; 228 229 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 230 231 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 232 F_CQCRDTCTRL | 233 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 234 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 235 #if SGE_NUM_GENBITS == 1 236 ctrl |= F_EGRGENCTRL; 237 #endif 238 if (adap->params.rev > 0) { 239 if (!(adap->flags & (USING_MSIX | USING_MSI))) 240 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 241 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 242 } 243 t3_write_reg(adap, A_SG_CONTROL, ctrl); 244 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 245 V_LORCQDRBTHRSH(512)); 246 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 247 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 248 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 249 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 250 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 251 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 252 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 253 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 254 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 255 } 256 257 258 /** 259 * sgl_len - calculates the size of an SGL of the given capacity 260 * @n: the number of SGL entries 261 * 262 * Calculates the number of flits needed for a scatter/gather list that 263 * can hold the given number of entries. 264 */ 265 static __inline unsigned int 266 sgl_len(unsigned int n) 267 { 268 return ((3 * n) / 2 + (n & 1)); 269 } 270 271 /** 272 * get_imm_packet - return the next ingress packet buffer from a response 273 * @resp: the response descriptor containing the packet data 274 * 275 * Return a packet containing the immediate data of the given response. 
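 *
 * Immediate data is carried in the 47-byte imm_data field of the
 * response descriptor itself, so no free-list buffer is consumed.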
276 */ 277 static __inline void 278 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl) 279 { 280 int len; 281 uint32_t flags = ntohl(resp->flags); 282 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 283 284 /* 285 * would be a firmware bug 286 */ 287 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 288 return; 289 290 len = G_RSPD_LEN(ntohl(resp->len_cq)); 291 switch (sopeop) { 292 case RSPQ_SOP_EOP: 293 m->m_len = m->m_pkthdr.len = len; 294 memcpy(m->m_data, resp->imm_data, len); 295 break; 296 case RSPQ_EOP: 297 memcpy(cl, resp->imm_data, len); 298 m_iovappend(m, cl, MSIZE, len, 0); 299 break; 300 } 301 } 302 303 304 static __inline u_int 305 flits_to_desc(u_int n) 306 { 307 return (flit_desc_map[n]); 308 } 309 310 void 311 t3_sge_err_intr_handler(adapter_t *adapter) 312 { 313 unsigned int v, status; 314 315 316 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 317 318 if (status & F_RSPQCREDITOVERFOW) 319 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 320 321 if (status & F_RSPQDISABLED) { 322 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 323 324 CH_ALERT(adapter, 325 "packet delivered to disabled response queue (0x%x)\n", 326 (v >> S_RSPQ0DISABLED) & 0xff); 327 } 328 329 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 330 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 331 t3_fatal_err(adapter); 332 } 333 334 void 335 t3_sge_prep(adapter_t *adap, struct sge_params *p) 336 { 337 int i; 338 339 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 340 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 341 342 for (i = 0; i < SGE_QSETS; ++i) { 343 struct qset_params *q = p->qset + i; 344 345 q->polling = adap->params.rev > 0; 346 347 if (adap->flags & USING_MSIX) 348 q->coalesce_nsecs = 6000; 349 else 350 q->coalesce_nsecs = 3500; 351 352 q->rspq_size = RSPQ_Q_SIZE; 353 q->fl_size = FL_Q_SIZE; 354 q->jumbo_size = JUMBO_Q_SIZE; 355 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 356 q->txq_size[TXQ_OFLD] = 1024; 357 q->txq_size[TXQ_CTRL] = 256; 358 q->cong_thres = 0; 359 } 360 } 361 362 int 363 t3_sge_alloc(adapter_t *sc) 364 { 365 366 /* The parent tag. */ 367 if (bus_dma_tag_create( NULL, /* parent */ 368 1, 0, /* algnmnt, boundary */ 369 BUS_SPACE_MAXADDR, /* lowaddr */ 370 BUS_SPACE_MAXADDR, /* highaddr */ 371 NULL, NULL, /* filter, filterarg */ 372 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 373 BUS_SPACE_UNRESTRICTED, /* nsegments */ 374 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 375 0, /* flags */ 376 NULL, NULL, /* lock, lockarg */ 377 &sc->parent_dmat)) { 378 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 379 return (ENOMEM); 380 } 381 382 /* 383 * DMA tag for normal sized RX frames 384 */ 385 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 386 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 387 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 388 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 389 return (ENOMEM); 390 } 391 392 /* 393 * DMA tag for jumbo sized RX frames. 394 */ 395 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 396 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 397 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 398 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 399 return (ENOMEM); 400 } 401 402 /* 403 * DMA tag for TX frames. 
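 * A single transmit mapping may span up to TX_MAX_SEGS segments and
 * TX_MAX_SIZE bytes in total, matching the limits given to
 * bus_dma_tag_create() below.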
 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
	    BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
	    TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
	    NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter *sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}

/**
 * refill_fl - refill an SGE free-buffer list
 * @sc: the controller softc
 * @q: the free-list to refill
 * @n: the number of new buffers to allocate
 *
 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
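 *
 * Descriptors still marked RX_SW_DESC_INUSE have their DMA maps unloaded
 * and destroyed and their clusters returned to the free list's UMA zone.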
531 */ 532 static void 533 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 534 { 535 u_int cidx = q->cidx; 536 537 while (q->credits--) { 538 struct rx_sw_desc *d = &q->sdesc[cidx]; 539 540 if (d->flags & RX_SW_DESC_INUSE) { 541 bus_dmamap_unload(q->entry_tag, d->map); 542 bus_dmamap_destroy(q->entry_tag, d->map); 543 uma_zfree(q->zone, d->cl); 544 } 545 d->cl = NULL; 546 if (++cidx == q->size) 547 cidx = 0; 548 } 549 } 550 551 static __inline void 552 __refill_fl(adapter_t *adap, struct sge_fl *fl) 553 { 554 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 555 } 556 557 static void 558 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 559 { 560 uint32_t *addr; 561 562 addr = arg; 563 *addr = segs[0].ds_addr; 564 } 565 566 static int 567 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 568 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 569 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 570 { 571 size_t len = nelem * elem_size; 572 void *s = NULL; 573 void *p = NULL; 574 int err; 575 576 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 577 BUS_SPACE_MAXADDR_32BIT, 578 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 579 len, 0, NULL, NULL, tag)) != 0) { 580 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 581 return (ENOMEM); 582 } 583 584 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 585 map)) != 0) { 586 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 587 return (ENOMEM); 588 } 589 590 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 591 bzero(p, len); 592 *(void **)desc = p; 593 594 if (sw_size) { 595 len = nelem * sw_size; 596 s = malloc(len, M_DEVBUF, M_WAITOK); 597 bzero(s, len); 598 *(void **)sdesc = s; 599 } 600 if (parent_entry_tag == NULL) 601 return (0); 602 603 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 604 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 605 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 606 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 607 NULL, NULL, entry_tag)) != 0) { 608 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 609 return (ENOMEM); 610 } 611 return (0); 612 } 613 614 static void 615 sge_slow_intr_handler(void *arg, int ncount) 616 { 617 adapter_t *sc = arg; 618 619 t3_slow_intr_handler(sc); 620 } 621 622 static void 623 sge_timer_cb(void *arg) 624 { 625 adapter_t *sc = arg; 626 struct sge_qset *qs; 627 struct sge_txq *txq; 628 int i, j; 629 int reclaim_eth, reclaim_ofl, refill_rx; 630 631 for (i = 0; i < sc->params.nports; i++) 632 for (j = 0; j < sc->port[i].nqsets; j++) { 633 qs = &sc->sge.qs[i + j]; 634 txq = &qs->txq[0]; 635 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 636 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 637 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 638 (qs->fl[1].credits < qs->fl[1].size)); 639 if (reclaim_eth || reclaim_ofl || refill_rx) { 640 taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task); 641 goto done; 642 } 643 } 644 done: 645 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 646 } 647 648 /* 649 * This is meant to be a catch-all function to keep sge state private 650 * to sge.c 651 * 652 */ 653 int 654 t3_sge_init_sw(adapter_t *sc) 655 { 656 657 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 658 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 659 TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc); 660 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 661 return (0); 
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
	callout_drain(&sc->sge_timer_ch);
	if (sc->tq) {
		taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	}
}

/**
 * refill_rspq - replenish an SGE response queue
 * @adapter: the adapter
 * @q: the response queue to replenish
 * @credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
	    V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}


static void
sge_timer_reclaim(void *arg, int ncount)
{
	adapter_t *sc = arg;
	int i, j, nqsets = 0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	int n, reclaimable;
	/*
	 * XXX assuming these quantities are allowed to change during operation
	 */
	for (i = 0; i < sc->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		txq = &qs->txq[TXQ_ETH];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem_vec(m_vec[j]);
			}
		}

		txq = &qs->txq[TXQ_OFLD];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem_vec(m_vec[j]);
			}
		}

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
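 *
 * Queue set @id owns response queue context @id, free-list contexts
 * 2*id (standard buffers) and 2*id + 1 (jumbo buffers), and egress
 * contexts offset from the firmware's SGEEC start values.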
766 */ 767 static void 768 init_qset_cntxt(struct sge_qset *qs, u_int id) 769 { 770 771 qs->rspq.cntxt_id = id; 772 qs->fl[0].cntxt_id = 2 * id; 773 qs->fl[1].cntxt_id = 2 * id + 1; 774 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 775 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 776 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 777 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 778 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 779 } 780 781 782 static void 783 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 784 { 785 txq->in_use += ndesc; 786 /* 787 * XXX we don't handle stopping of queue 788 * presumably start handles this when we bump against the end 789 */ 790 txqs->gen = txq->gen; 791 txq->unacked += ndesc; 792 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 793 txq->unacked &= 7; 794 txqs->pidx = txq->pidx; 795 txq->pidx += ndesc; 796 797 if (txq->pidx >= txq->size) { 798 txq->pidx -= txq->size; 799 txq->gen ^= 1; 800 } 801 802 } 803 804 /** 805 * calc_tx_descs - calculate the number of Tx descriptors for a packet 806 * @m: the packet mbufs 807 * @nsegs: the number of segments 808 * 809 * Returns the number of Tx descriptors needed for the given Ethernet 810 * packet. Ethernet packets require addition of WR and CPL headers. 811 */ 812 static __inline unsigned int 813 calc_tx_descs(const struct mbuf *m, int nsegs) 814 { 815 unsigned int flits; 816 817 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 818 return 1; 819 820 flits = sgl_len(nsegs) + 2; 821 #ifdef TSO_SUPPORTED 822 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 823 flits++; 824 #endif 825 return flits_to_desc(flits); 826 } 827 828 static unsigned int 829 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 830 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 831 { 832 struct mbuf *m0; 833 int err, pktlen; 834 835 m0 = *m; 836 pktlen = m0->m_pkthdr.len; 837 838 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 839 #ifdef DEBUG 840 if (err) { 841 int n = 0; 842 struct mbuf *mtmp = m0; 843 while(mtmp) { 844 n++; 845 mtmp = mtmp->m_next; 846 } 847 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 848 err, m0->m_pkthdr.len, n); 849 } 850 #endif 851 if (err == EFBIG) { 852 /* Too many segments, try to defrag */ 853 m0 = m_defrag(m0, M_NOWAIT); 854 if (m0 == NULL) { 855 m_freem(*m); 856 *m = NULL; 857 return (ENOBUFS); 858 } 859 *m = m0; 860 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 861 } 862 863 if (err == ENOMEM) { 864 return (err); 865 } 866 867 if (err) { 868 if (cxgb_debug) 869 printf("map failure err=%d pktlen=%d\n", err, pktlen); 870 m_freem_vec(m0); 871 *m = NULL; 872 return (err); 873 } 874 875 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE); 876 stx->flags |= TX_SW_DESC_MAPPED; 877 878 return (0); 879 } 880 881 /** 882 * make_sgl - populate a scatter/gather list for a packet 883 * @sgp: the SGL to populate 884 * @segs: the packet dma segments 885 * @nsegs: the number of segments 886 * 887 * Generates a scatter/gather list for the buffers that make up a packet 888 * and returns the SGL size in 8-byte words. The caller must size the SGL 889 * appropriately. 
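 *
 * Each struct sg_ent packs two address/length pairs into three flits,
 * which is where sgl_len()'s 3*n/2 (plus one for odd n) formula comes
 * from; for example, nsegs == 3 needs (3 * 3) / 2 + 1 = 5 flits.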
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	if (idx)
		sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding Tx packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

int
t3_encap(struct port_info *p, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct tx_sw_desc *stx;
	struct txq_state txqs;
	unsigned int nsegs, ndesc, flits, cntrl, mlen;
	int err, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	uint32_t wr_hi, wr_lo, sgl_flits;

	struct tx_desc *txd;
	struct cpl_tx_pkt *cpl;

	DPRINTF("t3_encap ");
	m0 = *m;
	sc = p->adapter;
	qs = &sc->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	stx = &txq->sdesc[txq->pidx];
	txd = &txq->desc[txq->pidx];
	cpl = (struct cpl_tx_pkt *)txd;
	mlen = m0->m_pkthdr.len;
	cpl->len = htonl(mlen | 0x80000000);

	DPRINTF("mlen=%d\n", mlen);
	/*
	 * XXX handle checksum, TSO, and VLAN here
	 *
	 */
	cntrl = V_TXPKT_INTF(p->port);

	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_flags & M_VLANTAG)
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (tso_info) {
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl;
		struct ip *ip;
		struct tcphdr *tcp;
		uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack?
*/ 1006 1007 txd->flit[2] = 0; 1008 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1009 hdr->cntrl = htonl(cntrl); 1010 1011 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1012 pkthdr = &tmp[0]; 1013 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1014 } else { 1015 pkthdr = m0->m_data; 1016 } 1017 1018 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1019 eth_type = CPL_ETH_II_VLAN; 1020 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1021 ETHER_VLAN_ENCAP_LEN); 1022 } else { 1023 eth_type = CPL_ETH_II; 1024 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1025 } 1026 tcp = (struct tcphdr *)((uint8_t *)ip + 1027 sizeof(*ip)); 1028 1029 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1030 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1031 V_LSO_TCPHDR_WORDS(tcp->th_off); 1032 hdr->lso_info = htonl(tso_info); 1033 flits = 3; 1034 } else { 1035 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1036 cpl->cntrl = htonl(cntrl); 1037 1038 if (mlen <= WR_LEN - sizeof(*cpl)) { 1039 txq_prod(txq, 1, &txqs); 1040 txq->sdesc[txqs.pidx].m = m0; 1041 1042 if (m0->m_len == m0->m_pkthdr.len) 1043 memcpy(&txd->flit[2], m0->m_data, mlen); 1044 else 1045 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1046 1047 flits = (mlen + 7) / 8 + 2; 1048 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1049 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1050 F_WR_SOP | F_WR_EOP | txqs.compl); 1051 wmb(); 1052 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1053 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1054 1055 wr_gen2(txd, txqs.gen); 1056 check_ring_tx_db(sc, txq); 1057 return (0); 1058 } 1059 flits = 2; 1060 } 1061 1062 wrp = (struct work_request_hdr *)txd; 1063 1064 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { 1065 return (err); 1066 } 1067 m0 = *m; 1068 ndesc = calc_tx_descs(m0, nsegs); 1069 1070 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0]; 1071 make_sgl(sgp, segs, nsegs); 1072 1073 sgl_flits = sgl_len(nsegs); 1074 1075 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1076 txq_prod(txq, ndesc, &txqs); 1077 txsd = &txq->sdesc[txqs.pidx]; 1078 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1079 wr_lo = htonl(V_WR_TID(txq->token)); 1080 txsd->m = m0; 1081 1082 if (__predict_true(ndesc == 1)) { 1083 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1084 V_WR_SGLSFLT(flits)) | wr_hi; 1085 wmb(); 1086 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1087 V_WR_GEN(txqs.gen)) | wr_lo; 1088 /* XXX gen? 
*/ 1089 wr_gen2(txd, txqs.gen); 1090 } else { 1091 unsigned int ogen = txqs.gen; 1092 const uint64_t *fp = (const uint64_t *)sgl; 1093 struct work_request_hdr *wp = wrp; 1094 1095 /* XXX - CHECK ME */ 1096 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1097 V_WR_SGLSFLT(flits)) | wr_hi; 1098 1099 while (sgl_flits) { 1100 unsigned int avail = WR_FLITS - flits; 1101 1102 if (avail > sgl_flits) 1103 avail = sgl_flits; 1104 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1105 sgl_flits -= avail; 1106 ndesc--; 1107 if (!sgl_flits) 1108 break; 1109 1110 fp += avail; 1111 txd++; 1112 txsd++; 1113 if (++txqs.pidx == txq->size) { 1114 txqs.pidx = 0; 1115 txqs.gen ^= 1; 1116 txd = txq->desc; 1117 txsd = txq->sdesc; 1118 } 1119 1120 /* 1121 * when the head of the mbuf chain 1122 * is freed all clusters will be freed 1123 * with it 1124 */ 1125 txsd->m = NULL; 1126 wrp = (struct work_request_hdr *)txd; 1127 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1128 V_WR_SGLSFLT(1)) | wr_hi; 1129 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1130 sgl_flits + 1)) | 1131 V_WR_GEN(txqs.gen)) | wr_lo; 1132 wr_gen2(txd, txqs.gen); 1133 flits = 1; 1134 } 1135 #ifdef WHY 1136 skb->priority = pidx; 1137 #endif 1138 wrp->wr_hi |= htonl(F_WR_EOP); 1139 wmb(); 1140 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1141 wr_gen2((struct tx_desc *)wp, ogen); 1142 } 1143 check_ring_tx_db(p->adapter, txq); 1144 1145 return (0); 1146 } 1147 1148 1149 /** 1150 * write_imm - write a packet into a Tx descriptor as immediate data 1151 * @d: the Tx descriptor to write 1152 * @m: the packet 1153 * @len: the length of packet data to write as immediate data 1154 * @gen: the generation bit value to write 1155 * 1156 * Writes a packet as immediate data into a Tx descriptor. The packet 1157 * contains a work request at its beginning. We must write the packet 1158 * carefully so the SGE doesn't read accidentally before it's written in 1159 * its entirety. 1160 */ 1161 static __inline void write_imm(struct tx_desc *d, struct mbuf *m, 1162 unsigned int len, unsigned int gen) 1163 { 1164 struct work_request_hdr *from = (struct work_request_hdr *)m->m_data; 1165 struct work_request_hdr *to = (struct work_request_hdr *)d; 1166 1167 memcpy(&to[1], &from[1], len - sizeof(*from)); 1168 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1169 V_WR_BCNTLFLT(len & 7)); 1170 wmb(); 1171 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1172 V_WR_LEN((len + 7) / 8)); 1173 wr_gen2(d, gen); 1174 m_freem(m); 1175 } 1176 1177 /** 1178 * check_desc_avail - check descriptor availability on a send queue 1179 * @adap: the adapter 1180 * @q: the TX queue 1181 * @m: the packet needing the descriptors 1182 * @ndesc: the number of Tx descriptors needed 1183 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1184 * 1185 * Checks if the requested number of Tx descriptors is available on an 1186 * SGE send queue. If the queue is already suspended or not enough 1187 * descriptors are available the packet is queued for later transmission. 1188 * Must be called with the Tx queue locked. 1189 * 1190 * Returns 0 if enough descriptors are available, 1 if there aren't 1191 * enough descriptors and the packet has been queued, and 2 if the caller 1192 * needs to retry because there weren't enough descriptors at the 1193 * beginning of the call but some freed up in the mean time. 
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue
	 * the control queue is only used for binding qsets which happens
	 * at init time so we are guaranteed enough descriptors
	 */
#if 0
	if (__predict_false(!skb_queue_empty(&q->sendq))) {
addq_exit:	__skb_queue_tail(&q->sendq, skb);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		set_bit(qid, &qs->txq_stopped);
		smp_mb__after_clear_bit();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
#endif
	return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any sk_buffs to release.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
}

/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data;

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}

#ifdef RESTART_CTRLQ
/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
1310 */ 1311 static void 1312 restart_ctrlq(unsigned long data) 1313 { 1314 struct mbuf *m; 1315 struct sge_qset *qs = (struct sge_qset *)data; 1316 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1317 adapter_t *adap = qs->port->adapter; 1318 1319 mtx_lock(&q->lock); 1320 again: reclaim_completed_tx_imm(q); 1321 1322 while (q->in_use < q->size && 1323 (skb = __skb_dequeue(&q->sendq)) != NULL) { 1324 1325 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1326 1327 if (++q->pidx >= q->size) { 1328 q->pidx = 0; 1329 q->gen ^= 1; 1330 } 1331 q->in_use++; 1332 } 1333 if (!skb_queue_empty(&q->sendq)) { 1334 set_bit(TXQ_CTRL, &qs->txq_stopped); 1335 smp_mb__after_clear_bit(); 1336 1337 if (should_restart_tx(q) && 1338 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1339 goto again; 1340 q->stops++; 1341 } 1342 1343 mtx_unlock(&q->lock); 1344 t3_write_reg(adap, A_SG_KDOORBELL, 1345 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1346 } 1347 #endif 1348 1349 /* 1350 * Send a management message through control queue 0 1351 */ 1352 int 1353 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1354 { 1355 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1356 } 1357 1358 /** 1359 * t3_sge_alloc_qset - initialize an SGE queue set 1360 * @sc: the controller softc 1361 * @id: the queue set id 1362 * @nports: how many Ethernet ports will be using this queue set 1363 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1364 * @p: configuration parameters for this queue set 1365 * @ntxq: number of Tx queues for the queue set 1366 * @pi: port info for queue set 1367 * 1368 * Allocate resources and initialize an SGE queue set. A queue set 1369 * comprises a response queue, two Rx free-buffer queues, and up to 3 1370 * Tx queues. The Tx queues are assigned roles in the order Ethernet 1371 * queue, offload queue, and control queue. 1372 */ 1373 int 1374 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1375 const struct qset_params *p, int ntxq, struct port_info *pi) 1376 { 1377 struct sge_qset *q = &sc->sge.qs[id]; 1378 int i, ret = 0; 1379 1380 init_qset_cntxt(q, id); 1381 1382 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1383 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1384 &q->fl[0].desc, &q->fl[0].sdesc, 1385 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1386 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1387 printf("error %d from alloc ring fl0\n", ret); 1388 goto err; 1389 } 1390 1391 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1392 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1393 &q->fl[1].desc, &q->fl[1].sdesc, 1394 &q->fl[1].desc_tag, &q->fl[1].desc_map, 1395 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 1396 printf("error %d from alloc ring fl1\n", ret); 1397 goto err; 1398 } 1399 1400 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1401 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1402 &q->rspq.desc_tag, &q->rspq.desc_map, 1403 NULL, NULL)) != 0) { 1404 printf("error %d from alloc ring rspq\n", ret); 1405 goto err; 1406 } 1407 1408 for (i = 0; i < ntxq; ++i) { 1409 /* 1410 * The control queue always uses immediate data so does not 1411 * need to keep track of any mbufs. 1412 * XXX Placeholder for future TOE support. 1413 */ 1414 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 1415 1416 if ((ret = alloc_ring(sc, p->txq_size[i], 1417 sizeof(struct tx_desc), sz, 1418 &q->txq[i].phys_addr, &q->txq[i].desc, 1419 &q->txq[i].sdesc, &q->txq[i].desc_tag, 1420 &q->txq[i].desc_map, 1421 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 1422 printf("error %d from alloc ring tx %i\n", ret, i); 1423 goto err; 1424 } 1425 q->txq[i].gen = 1; 1426 q->txq[i].size = p->txq_size[i]; 1427 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); 1428 } 1429 1430 q->fl[0].gen = q->fl[1].gen = 1; 1431 q->fl[0].size = p->fl_size; 1432 q->fl[1].size = p->jumbo_size; 1433 1434 q->rspq.gen = 1; 1435 q->rspq.size = p->rspq_size; 1436 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); 1437 1438 q->txq[TXQ_ETH].stop_thres = nports * 1439 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 1440 1441 q->fl[0].buf_size = MCLBYTES; 1442 q->fl[0].zone = zone_clust; 1443 q->fl[0].type = EXT_CLUSTER; 1444 q->fl[1].buf_size = MJUMPAGESIZE; 1445 q->fl[1].zone = zone_jumbop; 1446 q->fl[1].type = EXT_JUMBOP; 1447 1448 q->lro.enabled = lro_default; 1449 1450 mtx_lock(&sc->sge.reg_lock); 1451 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 1452 q->rspq.phys_addr, q->rspq.size, 1453 q->fl[0].buf_size, 1, 0); 1454 if (ret) { 1455 printf("error %d from t3_sge_init_rspcntxt\n", ret); 1456 goto err_unlock; 1457 } 1458 1459 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1460 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 1461 q->fl[i].phys_addr, q->fl[i].size, 1462 q->fl[i].buf_size, p->cong_thres, 1, 1463 0); 1464 if (ret) { 1465 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 1466 goto err_unlock; 1467 } 1468 } 1469 1470 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 1471 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 1472 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1473 1, 0); 1474 if (ret) { 1475 printf("error %d from t3_sge_init_ecntxt\n", ret); 1476 goto err_unlock; 1477 } 1478 1479 if (ntxq > 1) { 1480 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 1481 USE_GTS, SGE_CNTXT_OFLD, id, 1482 q->txq[TXQ_OFLD].phys_addr, 1483 q->txq[TXQ_OFLD].size, 0, 1, 0); 1484 if (ret) { 1485 printf("error %d from t3_sge_init_ecntxt\n", ret); 1486 goto err_unlock; 1487 } 1488 } 1489 1490 if (ntxq > 2) { 1491 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 1492 SGE_CNTXT_CTRL, id, 1493 q->txq[TXQ_CTRL].phys_addr, 1494 q->txq[TXQ_CTRL].size, 1495 q->txq[TXQ_CTRL].token, 1, 0); 1496 if (ret) { 1497 printf("error %d from t3_sge_init_ecntxt\n", ret); 1498 goto err_unlock; 1499 } 1500 } 1501 1502 mtx_unlock(&sc->sge.reg_lock); 1503 t3_update_qset_coalesce(q, p); 1504 q->port = pi; 1505 1506 refill_fl(sc, &q->fl[0], q->fl[0].size); 1507 refill_fl(sc, &q->fl[1], q->fl[1].size); 1508 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 1509 1510 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 1511 V_NEWTIMER(q->rspq.holdoff_tmr)); 1512 1513 return (0); 1514 1515 err_unlock: 1516 mtx_unlock(&sc->sge.reg_lock); 1517 err: 1518 t3_free_qset(sc, q); 1519 1520 return (ret); 1521 } 1522 1523 1524 /** 1525 * free_qset - free the resources of an SGE queue set 1526 * @sc: the controller owning the queue set 1527 * @q: the queue set 1528 * 1529 * Release the HW and SW resources associated with an SGE queue set, such 1530 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1531 * queue set must be quiesced prior to calling this. 
1532 */ 1533 static void 1534 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1535 { 1536 int i; 1537 1538 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1539 if (q->fl[i].desc) { 1540 mtx_lock(&sc->sge.reg_lock); 1541 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1542 mtx_unlock(&sc->sge.reg_lock); 1543 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1544 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1545 q->fl[i].desc_map); 1546 bus_dma_tag_destroy(q->fl[i].desc_tag); 1547 bus_dma_tag_destroy(q->fl[i].entry_tag); 1548 } 1549 if (q->fl[i].sdesc) { 1550 free_rx_bufs(sc, &q->fl[i]); 1551 free(q->fl[i].sdesc, M_DEVBUF); 1552 } 1553 } 1554 1555 for (i = 0; i < SGE_TXQ_PER_SET; ++i) { 1556 if (q->txq[i].desc) { 1557 mtx_lock(&sc->sge.reg_lock); 1558 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1559 mtx_unlock(&sc->sge.reg_lock); 1560 bus_dmamap_unload(q->txq[i].desc_tag, 1561 q->txq[i].desc_map); 1562 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1563 q->txq[i].desc_map); 1564 bus_dma_tag_destroy(q->txq[i].desc_tag); 1565 bus_dma_tag_destroy(q->txq[i].entry_tag); 1566 } 1567 if (q->txq[i].sdesc) { 1568 free(q->txq[i].sdesc, M_DEVBUF); 1569 } 1570 if (mtx_initialized(&q->txq[i].lock)) { 1571 mtx_destroy(&q->txq[i].lock); 1572 } 1573 } 1574 1575 if (q->rspq.desc) { 1576 mtx_lock(&sc->sge.reg_lock); 1577 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1578 mtx_unlock(&sc->sge.reg_lock); 1579 1580 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1581 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1582 q->rspq.desc_map); 1583 bus_dma_tag_destroy(q->rspq.desc_tag); 1584 } 1585 1586 if (mtx_initialized(&q->rspq.lock)) 1587 mtx_destroy(&q->rspq.lock); 1588 1589 bzero(q, sizeof(*q)); 1590 } 1591 1592 /** 1593 * t3_free_sge_resources - free SGE resources 1594 * @sc: the adapter softc 1595 * 1596 * Frees resources used by the SGE queue sets. 1597 */ 1598 void 1599 t3_free_sge_resources(adapter_t *sc) 1600 { 1601 int i; 1602 1603 for (i = 0; i < SGE_QSETS; ++i) 1604 t3_free_qset(sc, &sc->sge.qs[i]); 1605 } 1606 1607 /** 1608 * t3_sge_start - enable SGE 1609 * @sc: the controller softc 1610 * 1611 * Enables the SGE for DMAs. This is the last step in starting packet 1612 * transfers. 1613 */ 1614 void 1615 t3_sge_start(adapter_t *sc) 1616 { 1617 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1618 } 1619 1620 1621 /** 1622 * free_tx_desc - reclaims Tx descriptors and their buffers 1623 * @adapter: the adapter 1624 * @q: the Tx queue to reclaim descriptors from 1625 * @n: the number of descriptors to reclaim 1626 * 1627 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1628 * Tx buffers. Called with the Tx queue lock held. 
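 *
 * The mbufs themselves are not freed here; they are collected in @m_vec
 * and returned to the caller, which frees them once the queue lock has
 * been dropped.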
1629 */ 1630 int 1631 free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1632 { 1633 struct tx_sw_desc *d; 1634 unsigned int cidx = q->cidx; 1635 int nbufs = 0; 1636 1637 #ifdef T3_TRACE 1638 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1639 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1640 #endif 1641 d = &q->sdesc[cidx]; 1642 1643 while (n-- > 0) { 1644 DPRINTF("cidx=%d d=%p\n", cidx, d); 1645 if (d->m) { 1646 if (d->flags & TX_SW_DESC_MAPPED) { 1647 bus_dmamap_unload(q->entry_tag, d->map); 1648 bus_dmamap_destroy(q->entry_tag, d->map); 1649 d->flags &= ~TX_SW_DESC_MAPPED; 1650 } 1651 m_vec[nbufs] = d->m; 1652 d->m = NULL; 1653 nbufs++; 1654 } 1655 ++d; 1656 if (++cidx == q->size) { 1657 cidx = 0; 1658 d = q->sdesc; 1659 } 1660 } 1661 q->cidx = cidx; 1662 1663 return (nbufs); 1664 } 1665 1666 /** 1667 * is_new_response - check if a response is newly written 1668 * @r: the response descriptor 1669 * @q: the response queue 1670 * 1671 * Returns true if a response descriptor contains a yet unprocessed 1672 * response. 1673 */ 1674 static __inline int 1675 is_new_response(const struct rsp_desc *r, 1676 const struct sge_rspq *q) 1677 { 1678 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1679 } 1680 1681 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1682 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1683 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1684 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1685 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1686 1687 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1688 #define NOMEM_INTR_DELAY 2500 1689 1690 static __inline void 1691 deliver_partial_bundle(struct t3cdev *tdev, struct sge_rspq *q) 1692 { 1693 ; 1694 } 1695 1696 static __inline void 1697 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 1698 struct mbuf *m) 1699 { 1700 #ifdef notyet 1701 if (rq->polling) { 1702 rq->offload_skbs[rq->offload_skbs_idx++] = skb; 1703 if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) { 1704 cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE); 1705 rq->offload_skbs_idx = 0; 1706 rq->offload_bundles++; 1707 } 1708 } else 1709 #endif 1710 { 1711 /* XXX */ 1712 panic("implement offload enqueue\n"); 1713 } 1714 1715 } 1716 1717 static void 1718 restart_tx(struct sge_qset *qs) 1719 { 1720 ; 1721 } 1722 1723 void 1724 t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 1725 { 1726 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad); 1727 struct ifnet *ifp = pi->ifp; 1728 1729 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff); 1730 if (&pi->adapter->port[cpl->iff] != pi) 1731 panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data); 1732 1733 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 1734 cpl->csum_valid && cpl->csum == 0xffff) { 1735 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 1736 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 1737 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 1738 m->m_pkthdr.csum_data = 0xffff; 1739 } 1740 /* 1741 * XXX need to add VLAN support for 6.x 1742 */ 1743 #ifdef VLAN_SUPPORTED 1744 if (__predict_false(cpl->vlan_valid)) { 1745 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 1746 m->m_flags |= M_VLANTAG; 1747 } 1748 #endif 1749 1750 m->m_pkthdr.rcvif = ifp; 1751 m->m_pkthdr.header = m->m_data + sizeof(*cpl) + ethpad; 1752 m_explode(m); 1753 /* 1754 * adjust after conversion to mbuf chain 1755 */ 1756 m_adj(m, sizeof(*cpl) + ethpad); 1757 1758 
(*ifp->if_input)(ifp, m); 1759 } 1760 1761 /** 1762 * get_packet - return the next ingress packet buffer from a free list 1763 * @adap: the adapter that received the packet 1764 * @drop_thres: # of remaining buffers before we start dropping packets 1765 * @qs: the qset that the SGE free list holding the packet belongs to 1766 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 1767 * @r: response descriptor 1768 * 1769 * Get the next packet from a free list and complete setup of the 1770 * sk_buff. If the packet is small we make a copy and recycle the 1771 * original buffer, otherwise we use the original buffer itself. If a 1772 * positive drop threshold is supplied packets are dropped and their 1773 * buffers recycled if (a) the number of remaining buffers is under the 1774 * threshold and the packet is too big to copy, or (b) the packet should 1775 * be copied but there is no memory for the copy. 1776 */ 1777 1778 static int 1779 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 1780 struct mbuf *m, struct rsp_desc *r) 1781 { 1782 1783 unsigned int len_cq = ntohl(r->len_cq); 1784 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 1785 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 1786 uint32_t len = G_RSPD_LEN(len_cq); 1787 uint32_t flags = ntohl(r->flags); 1788 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 1789 int ret = 0; 1790 1791 prefetch(sd->cl); 1792 1793 fl->credits--; 1794 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 1795 bus_dmamap_unload(fl->entry_tag, sd->map); 1796 1797 1798 switch(sopeop) { 1799 case RSPQ_SOP_EOP: 1800 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 1801 m_cljset(m, sd->cl, fl->type); 1802 m->m_len = m->m_pkthdr.len = len; 1803 ret = 1; 1804 goto done; 1805 break; 1806 case RSPQ_NSOP_NEOP: 1807 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 1808 ret = 0; 1809 break; 1810 case RSPQ_SOP: 1811 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 1812 m_iovinit(m); 1813 ret = 0; 1814 break; 1815 case RSPQ_EOP: 1816 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 1817 ret = 1; 1818 break; 1819 } 1820 m_iovappend(m, sd->cl, fl->buf_size, len, 0); 1821 1822 done: 1823 if (++fl->cidx == fl->size) 1824 fl->cidx = 0; 1825 1826 return (ret); 1827 } 1828 1829 1830 /** 1831 * handle_rsp_cntrl_info - handles control information in a response 1832 * @qs: the queue set corresponding to the response 1833 * @flags: the response control flags 1834 * 1835 * Handles the control information of an SGE response, such as GTS 1836 * indications and completion credits for the queue set's Tx queues. 1837 * HW coalesces credits, we don't do any extra SW coalescing. 
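 *
 * TXQ0 credits are applied to the Ethernet queue, TXQ1 credits to the
 * offload queue, and TXQ2 credits to the control queue.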
1838 */ 1839 static __inline void 1840 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 1841 { 1842 unsigned int credits; 1843 1844 #if USE_GTS 1845 if (flags & F_RSPD_TXQ0_GTS) 1846 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 1847 #endif 1848 credits = G_RSPD_TXQ0_CR(flags); 1849 if (credits) { 1850 qs->txq[TXQ_ETH].processed += credits; 1851 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 1852 taskqueue_enqueue(qs->port->adapter->tq, 1853 &qs->port->adapter->timer_reclaim_task); 1854 } 1855 1856 credits = G_RSPD_TXQ2_CR(flags); 1857 if (credits) 1858 qs->txq[TXQ_CTRL].processed += credits; 1859 1860 # if USE_GTS 1861 if (flags & F_RSPD_TXQ1_GTS) 1862 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 1863 # endif 1864 credits = G_RSPD_TXQ1_CR(flags); 1865 if (credits) 1866 qs->txq[TXQ_OFLD].processed += credits; 1867 } 1868 1869 static void 1870 check_ring_db(adapter_t *adap, struct sge_qset *qs, 1871 unsigned int sleeping) 1872 { 1873 ; 1874 } 1875 1876 /* 1877 * This is an awful hack to bind the ithread to CPU 1 1878 * to work around lack of ithread affinity 1879 */ 1880 static void 1881 bind_ithread(int cpu) 1882 { 1883 #if 0 1884 KASSERT(cpu < mp_ncpus, ("invalid cpu identifier")); 1885 if (mp_ncpus > 1) { 1886 mtx_lock_spin(&sched_lock); 1887 sched_bind(curthread, cpu); 1888 mtx_unlock_spin(&sched_lock); 1889 } 1890 #endif 1891 } 1892 1893 /** 1894 * process_responses - process responses from an SGE response queue 1895 * @adap: the adapter 1896 * @qs: the queue set to which the response queue belongs 1897 * @budget: how many responses can be processed in this round 1898 * 1899 * Process responses from an SGE response queue up to the supplied budget. 1900 * Responses include received packets as well as credits and other events 1901 * for the queues that belong to the response queue's queue set. 1902 * A negative budget is effectively unlimited. 1903 * 1904 * Additionally choose the interrupt holdoff time for the next interrupt 1905 * on this queue. If the system is under memory shortage use a fairly 1906 * long delay to help recovery. 
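 *
 * NOMEM_INTR_DELAY is expressed in 0.1 us units, so the value of 2500
 * used for the memory-shortage case corresponds to a 250 us holdoff.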
1907 */ 1908 static int 1909 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 1910 { 1911 struct sge_rspq *rspq = &qs->rspq; 1912 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 1913 int budget_left = budget; 1914 unsigned int sleeping = 0; 1915 int lro = qs->lro.enabled; 1916 1917 static uint8_t pinned[MAXCPU]; 1918 1919 #ifdef DEBUG 1920 static int last_holdoff = 0; 1921 if (rspq->holdoff_tmr != last_holdoff) { 1922 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 1923 last_holdoff = rspq->holdoff_tmr; 1924 } 1925 #endif 1926 if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) { 1927 /* 1928 * Assumes that cntxt_id < mp_ncpus 1929 */ 1930 bind_ithread(qs->rspq.cntxt_id); 1931 pinned[qs->rspq.cntxt_id * adap->params.nports] = 1; 1932 } 1933 rspq->next_holdoff = rspq->holdoff_tmr; 1934 1935 while (__predict_true(budget_left && is_new_response(r, rspq))) { 1936 int eth, eop = 0, ethpad = 0; 1937 uint32_t flags = ntohl(r->flags); 1938 uint32_t rss_csum = *(const uint32_t *)r; 1939 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 1940 1941 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 1942 1943 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 1944 /* XXX */ 1945 printf("async notification\n"); 1946 1947 } else if (flags & F_RSPD_IMM_DATA_VALID) { 1948 struct mbuf *m = NULL; 1949 if (cxgb_debug) 1950 printf("IMM DATA VALID\n"); 1951 if (rspq->m == NULL) 1952 rspq->m = m_gethdr(M_NOWAIT, MT_DATA); 1953 else 1954 m = m_gethdr(M_NOWAIT, MT_DATA); 1955 1956 if (rspq->m == NULL || m == NULL) { 1957 rspq->next_holdoff = NOMEM_INTR_DELAY; 1958 budget_left--; 1959 break; 1960 } 1961 get_imm_packet(adap, r, rspq->m, m); 1962 eop = 1; 1963 rspq->imm_data++; 1964 } else if (r->len_cq) { 1965 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 1966 1967 if (rspq->m == NULL) 1968 rspq->m = m_gethdr(M_NOWAIT, MT_DATA); 1969 if (rspq->m == NULL) { 1970 log(LOG_WARNING, "failed to get mbuf for packet\n"); 1971 break; 1972 } 1973 1974 ethpad = 2; 1975 eop = get_packet(adap, drop_thresh, qs, rspq->m, r); 1976 } else { 1977 DPRINTF("pure response\n"); 1978 rspq->pure_rsps++; 1979 } 1980 1981 if (flags & RSPD_CTRL_MASK) { 1982 sleeping |= flags & RSPD_GTS_MASK; 1983 handle_rsp_cntrl_info(qs, flags); 1984 } 1985 1986 r++; 1987 if (__predict_false(++rspq->cidx == rspq->size)) { 1988 rspq->cidx = 0; 1989 rspq->gen ^= 1; 1990 r = rspq->desc; 1991 } 1992 1993 prefetch(r); 1994 if (++rspq->credits >= (rspq->size / 4)) { 1995 refill_rspq(adap, rspq, rspq->credits); 1996 rspq->credits = 0; 1997 } 1998 1999 if (eop) { 2000 prefetch(rspq->m->m_data); 2001 prefetch(rspq->m->m_data + L1_CACHE_BYTES); 2002 2003 if (eth) { 2004 t3_rx_eth_lro(adap, rspq, rspq->m, ethpad, 2005 rss_hash, rss_csum, lro); 2006 2007 rspq->m = NULL; 2008 } else { 2009 #ifdef notyet 2010 if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT)) 2011 m_adj(m, 2); 2012 2013 rx_offload(&adap->tdev, rspq, m); 2014 #endif 2015 } 2016 #ifdef notyet 2017 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task); 2018 #else 2019 __refill_fl(adap, &qs->fl[0]); 2020 __refill_fl(adap, &qs->fl[1]); 2021 #endif 2022 } 2023 --budget_left; 2024 } 2025 t3_sge_lro_flush_all(adap, qs); 2026 deliver_partial_bundle(&adap->tdev, rspq); 2027 2028 if (sleeping) 2029 check_ring_db(adap, qs, sleeping); 2030 2031 smp_mb(); /* commit Tx queue processed updates */ 2032 if (__predict_false(qs->txq_stopped != 0)) 2033 restart_tx(qs); 2034 2035 budget -= budget_left; 2036 return (budget); 2037 } 2038 2039 /* 2040 * A helper function that processes responses and issues GTS. 

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
}

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
	int new_packets = 0;

	mtx_lock(&q0->lock);
	if (process_responses_gts(adap, q0))
		new_packets = 1;

	if (adap->params.nports == 2 &&
	    process_responses_gts(adap, q1))
		new_packets = 1;

	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

void
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	mtx_lock(&rspq->lock);
	if (process_responses_gts(adap, rspq) == 0) {
#ifdef notyet
		rspq->unhandled_irqs++;
#endif
	}
	mtx_unlock(&rspq->lock);
}

/*
 * LRO is currently broken by recent mbuf changes, so this handler is a
 * no-op unless LRO_WORKING is defined.
 */
static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc;
	int i, j, enabled, err, nqsets = 0;

#ifndef LRO_WORKING
	return (0);
#endif

	sc = arg1;
	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (err != 0)
		return (err);
	if (enabled == sc->sge.qs[0].lro.enabled)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	for (i = 0; i < nqsets; i++)
		sc->sge.qs[i].lro.enabled = enabled;

	return (0);
}

static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_nsecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	coalesce_nsecs = qsp->coalesce_nsecs;
	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);

	if (err != 0)
		return (err);
	if (coalesce_nsecs == qsp->coalesce_nsecs)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	coalesce_nsecs = max(100, coalesce_nsecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_nsecs = coalesce_nsecs;

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "enable_lro",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_lro_enable,
	    "I", "enable large receive offload");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_free",
	    CTLFLAG_RD, &collapse_free,
	    0, "frees during collapse");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "mb_free_vec_free",
	    CTLFLAG_RD, &mb_free_vec_free,
	    0, "frees during mb_free_vec");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_mbufs",
	    CTLFLAG_RW, &collapse_mbufs,
	    0, "collapse mbuf chains into iovecs");
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor, or EINVAL if the queue or index is invalid.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
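
/*
 * Illustrative sketch only (not part of the driver): dumping the first
 * descriptor of each queue in a queue set through t3_get_desc().  The
 * buffer is sized for the largest of the descriptor types; qnum 0..2
 * selects the Tx queues, 3 the response queue, and 4..5 the free lists,
 * as documented above.  The function name "t3_dump_qset_descs" is
 * hypothetical.
 */
#if 0
static void
t3_dump_qset_descs(const struct sge_qset *qs)
{
	unsigned char buf[sizeof(struct tx_desc) > sizeof(struct rsp_desc) ?
	    sizeof(struct tx_desc) : sizeof(struct rsp_desc)];
	unsigned int qnum;
	int len;

	for (qnum = 0; qnum < 6; qnum++) {
		len = t3_get_desc(qs, qnum, 0, buf);
		if (len == EINVAL)
			continue;	/* queue not initialized */
		printf("queue %u descriptor 0: %d bytes\n", qnum, len);
	}
}
#endif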