1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Chelsio Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/bus_dma.h> 46 #include <sys/rman.h> 47 #include <sys/queue.h> 48 #include <sys/sysctl.h> 49 #include <sys/taskqueue.h> 50 51 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/systm.h> 56 57 #include <netinet/in_systm.h> 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/pci/pcireg.h> 63 #include <dev/pci/pcivar.h> 64 #include <dev/cxgb/common/cxgb_common.h> 65 #include <dev/cxgb/common/cxgb_regs.h> 66 #include <dev/cxgb/common/cxgb_sge_defs.h> 67 #include <dev/cxgb/common/cxgb_t3_cpl.h> 68 #include <dev/cxgb/common/cxgb_firmware_exports.h> 69 70 #define USE_GTS 0 71 72 #define SGE_RX_SM_BUF_SIZE 1536 73 #define SGE_RX_DROP_THRES 16 74 75 /* 76 * Period of the Tx buffer reclaim timer. This timer does not need to run 77 * frequently as Tx buffers are usually reclaimed by new Tx packets. 78 */ 79 #define TX_RECLAIM_PERIOD (hz >> 2) 80 81 /* 82 * work request size in bytes 83 */ 84 #define WR_LEN (WR_FLITS * 8) 85 86 /* 87 * Values for sge_txq.flags 88 */ 89 enum { 90 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 91 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 92 }; 93 94 struct tx_desc { 95 uint64_t flit[TX_DESC_FLITS]; 96 } __packed; 97 98 struct rx_desc { 99 uint32_t addr_lo; 100 uint32_t len_gen; 101 uint32_t gen2; 102 uint32_t addr_hi; 103 } __packed;; 104 105 struct rsp_desc { /* response queue descriptor */ 106 struct rss_header rss_hdr; 107 uint32_t flags; 108 uint32_t len_cq; 109 uint8_t imm_data[47]; 110 uint8_t intr_gen; 111 } __packed; 112 113 #define RX_SW_DESC_MAP_CREATED (1 << 0) 114 #define RX_SW_DESC_INUSE (1 << 3) 115 #define TX_SW_DESC_MAPPED (1 << 4) 116 117 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 118 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 119 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 120 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 121 122 struct tx_sw_desc { /* SW state per Tx descriptor */ 123 struct mbuf *m; 124 bus_dmamap_t map; 125 int flags; 126 }; 127 128 struct rx_sw_desc { /* SW state per Rx descriptor */ 129 struct mbuf *m; 130 bus_dmamap_t map; 131 int flags; 132 }; 133 134 struct txq_state { 135 unsigned int compl; 136 unsigned int gen; 137 unsigned int pidx; 138 }; 139 140 /* 141 * Maps a number of flits to the number of Tx descriptors that can hold them. 142 * The formula is 143 * 144 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 145 * 146 * HW allows up to 4 descriptors to be combined into a WR. 147 */ 148 static uint8_t flit_desc_map[] = { 149 0, 150 #if SGE_NUM_GENBITS == 1 151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 152 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 153 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 154 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 155 #elif SGE_NUM_GENBITS == 2 156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 157 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 158 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 159 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 160 #else 161 # error "SGE_NUM_GENBITS must be 1 or 2" 162 #endif 163 }; 164 165 166 static int lro_default = 0; 167 int cxgb_debug = 0; 168 169 static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 170 static void sge_timer_cb(void *arg); 171 static void sge_timer_reclaim(void *arg, int ncount); 172 static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec); 173 174 /** 175 * reclaim_completed_tx - reclaims completed Tx descriptors 176 * @adapter: the adapter 177 * @q: the Tx queue to reclaim completed descriptors from 178 * 179 * Reclaims Tx descriptors that the SGE has indicated it has processed, 180 * and frees the associated buffers if possible. Called with the Tx 181 * queue's lock held. 182 */ 183 static __inline int 184 reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) 185 { 186 int reclaimed, reclaim = desc_reclaimable(q); 187 int n = 0; 188 189 mtx_assert(&q->lock, MA_OWNED); 190 191 if (reclaim > 0) { 192 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); 193 reclaimed = min(reclaim, nbufs); 194 q->cleaned += reclaimed; 195 q->in_use -= reclaimed; 196 } 197 198 return (n); 199 } 200 201 /** 202 * t3_sge_init - initialize SGE 203 * @adap: the adapter 204 * @p: the SGE parameters 205 * 206 * Performs SGE initialization needed every time after a chip reset. 207 * We do not initialize any of the queue sets here, instead the driver 208 * top-level must request those individually. We also do not enable DMA 209 * here, that should be done after the queues have been set up. 210 */ 211 void 212 t3_sge_init(adapter_t *adap, struct sge_params *p) 213 { 214 u_int ctrl, ups; 215 216 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 217 218 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 219 F_CQCRDTCTRL | 220 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 221 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 222 #if SGE_NUM_GENBITS == 1 223 ctrl |= F_EGRGENCTRL; 224 #endif 225 if (adap->params.rev > 0) { 226 if (!(adap->flags & (USING_MSIX | USING_MSI))) 227 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 228 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 229 } 230 t3_write_reg(adap, A_SG_CONTROL, ctrl); 231 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 232 V_LORCQDRBTHRSH(512)); 233 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 234 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 235 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 236 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 237 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 238 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 239 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 240 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 241 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 242 } 243 244 245 /** 246 * sgl_len - calculates the size of an SGL of the given capacity 247 * @n: the number of SGL entries 248 * 249 * Calculates the number of flits needed for a scatter/gather list that 250 * can hold the given number of entries. 251 */ 252 static __inline unsigned int 253 sgl_len(unsigned int n) 254 { 255 return ((3 * n) / 2 + (n & 1)); 256 } 257 258 /** 259 * get_imm_packet - return the next ingress packet buffer from a response 260 * @resp: the response descriptor containing the packet data 261 * 262 * Return a packet containing the immediate data of the given response. 263 */ 264 static __inline int 265 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh) 266 { 267 struct mbuf *m; 268 int len; 269 uint32_t flags = ntohl(resp->flags); 270 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 271 272 /* 273 * would be a firmware bug 274 */ 275 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 276 return (0); 277 278 m = m_gethdr(M_NOWAIT, MT_DATA); 279 len = G_RSPD_LEN(ntohl(resp->len_cq)); 280 281 if (m) { 282 MH_ALIGN(m, IMMED_PKT_SIZE); 283 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 284 m->m_len = len; 285 286 switch (sopeop) { 287 case RSPQ_SOP_EOP: 288 mh->mh_head = mh->mh_tail = m; 289 m->m_pkthdr.len = len; 290 m->m_flags |= M_PKTHDR; 291 break; 292 case RSPQ_EOP: 293 m->m_flags &= ~M_PKTHDR; 294 mh->mh_head->m_pkthdr.len += len; 295 mh->mh_tail->m_next = m; 296 mh->mh_tail = m; 297 break; 298 } 299 } 300 return (m != NULL); 301 } 302 303 304 static __inline u_int 305 flits_to_desc(u_int n) 306 { 307 return (flit_desc_map[n]); 308 } 309 310 void 311 t3_sge_err_intr_handler(adapter_t *adapter) 312 { 313 unsigned int v, status; 314 315 316 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 317 318 if (status & F_RSPQCREDITOVERFOW) 319 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 320 321 if (status & F_RSPQDISABLED) { 322 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 323 324 CH_ALERT(adapter, 325 "packet delivered to disabled response queue (0x%x)\n", 326 (v >> S_RSPQ0DISABLED) & 0xff); 327 } 328 329 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 330 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 331 t3_fatal_err(adapter); 332 } 333 334 void 335 t3_sge_prep(adapter_t *adap, struct sge_params *p) 336 { 337 int i; 338 339 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 340 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 341 342 for (i = 0; i < SGE_QSETS; ++i) { 343 struct qset_params *q = p->qset + i; 344 345 q->polling = adap->params.rev > 0; 346 347 if (adap->flags & USING_MSIX) 348 q->coalesce_nsecs = 6000; 349 else 350 q->coalesce_nsecs = 3500; 351 352 q->rspq_size = RSPQ_Q_SIZE; 353 q->fl_size = FL_Q_SIZE; 354 q->jumbo_size = JUMBO_Q_SIZE; 355 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 356 q->txq_size[TXQ_OFLD] = 1024; 357 q->txq_size[TXQ_CTRL] = 256; 358 q->cong_thres = 0; 359 } 360 } 361 362 int 363 t3_sge_alloc(adapter_t *sc) 364 { 365 366 /* The parent tag. */ 367 if (bus_dma_tag_create( NULL, /* parent */ 368 1, 0, /* algnmnt, boundary */ 369 BUS_SPACE_MAXADDR, /* lowaddr */ 370 BUS_SPACE_MAXADDR, /* highaddr */ 371 NULL, NULL, /* filter, filterarg */ 372 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 373 BUS_SPACE_UNRESTRICTED, /* nsegments */ 374 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 375 0, /* flags */ 376 NULL, NULL, /* lock, lockarg */ 377 &sc->parent_dmat)) { 378 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 379 return (ENOMEM); 380 } 381 382 /* 383 * DMA tag for normal sized RX frames 384 */ 385 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 386 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 387 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 388 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 389 return (ENOMEM); 390 } 391 392 /* 393 * DMA tag for jumbo sized RX frames. 394 */ 395 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 396 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 397 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 398 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 399 return (ENOMEM); 400 } 401 402 /* 403 * DMA tag for TX frames. 404 */ 405 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 406 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 407 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 408 NULL, NULL, &sc->tx_dmat)) { 409 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 410 return (ENOMEM); 411 } 412 413 return (0); 414 } 415 416 int 417 t3_sge_free(struct adapter * sc) 418 { 419 420 if (sc->tx_dmat != NULL) 421 bus_dma_tag_destroy(sc->tx_dmat); 422 423 if (sc->rx_jumbo_dmat != NULL) 424 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 425 426 if (sc->rx_dmat != NULL) 427 bus_dma_tag_destroy(sc->rx_dmat); 428 429 if (sc->parent_dmat != NULL) 430 bus_dma_tag_destroy(sc->parent_dmat); 431 432 return (0); 433 } 434 435 void 436 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 437 { 438 439 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 440 qs->rspq.polling = 0 /* p->polling */; 441 } 442 443 444 /** 445 * refill_fl - refill an SGE free-buffer list 446 * @sc: the controller softc 447 * @q: the free-list to refill 448 * @n: the number of new buffers to allocate 449 * 450 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 451 * The caller must assure that @n does not exceed the queue's capacity. 452 */ 453 static void 454 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 455 { 456 bus_dma_segment_t seg; 457 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 458 struct rx_desc *d = &q->desc[q->pidx]; 459 struct mbuf *m; 460 int err, nsegs; 461 462 while (n--) { 463 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, q->buf_size); 464 465 if (m == NULL) { 466 log(LOG_WARNING, "Failed to allocate mbuf\n"); 467 goto done; 468 } 469 470 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 471 if ((err = bus_dmamap_create(sc->rx_jumbo_dmat, 0, &sd->map))) { 472 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 473 goto done; 474 } 475 sd->flags |= RX_SW_DESC_MAP_CREATED; 476 } 477 sd->flags |= RX_SW_DESC_INUSE; 478 479 m->m_pkthdr.len = m->m_len = q->buf_size; 480 err = bus_dmamap_load_mbuf_sg(sc->rx_jumbo_dmat, sd->map, m, &seg, 481 &nsegs, BUS_DMA_NOWAIT); 482 if (err != 0) { 483 log(LOG_WARNING, "failure in refill_fl %d\n", err); 484 m_freem(m); 485 return; 486 } 487 488 sd->m = m; 489 d->addr_lo = htobe32(seg.ds_addr & 0xffffffff); 490 d->addr_hi = htobe32(((uint64_t)seg.ds_addr >>32) & 0xffffffff); 491 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 492 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 493 494 d++; 495 sd++; 496 497 if (++q->pidx == q->size) { 498 q->pidx = 0; 499 q->gen ^= 1; 500 sd = q->sdesc; 501 d = q->desc; 502 } 503 q->credits++; 504 } 505 506 done: 507 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 508 } 509 510 511 /** 512 * free_rx_bufs - free the Rx buffers on an SGE free list 513 * @sc: the controle softc 514 * @q: the SGE free list to clean up 515 * 516 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 517 * this queue should be stopped before calling this function. 518 */ 519 static void 520 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 521 { 522 u_int cidx = q->cidx; 523 524 while (q->credits--) { 525 struct rx_sw_desc *d = &q->sdesc[cidx]; 526 527 if (d->flags & RX_SW_DESC_INUSE) { 528 bus_dmamap_unload(sc->rx_jumbo_dmat, d->map); 529 bus_dmamap_destroy(sc->rx_jumbo_dmat, d->map); 530 m_freem(d->m); 531 } 532 d->m = NULL; 533 if (++cidx == q->size) 534 cidx = 0; 535 } 536 } 537 538 static __inline void 539 __refill_fl(adapter_t *adap, struct sge_fl *fl) 540 { 541 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 542 } 543 544 static void 545 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 546 { 547 uint32_t *addr; 548 549 addr = arg; 550 *addr = segs[0].ds_addr; 551 } 552 553 static int 554 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 555 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 556 bus_dmamap_t *map) 557 { 558 size_t len = nelem * elem_size; 559 void *s = NULL; 560 void *p = NULL; 561 int err; 562 563 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 564 BUS_SPACE_MAXADDR_32BIT, 565 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 566 len, 0, NULL, NULL, tag)) != 0) { 567 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 568 return (ENOMEM); 569 } 570 571 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 572 map)) != 0) { 573 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 574 return (ENOMEM); 575 } 576 577 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 578 bzero(p, len); 579 *(void **)desc = p; 580 581 if (sw_size) { 582 len = nelem * sw_size; 583 s = malloc(len, M_DEVBUF, M_WAITOK); 584 bzero(s, len); 585 *(void **)sdesc = s; 586 } 587 return (0); 588 } 589 590 static void 591 sge_slow_intr_handler(void *arg, int ncount) 592 { 593 adapter_t *sc = arg; 594 595 t3_slow_intr_handler(sc); 596 } 597 598 static void 599 sge_timer_cb(void *arg) 600 { 601 adapter_t *sc = arg; 602 struct sge_qset *qs; 603 struct sge_txq *txq; 604 int i, j; 605 int reclaim_eth, reclaim_ofl, refill_rx; 606 607 for (i = 0; i < sc->params.nports; i++) 608 for (j = 0; j < sc->port[i].nqsets; j++) { 609 qs = &sc->sge.qs[i + j]; 610 txq = &qs->txq[0]; 611 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 612 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 613 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 614 (qs->fl[1].credits < qs->fl[1].size)); 615 if (reclaim_eth || reclaim_ofl || refill_rx) { 616 taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task); 617 goto done; 618 } 619 } 620 done: 621 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 622 } 623 624 /* 625 * This is meant to be a catch-all function to keep sge state private 626 * to sge.c 627 * 628 */ 629 int 630 t3_sge_init_sw(adapter_t *sc) 631 { 632 633 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 634 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 635 TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc); 636 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 637 return (0); 638 } 639 640 void 641 t3_sge_deinit_sw(adapter_t *sc) 642 { 643 callout_drain(&sc->sge_timer_ch); 644 if (sc->tq) { 645 taskqueue_drain(sc->tq, &sc->timer_reclaim_task); 646 taskqueue_drain(sc->tq, &sc->slow_intr_task); 647 } 648 } 649 650 /** 651 * refill_rspq - replenish an SGE response queue 652 * @adapter: the adapter 653 * @q: the response queue to replenish 654 * @credits: how many new responses to make available 655 * 656 * Replenishes a response queue by making the supplied number of responses 657 * available to HW. 658 */ 659 static __inline void 660 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 661 { 662 663 /* mbufs are allocated on demand when a rspq entry is processed. */ 664 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 665 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 666 } 667 668 669 static void 670 sge_timer_reclaim(void *arg, int ncount) 671 { 672 adapter_t *sc = arg; 673 int i, nqsets = 0; 674 struct sge_qset *qs; 675 struct sge_txq *txq; 676 struct mtx *lock; 677 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 678 int n, reclaimable; 679 /* 680 * XXX assuming these quantities are allowed to change during operation 681 */ 682 for (i = 0; i < sc->params.nports; i++) 683 nqsets += sc->port[i].nqsets; 684 685 for (i = 0; i < nqsets; i++) { 686 qs = &sc->sge.qs[i]; 687 txq = &qs->txq[TXQ_ETH]; 688 reclaimable = desc_reclaimable(txq); 689 if (reclaimable > 0) { 690 mtx_lock(&txq->lock); 691 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 692 mtx_unlock(&txq->lock); 693 694 for (i = 0; i < n; i++) { 695 m_freem(m_vec[i]); 696 } 697 } 698 699 txq = &qs->txq[TXQ_OFLD]; 700 reclaimable = desc_reclaimable(txq); 701 if (reclaimable > 0) { 702 mtx_lock(&txq->lock); 703 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 704 mtx_unlock(&txq->lock); 705 706 for (i = 0; i < n; i++) { 707 m_freem(m_vec[i]); 708 } 709 } 710 711 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 712 &sc->sge.qs[0].rspq.lock; 713 714 if (mtx_trylock(lock)) { 715 /* XXX currently assume that we are *NOT* polling */ 716 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 717 718 if (qs->fl[0].credits < qs->fl[0].size - 16) 719 __refill_fl(sc, &qs->fl[0]); 720 if (qs->fl[1].credits < qs->fl[1].size - 16) 721 __refill_fl(sc, &qs->fl[1]); 722 723 if (status & (1 << qs->rspq.cntxt_id)) { 724 if (qs->rspq.credits) { 725 refill_rspq(sc, &qs->rspq, 1); 726 qs->rspq.credits--; 727 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 728 1 << qs->rspq.cntxt_id); 729 } 730 } 731 mtx_unlock(lock); 732 } 733 } 734 } 735 736 /** 737 * init_qset_cntxt - initialize an SGE queue set context info 738 * @qs: the queue set 739 * @id: the queue set id 740 * 741 * Initializes the TIDs and context ids for the queues of a queue set. 742 */ 743 static void 744 init_qset_cntxt(struct sge_qset *qs, u_int id) 745 { 746 747 qs->rspq.cntxt_id = id; 748 qs->fl[0].cntxt_id = 2 * id; 749 qs->fl[1].cntxt_id = 2 * id + 1; 750 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 751 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 752 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 753 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 754 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 755 } 756 757 758 static void 759 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 760 { 761 txq->in_use += ndesc; 762 /* 763 * XXX we don't handle stopping of queue 764 * presumably start handles this when we bump against the end 765 */ 766 txqs->gen = txq->gen; 767 txq->unacked += ndesc; 768 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 769 txq->unacked &= 7; 770 txqs->pidx = txq->pidx; 771 txq->pidx += ndesc; 772 773 if (txq->pidx >= txq->size) { 774 txq->pidx -= txq->size; 775 txq->gen ^= 1; 776 } 777 778 } 779 780 /** 781 * calc_tx_descs - calculate the number of Tx descriptors for a packet 782 * @m: the packet mbufs 783 * @nsegs: the number of segments 784 * 785 * Returns the number of Tx descriptors needed for the given Ethernet 786 * packet. Ethernet packets require addition of WR and CPL headers. 787 */ 788 static __inline unsigned int 789 calc_tx_descs(const struct mbuf *m, int nsegs) 790 { 791 unsigned int flits; 792 793 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 794 return 1; 795 796 flits = sgl_len(nsegs) + 2; 797 #ifdef TSO_SUPPORTED 798 if (m->m_pkthdr.tso_segsz) 799 flits++; 800 #endif 801 return flits_to_desc(flits); 802 } 803 804 static __inline unsigned int 805 busdma_map_mbufs(struct mbuf **m, adapter_t *sc, struct tx_sw_desc *stx, 806 bus_dma_segment_t *segs, int *nsegs) 807 { 808 struct mbuf *m0, *mtmp; 809 int err, pktlen; 810 811 m0 = *m; 812 pktlen = m0->m_pkthdr.len; 813 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 814 if (err) { 815 int n = 0; 816 mtmp = m0; 817 while(mtmp) { 818 n++; 819 mtmp = mtmp->m_next; 820 } 821 #ifdef DEBUG 822 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 823 err, m0->m_pkthdr.len, n); 824 #endif 825 } 826 827 828 if (err == EFBIG) { 829 /* Too many segments, try to defrag */ 830 m0 = m_defrag(m0, M_NOWAIT); 831 if (m0 == NULL) { 832 m_freem(*m); 833 *m = NULL; 834 return (ENOBUFS); 835 } 836 *m = m0; 837 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 838 } 839 840 if (err == ENOMEM) { 841 return (err); 842 } 843 844 if (err) { 845 if (cxgb_debug) 846 printf("map failure err=%d pktlen=%d\n", err, pktlen); 847 m_freem(m0); 848 *m = NULL; 849 return (err); 850 } 851 852 bus_dmamap_sync(sc->tx_dmat, stx->map, BUS_DMASYNC_PREWRITE); 853 stx->flags |= TX_SW_DESC_MAPPED; 854 855 return (0); 856 } 857 858 /** 859 * make_sgl - populate a scatter/gather list for a packet 860 * @sgp: the SGL to populate 861 * @segs: the packet dma segments 862 * @nsegs: the number of segments 863 * 864 * Generates a scatter/gather list for the buffers that make up a packet 865 * and returns the SGL size in 8-byte words. The caller must size the SGL 866 * appropriately. 867 */ 868 static __inline void 869 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 870 { 871 int i, idx; 872 873 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { 874 if (i && idx == 0) 875 ++sgp; 876 877 sgp->len[idx] = htobe32(segs[i].ds_len); 878 sgp->addr[idx] = htobe64(segs[i].ds_addr); 879 } 880 881 if (idx) 882 sgp->len[idx] = 0; 883 } 884 885 /** 886 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 887 * @adap: the adapter 888 * @q: the Tx queue 889 * 890 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 891 * where the HW is going to sleep just after we checked, however, 892 * then the interrupt handler will detect the outstanding TX packet 893 * and ring the doorbell for us. 894 * 895 * When GTS is disabled we unconditionally ring the doorbell. 896 */ 897 static __inline void 898 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 899 { 900 #if USE_GTS 901 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 902 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 903 set_bit(TXQ_LAST_PKT_DB, &q->flags); 904 #ifdef T3_TRACE 905 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 906 q->cntxt_id); 907 #endif 908 t3_write_reg(adap, A_SG_KDOORBELL, 909 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 910 } 911 #else 912 wmb(); /* write descriptors before telling HW */ 913 t3_write_reg(adap, A_SG_KDOORBELL, 914 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 915 #endif 916 } 917 918 static __inline void 919 wr_gen2(struct tx_desc *d, unsigned int gen) 920 { 921 #if SGE_NUM_GENBITS == 2 922 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 923 #endif 924 } 925 926 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 927 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 928 929 int 930 t3_encap(struct port_info *p, struct mbuf **m) 931 { 932 adapter_t *sc; 933 struct mbuf *m0; 934 struct sge_qset *qs; 935 struct sge_txq *txq; 936 struct tx_sw_desc *stx; 937 struct txq_state txqs; 938 unsigned int nsegs, ndesc, flits, cntrl, mlen, tso_info; 939 int err; 940 941 struct work_request_hdr *wrp; 942 struct tx_sw_desc *txsd; 943 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 944 bus_dma_segment_t segs[TX_MAX_SEGS]; 945 uint32_t wr_hi, wr_lo, sgl_flits; 946 947 struct tx_desc *txd; 948 struct cpl_tx_pkt *cpl; 949 950 DPRINTF("t3_encap "); 951 m0 = *m; 952 sc = p->adapter; 953 qs = &sc->sge.qs[p->first_qset]; 954 txq = &qs->txq[TXQ_ETH]; 955 stx = &txq->sdesc[txq->pidx]; 956 txd = &txq->desc[txq->pidx]; 957 cpl = (struct cpl_tx_pkt *)txd; 958 mlen = m0->m_pkthdr.len; 959 cpl->len = htonl(mlen | 0x80000000); 960 961 DPRINTF("mlen=%d\n", mlen); 962 /* 963 * XXX handle checksum, TSO, and VLAN here 964 * 965 */ 966 cntrl = V_TXPKT_INTF(p->port); 967 968 /* 969 * XXX need to add VLAN support for 6.x 970 */ 971 #ifdef VLAN_SUPPORTED 972 if (m0->m_flags & M_VLANTAG) 973 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 974 975 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 976 #else 977 tso_info = 0; 978 #endif 979 if (tso_info) { 980 int eth_type; 981 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 982 struct ip *ip; 983 struct tcphdr *tcp; 984 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */ 985 986 txd->flit[2] = 0; 987 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 988 hdr->cntrl = htonl(cntrl); 989 990 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 991 pkthdr = &tmp[0]; 992 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 993 } else { 994 pkthdr = m0->m_data; 995 } 996 997 if (__predict_false(m0->m_flags & M_VLANTAG)) { 998 eth_type = CPL_ETH_II_VLAN; 999 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1000 ETHER_VLAN_ENCAP_LEN); 1001 } else { 1002 eth_type = CPL_ETH_II; 1003 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1004 } 1005 tcp = (struct tcphdr *)((uint8_t *)ip + 1006 sizeof(*ip)); 1007 1008 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1009 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1010 V_LSO_TCPHDR_WORDS(tcp->th_off); 1011 hdr->lso_info = htonl(tso_info); 1012 1013 flits = 3; 1014 } else { 1015 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1016 cpl->cntrl = htonl(cntrl); 1017 1018 if (mlen <= WR_LEN - sizeof(*cpl)) { 1019 txq_prod(txq, 1, &txqs); 1020 txq->sdesc[txqs.pidx].m = m0; 1021 1022 if (m0->m_len == m0->m_pkthdr.len) 1023 memcpy(&txd->flit[2], m0->m_data, mlen); 1024 else 1025 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1026 1027 flits = (mlen + 7) / 8 + 2; 1028 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1029 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1030 F_WR_SOP | F_WR_EOP | txqs.compl); 1031 wmb(); 1032 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1033 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1034 1035 wr_gen2(txd, txqs.gen); 1036 check_ring_tx_db(sc, txq); 1037 return (0); 1038 } 1039 flits = 2; 1040 } 1041 1042 wrp = (struct work_request_hdr *)txd; 1043 1044 if ((err = busdma_map_mbufs(m, sc, stx, segs, &nsegs)) != 0) { 1045 return (err); 1046 } 1047 m0 = *m; 1048 ndesc = calc_tx_descs(m0, nsegs); 1049 1050 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0]; 1051 make_sgl(sgp, segs, nsegs); 1052 1053 sgl_flits = sgl_len(nsegs); 1054 1055 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1056 txq_prod(txq, ndesc, &txqs); 1057 txsd = &txq->sdesc[txqs.pidx]; 1058 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1059 wr_lo = htonl(V_WR_TID(txq->token)); 1060 txsd->m = m0; 1061 1062 if (__predict_true(ndesc == 1)) { 1063 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1064 V_WR_SGLSFLT(flits)) | wr_hi; 1065 wmb(); 1066 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1067 V_WR_GEN(txqs.gen)) | wr_lo; 1068 /* XXX gen? */ 1069 wr_gen2(txd, txqs.gen); 1070 } else { 1071 unsigned int ogen = txqs.gen; 1072 const uint64_t *fp = (const uint64_t *)sgl; 1073 struct work_request_hdr *wp = wrp; 1074 1075 /* XXX - CHECK ME */ 1076 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1077 V_WR_SGLSFLT(flits)) | wr_hi; 1078 1079 while (sgl_flits) { 1080 unsigned int avail = WR_FLITS - flits; 1081 1082 if (avail > sgl_flits) 1083 avail = sgl_flits; 1084 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1085 sgl_flits -= avail; 1086 ndesc--; 1087 if (!sgl_flits) 1088 break; 1089 1090 fp += avail; 1091 txd++; 1092 txsd++; 1093 if (++txqs.pidx == txq->size) { 1094 txqs.pidx = 0; 1095 txqs.gen ^= 1; 1096 txd = txq->desc; 1097 txsd = txq->sdesc; 1098 } 1099 1100 /* 1101 * when the head of the mbuf chain 1102 * is freed all clusters will be freed 1103 * with it 1104 */ 1105 txsd->m = NULL; 1106 wrp = (struct work_request_hdr *)txd; 1107 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1108 V_WR_SGLSFLT(1)) | wr_hi; 1109 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1110 sgl_flits + 1)) | 1111 V_WR_GEN(txqs.gen)) | wr_lo; 1112 wr_gen2(txd, txqs.gen); 1113 flits = 1; 1114 } 1115 #ifdef WHY 1116 skb->priority = pidx; 1117 #endif 1118 wrp->wr_hi |= htonl(F_WR_EOP); 1119 wmb(); 1120 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1121 wr_gen2((struct tx_desc *)wp, ogen); 1122 } 1123 check_ring_tx_db(p->adapter, txq); 1124 1125 return (0); 1126 } 1127 1128 1129 /** 1130 * write_imm - write a packet into a Tx descriptor as immediate data 1131 * @d: the Tx descriptor to write 1132 * @m: the packet 1133 * @len: the length of packet data to write as immediate data 1134 * @gen: the generation bit value to write 1135 * 1136 * Writes a packet as immediate data into a Tx descriptor. The packet 1137 * contains a work request at its beginning. We must write the packet 1138 * carefully so the SGE doesn't read accidentally before it's written in 1139 * its entirety. 1140 */ 1141 static __inline void write_imm(struct tx_desc *d, struct mbuf *m, 1142 unsigned int len, unsigned int gen) 1143 { 1144 struct work_request_hdr *from = (struct work_request_hdr *)m->m_data; 1145 struct work_request_hdr *to = (struct work_request_hdr *)d; 1146 1147 memcpy(&to[1], &from[1], len - sizeof(*from)); 1148 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1149 V_WR_BCNTLFLT(len & 7)); 1150 wmb(); 1151 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1152 V_WR_LEN((len + 7) / 8)); 1153 wr_gen2(d, gen); 1154 m_freem(m); 1155 } 1156 1157 /** 1158 * check_desc_avail - check descriptor availability on a send queue 1159 * @adap: the adapter 1160 * @q: the TX queue 1161 * @m: the packet needing the descriptors 1162 * @ndesc: the number of Tx descriptors needed 1163 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1164 * 1165 * Checks if the requested number of Tx descriptors is available on an 1166 * SGE send queue. If the queue is already suspended or not enough 1167 * descriptors are available the packet is queued for later transmission. 1168 * Must be called with the Tx queue locked. 1169 * 1170 * Returns 0 if enough descriptors are available, 1 if there aren't 1171 * enough descriptors and the packet has been queued, and 2 if the caller 1172 * needs to retry because there weren't enough descriptors at the 1173 * beginning of the call but some freed up in the mean time. 1174 */ 1175 static __inline int 1176 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1177 struct mbuf *m, unsigned int ndesc, 1178 unsigned int qid) 1179 { 1180 /* 1181 * XXX We currently only use this for checking the control queue 1182 * the control queue is only used for binding qsets which happens 1183 * at init time so we are guaranteed enough descriptors 1184 */ 1185 #if 0 1186 if (__predict_false(!skb_queue_empty(&q->sendq))) { 1187 addq_exit: __skb_queue_tail(&q->sendq, skb); 1188 return 1; 1189 } 1190 if (__predict_false(q->size - q->in_use < ndesc)) { 1191 1192 struct sge_qset *qs = txq_to_qset(q, qid); 1193 1194 set_bit(qid, &qs->txq_stopped); 1195 smp_mb__after_clear_bit(); 1196 1197 if (should_restart_tx(q) && 1198 test_and_clear_bit(qid, &qs->txq_stopped)) 1199 return 2; 1200 1201 q->stops++; 1202 goto addq_exit; 1203 } 1204 #endif 1205 return 0; 1206 } 1207 1208 1209 /** 1210 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1211 * @q: the SGE control Tx queue 1212 * 1213 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1214 * that send only immediate data (presently just the control queues) and 1215 * thus do not have any sk_buffs to release. 1216 */ 1217 static __inline void 1218 reclaim_completed_tx_imm(struct sge_txq *q) 1219 { 1220 unsigned int reclaim = q->processed - q->cleaned; 1221 1222 mtx_assert(&q->lock, MA_OWNED); 1223 1224 q->in_use -= reclaim; 1225 q->cleaned += reclaim; 1226 } 1227 1228 static __inline int 1229 immediate(const struct mbuf *m) 1230 { 1231 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1232 } 1233 1234 /** 1235 * ctrl_xmit - send a packet through an SGE control Tx queue 1236 * @adap: the adapter 1237 * @q: the control queue 1238 * @m: the packet 1239 * 1240 * Send a packet through an SGE control Tx queue. Packets sent through 1241 * a control queue must fit entirely as immediate data in a single Tx 1242 * descriptor and have no page fragments. 1243 */ 1244 static int 1245 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1246 { 1247 int ret; 1248 struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data; 1249 1250 if (__predict_false(!immediate(m))) { 1251 m_freem(m); 1252 return 0; 1253 } 1254 1255 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1256 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1257 1258 mtx_lock(&q->lock); 1259 again: reclaim_completed_tx_imm(q); 1260 1261 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1262 if (__predict_false(ret)) { 1263 if (ret == 1) { 1264 mtx_unlock(&q->lock); 1265 return (-1); 1266 } 1267 goto again; 1268 } 1269 1270 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1271 1272 q->in_use++; 1273 if (++q->pidx >= q->size) { 1274 q->pidx = 0; 1275 q->gen ^= 1; 1276 } 1277 mtx_unlock(&q->lock); 1278 wmb(); 1279 t3_write_reg(adap, A_SG_KDOORBELL, 1280 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1281 return (0); 1282 } 1283 1284 #ifdef RESTART_CTRLQ 1285 /** 1286 * restart_ctrlq - restart a suspended control queue 1287 * @qs: the queue set cotaining the control queue 1288 * 1289 * Resumes transmission on a suspended Tx control queue. 1290 */ 1291 static void 1292 restart_ctrlq(unsigned long data) 1293 { 1294 struct mbuf *m; 1295 struct sge_qset *qs = (struct sge_qset *)data; 1296 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1297 adapter_t *adap = qs->port->adapter; 1298 1299 mtx_lock(&q->lock); 1300 again: reclaim_completed_tx_imm(q); 1301 1302 while (q->in_use < q->size && 1303 (skb = __skb_dequeue(&q->sendq)) != NULL) { 1304 1305 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1306 1307 if (++q->pidx >= q->size) { 1308 q->pidx = 0; 1309 q->gen ^= 1; 1310 } 1311 q->in_use++; 1312 } 1313 if (!skb_queue_empty(&q->sendq)) { 1314 set_bit(TXQ_CTRL, &qs->txq_stopped); 1315 smp_mb__after_clear_bit(); 1316 1317 if (should_restart_tx(q) && 1318 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1319 goto again; 1320 q->stops++; 1321 } 1322 1323 mtx_unlock(&q->lock); 1324 t3_write_reg(adap, A_SG_KDOORBELL, 1325 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1326 } 1327 #endif 1328 1329 /* 1330 * Send a management message through control queue 0 1331 */ 1332 int 1333 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1334 { 1335 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1336 } 1337 1338 /** 1339 * t3_sge_alloc_qset - initialize an SGE queue set 1340 * @sc: the controller softc 1341 * @id: the queue set id 1342 * @nports: how many Ethernet ports will be using this queue set 1343 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1344 * @p: configuration parameters for this queue set 1345 * @ntxq: number of Tx queues for the queue set 1346 * @pi: port info for queue set 1347 * 1348 * Allocate resources and initialize an SGE queue set. A queue set 1349 * comprises a response queue, two Rx free-buffer queues, and up to 3 1350 * Tx queues. The Tx queues are assigned roles in the order Ethernet 1351 * queue, offload queue, and control queue. 1352 */ 1353 int 1354 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1355 const struct qset_params *p, int ntxq, struct port_info *pi) 1356 { 1357 struct sge_qset *q = &sc->sge.qs[id]; 1358 int i, ret = 0; 1359 1360 init_qset_cntxt(q, id); 1361 1362 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1363 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1364 &q->fl[0].desc, &q->fl[0].sdesc, 1365 &q->fl[0].desc_tag, &q->fl[0].desc_map)) != 0) { 1366 printf("error %d from alloc ring fl0\n", ret); 1367 goto err; 1368 } 1369 1370 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1371 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1372 &q->fl[1].desc, &q->fl[1].sdesc, 1373 &q->fl[1].desc_tag, &q->fl[1].desc_map)) != 0) { 1374 printf("error %d from alloc ring fl1\n", ret); 1375 goto err; 1376 } 1377 1378 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1379 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1380 &q->rspq.desc_tag, &q->rspq.desc_map)) != 0) { 1381 printf("error %d from alloc ring rspq\n", ret); 1382 goto err; 1383 } 1384 1385 for (i = 0; i < ntxq; ++i) { 1386 /* 1387 * The control queue always uses immediate data so does not 1388 * need to keep track of any mbufs. 1389 * XXX Placeholder for future TOE support. 1390 */ 1391 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 1392 1393 if ((ret = alloc_ring(sc, p->txq_size[i], 1394 sizeof(struct tx_desc), sz, 1395 &q->txq[i].phys_addr, &q->txq[i].desc, 1396 &q->txq[i].sdesc, &q->txq[i].desc_tag, 1397 &q->txq[i].desc_map)) != 0) { 1398 printf("error %d from alloc ring tx %i\n", ret, i); 1399 goto err; 1400 } 1401 1402 q->txq[i].gen = 1; 1403 q->txq[i].size = p->txq_size[i]; 1404 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); 1405 } 1406 1407 q->fl[0].gen = q->fl[1].gen = 1; 1408 q->fl[0].size = p->fl_size; 1409 q->fl[1].size = p->jumbo_size; 1410 1411 q->rspq.gen = 1; 1412 q->rspq.size = p->rspq_size; 1413 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); 1414 1415 q->txq[TXQ_ETH].stop_thres = nports * 1416 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 1417 1418 q->fl[0].buf_size = MCLBYTES; 1419 q->fl[1].buf_size = MJUMPAGESIZE; 1420 q->lro.enabled = lro_default; 1421 1422 mtx_lock(&sc->sge.reg_lock); 1423 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 1424 q->rspq.phys_addr, q->rspq.size, 1425 q->fl[0].buf_size, 1, 0); 1426 if (ret) { 1427 printf("error %d from t3_sge_init_rspcntxt\n", ret); 1428 goto err_unlock; 1429 } 1430 1431 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1432 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 1433 q->fl[i].phys_addr, q->fl[i].size, 1434 q->fl[i].buf_size, p->cong_thres, 1, 1435 0); 1436 if (ret) { 1437 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 1438 goto err_unlock; 1439 } 1440 } 1441 1442 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 1443 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 1444 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1445 1, 0); 1446 if (ret) { 1447 printf("error %d from t3_sge_init_ecntxt\n", ret); 1448 goto err_unlock; 1449 } 1450 1451 if (ntxq > 1) { 1452 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 1453 USE_GTS, SGE_CNTXT_OFLD, id, 1454 q->txq[TXQ_OFLD].phys_addr, 1455 q->txq[TXQ_OFLD].size, 0, 1, 0); 1456 if (ret) { 1457 printf("error %d from t3_sge_init_ecntxt\n", ret); 1458 goto err_unlock; 1459 } 1460 } 1461 1462 if (ntxq > 2) { 1463 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 1464 SGE_CNTXT_CTRL, id, 1465 q->txq[TXQ_CTRL].phys_addr, 1466 q->txq[TXQ_CTRL].size, 1467 q->txq[TXQ_CTRL].token, 1, 0); 1468 if (ret) { 1469 printf("error %d from t3_sge_init_ecntxt\n", ret); 1470 goto err_unlock; 1471 } 1472 } 1473 1474 mtx_unlock(&sc->sge.reg_lock); 1475 t3_update_qset_coalesce(q, p); 1476 q->port = pi; 1477 1478 refill_fl(sc, &q->fl[0], q->fl[0].size); 1479 refill_fl(sc, &q->fl[1], q->fl[1].size); 1480 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 1481 1482 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 1483 V_NEWTIMER(q->rspq.holdoff_tmr)); 1484 1485 return (0); 1486 1487 err_unlock: 1488 mtx_unlock(&sc->sge.reg_lock); 1489 err: 1490 t3_free_qset(sc, q); 1491 1492 return (ret); 1493 } 1494 1495 1496 /** 1497 * free_qset - free the resources of an SGE queue set 1498 * @sc: the controller owning the queue set 1499 * @q: the queue set 1500 * 1501 * Release the HW and SW resources associated with an SGE queue set, such 1502 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1503 * queue set must be quiesced prior to calling this. 1504 */ 1505 static void 1506 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1507 { 1508 int i; 1509 1510 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1511 if (q->fl[i].desc) { 1512 mtx_lock(&sc->sge.reg_lock); 1513 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1514 mtx_unlock(&sc->sge.reg_lock); 1515 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1516 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1517 q->fl[i].desc_map); 1518 bus_dma_tag_destroy(q->fl[i].desc_tag); 1519 } 1520 if (q->fl[i].sdesc) { 1521 free_rx_bufs(sc, &q->fl[i]); 1522 free(q->fl[i].sdesc, M_DEVBUF); 1523 } 1524 } 1525 1526 for (i = 0; i < SGE_TXQ_PER_SET; ++i) { 1527 if (q->txq[i].desc) { 1528 mtx_lock(&sc->sge.reg_lock); 1529 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1530 mtx_unlock(&sc->sge.reg_lock); 1531 bus_dmamap_unload(q->txq[i].desc_tag, 1532 q->txq[i].desc_map); 1533 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1534 q->txq[i].desc_map); 1535 bus_dma_tag_destroy(q->txq[i].desc_tag); 1536 } 1537 if (q->txq[i].sdesc) { 1538 free(q->txq[i].sdesc, M_DEVBUF); 1539 } 1540 if (mtx_initialized(&q->txq[i].lock)) { 1541 mtx_destroy(&q->txq[i].lock); 1542 } 1543 } 1544 1545 if (q->rspq.desc) { 1546 mtx_lock(&sc->sge.reg_lock); 1547 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1548 mtx_unlock(&sc->sge.reg_lock); 1549 1550 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1551 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1552 q->rspq.desc_map); 1553 bus_dma_tag_destroy(q->rspq.desc_tag); 1554 } 1555 if (mtx_initialized(&q->rspq.lock)) { 1556 mtx_destroy(&q->rspq.lock); 1557 } 1558 1559 bzero(q, sizeof(*q)); 1560 } 1561 1562 /** 1563 * t3_free_sge_resources - free SGE resources 1564 * @sc: the adapter softc 1565 * 1566 * Frees resources used by the SGE queue sets. 1567 */ 1568 void 1569 t3_free_sge_resources(adapter_t *sc) 1570 { 1571 int i; 1572 1573 for (i = 0; i < SGE_QSETS; ++i) 1574 t3_free_qset(sc, &sc->sge.qs[i]); 1575 } 1576 1577 /** 1578 * t3_sge_start - enable SGE 1579 * @sc: the controller softc 1580 * 1581 * Enables the SGE for DMAs. This is the last step in starting packet 1582 * transfers. 1583 */ 1584 void 1585 t3_sge_start(adapter_t *sc) 1586 { 1587 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1588 } 1589 1590 1591 /** 1592 * free_tx_desc - reclaims Tx descriptors and their buffers 1593 * @adapter: the adapter 1594 * @q: the Tx queue to reclaim descriptors from 1595 * @n: the number of descriptors to reclaim 1596 * 1597 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1598 * Tx buffers. Called with the Tx queue lock held. 1599 */ 1600 int 1601 free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1602 { 1603 struct tx_sw_desc *d; 1604 unsigned int cidx = q->cidx; 1605 int nbufs = 0; 1606 1607 #ifdef T3_TRACE 1608 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1609 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1610 #endif 1611 d = &q->sdesc[cidx]; 1612 1613 while (n-- > 0) { 1614 DPRINTF("cidx=%d d=%p\n", cidx, d); 1615 if (d->m) { 1616 if (d->flags & TX_SW_DESC_MAPPED) { 1617 bus_dmamap_unload(sc->tx_dmat, d->map); 1618 bus_dmamap_destroy(sc->tx_dmat, d->map); 1619 d->flags &= ~TX_SW_DESC_MAPPED; 1620 } 1621 m_vec[nbufs] = d->m; 1622 d->m = NULL; 1623 nbufs++; 1624 } 1625 ++d; 1626 if (++cidx == q->size) { 1627 cidx = 0; 1628 d = q->sdesc; 1629 } 1630 } 1631 q->cidx = cidx; 1632 1633 return (nbufs); 1634 } 1635 1636 /** 1637 * is_new_response - check if a response is newly written 1638 * @r: the response descriptor 1639 * @q: the response queue 1640 * 1641 * Returns true if a response descriptor contains a yet unprocessed 1642 * response. 1643 */ 1644 static __inline int 1645 is_new_response(const struct rsp_desc *r, 1646 const struct sge_rspq *q) 1647 { 1648 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1649 } 1650 1651 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1652 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1653 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1654 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1655 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1656 1657 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1658 #define NOMEM_INTR_DELAY 2500 1659 1660 static __inline void 1661 deliver_partial_bundle(struct t3cdev *tdev, 1662 struct sge_rspq *q) 1663 { 1664 ; 1665 } 1666 1667 static __inline void 1668 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 1669 struct mbuf *m) 1670 { 1671 #ifdef notyet 1672 if (rq->polling) { 1673 rq->offload_skbs[rq->offload_skbs_idx++] = skb; 1674 if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) { 1675 cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE); 1676 rq->offload_skbs_idx = 0; 1677 rq->offload_bundles++; 1678 } 1679 } else 1680 #endif 1681 { 1682 /* XXX */ 1683 panic("implement offload enqueue\n"); 1684 } 1685 1686 } 1687 1688 static void 1689 restart_tx(struct sge_qset *qs) 1690 { 1691 ; 1692 } 1693 1694 void 1695 t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 1696 { 1697 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad); 1698 struct ifnet *ifp = pi->ifp; 1699 1700 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff); 1701 if (&pi->adapter->port[cpl->iff] != pi) 1702 panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data); 1703 1704 1705 m_adj(m, sizeof(*cpl) + ethpad); 1706 1707 1708 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 1709 cpl->csum_valid && cpl->csum == 0xffff) { 1710 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 1711 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 1712 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 1713 m->m_pkthdr.csum_data = 0xffff; 1714 } 1715 /* 1716 * XXX need to add VLAN support for 6.x 1717 */ 1718 #ifdef VLAN_SUPPORTED 1719 if (__predict_false(cpl->vlan_valid)) { 1720 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 1721 m->m_flags |= M_VLANTAG; 1722 } 1723 #endif 1724 m->m_pkthdr.rcvif = ifp; 1725 1726 (*ifp->if_input)(ifp, m); 1727 } 1728 1729 /** 1730 * get_packet - return the next ingress packet buffer from a free list 1731 * @adap: the adapter that received the packet 1732 * @drop_thres: # of remaining buffers before we start dropping packets 1733 * @qs: the qset that the SGE free list holding the packet belongs to 1734 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 1735 * @r: response descriptor 1736 * 1737 * Get the next packet from a free list and complete setup of the 1738 * sk_buff. If the packet is small we make a copy and recycle the 1739 * original buffer, otherwise we use the original buffer itself. If a 1740 * positive drop threshold is supplied packets are dropped and their 1741 * buffers recycled if (a) the number of remaining buffers is under the 1742 * threshold and the packet is too big to copy, or (b) the packet should 1743 * be copied but there is no memory for the copy. 1744 */ 1745 static int 1746 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 1747 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 1748 { 1749 1750 struct mbuf *m = NULL; 1751 unsigned int len_cq = ntohl(r->len_cq); 1752 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 1753 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 1754 uint32_t len = G_RSPD_LEN(len_cq); 1755 uint32_t flags = ntohl(r->flags); 1756 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 1757 int ret = 0; 1758 1759 prefetch(sd->m->m_data); 1760 1761 fl->credits--; 1762 bus_dmamap_sync(adap->rx_jumbo_dmat, sd->map, BUS_DMASYNC_POSTREAD); 1763 bus_dmamap_unload(adap->rx_jumbo_dmat, sd->map); 1764 m = sd->m; 1765 m->m_len = len; 1766 1767 switch(sopeop) { 1768 case RSPQ_SOP_EOP: 1769 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 1770 mh->mh_head = mh->mh_tail = m; 1771 m->m_pkthdr.len = len; 1772 m->m_flags |= M_PKTHDR; 1773 ret = 1; 1774 break; 1775 case RSPQ_NSOP_NEOP: 1776 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 1777 m->m_flags &= ~M_PKTHDR; 1778 if (mh->mh_tail == NULL) { 1779 if (cxgb_debug) 1780 printf("discarding intermediate descriptor entry\n"); 1781 m_freem(m); 1782 break; 1783 } 1784 mh->mh_tail->m_next = m; 1785 mh->mh_tail = m; 1786 mh->mh_head->m_pkthdr.len += len; 1787 ret = 0; 1788 break; 1789 case RSPQ_SOP: 1790 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 1791 m->m_pkthdr.len = len; 1792 mh->mh_head = mh->mh_tail = m; 1793 m->m_flags |= M_PKTHDR; 1794 ret = 0; 1795 break; 1796 case RSPQ_EOP: 1797 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 1798 m->m_flags &= ~M_PKTHDR; 1799 mh->mh_head->m_pkthdr.len += len; 1800 mh->mh_tail->m_next = m; 1801 mh->mh_tail = m; 1802 ret = 1; 1803 break; 1804 } 1805 if (++fl->cidx == fl->size) 1806 fl->cidx = 0; 1807 1808 return (ret); 1809 } 1810 1811 1812 /** 1813 * handle_rsp_cntrl_info - handles control information in a response 1814 * @qs: the queue set corresponding to the response 1815 * @flags: the response control flags 1816 * 1817 * Handles the control information of an SGE response, such as GTS 1818 * indications and completion credits for the queue set's Tx queues. 1819 * HW coalesces credits, we don't do any extra SW coalescing. 1820 */ 1821 static __inline void 1822 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 1823 { 1824 unsigned int credits; 1825 1826 #if USE_GTS 1827 if (flags & F_RSPD_TXQ0_GTS) 1828 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 1829 #endif 1830 credits = G_RSPD_TXQ0_CR(flags); 1831 if (credits) { 1832 qs->txq[TXQ_ETH].processed += credits; 1833 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 1834 taskqueue_enqueue(qs->port->adapter->tq, 1835 &qs->port->adapter->timer_reclaim_task); 1836 } 1837 1838 credits = G_RSPD_TXQ2_CR(flags); 1839 if (credits) 1840 qs->txq[TXQ_CTRL].processed += credits; 1841 1842 # if USE_GTS 1843 if (flags & F_RSPD_TXQ1_GTS) 1844 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 1845 # endif 1846 credits = G_RSPD_TXQ1_CR(flags); 1847 if (credits) 1848 qs->txq[TXQ_OFLD].processed += credits; 1849 } 1850 1851 static void 1852 check_ring_db(adapter_t *adap, struct sge_qset *qs, 1853 unsigned int sleeping) 1854 { 1855 ; 1856 } 1857 1858 /* 1859 * This is an awful hack to bind the ithread to CPU 1 1860 * to work around lack of ithread affinity 1861 */ 1862 static void 1863 bind_ithread(int cpu) 1864 { 1865 KASSERT(cpu < mp_ncpus, ("invalid cpu identifier")); 1866 #if 0 1867 if (mp_ncpus > 1) { 1868 mtx_lock_spin(&sched_lock); 1869 sched_bind(curthread, cpu); 1870 mtx_unlock_spin(&sched_lock); 1871 } 1872 #endif 1873 } 1874 1875 /** 1876 * process_responses - process responses from an SGE response queue 1877 * @adap: the adapter 1878 * @qs: the queue set to which the response queue belongs 1879 * @budget: how many responses can be processed in this round 1880 * 1881 * Process responses from an SGE response queue up to the supplied budget. 1882 * Responses include received packets as well as credits and other events 1883 * for the queues that belong to the response queue's queue set. 1884 * A negative budget is effectively unlimited. 1885 * 1886 * Additionally choose the interrupt holdoff time for the next interrupt 1887 * on this queue. If the system is under memory shortage use a fairly 1888 * long delay to help recovery. 1889 */ 1890 static int 1891 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 1892 { 1893 struct sge_rspq *rspq = &qs->rspq; 1894 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 1895 int budget_left = budget; 1896 unsigned int sleeping = 0; 1897 int lro = qs->lro.enabled; 1898 1899 static uint8_t pinned[MAXCPU]; 1900 1901 #ifdef DEBUG 1902 static int last_holdoff = 0; 1903 if (rspq->holdoff_tmr != last_holdoff) { 1904 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 1905 last_holdoff = rspq->holdoff_tmr; 1906 } 1907 #endif 1908 if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) { 1909 /* 1910 * Assumes that cntxt_id < mp_ncpus 1911 */ 1912 bind_ithread(qs->rspq.cntxt_id); 1913 pinned[qs->rspq.cntxt_id * adap->params.nports] = 1; 1914 } 1915 rspq->next_holdoff = rspq->holdoff_tmr; 1916 1917 while (__predict_true(budget_left && is_new_response(r, rspq))) { 1918 int eth, eop = 0, ethpad = 0; 1919 uint32_t flags = ntohl(r->flags); 1920 uint32_t rss_csum = *(const uint32_t *)r; 1921 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 1922 1923 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 1924 1925 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 1926 /* XXX */ 1927 printf("async notification\n"); 1928 1929 } else if (flags & F_RSPD_IMM_DATA_VALID) { 1930 if (cxgb_debug) 1931 printf("IMM DATA VALID\n"); 1932 1933 if(get_imm_packet(adap, r, &rspq->mh) == 0) { 1934 rspq->next_holdoff = NOMEM_INTR_DELAY; 1935 budget_left--; 1936 break; 1937 } else { 1938 eop = 1; 1939 } 1940 1941 rspq->imm_data++; 1942 } else if (r->len_cq) { 1943 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 1944 1945 ethpad = 2; 1946 eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r); 1947 } else { 1948 DPRINTF("pure response\n"); 1949 rspq->pure_rsps++; 1950 } 1951 1952 if (flags & RSPD_CTRL_MASK) { 1953 sleeping |= flags & RSPD_GTS_MASK; 1954 handle_rsp_cntrl_info(qs, flags); 1955 } 1956 1957 r++; 1958 if (__predict_false(++rspq->cidx == rspq->size)) { 1959 rspq->cidx = 0; 1960 rspq->gen ^= 1; 1961 r = rspq->desc; 1962 } 1963 1964 prefetch(r); 1965 if (++rspq->credits >= (rspq->size / 4)) { 1966 refill_rspq(adap, rspq, rspq->credits); 1967 rspq->credits = 0; 1968 } 1969 1970 if (eop) { 1971 prefetch(rspq->mh.mh_head->m_data); 1972 prefetch(rspq->mh.mh_head->m_data + L1_CACHE_BYTES); 1973 1974 if (eth) { 1975 t3_rx_eth_lro(adap, rspq, &rspq->mh, ethpad, 1976 rss_hash, rss_csum, lro); 1977 1978 rspq->mh.mh_tail = rspq->mh.mh_head = NULL; 1979 } else { 1980 #ifdef notyet 1981 if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT)) 1982 m_adj(m, 2); 1983 1984 rx_offload(&adap->tdev, rspq, m); 1985 #endif 1986 } 1987 #ifdef notyet 1988 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task); 1989 #else 1990 __refill_fl(adap, &qs->fl[0]); 1991 __refill_fl(adap, &qs->fl[1]); 1992 #endif 1993 1994 } 1995 --budget_left; 1996 } 1997 t3_sge_lro_flush_all(adap, qs); 1998 deliver_partial_bundle(&adap->tdev, rspq); 1999 2000 if (sleeping) 2001 check_ring_db(adap, qs, sleeping); 2002 2003 smp_mb(); /* commit Tx queue processed updates */ 2004 if (__predict_false(qs->txq_stopped != 0)) 2005 restart_tx(qs); 2006 2007 budget -= budget_left; 2008 return (budget); 2009 } 2010 2011 /* 2012 * A helper function that processes responses and issues GTS. 2013 */ 2014 static __inline int 2015 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2016 { 2017 int work; 2018 static int last_holdoff = 0; 2019 2020 work = process_responses(adap, rspq_to_qset(rq), -1); 2021 2022 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2023 printf("next_holdoff=%d\n", rq->next_holdoff); 2024 last_holdoff = rq->next_holdoff; 2025 } 2026 2027 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2028 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2029 return work; 2030 } 2031 2032 2033 /* 2034 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2035 * Handles data events from SGE response queues as well as error and other 2036 * async events as they all use the same interrupt pin. We use one SGE 2037 * response queue per port in this mode and protect all response queues with 2038 * queue 0's lock. 2039 */ 2040 void 2041 t3b_intr(void *data) 2042 { 2043 uint32_t map; 2044 adapter_t *adap = data; 2045 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2046 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2047 2048 2049 t3_write_reg(adap, A_PL_CLI, 0); 2050 map = t3_read_reg(adap, A_SG_DATA_INTR); 2051 2052 if (!map) 2053 return; 2054 2055 if (__predict_false(map & F_ERRINTR)) 2056 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2057 2058 mtx_lock(&q0->lock); 2059 2060 if (__predict_true(map & 1)) 2061 process_responses_gts(adap, q0); 2062 2063 if (map & 2) 2064 process_responses_gts(adap, q1); 2065 2066 mtx_unlock(&q0->lock); 2067 } 2068 2069 /* 2070 * The MSI interrupt handler. This needs to handle data events from SGE 2071 * response queues as well as error and other async events as they all use 2072 * the same MSI vector. We use one SGE response queue per port in this mode 2073 * and protect all response queues with queue 0's lock. 2074 */ 2075 void 2076 t3_intr_msi(void *data) 2077 { 2078 adapter_t *adap = data; 2079 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2080 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2081 int new_packets = 0; 2082 2083 mtx_lock(&q0->lock); 2084 if (process_responses_gts(adap, q0)) { 2085 new_packets = 1; 2086 } 2087 2088 if (adap->params.nports == 2 && 2089 process_responses_gts(adap, q1)) { 2090 new_packets = 1; 2091 } 2092 2093 2094 mtx_unlock(&q0->lock); 2095 if (new_packets == 0) 2096 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2097 } 2098 2099 void 2100 t3_intr_msix(void *data) 2101 { 2102 struct sge_qset *qs = data; 2103 adapter_t *adap = qs->port->adapter; 2104 struct sge_rspq *rspq = &qs->rspq; 2105 2106 mtx_lock(&rspq->lock); 2107 if (process_responses_gts(adap, rspq) == 0) { 2108 #ifdef notyet 2109 rspq->unhandled_irqs++; 2110 #endif 2111 } 2112 mtx_unlock(&rspq->lock); 2113 } 2114 2115 static int 2116 t3_lro_enable(SYSCTL_HANDLER_ARGS) 2117 { 2118 adapter_t *sc; 2119 int i, j, enabled, err, nqsets = 0; 2120 2121 sc = arg1; 2122 enabled = sc->sge.qs[0].lro.enabled; 2123 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2124 2125 if (err != 0) { 2126 return (err); 2127 } 2128 if (enabled == sc->sge.qs[0].lro.enabled) 2129 return (0); 2130 2131 for (i = 0; i < sc->params.nports; i++) 2132 for (j = 0; j < sc->port[i].nqsets; j++) 2133 nqsets++; 2134 2135 for (i = 0; i < nqsets; i++) { 2136 sc->sge.qs[i].lro.enabled = enabled; 2137 } 2138 2139 return (0); 2140 } 2141 2142 static int 2143 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2144 { 2145 adapter_t *sc = arg1; 2146 struct qset_params *qsp = &sc->params.sge.qset[0]; 2147 int coalesce_nsecs; 2148 struct sge_qset *qs; 2149 int i, j, err, nqsets = 0; 2150 struct mtx *lock; 2151 2152 coalesce_nsecs = qsp->coalesce_nsecs; 2153 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2154 2155 if (err != 0) { 2156 return (err); 2157 } 2158 if (coalesce_nsecs == qsp->coalesce_nsecs) 2159 return (0); 2160 2161 for (i = 0; i < sc->params.nports; i++) 2162 for (j = 0; j < sc->port[i].nqsets; j++) 2163 nqsets++; 2164 2165 coalesce_nsecs = max(100, coalesce_nsecs); 2166 2167 for (i = 0; i < nqsets; i++) { 2168 qs = &sc->sge.qs[i]; 2169 qsp = &sc->params.sge.qset[i]; 2170 qsp->coalesce_nsecs = coalesce_nsecs; 2171 2172 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 2173 &sc->sge.qs[0].rspq.lock; 2174 2175 mtx_lock(lock); 2176 t3_update_qset_coalesce(qs, qsp); 2177 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2178 V_NEWTIMER(qs->rspq.holdoff_tmr)); 2179 mtx_unlock(lock); 2180 } 2181 2182 return (0); 2183 } 2184 2185 2186 void 2187 t3_add_sysctls(adapter_t *sc) 2188 { 2189 struct sysctl_ctx_list *ctx; 2190 struct sysctl_oid_list *children; 2191 2192 ctx = device_get_sysctl_ctx(sc->dev); 2193 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 2194 2195 /* random information */ 2196 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 2197 "firmware_version", 2198 CTLFLAG_RD, &sc->fw_version, 2199 0, "firmware version"); 2200 2201 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2202 "enable_lro", 2203 CTLTYPE_INT|CTLFLAG_RW, sc, 2204 0, t3_lro_enable, 2205 "I", "enable large receive offload"); 2206 2207 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2208 "intr_coal", 2209 CTLTYPE_INT|CTLFLAG_RW, sc, 2210 0, t3_set_coalesce_nsecs, 2211 "I", "interrupt coalescing timer (ns)"); 2212 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2213 "enable_debug", 2214 CTLFLAG_RW, &cxgb_debug, 2215 0, "enable verbose debugging output"); 2216 2217 } 2218 2219 /** 2220 * t3_get_desc - dump an SGE descriptor for debugging purposes 2221 * @qs: the queue set 2222 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2223 * @idx: the descriptor index in the queue 2224 * @data: where to dump the descriptor contents 2225 * 2226 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2227 * size of the descriptor. 2228 */ 2229 int 2230 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2231 unsigned char *data) 2232 { 2233 if (qnum >= 6) 2234 return (EINVAL); 2235 2236 if (qnum < 3) { 2237 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2238 return -EINVAL; 2239 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2240 return sizeof(struct tx_desc); 2241 } 2242 2243 if (qnum == 3) { 2244 if (!qs->rspq.desc || idx >= qs->rspq.size) 2245 return (EINVAL); 2246 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2247 return sizeof(struct rsp_desc); 2248 } 2249 2250 qnum -= 4; 2251 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2252 return (EINVAL); 2253 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2254 return sizeof(struct rx_desc); 2255 } 2256