1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Chelsio Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/bus_dma.h> 46 #include <sys/rman.h> 47 #include <sys/queue.h> 48 #include <sys/sysctl.h> 49 #include <sys/taskqueue.h> 50 51 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/systm.h> 56 57 #include <netinet/in_systm.h> 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/pci/pcireg.h> 63 #include <dev/pci/pcivar.h> 64 #include <dev/cxgb/common/cxgb_common.h> 65 #include <dev/cxgb/common/cxgb_regs.h> 66 #include <dev/cxgb/common/cxgb_sge_defs.h> 67 #include <dev/cxgb/common/cxgb_t3_cpl.h> 68 #include <dev/cxgb/common/cxgb_firmware_exports.h> 69 70 #define USE_GTS 0 71 72 #define SGE_RX_SM_BUF_SIZE 1536 73 #define SGE_RX_DROP_THRES 16 74 75 /* 76 * Period of the Tx buffer reclaim timer. This timer does not need to run 77 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
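 * The period below is hz >> 2 ticks, i.e. roughly a quarter of a
 * second regardless of the configured hz value.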
78 */ 79 #define TX_RECLAIM_PERIOD (hz >> 2) 80 81 /* 82 * work request size in bytes 83 */ 84 #define WR_LEN (WR_FLITS * 8) 85 86 /* 87 * Values for sge_txq.flags 88 */ 89 enum { 90 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 91 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 92 }; 93 94 struct tx_desc { 95 uint64_t flit[TX_DESC_FLITS]; 96 } __packed; 97 98 struct rx_desc { 99 uint32_t addr_lo; 100 uint32_t len_gen; 101 uint32_t gen2; 102 uint32_t addr_hi; 103 } __packed;; 104 105 struct rsp_desc { /* response queue descriptor */ 106 struct rss_header rss_hdr; 107 uint32_t flags; 108 uint32_t len_cq; 109 uint8_t imm_data[47]; 110 uint8_t intr_gen; 111 } __packed; 112 113 #define RX_SW_DESC_MAP_CREATED (1 << 0) 114 #define RX_SW_DESC_INUSE (1 << 3) 115 #define TX_SW_DESC_MAPPED (1 << 4) 116 117 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 118 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 119 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 120 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 121 122 struct tx_sw_desc { /* SW state per Tx descriptor */ 123 struct mbuf *m; 124 bus_dmamap_t map; 125 int flags; 126 }; 127 128 struct rx_sw_desc { /* SW state per Rx descriptor */ 129 void *cl; 130 bus_dmamap_t map; 131 int flags; 132 }; 133 134 struct txq_state { 135 unsigned int compl; 136 unsigned int gen; 137 unsigned int pidx; 138 }; 139 140 struct refill_fl_cb_arg { 141 int error; 142 bus_dma_segment_t seg; 143 int nseg; 144 }; 145 146 /* 147 * Maps a number of flits to the number of Tx descriptors that can hold them. 148 * The formula is 149 * 150 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 151 * 152 * HW allows up to 4 descriptors to be combined into a WR. 153 */ 154 static uint8_t flit_desc_map[] = { 155 0, 156 #if SGE_NUM_GENBITS == 1 157 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 160 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 161 #elif SGE_NUM_GENBITS == 2 162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 164 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 165 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 166 #else 167 # error "SGE_NUM_GENBITS must be 1 or 2" 168 #endif 169 }; 170 171 172 static int lro_default = 0; 173 int cxgb_debug = 0; 174 175 static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 176 static void sge_timer_cb(void *arg); 177 static void sge_timer_reclaim(void *arg, int ncount); 178 static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec); 179 180 /** 181 * reclaim_completed_tx - reclaims completed Tx descriptors 182 * @adapter: the adapter 183 * @q: the Tx queue to reclaim completed descriptors from 184 * 185 * Reclaims Tx descriptors that the SGE has indicated it has processed, 186 * and frees the associated buffers if possible. Called with the Tx 187 * queue's lock held. 
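 *
 *	A minimal usage sketch (mirroring sge_timer_reclaim() later in
 *	this file; freeing the returned mbufs is left to the caller):
 *
 *		struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
 *		int i, n;
 *
 *		mtx_lock(&txq->lock);
 *		n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
 *		mtx_unlock(&txq->lock);
 *		for (i = 0; i < n; i++)
 *			m_freem(m_vec[i]);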
188 */ 189 static __inline int 190 reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) 191 { 192 int reclaimed, reclaim = desc_reclaimable(q); 193 int n = 0; 194 195 mtx_assert(&q->lock, MA_OWNED); 196 197 if (reclaim > 0) { 198 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); 199 reclaimed = min(reclaim, nbufs); 200 q->cleaned += reclaimed; 201 q->in_use -= reclaimed; 202 } 203 204 return (n); 205 } 206 207 /** 208 * t3_sge_init - initialize SGE 209 * @adap: the adapter 210 * @p: the SGE parameters 211 * 212 * Performs SGE initialization needed every time after a chip reset. 213 * We do not initialize any of the queue sets here, instead the driver 214 * top-level must request those individually. We also do not enable DMA 215 * here, that should be done after the queues have been set up. 216 */ 217 void 218 t3_sge_init(adapter_t *adap, struct sge_params *p) 219 { 220 u_int ctrl, ups; 221 222 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 223 224 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 225 F_CQCRDTCTRL | 226 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 227 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 228 #if SGE_NUM_GENBITS == 1 229 ctrl |= F_EGRGENCTRL; 230 #endif 231 if (adap->params.rev > 0) { 232 if (!(adap->flags & (USING_MSIX | USING_MSI))) 233 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 234 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 235 } 236 t3_write_reg(adap, A_SG_CONTROL, ctrl); 237 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 238 V_LORCQDRBTHRSH(512)); 239 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 240 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 241 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 242 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 243 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 244 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 245 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 246 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 247 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 248 } 249 250 251 /** 252 * sgl_len - calculates the size of an SGL of the given capacity 253 * @n: the number of SGL entries 254 * 255 * Calculates the number of flits needed for a scatter/gather list that 256 * can hold the given number of entries. 257 */ 258 static __inline unsigned int 259 sgl_len(unsigned int n) 260 { 261 return ((3 * n) / 2 + (n & 1)); 262 } 263 264 /** 265 * get_imm_packet - return the next ingress packet buffer from a response 266 * @resp: the response descriptor containing the packet data 267 * 268 * Return a packet containing the immediate data of the given response. 
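 *
 *	Illustrative caller (condensed from process_responses() below),
 *	taken when a response has F_RSPD_IMM_DATA_VALID set:
 *
 *		if (get_imm_packet(adap, r, &rspq->mh) == 0)
 *			rspq->next_holdoff = NOMEM_INTR_DELAY;
 *		else
 *			eop = 1;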
269 */ 270 static __inline int 271 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh) 272 { 273 struct mbuf *m; 274 int len; 275 uint32_t flags = ntohl(resp->flags); 276 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 277 278 /* 279 * would be a firmware bug 280 */ 281 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 282 return (0); 283 284 m = m_gethdr(M_NOWAIT, MT_DATA); 285 len = G_RSPD_LEN(ntohl(resp->len_cq)); 286 287 if (m) { 288 MH_ALIGN(m, IMMED_PKT_SIZE); 289 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 290 m->m_len = len; 291 292 switch (sopeop) { 293 case RSPQ_SOP_EOP: 294 mh->mh_head = mh->mh_tail = m; 295 m->m_pkthdr.len = len; 296 m->m_flags |= M_PKTHDR; 297 break; 298 case RSPQ_EOP: 299 m->m_flags &= ~M_PKTHDR; 300 mh->mh_head->m_pkthdr.len += len; 301 mh->mh_tail->m_next = m; 302 mh->mh_tail = m; 303 break; 304 } 305 } 306 return (m != NULL); 307 } 308 309 310 static __inline u_int 311 flits_to_desc(u_int n) 312 { 313 return (flit_desc_map[n]); 314 } 315 316 void 317 t3_sge_err_intr_handler(adapter_t *adapter) 318 { 319 unsigned int v, status; 320 321 322 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 323 324 if (status & F_RSPQCREDITOVERFOW) 325 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 326 327 if (status & F_RSPQDISABLED) { 328 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 329 330 CH_ALERT(adapter, 331 "packet delivered to disabled response queue (0x%x)\n", 332 (v >> S_RSPQ0DISABLED) & 0xff); 333 } 334 335 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 336 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 337 t3_fatal_err(adapter); 338 } 339 340 void 341 t3_sge_prep(adapter_t *adap, struct sge_params *p) 342 { 343 int i; 344 345 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 346 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 347 348 for (i = 0; i < SGE_QSETS; ++i) { 349 struct qset_params *q = p->qset + i; 350 351 q->polling = adap->params.rev > 0; 352 353 if (adap->flags & USING_MSIX) 354 q->coalesce_nsecs = 6000; 355 else 356 q->coalesce_nsecs = 3500; 357 358 q->rspq_size = RSPQ_Q_SIZE; 359 q->fl_size = FL_Q_SIZE; 360 q->jumbo_size = JUMBO_Q_SIZE; 361 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 362 q->txq_size[TXQ_OFLD] = 1024; 363 q->txq_size[TXQ_CTRL] = 256; 364 q->cong_thres = 0; 365 } 366 } 367 368 int 369 t3_sge_alloc(adapter_t *sc) 370 { 371 372 /* The parent tag. */ 373 if (bus_dma_tag_create( NULL, /* parent */ 374 1, 0, /* algnmnt, boundary */ 375 BUS_SPACE_MAXADDR, /* lowaddr */ 376 BUS_SPACE_MAXADDR, /* highaddr */ 377 NULL, NULL, /* filter, filterarg */ 378 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 379 BUS_SPACE_UNRESTRICTED, /* nsegments */ 380 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 381 0, /* flags */ 382 NULL, NULL, /* lock, lockarg */ 383 &sc->parent_dmat)) { 384 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 385 return (ENOMEM); 386 } 387 388 /* 389 * DMA tag for normal sized RX frames 390 */ 391 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 392 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 393 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 394 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 395 return (ENOMEM); 396 } 397 398 /* 399 * DMA tag for jumbo sized RX frames. 
400 */ 401 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 402 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 403 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 404 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 405 return (ENOMEM); 406 } 407 408 /* 409 * DMA tag for TX frames. 410 */ 411 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 412 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 413 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 414 NULL, NULL, &sc->tx_dmat)) { 415 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 416 return (ENOMEM); 417 } 418 419 return (0); 420 } 421 422 int 423 t3_sge_free(struct adapter * sc) 424 { 425 426 if (sc->tx_dmat != NULL) 427 bus_dma_tag_destroy(sc->tx_dmat); 428 429 if (sc->rx_jumbo_dmat != NULL) 430 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 431 432 if (sc->rx_dmat != NULL) 433 bus_dma_tag_destroy(sc->rx_dmat); 434 435 if (sc->parent_dmat != NULL) 436 bus_dma_tag_destroy(sc->parent_dmat); 437 438 return (0); 439 } 440 441 void 442 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 443 { 444 445 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 446 qs->rspq.polling = 0 /* p->polling */; 447 } 448 449 static void 450 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 451 { 452 struct refill_fl_cb_arg *cb_arg = arg; 453 454 cb_arg->error = error; 455 cb_arg->seg = segs[0]; 456 cb_arg->nseg = nseg; 457 458 } 459 460 /** 461 * refill_fl - refill an SGE free-buffer list 462 * @sc: the controller softc 463 * @q: the free-list to refill 464 * @n: the number of new buffers to allocate 465 * 466 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 467 * The caller must assure that @n does not exceed the queue's capacity. 
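 *
 *	The call sites later in this file look like
 *
 *		refill_fl(sc, &q->fl[0], q->fl[0].size);
 *
 *	for the initial fill in t3_sge_alloc_qset(), and
 *
 *		refill_fl(adap, fl, min(16U, fl->size - fl->credits));
 *
 *	for the periodic top-up in __refill_fl().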
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx.
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				/*
				 * Free the cluster we just allocated; it was
				 * never handed to the hardware.
				 */
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * The cluster was never attached to a descriptor,
			 * so release it before bailing out.
			 */
			uma_zfree(q->zone, cl);
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	bus_addr_t *addr;	/* matches the bus_addr_t *phys passed by alloc_ring() */

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
	    len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
	    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK);
		bzero(s, len);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
	    NULL, NULL, PAGE_SIZE, 1,
	    PAGE_SIZE, BUS_DMA_ALLOCNOW,
	    NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int i, j;
	int reclaim_eth, reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_eth || reclaim_ofl || refill_rx) {
				taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task);
				goto done;
			}
		}
done:
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
 */
int
t3_sge_init_sw(adapter_t *sc)
{

	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}
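
/*
 * Sketch of the deferred-reclaim flow wired up above (descriptive
 * only, no new driver code): the callout fires every TX_RECLAIM_PERIOD
 * ticks and runs sge_timer_cb(), which scans the queue sets for
 * reclaimable Tx descriptors or depleted free lists and, if it finds
 * any, enqueues timer_reclaim_task so sge_timer_reclaim() does the
 * actual work from the adapter taskqueue:
 *
 *	callout -> sge_timer_cb() -> taskqueue_enqueue(sc->tq, ...)
 *		-> sge_timer_reclaim()
 */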
671 672 void 673 t3_sge_deinit_sw(adapter_t *sc) 674 { 675 callout_drain(&sc->sge_timer_ch); 676 if (sc->tq) { 677 taskqueue_drain(sc->tq, &sc->timer_reclaim_task); 678 taskqueue_drain(sc->tq, &sc->slow_intr_task); 679 } 680 } 681 682 /** 683 * refill_rspq - replenish an SGE response queue 684 * @adapter: the adapter 685 * @q: the response queue to replenish 686 * @credits: how many new responses to make available 687 * 688 * Replenishes a response queue by making the supplied number of responses 689 * available to HW. 690 */ 691 static __inline void 692 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 693 { 694 695 /* mbufs are allocated on demand when a rspq entry is processed. */ 696 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 697 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 698 } 699 700 701 static void 702 sge_timer_reclaim(void *arg, int ncount) 703 { 704 adapter_t *sc = arg; 705 int i, nqsets = 0; 706 struct sge_qset *qs; 707 struct sge_txq *txq; 708 struct mtx *lock; 709 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 710 int n, reclaimable; 711 /* 712 * XXX assuming these quantities are allowed to change during operation 713 */ 714 for (i = 0; i < sc->params.nports; i++) 715 nqsets += sc->port[i].nqsets; 716 717 for (i = 0; i < nqsets; i++) { 718 qs = &sc->sge.qs[i]; 719 txq = &qs->txq[TXQ_ETH]; 720 reclaimable = desc_reclaimable(txq); 721 if (reclaimable > 0) { 722 mtx_lock(&txq->lock); 723 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 724 mtx_unlock(&txq->lock); 725 726 for (i = 0; i < n; i++) { 727 m_freem(m_vec[i]); 728 } 729 } 730 731 txq = &qs->txq[TXQ_OFLD]; 732 reclaimable = desc_reclaimable(txq); 733 if (reclaimable > 0) { 734 mtx_lock(&txq->lock); 735 n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec); 736 mtx_unlock(&txq->lock); 737 738 for (i = 0; i < n; i++) { 739 m_freem(m_vec[i]); 740 } 741 } 742 743 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 744 &sc->sge.qs[0].rspq.lock; 745 746 if (mtx_trylock(lock)) { 747 /* XXX currently assume that we are *NOT* polling */ 748 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 749 750 if (qs->fl[0].credits < qs->fl[0].size - 16) 751 __refill_fl(sc, &qs->fl[0]); 752 if (qs->fl[1].credits < qs->fl[1].size - 16) 753 __refill_fl(sc, &qs->fl[1]); 754 755 if (status & (1 << qs->rspq.cntxt_id)) { 756 if (qs->rspq.credits) { 757 refill_rspq(sc, &qs->rspq, 1); 758 qs->rspq.credits--; 759 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 760 1 << qs->rspq.cntxt_id); 761 } 762 } 763 mtx_unlock(lock); 764 } 765 } 766 } 767 768 /** 769 * init_qset_cntxt - initialize an SGE queue set context info 770 * @qs: the queue set 771 * @id: the queue set id 772 * 773 * Initializes the TIDs and context ids for the queues of a queue set. 
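 *
 *	For example, queue set 1 ends up with rspq.cntxt_id = 1,
 *	fl[0].cntxt_id = 2, fl[1].cntxt_id = 3, and Tx context ids of
 *	FW_TUNNEL_SGEEC_START + 1, FW_OFLD_SGEEC_START + 1 and
 *	FW_CTRL_SGEEC_START + 1, as follows directly from the
 *	assignments below.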
774 */ 775 static void 776 init_qset_cntxt(struct sge_qset *qs, u_int id) 777 { 778 779 qs->rspq.cntxt_id = id; 780 qs->fl[0].cntxt_id = 2 * id; 781 qs->fl[1].cntxt_id = 2 * id + 1; 782 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 783 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 784 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 785 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 786 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 787 } 788 789 790 static void 791 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 792 { 793 txq->in_use += ndesc; 794 /* 795 * XXX we don't handle stopping of queue 796 * presumably start handles this when we bump against the end 797 */ 798 txqs->gen = txq->gen; 799 txq->unacked += ndesc; 800 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 801 txq->unacked &= 7; 802 txqs->pidx = txq->pidx; 803 txq->pidx += ndesc; 804 805 if (txq->pidx >= txq->size) { 806 txq->pidx -= txq->size; 807 txq->gen ^= 1; 808 } 809 810 } 811 812 /** 813 * calc_tx_descs - calculate the number of Tx descriptors for a packet 814 * @m: the packet mbufs 815 * @nsegs: the number of segments 816 * 817 * Returns the number of Tx descriptors needed for the given Ethernet 818 * packet. Ethernet packets require addition of WR and CPL headers. 819 */ 820 static __inline unsigned int 821 calc_tx_descs(const struct mbuf *m, int nsegs) 822 { 823 unsigned int flits; 824 825 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 826 return 1; 827 828 flits = sgl_len(nsegs) + 2; 829 #ifdef TSO_SUPPORTED 830 if (m->m_pkthdr.tso_segsz) 831 flits++; 832 #endif 833 return flits_to_desc(flits); 834 } 835 836 static __inline unsigned int 837 busdma_map_mbufs(struct mbuf **m, adapter_t *sc, struct tx_sw_desc *stx, 838 bus_dma_segment_t *segs, int *nsegs) 839 { 840 struct mbuf *m0, *mtmp; 841 int err, pktlen; 842 843 m0 = *m; 844 pktlen = m0->m_pkthdr.len; 845 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 846 if (err) { 847 int n = 0; 848 mtmp = m0; 849 while(mtmp) { 850 n++; 851 mtmp = mtmp->m_next; 852 } 853 #ifdef DEBUG 854 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 855 err, m0->m_pkthdr.len, n); 856 #endif 857 } 858 859 if (err == EFBIG) { 860 /* Too many segments, try to defrag */ 861 m0 = m_defrag(m0, M_NOWAIT); 862 if (m0 == NULL) { 863 m_freem(*m); 864 *m = NULL; 865 return (ENOBUFS); 866 } 867 *m = m0; 868 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 869 } 870 871 if (err == ENOMEM) { 872 return (err); 873 } 874 875 if (err) { 876 if (cxgb_debug) 877 printf("map failure err=%d pktlen=%d\n", err, pktlen); 878 m_freem(m0); 879 *m = NULL; 880 return (err); 881 } 882 883 bus_dmamap_sync(sc->tx_dmat, stx->map, BUS_DMASYNC_PREWRITE); 884 stx->flags |= TX_SW_DESC_MAPPED; 885 886 return (0); 887 } 888 889 /** 890 * make_sgl - populate a scatter/gather list for a packet 891 * @sgp: the SGL to populate 892 * @segs: the packet dma segments 893 * @nsegs: the number of segments 894 * 895 * Generates a scatter/gather list for the buffers that make up a packet 896 * and returns the SGL size in 8-byte words. The caller must size the SGL 897 * appropriately. 
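 *
 *	Sizing example: each struct sg_ent packs two address/length
 *	pairs into three flits, so for nsegs == 3 the caller needs
 *	sgl_len(3) == (3 * 3) / 2 + (3 & 1) == 5 flits; t3_encap()
 *	below sizes its on-stack SGL as sgl[TX_MAX_SEGS / 2 + 1].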
898 */ 899 static __inline void 900 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 901 { 902 int i, idx; 903 904 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { 905 if (i && idx == 0) 906 ++sgp; 907 908 sgp->len[idx] = htobe32(segs[i].ds_len); 909 sgp->addr[idx] = htobe64(segs[i].ds_addr); 910 } 911 912 if (idx) 913 sgp->len[idx] = 0; 914 } 915 916 /** 917 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 918 * @adap: the adapter 919 * @q: the Tx queue 920 * 921 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 922 * where the HW is going to sleep just after we checked, however, 923 * then the interrupt handler will detect the outstanding TX packet 924 * and ring the doorbell for us. 925 * 926 * When GTS is disabled we unconditionally ring the doorbell. 927 */ 928 static __inline void 929 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 930 { 931 #if USE_GTS 932 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 933 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 934 set_bit(TXQ_LAST_PKT_DB, &q->flags); 935 #ifdef T3_TRACE 936 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 937 q->cntxt_id); 938 #endif 939 t3_write_reg(adap, A_SG_KDOORBELL, 940 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 941 } 942 #else 943 wmb(); /* write descriptors before telling HW */ 944 t3_write_reg(adap, A_SG_KDOORBELL, 945 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 946 #endif 947 } 948 949 static __inline void 950 wr_gen2(struct tx_desc *d, unsigned int gen) 951 { 952 #if SGE_NUM_GENBITS == 2 953 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 954 #endif 955 } 956 957 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 958 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 959 960 int 961 t3_encap(struct port_info *p, struct mbuf **m) 962 { 963 adapter_t *sc; 964 struct mbuf *m0; 965 struct sge_qset *qs; 966 struct sge_txq *txq; 967 struct tx_sw_desc *stx; 968 struct txq_state txqs; 969 unsigned int nsegs, ndesc, flits, cntrl, mlen, tso_info; 970 int err; 971 972 struct work_request_hdr *wrp; 973 struct tx_sw_desc *txsd; 974 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 975 bus_dma_segment_t segs[TX_MAX_SEGS]; 976 uint32_t wr_hi, wr_lo, sgl_flits; 977 978 struct tx_desc *txd; 979 struct cpl_tx_pkt *cpl; 980 981 DPRINTF("t3_encap "); 982 m0 = *m; 983 sc = p->adapter; 984 qs = &sc->sge.qs[p->first_qset]; 985 txq = &qs->txq[TXQ_ETH]; 986 stx = &txq->sdesc[txq->pidx]; 987 txd = &txq->desc[txq->pidx]; 988 cpl = (struct cpl_tx_pkt *)txd; 989 mlen = m0->m_pkthdr.len; 990 cpl->len = htonl(mlen | 0x80000000); 991 992 DPRINTF("mlen=%d\n", mlen); 993 /* 994 * XXX handle checksum, TSO, and VLAN here 995 * 996 */ 997 cntrl = V_TXPKT_INTF(p->port); 998 999 /* 1000 * XXX need to add VLAN support for 6.x 1001 */ 1002 #ifdef VLAN_SUPPORTED 1003 if (m0->m_flags & M_VLANTAG) 1004 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 1005 1006 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1007 #else 1008 tso_info = 0; 1009 #endif 1010 if (tso_info) { 1011 int eth_type; 1012 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1013 struct ip *ip; 1014 struct tcphdr *tcp; 1015 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
*/ 1016 1017 txd->flit[2] = 0; 1018 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1019 hdr->cntrl = htonl(cntrl); 1020 1021 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1022 pkthdr = &tmp[0]; 1023 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1024 } else { 1025 pkthdr = m0->m_data; 1026 } 1027 1028 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1029 eth_type = CPL_ETH_II_VLAN; 1030 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1031 ETHER_VLAN_ENCAP_LEN); 1032 } else { 1033 eth_type = CPL_ETH_II; 1034 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1035 } 1036 tcp = (struct tcphdr *)((uint8_t *)ip + 1037 sizeof(*ip)); 1038 1039 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1040 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1041 V_LSO_TCPHDR_WORDS(tcp->th_off); 1042 hdr->lso_info = htonl(tso_info); 1043 1044 flits = 3; 1045 } else { 1046 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1047 cpl->cntrl = htonl(cntrl); 1048 1049 if (mlen <= WR_LEN - sizeof(*cpl)) { 1050 txq_prod(txq, 1, &txqs); 1051 txq->sdesc[txqs.pidx].m = m0; 1052 1053 if (m0->m_len == m0->m_pkthdr.len) 1054 memcpy(&txd->flit[2], m0->m_data, mlen); 1055 else 1056 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1057 1058 flits = (mlen + 7) / 8 + 2; 1059 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1060 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1061 F_WR_SOP | F_WR_EOP | txqs.compl); 1062 wmb(); 1063 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1064 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1065 1066 wr_gen2(txd, txqs.gen); 1067 check_ring_tx_db(sc, txq); 1068 return (0); 1069 } 1070 flits = 2; 1071 } 1072 1073 wrp = (struct work_request_hdr *)txd; 1074 1075 if ((err = busdma_map_mbufs(m, sc, stx, segs, &nsegs)) != 0) { 1076 return (err); 1077 } 1078 m0 = *m; 1079 ndesc = calc_tx_descs(m0, nsegs); 1080 1081 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0]; 1082 make_sgl(sgp, segs, nsegs); 1083 1084 sgl_flits = sgl_len(nsegs); 1085 1086 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1087 txq_prod(txq, ndesc, &txqs); 1088 txsd = &txq->sdesc[txqs.pidx]; 1089 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1090 wr_lo = htonl(V_WR_TID(txq->token)); 1091 txsd->m = m0; 1092 1093 if (__predict_true(ndesc == 1)) { 1094 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1095 V_WR_SGLSFLT(flits)) | wr_hi; 1096 wmb(); 1097 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1098 V_WR_GEN(txqs.gen)) | wr_lo; 1099 /* XXX gen? 
*/ 1100 wr_gen2(txd, txqs.gen); 1101 } else { 1102 unsigned int ogen = txqs.gen; 1103 const uint64_t *fp = (const uint64_t *)sgl; 1104 struct work_request_hdr *wp = wrp; 1105 1106 /* XXX - CHECK ME */ 1107 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1108 V_WR_SGLSFLT(flits)) | wr_hi; 1109 1110 while (sgl_flits) { 1111 unsigned int avail = WR_FLITS - flits; 1112 1113 if (avail > sgl_flits) 1114 avail = sgl_flits; 1115 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1116 sgl_flits -= avail; 1117 ndesc--; 1118 if (!sgl_flits) 1119 break; 1120 1121 fp += avail; 1122 txd++; 1123 txsd++; 1124 if (++txqs.pidx == txq->size) { 1125 txqs.pidx = 0; 1126 txqs.gen ^= 1; 1127 txd = txq->desc; 1128 txsd = txq->sdesc; 1129 } 1130 1131 /* 1132 * when the head of the mbuf chain 1133 * is freed all clusters will be freed 1134 * with it 1135 */ 1136 txsd->m = NULL; 1137 wrp = (struct work_request_hdr *)txd; 1138 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1139 V_WR_SGLSFLT(1)) | wr_hi; 1140 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1141 sgl_flits + 1)) | 1142 V_WR_GEN(txqs.gen)) | wr_lo; 1143 wr_gen2(txd, txqs.gen); 1144 flits = 1; 1145 } 1146 #ifdef WHY 1147 skb->priority = pidx; 1148 #endif 1149 wrp->wr_hi |= htonl(F_WR_EOP); 1150 wmb(); 1151 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1152 wr_gen2((struct tx_desc *)wp, ogen); 1153 } 1154 check_ring_tx_db(p->adapter, txq); 1155 1156 return (0); 1157 } 1158 1159 1160 /** 1161 * write_imm - write a packet into a Tx descriptor as immediate data 1162 * @d: the Tx descriptor to write 1163 * @m: the packet 1164 * @len: the length of packet data to write as immediate data 1165 * @gen: the generation bit value to write 1166 * 1167 * Writes a packet as immediate data into a Tx descriptor. The packet 1168 * contains a work request at its beginning. We must write the packet 1169 * carefully so the SGE doesn't read accidentally before it's written in 1170 * its entirety. 1171 */ 1172 static __inline void write_imm(struct tx_desc *d, struct mbuf *m, 1173 unsigned int len, unsigned int gen) 1174 { 1175 struct work_request_hdr *from = (struct work_request_hdr *)m->m_data; 1176 struct work_request_hdr *to = (struct work_request_hdr *)d; 1177 1178 memcpy(&to[1], &from[1], len - sizeof(*from)); 1179 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1180 V_WR_BCNTLFLT(len & 7)); 1181 wmb(); 1182 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1183 V_WR_LEN((len + 7) / 8)); 1184 wr_gen2(d, gen); 1185 m_freem(m); 1186 } 1187 1188 /** 1189 * check_desc_avail - check descriptor availability on a send queue 1190 * @adap: the adapter 1191 * @q: the TX queue 1192 * @m: the packet needing the descriptors 1193 * @ndesc: the number of Tx descriptors needed 1194 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1195 * 1196 * Checks if the requested number of Tx descriptors is available on an 1197 * SGE send queue. If the queue is already suspended or not enough 1198 * descriptors are available the packet is queued for later transmission. 1199 * Must be called with the Tx queue locked. 1200 * 1201 * Returns 0 if enough descriptors are available, 1 if there aren't 1202 * enough descriptors and the packet has been queued, and 2 if the caller 1203 * needs to retry because there weren't enough descriptors at the 1204 * beginning of the call but some freed up in the mean time. 
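 *
 *	The control-queue path handles these return values as follows
 *	(condensed from ctrl_xmit() below):
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 *		if (ret == 1) {
 *			mtx_unlock(&q->lock);
 *			return (-1);
 *		} else if (ret == 2)
 *			goto again;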
1205 */ 1206 static __inline int 1207 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1208 struct mbuf *m, unsigned int ndesc, 1209 unsigned int qid) 1210 { 1211 /* 1212 * XXX We currently only use this for checking the control queue 1213 * the control queue is only used for binding qsets which happens 1214 * at init time so we are guaranteed enough descriptors 1215 */ 1216 #if 0 1217 if (__predict_false(!skb_queue_empty(&q->sendq))) { 1218 addq_exit: __skb_queue_tail(&q->sendq, skb); 1219 return 1; 1220 } 1221 if (__predict_false(q->size - q->in_use < ndesc)) { 1222 1223 struct sge_qset *qs = txq_to_qset(q, qid); 1224 1225 set_bit(qid, &qs->txq_stopped); 1226 smp_mb__after_clear_bit(); 1227 1228 if (should_restart_tx(q) && 1229 test_and_clear_bit(qid, &qs->txq_stopped)) 1230 return 2; 1231 1232 q->stops++; 1233 goto addq_exit; 1234 } 1235 #endif 1236 return 0; 1237 } 1238 1239 1240 /** 1241 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1242 * @q: the SGE control Tx queue 1243 * 1244 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1245 * that send only immediate data (presently just the control queues) and 1246 * thus do not have any sk_buffs to release. 1247 */ 1248 static __inline void 1249 reclaim_completed_tx_imm(struct sge_txq *q) 1250 { 1251 unsigned int reclaim = q->processed - q->cleaned; 1252 1253 mtx_assert(&q->lock, MA_OWNED); 1254 1255 q->in_use -= reclaim; 1256 q->cleaned += reclaim; 1257 } 1258 1259 static __inline int 1260 immediate(const struct mbuf *m) 1261 { 1262 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1263 } 1264 1265 /** 1266 * ctrl_xmit - send a packet through an SGE control Tx queue 1267 * @adap: the adapter 1268 * @q: the control queue 1269 * @m: the packet 1270 * 1271 * Send a packet through an SGE control Tx queue. Packets sent through 1272 * a control queue must fit entirely as immediate data in a single Tx 1273 * descriptor and have no page fragments. 1274 */ 1275 static int 1276 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m) 1277 { 1278 int ret; 1279 struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data; 1280 1281 if (__predict_false(!immediate(m))) { 1282 m_freem(m); 1283 return 0; 1284 } 1285 1286 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP); 1287 wrp->wr_lo = htonl(V_WR_TID(q->token)); 1288 1289 mtx_lock(&q->lock); 1290 again: reclaim_completed_tx_imm(q); 1291 1292 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1293 if (__predict_false(ret)) { 1294 if (ret == 1) { 1295 mtx_unlock(&q->lock); 1296 return (-1); 1297 } 1298 goto again; 1299 } 1300 1301 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1302 1303 q->in_use++; 1304 if (++q->pidx >= q->size) { 1305 q->pidx = 0; 1306 q->gen ^= 1; 1307 } 1308 mtx_unlock(&q->lock); 1309 wmb(); 1310 t3_write_reg(adap, A_SG_KDOORBELL, 1311 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1312 return (0); 1313 } 1314 1315 #ifdef RESTART_CTRLQ 1316 /** 1317 * restart_ctrlq - restart a suspended control queue 1318 * @qs: the queue set cotaining the control queue 1319 * 1320 * Resumes transmission on a suspended Tx control queue. 
1321 */ 1322 static void 1323 restart_ctrlq(unsigned long data) 1324 { 1325 struct mbuf *m; 1326 struct sge_qset *qs = (struct sge_qset *)data; 1327 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1328 adapter_t *adap = qs->port->adapter; 1329 1330 mtx_lock(&q->lock); 1331 again: reclaim_completed_tx_imm(q); 1332 1333 while (q->in_use < q->size && 1334 (skb = __skb_dequeue(&q->sendq)) != NULL) { 1335 1336 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1337 1338 if (++q->pidx >= q->size) { 1339 q->pidx = 0; 1340 q->gen ^= 1; 1341 } 1342 q->in_use++; 1343 } 1344 if (!skb_queue_empty(&q->sendq)) { 1345 set_bit(TXQ_CTRL, &qs->txq_stopped); 1346 smp_mb__after_clear_bit(); 1347 1348 if (should_restart_tx(q) && 1349 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1350 goto again; 1351 q->stops++; 1352 } 1353 1354 mtx_unlock(&q->lock); 1355 t3_write_reg(adap, A_SG_KDOORBELL, 1356 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1357 } 1358 #endif 1359 1360 /* 1361 * Send a management message through control queue 0 1362 */ 1363 int 1364 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1365 { 1366 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1367 } 1368 1369 /** 1370 * t3_sge_alloc_qset - initialize an SGE queue set 1371 * @sc: the controller softc 1372 * @id: the queue set id 1373 * @nports: how many Ethernet ports will be using this queue set 1374 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1375 * @p: configuration parameters for this queue set 1376 * @ntxq: number of Tx queues for the queue set 1377 * @pi: port info for queue set 1378 * 1379 * Allocate resources and initialize an SGE queue set. A queue set 1380 * comprises a response queue, two Rx free-buffer queues, and up to 3 1381 * Tx queues. The Tx queues are assigned roles in the order Ethernet 1382 * queue, offload queue, and control queue. 1383 */ 1384 int 1385 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1386 const struct qset_params *p, int ntxq, struct port_info *pi) 1387 { 1388 struct sge_qset *q = &sc->sge.qs[id]; 1389 int i, ret = 0; 1390 1391 init_qset_cntxt(q, id); 1392 1393 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1394 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1395 &q->fl[0].desc, &q->fl[0].sdesc, 1396 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1397 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1398 printf("error %d from alloc ring fl0\n", ret); 1399 goto err; 1400 } 1401 1402 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1403 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1404 &q->fl[1].desc, &q->fl[1].sdesc, 1405 &q->fl[1].desc_tag, &q->fl[1].desc_map, 1406 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 1407 printf("error %d from alloc ring fl1\n", ret); 1408 goto err; 1409 } 1410 1411 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1412 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1413 &q->rspq.desc_tag, &q->rspq.desc_map, 1414 NULL, NULL)) != 0) { 1415 printf("error %d from alloc ring rspq\n", ret); 1416 goto err; 1417 } 1418 1419 for (i = 0; i < ntxq; ++i) { 1420 /* 1421 * The control queue always uses immediate data so does not 1422 * need to keep track of any mbufs. 1423 * XXX Placeholder for future TOE support. 1424 */ 1425 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 1426 1427 if ((ret = alloc_ring(sc, p->txq_size[i], 1428 sizeof(struct tx_desc), sz, 1429 &q->txq[i].phys_addr, &q->txq[i].desc, 1430 &q->txq[i].sdesc, &q->txq[i].desc_tag, 1431 &q->txq[i].desc_map, 1432 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 1433 printf("error %d from alloc ring tx %i\n", ret, i); 1434 goto err; 1435 } 1436 1437 q->txq[i].gen = 1; 1438 q->txq[i].size = p->txq_size[i]; 1439 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); 1440 } 1441 1442 q->fl[0].gen = q->fl[1].gen = 1; 1443 q->fl[0].size = p->fl_size; 1444 q->fl[1].size = p->jumbo_size; 1445 1446 q->rspq.gen = 1; 1447 q->rspq.size = p->rspq_size; 1448 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); 1449 1450 q->txq[TXQ_ETH].stop_thres = nports * 1451 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 1452 1453 q->fl[0].buf_size = MCLBYTES; 1454 q->fl[0].zone = zone_clust; 1455 q->fl[0].type = EXT_CLUSTER; 1456 q->fl[1].buf_size = MJUMPAGESIZE; 1457 q->fl[1].zone = zone_jumbop; 1458 q->fl[1].type = EXT_JUMBOP; 1459 1460 q->lro.enabled = lro_default; 1461 1462 mtx_lock(&sc->sge.reg_lock); 1463 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 1464 q->rspq.phys_addr, q->rspq.size, 1465 q->fl[0].buf_size, 1, 0); 1466 if (ret) { 1467 printf("error %d from t3_sge_init_rspcntxt\n", ret); 1468 goto err_unlock; 1469 } 1470 1471 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1472 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 1473 q->fl[i].phys_addr, q->fl[i].size, 1474 q->fl[i].buf_size, p->cong_thres, 1, 1475 0); 1476 if (ret) { 1477 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 1478 goto err_unlock; 1479 } 1480 } 1481 1482 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 1483 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 1484 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1485 1, 0); 1486 if (ret) { 1487 printf("error %d from t3_sge_init_ecntxt\n", ret); 1488 goto err_unlock; 1489 } 1490 1491 if (ntxq > 1) { 1492 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 1493 USE_GTS, SGE_CNTXT_OFLD, id, 1494 q->txq[TXQ_OFLD].phys_addr, 1495 q->txq[TXQ_OFLD].size, 0, 1, 0); 1496 if (ret) { 1497 printf("error %d from t3_sge_init_ecntxt\n", ret); 1498 goto err_unlock; 1499 } 1500 } 1501 1502 if (ntxq > 2) { 1503 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 1504 SGE_CNTXT_CTRL, id, 1505 q->txq[TXQ_CTRL].phys_addr, 1506 q->txq[TXQ_CTRL].size, 1507 q->txq[TXQ_CTRL].token, 1, 0); 1508 if (ret) { 1509 printf("error %d from t3_sge_init_ecntxt\n", ret); 1510 goto err_unlock; 1511 } 1512 } 1513 1514 mtx_unlock(&sc->sge.reg_lock); 1515 t3_update_qset_coalesce(q, p); 1516 q->port = pi; 1517 1518 refill_fl(sc, &q->fl[0], q->fl[0].size); 1519 refill_fl(sc, &q->fl[1], q->fl[1].size); 1520 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 1521 1522 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 1523 V_NEWTIMER(q->rspq.holdoff_tmr)); 1524 1525 return (0); 1526 1527 err_unlock: 1528 mtx_unlock(&sc->sge.reg_lock); 1529 err: 1530 t3_free_qset(sc, q); 1531 1532 return (ret); 1533 } 1534 1535 1536 /** 1537 * free_qset - free the resources of an SGE queue set 1538 * @sc: the controller owning the queue set 1539 * @q: the queue set 1540 * 1541 * Release the HW and SW resources associated with an SGE queue set, such 1542 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1543 * queue set must be quiesced prior to calling this. 
1544 */ 1545 static void 1546 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1547 { 1548 int i; 1549 1550 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1551 if (q->fl[i].desc) { 1552 mtx_lock(&sc->sge.reg_lock); 1553 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1554 mtx_unlock(&sc->sge.reg_lock); 1555 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1556 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1557 q->fl[i].desc_map); 1558 bus_dma_tag_destroy(q->fl[i].desc_tag); 1559 bus_dma_tag_destroy(q->fl[i].entry_tag); 1560 } 1561 if (q->fl[i].sdesc) { 1562 free_rx_bufs(sc, &q->fl[i]); 1563 free(q->fl[i].sdesc, M_DEVBUF); 1564 } 1565 } 1566 1567 for (i = 0; i < SGE_TXQ_PER_SET; ++i) { 1568 if (q->txq[i].desc) { 1569 mtx_lock(&sc->sge.reg_lock); 1570 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1571 mtx_unlock(&sc->sge.reg_lock); 1572 bus_dmamap_unload(q->txq[i].desc_tag, 1573 q->txq[i].desc_map); 1574 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1575 q->txq[i].desc_map); 1576 bus_dma_tag_destroy(q->txq[i].desc_tag); 1577 bus_dma_tag_destroy(q->txq[i].entry_tag); 1578 } 1579 if (q->txq[i].sdesc) { 1580 free(q->txq[i].sdesc, M_DEVBUF); 1581 } 1582 if (mtx_initialized(&q->txq[i].lock)) { 1583 mtx_destroy(&q->txq[i].lock); 1584 } 1585 } 1586 1587 if (q->rspq.desc) { 1588 mtx_lock(&sc->sge.reg_lock); 1589 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1590 mtx_unlock(&sc->sge.reg_lock); 1591 1592 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1593 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1594 q->rspq.desc_map); 1595 bus_dma_tag_destroy(q->rspq.desc_tag); 1596 } 1597 1598 if (mtx_initialized(&q->rspq.lock)) 1599 mtx_destroy(&q->rspq.lock); 1600 1601 bzero(q, sizeof(*q)); 1602 } 1603 1604 /** 1605 * t3_free_sge_resources - free SGE resources 1606 * @sc: the adapter softc 1607 * 1608 * Frees resources used by the SGE queue sets. 1609 */ 1610 void 1611 t3_free_sge_resources(adapter_t *sc) 1612 { 1613 int i; 1614 1615 for (i = 0; i < SGE_QSETS; ++i) 1616 t3_free_qset(sc, &sc->sge.qs[i]); 1617 } 1618 1619 /** 1620 * t3_sge_start - enable SGE 1621 * @sc: the controller softc 1622 * 1623 * Enables the SGE for DMAs. This is the last step in starting packet 1624 * transfers. 1625 */ 1626 void 1627 t3_sge_start(adapter_t *sc) 1628 { 1629 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1630 } 1631 1632 1633 /** 1634 * free_tx_desc - reclaims Tx descriptors and their buffers 1635 * @adapter: the adapter 1636 * @q: the Tx queue to reclaim descriptors from 1637 * @n: the number of descriptors to reclaim 1638 * 1639 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1640 * Tx buffers. Called with the Tx queue lock held. 
1641 */ 1642 int 1643 free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1644 { 1645 struct tx_sw_desc *d; 1646 unsigned int cidx = q->cidx; 1647 int nbufs = 0; 1648 1649 #ifdef T3_TRACE 1650 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1651 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1652 #endif 1653 d = &q->sdesc[cidx]; 1654 1655 while (n-- > 0) { 1656 DPRINTF("cidx=%d d=%p\n", cidx, d); 1657 if (d->m) { 1658 if (d->flags & TX_SW_DESC_MAPPED) { 1659 bus_dmamap_unload(q->entry_tag, d->map); 1660 bus_dmamap_destroy(q->entry_tag, d->map); 1661 d->flags &= ~TX_SW_DESC_MAPPED; 1662 } 1663 m_vec[nbufs] = d->m; 1664 d->m = NULL; 1665 nbufs++; 1666 } 1667 ++d; 1668 if (++cidx == q->size) { 1669 cidx = 0; 1670 d = q->sdesc; 1671 } 1672 } 1673 q->cidx = cidx; 1674 1675 return (nbufs); 1676 } 1677 1678 /** 1679 * is_new_response - check if a response is newly written 1680 * @r: the response descriptor 1681 * @q: the response queue 1682 * 1683 * Returns true if a response descriptor contains a yet unprocessed 1684 * response. 1685 */ 1686 static __inline int 1687 is_new_response(const struct rsp_desc *r, 1688 const struct sge_rspq *q) 1689 { 1690 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1691 } 1692 1693 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1694 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1695 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1696 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1697 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1698 1699 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1700 #define NOMEM_INTR_DELAY 2500 1701 1702 static __inline void 1703 deliver_partial_bundle(struct t3cdev *tdev, struct sge_rspq *q) 1704 { 1705 ; 1706 } 1707 1708 static __inline void 1709 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 1710 struct mbuf *m) 1711 { 1712 #ifdef notyet 1713 if (rq->polling) { 1714 rq->offload_skbs[rq->offload_skbs_idx++] = skb; 1715 if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) { 1716 cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE); 1717 rq->offload_skbs_idx = 0; 1718 rq->offload_bundles++; 1719 } 1720 } else 1721 #endif 1722 { 1723 /* XXX */ 1724 panic("implement offload enqueue\n"); 1725 } 1726 1727 } 1728 1729 static void 1730 restart_tx(struct sge_qset *qs) 1731 { 1732 ; 1733 } 1734 1735 void 1736 t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 1737 { 1738 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad); 1739 struct ifnet *ifp = pi->ifp; 1740 1741 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff); 1742 if (&pi->adapter->port[cpl->iff] != pi) 1743 panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data); 1744 1745 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 1746 cpl->csum_valid && cpl->csum == 0xffff) { 1747 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 1748 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 1749 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 1750 m->m_pkthdr.csum_data = 0xffff; 1751 } 1752 /* 1753 * XXX need to add VLAN support for 6.x 1754 */ 1755 #ifdef VLAN_SUPPORTED 1756 if (__predict_false(cpl->vlan_valid)) { 1757 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 1758 m->m_flags |= M_VLANTAG; 1759 } 1760 #endif 1761 m->m_pkthdr.rcvif = ifp; 1762 1763 m_adj(m, sizeof(*cpl) + ethpad); 1764 1765 (*ifp->if_input)(ifp, m); 1766 } 1767 1768 /** 1769 * get_packet - return the next ingress packet buffer from a free list 1770 * @adap: the adapter that received 
the packet 1771 * @drop_thres: # of remaining buffers before we start dropping packets 1772 * @qs: the qset that the SGE free list holding the packet belongs to 1773 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 1774 * @r: response descriptor 1775 * 1776 * Get the next packet from a free list and complete setup of the 1777 * sk_buff. If the packet is small we make a copy and recycle the 1778 * original buffer, otherwise we use the original buffer itself. If a 1779 * positive drop threshold is supplied packets are dropped and their 1780 * buffers recycled if (a) the number of remaining buffers is under the 1781 * threshold and the packet is too big to copy, or (b) the packet should 1782 * be copied but there is no memory for the copy. 1783 */ 1784 1785 static int 1786 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 1787 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m) 1788 { 1789 1790 unsigned int len_cq = ntohl(r->len_cq); 1791 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 1792 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 1793 uint32_t len = G_RSPD_LEN(len_cq); 1794 uint32_t flags = ntohl(r->flags); 1795 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 1796 int ret = 0; 1797 1798 prefetch(sd->cl); 1799 1800 fl->credits--; 1801 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 1802 bus_dmamap_unload(fl->entry_tag, sd->map); 1803 1804 m_cljset(m, sd->cl, fl->type); 1805 m->m_len = len; 1806 1807 switch(sopeop) { 1808 case RSPQ_SOP_EOP: 1809 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 1810 mh->mh_head = mh->mh_tail = m; 1811 m->m_pkthdr.len = len; 1812 m->m_flags |= M_PKTHDR; 1813 ret = 1; 1814 break; 1815 case RSPQ_NSOP_NEOP: 1816 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 1817 m->m_flags &= ~M_PKTHDR; 1818 if (mh->mh_tail == NULL) { 1819 if (cxgb_debug) 1820 printf("discarding intermediate descriptor entry\n"); 1821 m_freem(m); 1822 break; 1823 } 1824 mh->mh_tail->m_next = m; 1825 mh->mh_tail = m; 1826 mh->mh_head->m_pkthdr.len += len; 1827 ret = 0; 1828 break; 1829 case RSPQ_SOP: 1830 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 1831 m->m_pkthdr.len = len; 1832 mh->mh_head = mh->mh_tail = m; 1833 m->m_flags |= M_PKTHDR; 1834 ret = 0; 1835 break; 1836 case RSPQ_EOP: 1837 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 1838 m->m_flags &= ~M_PKTHDR; 1839 mh->mh_head->m_pkthdr.len += len; 1840 mh->mh_tail->m_next = m; 1841 mh->mh_tail = m; 1842 ret = 1; 1843 break; 1844 } 1845 if (++fl->cidx == fl->size) 1846 fl->cidx = 0; 1847 1848 return (ret); 1849 } 1850 1851 1852 /** 1853 * handle_rsp_cntrl_info - handles control information in a response 1854 * @qs: the queue set corresponding to the response 1855 * @flags: the response control flags 1856 * 1857 * Handles the control information of an SGE response, such as GTS 1858 * indications and completion credits for the queue set's Tx queues. 1859 * HW coalesces credits, we don't do any extra SW coalescing. 
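 *
 *	For example, a response whose flags decode to
 *	G_RSPD_TXQ0_CR(flags) == 2 advances qs->txq[TXQ_ETH].processed
 *	by two completed descriptors, which may in turn schedule the
 *	reclaim task once desc_reclaimable() exceeds TX_START_MAX_DESC.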
1860 */ 1861 static __inline void 1862 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 1863 { 1864 unsigned int credits; 1865 1866 #if USE_GTS 1867 if (flags & F_RSPD_TXQ0_GTS) 1868 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 1869 #endif 1870 credits = G_RSPD_TXQ0_CR(flags); 1871 if (credits) { 1872 qs->txq[TXQ_ETH].processed += credits; 1873 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 1874 taskqueue_enqueue(qs->port->adapter->tq, 1875 &qs->port->adapter->timer_reclaim_task); 1876 } 1877 1878 credits = G_RSPD_TXQ2_CR(flags); 1879 if (credits) 1880 qs->txq[TXQ_CTRL].processed += credits; 1881 1882 # if USE_GTS 1883 if (flags & F_RSPD_TXQ1_GTS) 1884 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 1885 # endif 1886 credits = G_RSPD_TXQ1_CR(flags); 1887 if (credits) 1888 qs->txq[TXQ_OFLD].processed += credits; 1889 } 1890 1891 static void 1892 check_ring_db(adapter_t *adap, struct sge_qset *qs, 1893 unsigned int sleeping) 1894 { 1895 ; 1896 } 1897 1898 /* 1899 * This is an awful hack to bind the ithread to CPU 1 1900 * to work around lack of ithread affinity 1901 */ 1902 static void 1903 bind_ithread(int cpu) 1904 { 1905 KASSERT(cpu < mp_ncpus, ("invalid cpu identifier")); 1906 #if 0 1907 if (mp_ncpus > 1) { 1908 mtx_lock_spin(&sched_lock); 1909 sched_bind(curthread, cpu); 1910 mtx_unlock_spin(&sched_lock); 1911 } 1912 #endif 1913 } 1914 1915 /** 1916 * process_responses - process responses from an SGE response queue 1917 * @adap: the adapter 1918 * @qs: the queue set to which the response queue belongs 1919 * @budget: how many responses can be processed in this round 1920 * 1921 * Process responses from an SGE response queue up to the supplied budget. 1922 * Responses include received packets as well as credits and other events 1923 * for the queues that belong to the response queue's queue set. 1924 * A negative budget is effectively unlimited. 1925 * 1926 * Additionally choose the interrupt holdoff time for the next interrupt 1927 * on this queue. If the system is under memory shortage use a fairly 1928 * long delay to help recovery. 
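 *
 *	The interrupt paths below always call this with an unlimited
 *	budget, via
 *
 *		work = process_responses(adap, rspq_to_qset(rq), -1);
 *
 *	in process_responses_gts().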
1929 */ 1930 static int 1931 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 1932 { 1933 struct sge_rspq *rspq = &qs->rspq; 1934 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 1935 int budget_left = budget; 1936 unsigned int sleeping = 0; 1937 int lro = qs->lro.enabled; 1938 1939 static uint8_t pinned[MAXCPU]; 1940 1941 #ifdef DEBUG 1942 static int last_holdoff = 0; 1943 if (rspq->holdoff_tmr != last_holdoff) { 1944 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 1945 last_holdoff = rspq->holdoff_tmr; 1946 } 1947 #endif 1948 if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) { 1949 /* 1950 * Assumes that cntxt_id < mp_ncpus 1951 */ 1952 bind_ithread(qs->rspq.cntxt_id); 1953 pinned[qs->rspq.cntxt_id * adap->params.nports] = 1; 1954 } 1955 rspq->next_holdoff = rspq->holdoff_tmr; 1956 1957 while (__predict_true(budget_left && is_new_response(r, rspq))) { 1958 int eth, eop = 0, ethpad = 0; 1959 uint32_t flags = ntohl(r->flags); 1960 uint32_t rss_csum = *(const uint32_t *)r; 1961 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 1962 1963 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 1964 1965 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 1966 /* XXX */ 1967 printf("async notification\n"); 1968 1969 } else if (flags & F_RSPD_IMM_DATA_VALID) { 1970 if (cxgb_debug) 1971 printf("IMM DATA VALID\n"); 1972 1973 if(get_imm_packet(adap, r, &rspq->mh) == 0) { 1974 rspq->next_holdoff = NOMEM_INTR_DELAY; 1975 budget_left--; 1976 break; 1977 } else { 1978 eop = 1; 1979 } 1980 1981 rspq->imm_data++; 1982 } else if (r->len_cq) { 1983 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 1984 struct mbuf *m; 1985 1986 m = m_gethdr(M_NOWAIT, MT_DATA); 1987 1988 if (m == NULL) { 1989 log(LOG_WARNING, "failed to get mbuf for packet\n"); 1990 break; 1991 } 1992 1993 ethpad = 2; 1994 eop = get_packet(adap, drop_thresh, qs, &rspq->mh, r, m); 1995 } else { 1996 DPRINTF("pure response\n"); 1997 rspq->pure_rsps++; 1998 } 1999 2000 if (flags & RSPD_CTRL_MASK) { 2001 sleeping |= flags & RSPD_GTS_MASK; 2002 handle_rsp_cntrl_info(qs, flags); 2003 } 2004 2005 r++; 2006 if (__predict_false(++rspq->cidx == rspq->size)) { 2007 rspq->cidx = 0; 2008 rspq->gen ^= 1; 2009 r = rspq->desc; 2010 } 2011 2012 prefetch(r); 2013 if (++rspq->credits >= (rspq->size / 4)) { 2014 refill_rspq(adap, rspq, rspq->credits); 2015 rspq->credits = 0; 2016 } 2017 2018 if (eop) { 2019 prefetch(rspq->mh.mh_head->m_data); 2020 prefetch(rspq->mh.mh_head->m_data + L1_CACHE_BYTES); 2021 2022 if (eth) { 2023 t3_rx_eth_lro(adap, rspq, &rspq->mh, ethpad, 2024 rss_hash, rss_csum, lro); 2025 2026 rspq->mh.mh_tail = rspq->mh.mh_head = NULL; 2027 } else { 2028 #ifdef notyet 2029 if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT)) 2030 m_adj(m, 2); 2031 2032 rx_offload(&adap->tdev, rspq, m); 2033 #endif 2034 } 2035 #ifdef notyet 2036 taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task); 2037 #else 2038 __refill_fl(adap, &qs->fl[0]); 2039 __refill_fl(adap, &qs->fl[1]); 2040 #endif 2041 2042 } 2043 --budget_left; 2044 } 2045 t3_sge_lro_flush_all(adap, qs); 2046 deliver_partial_bundle(&adap->tdev, rspq); 2047 2048 if (sleeping) 2049 check_ring_db(adap, qs, sleeping); 2050 2051 smp_mb(); /* commit Tx queue processed updates */ 2052 if (__predict_false(qs->txq_stopped != 0)) 2053 restart_tx(qs); 2054 2055 budget -= budget_left; 2056 return (budget); 2057 } 2058 2059 /* 2060 * A helper function that processes responses and issues GTS. 

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
}

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}
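
/*
 * Illustrative sketch, not driver code: in the handler above, bit i of the
 * A_SG_DATA_INTR value corresponds to response queue i.  A generalized
 * dispatch over an arbitrary number of response queues might look like the
 * following; the function name and the nqueues parameter are assumptions
 * for illustration only.
 */
#if 0
static void
example_dispatch_map(adapter_t *adap, uint32_t map, int nqueues)
{
	int i;

	/* All response queues share queue 0's lock in INTx mode. */
	mtx_lock(&adap->sge.qs[0].rspq.lock);
	for (i = 0; i < nqueues; i++)
		if (map & (1U << i))
			process_responses_gts(adap, &adap->sge.qs[i].rspq);
	mtx_unlock(&adap->sge.qs[0].rspq.lock);
}
#endif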

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
	int new_packets = 0;

	mtx_lock(&q0->lock);
	if (process_responses_gts(adap, q0)) {
		new_packets = 1;
	}

	if (adap->params.nports == 2 &&
	    process_responses_gts(adap, q1)) {
		new_packets = 1;
	}

	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

void
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	mtx_lock(&rspq->lock);
	if (process_responses_gts(adap, rspq) == 0) {
#ifdef notyet
		rspq->unhandled_irqs++;
#endif
	}
	mtx_unlock(&rspq->lock);
}

static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc;
	int i, j, enabled, err, nqsets = 0;

	sc = arg1;
	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (err != 0) {
		return (err);
	}
	if (enabled == sc->sge.qs[0].lro.enabled)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	for (i = 0; i < nqsets; i++) {
		sc->sge.qs[i].lro.enabled = enabled;
	}

	return (0);
}
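
/*
 * Illustrative usage, not driver code: the handler above backs the
 * "enable_lro" node created in t3_add_sysctls() under the adapter's device
 * sysctl tree.  Assuming the adapter appears as unit 0 of the "cxgb" driver
 * (the node path below is an assumption), LRO could be toggled from a
 * userland program roughly as follows, or with
 * "sysctl dev.cxgb.0.enable_lro=1" from the shell:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int enable = 1;
 *
 *	if (sysctlbyname("dev.cxgb.0.enable_lro", NULL, NULL,
 *	    &enable, sizeof(enable)) == -1)
 *		perror("sysctlbyname");
 */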

static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_nsecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	coalesce_nsecs = qsp->coalesce_nsecs;
	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);

	if (err != 0) {
		return (err);
	}
	if (coalesce_nsecs == qsp->coalesce_nsecs)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	coalesce_nsecs = max(100, coalesce_nsecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_nsecs = coalesce_nsecs;

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "enable_lro",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_lro_enable,
	    "I", "enable large receive offload");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");
}

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor, or EINVAL if the queue or index is invalid.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
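
/*
 * Illustrative sketch, not part of the driver: a hypothetical debug helper
 * that uses t3_get_desc() to hex-dump a single hardware descriptor.  The
 * function name is an assumption; the union is sized to hold the largest of
 * the three descriptor formats.
 */
#if 0
static void
example_dump_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx)
{
	union {
		struct tx_desc txd;
		struct rsp_desc rspd;
		struct rx_desc rxd;
	} u;
	unsigned char *buf = (unsigned char *)&u;
	int i, len;

	len = t3_get_desc(qs, qnum, idx, buf);
	if (len == EINVAL) {
		printf("invalid queue %u or index %u\n", qnum, idx);
		return;
	}
	for (i = 0; i < len; i++)
		printf("%02x%s", buf[i], ((i & 0xf) == 0xf) ? "\n" : " ");
	if (len & 0xf)
		printf("\n");
}
#endif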