1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Chelsio Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/bus_dma.h> 46 #include <sys/rman.h> 47 #include <sys/queue.h> 48 #include <sys/sysctl.h> 49 #include <sys/taskqueue.h> 50 51 52 #include <sys/proc.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/systm.h> 56 57 #include <netinet/in_systm.h> 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/tcp.h> 61 62 #include <dev/pci/pcireg.h> 63 #include <dev/pci/pcivar.h> 64 #include <dev/cxgb/common/cxgb_common.h> 65 #include <dev/cxgb/common/cxgb_regs.h> 66 #include <dev/cxgb/common/cxgb_sge_defs.h> 67 #include <dev/cxgb/common/cxgb_t3_cpl.h> 68 #include <dev/cxgb/common/cxgb_firmware_exports.h> 69 70 #include <dev/cxgb/sys/mvec.h> 71 72 #define USE_GTS 0 73 74 #define SGE_RX_SM_BUF_SIZE 1536 75 #define SGE_RX_DROP_THRES 16 76 77 /* 78 * Period of the Tx buffer reclaim timer. This timer does not need to run 79 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
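 * The period is hz/4 ticks, so the reclaim callout fires roughly four times
 * per second.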
 */
#define TX_RECLAIM_PERIOD	(hz >> 2)

/*
 * Work request size in bytes.
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	void		*cl;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int		  error;
	bus_dma_segment_t seg;
	int		  nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @adapter: the adapter
 * @q: the Tx queue to reclaim completed descriptors from
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
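 * Returns the number of freed mbufs collected into @mvec; the caller is
 * responsible for m_freem()ing them once the queue lock has been dropped.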
190 */ 191 static __inline int 192 reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec) 193 { 194 int reclaimed, reclaim = desc_reclaimable(q); 195 int n = 0; 196 197 mtx_assert(&q->lock, MA_OWNED); 198 199 if (reclaim > 0) { 200 n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec); 201 reclaimed = min(reclaim, nbufs); 202 q->cleaned += reclaimed; 203 q->in_use -= reclaimed; 204 } 205 206 return (n); 207 } 208 209 /** 210 * t3_sge_init - initialize SGE 211 * @adap: the adapter 212 * @p: the SGE parameters 213 * 214 * Performs SGE initialization needed every time after a chip reset. 215 * We do not initialize any of the queue sets here, instead the driver 216 * top-level must request those individually. We also do not enable DMA 217 * here, that should be done after the queues have been set up. 218 */ 219 void 220 t3_sge_init(adapter_t *adap, struct sge_params *p) 221 { 222 u_int ctrl, ups; 223 224 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 225 226 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 227 F_CQCRDTCTRL | 228 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 229 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 230 #if SGE_NUM_GENBITS == 1 231 ctrl |= F_EGRGENCTRL; 232 #endif 233 if (adap->params.rev > 0) { 234 if (!(adap->flags & (USING_MSIX | USING_MSI))) 235 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 236 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 237 } 238 t3_write_reg(adap, A_SG_CONTROL, ctrl); 239 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 240 V_LORCQDRBTHRSH(512)); 241 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 242 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 243 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 244 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 245 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 246 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 247 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 248 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 249 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 250 } 251 252 253 /** 254 * sgl_len - calculates the size of an SGL of the given capacity 255 * @n: the number of SGL entries 256 * 257 * Calculates the number of flits needed for a scatter/gather list that 258 * can hold the given number of entries. 259 */ 260 static __inline unsigned int 261 sgl_len(unsigned int n) 262 { 263 return ((3 * n) / 2 + (n & 1)); 264 } 265 266 /** 267 * get_imm_packet - return the next ingress packet buffer from a response 268 * @resp: the response descriptor containing the packet data 269 * 270 * Return a packet containing the immediate data of the given response. 
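 * Returns nonzero if an mbuf could be allocated and filled in, 0 on
 * allocation failure or if the SOP/EOP flags are inconsistent.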
271 */ 272 static __inline int 273 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl) 274 { 275 int len; 276 uint32_t flags = ntohl(resp->flags); 277 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 278 279 /* 280 * would be a firmware bug 281 */ 282 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 283 return (0); 284 285 m = m_gethdr(M_NOWAIT, MT_DATA); 286 len = G_RSPD_LEN(ntohl(resp->len_cq)); 287 288 if (m) { 289 MH_ALIGN(m, IMMED_PKT_SIZE); 290 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 291 m->m_len = len; 292 293 switch (sopeop) { 294 case RSPQ_SOP_EOP: 295 m->m_len = m->m_pkthdr.len = len; 296 m->m_flags |= M_PKTHDR; 297 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 298 MH_ALIGN(m, IMMED_PKT_SIZE); 299 break; 300 case RSPQ_EOP: 301 memcpy(cl, resp->imm_data, len); 302 m_iovappend(m, cl, MSIZE, len, 0); 303 break; 304 } 305 } 306 return (m != NULL); 307 } 308 309 310 static __inline u_int 311 flits_to_desc(u_int n) 312 { 313 return (flit_desc_map[n]); 314 } 315 316 void 317 t3_sge_err_intr_handler(adapter_t *adapter) 318 { 319 unsigned int v, status; 320 321 322 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 323 324 if (status & F_RSPQCREDITOVERFOW) 325 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 326 327 if (status & F_RSPQDISABLED) { 328 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 329 330 CH_ALERT(adapter, 331 "packet delivered to disabled response queue (0x%x)\n", 332 (v >> S_RSPQ0DISABLED) & 0xff); 333 } 334 335 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 336 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 337 t3_fatal_err(adapter); 338 } 339 340 void 341 t3_sge_prep(adapter_t *adap, struct sge_params *p) 342 { 343 int i; 344 345 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 346 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 347 348 for (i = 0; i < SGE_QSETS; ++i) { 349 struct qset_params *q = p->qset + i; 350 351 q->polling = adap->params.rev > 0; 352 353 if (adap->flags & USING_MSIX) 354 q->coalesce_nsecs = 6000; 355 else 356 q->coalesce_nsecs = 3500; 357 358 q->rspq_size = RSPQ_Q_SIZE; 359 q->fl_size = FL_Q_SIZE; 360 q->jumbo_size = JUMBO_Q_SIZE; 361 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 362 q->txq_size[TXQ_OFLD] = 1024; 363 q->txq_size[TXQ_CTRL] = 256; 364 q->cong_thres = 0; 365 } 366 } 367 368 int 369 t3_sge_alloc(adapter_t *sc) 370 { 371 372 /* The parent tag. */ 373 if (bus_dma_tag_create( NULL, /* parent */ 374 1, 0, /* algnmnt, boundary */ 375 BUS_SPACE_MAXADDR, /* lowaddr */ 376 BUS_SPACE_MAXADDR, /* highaddr */ 377 NULL, NULL, /* filter, filterarg */ 378 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 379 BUS_SPACE_UNRESTRICTED, /* nsegments */ 380 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 381 0, /* flags */ 382 NULL, NULL, /* lock, lockarg */ 383 &sc->parent_dmat)) { 384 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 385 return (ENOMEM); 386 } 387 388 /* 389 * DMA tag for normal sized RX frames 390 */ 391 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 392 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 393 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 394 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 395 return (ENOMEM); 396 } 397 398 /* 399 * DMA tag for jumbo sized RX frames. 
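	 * Each jumbo buffer is a single MJUMPAGESIZE cluster mapped as one
	 * DMA segment.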
400 */ 401 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 402 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 403 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 404 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 405 return (ENOMEM); 406 } 407 408 /* 409 * DMA tag for TX frames. 410 */ 411 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 412 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 413 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 414 NULL, NULL, &sc->tx_dmat)) { 415 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 416 return (ENOMEM); 417 } 418 419 return (0); 420 } 421 422 int 423 t3_sge_free(struct adapter * sc) 424 { 425 426 if (sc->tx_dmat != NULL) 427 bus_dma_tag_destroy(sc->tx_dmat); 428 429 if (sc->rx_jumbo_dmat != NULL) 430 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 431 432 if (sc->rx_dmat != NULL) 433 bus_dma_tag_destroy(sc->rx_dmat); 434 435 if (sc->parent_dmat != NULL) 436 bus_dma_tag_destroy(sc->parent_dmat); 437 438 return (0); 439 } 440 441 void 442 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 443 { 444 445 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 446 qs->rspq.polling = 0 /* p->polling */; 447 } 448 449 static void 450 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 451 { 452 struct refill_fl_cb_arg *cb_arg = arg; 453 454 cb_arg->error = error; 455 cb_arg->seg = segs[0]; 456 cb_arg->nseg = nseg; 457 458 } 459 460 /** 461 * refill_fl - refill an SGE free-buffer list 462 * @sc: the controller softc 463 * @q: the free-list to refill 464 * @n: the number of new buffers to allocate 465 * 466 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 467 * The caller must assure that @n does not exceed the queue's capacity. 
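 * The new buffers are made visible to the hardware by a single write to
 * the free-list doorbell (A_SG_KDOORBELL) once the refill loop completes.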
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx.
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				/*
				 * XXX free cluster
				 */
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
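 * Clusters still marked in use are unmapped, their DMA maps destroyed, and
 * the clusters themselves returned to the free list's UMA zone.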
539 */ 540 static void 541 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 542 { 543 u_int cidx = q->cidx; 544 545 while (q->credits--) { 546 struct rx_sw_desc *d = &q->sdesc[cidx]; 547 548 if (d->flags & RX_SW_DESC_INUSE) { 549 bus_dmamap_unload(q->entry_tag, d->map); 550 bus_dmamap_destroy(q->entry_tag, d->map); 551 uma_zfree(q->zone, d->cl); 552 } 553 d->cl = NULL; 554 if (++cidx == q->size) 555 cidx = 0; 556 } 557 } 558 559 static __inline void 560 __refill_fl(adapter_t *adap, struct sge_fl *fl) 561 { 562 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 563 } 564 565 static void 566 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 567 { 568 uint32_t *addr; 569 570 addr = arg; 571 *addr = segs[0].ds_addr; 572 } 573 574 static int 575 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 576 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 577 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 578 { 579 size_t len = nelem * elem_size; 580 void *s = NULL; 581 void *p = NULL; 582 int err; 583 584 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 585 BUS_SPACE_MAXADDR_32BIT, 586 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 587 len, 0, NULL, NULL, tag)) != 0) { 588 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 589 return (ENOMEM); 590 } 591 592 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 593 map)) != 0) { 594 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 595 return (ENOMEM); 596 } 597 598 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 599 bzero(p, len); 600 *(void **)desc = p; 601 602 if (sw_size) { 603 len = nelem * sw_size; 604 s = malloc(len, M_DEVBUF, M_WAITOK); 605 bzero(s, len); 606 *(void **)sdesc = s; 607 } 608 if (parent_entry_tag == NULL) 609 return (0); 610 611 if ((err = bus_dma_tag_create(parent_entry_tag, PAGE_SIZE, 0, 612 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 613 NULL, NULL, PAGE_SIZE, 1, 614 PAGE_SIZE, BUS_DMA_ALLOCNOW, 615 NULL, NULL, entry_tag)) != 0) { 616 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 617 return (ENOMEM); 618 } 619 return (0); 620 } 621 622 static void 623 sge_slow_intr_handler(void *arg, int ncount) 624 { 625 adapter_t *sc = arg; 626 627 t3_slow_intr_handler(sc); 628 } 629 630 static void 631 sge_timer_cb(void *arg) 632 { 633 adapter_t *sc = arg; 634 struct sge_qset *qs; 635 struct sge_txq *txq; 636 int i, j; 637 int reclaim_eth, reclaim_ofl, refill_rx; 638 639 for (i = 0; i < sc->params.nports; i++) 640 for (j = 0; j < sc->port[i].nqsets; j++) { 641 qs = &sc->sge.qs[i + j]; 642 txq = &qs->txq[0]; 643 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 644 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 645 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 646 (qs->fl[1].credits < qs->fl[1].size)); 647 if (reclaim_eth || reclaim_ofl || refill_rx) { 648 taskqueue_enqueue(sc->tq, &sc->timer_reclaim_task); 649 goto done; 650 } 651 } 652 done: 653 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 654 } 655 656 /* 657 * This is meant to be a catch-all function to keep sge state private 658 * to sge.c 659 * 660 */ 661 int 662 t3_sge_init_sw(adapter_t *sc) 663 { 664 665 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 666 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 667 TASK_INIT(&sc->timer_reclaim_task, 0, sge_timer_reclaim, sc); 668 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 669 return (0); 670 } 
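/*
 * Undo t3_sge_init_sw: stop the Tx reclaim callout and wait for any
 * reclaim or slow-interrupt tasks still queued on the adapter taskqueue.
 */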
void
t3_sge_deinit_sw(adapter_t *sc)
{
	callout_drain(&sc->sge_timer_ch);
	if (sc->tq) {
		taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	}
}

/**
 * refill_rspq - replenish an SGE response queue
 * @sc: the adapter
 * @q: the response queue to replenish
 * @credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
	    V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}


static void
sge_timer_reclaim(void *arg, int ncount)
{
	adapter_t *sc = arg;
	int i, j, nqsets = 0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	int n, reclaimable;
	/*
	 * XXX assuming these quantities are allowed to change during operation
	 */
	for (i = 0; i < sc->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		txq = &qs->txq[TXQ_ETH];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem(m_vec[j]);
			}
		}

		txq = &qs->txq[TXQ_OFLD];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++) {
				m_freem(m_vec[j]);
			}
		}

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
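 * The two free lists get context ids 2*id and 2*id+1; the egress contexts
 * and tokens are carved out of the FW_TUNNEL/FW_OFLD/FW_CTRL ranges
 * exported by the firmware.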
774 */ 775 static void 776 init_qset_cntxt(struct sge_qset *qs, u_int id) 777 { 778 779 qs->rspq.cntxt_id = id; 780 qs->fl[0].cntxt_id = 2 * id; 781 qs->fl[1].cntxt_id = 2 * id + 1; 782 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 783 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 784 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 785 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 786 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 787 } 788 789 790 static void 791 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 792 { 793 txq->in_use += ndesc; 794 /* 795 * XXX we don't handle stopping of queue 796 * presumably start handles this when we bump against the end 797 */ 798 txqs->gen = txq->gen; 799 txq->unacked += ndesc; 800 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 801 txq->unacked &= 7; 802 txqs->pidx = txq->pidx; 803 txq->pidx += ndesc; 804 805 if (txq->pidx >= txq->size) { 806 txq->pidx -= txq->size; 807 txq->gen ^= 1; 808 } 809 810 } 811 812 /** 813 * calc_tx_descs - calculate the number of Tx descriptors for a packet 814 * @m: the packet mbufs 815 * @nsegs: the number of segments 816 * 817 * Returns the number of Tx descriptors needed for the given Ethernet 818 * packet. Ethernet packets require addition of WR and CPL headers. 819 */ 820 static __inline unsigned int 821 calc_tx_descs(const struct mbuf *m, int nsegs) 822 { 823 unsigned int flits; 824 825 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 826 return 1; 827 828 flits = sgl_len(nsegs) + 2; 829 #ifdef TSO_SUPPORTED 830 if (m->m_pkthdr.tso_segsz) 831 flits++; 832 #endif 833 return flits_to_desc(flits); 834 } 835 836 static __inline unsigned int 837 busdma_map_mbufs(struct mbuf **m, adapter_t *sc, struct tx_sw_desc *stx, 838 bus_dma_segment_t *segs, int *nsegs) 839 { 840 struct mbuf *m0, *mtmp; 841 int err, pktlen; 842 843 m0 = *m; 844 pktlen = m0->m_pkthdr.len; 845 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 846 if (err) { 847 int n = 0; 848 mtmp = m0; 849 while(mtmp) { 850 n++; 851 mtmp = mtmp->m_next; 852 } 853 #ifdef DEBUG 854 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 855 err, m0->m_pkthdr.len, n); 856 #endif 857 } 858 859 if (err == EFBIG) { 860 /* Too many segments, try to defrag */ 861 m0 = m_defrag(m0, M_NOWAIT); 862 if (m0 == NULL) { 863 m_freem(*m); 864 *m = NULL; 865 return (ENOBUFS); 866 } 867 *m = m0; 868 err = bus_dmamap_load_mbuf_sg(sc->tx_dmat, stx->map, m0, segs, nsegs, 0); 869 } 870 871 if (err == ENOMEM) { 872 return (err); 873 } 874 875 if (err) { 876 if (cxgb_debug) 877 printf("map failure err=%d pktlen=%d\n", err, pktlen); 878 m_freem(m0); 879 *m = NULL; 880 return (err); 881 } 882 883 bus_dmamap_sync(sc->tx_dmat, stx->map, BUS_DMASYNC_PREWRITE); 884 stx->flags |= TX_SW_DESC_MAPPED; 885 886 return (0); 887 } 888 889 /** 890 * make_sgl - populate a scatter/gather list for a packet 891 * @sgp: the SGL to populate 892 * @segs: the packet dma segments 893 * @nsegs: the number of segments 894 * 895 * Generates a scatter/gather list for the buffers that make up a packet 896 * and returns the SGL size in 8-byte words. The caller must size the SGL 897 * appropriately. 
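 * Entries are packed two per struct sg_ent; when the count is odd the
 * unused length field of the trailing entry is zeroed.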
898 */ 899 static __inline void 900 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 901 { 902 int i, idx; 903 904 for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) { 905 if (i && idx == 0) 906 ++sgp; 907 908 sgp->len[idx] = htobe32(segs[i].ds_len); 909 sgp->addr[idx] = htobe64(segs[i].ds_addr); 910 } 911 912 if (idx) 913 sgp->len[idx] = 0; 914 } 915 916 /** 917 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 918 * @adap: the adapter 919 * @q: the Tx queue 920 * 921 * Ring the doorbel if a Tx queue is asleep. There is a natural race, 922 * where the HW is going to sleep just after we checked, however, 923 * then the interrupt handler will detect the outstanding TX packet 924 * and ring the doorbell for us. 925 * 926 * When GTS is disabled we unconditionally ring the doorbell. 927 */ 928 static __inline void 929 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 930 { 931 #if USE_GTS 932 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 933 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 934 set_bit(TXQ_LAST_PKT_DB, &q->flags); 935 #ifdef T3_TRACE 936 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 937 q->cntxt_id); 938 #endif 939 t3_write_reg(adap, A_SG_KDOORBELL, 940 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 941 } 942 #else 943 wmb(); /* write descriptors before telling HW */ 944 t3_write_reg(adap, A_SG_KDOORBELL, 945 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 946 #endif 947 } 948 949 static __inline void 950 wr_gen2(struct tx_desc *d, unsigned int gen) 951 { 952 #if SGE_NUM_GENBITS == 2 953 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 954 #endif 955 } 956 957 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 958 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 959 960 int 961 t3_encap(struct port_info *p, struct mbuf **m) 962 { 963 adapter_t *sc; 964 struct mbuf *m0; 965 struct sge_qset *qs; 966 struct sge_txq *txq; 967 struct tx_sw_desc *stx; 968 struct txq_state txqs; 969 unsigned int nsegs, ndesc, flits, cntrl, mlen, tso_info; 970 int err; 971 972 struct work_request_hdr *wrp; 973 struct tx_sw_desc *txsd; 974 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 975 bus_dma_segment_t segs[TX_MAX_SEGS]; 976 uint32_t wr_hi, wr_lo, sgl_flits; 977 978 struct tx_desc *txd; 979 struct cpl_tx_pkt *cpl; 980 981 DPRINTF("t3_encap "); 982 m0 = *m; 983 sc = p->adapter; 984 qs = &sc->sge.qs[p->first_qset]; 985 txq = &qs->txq[TXQ_ETH]; 986 stx = &txq->sdesc[txq->pidx]; 987 txd = &txq->desc[txq->pidx]; 988 cpl = (struct cpl_tx_pkt *)txd; 989 mlen = m0->m_pkthdr.len; 990 cpl->len = htonl(mlen | 0x80000000); 991 992 DPRINTF("mlen=%d\n", mlen); 993 /* 994 * XXX handle checksum, TSO, and VLAN here 995 * 996 */ 997 cntrl = V_TXPKT_INTF(p->port); 998 999 /* 1000 * XXX need to add VLAN support for 6.x 1001 */ 1002 #ifdef VLAN_SUPPORTED 1003 if (m0->m_flags & M_VLANTAG) 1004 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 1005 1006 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1007 #else 1008 tso_info = 0; 1009 #endif 1010 if (tso_info) { 1011 int eth_type; 1012 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1013 struct ip *ip; 1014 struct tcphdr *tcp; 1015 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
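					 * (TCPPKTHDRSIZE is 58 bytes, so no.)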
*/ 1016 1017 txd->flit[2] = 0; 1018 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1019 hdr->cntrl = htonl(cntrl); 1020 1021 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1022 pkthdr = &tmp[0]; 1023 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1024 } else { 1025 pkthdr = m0->m_data; 1026 } 1027 1028 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1029 eth_type = CPL_ETH_II_VLAN; 1030 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1031 ETHER_VLAN_ENCAP_LEN); 1032 } else { 1033 eth_type = CPL_ETH_II; 1034 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1035 } 1036 tcp = (struct tcphdr *)((uint8_t *)ip + 1037 sizeof(*ip)); 1038 1039 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1040 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1041 V_LSO_TCPHDR_WORDS(tcp->th_off); 1042 hdr->lso_info = htonl(tso_info); 1043 1044 flits = 3; 1045 } else { 1046 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1047 cpl->cntrl = htonl(cntrl); 1048 1049 if (mlen <= WR_LEN - sizeof(*cpl)) { 1050 txq_prod(txq, 1, &txqs); 1051 txq->sdesc[txqs.pidx].m = m0; 1052 1053 if (m0->m_len == m0->m_pkthdr.len) 1054 memcpy(&txd->flit[2], m0->m_data, mlen); 1055 else 1056 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1057 1058 flits = (mlen + 7) / 8 + 2; 1059 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1060 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1061 F_WR_SOP | F_WR_EOP | txqs.compl); 1062 wmb(); 1063 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1064 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1065 1066 wr_gen2(txd, txqs.gen); 1067 check_ring_tx_db(sc, txq); 1068 return (0); 1069 } 1070 flits = 2; 1071 } 1072 1073 wrp = (struct work_request_hdr *)txd; 1074 1075 if ((err = busdma_map_mbufs(m, sc, stx, segs, &nsegs)) != 0) { 1076 return (err); 1077 } 1078 m0 = *m; 1079 ndesc = calc_tx_descs(m0, nsegs); 1080 1081 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : &sgl[0]; 1082 make_sgl(sgp, segs, nsegs); 1083 1084 sgl_flits = sgl_len(nsegs); 1085 1086 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1087 txq_prod(txq, ndesc, &txqs); 1088 txsd = &txq->sdesc[txqs.pidx]; 1089 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1090 wr_lo = htonl(V_WR_TID(txq->token)); 1091 txsd->m = m0; 1092 1093 if (__predict_true(ndesc == 1)) { 1094 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1095 V_WR_SGLSFLT(flits)) | wr_hi; 1096 wmb(); 1097 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) | 1098 V_WR_GEN(txqs.gen)) | wr_lo; 1099 /* XXX gen? 
*/ 1100 wr_gen2(txd, txqs.gen); 1101 } else { 1102 unsigned int ogen = txqs.gen; 1103 const uint64_t *fp = (const uint64_t *)sgl; 1104 struct work_request_hdr *wp = wrp; 1105 1106 /* XXX - CHECK ME */ 1107 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1108 V_WR_SGLSFLT(flits)) | wr_hi; 1109 1110 while (sgl_flits) { 1111 unsigned int avail = WR_FLITS - flits; 1112 1113 if (avail > sgl_flits) 1114 avail = sgl_flits; 1115 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1116 sgl_flits -= avail; 1117 ndesc--; 1118 if (!sgl_flits) 1119 break; 1120 1121 fp += avail; 1122 txd++; 1123 txsd++; 1124 if (++txqs.pidx == txq->size) { 1125 txqs.pidx = 0; 1126 txqs.gen ^= 1; 1127 txd = txq->desc; 1128 txsd = txq->sdesc; 1129 } 1130 1131 /* 1132 * when the head of the mbuf chain 1133 * is freed all clusters will be freed 1134 * with it 1135 */ 1136 txsd->m = NULL; 1137 wrp = (struct work_request_hdr *)txd; 1138 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1139 V_WR_SGLSFLT(1)) | wr_hi; 1140 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1141 sgl_flits + 1)) | 1142 V_WR_GEN(txqs.gen)) | wr_lo; 1143 wr_gen2(txd, txqs.gen); 1144 flits = 1; 1145 } 1146 #ifdef WHY 1147 skb->priority = pidx; 1148 #endif 1149 wrp->wr_hi |= htonl(F_WR_EOP); 1150 wmb(); 1151 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1152 wr_gen2((struct tx_desc *)wp, ogen); 1153 } 1154 check_ring_tx_db(p->adapter, txq); 1155 1156 return (0); 1157 } 1158 1159 1160 /** 1161 * write_imm - write a packet into a Tx descriptor as immediate data 1162 * @d: the Tx descriptor to write 1163 * @m: the packet 1164 * @len: the length of packet data to write as immediate data 1165 * @gen: the generation bit value to write 1166 * 1167 * Writes a packet as immediate data into a Tx descriptor. The packet 1168 * contains a work request at its beginning. We must write the packet 1169 * carefully so the SGE doesn't read accidentally before it's written in 1170 * its entirety. 1171 */ 1172 static __inline void write_imm(struct tx_desc *d, struct mbuf *m, 1173 unsigned int len, unsigned int gen) 1174 { 1175 struct work_request_hdr *from = (struct work_request_hdr *)m->m_data; 1176 struct work_request_hdr *to = (struct work_request_hdr *)d; 1177 1178 memcpy(&to[1], &from[1], len - sizeof(*from)); 1179 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1180 V_WR_BCNTLFLT(len & 7)); 1181 wmb(); 1182 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1183 V_WR_LEN((len + 7) / 8)); 1184 wr_gen2(d, gen); 1185 m_freem(m); 1186 } 1187 1188 /** 1189 * check_desc_avail - check descriptor availability on a send queue 1190 * @adap: the adapter 1191 * @q: the TX queue 1192 * @m: the packet needing the descriptors 1193 * @ndesc: the number of Tx descriptors needed 1194 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1195 * 1196 * Checks if the requested number of Tx descriptors is available on an 1197 * SGE send queue. If the queue is already suspended or not enough 1198 * descriptors are available the packet is queued for later transmission. 1199 * Must be called with the Tx queue locked. 1200 * 1201 * Returns 0 if enough descriptors are available, 1 if there aren't 1202 * enough descriptors and the packet has been queued, and 2 if the caller 1203 * needs to retry because there weren't enough descriptors at the 1204 * beginning of the call but some freed up in the mean time. 
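 * In this driver the suspend/queue path is compiled out (see the #if 0
 * block below), so the function currently always reports success.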
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue.
	 * The control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
#if 0
	if (__predict_false(!skb_queue_empty(&q->sendq))) {
addq_exit:	__skb_queue_tail(&q->sendq, skb);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		set_bit(qid, &qs->txq_stopped);
		smp_mb__after_clear_bit();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
#endif
	return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs to release.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
}

/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = (struct work_request_hdr *)m->m_data;

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return (0);
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}

#ifdef RESTART_CTRLQ
/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
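 * Note: this path is only built when RESTART_CTRLQ is defined and still
 * uses Linux sk_buff idioms, so it does not compile as-is.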
1321 */ 1322 static void 1323 restart_ctrlq(unsigned long data) 1324 { 1325 struct mbuf *m; 1326 struct sge_qset *qs = (struct sge_qset *)data; 1327 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1328 adapter_t *adap = qs->port->adapter; 1329 1330 mtx_lock(&q->lock); 1331 again: reclaim_completed_tx_imm(q); 1332 1333 while (q->in_use < q->size && 1334 (skb = __skb_dequeue(&q->sendq)) != NULL) { 1335 1336 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen); 1337 1338 if (++q->pidx >= q->size) { 1339 q->pidx = 0; 1340 q->gen ^= 1; 1341 } 1342 q->in_use++; 1343 } 1344 if (!skb_queue_empty(&q->sendq)) { 1345 set_bit(TXQ_CTRL, &qs->txq_stopped); 1346 smp_mb__after_clear_bit(); 1347 1348 if (should_restart_tx(q) && 1349 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1350 goto again; 1351 q->stops++; 1352 } 1353 1354 mtx_unlock(&q->lock); 1355 t3_write_reg(adap, A_SG_KDOORBELL, 1356 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1357 } 1358 #endif 1359 1360 /* 1361 * Send a management message through control queue 0 1362 */ 1363 int 1364 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1365 { 1366 return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m); 1367 } 1368 1369 /** 1370 * t3_sge_alloc_qset - initialize an SGE queue set 1371 * @sc: the controller softc 1372 * @id: the queue set id 1373 * @nports: how many Ethernet ports will be using this queue set 1374 * @irq_vec_idx: the IRQ vector index for response queue interrupts 1375 * @p: configuration parameters for this queue set 1376 * @ntxq: number of Tx queues for the queue set 1377 * @pi: port info for queue set 1378 * 1379 * Allocate resources and initialize an SGE queue set. A queue set 1380 * comprises a response queue, two Rx free-buffer queues, and up to 3 1381 * Tx queues. The Tx queues are assigned roles in the order Ethernet 1382 * queue, offload queue, and control queue. 1383 */ 1384 int 1385 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 1386 const struct qset_params *p, int ntxq, struct port_info *pi) 1387 { 1388 struct sge_qset *q = &sc->sge.qs[id]; 1389 int i, ret = 0; 1390 1391 init_qset_cntxt(q, id); 1392 1393 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 1394 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 1395 &q->fl[0].desc, &q->fl[0].sdesc, 1396 &q->fl[0].desc_tag, &q->fl[0].desc_map, 1397 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 1398 printf("error %d from alloc ring fl0\n", ret); 1399 goto err; 1400 } 1401 1402 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 1403 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 1404 &q->fl[1].desc, &q->fl[1].sdesc, 1405 &q->fl[1].desc_tag, &q->fl[1].desc_map, 1406 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 1407 printf("error %d from alloc ring fl1\n", ret); 1408 goto err; 1409 } 1410 1411 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 1412 &q->rspq.phys_addr, &q->rspq.desc, NULL, 1413 &q->rspq.desc_tag, &q->rspq.desc_map, 1414 NULL, NULL)) != 0) { 1415 printf("error %d from alloc ring rspq\n", ret); 1416 goto err; 1417 } 1418 1419 for (i = 0; i < ntxq; ++i) { 1420 /* 1421 * The control queue always uses immediate data so does not 1422 * need to keep track of any mbufs. 1423 * XXX Placeholder for future TOE support. 1424 */ 1425 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 1426 1427 if ((ret = alloc_ring(sc, p->txq_size[i], 1428 sizeof(struct tx_desc), sz, 1429 &q->txq[i].phys_addr, &q->txq[i].desc, 1430 &q->txq[i].sdesc, &q->txq[i].desc_tag, 1431 &q->txq[i].desc_map, 1432 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 1433 printf("error %d from alloc ring tx %i\n", ret, i); 1434 goto err; 1435 } 1436 1437 q->txq[i].gen = 1; 1438 q->txq[i].size = p->txq_size[i]; 1439 mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); 1440 } 1441 1442 q->fl[0].gen = q->fl[1].gen = 1; 1443 q->fl[0].size = p->fl_size; 1444 q->fl[1].size = p->jumbo_size; 1445 1446 q->rspq.gen = 1; 1447 q->rspq.size = p->rspq_size; 1448 mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF); 1449 1450 q->txq[TXQ_ETH].stop_thres = nports * 1451 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 1452 1453 q->fl[0].buf_size = MCLBYTES; 1454 q->fl[0].zone = zone_clust; 1455 q->fl[0].type = EXT_CLUSTER; 1456 q->fl[1].buf_size = MJUMPAGESIZE; 1457 q->fl[1].zone = zone_jumbop; 1458 q->fl[1].type = EXT_JUMBOP; 1459 1460 q->lro.enabled = lro_default; 1461 1462 mtx_lock(&sc->sge.reg_lock); 1463 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 1464 q->rspq.phys_addr, q->rspq.size, 1465 q->fl[0].buf_size, 1, 0); 1466 if (ret) { 1467 printf("error %d from t3_sge_init_rspcntxt\n", ret); 1468 goto err_unlock; 1469 } 1470 1471 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1472 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 1473 q->fl[i].phys_addr, q->fl[i].size, 1474 q->fl[i].buf_size, p->cong_thres, 1, 1475 0); 1476 if (ret) { 1477 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 1478 goto err_unlock; 1479 } 1480 } 1481 1482 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 1483 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 1484 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1485 1, 0); 1486 if (ret) { 1487 printf("error %d from t3_sge_init_ecntxt\n", ret); 1488 goto err_unlock; 1489 } 1490 1491 if (ntxq > 1) { 1492 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 1493 USE_GTS, SGE_CNTXT_OFLD, id, 1494 q->txq[TXQ_OFLD].phys_addr, 1495 q->txq[TXQ_OFLD].size, 0, 1, 0); 1496 if (ret) { 1497 printf("error %d from t3_sge_init_ecntxt\n", ret); 1498 goto err_unlock; 1499 } 1500 } 1501 1502 if (ntxq > 2) { 1503 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 1504 SGE_CNTXT_CTRL, id, 1505 q->txq[TXQ_CTRL].phys_addr, 1506 q->txq[TXQ_CTRL].size, 1507 q->txq[TXQ_CTRL].token, 1, 0); 1508 if (ret) { 1509 printf("error %d from t3_sge_init_ecntxt\n", ret); 1510 goto err_unlock; 1511 } 1512 } 1513 1514 mtx_unlock(&sc->sge.reg_lock); 1515 t3_update_qset_coalesce(q, p); 1516 q->port = pi; 1517 1518 refill_fl(sc, &q->fl[0], q->fl[0].size); 1519 refill_fl(sc, &q->fl[1], q->fl[1].size); 1520 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 1521 1522 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 1523 V_NEWTIMER(q->rspq.holdoff_tmr)); 1524 1525 return (0); 1526 1527 err_unlock: 1528 mtx_unlock(&sc->sge.reg_lock); 1529 err: 1530 t3_free_qset(sc, q); 1531 1532 return (ret); 1533 } 1534 1535 1536 /** 1537 * free_qset - free the resources of an SGE queue set 1538 * @sc: the controller owning the queue set 1539 * @q: the queue set 1540 * 1541 * Release the HW and SW resources associated with an SGE queue set, such 1542 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1543 * queue set must be quiesced prior to calling this. 
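 * It is safe to call this on a partially constructed queue set; every
 * resource is checked before it is released.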
1544 */ 1545 static void 1546 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1547 { 1548 int i; 1549 1550 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1551 if (q->fl[i].desc) { 1552 mtx_lock(&sc->sge.reg_lock); 1553 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1554 mtx_unlock(&sc->sge.reg_lock); 1555 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1556 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1557 q->fl[i].desc_map); 1558 bus_dma_tag_destroy(q->fl[i].desc_tag); 1559 bus_dma_tag_destroy(q->fl[i].entry_tag); 1560 } 1561 if (q->fl[i].sdesc) { 1562 free_rx_bufs(sc, &q->fl[i]); 1563 free(q->fl[i].sdesc, M_DEVBUF); 1564 } 1565 } 1566 1567 for (i = 0; i < SGE_TXQ_PER_SET; ++i) { 1568 if (q->txq[i].desc) { 1569 mtx_lock(&sc->sge.reg_lock); 1570 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 1571 mtx_unlock(&sc->sge.reg_lock); 1572 bus_dmamap_unload(q->txq[i].desc_tag, 1573 q->txq[i].desc_map); 1574 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 1575 q->txq[i].desc_map); 1576 bus_dma_tag_destroy(q->txq[i].desc_tag); 1577 bus_dma_tag_destroy(q->txq[i].entry_tag); 1578 } 1579 if (q->txq[i].sdesc) { 1580 free(q->txq[i].sdesc, M_DEVBUF); 1581 } 1582 if (mtx_initialized(&q->txq[i].lock)) { 1583 mtx_destroy(&q->txq[i].lock); 1584 } 1585 } 1586 1587 if (q->rspq.desc) { 1588 mtx_lock(&sc->sge.reg_lock); 1589 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 1590 mtx_unlock(&sc->sge.reg_lock); 1591 1592 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 1593 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 1594 q->rspq.desc_map); 1595 bus_dma_tag_destroy(q->rspq.desc_tag); 1596 } 1597 1598 if (mtx_initialized(&q->rspq.lock)) 1599 mtx_destroy(&q->rspq.lock); 1600 1601 bzero(q, sizeof(*q)); 1602 } 1603 1604 /** 1605 * t3_free_sge_resources - free SGE resources 1606 * @sc: the adapter softc 1607 * 1608 * Frees resources used by the SGE queue sets. 1609 */ 1610 void 1611 t3_free_sge_resources(adapter_t *sc) 1612 { 1613 int i; 1614 1615 for (i = 0; i < SGE_QSETS; ++i) 1616 t3_free_qset(sc, &sc->sge.qs[i]); 1617 } 1618 1619 /** 1620 * t3_sge_start - enable SGE 1621 * @sc: the controller softc 1622 * 1623 * Enables the SGE for DMAs. This is the last step in starting packet 1624 * transfers. 1625 */ 1626 void 1627 t3_sge_start(adapter_t *sc) 1628 { 1629 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 1630 } 1631 1632 1633 /** 1634 * free_tx_desc - reclaims Tx descriptors and their buffers 1635 * @adapter: the adapter 1636 * @q: the Tx queue to reclaim descriptors from 1637 * @n: the number of descriptors to reclaim 1638 * 1639 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1640 * Tx buffers. Called with the Tx queue lock held. 
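 * The reclaimed mbufs are gathered into @m_vec and their count returned so
 * the caller can free them after dropping the queue lock.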
1641 */ 1642 int 1643 free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec) 1644 { 1645 struct tx_sw_desc *d; 1646 unsigned int cidx = q->cidx; 1647 int nbufs = 0; 1648 1649 #ifdef T3_TRACE 1650 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1651 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1652 #endif 1653 d = &q->sdesc[cidx]; 1654 1655 while (n-- > 0) { 1656 DPRINTF("cidx=%d d=%p\n", cidx, d); 1657 if (d->m) { 1658 if (d->flags & TX_SW_DESC_MAPPED) { 1659 bus_dmamap_unload(q->entry_tag, d->map); 1660 bus_dmamap_destroy(q->entry_tag, d->map); 1661 d->flags &= ~TX_SW_DESC_MAPPED; 1662 } 1663 m_vec[nbufs] = d->m; 1664 d->m = NULL; 1665 nbufs++; 1666 } 1667 ++d; 1668 if (++cidx == q->size) { 1669 cidx = 0; 1670 d = q->sdesc; 1671 } 1672 } 1673 q->cidx = cidx; 1674 1675 return (nbufs); 1676 } 1677 1678 /** 1679 * is_new_response - check if a response is newly written 1680 * @r: the response descriptor 1681 * @q: the response queue 1682 * 1683 * Returns true if a response descriptor contains a yet unprocessed 1684 * response. 1685 */ 1686 static __inline int 1687 is_new_response(const struct rsp_desc *r, 1688 const struct sge_rspq *q) 1689 { 1690 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1691 } 1692 1693 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1694 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1695 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1696 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1697 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1698 1699 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1700 #define NOMEM_INTR_DELAY 2500 1701 1702 static __inline void 1703 deliver_partial_bundle(struct t3cdev *tdev, struct sge_rspq *q) 1704 { 1705 ; 1706 } 1707 1708 static __inline void 1709 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 1710 struct mbuf *m) 1711 { 1712 #ifdef notyet 1713 if (rq->polling) { 1714 rq->offload_skbs[rq->offload_skbs_idx++] = skb; 1715 if (rq->offload_skbs_idx == RX_BUNDLE_SIZE) { 1716 cxgb_ofld_recv(tdev, rq->offload_skbs, RX_BUNDLE_SIZE); 1717 rq->offload_skbs_idx = 0; 1718 rq->offload_bundles++; 1719 } 1720 } else 1721 #endif 1722 { 1723 /* XXX */ 1724 panic("implement offload enqueue\n"); 1725 } 1726 1727 } 1728 1729 static void 1730 restart_tx(struct sge_qset *qs) 1731 { 1732 ; 1733 } 1734 1735 void 1736 t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad) 1737 { 1738 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(m->m_data + ethpad); 1739 struct ifnet *ifp = pi->ifp; 1740 1741 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, m->m_data, cpl->iff); 1742 if (&pi->adapter->port[cpl->iff] != pi) 1743 panic("bad port index %d m->m_data=%p\n", cpl->iff, m->m_data); 1744 1745 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 1746 cpl->csum_valid && cpl->csum == 0xffff) { 1747 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 1748 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 1749 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 1750 m->m_pkthdr.csum_data = 0xffff; 1751 } 1752 /* 1753 * XXX need to add VLAN support for 6.x 1754 */ 1755 #ifdef VLAN_SUPPORTED 1756 if (__predict_false(cpl->vlan_valid)) { 1757 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 1758 m->m_flags |= M_VLANTAG; 1759 } 1760 #endif 1761 m->m_pkthdr.rcvif = ifp; 1762 1763 m_explode(m); 1764 m_adj(m, sizeof(*cpl) + ethpad); 1765 1766 (*ifp->if_input)(ifp, m); 1767 } 1768 1769 /** 1770 * get_packet - return the next ingress packet buffer from a free list 1771 * @adap: the 
adapter that received the packet 1772 * @drop_thres: # of remaining buffers before we start dropping packets 1773 * @qs: the qset that the SGE free list holding the packet belongs to 1774 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 1775 * @r: response descriptor 1776 * 1777 * Get the next packet from a free list and complete setup of the 1778 * sk_buff. If the packet is small we make a copy and recycle the 1779 * original buffer, otherwise we use the original buffer itself. If a 1780 * positive drop threshold is supplied packets are dropped and their 1781 * buffers recycled if (a) the number of remaining buffers is under the 1782 * threshold and the packet is too big to copy, or (b) the packet should 1783 * be copied but there is no memory for the copy. 1784 */ 1785 1786 static int 1787 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 1788 struct mbuf *m, struct rsp_desc *r) 1789 { 1790 1791 unsigned int len_cq = ntohl(r->len_cq); 1792 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 1793 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 1794 uint32_t len = G_RSPD_LEN(len_cq); 1795 uint32_t flags = ntohl(r->flags); 1796 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 1797 int ret = 0; 1798 1799 prefetch(sd->cl); 1800 1801 fl->credits--; 1802 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 1803 bus_dmamap_unload(fl->entry_tag, sd->map); 1804 1805 1806 switch(sopeop) { 1807 case RSPQ_SOP_EOP: 1808 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 1809 m_cljset(m, sd->cl, fl->type); 1810 m->m_len = m->m_pkthdr.len = len; 1811 m->m_flags |= M_PKTHDR; 1812 ret = 1; 1813 goto done; 1814 break; 1815 case RSPQ_NSOP_NEOP: 1816 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 1817 ret = 0; 1818 break; 1819 case RSPQ_SOP: 1820 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 1821 m->m_flags |= M_PKTHDR; 1822 m_iovinit(m); 1823 ret = 0; 1824 break; 1825 case RSPQ_EOP: 1826 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 1827 ret = 1; 1828 break; 1829 } 1830 m_iovappend(m, sd->cl, fl->buf_size, len, 0); 1831 1832 done: 1833 if (++fl->cidx == fl->size) 1834 fl->cidx = 0; 1835 1836 return (ret); 1837 } 1838 1839 1840 /** 1841 * handle_rsp_cntrl_info - handles control information in a response 1842 * @qs: the queue set corresponding to the response 1843 * @flags: the response control flags 1844 * 1845 * Handles the control information of an SGE response, such as GTS 1846 * indications and completion credits for the queue set's Tx queues. 1847 * HW coalesces credits, we don't do any extra SW coalescing. 
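 * When enough Ethernet completion credits have accumulated, the reclaim
 * task is scheduled to free the corresponding Tx buffers.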
1848 */ 1849 static __inline void 1850 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 1851 { 1852 unsigned int credits; 1853 1854 #if USE_GTS 1855 if (flags & F_RSPD_TXQ0_GTS) 1856 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 1857 #endif 1858 credits = G_RSPD_TXQ0_CR(flags); 1859 if (credits) { 1860 qs->txq[TXQ_ETH].processed += credits; 1861 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 1862 taskqueue_enqueue(qs->port->adapter->tq, 1863 &qs->port->adapter->timer_reclaim_task); 1864 } 1865 1866 credits = G_RSPD_TXQ2_CR(flags); 1867 if (credits) 1868 qs->txq[TXQ_CTRL].processed += credits; 1869 1870 # if USE_GTS 1871 if (flags & F_RSPD_TXQ1_GTS) 1872 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 1873 # endif 1874 credits = G_RSPD_TXQ1_CR(flags); 1875 if (credits) 1876 qs->txq[TXQ_OFLD].processed += credits; 1877 } 1878 1879 static void 1880 check_ring_db(adapter_t *adap, struct sge_qset *qs, 1881 unsigned int sleeping) 1882 { 1883 ; 1884 } 1885 1886 /* 1887 * This is an awful hack to bind the ithread to CPU 1 1888 * to work around lack of ithread affinity 1889 */ 1890 static void 1891 bind_ithread(int cpu) 1892 { 1893 KASSERT(cpu < mp_ncpus, ("invalid cpu identifier")); 1894 #if 0 1895 if (mp_ncpus > 1) { 1896 mtx_lock_spin(&sched_lock); 1897 sched_bind(curthread, cpu); 1898 mtx_unlock_spin(&sched_lock); 1899 } 1900 #endif 1901 } 1902 1903 /** 1904 * process_responses - process responses from an SGE response queue 1905 * @adap: the adapter 1906 * @qs: the queue set to which the response queue belongs 1907 * @budget: how many responses can be processed in this round 1908 * 1909 * Process responses from an SGE response queue up to the supplied budget. 1910 * Responses include received packets as well as credits and other events 1911 * for the queues that belong to the response queue's queue set. 1912 * A negative budget is effectively unlimited. 1913 * 1914 * Additionally choose the interrupt holdoff time for the next interrupt 1915 * on this queue. If the system is under memory shortage use a fairly 1916 * long delay to help recovery. 
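 * Returns the number of responses actually processed.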
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;

	static uint8_t pinned[MAXCPU];

#ifdef DEBUG
	static int last_holdoff = 0;
	if (rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	if (pinned[qs->rspq.cntxt_id * adap->params.nports] == 0) {
		/*
		 * Assumes that cntxt_id < mp_ncpus
		 */
		bind_ithread(qs->rspq.cntxt_id);
		pinned[qs->rspq.cntxt_id * adap->params.nports] = 1;
	}
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
			printf("async notification\n");

		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;
			if (cxgb_debug)
				printf("IMM DATA VALID\n");
			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			else
				m = m_gethdr(M_NOWAIT, MT_DATA);

			if (rspq->m == NULL || m == NULL) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->m, m);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			if (rspq->m == NULL) {
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
				if (rspq->m == NULL) {
					log(LOG_WARNING,
					    "failed to get mbuf for packet\n");
					break;
				}
				rspq->m->m_flags = 0;
			}

			ethpad = 2;
			eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
		} else {
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(rspq->m->m_data);
			prefetch(rspq->m->m_data + L1_CACHE_BYTES);

			if (eth) {
				t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
				    rss_hash, rss_csum, lro);

				rspq->m = NULL;
			} else {
#ifdef notyet
				if (__predict_false(r->rss_hdr.opcode == CPL_TRACE_PKT))
					m_adj(m, 2);

				rx_offload(&adap->tdev, rspq, m);
#endif
			}
#ifdef notyet
			taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
#else
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);
#endif
		}
		--budget_left;
	}
	t3_sge_lro_flush_all(adap, qs);
	deliver_partial_bundle(&adap->tdev, rspq);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();	/* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that
/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
}

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}
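/*
 * In t3b_intr() above, A_SG_DATA_INTR is read as a bitmap of response
 * queues with pending entries (bit 0 for queue set 0, bit 1 for queue set 1);
 * F_ERRINTR indicates an error event, which is deferred to the slow
 * interrupt task rather than handled in interrupt context.
 */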
/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
	int new_packets = 0;

	mtx_lock(&q0->lock);
	if (process_responses_gts(adap, q0)) {
		new_packets = 1;
	}

	if (adap->params.nports == 2 &&
	    process_responses_gts(adap, q1)) {
		new_packets = 1;
	}

	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

void
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	mtx_lock(&rspq->lock);
	if (process_responses_gts(adap, rspq) == 0) {
#ifdef notyet
		rspq->unhandled_irqs++;
#endif
	}
	mtx_unlock(&rspq->lock);
}

/*
 * Sysctl handler for enabling/disabling LRO.  LRO is currently broken by
 * recent mbuf changes, so unless LRO_WORKING is defined the handler returns
 * early without changing anything.
 */
static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc;
	int i, j, enabled, err, nqsets = 0;

#ifndef LRO_WORKING
	return (0);
#endif

	sc = arg1;
	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (err != 0) {
		return (err);
	}
	if (enabled == sc->sge.qs[0].lro.enabled)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	for (i = 0; i < nqsets; i++) {
		sc->sge.qs[i].lro.enabled = enabled;
	}

	return (0);
}
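/*
 * The sysctl handler below applies a new interrupt coalescing value to every
 * queue set: the value is clamped to a minimum of 100ns, stored in the queue
 * set's parameters, and pushed to the hardware right away through
 * t3_update_qset_coalesce() and an A_SG_GTS write that reloads the holdoff
 * timer.
 */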
static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_nsecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	coalesce_nsecs = qsp->coalesce_nsecs;
	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);

	if (err != 0) {
		return (err);
	}
	if (coalesce_nsecs == qsp->coalesce_nsecs)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	coalesce_nsecs = max(100, coalesce_nsecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_nsecs = coalesce_nsecs;

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "enable_lro",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_lro_enable,
	    "I", "enable large receive offload");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");
}

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
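/*
 * Example usage of t3_get_desc() from a hypothetical debugging caller,
 * dumping response descriptor 0 of queue set 0 (qnum 3 selects the response
 * queue):
 *
 *	unsigned char buf[sizeof(struct rsp_desc)];
 *	int len = t3_get_desc(&sc->sge.qs[0], 3, 0, buf);
 */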