1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/conf.h> 39 #include <machine/bus.h> 40 #include <machine/resource.h> 41 #include <sys/bus_dma.h> 42 #include <sys/rman.h> 43 #include <sys/queue.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 48 #include <sys/proc.h> 49 #include <sys/sched.h> 50 #include <sys/smp.h> 51 #include <sys/systm.h> 52 53 #include <netinet/in_systm.h> 54 #include <netinet/in.h> 55 #include <netinet/ip.h> 56 #include <netinet/tcp.h> 57 58 #include <dev/pci/pcireg.h> 59 #include <dev/pci/pcivar.h> 60 61 #ifdef CONFIG_DEFINED 62 #include <cxgb_include.h> 63 #else 64 #include <dev/cxgb/cxgb_include.h> 65 #endif 66 67 uint32_t collapse_free = 0; 68 uint32_t mb_free_vec_free = 0; 69 int txq_fills = 0; 70 int collapse_mbufs = 0; 71 static int recycle_enable = 1; 72 static int bogus_imm = 0; 73 74 /* 75 * XXX GC 76 */ 77 #define NET_XMIT_CN 2 78 #define NET_XMIT_SUCCESS 0 79 80 #define USE_GTS 0 81 82 #define SGE_RX_SM_BUF_SIZE 1536 83 #define SGE_RX_DROP_THRES 16 84 #define SGE_RX_COPY_THRES 128 85 86 /* 87 * Period of the Tx buffer reclaim timer. This timer does not need to run 88 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
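 * With TX_RECLAIM_PERIOD defined as (hz >> 1) below, the timer fires
 * roughly every half second.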
89 */ 90 #define TX_RECLAIM_PERIOD (hz >> 1) 91 92 /* 93 * work request size in bytes 94 */ 95 #define WR_LEN (WR_FLITS * 8) 96 97 /* 98 * Values for sge_txq.flags 99 */ 100 enum { 101 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 102 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 103 }; 104 105 struct tx_desc { 106 uint64_t flit[TX_DESC_FLITS]; 107 } __packed; 108 109 struct rx_desc { 110 uint32_t addr_lo; 111 uint32_t len_gen; 112 uint32_t gen2; 113 uint32_t addr_hi; 114 } __packed;; 115 116 struct rsp_desc { /* response queue descriptor */ 117 struct rss_header rss_hdr; 118 uint32_t flags; 119 uint32_t len_cq; 120 uint8_t imm_data[47]; 121 uint8_t intr_gen; 122 } __packed; 123 124 #define RX_SW_DESC_MAP_CREATED (1 << 0) 125 #define TX_SW_DESC_MAP_CREATED (1 << 1) 126 #define RX_SW_DESC_INUSE (1 << 3) 127 #define TX_SW_DESC_MAPPED (1 << 4) 128 129 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 130 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 131 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 132 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 133 134 struct tx_sw_desc { /* SW state per Tx descriptor */ 135 struct mbuf *m; 136 bus_dmamap_t map; 137 int flags; 138 }; 139 140 struct rx_sw_desc { /* SW state per Rx descriptor */ 141 void *cl; 142 bus_dmamap_t map; 143 int flags; 144 }; 145 146 struct txq_state { 147 unsigned int compl; 148 unsigned int gen; 149 unsigned int pidx; 150 }; 151 152 struct refill_fl_cb_arg { 153 int error; 154 bus_dma_segment_t seg; 155 int nseg; 156 }; 157 158 /* 159 * Maps a number of flits to the number of Tx descriptors that can hold them. 160 * The formula is 161 * 162 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 163 * 164 * HW allows up to 4 descriptors to be combined into a WR. 165 */ 166 static uint8_t flit_desc_map[] = { 167 0, 168 #if SGE_NUM_GENBITS == 1 169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 170 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 171 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 172 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 173 #elif SGE_NUM_GENBITS == 2 174 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 175 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 176 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 177 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 178 #else 179 # error "SGE_NUM_GENBITS must be 1 or 2" 180 #endif 181 }; 182 183 184 static int lro_default = 0; 185 int cxgb_debug = 0; 186 187 static void t3_free_qset(adapter_t *sc, struct sge_qset *q); 188 static void sge_timer_cb(void *arg); 189 static void sge_timer_reclaim(void *arg, int ncount); 190 static void sge_txq_reclaim_handler(void *arg, int ncount); 191 static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec); 192 193 /** 194 * reclaim_completed_tx - reclaims completed Tx descriptors 195 * @adapter: the adapter 196 * @q: the Tx queue to reclaim completed descriptors from 197 * 198 * Reclaims Tx descriptors that the SGE has indicated it has processed, 199 * and frees the associated buffers if possible. Called with the Tx 200 * queue's lock held. 201 */ 202 static __inline int 203 reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec) 204 { 205 int reclaimed, reclaim = desc_reclaimable(q); 206 int n = 0; 207 208 mtx_assert(&q->lock, MA_OWNED); 209 if (reclaim > 0) { 210 n = free_tx_desc(q, min(reclaim, nbufs), mvec); 211 reclaimed = min(reclaim, nbufs); 212 q->cleaned += reclaimed; 213 q->in_use -= reclaimed; 214 } 215 return (n); 216 } 217 218 /** 219 * should_restart_tx - are there enough resources to restart a Tx queue? 
220 * @q: the Tx queue 221 * 222 * Checks if there are enough descriptors to restart a suspended Tx queue. 223 */ 224 static __inline int 225 should_restart_tx(const struct sge_txq *q) 226 { 227 unsigned int r = q->processed - q->cleaned; 228 229 return q->in_use - r < (q->size >> 1); 230 } 231 232 /** 233 * t3_sge_init - initialize SGE 234 * @adap: the adapter 235 * @p: the SGE parameters 236 * 237 * Performs SGE initialization needed every time after a chip reset. 238 * We do not initialize any of the queue sets here, instead the driver 239 * top-level must request those individually. We also do not enable DMA 240 * here, that should be done after the queues have been set up. 241 */ 242 void 243 t3_sge_init(adapter_t *adap, struct sge_params *p) 244 { 245 u_int ctrl, ups; 246 247 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 248 249 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 250 F_CQCRDTCTRL | 251 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 252 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 253 #if SGE_NUM_GENBITS == 1 254 ctrl |= F_EGRGENCTRL; 255 #endif 256 if (adap->params.rev > 0) { 257 if (!(adap->flags & (USING_MSIX | USING_MSI))) 258 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 259 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 260 } 261 t3_write_reg(adap, A_SG_CONTROL, ctrl); 262 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 263 V_LORCQDRBTHRSH(512)); 264 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 265 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 266 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 267 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 268 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 269 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 270 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 271 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 272 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 273 } 274 275 276 /** 277 * sgl_len - calculates the size of an SGL of the given capacity 278 * @n: the number of SGL entries 279 * 280 * Calculates the number of flits needed for a scatter/gather list that 281 * can hold the given number of entries. 282 */ 283 static __inline unsigned int 284 sgl_len(unsigned int n) 285 { 286 return ((3 * n) / 2 + (n & 1)); 287 } 288 289 /** 290 * get_imm_packet - return the next ingress packet buffer from a response 291 * @resp: the response descriptor containing the packet data 292 * 293 * Return a packet containing the immediate data of the given response. 
294 */ 295 static int 296 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags) 297 { 298 int len, error; 299 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 300 301 /* 302 * would be a firmware bug 303 */ 304 len = G_RSPD_LEN(ntohl(resp->len_cq)); 305 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) { 306 if (cxgb_debug) 307 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len); 308 bogus_imm++; 309 return (EINVAL); 310 } 311 error = 0; 312 switch (sopeop) { 313 case RSPQ_SOP_EOP: 314 m->m_len = m->m_pkthdr.len = len; 315 memcpy(mtod(m, uint8_t *), resp->imm_data, len); 316 break; 317 case RSPQ_EOP: 318 memcpy(cl, resp->imm_data, len); 319 m_iovappend(m, cl, MSIZE, len, 0); 320 break; 321 default: 322 bogus_imm++; 323 error = EINVAL; 324 } 325 326 return (error); 327 } 328 329 330 static __inline u_int 331 flits_to_desc(u_int n) 332 { 333 return (flit_desc_map[n]); 334 } 335 336 void 337 t3_sge_err_intr_handler(adapter_t *adapter) 338 { 339 unsigned int v, status; 340 341 342 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 343 344 if (status & F_RSPQCREDITOVERFOW) 345 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 346 347 if (status & F_RSPQDISABLED) { 348 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 349 350 CH_ALERT(adapter, 351 "packet delivered to disabled response queue (0x%x)\n", 352 (v >> S_RSPQ0DISABLED) & 0xff); 353 } 354 355 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 356 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 357 t3_fatal_err(adapter); 358 } 359 360 void 361 t3_sge_prep(adapter_t *adap, struct sge_params *p) 362 { 363 int i; 364 365 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 366 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 367 368 for (i = 0; i < SGE_QSETS; ++i) { 369 struct qset_params *q = p->qset + i; 370 371 q->polling = adap->params.rev > 0; 372 373 if (adap->params.nports > 2) 374 q->coalesce_nsecs = 50000; 375 else 376 q->coalesce_nsecs = 5000; 377 378 q->rspq_size = RSPQ_Q_SIZE; 379 q->fl_size = FL_Q_SIZE; 380 q->jumbo_size = JUMBO_Q_SIZE; 381 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 382 q->txq_size[TXQ_OFLD] = 1024; 383 q->txq_size[TXQ_CTRL] = 256; 384 q->cong_thres = 0; 385 } 386 } 387 388 int 389 t3_sge_alloc(adapter_t *sc) 390 { 391 392 /* The parent tag. */ 393 if (bus_dma_tag_create( NULL, /* parent */ 394 1, 0, /* algnmnt, boundary */ 395 BUS_SPACE_MAXADDR, /* lowaddr */ 396 BUS_SPACE_MAXADDR, /* highaddr */ 397 NULL, NULL, /* filter, filterarg */ 398 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 399 BUS_SPACE_UNRESTRICTED, /* nsegments */ 400 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 401 0, /* flags */ 402 NULL, NULL, /* lock, lockarg */ 403 &sc->parent_dmat)) { 404 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 405 return (ENOMEM); 406 } 407 408 /* 409 * DMA tag for normal sized RX frames 410 */ 411 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 412 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 413 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 414 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 415 return (ENOMEM); 416 } 417 418 /* 419 * DMA tag for jumbo sized RX frames. 
420 */ 421 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 422 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 423 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 424 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 425 return (ENOMEM); 426 } 427 428 /* 429 * DMA tag for TX frames. 430 */ 431 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 432 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 433 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 434 NULL, NULL, &sc->tx_dmat)) { 435 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 436 return (ENOMEM); 437 } 438 439 return (0); 440 } 441 442 int 443 t3_sge_free(struct adapter * sc) 444 { 445 446 if (sc->tx_dmat != NULL) 447 bus_dma_tag_destroy(sc->tx_dmat); 448 449 if (sc->rx_jumbo_dmat != NULL) 450 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 451 452 if (sc->rx_dmat != NULL) 453 bus_dma_tag_destroy(sc->rx_dmat); 454 455 if (sc->parent_dmat != NULL) 456 bus_dma_tag_destroy(sc->parent_dmat); 457 458 return (0); 459 } 460 461 void 462 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 463 { 464 465 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 466 qs->rspq.polling = 0 /* p->polling */; 467 } 468 469 static void 470 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 471 { 472 struct refill_fl_cb_arg *cb_arg = arg; 473 474 cb_arg->error = error; 475 cb_arg->seg = segs[0]; 476 cb_arg->nseg = nseg; 477 478 } 479 480 /** 481 * refill_fl - refill an SGE free-buffer list 482 * @sc: the controller softc 483 * @q: the free-list to refill 484 * @n: the number of new buffers to allocate 485 * 486 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 487 * The caller must assure that @n does not exceed the queue's capacity. 
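 *
 * Callers either top the list up in small batches, as __refill_fl()
 * does with min(16U, fl->size - fl->credits), or fill the whole ring
 * once at queue set setup time from t3_sge_alloc_qset().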
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx.
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * Don't leak the cluster; hand what we have so far
			 * to the hardware and stop refilling.
			 */
			uma_zfree(q->zone, cl);
			goto done;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

/**
 * recycle_rx_buf - recycle a receive buffer
 * @adapter: the adapter
 * @q: the SGE free list
 * @idx: index of buffer to recycle
 *
 * Recycles the specified buffer on the given free list by adding it at
 * the next available slot on the list.
591 */ 592 static void 593 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 594 { 595 struct rx_desc *from = &q->desc[idx]; 596 struct rx_desc *to = &q->desc[q->pidx]; 597 598 q->sdesc[q->pidx] = q->sdesc[idx]; 599 to->addr_lo = from->addr_lo; // already big endian 600 to->addr_hi = from->addr_hi; // likewise 601 wmb(); 602 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 603 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 604 q->credits++; 605 606 if (++q->pidx == q->size) { 607 q->pidx = 0; 608 q->gen ^= 1; 609 } 610 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 611 } 612 613 static void 614 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 615 { 616 uint32_t *addr; 617 618 addr = arg; 619 *addr = segs[0].ds_addr; 620 } 621 622 static int 623 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 624 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 625 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 626 { 627 size_t len = nelem * elem_size; 628 void *s = NULL; 629 void *p = NULL; 630 int err; 631 632 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 633 BUS_SPACE_MAXADDR_32BIT, 634 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 635 len, 0, NULL, NULL, tag)) != 0) { 636 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 637 return (ENOMEM); 638 } 639 640 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 641 map)) != 0) { 642 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 643 return (ENOMEM); 644 } 645 646 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 647 bzero(p, len); 648 *(void **)desc = p; 649 650 if (sw_size) { 651 len = nelem * sw_size; 652 s = malloc(len, M_DEVBUF, M_WAITOK); 653 bzero(s, len); 654 *(void **)sdesc = s; 655 } 656 if (parent_entry_tag == NULL) 657 return (0); 658 659 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 660 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 661 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 662 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 663 NULL, NULL, entry_tag)) != 0) { 664 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 665 return (ENOMEM); 666 } 667 return (0); 668 } 669 670 static void 671 sge_slow_intr_handler(void *arg, int ncount) 672 { 673 adapter_t *sc = arg; 674 675 t3_slow_intr_handler(sc); 676 } 677 678 /** 679 * sge_timer_cb - perform periodic maintenance of an SGE qset 680 * @data: the SGE queue set to maintain 681 * 682 * Runs periodically from a timer to perform maintenance of an SGE queue 683 * set. It performs two tasks: 684 * 685 * a) Cleans up any completed Tx descriptors that may still be pending. 686 * Normal descriptor cleanup happens when new packets are added to a Tx 687 * queue so this timer is relatively infrequent and does any cleanup only 688 * if the Tx queue has not seen any new packets in a while. We make a 689 * best effort attempt to reclaim descriptors, in that we don't wait 690 * around if we cannot get a queue's lock (which most likely is because 691 * someone else is queueing new packets and so will also handle the clean 692 * up). Since control queues use immediate data exclusively we don't 693 * bother cleaning them up here. 694 * 695 * b) Replenishes Rx queues that have run out due to memory shortage. 696 * Normally new Rx buffers are added when existing ones are consumed but 697 * when out of memory a queue can become empty. 
We try to add only a few 698 * buffers here, the queue will be replenished fully as these new buffers 699 * are used up if memory shortage has subsided. 700 * 701 * c) Return coalesced response queue credits in case a response queue is 702 * starved. 703 * 704 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 705 * fifo overflows and the FW doesn't implement any recovery scheme yet. 706 */ 707 static void 708 sge_timer_cb(void *arg) 709 { 710 adapter_t *sc = arg; 711 struct port_info *p; 712 struct sge_qset *qs; 713 struct sge_txq *txq; 714 int i, j; 715 int reclaim_eth, reclaim_ofl, refill_rx; 716 717 for (i = 0; i < sc->params.nports; i++) 718 for (j = 0; j < sc->port[i].nqsets; j++) { 719 qs = &sc->sge.qs[i + j]; 720 txq = &qs->txq[0]; 721 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 722 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 723 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 724 (qs->fl[1].credits < qs->fl[1].size)); 725 if (reclaim_eth || reclaim_ofl || refill_rx) { 726 p = &sc->port[i]; 727 taskqueue_enqueue(p->tq, &p->timer_reclaim_task); 728 break; 729 } 730 } 731 if (sc->params.nports > 2) { 732 int i; 733 734 for_each_port(sc, i) { 735 struct port_info *pi = &sc->port[i]; 736 737 t3_write_reg(sc, A_SG_KDOORBELL, 738 F_SELEGRCNTX | 739 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 740 } 741 } 742 if (sc->open_device_map != 0) 743 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 744 } 745 746 /* 747 * This is meant to be a catch-all function to keep sge state private 748 * to sge.c 749 * 750 */ 751 int 752 t3_sge_init_adapter(adapter_t *sc) 753 { 754 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 755 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 756 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 757 return (0); 758 } 759 760 int 761 t3_sge_init_port(struct port_info *p) 762 { 763 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p); 764 return (0); 765 } 766 767 void 768 t3_sge_deinit_sw(adapter_t *sc) 769 { 770 int i; 771 772 callout_drain(&sc->sge_timer_ch); 773 if (sc->tq) 774 taskqueue_drain(sc->tq, &sc->slow_intr_task); 775 for (i = 0; i < sc->params.nports; i++) 776 if (sc->port[i].tq != NULL) 777 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 778 } 779 780 /** 781 * refill_rspq - replenish an SGE response queue 782 * @adapter: the adapter 783 * @q: the response queue to replenish 784 * @credits: how many new responses to make available 785 * 786 * Replenishes a response queue by making the supplied number of responses 787 * available to HW. 788 */ 789 static __inline void 790 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 791 { 792 793 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 794 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 795 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 796 } 797 798 static __inline void 799 sge_txq_reclaim_(struct sge_txq *txq) 800 { 801 int reclaimable, i, n; 802 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 803 struct port_info *p; 804 805 p = txq->port; 806 reclaim_more: 807 n = 0; 808 reclaimable = desc_reclaimable(txq); 809 if (reclaimable > 0 && mtx_trylock(&txq->lock)) { 810 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec); 811 mtx_unlock(&txq->lock); 812 } 813 if (n == 0) 814 return; 815 816 for (i = 0; i < n; i++) { 817 m_freem_vec(m_vec[i]); 818 } 819 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 820 txq->size - txq->in_use >= TX_START_MAX_DESC) { 821 txq_fills++; 822 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 823 taskqueue_enqueue(p->tq, &p->start_task); 824 } 825 826 if (n) 827 goto reclaim_more; 828 } 829 830 static void 831 sge_txq_reclaim_handler(void *arg, int ncount) 832 { 833 struct sge_txq *q = arg; 834 835 sge_txq_reclaim_(q); 836 } 837 838 static void 839 sge_timer_reclaim(void *arg, int ncount) 840 { 841 struct port_info *p = arg; 842 int i, nqsets = p->nqsets; 843 adapter_t *sc = p->adapter; 844 struct sge_qset *qs; 845 struct sge_txq *txq; 846 struct mtx *lock; 847 848 for (i = 0; i < nqsets; i++) { 849 qs = &sc->sge.qs[i]; 850 txq = &qs->txq[TXQ_ETH]; 851 sge_txq_reclaim_(txq); 852 853 txq = &qs->txq[TXQ_OFLD]; 854 sge_txq_reclaim_(txq); 855 856 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 857 &sc->sge.qs[0].rspq.lock; 858 859 if (mtx_trylock(lock)) { 860 /* XXX currently assume that we are *NOT* polling */ 861 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 862 863 if (qs->fl[0].credits < qs->fl[0].size - 16) 864 __refill_fl(sc, &qs->fl[0]); 865 if (qs->fl[1].credits < qs->fl[1].size - 16) 866 __refill_fl(sc, &qs->fl[1]); 867 868 if (status & (1 << qs->rspq.cntxt_id)) { 869 if (qs->rspq.credits) { 870 refill_rspq(sc, &qs->rspq, 1); 871 qs->rspq.credits--; 872 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 873 1 << qs->rspq.cntxt_id); 874 } 875 } 876 mtx_unlock(lock); 877 } 878 } 879 } 880 881 /** 882 * init_qset_cntxt - initialize an SGE queue set context info 883 * @qs: the queue set 884 * @id: the queue set id 885 * 886 * Initializes the TIDs and context ids for the queues of a queue set. 
887 */ 888 static void 889 init_qset_cntxt(struct sge_qset *qs, u_int id) 890 { 891 892 qs->rspq.cntxt_id = id; 893 qs->fl[0].cntxt_id = 2 * id; 894 qs->fl[1].cntxt_id = 2 * id + 1; 895 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 896 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 897 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 898 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 899 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 900 } 901 902 903 static void 904 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 905 { 906 txq->in_use += ndesc; 907 /* 908 * XXX we don't handle stopping of queue 909 * presumably start handles this when we bump against the end 910 */ 911 txqs->gen = txq->gen; 912 txq->unacked += ndesc; 913 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 914 txq->unacked &= 7; 915 txqs->pidx = txq->pidx; 916 txq->pidx += ndesc; 917 918 if (txq->pidx >= txq->size) { 919 txq->pidx -= txq->size; 920 txq->gen ^= 1; 921 } 922 923 } 924 925 /** 926 * calc_tx_descs - calculate the number of Tx descriptors for a packet 927 * @m: the packet mbufs 928 * @nsegs: the number of segments 929 * 930 * Returns the number of Tx descriptors needed for the given Ethernet 931 * packet. Ethernet packets require addition of WR and CPL headers. 932 */ 933 static __inline unsigned int 934 calc_tx_descs(const struct mbuf *m, int nsegs) 935 { 936 unsigned int flits; 937 938 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 939 return 1; 940 941 flits = sgl_len(nsegs) + 2; 942 #ifdef TSO_SUPPORTED 943 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 944 flits++; 945 #endif 946 return flits_to_desc(flits); 947 } 948 949 static unsigned int 950 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 951 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 952 { 953 struct mbuf *m0; 954 int err, pktlen; 955 956 m0 = *m; 957 pktlen = m0->m_pkthdr.len; 958 959 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 960 #ifdef DEBUG 961 if (err) { 962 int n = 0; 963 struct mbuf *mtmp = m0; 964 while(mtmp) { 965 n++; 966 mtmp = mtmp->m_next; 967 } 968 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 969 err, m0->m_pkthdr.len, n); 970 } 971 #endif 972 if (err == EFBIG) { 973 /* Too many segments, try to defrag */ 974 m0 = m_defrag(m0, M_DONTWAIT); 975 if (m0 == NULL) { 976 m_freem(*m); 977 *m = NULL; 978 return (ENOBUFS); 979 } 980 *m = m0; 981 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 982 } 983 984 if (err == ENOMEM) { 985 return (err); 986 } 987 988 if (err) { 989 if (cxgb_debug) 990 printf("map failure err=%d pktlen=%d\n", err, pktlen); 991 m_freem_vec(m0); 992 *m = NULL; 993 return (err); 994 } 995 996 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE); 997 stx->flags |= TX_SW_DESC_MAPPED; 998 999 return (0); 1000 } 1001 1002 /** 1003 * make_sgl - populate a scatter/gather list for a packet 1004 * @sgp: the SGL to populate 1005 * @segs: the packet dma segments 1006 * @nsegs: the number of segments 1007 * 1008 * Generates a scatter/gather list for the buffers that make up a packet 1009 * and returns the SGL size in 8-byte words. The caller must size the SGL 1010 * appropriately. 
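 *
 * A typical caller sizes the SGL for the worst case and obtains the
 * flit count from sgl_len(), as t3_encap() does:
 *
 *	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
 *	...
 *	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
 *	make_sgl(sgp, segs, nsegs);
 *	sgl_flits = sgl_len(nsegs);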
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	if (idx)
		sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race
 * where the HW could go to sleep just after we checked; in that case
 * the interrupt handler will detect the outstanding TX packet and ring
 * the doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}


/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL.  If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header.  Otherwise we distribute the
 * SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{
	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen?
*/ 1106 wr_gen2(txd, txqs->gen); 1107 } else { 1108 unsigned int ogen = txqs->gen; 1109 const uint64_t *fp = (const uint64_t *)sgl; 1110 struct work_request_hdr *wp = wrp; 1111 1112 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1113 V_WR_SGLSFLT(flits)) | wr_hi; 1114 1115 while (sgl_flits) { 1116 unsigned int avail = WR_FLITS - flits; 1117 1118 if (avail > sgl_flits) 1119 avail = sgl_flits; 1120 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1121 sgl_flits -= avail; 1122 ndesc--; 1123 if (!sgl_flits) 1124 break; 1125 1126 fp += avail; 1127 txd++; 1128 txsd++; 1129 if (++txqs->pidx == txq->size) { 1130 txqs->pidx = 0; 1131 txqs->gen ^= 1; 1132 txd = txq->desc; 1133 txsd = txq->sdesc; 1134 } 1135 1136 /* 1137 * when the head of the mbuf chain 1138 * is freed all clusters will be freed 1139 * with it 1140 */ 1141 txsd->m = NULL; 1142 wrp = (struct work_request_hdr *)txd; 1143 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1144 V_WR_SGLSFLT(1)) | wr_hi; 1145 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1146 sgl_flits + 1)) | 1147 V_WR_GEN(txqs->gen)) | wr_lo; 1148 wr_gen2(txd, txqs->gen); 1149 flits = 1; 1150 } 1151 wrp->wr_hi |= htonl(F_WR_EOP); 1152 wmb(); 1153 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1154 wr_gen2((struct tx_desc *)wp, ogen); 1155 } 1156 } 1157 1158 1159 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1160 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1161 1162 int 1163 t3_encap(struct port_info *p, struct mbuf **m) 1164 { 1165 adapter_t *sc; 1166 struct mbuf *m0; 1167 struct sge_qset *qs; 1168 struct sge_txq *txq; 1169 struct tx_sw_desc *stx; 1170 struct txq_state txqs; 1171 unsigned int nsegs, ndesc, flits, cntrl, mlen; 1172 int err, tso_info = 0; 1173 1174 struct work_request_hdr *wrp; 1175 struct tx_sw_desc *txsd; 1176 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1177 bus_dma_segment_t segs[TX_MAX_SEGS]; 1178 uint32_t wr_hi, wr_lo, sgl_flits; 1179 1180 struct tx_desc *txd; 1181 struct cpl_tx_pkt *cpl; 1182 1183 DPRINTF("t3_encap "); 1184 m0 = *m; 1185 sc = p->adapter; 1186 qs = &sc->sge.qs[p->first_qset]; 1187 txq = &qs->txq[TXQ_ETH]; 1188 stx = &txq->sdesc[txq->pidx]; 1189 txd = &txq->desc[txq->pidx]; 1190 cpl = (struct cpl_tx_pkt *)txd; 1191 mlen = m0->m_pkthdr.len; 1192 cpl->len = htonl(mlen | 0x80000000); 1193 1194 DPRINTF("mlen=%d\n", mlen); 1195 /* 1196 * XXX handle checksum, TSO, and VLAN here 1197 * 1198 */ 1199 cntrl = V_TXPKT_INTF(p->port); 1200 1201 /* 1202 * XXX need to add VLAN support for 6.x 1203 */ 1204 #ifdef VLAN_SUPPORTED 1205 if (m0->m_flags & M_VLANTAG) 1206 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 1207 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1208 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1209 #endif 1210 if (tso_info) { 1211 int eth_type; 1212 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1213 struct ip *ip; 1214 struct tcphdr *tcp; 1215 uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
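		 * TCPPKTHDRSIZE works out to ETHER_HDR_LEN +
		 * ETHER_VLAN_ENCAP_LEN + 20 + 20 = 58 bytes, so the
		 * on-stack copy is small.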
*/ 1216 1217 txd->flit[2] = 0; 1218 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1219 hdr->cntrl = htonl(cntrl); 1220 1221 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1222 pkthdr = &tmp[0]; 1223 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1224 } else { 1225 pkthdr = mtod(m0, uint8_t *); 1226 } 1227 1228 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1229 eth_type = CPL_ETH_II_VLAN; 1230 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1231 ETHER_VLAN_ENCAP_LEN); 1232 } else { 1233 eth_type = CPL_ETH_II; 1234 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1235 } 1236 tcp = (struct tcphdr *)((uint8_t *)ip + 1237 sizeof(*ip)); 1238 1239 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1240 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1241 V_LSO_TCPHDR_WORDS(tcp->th_off); 1242 hdr->lso_info = htonl(tso_info); 1243 flits = 3; 1244 } else { 1245 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1246 cpl->cntrl = htonl(cntrl); 1247 1248 if (mlen <= WR_LEN - sizeof(*cpl)) { 1249 txq_prod(txq, 1, &txqs); 1250 txq->sdesc[txqs.pidx].m = m0; 1251 m_set_priority(m0, txqs.pidx); 1252 1253 if (m0->m_len == m0->m_pkthdr.len) 1254 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen); 1255 else 1256 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1257 1258 flits = (mlen + 7) / 8 + 2; 1259 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1260 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1261 F_WR_SOP | F_WR_EOP | txqs.compl); 1262 wmb(); 1263 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1264 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1265 1266 wr_gen2(txd, txqs.gen); 1267 check_ring_tx_db(sc, txq); 1268 return (0); 1269 } 1270 flits = 2; 1271 } 1272 1273 wrp = (struct work_request_hdr *)txd; 1274 1275 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { 1276 return (err); 1277 } 1278 m0 = *m; 1279 ndesc = calc_tx_descs(m0, nsegs); 1280 1281 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1282 make_sgl(sgp, segs, nsegs); 1283 1284 sgl_flits = sgl_len(nsegs); 1285 1286 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1287 txq_prod(txq, ndesc, &txqs); 1288 txsd = &txq->sdesc[txqs.pidx]; 1289 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1290 wr_lo = htonl(V_WR_TID(txq->token)); 1291 txsd->m = m0; 1292 m_set_priority(m0, txqs.pidx); 1293 1294 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1295 check_ring_tx_db(p->adapter, txq); 1296 1297 return (0); 1298 } 1299 1300 1301 /** 1302 * write_imm - write a packet into a Tx descriptor as immediate data 1303 * @d: the Tx descriptor to write 1304 * @m: the packet 1305 * @len: the length of packet data to write as immediate data 1306 * @gen: the generation bit value to write 1307 * 1308 * Writes a packet as immediate data into a Tx descriptor. The packet 1309 * contains a work request at its beginning. We must write the packet 1310 * carefully so the SGE doesn't read accidentally before it's written in 1311 * its entirety. 
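 *
 * Callers such as ctrl_xmit() and restart_ctrlq() invoke this as
 *
 *	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
 *
 * and rely on the mbuf being freed here once its contents have been
 * copied into the descriptor.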
1312 */ 1313 static __inline void 1314 write_imm(struct tx_desc *d, struct mbuf *m, 1315 unsigned int len, unsigned int gen) 1316 { 1317 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1318 struct work_request_hdr *to = (struct work_request_hdr *)d; 1319 1320 memcpy(&to[1], &from[1], len - sizeof(*from)); 1321 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1322 V_WR_BCNTLFLT(len & 7)); 1323 wmb(); 1324 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1325 V_WR_LEN((len + 7) / 8)); 1326 wr_gen2(d, gen); 1327 m_freem(m); 1328 } 1329 1330 /** 1331 * check_desc_avail - check descriptor availability on a send queue 1332 * @adap: the adapter 1333 * @q: the TX queue 1334 * @m: the packet needing the descriptors 1335 * @ndesc: the number of Tx descriptors needed 1336 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1337 * 1338 * Checks if the requested number of Tx descriptors is available on an 1339 * SGE send queue. If the queue is already suspended or not enough 1340 * descriptors are available the packet is queued for later transmission. 1341 * Must be called with the Tx queue locked. 1342 * 1343 * Returns 0 if enough descriptors are available, 1 if there aren't 1344 * enough descriptors and the packet has been queued, and 2 if the caller 1345 * needs to retry because there weren't enough descriptors at the 1346 * beginning of the call but some freed up in the mean time. 1347 */ 1348 static __inline int 1349 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1350 struct mbuf *m, unsigned int ndesc, 1351 unsigned int qid) 1352 { 1353 /* 1354 * XXX We currently only use this for checking the control queue 1355 * the control queue is only used for binding qsets which happens 1356 * at init time so we are guaranteed enough descriptors 1357 */ 1358 if (__predict_false(!mbufq_empty(&q->sendq))) { 1359 addq_exit: mbufq_tail(&q->sendq, m); 1360 return 1; 1361 } 1362 if (__predict_false(q->size - q->in_use < ndesc)) { 1363 1364 struct sge_qset *qs = txq_to_qset(q, qid); 1365 1366 setbit(&qs->txq_stopped, qid); 1367 smp_mb(); 1368 1369 if (should_restart_tx(q) && 1370 test_and_clear_bit(qid, &qs->txq_stopped)) 1371 return 2; 1372 1373 q->stops++; 1374 goto addq_exit; 1375 } 1376 return 0; 1377 } 1378 1379 1380 /** 1381 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1382 * @q: the SGE control Tx queue 1383 * 1384 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1385 * that send only immediate data (presently just the control queues) and 1386 * thus do not have any mbufs 1387 */ 1388 static __inline void 1389 reclaim_completed_tx_imm(struct sge_txq *q) 1390 { 1391 unsigned int reclaim = q->processed - q->cleaned; 1392 1393 mtx_assert(&q->lock, MA_OWNED); 1394 1395 q->in_use -= reclaim; 1396 q->cleaned += reclaim; 1397 } 1398 1399 static __inline int 1400 immediate(const struct mbuf *m) 1401 { 1402 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1403 } 1404 1405 /** 1406 * ctrl_xmit - send a packet through an SGE control Tx queue 1407 * @adap: the adapter 1408 * @q: the control queue 1409 * @m: the packet 1410 * 1411 * Send a packet through an SGE control Tx queue. Packets sent through 1412 * a control queue must fit entirely as immediate data in a single Tx 1413 * descriptor and have no page fragments. 
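 *
 * "Fits as immediate data" is the immediate() check above: both
 * m->m_len and m->m_pkthdr.len must be at most WR_LEN (WR_FLITS * 8
 * bytes); anything larger is simply dropped.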
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	       (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 * t3_free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 * queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
			MTX_DESTROY(&q->txq[i].lock);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

	bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context.  In the latter
 * case it also disables any pending queue restart tasks.  Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasks as it cannot wait, however the tasks will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasks will be stopped
 * if they are still running.
1615 */ 1616 void 1617 t3_sge_stop(adapter_t *sc) 1618 { 1619 int i, nqsets; 1620 1621 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1622 1623 if (sc->tq == NULL) 1624 return; 1625 1626 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1627 nqsets += sc->port[i].nqsets; 1628 1629 for (i = 0; i < nqsets; ++i) { 1630 struct sge_qset *qs = &sc->sge.qs[i]; 1631 1632 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 1633 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 1634 } 1635 } 1636 1637 1638 /** 1639 * free_tx_desc - reclaims Tx descriptors and their buffers 1640 * @adapter: the adapter 1641 * @q: the Tx queue to reclaim descriptors from 1642 * @n: the number of descriptors to reclaim 1643 * 1644 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1645 * Tx buffers. Called with the Tx queue lock held. 1646 */ 1647 int 1648 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec) 1649 { 1650 struct tx_sw_desc *d; 1651 unsigned int cidx = q->cidx; 1652 int nbufs = 0; 1653 1654 #ifdef T3_TRACE 1655 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1656 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1657 #endif 1658 d = &q->sdesc[cidx]; 1659 1660 while (n-- > 0) { 1661 DPRINTF("cidx=%d d=%p\n", cidx, d); 1662 if (d->m) { 1663 if (d->flags & TX_SW_DESC_MAPPED) { 1664 bus_dmamap_unload(q->entry_tag, d->map); 1665 bus_dmamap_destroy(q->entry_tag, d->map); 1666 d->flags &= ~TX_SW_DESC_MAPPED; 1667 } 1668 if (m_get_priority(d->m) == cidx) { 1669 m_vec[nbufs] = d->m; 1670 d->m = NULL; 1671 nbufs++; 1672 } else { 1673 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); 1674 } 1675 } 1676 ++d; 1677 if (++cidx == q->size) { 1678 cidx = 0; 1679 d = q->sdesc; 1680 } 1681 } 1682 q->cidx = cidx; 1683 1684 return (nbufs); 1685 } 1686 1687 /** 1688 * is_new_response - check if a response is newly written 1689 * @r: the response descriptor 1690 * @q: the response queue 1691 * 1692 * Returns true if a response descriptor contains a yet unprocessed 1693 * response. 1694 */ 1695 static __inline int 1696 is_new_response(const struct rsp_desc *r, 1697 const struct sge_rspq *q) 1698 { 1699 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1700 } 1701 1702 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1703 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1704 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1705 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1706 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1707 1708 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1709 #define NOMEM_INTR_DELAY 2500 1710 1711 /** 1712 * write_ofld_wr - write an offload work request 1713 * @adap: the adapter 1714 * @m: the packet to send 1715 * @q: the Tx queue 1716 * @pidx: index of the first Tx descriptor to write 1717 * @gen: the generation value to use 1718 * @ndesc: number of descriptors the packet will occupy 1719 * 1720 * Write an offload work request to send the supplied packet. The packet 1721 * data already carry the work request with most fields populated. 
1722 */ 1723 static void 1724 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1725 struct sge_txq *q, unsigned int pidx, 1726 unsigned int gen, unsigned int ndesc, 1727 bus_dma_segment_t *segs, unsigned int nsegs) 1728 { 1729 unsigned int sgl_flits, flits; 1730 struct work_request_hdr *from; 1731 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1732 struct tx_desc *d = &q->desc[pidx]; 1733 struct txq_state txqs; 1734 1735 if (immediate(m)) { 1736 q->sdesc[pidx].m = NULL; 1737 write_imm(d, m, m->m_len, gen); 1738 return; 1739 } 1740 1741 /* Only TX_DATA builds SGLs */ 1742 1743 from = mtod(m, struct work_request_hdr *); 1744 memcpy(&d->flit[1], &from[1], 1745 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from)); 1746 1747 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1748 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1749 1750 make_sgl(sgp, segs, nsegs); 1751 sgl_flits = sgl_len(nsegs); 1752 1753 txqs.gen = q->gen; 1754 txqs.pidx = q->pidx; 1755 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1756 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1757 from->wr_hi, from->wr_lo); 1758 } 1759 1760 /** 1761 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1762 * @m: the packet 1763 * 1764 * Returns the number of Tx descriptors needed for the given offload 1765 * packet. These packets are already fully constructed. 1766 */ 1767 static __inline unsigned int 1768 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1769 { 1770 unsigned int flits, cnt = 0; 1771 1772 1773 if (m->m_len <= WR_LEN) 1774 return 1; /* packet fits as immediate data */ 1775 1776 if (m->m_flags & M_IOVEC) 1777 cnt = mtomv(m)->mv_count; 1778 1779 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */ 1780 1781 return flits_to_desc(flits + sgl_len(cnt)); 1782 } 1783 1784 /** 1785 * ofld_xmit - send a packet through an offload queue 1786 * @adap: the adapter 1787 * @q: the Tx offload queue 1788 * @m: the packet 1789 * 1790 * Send an offload packet through an SGE offload queue. 
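 *
 * Returns NET_XMIT_SUCCESS once the packet has been handed to the
 * hardware, NET_XMIT_CN when it had to be queued on the sendq for a
 * later restart by restart_offloadq(), or an errno from
 * busdma_map_mbufs() if the DMA mapping fails.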
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	unsigned int pidx, gen, nsegs;
	unsigned int ndesc;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
	if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
		mtx_unlock(&q->lock);
		return (ret);
	}
	ndesc = calc_tx_descs_ofld(m, nsegs);
again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			m_set_priority(m, ndesc);	/* save for restart */
			mtx_unlock(&q->lock);
			return NET_XMIT_CN;
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, skb->len, skb->len - skb->data_len,
	    skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
	return NET_XMIT_SUCCESS;
}

/**
 * restart_offloadq - restart a suspended offload queue
 * @qs: the queue set containing the offload queue
 *
 * Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int nsegs, i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
}

/**
 * queue_set - return the queue set a packet should use
 * @m: the packet
 *
 * Maps a packet to the SGE queue set it should use.  The desired queue
 * set is carried in bits 1-3 in the packet's priority.
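 *
 * For example, a priority of 5 (binary 101) maps to queue set 2; since
 * bit 0 is set, is_ctrl_pkt() steers it to that set's CTRL queue (see
 * t3_offload_tx() below).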
1918 */ 1919 static __inline int 1920 queue_set(const struct mbuf *m) 1921 { 1922 return m_get_priority(m) >> 1; 1923 } 1924 1925 /** 1926 * is_ctrl_pkt - return whether an offload packet is a control packet 1927 * @m: the packet 1928 * 1929 * Determines whether an offload packet should use an OFLD or a CTRL 1930 * Tx queue. This is indicated by bit 0 in the packet's priority. 1931 */ 1932 static __inline int 1933 is_ctrl_pkt(const struct mbuf *m) 1934 { 1935 return m_get_priority(m) & 1; 1936 } 1937 1938 /** 1939 * t3_offload_tx - send an offload packet 1940 * @tdev: the offload device to send to 1941 * @m: the packet 1942 * 1943 * Sends an offload packet. We use the packet priority to select the 1944 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1945 * should be sent as regular or control, bits 1-3 select the queue set. 1946 */ 1947 int 1948 t3_offload_tx(struct toedev *tdev, struct mbuf *m) 1949 { 1950 adapter_t *adap = tdev2adap(tdev); 1951 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 1952 1953 if (__predict_false(is_ctrl_pkt(m))) 1954 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 1955 1956 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 1957 } 1958 1959 /** 1960 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 1961 * @tdev: the offload device that will be receiving the packets 1962 * @q: the SGE response queue that assembled the bundle 1963 * @m: the partial bundle 1964 * @n: the number of packets in the bundle 1965 * 1966 * Delivers a (partial) bundle of Rx offload packets to an offload device. 1967 */ 1968 static __inline void 1969 deliver_partial_bundle(struct toedev *tdev, 1970 struct sge_rspq *q, 1971 struct mbuf *mbufs[], int n) 1972 { 1973 if (n) { 1974 q->offload_bundles++; 1975 cxgb_ofld_recv(tdev, mbufs, n); 1976 } 1977 } 1978 1979 static __inline int 1980 rx_offload(struct toedev *tdev, struct sge_rspq *rq, 1981 struct mbuf *m, struct mbuf *rx_gather[], 1982 unsigned int gather_idx) 1983 { 1984 rq->offload_pkts++; 1985 m->m_pkthdr.header = mtod(m, void *); 1986 1987 rx_gather[gather_idx++] = m; 1988 if (gather_idx == RX_BUNDLE_SIZE) { 1989 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 1990 gather_idx = 0; 1991 rq->offload_bundles++; 1992 } 1993 return (gather_idx); 1994 } 1995 1996 static void 1997 restart_tx(struct sge_qset *qs) 1998 { 1999 struct adapter *sc = qs->port->adapter; 2000 2001 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2002 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2003 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2004 qs->txq[TXQ_OFLD].restarts++; 2005 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2006 } 2007 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2008 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2009 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2010 qs->txq[TXQ_CTRL].restarts++; 2011 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2012 } 2013 } 2014 2015 /** 2016 * t3_sge_alloc_qset - initialize an SGE queue set 2017 * @sc: the controller softc 2018 * @id: the queue set id 2019 * @nports: how many Ethernet ports will be using this queue set 2020 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2021 * @p: configuration parameters for this queue set 2022 * @ntxq: number of Tx queues for the queue set 2023 * @pi: port info for queue set 2024 * 2025 * Allocate resources and initialize an SGE queue set. A queue set 2026 * comprises a response queue, two Rx free-buffer queues, and up to 3 2027 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2028 * queue, offload queue, and control queue. 2029 */ 2030 int 2031 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2032 const struct qset_params *p, int ntxq, struct port_info *pi) 2033 { 2034 struct sge_qset *q = &sc->sge.qs[id]; 2035 int i, ret = 0; 2036 2037 init_qset_cntxt(q, id); 2038 2039 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2040 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2041 &q->fl[0].desc, &q->fl[0].sdesc, 2042 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2043 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2044 printf("error %d from alloc ring fl0\n", ret); 2045 goto err; 2046 } 2047 2048 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2049 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2050 &q->fl[1].desc, &q->fl[1].sdesc, 2051 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2052 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2053 printf("error %d from alloc ring fl1\n", ret); 2054 goto err; 2055 } 2056 2057 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2058 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2059 &q->rspq.desc_tag, &q->rspq.desc_map, 2060 NULL, NULL)) != 0) { 2061 printf("error %d from alloc ring rspq\n", ret); 2062 goto err; 2063 } 2064 2065 for (i = 0; i < ntxq; ++i) { 2066 /* 2067 * The control queue always uses immediate data so does not 2068 * need to keep track of any mbufs. 2069 * XXX Placeholder for future TOE support. 2070 */ 2071 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2072 2073 if ((ret = alloc_ring(sc, p->txq_size[i], 2074 sizeof(struct tx_desc), sz, 2075 &q->txq[i].phys_addr, &q->txq[i].desc, 2076 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2077 &q->txq[i].desc_map, 2078 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2079 printf("error %d from alloc ring tx %i\n", ret, i); 2080 goto err; 2081 } 2082 mbufq_init(&q->txq[i].sendq); 2083 q->txq[i].gen = 1; 2084 q->txq[i].size = p->txq_size[i]; 2085 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2086 device_get_unit(sc->dev), irq_vec_idx, i); 2087 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2088 } 2089 2090 q->txq[TXQ_ETH].port = pi; 2091 2092 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2093 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2094 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2095 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2096 2097 2098 2099 2100 q->fl[0].gen = q->fl[1].gen = 1; 2101 q->fl[0].size = p->fl_size; 2102 q->fl[1].size = p->jumbo_size; 2103 2104 q->rspq.gen = 1; 2105 q->rspq.cidx = 0; 2106 q->rspq.size = p->rspq_size; 2107 2108 q->txq[TXQ_ETH].stop_thres = nports * 2109 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2110 2111 q->fl[0].buf_size = MCLBYTES; 2112 q->fl[0].zone = zone_clust; 2113 q->fl[0].type = EXT_CLUSTER; 2114 q->fl[1].buf_size = MJUMPAGESIZE; 2115 q->fl[1].zone = zone_jumbop; 2116 q->fl[1].type = EXT_JUMBOP; 2117 2118 q->lro.enabled = lro_default; 2119 2120 mtx_lock(&sc->sge.reg_lock); 2121 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2122 q->rspq.phys_addr, q->rspq.size, 2123 q->fl[0].buf_size, 1, 0); 2124 if (ret) { 2125 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2126 goto err_unlock; 2127 } 2128 2129 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2130 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2131 q->fl[i].phys_addr, q->fl[i].size, 
		    q->fl[i].buf_size, p->cong_thres, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n",
			    ret, i);
			goto err_unlock;
		}
	}

	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
	    SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
	    q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
		    USE_GTS, SGE_CNTXT_OFLD, id,
		    q->txq[TXQ_OFLD].phys_addr,
		    q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
		    SGE_CNTXT_CTRL, id,
		    q->txq[TXQ_CTRL].phys_addr,
		    q->txq[TXQ_CTRL].size,
		    q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
	    device_get_unit(sc->dev), irq_vec_idx);
	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);

	mtx_unlock(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
	    V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock(&sc->sge.reg_lock);
err:
	t3_free_qset(sc, q);

	return (ret);
}

void
t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
	struct ifnet *ifp = pi->ifp;

	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *),
	    cpl->iff);
	if (&pi->adapter->port[cpl->iff] != pi)
		panic("bad port index %d m->m_data=%p\n", cpl->iff,
		    mtod(m, uint8_t *));

	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
	    cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}
	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (__predict_false(cpl->vlan_valid)) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}
#endif

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
	m_explode(m);
	/*
	 * adjust after conversion to mbuf chain
	 */
	m_adj(m, sizeof(*cpl) + ethpad);

	(*ifp->if_input)(ifp, m);
}

/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *	@qs: the qset that the SGE free list holding the packet belongs to
 *	@m: the mbuf header that will hold the packet data
 *	@r: response descriptor
 *
 *	Get the next packet from a free list and complete setup of the
 *	mbuf.  If the packet is small we make a copy and recycle the
 *	original buffer, otherwise we use the original buffer itself.  If a
 *	positive drop threshold is supplied packets are dropped and their
 *	buffers recycled if (a) the number of remaining buffers is under the
 *	threshold and the packet is too big to copy, or (b) the packet should
 *	be copied but there is no memory for the copy.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	void *cl;
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		cl = mtod(m, void *);
		memcpy(cl, sd->cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
	} else {
		cl = sd->cl;
		bus_dmamap_unload(fl->entry_tag, sd->map);
	}
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		if (cl == sd->cl)
			m_cljset(m, cl, fl->type);
		m->m_len = m->m_pkthdr.len = len;
		ret = 1;
		goto done;
		break;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m_iovinit(m);
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		ret = 1;
		break;
	}
	m_iovappend(m, cl, fl->buf_size, len, 0);

done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

/**
 *	handle_rsp_cntrl_info - handles control information in a response
 *	@qs: the queue set corresponding to the response
 *	@flags: the response control flags
 *
 *	Handles the control information of an SGE response, such as GTS
 *	indications and completion credits for the queue set's Tx queues.
 *	HW coalesces credits, we don't do any extra SW coalescing.
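 *
 *	The TXQ0, TXQ1 and TXQ2 credit fields in @flags are credited to the
 *	queue set's ETH, OFLD and CTRL Tx queues, respectively; once enough
 *	ETH descriptors become reclaimable the port's reclaim task is
 *	scheduled.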
2318 */ 2319 static __inline void 2320 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2321 { 2322 unsigned int credits; 2323 2324 #if USE_GTS 2325 if (flags & F_RSPD_TXQ0_GTS) 2326 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2327 #endif 2328 credits = G_RSPD_TXQ0_CR(flags); 2329 if (credits) { 2330 qs->txq[TXQ_ETH].processed += credits; 2331 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 2332 taskqueue_enqueue(qs->port->adapter->tq, 2333 &qs->port->timer_reclaim_task); 2334 } 2335 2336 credits = G_RSPD_TXQ2_CR(flags); 2337 if (credits) 2338 qs->txq[TXQ_CTRL].processed += credits; 2339 2340 # if USE_GTS 2341 if (flags & F_RSPD_TXQ1_GTS) 2342 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2343 # endif 2344 credits = G_RSPD_TXQ1_CR(flags); 2345 if (credits) 2346 qs->txq[TXQ_OFLD].processed += credits; 2347 } 2348 2349 static void 2350 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2351 unsigned int sleeping) 2352 { 2353 ; 2354 } 2355 2356 /** 2357 * process_responses - process responses from an SGE response queue 2358 * @adap: the adapter 2359 * @qs: the queue set to which the response queue belongs 2360 * @budget: how many responses can be processed in this round 2361 * 2362 * Process responses from an SGE response queue up to the supplied budget. 2363 * Responses include received packets as well as credits and other events 2364 * for the queues that belong to the response queue's queue set. 2365 * A negative budget is effectively unlimited. 2366 * 2367 * Additionally choose the interrupt holdoff time for the next interrupt 2368 * on this queue. If the system is under memory shortage use a fairly 2369 * long delay to help recovery. 2370 */ 2371 static int 2372 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2373 { 2374 struct sge_rspq *rspq = &qs->rspq; 2375 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2376 int budget_left = budget; 2377 unsigned int sleeping = 0; 2378 int lro = qs->lro.enabled; 2379 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2380 int ngathered = 0; 2381 #ifdef DEBUG 2382 static int last_holdoff = 0; 2383 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2384 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2385 last_holdoff = rspq->holdoff_tmr; 2386 } 2387 #endif 2388 rspq->next_holdoff = rspq->holdoff_tmr; 2389 2390 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2391 int eth, eop = 0, ethpad = 0; 2392 uint32_t flags = ntohl(r->flags); 2393 uint32_t rss_csum = *(const uint32_t *)r; 2394 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 2395 2396 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2397 2398 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2399 /* XXX */ 2400 printf("async notification\n"); 2401 2402 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2403 struct mbuf *m = NULL; 2404 2405 if (cxgb_debug) 2406 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx); 2407 if (rspq->m == NULL) 2408 rspq->m = m_gethdr(M_DONTWAIT, MT_DATA); 2409 else 2410 m = m_gethdr(M_DONTWAIT, MT_DATA); 2411 2412 /* 2413 * XXX revisit me 2414 */ 2415 if (rspq->m == NULL && m == NULL) { 2416 rspq->next_holdoff = NOMEM_INTR_DELAY; 2417 budget_left--; 2418 break; 2419 } 2420 if (get_imm_packet(adap, r, rspq->m, m, flags)) 2421 goto skip; 2422 eop = 1; 2423 rspq->imm_data++; 2424 } else if (r->len_cq) { 2425 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2426 2427 if (rspq->m == NULL) 2428 rspq->m = m_gethdr(M_DONTWAIT, MT_DATA); 2429 if (rspq->m == NULL) { 2430 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2431 break; 2432 } 2433 2434 ethpad = 2; 2435 eop = get_packet(adap, drop_thresh, qs, rspq->m, r); 2436 } else { 2437 DPRINTF("pure response\n"); 2438 rspq->pure_rsps++; 2439 } 2440 2441 if (flags & RSPD_CTRL_MASK) { 2442 sleeping |= flags & RSPD_GTS_MASK; 2443 handle_rsp_cntrl_info(qs, flags); 2444 } 2445 skip: 2446 r++; 2447 if (__predict_false(++rspq->cidx == rspq->size)) { 2448 rspq->cidx = 0; 2449 rspq->gen ^= 1; 2450 r = rspq->desc; 2451 } 2452 2453 prefetch(r); 2454 if (++rspq->credits >= (rspq->size / 4)) { 2455 refill_rspq(adap, rspq, rspq->credits); 2456 rspq->credits = 0; 2457 } 2458 2459 if (eop) { 2460 prefetch(mtod(rspq->m, uint8_t *)); 2461 prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES); 2462 2463 if (eth) { 2464 t3_rx_eth_lro(adap, rspq, rspq->m, ethpad, 2465 rss_hash, rss_csum, lro); 2466 2467 rspq->m = NULL; 2468 } else { 2469 rspq->m->m_pkthdr.csum_data = rss_csum; 2470 /* 2471 * XXX size mismatch 2472 */ 2473 m_set_priority(rspq->m, rss_hash); 2474 2475 ngathered = rx_offload(&adap->tdev, rspq, rspq->m, 2476 offload_mbufs, ngathered); 2477 } 2478 __refill_fl(adap, &qs->fl[0]); 2479 __refill_fl(adap, &qs->fl[1]); 2480 2481 } 2482 --budget_left; 2483 } 2484 2485 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2486 t3_lro_flush(adap, qs, &qs->lro); 2487 2488 if (sleeping) 2489 check_ring_db(adap, qs, sleeping); 2490 2491 smp_mb(); /* commit Tx queue processed updates */ 2492 if (__predict_false(qs->txq_stopped != 0)) 2493 restart_tx(qs); 2494 2495 budget -= budget_left; 2496 return (budget); 2497 } 2498 2499 /* 2500 * A helper function that processes responses and issues GTS. 2501 */ 2502 static __inline int 2503 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2504 { 2505 int work; 2506 static int last_holdoff = 0; 2507 2508 work = process_responses(adap, rspq_to_qset(rq), -1); 2509 2510 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2511 printf("next_holdoff=%d\n", rq->next_holdoff); 2512 last_holdoff = rq->next_holdoff; 2513 } 2514 2515 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2516 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2517 return work; 2518 } 2519 2520 2521 /* 2522 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2523 * Handles data events from SGE response queues as well as error and other 2524 * async events as they all use the same interrupt pin. We use one SGE 2525 * response queue per port in this mode and protect all response queues with 2526 * queue 0's lock. 2527 */ 2528 void 2529 t3b_intr(void *data) 2530 { 2531 uint32_t map; 2532 adapter_t *adap = data; 2533 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2534 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2535 2536 t3_write_reg(adap, A_PL_CLI, 0); 2537 map = t3_read_reg(adap, A_SG_DATA_INTR); 2538 2539 if (!map) 2540 return; 2541 2542 if (__predict_false(map & F_ERRINTR)) 2543 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2544 2545 mtx_lock(&q0->lock); 2546 2547 if (__predict_true(map & 1)) 2548 process_responses_gts(adap, q0); 2549 2550 if (map & 2) 2551 process_responses_gts(adap, q1); 2552 2553 mtx_unlock(&q0->lock); 2554 } 2555 2556 /* 2557 * The MSI interrupt handler. This needs to handle data events from SGE 2558 * response queues as well as error and other async events as they all use 2559 * the same MSI vector. 
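 *	(Contrast this with MSI-X, where each queue set has its own vector
 *	and its t3_intr_msix() handler below takes only that queue set's
 *	response queue lock.)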
We use one SGE response queue per port in this mode 2560 * and protect all response queues with queue 0's lock. 2561 */ 2562 void 2563 t3_intr_msi(void *data) 2564 { 2565 adapter_t *adap = data; 2566 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2567 struct sge_rspq *q1 = &adap->sge.qs[1].rspq; 2568 int new_packets = 0; 2569 2570 mtx_lock(&q0->lock); 2571 if (process_responses_gts(adap, q0)) { 2572 new_packets = 1; 2573 } 2574 2575 if (adap->params.nports == 2 && 2576 process_responses_gts(adap, q1)) { 2577 new_packets = 1; 2578 } 2579 2580 mtx_unlock(&q0->lock); 2581 if (new_packets == 0) 2582 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2583 } 2584 2585 void 2586 t3_intr_msix(void *data) 2587 { 2588 struct sge_qset *qs = data; 2589 adapter_t *adap = qs->port->adapter; 2590 struct sge_rspq *rspq = &qs->rspq; 2591 2592 mtx_lock(&rspq->lock); 2593 if (process_responses_gts(adap, rspq) == 0) 2594 rspq->unhandled_irqs++; 2595 mtx_unlock(&rspq->lock); 2596 } 2597 2598 /* 2599 * broken by recent mbuf changes 2600 */ 2601 static int 2602 t3_lro_enable(SYSCTL_HANDLER_ARGS) 2603 { 2604 adapter_t *sc; 2605 int i, j, enabled, err, nqsets = 0; 2606 2607 #ifndef LRO_WORKING 2608 return (0); 2609 #endif 2610 2611 sc = arg1; 2612 enabled = sc->sge.qs[0].lro.enabled; 2613 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2614 2615 if (err != 0) 2616 return (err); 2617 if (enabled == sc->sge.qs[0].lro.enabled) 2618 return (0); 2619 2620 for (i = 0; i < sc->params.nports; i++) 2621 for (j = 0; j < sc->port[i].nqsets; j++) 2622 nqsets++; 2623 2624 for (i = 0; i < nqsets; i++) 2625 sc->sge.qs[i].lro.enabled = enabled; 2626 2627 return (0); 2628 } 2629 2630 static int 2631 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2632 { 2633 adapter_t *sc = arg1; 2634 struct qset_params *qsp = &sc->params.sge.qset[0]; 2635 int coalesce_nsecs; 2636 struct sge_qset *qs; 2637 int i, j, err, nqsets = 0; 2638 struct mtx *lock; 2639 2640 coalesce_nsecs = qsp->coalesce_nsecs; 2641 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2642 2643 if (err != 0) { 2644 return (err); 2645 } 2646 if (coalesce_nsecs == qsp->coalesce_nsecs) 2647 return (0); 2648 2649 for (i = 0; i < sc->params.nports; i++) 2650 for (j = 0; j < sc->port[i].nqsets; j++) 2651 nqsets++; 2652 2653 coalesce_nsecs = max(100, coalesce_nsecs); 2654 2655 for (i = 0; i < nqsets; i++) { 2656 qs = &sc->sge.qs[i]; 2657 qsp = &sc->params.sge.qset[i]; 2658 qsp->coalesce_nsecs = coalesce_nsecs; 2659 2660 lock = (sc->flags & USING_MSIX) ? 
		    &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "enable_lro",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_lro_enable,
	    "I", "enable large receive offload");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_free",
	    CTLFLAG_RD, &collapse_free,
	    0, "frees during collapse");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "mb_free_vec_free",
	    CTLFLAG_RD, &mb_free_vec_free,
	    0, "frees during mb_free_vec");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_mbufs",
	    CTLFLAG_RW, &collapse_mbufs,
	    0, "collapse mbuf chains into iovecs");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "txq_overrun",
	    CTLFLAG_RD, &txq_fills,
	    0, "#times txq overrun");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "bogus_imm",
	    CTLFLAG_RD, &bogus_imm,
	    0, "#times a bogus immediate response was seen");
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
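
/*
 * Illustrative only -- not compiled into the driver.  A minimal sketch of
 * how a debugging path could use t3_get_desc() above to hex-dump one
 * Ethernet Tx descriptor.  The helper name dump_eth_txd() and the use of
 * printf() for output are assumptions made for this example, not existing
 * driver interfaces.
 */
#if 0
static void
dump_eth_txd(const struct sge_qset *qs, unsigned int idx)
{
	unsigned char buf[sizeof(struct tx_desc)];
	int i, len;

	/* qnum 0 is the Ethernet Tx queue in t3_get_desc()'s numbering */
	len = t3_get_desc(qs, 0, idx, buf);
	if (len == EINVAL) {
		printf("tx descriptor %u unavailable\n", idx);
		return;
	}
	for (i = 0; i < len; i++)
		printf("%02x%c", buf[i], (i % 16) == 15 ? '\n' : ' ');
	printf("\n");
}
#endif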