/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#else
#include <dev/cxgb/cxgb_include.h>
#endif

uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
int collapse_mbufs = 0;
static int recycle_enable = 1;

/*
 * XXX GC
 */
#define NET_XMIT_CN		2
#define NET_XMIT_SUCCESS	0

#define USE_GTS			0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)
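/*
 * Worked example (illustrative only): hz is the kernel tick rate, so with
 * the common default of hz = 1000 the reclaim callout fires every
 * 1000 >> 1 = 500 ticks, i.e. roughly every half second; with hz = 100 it
 * fires every 50 ticks, which is still about half a second.
 */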
/*
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	void		*cl;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
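/*
 * Worked example for the map above (illustrative only).  The 2-genbit
 * table implies WR_FLITS == 15: a WR of 15 flits still fits in one
 * descriptor (flit_desc_map[15] == 1), while 16 flits need
 * 1 + (16 - 2) / (15 - 1) = 2 descriptors (flit_desc_map[16] == 2).
 */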
static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static int free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx(adapter_t *adapter, struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
	int reclaimed, reclaim = desc_reclaimable(q);
	int n = 0;

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		n = free_tx_desc(adapter, q, min(reclaim, nbufs), mvec);
		reclaimed = min(reclaim, nbufs);
		q->cleaned += reclaimed;
		q->in_use -= reclaimed;
	}
	return (n);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
	    V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
	    V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}

/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}
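/*
 * Worked example (illustrative only): each struct sg_ent packs two
 * address/length pairs into 3 flits (2 x 8-byte addresses plus
 * 2 x 4-byte lengths), so sgl_len(3) = (3 * 3) / 2 + (3 & 1) = 4 + 1 = 5
 * flits: one full entry for the first two segments plus half an entry for
 * the third.
 */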
/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static __inline void
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl)
{
	int len;
	uint32_t flags = ntohl(resp->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);

	/*
	 * would be a firmware bug
	 */
	if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
		return;

	len = G_RSPD_LEN(ntohl(resp->len_cq));
	switch (sopeop) {
	case RSPQ_SOP_EOP:
		m->m_len = m->m_pkthdr.len = len;
		memcpy(mtod(m, uint8_t *), resp->imm_data, len);
		break;
	case RSPQ_EOP:
		memcpy(cl, resp->imm_data, len);
		m_iovappend(m, cl, MSIZE, len, 0);
		break;
	}
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);

	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
		    "packet delivered to disabled response queue (0x%x)\n",
		    (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i;

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		q->polling = adap->params.rev > 0;

		q->coalesce_nsecs = 5000;

		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = FL_Q_SIZE;
		q->jumbo_size = JUMBO_Q_SIZE;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}
	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;
}

/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
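/*
 * Illustrative note (not in the original sources): the free list is a
 * ring, so the SGE cannot distinguish a newly produced descriptor from a
 * stale one by position alone.  For example, descriptors written on the
 * first pass carry gen = 1 in len_gen/gen2; once q->pidx wraps to 0 the
 * driver flips q->gen and writes gen = 0, so the SGE only consumes
 * descriptors whose generation matches the one it currently expects.
 */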
/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
		    BUS_SPACE_MAXADDR_32BIT,
		    BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
		    len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
		    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK);
		bzero(s, len);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
		    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		    TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		    NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	struct port_info *p;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int i, j;
	int reclaim_eth, reclaim_ofl, refill_rx;

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++) {
			qs = &sc->sge.qs[i + j];
			txq = &qs->txq[0];
			reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
			    (qs->fl[1].credits < qs->fl[1].size));
			if (reclaim_eth || reclaim_ofl || refill_rx) {
				p = &sc->port[i];
				taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
				break;
			}
		}
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *p)
{
	TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
	return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
	int i;

	callout_drain(&sc->sge_timer_ch);
	if (sc->tq)
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	for (i = 0; i < sc->params.nports; i++)
		if (sc->port[i].tq != NULL)
			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
	    V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}
static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *p = arg;
	int i, j, nqsets = p->nqsets;
	adapter_t *sc = p->adapter;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct mtx *lock;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	int n, reclaimable;

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		txq = &qs->txq[TXQ_ETH];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			/* use a separate index so the qset loop above is not clobbered */
			for (j = 0; j < n; j++)
				m_freem_vec(m_vec[j]);

			if (p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
			    txq->size - txq->in_use >= TX_START_MAX_DESC) {
				p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
				taskqueue_enqueue(p->tq, &p->start_task);
			}
		}

		txq = &qs->txq[TXQ_OFLD];
		reclaimable = desc_reclaimable(txq);
		if (reclaimable > 0) {
			mtx_lock(&txq->lock);
			n = reclaim_completed_tx(sc, txq, TX_CLEAN_MAX_DESC, m_vec);
			mtx_unlock(&txq->lock);

			for (j = 0; j < n; j++)
				m_freem_vec(m_vec[j]);
		}

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
}

static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
	txq->unacked &= 7;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;

	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}
}
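/*
 * Worked example for the completion-request logic above (illustrative
 * only, not from the original sources): if txq->unacked was 6 and a
 * 3-descriptor packet is produced, unacked becomes 9, the "& 8" test
 * fires, so the WR completion bit is requested for this work request and
 * unacked is masked back down to 1.  The net effect is that roughly every
 * 8 descriptors the SGE is asked to report Tx credits back through the
 * response queue.
 */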
/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
		return 1;

	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & (CSUM_TSO))
		flits++;
#endif
	return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen;

	m0 = *m;
	pktlen = m0->m_pkthdr.len;

	err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
#ifdef DEBUG
	if (err) {
		int n = 0;
		struct mbuf *mtmp = m0;
		while (mtmp) {
			n++;
			mtmp = mtmp->m_next;
		}
		printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
		    err, m0->m_pkthdr.len, n);
	}
#endif
	if (err == EFBIG) {
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_NOWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
	}

	if (err == ENOMEM) {
		return (err);
	}

	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem_vec(m0);
		*m = NULL;
		return (err);
	}

	bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
	stx->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	if (idx)
		sgp->len[idx] = 0;
}
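/*
 * Example layout produced by make_sgl() (illustrative only): for three
 * DMA segments A, B and C the first sg_ent carries {len[0]=A.len,
 * addr[0]=A.addr, len[1]=B.len, addr[1]=B.addr} and the second sg_ent
 * carries {len[0]=C.len, addr[0]=C.addr, len[1]=0}; the zero length in
 * the unused slot terminates the list, matching sgl_len(3) == 5 flits.
 */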
/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
 *	where the HW may go to sleep just after we checked; in that case the
 *	interrupt handler will detect the outstanding TX packet and ring the
 *	doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);
	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			txsd->m = NULL;
			wrp = (struct work_request_hdr *)txd;
			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wr_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
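/*
 * Worked example (illustrative only): ETHER_HDR_LEN is 14 and
 * ETHER_VLAN_ENCAP_LEN is 4, so TCPPKTHDRSIZE is 14 + 4 + 20 + 20 = 58
 * bytes, enough to cover a VLAN-tagged Ethernet header plus option-less
 * IPv4 and TCP headers when the TSO path below parses the packet headers.
 */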
int
t3_encap(struct port_info *p, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct tx_sw_desc *stx;
	struct txq_state txqs;
	unsigned int nsegs, ndesc, flits, cntrl, mlen;
	int err, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	uint32_t wr_hi, wr_lo, sgl_flits;

	struct tx_desc *txd;
	struct cpl_tx_pkt *cpl;

	DPRINTF("t3_encap ");
	m0 = *m;
	sc = p->adapter;
	qs = &sc->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	stx = &txq->sdesc[txq->pidx];
	txd = &txq->desc[txq->pidx];
	cpl = (struct cpl_tx_pkt *)txd;
	mlen = m0->m_pkthdr.len;
	cpl->len = htonl(mlen | 0x80000000);

	DPRINTF("mlen=%d\n", mlen);
	/*
	 * XXX handle checksum, TSO, and VLAN here
	 */
	cntrl = V_TXPKT_INTF(p->port);

	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (m0->m_flags & M_VLANTAG)
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (tso_info) {
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
		struct ip *ip;
		struct tcphdr *tcp;
		uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */

		txd->flit[2] = 0;
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);

		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			pkthdr = &tmp[0];
			m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
		} else {
			pkthdr = mtod(m0, uint8_t *);
		}

		if (__predict_false(m0->m_flags & M_VLANTAG)) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
			    ETHER_VLAN_ENCAP_LEN);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
		}
		tcp = (struct tcphdr *)((uint8_t *)ip + sizeof(*ip));

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
		    V_LSO_IPHDR_WORDS(ip->ip_hl) |
		    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);
		flits = 3;
	} else {
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		cpl->cntrl = htonl(cntrl);

		if (mlen <= WR_LEN - sizeof(*cpl)) {
			txq_prod(txq, 1, &txqs);
			txq->sdesc[txqs.pidx].m = m0;
			m_set_priority(m0, txqs.pidx);

			if (m0->m_len == m0->m_pkthdr.len)
				memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
			else
				m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);

			flits = (mlen + 7) / 8 + 2;
			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wmb();
			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 2;
	}

	wrp = (struct work_request_hdr *)txd;

	if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
		return (err);
	}
	m0 = *m;
	ndesc = calc_tx_descs(m0, nsegs);

	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
	txq_prod(txq, ndesc, &txqs);
	txsd = &txq->sdesc[txqs.pidx];
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	txsd->m = m0;
	m_set_priority(m0, txqs.pidx);

	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(p->adapter, txq);

	return (0);
}
/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read it accidentally before it's written
 *	in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;

	memcpy(&to[1], &from[1], len - sizeof(*from));
	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wmb();
	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
	    V_WR_LEN((len + 7) / 8));
	wr_gen2(d, gen);
	m_freem(m);
}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}

/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	mtx_assert(&q->lock, MA_OWNED);

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}
/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
 *	Send a packet through an SGE control Tx queue.  Packets sent through
 *	a control queue must fit entirely as immediate data in a single Tx
 *	descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}

/**
 *	restart_ctrlq - restart a suspended control queue
 *	@qs: the queue set containing the control queue
 *
 *	Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	    (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}

/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}
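/*
 * Usage sketch for t3_mgmt_tx() (illustrative only, not part of the
 * original driver): a caller builds a firmware work request as immediate
 * data in a small mbuf and hands it to control queue 0.  The opcode and
 * the surrounding error handling here are placeholders.
 */
#if 0
	struct mbuf *m;
	struct work_request_hdr *wrp;

	if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
		return (ENOMEM);
	m->m_len = m->m_pkthdr.len = sizeof(*wrp);
	wrp = mtod(m, struct work_request_hdr *);
	bzero(wrp, sizeof(*wrp));
	wrp->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));	/* placeholder opcode */
	t3_mgmt_tx(sc, m);					/* sc is the adapter softc */
#endif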
/**
 *	t3_free_qset - free the resources of an SGE queue set
 *	@sc: the controller owning the queue set
 *	@q: the queue set
 *
 *	Release the HW and SW resources associated with an SGE queue set, such
 *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 *	queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; ++i) {
		if (q->txq[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
		if (mtx_initialized(&q->txq[i].lock)) {
			mtx_destroy(&q->txq[i].lock);
		}
	}

	if (q->rspq.desc) {
		mtx_lock(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
	}

	if (mtx_initialized(&q->rspq.lock))
		mtx_destroy(&q->rspq.lock);

	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i;

	for (i = 0; i < SGE_QSETS; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (i = 0; i < SGE_QSETS; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
	}
}

/**
 *	free_tx_desc - reclaims Tx descriptors and their buffers
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim descriptors from
 *	@n: the number of descriptors to reclaim
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
int
free_tx_desc(adapter_t *sc, struct sge_txq *q, int n, struct mbuf **m_vec)
{
	struct tx_sw_desc *d;
	unsigned int cidx = q->cidx;
	int nbufs = 0;

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
	    "reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
	d = &q->sdesc[cidx];

	while (n-- > 0) {
		DPRINTF("cidx=%d d=%p\n", cidx, d);
		if (d->m) {
			if (d->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, d->map);
				bus_dmamap_destroy(q->entry_tag, d->map);
				d->flags &= ~TX_SW_DESC_MAPPED;
			}
			if (m_get_priority(d->m) == cidx) {
				m_vec[nbufs] = d->m;
				d->m = NULL;
				nbufs++;
			} else {
				printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
			}
		}
		++d;
		if (++cidx == q->size) {
			cidx = 0;
			d = q->sdesc;
		}
	}
	q->cidx = cidx;

	return (nbufs);
}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}
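/*
 * Illustrative note (not in the original sources): this is the same
 * generation-bit scheme used for the free lists.  The queue starts with
 * q->gen == 1 and the SGE writes responses with the GEN2 bit set, so a
 * freshly written descriptor compares as new; each time the ring wraps
 * the expected generation flips, which keeps descriptors left over from
 * the previous pass from being mistaken for new responses.
 */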
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
	unsigned int sgl_flits, flits;
	struct work_request_hdr *from;
	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;

	if (immediate(m)) {
		q->sdesc[pidx].m = NULL;
		write_imm(d, m, m->m_len, gen);
		return;
	}

	/* Only TX_DATA builds SGLs */

	from = mtod(m, struct work_request_hdr *);
	memcpy(&d->flit[1], &from[1],
	    (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));

	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

	make_sgl(sgp, segs, nsegs);
	sgl_flits = sgl_len(nsegs);

	txqs.gen = q->gen;
	txqs.pidx = q->pidx;
	txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
	    from->wr_hi, from->wr_lo);
}

/**
 *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *
 *	Returns the number of Tx descriptors needed for the given offload
 *	packet.  These packets are already fully constructed.
 */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
	unsigned int flits, cnt = 0;

	if (m->m_len <= WR_LEN)
		return 1;	/* packet fits as immediate data */

	if (m->m_flags & M_IOVEC)
		cnt = mtomv(m)->mv_count;

	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;	/* headers */

	return flits_to_desc(flits + sgl_len(cnt));
}
/**
 *	ofld_xmit - send a packet through an offload queue
 *	@adap: the adapter
 *	@q: the Tx offload queue
 *	@m: the packet
 *
 *	Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	unsigned int pidx, gen, nsegs;
	unsigned int ndesc;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
	if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
		mtx_unlock(&q->lock);
		return (ret);
	}
	ndesc = calc_tx_descs_ofld(m, nsegs);
again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);

	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			m_set_priority(m, ndesc);	/* save for restart */
			mtx_unlock(&q->lock);
			return NET_XMIT_CN;
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, skb->len, skb->len - skb->data_len,
	    skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
	return NET_XMIT_SUCCESS;
}

/**
 *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
 *
 *	Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int nsegs, i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx(adap, q, TX_CLEAN_MAX_DESC, m_vec);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
}
/**
 *	queue_set - return the queue set a packet should use
 *	@m: the packet
 *
 *	Maps a packet to the SGE queue set it should use.  The desired queue
 *	set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
	return m_get_priority(m) >> 1;
}

/**
 *	is_ctrl_pkt - return whether an offload packet is a control packet
 *	@m: the packet
 *
 *	Determines whether an offload packet should use an OFLD or a CTRL
 *	Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
	return m_get_priority(m) & 1;
}

/**
 *	t3_offload_tx - send an offload packet
 *	@tdev: the offload device to send to
 *	@m: the packet
 *
 *	Sends an offload packet.  We use the packet priority to select the
 *	appropriate Tx queue as follows: bit 0 indicates whether the packet
 *	should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct toedev *tdev, struct mbuf *m)
{
	adapter_t *adap = tdev2adap(tdev);
	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

	if (__predict_false(is_ctrl_pkt(m)))
		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}
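/*
 * Worked example of the priority encoding used above (illustrative only):
 * a packet whose priority is 0x5 (binary 101) has bit 0 set and bits 1-3
 * equal to 2, so t3_offload_tx() sends it through the control queue of
 * queue set 2; a priority of 0x4 (binary 100) selects the offload queue
 * of the same queue set.
 */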
/**
 *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 *	@tdev: the offload device that will be receiving the packets
 *	@q: the SGE response queue that assembled the bundle
 *	@m: the partial bundle
 *	@n: the number of packets in the bundle
 *
 *	Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static __inline void
deliver_partial_bundle(struct toedev *tdev,
    struct sge_rspq *q,
    struct mbuf *mbufs[], int n)
{
	if (n) {
		q->offload_bundles++;
		cxgb_ofld_recv(tdev, mbufs, n);
	}
}

static __inline int
rx_offload(struct toedev *tdev, struct sge_rspq *rq,
    struct mbuf *m, struct mbuf *rx_gather[],
    unsigned int gather_idx)
{
	rq->offload_pkts++;
	m->m_pkthdr.header = mtod(m, void *);

	rx_gather[gather_idx++] = m;
	if (gather_idx == RX_BUNDLE_SIZE) {
		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
		gather_idx = 0;
		rq->offload_bundles++;
	}
	return (gather_idx);
}

static void
restart_tx(struct sge_qset *qs)
{
	struct adapter *sc = qs->port->adapter;

	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
		qs->txq[TXQ_OFLD].restarts++;
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_tsk);
	}
	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
		qs->txq[TXQ_CTRL].restarts++;
		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_tsk);
	}
}

/**
 *	t3_sge_alloc_qset - initialize an SGE queue set
 *	@sc: the controller softc
 *	@id: the queue set id
 *	@nports: how many Ethernet ports will be using this queue set
 *	@irq_vec_idx: the IRQ vector index for response queue interrupts
 *	@p: configuration parameters for this queue set
 *	@ntxq: number of Tx queues for the queue set
 *	@pi: port info for queue set
 *
 *	Allocate resources and initialize an SGE queue set.  A queue set
 *	comprises a response queue, two Rx free-buffer queues, and up to 3
 *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
 *	queue, offload queue, and control queue.
 */
int
t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
    const struct qset_params *p, int ntxq, struct port_info *pi)
{
	struct sge_qset *q = &sc->sge.qs[id];
	int i, ret = 0;

	init_qset_cntxt(q, id);

	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
		    &q->fl[0].desc, &q->fl[0].sdesc,
		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
		printf("error %d from alloc ring fl0\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
		    &q->fl[1].desc, &q->fl[1].sdesc,
		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
		printf("error %d from alloc ring fl1\n", ret);
		goto err;
	}

	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
		    &q->rspq.desc_tag, &q->rspq.desc_map,
		    NULL, NULL)) != 0) {
		printf("error %d from alloc ring rspq\n", ret);
		goto err;
	}

	for (i = 0; i < ntxq; ++i) {
		/*
		 * The control queue always uses immediate data so does not
		 * need to keep track of any mbufs.
		 * XXX Placeholder for future TOE support.
		 */
		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);

		if ((ret = alloc_ring(sc, p->txq_size[i],
			    sizeof(struct tx_desc), sz,
			    &q->txq[i].phys_addr, &q->txq[i].desc,
			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
			    &q->txq[i].desc_map,
			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
			printf("error %d from alloc ring tx %i\n", ret, i);
			goto err;
		}
		mbufq_init(&q->txq[i].sendq);
		q->txq[i].gen = 1;
		q->txq[i].size = p->txq_size[i];
		mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF);
	}

	TASK_INIT(&q->txq[TXQ_OFLD].qresume_tsk, 0, restart_offloadq, q);
	TASK_INIT(&q->txq[TXQ_CTRL].qresume_tsk, 0, restart_ctrlq, q);

	q->fl[0].gen = q->fl[1].gen = 1;
	q->fl[0].size = p->fl_size;
	q->fl[1].size = p->jumbo_size;

	q->rspq.gen = 1;
	q->rspq.size = p->rspq_size;
	mtx_init(&q->rspq.lock, "t3 rspq lock", NULL, MTX_DEF);

	q->txq[TXQ_ETH].stop_thres = nports *
	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);

	q->fl[0].buf_size = MCLBYTES;
	q->fl[0].zone = zone_clust;
	q->fl[0].type = EXT_CLUSTER;
	q->fl[1].buf_size = MJUMPAGESIZE;
	q->fl[1].zone = zone_jumbop;
	q->fl[1].type = EXT_JUMBOP;

	q->lro.enabled = lro_default;

	mtx_lock(&sc->sge.reg_lock);
	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
	    q->rspq.phys_addr, q->rspq.size,
	    q->fl[0].buf_size, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_rspcntxt\n", ret);
		goto err_unlock;
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
		    q->fl[i].phys_addr, q->fl[i].size,
		    q->fl[i].buf_size, p->cong_thres, 1,
		    0);
		if (ret) {
			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
			goto err_unlock;
		}
	}
	    q->txq[TXQ_ETH].token, 1, 0);
	if (ret) {
		printf("error %d from t3_sge_init_ecntxt\n", ret);
		goto err_unlock;
	}

	if (ntxq > 1) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
		    USE_GTS, SGE_CNTXT_OFLD, id,
		    q->txq[TXQ_OFLD].phys_addr,
		    q->txq[TXQ_OFLD].size, 0, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	if (ntxq > 2) {
		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
		    SGE_CNTXT_CTRL, id,
		    q->txq[TXQ_CTRL].phys_addr,
		    q->txq[TXQ_CTRL].size,
		    q->txq[TXQ_CTRL].token, 1, 0);
		if (ret) {
			printf("error %d from t3_sge_init_ecntxt\n", ret);
			goto err_unlock;
		}
	}

	mtx_unlock(&sc->sge.reg_lock);
	t3_update_qset_coalesce(q, p);
	q->port = pi;

	refill_fl(sc, &q->fl[0], q->fl[0].size);
	refill_fl(sc, &q->fl[1], q->fl[1].size);
	refill_rspq(sc, &q->rspq, q->rspq.size - 1);

	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
	    V_NEWTIMER(q->rspq.holdoff_tmr));

	return (0);

err_unlock:
	mtx_unlock(&sc->sge.reg_lock);
err:
	t3_free_qset(sc, q);

	return (ret);
}

void
t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
	struct ifnet *ifp = pi->ifp;

	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *),
	    cpl->iff);
	if (&pi->adapter->port[cpl->iff] != pi)
		panic("bad port index %d m->m_data=%p\n", cpl->iff,
		    mtod(m, uint8_t *));

	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
	    cpl->csum_valid && cpl->csum == 0xffff) {
		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
	}
	/*
	 * XXX need to add VLAN support for 6.x
	 */
#ifdef VLAN_SUPPORTED
	if (__predict_false(cpl->vlan_valid)) {
		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
		m->m_flags |= M_VLANTAG;
	}
#endif

	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
	m_explode(m);
	/*
	 * adjust after conversion to mbuf chain
	 */
	m_adj(m, sizeof(*cpl) + ethpad);

	(*ifp->if_input)(ifp, m);
}

/**
 * get_packet - return the next ingress packet buffer from a free list
 * @adap: the adapter that received the packet
 * @drop_thres: # of remaining buffers before we start dropping packets
 * @qs: the qset that the SGE free list holding the packet belongs to
 * @m: the mbuf to fill with the packet contents or buffer reference
 * @r: response descriptor
 *
 * Get the next packet from a free list and complete setup of the
 * mbuf.  If the packet is small we make a copy and recycle the
 * original buffer, otherwise we use the original buffer itself.  If a
 * positive drop threshold is supplied packets are dropped and their
 * buffers recycled if (a) the number of remaining buffers is under the
 * threshold and the packet is too big to copy, or (b) the packet should
 * be copied but there is no memory for the copy.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct mbuf *m, struct rsp_desc *r)
{
	unsigned int len_cq = ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = ntohl(r->flags);
	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
	void *cl;
	int ret = 0;

	prefetch(sd->cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		cl = mtod(m, void *);
		memcpy(cl, sd->cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
	} else {
		cl = sd->cl;
		bus_dmamap_unload(fl->entry_tag, sd->map);
	}

	switch (sopeop) {
	case RSPQ_SOP_EOP:
		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
		if (cl == sd->cl)
			m_cljset(m, cl, fl->type);
		m->m_len = m->m_pkthdr.len = len;
		ret = 1;
		goto done;
	case RSPQ_NSOP_NEOP:
		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
		ret = 0;
		break;
	case RSPQ_SOP:
		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
		m_iovinit(m);
		ret = 0;
		break;
	case RSPQ_EOP:
		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
		ret = 1;
		break;
	}
	m_iovappend(m, cl, fl->buf_size, len, 0);

done:
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}

/**
 * handle_rsp_cntrl_info - handles control information in a response
 * @qs: the queue set corresponding to the response
 * @flags: the response control flags
 *
 * Handles the control information of an SGE response, such as GTS
 * indications and completion credits for the queue set's Tx queues.
 * HW coalesces credits, we don't do any extra SW coalescing.
 */
static __inline void
handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
{
	unsigned int credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ0_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
#endif
	credits = G_RSPD_TXQ0_CR(flags);
	if (credits) {
		qs->txq[TXQ_ETH].processed += credits;
		if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC)
			taskqueue_enqueue(qs->port->adapter->tq,
			    &qs->port->timer_reclaim_task);
	}

	credits = G_RSPD_TXQ2_CR(flags);
	if (credits)
		qs->txq[TXQ_CTRL].processed += credits;

#if USE_GTS
	if (flags & F_RSPD_TXQ1_GTS)
		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
#endif
	credits = G_RSPD_TXQ1_CR(flags);
	if (credits)
		qs->txq[TXQ_OFLD].processed += credits;
}

static void
check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping)
{
	/* XXX GTS doorbell handling is not implemented; this is a no-op. */
}

/**
 * process_responses - process responses from an SGE response queue
 * @adap: the adapter
 * @qs: the queue set to which the response queue belongs
 * @budget: how many responses can be processed in this round
 *
 * Process responses from an SGE response queue up to the supplied budget.
 * Responses include received packets as well as credits and other events
 * for the queues that belong to the response queue's queue set.
 * A negative budget is effectively unlimited.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue.  If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
	int lro = qs->lro.enabled;
	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
	int ngathered = 0;
#ifdef DEBUG
	static int last_holdoff = 0;

	if (rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_csum = *(const uint32_t *)r;
		uint32_t rss_hash = r->rss_hdr.rss_hash_val;

		eth = (r->rss_hdr.opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/* XXX */
			printf("async notification\n");
		} else if (flags & F_RSPD_IMM_DATA_VALID) {
			struct mbuf *m = NULL;

			if (cxgb_debug)
				printf("IMM DATA VALID\n");
			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			else
				m = m_gethdr(M_NOWAIT, MT_DATA);

			if (rspq->m == NULL || m == NULL) {
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			get_imm_packet(adap, r, rspq->m, m);
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			if (rspq->m == NULL)
				rspq->m = m_gethdr(M_NOWAIT, MT_DATA);
			if (rspq->m == NULL) {
				log(LOG_WARNING,
				    "failed to get mbuf for packet\n");
				break;
			}

			ethpad = 2;
			eop = get_packet(adap, drop_thresh, qs, rspq->m, r);
		} else {
			DPRINTF("pure response\n");
			rspq->pure_rsps++;
		}

		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		prefetch(r);
		if (++rspq->credits >= (rspq->size / 4)) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}

		if (eop) {
			prefetch(mtod(rspq->m, uint8_t *));
			prefetch(mtod(rspq->m, uint8_t *) + L1_CACHE_BYTES);

			if (eth) {
				t3_rx_eth_lro(adap, rspq, rspq->m, ethpad,
				    rss_hash, rss_csum, lro);

				rspq->m = NULL;
			} else {
				rspq->m->m_pkthdr.csum_data = rss_csum;
				/*
				 * XXX size mismatch
				 */
				m_set_priority(rspq->m, rss_hash);

				ngathered = rx_offload(&adap->tdev, rspq,
				    rspq->m, offload_mbufs, ngathered);
			}
#ifdef notyet
			taskqueue_enqueue(adap->tq, &adap->timer_reclaim_task);
#else
			__refill_fl(adap, &qs->fl[0]);
			__refill_fl(adap, &qs->fl[1]);
#endif
		}
		--budget_left;
	}

	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
	t3_lro_flush(adap, qs, &qs->lro);

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	smp_mb();  /* commit Tx queue processed updates */
	if (__predict_false(qs->txq_stopped != 0))
		restart_tx(qs);

	budget -= budget_left;
	return (budget);
}

/*
 * A helper function that processes responses and issues GTS.
 */
static __inline int
process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
{
	int work;
	static int last_holdoff = 0;

	work = process_responses(adap, rspq_to_qset(rq), -1);

	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
		printf("next_holdoff=%d\n", rq->next_holdoff);
		last_holdoff = rq->next_holdoff;
	}

	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
	return (work);
}

/*
 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt pin.  We use one SGE
 * response queue per port in this mode and protect all response queues with
 * queue 0's lock.
 */
void
t3b_intr(void *data)
{
	uint32_t map;
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;

	t3_write_reg(adap, A_PL_CLI, 0);
	map = t3_read_reg(adap, A_SG_DATA_INTR);

	if (!map)
		return;

	if (__predict_false(map & F_ERRINTR))
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);

	mtx_lock(&q0->lock);

	if (__predict_true(map & 1))
		process_responses_gts(adap, q0);

	if (map & 2)
		process_responses_gts(adap, q1);

	mtx_unlock(&q0->lock);
}

/*
 * The MSI interrupt handler.  This needs to handle data events from SGE
 * response queues as well as error and other async events as they all use
 * the same MSI vector.  We use one SGE response queue per port in this mode
 * and protect all response queues with queue 0's lock.
 */
void
t3_intr_msi(void *data)
{
	adapter_t *adap = data;
	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
	struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
	int new_packets = 0;

	mtx_lock(&q0->lock);
	if (process_responses_gts(adap, q0))
		new_packets = 1;

	if (adap->params.nports == 2 &&
	    process_responses_gts(adap, q1))
		new_packets = 1;

	mtx_unlock(&q0->lock);
	if (new_packets == 0)
		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
}

void
t3_intr_msix(void *data)
{
	struct sge_qset *qs = data;
	adapter_t *adap = qs->port->adapter;
	struct sge_rspq *rspq = &qs->rspq;

	mtx_lock(&rspq->lock);
	if (process_responses_gts(adap, rspq) == 0)
		rspq->unhandled_irqs++;
	mtx_unlock(&rspq->lock);
}

/*
 * broken by recent mbuf changes
 */
static int
t3_lro_enable(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc;
	int i, j, enabled, err, nqsets = 0;

#ifndef LRO_WORKING
	return (0);
#endif

	sc = arg1;
	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);

	if (err != 0)
		return (err);
	if (enabled == sc->sge.qs[0].lro.enabled)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	for (i = 0; i < nqsets; i++)
		sc->sge.qs[i].lro.enabled = enabled;

	return (0);
}

static int
t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
{
	adapter_t *sc = arg1;
	struct qset_params *qsp = &sc->params.sge.qset[0];
	int coalesce_nsecs;
	struct sge_qset *qs;
	int i, j, err, nqsets = 0;
	struct mtx *lock;

	coalesce_nsecs = qsp->coalesce_nsecs;
	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);

	if (err != 0)
		return (err);
	if (coalesce_nsecs == qsp->coalesce_nsecs)
		return (0);

	for (i = 0; i < sc->params.nports; i++)
		for (j = 0; j < sc->port[i].nqsets; j++)
			nqsets++;

	coalesce_nsecs = max(100, coalesce_nsecs);

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[i];
		qsp = &sc->params.sge.qset[i];
		qsp->coalesce_nsecs = coalesce_nsecs;

		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		mtx_lock(lock);
		t3_update_qset_coalesce(qs, qsp);
		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
		    V_NEWTIMER(qs->rspq.holdoff_tmr));
		mtx_unlock(lock);
	}

	return (0);
}

void
t3_add_sysctls(adapter_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "enable_lro",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_lro_enable,
	    "I", "enable large receive offload");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, t3_set_coalesce_nsecs,
	    "I", "interrupt coalescing timer (ns)");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "enable_debug",
	    CTLFLAG_RW, &cxgb_debug,
	    0, "enable verbose debugging output");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_free",
	    CTLFLAG_RD, &collapse_free,
	    0, "frees during collapse");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "mb_free_vec_free",
	    CTLFLAG_RD, &mb_free_vec_free,
	    0, "frees during mb_free_vec");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "collapse_mbufs",
	    CTLFLAG_RW, &collapse_mbufs,
	    0, "collapse mbuf chains into iovecs");
}

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor, or EINVAL if the queue or index is invalid.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
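
/*
 * Illustrative sketch only, not part of the driver: one way a debugging
 * path might use t3_get_desc() to snapshot the first descriptor of each
 * queue in a queue set (Tx queues 0..2, response queue 3, free lists 4..5).
 * The helper name t3_dump_qset_descs and the plain printf reporting are
 * assumptions made for this example.
 */
static void __unused
t3_dump_qset_descs(const struct sge_qset *qs)
{
	/* Large enough for any descriptor type handled by t3_get_desc(). */
	union {
		struct tx_desc txd;
		struct rsp_desc rspd;
		struct rx_desc rxd;
	} buf;
	unsigned int qnum;
	int len;

	for (qnum = 0; qnum < 6; qnum++) {
		len = t3_get_desc(qs, qnum, 0, (unsigned char *)&buf);
		if (len == EINVAL)	/* queue absent or index out of range */
			continue;
		printf("qset queue %u: descriptor 0 is %d bytes\n", qnum, len);
	}
}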