1 /************************************************************************** 2 3 Copyright (c) 2007, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/conf.h> 39 #include <machine/bus.h> 40 #include <machine/resource.h> 41 #include <sys/bus_dma.h> 42 #include <sys/rman.h> 43 #include <sys/queue.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 #include <sys/proc.h> 48 #include <sys/sched.h> 49 #include <sys/smp.h> 50 #include <sys/systm.h> 51 52 #include <netinet/in_systm.h> 53 #include <netinet/in.h> 54 #include <netinet/ip.h> 55 #include <netinet/tcp.h> 56 57 #include <dev/pci/pcireg.h> 58 #include <dev/pci/pcivar.h> 59 60 #ifdef CONFIG_DEFINED 61 #include <cxgb_include.h> 62 #else 63 #include <dev/cxgb/cxgb_include.h> 64 #endif 65 66 uint32_t collapse_free = 0; 67 uint32_t mb_free_vec_free = 0; 68 int txq_fills = 0; 69 int collapse_mbufs = 0; 70 static int bogus_imm = 0; 71 #ifndef DISABLE_MBUF_IOVEC 72 static int recycle_enable = 1; 73 #endif 74 75 #define USE_GTS 0 76 77 #define SGE_RX_SM_BUF_SIZE 1536 78 #define SGE_RX_DROP_THRES 16 79 #define SGE_RX_COPY_THRES 128 80 81 /* 82 * Period of the Tx buffer reclaim timer. This timer does not need to run 83 * frequently as Tx buffers are usually reclaimed by new Tx packets. 
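 * A period of (hz >> 1) ticks works out to roughly half a second.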
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)

/*
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	void		*cl;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};

struct refill_fl_cb_arg {
	int               error;
	bus_dma_segment_t seg;
	int               nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @q: the Tx queue to reclaim completed descriptors from
 * @nbufs: the maximum number of reclaimed mbufs to return
 * @mvec: array in which the reclaimed mbufs are returned
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed and
 * collects the associated mbufs in @mvec for the caller to free.  Returns
 * the number of mbufs collected.  Called with the Tx queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
	int reclaimed, reclaim = desc_reclaimable(q);
	int n = 0;

	mtx_assert(&q->lock, MA_OWNED);
	if (reclaim > 0) {
		n = free_tx_desc(q, min(reclaim, nbufs), mvec);
		reclaimed = min(reclaim, nbufs);
		q->cleaned += reclaimed;
		q->in_use -= reclaimed;
	}
	return (n);
}

/**
 * should_restart_tx - are there enough resources to restart a Tx queue?
215 * @q: the Tx queue 216 * 217 * Checks if there are enough descriptors to restart a suspended Tx queue. 218 */ 219 static __inline int 220 should_restart_tx(const struct sge_txq *q) 221 { 222 unsigned int r = q->processed - q->cleaned; 223 224 return q->in_use - r < (q->size >> 1); 225 } 226 227 /** 228 * t3_sge_init - initialize SGE 229 * @adap: the adapter 230 * @p: the SGE parameters 231 * 232 * Performs SGE initialization needed every time after a chip reset. 233 * We do not initialize any of the queue sets here, instead the driver 234 * top-level must request those individually. We also do not enable DMA 235 * here, that should be done after the queues have been set up. 236 */ 237 void 238 t3_sge_init(adapter_t *adap, struct sge_params *p) 239 { 240 u_int ctrl, ups; 241 242 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 243 244 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 245 F_CQCRDTCTRL | 246 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 247 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 248 #if SGE_NUM_GENBITS == 1 249 ctrl |= F_EGRGENCTRL; 250 #endif 251 if (adap->params.rev > 0) { 252 if (!(adap->flags & (USING_MSIX | USING_MSI))) 253 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 254 ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL; 255 } 256 t3_write_reg(adap, A_SG_CONTROL, ctrl); 257 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 258 V_LORCQDRBTHRSH(512)); 259 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 260 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 261 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 262 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000); 263 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 264 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 265 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 266 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 267 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 268 } 269 270 271 /** 272 * sgl_len - calculates the size of an SGL of the given capacity 273 * @n: the number of SGL entries 274 * 275 * Calculates the number of flits needed for a scatter/gather list that 276 * can hold the given number of entries. 277 */ 278 static __inline unsigned int 279 sgl_len(unsigned int n) 280 { 281 return ((3 * n) / 2 + (n & 1)); 282 } 283 284 /** 285 * get_imm_packet - return the next ingress packet buffer from a response 286 * @resp: the response descriptor containing the packet data 287 * 288 * Return a packet containing the immediate data of the given response. 
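 * The data is copied out of the response descriptor itself, so no free-list
 * buffer is consumed.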
289 */ 290 #ifdef DISABLE_MBUF_IOVEC 291 static __inline int 292 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh) 293 { 294 struct mbuf *m; 295 int len; 296 uint32_t flags = ntohl(resp->flags); 297 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 298 299 /* 300 * would be a firmware bug 301 */ 302 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) 303 return (0); 304 305 m = m_gethdr(M_NOWAIT, MT_DATA); 306 len = G_RSPD_LEN(ntohl(resp->len_cq)); 307 308 if (m) { 309 MH_ALIGN(m, IMMED_PKT_SIZE); 310 memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); 311 m->m_len = len; 312 313 switch (sopeop) { 314 case RSPQ_SOP_EOP: 315 mh->mh_head = mh->mh_tail = m; 316 m->m_pkthdr.len = len; 317 m->m_flags |= M_PKTHDR; 318 break; 319 case RSPQ_EOP: 320 m->m_flags &= ~M_PKTHDR; 321 mh->mh_head->m_pkthdr.len += len; 322 mh->mh_tail->m_next = m; 323 mh->mh_tail = m; 324 break; 325 } 326 } 327 return (m != NULL); 328 } 329 330 #else 331 static int 332 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags) 333 { 334 int len, error; 335 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 336 337 /* 338 * would be a firmware bug 339 */ 340 len = G_RSPD_LEN(ntohl(resp->len_cq)); 341 if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) { 342 if (cxgb_debug) 343 device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%din get_imm_packet\n", sopeop, flags, len); 344 bogus_imm++; 345 return (EINVAL); 346 } 347 error = 0; 348 switch (sopeop) { 349 case RSPQ_SOP_EOP: 350 m->m_len = m->m_pkthdr.len = len; 351 memcpy(mtod(m, uint8_t *), resp->imm_data, len); 352 break; 353 case RSPQ_EOP: 354 memcpy(cl, resp->imm_data, len); 355 m_iovappend(m, cl, MSIZE, len, 0); 356 break; 357 default: 358 bogus_imm++; 359 error = EINVAL; 360 } 361 362 return (error); 363 } 364 #endif 365 366 static __inline u_int 367 flits_to_desc(u_int n) 368 { 369 return (flit_desc_map[n]); 370 } 371 372 void 373 t3_sge_err_intr_handler(adapter_t *adapter) 374 { 375 unsigned int v, status; 376 377 378 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 379 380 if (status & F_RSPQCREDITOVERFOW) 381 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 382 383 if (status & F_RSPQDISABLED) { 384 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 385 386 CH_ALERT(adapter, 387 "packet delivered to disabled response queue (0x%x)\n", 388 (v >> S_RSPQ0DISABLED) & 0xff); 389 } 390 391 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 392 if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED)) 393 t3_fatal_err(adapter); 394 } 395 396 void 397 t3_sge_prep(adapter_t *adap, struct sge_params *p) 398 { 399 int i; 400 401 /* XXX Does ETHER_ALIGN need to be accounted for here? */ 402 p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data); 403 404 for (i = 0; i < SGE_QSETS; ++i) { 405 struct qset_params *q = p->qset + i; 406 407 q->polling = adap->params.rev > 0; 408 409 if (adap->params.nports > 2) 410 q->coalesce_nsecs = 50000; 411 else 412 q->coalesce_nsecs = 5000; 413 414 q->rspq_size = RSPQ_Q_SIZE; 415 q->fl_size = FL_Q_SIZE; 416 q->jumbo_size = JUMBO_Q_SIZE; 417 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 418 q->txq_size[TXQ_OFLD] = 1024; 419 q->txq_size[TXQ_CTRL] = 256; 420 q->cong_thres = 0; 421 } 422 } 423 424 int 425 t3_sge_alloc(adapter_t *sc) 426 { 427 428 /* The parent tag. 
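 * All of the per-ring and per-buffer DMA tags created below are children of this tag.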
*/ 429 if (bus_dma_tag_create( NULL, /* parent */ 430 1, 0, /* algnmnt, boundary */ 431 BUS_SPACE_MAXADDR, /* lowaddr */ 432 BUS_SPACE_MAXADDR, /* highaddr */ 433 NULL, NULL, /* filter, filterarg */ 434 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 435 BUS_SPACE_UNRESTRICTED, /* nsegments */ 436 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 437 0, /* flags */ 438 NULL, NULL, /* lock, lockarg */ 439 &sc->parent_dmat)) { 440 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 441 return (ENOMEM); 442 } 443 444 /* 445 * DMA tag for normal sized RX frames 446 */ 447 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 448 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 449 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 450 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 451 return (ENOMEM); 452 } 453 454 /* 455 * DMA tag for jumbo sized RX frames. 456 */ 457 if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR, 458 BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE, 459 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 460 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 461 return (ENOMEM); 462 } 463 464 /* 465 * DMA tag for TX frames. 466 */ 467 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 468 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 469 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 470 NULL, NULL, &sc->tx_dmat)) { 471 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 472 return (ENOMEM); 473 } 474 475 return (0); 476 } 477 478 int 479 t3_sge_free(struct adapter * sc) 480 { 481 482 if (sc->tx_dmat != NULL) 483 bus_dma_tag_destroy(sc->tx_dmat); 484 485 if (sc->rx_jumbo_dmat != NULL) 486 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 487 488 if (sc->rx_dmat != NULL) 489 bus_dma_tag_destroy(sc->rx_dmat); 490 491 if (sc->parent_dmat != NULL) 492 bus_dma_tag_destroy(sc->parent_dmat); 493 494 return (0); 495 } 496 497 void 498 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 499 { 500 501 qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U); 502 qs->rspq.polling = 0 /* p->polling */; 503 } 504 505 static void 506 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 507 { 508 struct refill_fl_cb_arg *cb_arg = arg; 509 510 cb_arg->error = error; 511 cb_arg->seg = segs[0]; 512 cb_arg->nseg = nseg; 513 514 } 515 516 /** 517 * refill_fl - refill an SGE free-buffer list 518 * @sc: the controller softc 519 * @q: the free-list to refill 520 * @n: the number of new buffers to allocate 521 * 522 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 523 * The caller must assure that @n does not exceed the queue's capacity. 
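 * The free-list doorbell is rung once at the end rather than once per buffer.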
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	void *cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
			log(LOG_WARNING, "Failed to allocate cluster\n");
			goto done;
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
		err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
		    refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
			/*
			 * XXX free cluster
			 */
			return;
		}

		sd->flags |= RX_SW_DESC_INUSE;
		sd->cl = cl;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
	}

done:
	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			uma_zfree(q->zone, d->cl);
		}
		d->cl = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

#ifndef DISABLE_MBUF_IOVEC
/**
 * recycle_rx_buf - recycle a receive buffer
 * @adap: the adapter
 * @q: the SGE free list
 * @idx: index of buffer to recycle
 *
 * Recycles the specified buffer on the given free list by adding it at
 * the next available slot on the list.
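 * The existing DMA mapping is reused as-is; only the generation bits are rewritten.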
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
#endif

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
		    BUS_SPACE_MAXADDR_32BIT,
		    BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
		    len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
		    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK);
		bzero(s, len);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
		    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		    TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		    NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 * sge_timer_cb - perform periodic maintenance of the SGE queues
 * @arg: the adapter
 *
 * Runs periodically from a timer to perform maintenance of the adapter's
 * SGE queue sets.  It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while.  We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up).  Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty.
We try to add only a few 736 * buffers here, the queue will be replenished fully as these new buffers 737 * are used up if memory shortage has subsided. 738 * 739 * c) Return coalesced response queue credits in case a response queue is 740 * starved. 741 * 742 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 743 * fifo overflows and the FW doesn't implement any recovery scheme yet. 744 */ 745 static void 746 sge_timer_cb(void *arg) 747 { 748 adapter_t *sc = arg; 749 struct port_info *p; 750 struct sge_qset *qs; 751 struct sge_txq *txq; 752 int i, j; 753 int reclaim_eth, reclaim_ofl, refill_rx; 754 755 for (i = 0; i < sc->params.nports; i++) 756 for (j = 0; j < sc->port[i].nqsets; j++) { 757 qs = &sc->sge.qs[i + j]; 758 txq = &qs->txq[0]; 759 reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned; 760 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 761 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 762 (qs->fl[1].credits < qs->fl[1].size)); 763 if (reclaim_eth || reclaim_ofl || refill_rx) { 764 p = &sc->port[i]; 765 taskqueue_enqueue(p->tq, &p->timer_reclaim_task); 766 break; 767 } 768 } 769 if (sc->params.nports > 2) { 770 int i; 771 772 for_each_port(sc, i) { 773 struct port_info *pi = &sc->port[i]; 774 775 t3_write_reg(sc, A_SG_KDOORBELL, 776 F_SELEGRCNTX | 777 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 778 } 779 } 780 if (sc->open_device_map != 0) 781 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 782 } 783 784 /* 785 * This is meant to be a catch-all function to keep sge state private 786 * to sge.c 787 * 788 */ 789 int 790 t3_sge_init_adapter(adapter_t *sc) 791 { 792 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 793 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 794 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 795 return (0); 796 } 797 798 int 799 t3_sge_init_port(struct port_info *p) 800 { 801 TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p); 802 return (0); 803 } 804 805 void 806 t3_sge_deinit_sw(adapter_t *sc) 807 { 808 int i; 809 810 callout_drain(&sc->sge_timer_ch); 811 if (sc->tq) 812 taskqueue_drain(sc->tq, &sc->slow_intr_task); 813 for (i = 0; i < sc->params.nports; i++) 814 if (sc->port[i].tq != NULL) 815 taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task); 816 } 817 818 /** 819 * refill_rspq - replenish an SGE response queue 820 * @adapter: the adapter 821 * @q: the response queue to replenish 822 * @credits: how many new responses to make available 823 * 824 * Replenishes a response queue by making the supplied number of responses 825 * available to HW. 826 */ 827 static __inline void 828 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 829 { 830 831 /* mbufs are allocated on demand when a rspq entry is processed. 
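 * Returning credits here only tells the hardware that more response entries may be reused.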
*/ 832 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 833 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 834 } 835 836 static __inline void 837 sge_txq_reclaim_(struct sge_txq *txq) 838 { 839 int reclaimable, i, n; 840 struct mbuf *m_vec[TX_CLEAN_MAX_DESC]; 841 struct port_info *p; 842 843 p = txq->port; 844 reclaim_more: 845 n = 0; 846 reclaimable = desc_reclaimable(txq); 847 if (reclaimable > 0 && mtx_trylock(&txq->lock)) { 848 n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec); 849 mtx_unlock(&txq->lock); 850 } 851 if (n == 0) 852 return; 853 854 for (i = 0; i < n; i++) { 855 m_freem_vec(m_vec[i]); 856 } 857 if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE && 858 txq->size - txq->in_use >= TX_START_MAX_DESC) { 859 txq_fills++; 860 p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 861 taskqueue_enqueue(p->tq, &p->start_task); 862 } 863 864 if (n) 865 goto reclaim_more; 866 } 867 868 static void 869 sge_txq_reclaim_handler(void *arg, int ncount) 870 { 871 struct sge_txq *q = arg; 872 873 sge_txq_reclaim_(q); 874 } 875 876 static void 877 sge_timer_reclaim(void *arg, int ncount) 878 { 879 struct port_info *p = arg; 880 int i, nqsets = p->nqsets; 881 adapter_t *sc = p->adapter; 882 struct sge_qset *qs; 883 struct sge_txq *txq; 884 struct mtx *lock; 885 886 for (i = 0; i < nqsets; i++) { 887 qs = &sc->sge.qs[i]; 888 txq = &qs->txq[TXQ_ETH]; 889 sge_txq_reclaim_(txq); 890 891 txq = &qs->txq[TXQ_OFLD]; 892 sge_txq_reclaim_(txq); 893 894 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 895 &sc->sge.qs[0].rspq.lock; 896 897 if (mtx_trylock(lock)) { 898 /* XXX currently assume that we are *NOT* polling */ 899 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 900 901 if (qs->fl[0].credits < qs->fl[0].size - 16) 902 __refill_fl(sc, &qs->fl[0]); 903 if (qs->fl[1].credits < qs->fl[1].size - 16) 904 __refill_fl(sc, &qs->fl[1]); 905 906 if (status & (1 << qs->rspq.cntxt_id)) { 907 if (qs->rspq.credits) { 908 refill_rspq(sc, &qs->rspq, 1); 909 qs->rspq.credits--; 910 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 911 1 << qs->rspq.cntxt_id); 912 } 913 } 914 mtx_unlock(lock); 915 } 916 } 917 } 918 919 /** 920 * init_qset_cntxt - initialize an SGE queue set context info 921 * @qs: the queue set 922 * @id: the queue set id 923 * 924 * Initializes the TIDs and context ids for the queues of a queue set. 
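 * Each queue set owns two free lists (ids 2*id and 2*id+1) and egress
 * contexts offset from the firmware's per-type start indices.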
925 */ 926 static void 927 init_qset_cntxt(struct sge_qset *qs, u_int id) 928 { 929 930 qs->rspq.cntxt_id = id; 931 qs->fl[0].cntxt_id = 2 * id; 932 qs->fl[1].cntxt_id = 2 * id + 1; 933 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 934 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 935 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 936 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 937 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 938 } 939 940 941 static void 942 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 943 { 944 txq->in_use += ndesc; 945 /* 946 * XXX we don't handle stopping of queue 947 * presumably start handles this when we bump against the end 948 */ 949 txqs->gen = txq->gen; 950 txq->unacked += ndesc; 951 txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3); 952 txq->unacked &= 7; 953 txqs->pidx = txq->pidx; 954 txq->pidx += ndesc; 955 956 if (txq->pidx >= txq->size) { 957 txq->pidx -= txq->size; 958 txq->gen ^= 1; 959 } 960 961 } 962 963 /** 964 * calc_tx_descs - calculate the number of Tx descriptors for a packet 965 * @m: the packet mbufs 966 * @nsegs: the number of segments 967 * 968 * Returns the number of Tx descriptors needed for the given Ethernet 969 * packet. Ethernet packets require addition of WR and CPL headers. 970 */ 971 static __inline unsigned int 972 calc_tx_descs(const struct mbuf *m, int nsegs) 973 { 974 unsigned int flits; 975 976 if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt)) 977 return 1; 978 979 flits = sgl_len(nsegs) + 2; 980 #ifdef TSO_SUPPORTED 981 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) 982 flits++; 983 #endif 984 return flits_to_desc(flits); 985 } 986 987 static unsigned int 988 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 989 struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs) 990 { 991 struct mbuf *m0; 992 int err, pktlen; 993 994 m0 = *m; 995 pktlen = m0->m_pkthdr.len; 996 997 err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 998 #ifdef DEBUG 999 if (err) { 1000 int n = 0; 1001 struct mbuf *mtmp = m0; 1002 while(mtmp) { 1003 n++; 1004 mtmp = mtmp->m_next; 1005 } 1006 printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", 1007 err, m0->m_pkthdr.len, n); 1008 } 1009 #endif 1010 if (err == EFBIG) { 1011 /* Too many segments, try to defrag */ 1012 m0 = m_defrag(m0, M_DONTWAIT); 1013 if (m0 == NULL) { 1014 m_freem(*m); 1015 *m = NULL; 1016 return (ENOBUFS); 1017 } 1018 *m = m0; 1019 err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); 1020 } 1021 1022 if (err == ENOMEM) { 1023 return (err); 1024 } 1025 1026 if (err) { 1027 if (cxgb_debug) 1028 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1029 m_freem_vec(m0); 1030 *m = NULL; 1031 return (err); 1032 } 1033 1034 bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE); 1035 stx->flags |= TX_SW_DESC_MAPPED; 1036 1037 return (0); 1038 } 1039 1040 /** 1041 * make_sgl - populate a scatter/gather list for a packet 1042 * @sgp: the SGL to populate 1043 * @segs: the packet dma segments 1044 * @nsegs: the number of segments 1045 * 1046 * Generates a scatter/gather list for the buffers that make up a packet 1047 * and returns the SGL size in 8-byte words. The caller must size the SGL 1048 * appropriately. 
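 * Entries are packed two to an sg_ent; the unused length slot of an odd
 * final entry is zeroed.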
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
	}

	if (idx)
		sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();			/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}



/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL.  If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header.  Otherwise we distribute the
 * SGL across the number of descriptors it spans.
 */

static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;
		wmb();
		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo;
		/* XXX gen?
*/ 1144 wr_gen2(txd, txqs->gen); 1145 } else { 1146 unsigned int ogen = txqs->gen; 1147 const uint64_t *fp = (const uint64_t *)sgl; 1148 struct work_request_hdr *wp = wrp; 1149 1150 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1151 V_WR_SGLSFLT(flits)) | wr_hi; 1152 1153 while (sgl_flits) { 1154 unsigned int avail = WR_FLITS - flits; 1155 1156 if (avail > sgl_flits) 1157 avail = sgl_flits; 1158 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1159 sgl_flits -= avail; 1160 ndesc--; 1161 if (!sgl_flits) 1162 break; 1163 1164 fp += avail; 1165 txd++; 1166 txsd++; 1167 if (++txqs->pidx == txq->size) { 1168 txqs->pidx = 0; 1169 txqs->gen ^= 1; 1170 txd = txq->desc; 1171 txsd = txq->sdesc; 1172 } 1173 1174 /* 1175 * when the head of the mbuf chain 1176 * is freed all clusters will be freed 1177 * with it 1178 */ 1179 txsd->m = NULL; 1180 wrp = (struct work_request_hdr *)txd; 1181 wrp->wr_hi = htonl(V_WR_DATATYPE(1) | 1182 V_WR_SGLSFLT(1)) | wr_hi; 1183 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS, 1184 sgl_flits + 1)) | 1185 V_WR_GEN(txqs->gen)) | wr_lo; 1186 wr_gen2(txd, txqs->gen); 1187 flits = 1; 1188 } 1189 wrp->wr_hi |= htonl(F_WR_EOP); 1190 wmb(); 1191 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1192 wr_gen2((struct tx_desc *)wp, ogen); 1193 } 1194 } 1195 1196 1197 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1198 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1199 1200 int 1201 t3_encap(struct port_info *p, struct mbuf **m, int *free) 1202 { 1203 adapter_t *sc; 1204 struct mbuf *m0; 1205 struct sge_qset *qs; 1206 struct sge_txq *txq; 1207 struct tx_sw_desc *stx; 1208 struct txq_state txqs; 1209 unsigned int ndesc, flits, cntrl, mlen; 1210 int err, nsegs, tso_info = 0; 1211 1212 struct work_request_hdr *wrp; 1213 struct tx_sw_desc *txsd; 1214 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1215 bus_dma_segment_t segs[TX_MAX_SEGS]; 1216 uint32_t wr_hi, wr_lo, sgl_flits; 1217 1218 struct tx_desc *txd; 1219 struct cpl_tx_pkt *cpl; 1220 1221 m0 = *m; 1222 sc = p->adapter; 1223 1224 DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset); 1225 1226 /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */ 1227 1228 qs = &sc->sge.qs[p->first_qset]; 1229 1230 txq = &qs->txq[TXQ_ETH]; 1231 stx = &txq->sdesc[txq->pidx]; 1232 txd = &txq->desc[txq->pidx]; 1233 cpl = (struct cpl_tx_pkt *)txd; 1234 mlen = m0->m_pkthdr.len; 1235 cpl->len = htonl(mlen | 0x80000000); 1236 1237 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan); 1238 /* 1239 * XXX handle checksum, TSO, and VLAN here 1240 * 1241 */ 1242 cntrl = V_TXPKT_INTF(p->txpkt_intf); 1243 1244 /* 1245 * XXX need to add VLAN support for 6.x 1246 */ 1247 #ifdef VLAN_SUPPORTED 1248 if (m0->m_flags & M_VLANTAG) 1249 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 1250 if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1251 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1252 #endif 1253 if (tso_info) { 1254 int eth_type; 1255 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *) cpl; 1256 struct ip *ip; 1257 struct tcphdr *tcp; 1258 char *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? 
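 * (TCPPKTHDRSIZE works out to 58 bytes, so no.)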
*/ 1259 1260 txd->flit[2] = 0; 1261 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1262 hdr->cntrl = htonl(cntrl); 1263 1264 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1265 pkthdr = &tmp[0]; 1266 m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr); 1267 } else { 1268 pkthdr = mtod(m0, char *); 1269 } 1270 1271 if (__predict_false(m0->m_flags & M_VLANTAG)) { 1272 eth_type = CPL_ETH_II_VLAN; 1273 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1274 ETHER_VLAN_ENCAP_LEN); 1275 } else { 1276 eth_type = CPL_ETH_II; 1277 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1278 } 1279 tcp = (struct tcphdr *)((uint8_t *)ip + 1280 sizeof(*ip)); 1281 1282 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1283 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1284 V_LSO_TCPHDR_WORDS(tcp->th_off); 1285 hdr->lso_info = htonl(tso_info); 1286 flits = 3; 1287 } else { 1288 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1289 cpl->cntrl = htonl(cntrl); 1290 1291 if (mlen <= WR_LEN - sizeof(*cpl)) { 1292 txq_prod(txq, 1, &txqs); 1293 txq->sdesc[txqs.pidx].m = NULL; 1294 1295 if (m0->m_len == m0->m_pkthdr.len) 1296 memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen); 1297 else 1298 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1299 1300 *free = 1; 1301 flits = (mlen + 7) / 8 + 2; 1302 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1303 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1304 F_WR_SOP | F_WR_EOP | txqs.compl); 1305 wmb(); 1306 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | 1307 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1308 1309 wr_gen2(txd, txqs.gen); 1310 check_ring_tx_db(sc, txq); 1311 return (0); 1312 } 1313 flits = 2; 1314 } 1315 1316 wrp = (struct work_request_hdr *)txd; 1317 1318 if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) { 1319 return (err); 1320 } 1321 m0 = *m; 1322 ndesc = calc_tx_descs(m0, nsegs); 1323 1324 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1325 make_sgl(sgp, segs, nsegs); 1326 1327 sgl_flits = sgl_len(nsegs); 1328 1329 DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc); 1330 txq_prod(txq, ndesc, &txqs); 1331 txsd = &txq->sdesc[txqs.pidx]; 1332 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1333 wr_lo = htonl(V_WR_TID(txq->token)); 1334 txsd->m = m0; 1335 m_set_priority(m0, txqs.pidx); 1336 1337 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); 1338 check_ring_tx_db(p->adapter, txq); 1339 1340 return (0); 1341 } 1342 1343 1344 /** 1345 * write_imm - write a packet into a Tx descriptor as immediate data 1346 * @d: the Tx descriptor to write 1347 * @m: the packet 1348 * @len: the length of packet data to write as immediate data 1349 * @gen: the generation bit value to write 1350 * 1351 * Writes a packet as immediate data into a Tx descriptor. The packet 1352 * contains a work request at its beginning. We must write the packet 1353 * carefully so the SGE doesn't read accidentally before it's written in 1354 * its entirety. 
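 * The generation bit in wr_lo is therefore written last, after a write barrier.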
1355 */ 1356 static __inline void 1357 write_imm(struct tx_desc *d, struct mbuf *m, 1358 unsigned int len, unsigned int gen) 1359 { 1360 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1361 struct work_request_hdr *to = (struct work_request_hdr *)d; 1362 1363 memcpy(&to[1], &from[1], len - sizeof(*from)); 1364 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP | 1365 V_WR_BCNTLFLT(len & 7)); 1366 wmb(); 1367 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) | 1368 V_WR_LEN((len + 7) / 8)); 1369 wr_gen2(d, gen); 1370 m_freem(m); 1371 } 1372 1373 /** 1374 * check_desc_avail - check descriptor availability on a send queue 1375 * @adap: the adapter 1376 * @q: the TX queue 1377 * @m: the packet needing the descriptors 1378 * @ndesc: the number of Tx descriptors needed 1379 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1380 * 1381 * Checks if the requested number of Tx descriptors is available on an 1382 * SGE send queue. If the queue is already suspended or not enough 1383 * descriptors are available the packet is queued for later transmission. 1384 * Must be called with the Tx queue locked. 1385 * 1386 * Returns 0 if enough descriptors are available, 1 if there aren't 1387 * enough descriptors and the packet has been queued, and 2 if the caller 1388 * needs to retry because there weren't enough descriptors at the 1389 * beginning of the call but some freed up in the mean time. 1390 */ 1391 static __inline int 1392 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1393 struct mbuf *m, unsigned int ndesc, 1394 unsigned int qid) 1395 { 1396 /* 1397 * XXX We currently only use this for checking the control queue 1398 * the control queue is only used for binding qsets which happens 1399 * at init time so we are guaranteed enough descriptors 1400 */ 1401 if (__predict_false(!mbufq_empty(&q->sendq))) { 1402 addq_exit: mbufq_tail(&q->sendq, m); 1403 return 1; 1404 } 1405 if (__predict_false(q->size - q->in_use < ndesc)) { 1406 1407 struct sge_qset *qs = txq_to_qset(q, qid); 1408 1409 setbit(&qs->txq_stopped, qid); 1410 smp_mb(); 1411 1412 if (should_restart_tx(q) && 1413 test_and_clear_bit(qid, &qs->txq_stopped)) 1414 return 2; 1415 1416 q->stops++; 1417 goto addq_exit; 1418 } 1419 return 0; 1420 } 1421 1422 1423 /** 1424 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1425 * @q: the SGE control Tx queue 1426 * 1427 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1428 * that send only immediate data (presently just the control queues) and 1429 * thus do not have any mbufs 1430 */ 1431 static __inline void 1432 reclaim_completed_tx_imm(struct sge_txq *q) 1433 { 1434 unsigned int reclaim = q->processed - q->cleaned; 1435 1436 mtx_assert(&q->lock, MA_OWNED); 1437 1438 q->in_use -= reclaim; 1439 q->cleaned += reclaim; 1440 } 1441 1442 static __inline int 1443 immediate(const struct mbuf *m) 1444 { 1445 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1446 } 1447 1448 /** 1449 * ctrl_xmit - send a packet through an SGE control Tx queue 1450 * @adap: the adapter 1451 * @q: the control queue 1452 * @m: the packet 1453 * 1454 * Send a packet through an SGE control Tx queue. Packets sent through 1455 * a control queue must fit entirely as immediate data in a single Tx 1456 * descriptor and have no page fragments. 
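 * Packets that do not fit as immediate data are simply freed and dropped.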
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wr_lo = htonl(V_WR_TID(q->token));

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			mtx_unlock(&q->lock);
			return (-1);
		}
		goto again;
	}

	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	mtx_unlock(&q->lock);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}


/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = (struct sge_qset *)data;
	struct sge_txq *q = &qs->txq[TXQ_CTRL];
	adapter_t *adap = qs->port->adapter;

	mtx_lock(&q->lock);
again:	reclaim_completed_tx_imm(q);

	while (q->in_use < q->size &&
	    (m = mbufq_dequeue(&q->sendq)) != NULL) {

		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

		if (++q->pidx >= q->size) {
			q->pidx = 0;
			q->gen ^= 1;
		}
		q->in_use++;
	}
	if (!mbufq_empty(&q->sendq)) {
		setbit(&qs->txq_stopped, TXQ_CTRL);
		smp_mb();

		if (should_restart_tx(q) &&
		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
			goto again;
		q->stops++;
	}
	mtx_unlock(&q->lock);
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 * t3_free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 * queue set must be quiesced prior to calling this.
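 * It is safe to call this on a partially initialized queue set; resources
 * that were never allocated are skipped.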
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
			    q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
			    q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
			    q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
			MTX_DESTROY(&q->txq[i].lock);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
		    q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

	bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; ++i)
		t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context.  In the latter
 * case it also disables any pending queue restart tasklets.  Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasklets as it cannot wait; however, the tasklets will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasklets will be stopped
 * if they are still running.
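 * (On FreeBSD the restart "tasklets" are taskqueue tasks; they are drained
 * via taskqueue_drain() below.)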
1658 */ 1659 void 1660 t3_sge_stop(adapter_t *sc) 1661 { 1662 int i, nqsets; 1663 1664 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 1665 1666 if (sc->tq == NULL) 1667 return; 1668 1669 for (nqsets = i = 0; i < (sc)->params.nports; i++) 1670 nqsets += sc->port[i].nqsets; 1671 1672 for (i = 0; i < nqsets; ++i) { 1673 struct sge_qset *qs = &sc->sge.qs[i]; 1674 1675 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 1676 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 1677 } 1678 } 1679 1680 1681 /** 1682 * free_tx_desc - reclaims Tx descriptors and their buffers 1683 * @adapter: the adapter 1684 * @q: the Tx queue to reclaim descriptors from 1685 * @n: the number of descriptors to reclaim 1686 * 1687 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 1688 * Tx buffers. Called with the Tx queue lock held. 1689 */ 1690 int 1691 free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec) 1692 { 1693 struct tx_sw_desc *d; 1694 unsigned int cidx = q->cidx; 1695 int nbufs = 0; 1696 1697 #ifdef T3_TRACE 1698 T3_TRACE2(sc->tb[q->cntxt_id & 7], 1699 "reclaiming %u Tx descriptors at cidx %u", n, cidx); 1700 #endif 1701 d = &q->sdesc[cidx]; 1702 1703 while (n-- > 0) { 1704 DPRINTF("cidx=%d d=%p\n", cidx, d); 1705 if (d->m) { 1706 if (d->flags & TX_SW_DESC_MAPPED) { 1707 bus_dmamap_unload(q->entry_tag, d->map); 1708 bus_dmamap_destroy(q->entry_tag, d->map); 1709 d->flags &= ~TX_SW_DESC_MAPPED; 1710 } 1711 if (m_get_priority(d->m) == cidx) { 1712 m_vec[nbufs] = d->m; 1713 d->m = NULL; 1714 nbufs++; 1715 } else { 1716 printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx); 1717 } 1718 } 1719 ++d; 1720 if (++cidx == q->size) { 1721 cidx = 0; 1722 d = q->sdesc; 1723 } 1724 } 1725 q->cidx = cidx; 1726 1727 return (nbufs); 1728 } 1729 1730 /** 1731 * is_new_response - check if a response is newly written 1732 * @r: the response descriptor 1733 * @q: the response queue 1734 * 1735 * Returns true if a response descriptor contains a yet unprocessed 1736 * response. 1737 */ 1738 static __inline int 1739 is_new_response(const struct rsp_desc *r, 1740 const struct sge_rspq *q) 1741 { 1742 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 1743 } 1744 1745 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 1746 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 1747 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 1748 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 1749 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 1750 1751 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 1752 #define NOMEM_INTR_DELAY 2500 1753 1754 /** 1755 * write_ofld_wr - write an offload work request 1756 * @adap: the adapter 1757 * @m: the packet to send 1758 * @q: the Tx queue 1759 * @pidx: index of the first Tx descriptor to write 1760 * @gen: the generation value to use 1761 * @ndesc: number of descriptors the packet will occupy 1762 * 1763 * Write an offload work request to send the supplied packet. The packet 1764 * data already carry the work request with most fields populated. 
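 * m_pkthdr.header is assumed here to mark the end of those headers within the mbuf.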
1765 */ 1766 static void 1767 write_ofld_wr(adapter_t *adap, struct mbuf *m, 1768 struct sge_txq *q, unsigned int pidx, 1769 unsigned int gen, unsigned int ndesc, 1770 bus_dma_segment_t *segs, unsigned int nsegs) 1771 { 1772 unsigned int sgl_flits, flits; 1773 struct work_request_hdr *from; 1774 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 1775 struct tx_desc *d = &q->desc[pidx]; 1776 struct txq_state txqs; 1777 1778 if (immediate(m)) { 1779 q->sdesc[pidx].m = NULL; 1780 write_imm(d, m, m->m_len, gen); 1781 return; 1782 } 1783 1784 /* Only TX_DATA builds SGLs */ 1785 1786 from = mtod(m, struct work_request_hdr *); 1787 memcpy(&d->flit[1], &from[1], 1788 (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from)); 1789 1790 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; 1791 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 1792 1793 make_sgl(sgp, segs, nsegs); 1794 sgl_flits = sgl_len(nsegs); 1795 1796 txqs.gen = q->gen; 1797 txqs.pidx = q->pidx; 1798 txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3); 1799 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 1800 from->wr_hi, from->wr_lo); 1801 } 1802 1803 /** 1804 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 1805 * @m: the packet 1806 * 1807 * Returns the number of Tx descriptors needed for the given offload 1808 * packet. These packets are already fully constructed. 1809 */ 1810 static __inline unsigned int 1811 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 1812 { 1813 unsigned int flits, cnt = 0; 1814 1815 1816 if (m->m_len <= WR_LEN) 1817 return 1; /* packet fits as immediate data */ 1818 1819 if (m->m_flags & M_IOVEC) 1820 cnt = mtomv(m)->mv_count; 1821 1822 flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8; /* headers */ 1823 1824 return flits_to_desc(flits + sgl_len(cnt)); 1825 } 1826 1827 /** 1828 * ofld_xmit - send a packet through an offload queue 1829 * @adap: the adapter 1830 * @q: the Tx offload queue 1831 * @m: the packet 1832 * 1833 * Send an offload packet through an SGE offload queue. 
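 * Returns EINTR if the queue was full and the packet has been queued for a
 * later restart.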
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
	int ret, nsegs;
	unsigned int ndesc;
	unsigned int pidx, gen;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
	if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
		mtx_unlock(&q->lock);
		return (ret);
	}
	ndesc = calc_tx_descs_ofld(m, nsegs);
again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
	if (__predict_false(ret)) {
		if (ret == 1) {
			m_set_priority(m, ndesc);	/* save for restart */
			mtx_unlock(&q->lock);
			return EINTR;
		}
		goto again;
	}

	gen = q->gen;
	q->in_use += ndesc;
	pidx = q->pidx;
	q->pidx += ndesc;
	if (q->pidx >= q->size) {
		q->pidx -= q->size;
		q->gen ^= 1;
	}
#ifdef T3_TRACE
	T3_TRACE5(adap->tb[q->cntxt_id & 7],
	    "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
	    ndesc, pidx, skb->len, skb->len - skb->data_len,
	    skb_shinfo(skb)->nr_frags);
#endif
	mtx_unlock(&q->lock);

	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
	check_ring_tx_db(adap, q);

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
	return (0);
}

/**
 * restart_offloadq - restart a suspended offload queue
 * @qs: the queue set containing the offload queue
 *
 * Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{

	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;
	struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
	bus_dma_segment_t segs[TX_MAX_SEGS];
	int nsegs, i, cleaned;
	struct tx_sw_desc *stx = &q->sdesc[q->pidx];

	mtx_lock(&q->lock);
again:	cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

	while ((m = mbufq_peek(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		unsigned int ndesc = m_get_priority(m);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			setbit(&qs->txq_stopped, TXQ_OFLD);
			smp_mb();

			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
		mtx_unlock(&q->lock);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
		mtx_lock(&q->lock);
	}
	mtx_unlock(&q->lock);

#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

	for (i = 0; i < cleaned; i++) {
		m_freem_vec(m_vec[i]);
	}
}

/**
 * queue_set - return the queue set a packet should use
 * @m: the packet
 *
 * Maps a packet to the SGE queue set it should use.  The desired queue
 * set is carried in bits 1-3 in the packet's priority.
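 * For example, a priority of 5 (binary 101) selects queue set 2 and the
 * control queue.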
1961 */ 1962 static __inline int 1963 queue_set(const struct mbuf *m) 1964 { 1965 return m_get_priority(m) >> 1; 1966 } 1967 1968 /** 1969 * is_ctrl_pkt - return whether an offload packet is a control packet 1970 * @m: the packet 1971 * 1972 * Determines whether an offload packet should use an OFLD or a CTRL 1973 * Tx queue. This is indicated by bit 0 in the packet's priority. 1974 */ 1975 static __inline int 1976 is_ctrl_pkt(const struct mbuf *m) 1977 { 1978 return m_get_priority(m) & 1; 1979 } 1980 1981 /** 1982 * t3_offload_tx - send an offload packet 1983 * @tdev: the offload device to send to 1984 * @m: the packet 1985 * 1986 * Sends an offload packet. We use the packet priority to select the 1987 * appropriate Tx queue as follows: bit 0 indicates whether the packet 1988 * should be sent as regular or control, bits 1-3 select the queue set. 1989 */ 1990 int 1991 t3_offload_tx(struct toedev *tdev, struct mbuf *m) 1992 { 1993 adapter_t *adap = tdev2adap(tdev); 1994 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 1995 1996 if (__predict_false(is_ctrl_pkt(m))) 1997 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m); 1998 1999 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m); 2000 } 2001 2002 /** 2003 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2004 * @tdev: the offload device that will be receiving the packets 2005 * @q: the SGE response queue that assembled the bundle 2006 * @m: the partial bundle 2007 * @n: the number of packets in the bundle 2008 * 2009 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2010 */ 2011 static __inline void 2012 deliver_partial_bundle(struct toedev *tdev, 2013 struct sge_rspq *q, 2014 struct mbuf *mbufs[], int n) 2015 { 2016 if (n) { 2017 q->offload_bundles++; 2018 cxgb_ofld_recv(tdev, mbufs, n); 2019 } 2020 } 2021 2022 static __inline int 2023 rx_offload(struct toedev *tdev, struct sge_rspq *rq, 2024 struct mbuf *m, struct mbuf *rx_gather[], 2025 unsigned int gather_idx) 2026 { 2027 rq->offload_pkts++; 2028 m->m_pkthdr.header = mtod(m, void *); 2029 2030 rx_gather[gather_idx++] = m; 2031 if (gather_idx == RX_BUNDLE_SIZE) { 2032 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2033 gather_idx = 0; 2034 rq->offload_bundles++; 2035 } 2036 return (gather_idx); 2037 } 2038 2039 static void 2040 restart_tx(struct sge_qset *qs) 2041 { 2042 struct adapter *sc = qs->port->adapter; 2043 2044 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2045 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2046 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2047 qs->txq[TXQ_OFLD].restarts++; 2048 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2049 } 2050 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2051 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2052 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2053 qs->txq[TXQ_CTRL].restarts++; 2054 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2055 } 2056 } 2057 2058 /** 2059 * t3_sge_alloc_qset - initialize an SGE queue set 2060 * @sc: the controller softc 2061 * @id: the queue set id 2062 * @nports: how many Ethernet ports will be using this queue set 2063 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2064 * @p: configuration parameters for this queue set 2065 * @ntxq: number of Tx queues for the queue set 2066 * @pi: port info for queue set 2067 * 2068 * Allocate resources and initialize an SGE queue set. A queue set 2069 * comprises a response queue, two Rx free-buffer queues, and up to 3 2070 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2071 * queue, offload queue, and control queue. 2072 */ 2073 int 2074 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2075 const struct qset_params *p, int ntxq, struct port_info *pi) 2076 { 2077 struct sge_qset *q = &sc->sge.qs[id]; 2078 int i, ret = 0; 2079 2080 init_qset_cntxt(q, id); 2081 2082 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2083 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2084 &q->fl[0].desc, &q->fl[0].sdesc, 2085 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2086 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2087 printf("error %d from alloc ring fl0\n", ret); 2088 goto err; 2089 } 2090 2091 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2092 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2093 &q->fl[1].desc, &q->fl[1].sdesc, 2094 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2095 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2096 printf("error %d from alloc ring fl1\n", ret); 2097 goto err; 2098 } 2099 2100 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2101 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2102 &q->rspq.desc_tag, &q->rspq.desc_map, 2103 NULL, NULL)) != 0) { 2104 printf("error %d from alloc ring rspq\n", ret); 2105 goto err; 2106 } 2107 2108 for (i = 0; i < ntxq; ++i) { 2109 /* 2110 * The control queue always uses immediate data so does not 2111 * need to keep track of any mbufs. 2112 * XXX Placeholder for future TOE support. 2113 */ 2114 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2115 2116 if ((ret = alloc_ring(sc, p->txq_size[i], 2117 sizeof(struct tx_desc), sz, 2118 &q->txq[i].phys_addr, &q->txq[i].desc, 2119 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2120 &q->txq[i].desc_map, 2121 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2122 printf("error %d from alloc ring tx %i\n", ret, i); 2123 goto err; 2124 } 2125 mbufq_init(&q->txq[i].sendq); 2126 q->txq[i].gen = 1; 2127 q->txq[i].size = p->txq_size[i]; 2128 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2129 device_get_unit(sc->dev), irq_vec_idx, i); 2130 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2131 } 2132 2133 q->txq[TXQ_ETH].port = pi; 2134 2135 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2136 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2137 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2138 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2139 2140 q->fl[0].gen = q->fl[1].gen = 1; 2141 q->fl[0].size = p->fl_size; 2142 q->fl[1].size = p->jumbo_size; 2143 2144 q->rspq.gen = 1; 2145 q->rspq.cidx = 0; 2146 q->rspq.size = p->rspq_size; 2147 2148 q->txq[TXQ_ETH].stop_thres = nports * 2149 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2150 2151 q->fl[0].buf_size = MCLBYTES; 2152 q->fl[0].zone = zone_clust; 2153 q->fl[0].type = EXT_CLUSTER; 2154 q->fl[1].buf_size = MJUMPAGESIZE; 2155 q->fl[1].zone = zone_jumbop; 2156 q->fl[1].type = EXT_JUMBOP; 2157 2158 q->lro.enabled = lro_default; 2159 2160 mtx_lock(&sc->sge.reg_lock); 2161 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2162 q->rspq.phys_addr, q->rspq.size, 2163 q->fl[0].buf_size, 1, 0); 2164 if (ret) { 2165 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2166 goto err_unlock; 2167 } 2168 2169 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2170 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2171 q->fl[i].phys_addr, q->fl[i].size, 2172 
q->fl[i].buf_size, p->cong_thres, 1, 2173 0); 2174 if (ret) { 2175 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2176 goto err_unlock; 2177 } 2178 } 2179 2180 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2181 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2182 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2183 1, 0); 2184 if (ret) { 2185 printf("error %d from t3_sge_init_ecntxt\n", ret); 2186 goto err_unlock; 2187 } 2188 2189 if (ntxq > 1) { 2190 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2191 USE_GTS, SGE_CNTXT_OFLD, id, 2192 q->txq[TXQ_OFLD].phys_addr, 2193 q->txq[TXQ_OFLD].size, 0, 1, 0); 2194 if (ret) { 2195 printf("error %d from t3_sge_init_ecntxt\n", ret); 2196 goto err_unlock; 2197 } 2198 } 2199 2200 if (ntxq > 2) { 2201 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2202 SGE_CNTXT_CTRL, id, 2203 q->txq[TXQ_CTRL].phys_addr, 2204 q->txq[TXQ_CTRL].size, 2205 q->txq[TXQ_CTRL].token, 1, 0); 2206 if (ret) { 2207 printf("error %d from t3_sge_init_ecntxt\n", ret); 2208 goto err_unlock; 2209 } 2210 } 2211 2212 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2213 device_get_unit(sc->dev), irq_vec_idx); 2214 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2215 2216 mtx_unlock(&sc->sge.reg_lock); 2217 t3_update_qset_coalesce(q, p); 2218 q->port = pi; 2219 2220 refill_fl(sc, &q->fl[0], q->fl[0].size); 2221 refill_fl(sc, &q->fl[1], q->fl[1].size); 2222 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2223 2224 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2225 V_NEWTIMER(q->rspq.holdoff_tmr)); 2226 2227 return (0); 2228 2229 err_unlock: 2230 mtx_unlock(&sc->sge.reg_lock); 2231 err: 2232 t3_free_qset(sc, q); 2233 2234 return (ret); 2235 } 2236 2237 void 2238 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2239 { 2240 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2241 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2242 struct ifnet *ifp = pi->ifp; 2243 2244 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2245 2246 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2247 cpl->csum_valid && cpl->csum == 0xffff) { 2248 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2249 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2250 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2251 m->m_pkthdr.csum_data = 0xffff; 2252 } 2253 /* 2254 * XXX need to add VLAN support for 6.x 2255 */ 2256 #ifdef VLAN_SUPPORTED 2257 if (__predict_false(cpl->vlan_valid)) { 2258 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2259 m->m_flags |= M_VLANTAG; 2260 } 2261 #endif 2262 2263 m->m_pkthdr.rcvif = ifp; 2264 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2265 m_explode(m); 2266 /* 2267 * adjust after conversion to mbuf chain 2268 */ 2269 m_adj(m, sizeof(*cpl) + ethpad); 2270 2271 (*ifp->if_input)(ifp, m); 2272 } 2273 2274 /** 2275 * get_packet - return the next ingress packet buffer from a free list 2276 * @adap: the adapter that received the packet 2277 * @drop_thres: # of remaining buffers before we start dropping packets 2278 * @qs: the qset that the SGE free list holding the packet belongs to 2279 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2280 * @r: response descriptor 2281 * 2282 * Get the next packet from a free list and complete setup of the 2283 * sk_buff. 
If the packet is small we make a copy and recycle the 2284 * original buffer, otherwise we use the original buffer itself. If a 2285 * positive drop threshold is supplied packets are dropped and their 2286 * buffers recycled if (a) the number of remaining buffers is under the 2287 * threshold and the packet is too big to copy, or (b) the packet should 2288 * be copied but there is no memory for the copy. 2289 */ 2290 #ifdef DISABLE_MBUF_IOVEC 2291 2292 static int 2293 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2294 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m) 2295 { 2296 2297 unsigned int len_cq = ntohl(r->len_cq); 2298 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2299 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2300 uint32_t len = G_RSPD_LEN(len_cq); 2301 uint32_t flags = ntohl(r->flags); 2302 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2303 int ret = 0; 2304 2305 prefetch(sd->cl); 2306 2307 fl->credits--; 2308 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2309 bus_dmamap_unload(fl->entry_tag, sd->map); 2310 2311 m_cljset(m, sd->cl, fl->type); 2312 m->m_len = len; 2313 2314 switch(sopeop) { 2315 case RSPQ_SOP_EOP: 2316 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2317 mh->mh_head = mh->mh_tail = m; 2318 m->m_pkthdr.len = len; 2319 m->m_flags |= M_PKTHDR; 2320 ret = 1; 2321 break; 2322 case RSPQ_NSOP_NEOP: 2323 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2324 m->m_flags &= ~M_PKTHDR; 2325 if (mh->mh_tail == NULL) { 2326 if (cxgb_debug) 2327 printf("discarding intermediate descriptor entry\n"); 2328 m_freem(m); 2329 break; 2330 } 2331 mh->mh_tail->m_next = m; 2332 mh->mh_tail = m; 2333 mh->mh_head->m_pkthdr.len += len; 2334 ret = 0; 2335 break; 2336 case RSPQ_SOP: 2337 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2338 m->m_pkthdr.len = len; 2339 mh->mh_head = mh->mh_tail = m; 2340 m->m_flags |= M_PKTHDR; 2341 ret = 0; 2342 break; 2343 case RSPQ_EOP: 2344 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2345 m->m_flags &= ~M_PKTHDR; 2346 mh->mh_head->m_pkthdr.len += len; 2347 mh->mh_tail->m_next = m; 2348 mh->mh_tail = m; 2349 ret = 1; 2350 break; 2351 } 2352 if (++fl->cidx == fl->size) 2353 fl->cidx = 0; 2354 2355 return (ret); 2356 } 2357 2358 #else 2359 static int 2360 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2361 struct mbuf *m, struct rsp_desc *r) 2362 { 2363 2364 unsigned int len_cq = ntohl(r->len_cq); 2365 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2366 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2367 uint32_t len = G_RSPD_LEN(len_cq); 2368 uint32_t flags = ntohl(r->flags); 2369 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2370 void *cl; 2371 int ret = 0; 2372 2373 prefetch(sd->cl); 2374 2375 fl->credits--; 2376 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2377 2378 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2379 cl = mtod(m, void *); 2380 memcpy(cl, sd->cl, len); 2381 recycle_rx_buf(adap, fl, fl->cidx); 2382 } else { 2383 cl = sd->cl; 2384 bus_dmamap_unload(fl->entry_tag, sd->map); 2385 } 2386 switch(sopeop) { 2387 case RSPQ_SOP_EOP: 2388 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2389 if (cl == sd->cl) 2390 m_cljset(m, cl, fl->type); 2391 m->m_len = m->m_pkthdr.len = len; 2392 ret = 1; 2393 goto done; 2394 break; 2395 case RSPQ_NSOP_NEOP: 2396 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2397 ret = 0; 2398 break; 2399 case RSPQ_SOP: 2400 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2401 m_iovinit(m); 2402 ret = 0; 2403 break; 2404 case RSPQ_EOP: 2405 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2406 ret = 1; 2407 break; 2408 } 2409 m_iovappend(m, cl, fl->buf_size, len, 0); 2410 2411 done: 2412 if (++fl->cidx == fl->size) 2413 fl->cidx = 0; 2414 2415 return (ret); 2416 } 2417 #endif 2418 /** 2419 * handle_rsp_cntrl_info - handles control information in a response 2420 * @qs: the queue set corresponding to the response 2421 * @flags: the response control flags 2422 * 2423 * Handles the control information of an SGE response, such as GTS 2424 * indications and completion credits for the queue set's Tx queues. 2425 * HW coalesces credits, we don't do any extra SW coalescing. 2426 */ 2427 static __inline void 2428 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2429 { 2430 unsigned int credits; 2431 2432 #if USE_GTS 2433 if (flags & F_RSPD_TXQ0_GTS) 2434 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2435 #endif 2436 credits = G_RSPD_TXQ0_CR(flags); 2437 if (credits) { 2438 qs->txq[TXQ_ETH].processed += credits; 2439 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 2440 taskqueue_enqueue(qs->port->adapter->tq, 2441 &qs->port->timer_reclaim_task); 2442 } 2443 2444 credits = G_RSPD_TXQ2_CR(flags); 2445 if (credits) 2446 qs->txq[TXQ_CTRL].processed += credits; 2447 2448 # if USE_GTS 2449 if (flags & F_RSPD_TXQ1_GTS) 2450 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2451 # endif 2452 credits = G_RSPD_TXQ1_CR(flags); 2453 if (credits) 2454 qs->txq[TXQ_OFLD].processed += credits; 2455 } 2456 2457 static void 2458 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2459 unsigned int sleeping) 2460 { 2461 ; 2462 } 2463 2464 /** 2465 * process_responses - process responses from an SGE response queue 2466 * @adap: the adapter 2467 * @qs: the queue set to which the response queue belongs 2468 * @budget: how many responses can be processed in this round 2469 * 2470 * Process responses from an SGE response queue up to the supplied budget. 2471 * Responses include received packets as well as credits and other events 2472 * for the queues that belong to the response queue's queue set. 2473 * A negative budget is effectively unlimited. 2474 * 2475 * Additionally choose the interrupt holdoff time for the next interrupt 2476 * on this queue. If the system is under memory shortage use a fairly 2477 * long delay to help recovery. 
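 * Returns the number of responses processed. Callers normally go through process_responses_gts(), which additionally writes the GTS register with the updated response-queue index and the chosen holdoff timer.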
2478 */ 2479 static int 2480 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2481 { 2482 struct sge_rspq *rspq = &qs->rspq; 2483 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2484 int budget_left = budget; 2485 unsigned int sleeping = 0; 2486 int lro = qs->lro.enabled; 2487 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2488 int ngathered = 0; 2489 #ifdef DEBUG 2490 static int last_holdoff = 0; 2491 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2492 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2493 last_holdoff = rspq->holdoff_tmr; 2494 } 2495 #endif 2496 rspq->next_holdoff = rspq->holdoff_tmr; 2497 2498 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2499 int eth, eop = 0, ethpad = 0; 2500 uint32_t flags = ntohl(r->flags); 2501 uint32_t rss_csum = *(const uint32_t *)r; 2502 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 2503 2504 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2505 2506 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2507 /* XXX */ 2508 printf("async notification\n"); 2509 2510 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2511 #ifdef DISABLE_MBUF_IOVEC 2512 2513 if (cxgb_debug) 2514 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx); 2515 2516 if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) { 2517 rspq->next_holdoff = NOMEM_INTR_DELAY; 2518 budget_left--; 2519 break; 2520 } else { 2521 eop = 1; 2522 } 2523 #else 2524 struct mbuf *m = NULL; 2525 2526 if (rspq->rspq_mbuf == NULL) 2527 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2528 else 2529 m = m_gethdr(M_DONTWAIT, MT_DATA); 2530 2531 /* 2532 * XXX revisit me 2533 */ 2534 if (rspq->rspq_mbuf == NULL && m == NULL) { 2535 rspq->next_holdoff = NOMEM_INTR_DELAY; 2536 budget_left--; 2537 break; 2538 } 2539 if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags)) 2540 goto skip; 2541 eop = 1; 2542 #endif 2543 rspq->imm_data++; 2544 } else if (r->len_cq) { 2545 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2546 2547 #ifdef DISABLE_MBUF_IOVEC 2548 struct mbuf *m; 2549 m = m_gethdr(M_NOWAIT, MT_DATA); 2550 2551 if (m == NULL) { 2552 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2553 break; 2554 } 2555 2556 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m); 2557 #else 2558 if (rspq->rspq_mbuf == NULL) 2559 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2560 if (rspq->rspq_mbuf == NULL) { 2561 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2562 break; 2563 } 2564 eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r); 2565 #endif 2566 ethpad = 2; 2567 } else { 2568 DPRINTF("pure response\n"); 2569 rspq->pure_rsps++; 2570 } 2571 2572 if (flags & RSPD_CTRL_MASK) { 2573 sleeping |= flags & RSPD_GTS_MASK; 2574 handle_rsp_cntrl_info(qs, flags); 2575 } 2576 #ifndef DISABLE_MBUF_IOVEC 2577 skip: 2578 #endif 2579 r++; 2580 if (__predict_false(++rspq->cidx == rspq->size)) { 2581 rspq->cidx = 0; 2582 rspq->gen ^= 1; 2583 r = rspq->desc; 2584 } 2585 2586 prefetch(r); 2587 if (++rspq->credits >= (rspq->size / 4)) { 2588 refill_rspq(adap, rspq, rspq->credits); 2589 rspq->credits = 0; 2590 } 2591 2592 if (eop) { 2593 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 2594 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 2595 2596 if (eth) { 2597 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad, 2598 rss_hash, rss_csum, lro); 2599 2600 rspq->rspq_mh.mh_head = NULL; 2601 } else { 2602 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2603 /* 2604 * XXX size mismatch 2605 */ 2606 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2607 2608 ngathered = rx_offload(&adap->tdev, rspq, 2609 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2610 } 2611 __refill_fl(adap, &qs->fl[0]); 2612 __refill_fl(adap, &qs->fl[1]); 2613 2614 } 2615 --budget_left; 2616 } 2617 2618 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2619 t3_lro_flush(adap, qs, &qs->lro); 2620 2621 if (sleeping) 2622 check_ring_db(adap, qs, sleeping); 2623 2624 smp_mb(); /* commit Tx queue processed updates */ 2625 if (__predict_false(qs->txq_stopped != 0)) 2626 restart_tx(qs); 2627 2628 budget -= budget_left; 2629 return (budget); 2630 } 2631 2632 /* 2633 * A helper function that processes responses and issues GTS. 2634 */ 2635 static __inline int 2636 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2637 { 2638 int work; 2639 static int last_holdoff = 0; 2640 2641 work = process_responses(adap, rspq_to_qset(rq), -1); 2642 2643 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2644 printf("next_holdoff=%d\n", rq->next_holdoff); 2645 last_holdoff = rq->next_holdoff; 2646 } 2647 if (work) 2648 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2649 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2650 return work; 2651 } 2652 2653 2654 /* 2655 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2656 * Handles data events from SGE response queues as well as error and other 2657 * async events as they all use the same interrupt pin. We use one SGE 2658 * response queue per port in this mode and protect all response queues with 2659 * queue 0's lock. 
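 * The handler reads the SGE data-interrupt map to see which response queues have pending work and hands error/async events off to the slow interrupt task.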
2660 */ 2661 void 2662 t3b_intr(void *data) 2663 { 2664 uint32_t i, map; 2665 adapter_t *adap = data; 2666 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2667 2668 t3_write_reg(adap, A_PL_CLI, 0); 2669 map = t3_read_reg(adap, A_SG_DATA_INTR); 2670 2671 if (!map) 2672 return; 2673 2674 if (__predict_false(map & F_ERRINTR)) 2675 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2676 2677 mtx_lock(&q0->lock); 2678 for_each_port(adap, i) 2679 if (map & (1 << i)) 2680 process_responses_gts(adap, &adap->sge.qs[i].rspq); 2681 mtx_unlock(&q0->lock); 2682 } 2683 2684 /* 2685 * The MSI interrupt handler. This needs to handle data events from SGE 2686 * response queues as well as error and other async events as they all use 2687 * the same MSI vector. We use one SGE response queue per port in this mode 2688 * and protect all response queues with queue 0's lock. 2689 */ 2690 void 2691 t3_intr_msi(void *data) 2692 { 2693 adapter_t *adap = data; 2694 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2695 int i, new_packets = 0; 2696 2697 mtx_lock(&q0->lock); 2698 2699 for_each_port(adap, i) 2700 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2701 new_packets = 1; 2702 mtx_unlock(&q0->lock); 2703 if (new_packets == 0) 2704 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2705 } 2706 2707 void 2708 t3_intr_msix(void *data) 2709 { 2710 struct sge_qset *qs = data; 2711 adapter_t *adap = qs->port->adapter; 2712 struct sge_rspq *rspq = &qs->rspq; 2713 2714 mtx_lock(&rspq->lock); 2715 if (process_responses_gts(adap, rspq) == 0) 2716 rspq->unhandled_irqs++; 2717 mtx_unlock(&rspq->lock); 2718 } 2719 2720 /* 2721 * broken by recent mbuf changes 2722 */ 2723 static int 2724 t3_lro_enable(SYSCTL_HANDLER_ARGS) 2725 { 2726 adapter_t *sc; 2727 int i, j, enabled, err, nqsets = 0; 2728 2729 #ifndef LRO_WORKING 2730 return (0); 2731 #endif 2732 2733 sc = arg1; 2734 enabled = sc->sge.qs[0].lro.enabled; 2735 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2736 2737 if (err != 0) 2738 return (err); 2739 if (enabled == sc->sge.qs[0].lro.enabled) 2740 return (0); 2741 2742 for (i = 0; i < sc->params.nports; i++) 2743 for (j = 0; j < sc->port[i].nqsets; j++) 2744 nqsets++; 2745 2746 for (i = 0; i < nqsets; i++) 2747 sc->sge.qs[i].lro.enabled = enabled; 2748 2749 return (0); 2750 } 2751 2752 static int 2753 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2754 { 2755 adapter_t *sc = arg1; 2756 struct qset_params *qsp = &sc->params.sge.qset[0]; 2757 int coalesce_nsecs; 2758 struct sge_qset *qs; 2759 int i, j, err, nqsets = 0; 2760 struct mtx *lock; 2761 2762 coalesce_nsecs = qsp->coalesce_nsecs; 2763 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2764 2765 if (err != 0) { 2766 return (err); 2767 } 2768 if (coalesce_nsecs == qsp->coalesce_nsecs) 2769 return (0); 2770 2771 for (i = 0; i < sc->params.nports; i++) 2772 for (j = 0; j < sc->port[i].nqsets; j++) 2773 nqsets++; 2774 2775 coalesce_nsecs = max(100, coalesce_nsecs); 2776 2777 for (i = 0; i < nqsets; i++) { 2778 qs = &sc->sge.qs[i]; 2779 qsp = &sc->params.sge.qset[i]; 2780 qsp->coalesce_nsecs = coalesce_nsecs; 2781 2782 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 2783 &sc->sge.qs[0].rspq.lock; 2784 2785 mtx_lock(lock); 2786 t3_update_qset_coalesce(qs, qsp); 2787 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2788 V_NEWTIMER(qs->rspq.holdoff_tmr)); 2789 mtx_unlock(lock); 2790 } 2791 2792 return (0); 2793 } 2794 2795 2796 void 2797 t3_add_sysctls(adapter_t *sc) 2798 { 2799 struct sysctl_ctx_list *ctx; 2800 struct sysctl_oid_list *children; 2801 2802 ctx = device_get_sysctl_ctx(sc->dev); 2803 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 2804 2805 /* random information */ 2806 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 2807 "firmware_version", 2808 CTLFLAG_RD, &sc->fw_version, 2809 0, "firmware version"); 2810 2811 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2812 "enable_lro", 2813 CTLTYPE_INT|CTLFLAG_RW, sc, 2814 0, t3_lro_enable, 2815 "I", "enable large receive offload"); 2816 2817 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2818 "intr_coal", 2819 CTLTYPE_INT|CTLFLAG_RW, sc, 2820 0, t3_set_coalesce_nsecs, 2821 "I", "interrupt coalescing timer (ns)"); 2822 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2823 "enable_debug", 2824 CTLFLAG_RW, &cxgb_debug, 2825 0, "enable verbose debugging output"); 2826 2827 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2828 "collapse_free", 2829 CTLFLAG_RD, &collapse_free, 2830 0, "frees during collapse"); 2831 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2832 "mb_free_vec_free", 2833 CTLFLAG_RD, &mb_free_vec_free, 2834 0, "frees during mb_free_vec"); 2835 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2836 "collapse_mbufs", 2837 CTLFLAG_RW, &collapse_mbufs, 2838 0, "collapse mbuf chains into iovecs"); 2839 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2840 "txq_overrun", 2841 CTLFLAG_RD, &txq_fills, 2842 0, "#times txq overrun"); 2843 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2844 "bogus_imm", 2845 CTLFLAG_RD, &bogus_imm, 2846 0, "#times a bogus immediate response was seen"); 2847 } 2848 2849 /** 2850 * t3_get_desc - dump an SGE descriptor for debugging purposes 2851 * @qs: the queue set 2852 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2853 * @idx: the descriptor index in the queue 2854 * @data: where to dump the descriptor contents 2855 * 2856 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2857 * size of the descriptor. 2858 */ 2859 int 2860 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2861 unsigned char *data) 2862 { 2863 if (qnum >= 6) 2864 return (EINVAL); 2865 2866 if (qnum < 3) { 2867 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2868 return (EINVAL); 2869 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2870 return sizeof(struct tx_desc); 2871 } 2872 2873 if (qnum == 3) { 2874 if (!qs->rspq.desc || idx >= qs->rspq.size) 2875 return (EINVAL); 2876 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2877 return sizeof(struct rsp_desc); 2878 } 2879 2880 qnum -= 4; 2881 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2882 return (EINVAL); 2883 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2884 return sizeof(struct rx_desc); 2885 } 2886
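/*
 * Illustrative use of t3_get_desc() from debugging code (a hypothetical
 * sketch, not part of the driver): dump the first descriptor of queue
 * set 0's Ethernet Tx queue.  "sc" stands for the adapter softc the
 * caller already holds; the buffer need only be large enough for the
 * descriptor type being dumped.
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len = t3_get_desc(&sc->sge.qs[0], 0, 0, buf);
 *
 *	if (len == sizeof(struct tx_desc))
 *		printf("copied a %d-byte Ethernet Tx descriptor\n", len);
 */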