/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>

#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#include <sys/mvec.h>
#else
#include <dev/cxgb/cxgb_include.h>
#include <dev/cxgb/sys/mvec.h>
#endif

uint32_t collapse_free = 0;
uint32_t mb_free_vec_free = 0;
int txq_fills = 0;
int collapse_mbufs = 0;
static int bogus_imm = 0;
#ifndef DISABLE_MBUF_IOVEC
static int recycle_enable = 1;
#endif

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE  1536
#define SGE_RX_DROP_THRES   16
#define SGE_RX_COPY_THRES   128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD   (hz >> 1)

/*
 * work request size in bytes
 */
#define WR_LEN (WR_FLITS * 8)

/*
 * Values for sge_txq.flags
 */
enum {
    TXQ_RUNNING     = 1 << 0,   /* fetch engine is running */
    TXQ_LAST_PKT_DB = 1 << 1,   /* last packet rang the doorbell */
};

struct tx_desc {
    uint64_t    flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
    uint32_t    addr_lo;
    uint32_t    len_gen;
    uint32_t    gen2;
    uint32_t    addr_hi;
} __packed;

struct rsp_desc {               /* response queue descriptor */
    struct rss_header   rss_hdr;
    uint32_t            flags;
    uint32_t            len_cq;
    uint8_t             imm_data[47];
    uint8_t             intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED  (1 << 0)
#define TX_SW_DESC_MAP_CREATED  (1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

#define RSPQ_NSOP_NEOP  G_RSPD_SOP_EOP(0)
#define RSPQ_EOP        G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP        G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP    G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {             /* SW state per Tx descriptor */
    struct mbuf     *m;
    bus_dmamap_t    map;
    int             flags;
};

struct rx_sw_desc {             /* SW state per Rx descriptor */
    void            *cl;
    bus_dmamap_t    map;
    int             flags;
};

struct txq_state {
    unsigned int    compl;
    unsigned int    gen;
    unsigned int    pidx;
};

struct refill_fl_cb_arg {
    int                 error;
    bus_dma_segment_t   seg;
    int                 nseg;
};

/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *     desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
    0,
#if SGE_NUM_GENBITS == 1
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};


static int lro_default = 0;
int cxgb_debug = 0;

static void t3_free_qset(adapter_t *sc, struct sge_qset *q);
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static int free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec);

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @adapter: the adapter
 * @q: the Tx queue to reclaim completed descriptors from
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_txq *q, int nbufs, struct mbuf **mvec)
{
    int reclaimed, reclaim = desc_reclaimable(q);
    int n = 0;

    mtx_assert(&q->lock, MA_OWNED);
    if (reclaim > 0) {
        n = free_tx_desc(q, min(reclaim, nbufs), mvec);
        reclaimed = min(reclaim, nbufs);
        q->cleaned += reclaimed;
        q->in_use -= reclaimed;
    }
    return (n);
}

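/*
 * Illustrative sketch (not part of the driver): a typical caller pattern for
 * reclaim_completed_tx().  The helper only unhooks mbufs from the descriptor
 * ring into the caller's vector; the caller is expected to free them outside
 * the queue lock.  The function name below is hypothetical; it mirrors what
 * sge_txq_reclaim_() does later in this file.
 */
#if 0
static void
example_reclaim(struct sge_txq *txq)
{
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    int i, n;

    mtx_lock(&txq->lock);
    n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
    mtx_unlock(&txq->lock);

    for (i = 0; i < n; i++)
        m_freem(m_vec[i]);      /* free outside the queue lock */
}
#endif
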
/**
 * should_restart_tx - are there enough resources to restart a Tx queue?
 * @q: the Tx queue
 *
 * Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
    unsigned int r = q->processed - q->cleaned;

    return q->in_use - r < (q->size >> 1);
}

/**
 * t3_sge_init - initialize SGE
 * @adap: the adapter
 * @p: the SGE parameters
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here, instead the driver
 * top-level must request those individually.  We also do not enable DMA
 * here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
    u_int ctrl, ups;

    ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

    ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
        F_CQCRDTCTRL |
        V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
        V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
    ctrl |= F_EGRGENCTRL;
#endif
    if (adap->params.rev > 0) {
        if (!(adap->flags & (USING_MSIX | USING_MSI)))
            ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
        ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
    }
    t3_write_reg(adap, A_SG_CONTROL, ctrl);
    t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
        V_LORCQDRBTHRSH(512));
    t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
    t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
        V_TIMEOUT(200 * core_ticks_per_usec(adap)));
    t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
    t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
    t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
    t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
    t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
    t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 * sgl_len - calculates the size of an SGL of the given capacity
 * @n: the number of SGL entries
 *
 * Calculates the number of flits needed for a scatter/gather list that
 * can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
    return ((3 * n) / 2 + (n & 1));
}

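/*
 * Worked example for sgl_len(): each pair of SGL entries packs into 3 flits
 * (two 8-byte addresses plus two 4-byte lengths), so e.g. n = 3 entries need
 * (3 * 3) / 2 + (3 & 1) = 4 + 1 = 5 flits.
 */
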
/**
 * get_imm_packet - return the next ingress packet buffer from a response
 * @resp: the response descriptor containing the packet data
 *
 * Return a packet containing the immediate data of the given response.
 */
#ifdef DISABLE_MBUF_IOVEC
static __inline int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
{
    struct mbuf *m;
    int len;
    uint32_t flags = ntohl(resp->flags);
    uint8_t sopeop = G_RSPD_SOP_EOP(flags);

    /*
     * would be a firmware bug
     */
    if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP)
        return (0);

    m = m_gethdr(M_NOWAIT, MT_DATA);
    len = G_RSPD_LEN(ntohl(resp->len_cq));

    if (m) {
        MH_ALIGN(m, IMMED_PKT_SIZE);
        memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE);
        m->m_len = len;

        switch (sopeop) {
        case RSPQ_SOP_EOP:
            mh->mh_head = mh->mh_tail = m;
            m->m_pkthdr.len = len;
            m->m_flags |= M_PKTHDR;
            break;
        case RSPQ_EOP:
            m->m_flags &= ~M_PKTHDR;
            mh->mh_head->m_pkthdr.len += len;
            mh->mh_tail->m_next = m;
            mh->mh_tail = m;
            break;
        }
    }
    return (m != NULL);
}

#else
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
{
    int len, error;
    uint8_t sopeop = G_RSPD_SOP_EOP(flags);

    /*
     * would be a firmware bug
     */
    len = G_RSPD_LEN(ntohl(resp->len_cq));
    if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) {
        if (cxgb_debug)
            device_printf(sc->dev, "unexpected value sopeop=%d flags=0x%x len=%d in get_imm_packet\n", sopeop, flags, len);
        bogus_imm++;
        return (EINVAL);
    }
    error = 0;
    switch (sopeop) {
    case RSPQ_SOP_EOP:
        m->m_len = m->m_pkthdr.len = len;
        memcpy(mtod(m, uint8_t *), resp->imm_data, len);
        break;
    case RSPQ_EOP:
        memcpy(cl, resp->imm_data, len);
        m_iovappend(m, cl, MSIZE, len, 0);
        break;
    default:
        bogus_imm++;
        error = EINVAL;
    }

    return (error);
}
#endif

static __inline u_int
flits_to_desc(u_int n)
{
    return (flit_desc_map[n]);
}

void
t3_sge_err_intr_handler(adapter_t *adapter)
{
    unsigned int v, status;

    status = t3_read_reg(adapter, A_SG_INT_CAUSE);

    if (status & F_RSPQCREDITOVERFOW)
        CH_ALERT(adapter, "SGE response queue credit overflow\n");

    if (status & F_RSPQDISABLED) {
        v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

        CH_ALERT(adapter,
            "packet delivered to disabled response queue (0x%x)\n",
            (v >> S_RSPQ0DISABLED) & 0xff);
    }

    t3_write_reg(adapter, A_SG_INT_CAUSE, status);
    if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
        t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
    int i;

    /* XXX Does ETHER_ALIGN need to be accounted for here? */
    p->max_pkt_size = MJUM16BYTES - sizeof(struct cpl_rx_data);

    for (i = 0; i < SGE_QSETS; ++i) {
        struct qset_params *q = p->qset + i;

        q->polling = adap->params.rev > 0;

        if (adap->params.nports > 2)
            q->coalesce_nsecs = 50000;
        else
            q->coalesce_nsecs = 5000;

        q->rspq_size = RSPQ_Q_SIZE;
        q->fl_size = FL_Q_SIZE;
        q->jumbo_size = JUMBO_Q_SIZE;
        q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
        q->txq_size[TXQ_OFLD] = 1024;
        q->txq_size[TXQ_CTRL] = 256;
        q->cong_thres = 0;
    }
}

int
t3_sge_alloc(adapter_t *sc)
{

    /* The parent tag. */
    if (bus_dma_tag_create( NULL,           /* parent */
            1, 0,                           /* algnmnt, boundary */
            BUS_SPACE_MAXADDR,              /* lowaddr */
            BUS_SPACE_MAXADDR,              /* highaddr */
            NULL, NULL,                     /* filter, filterarg */
            BUS_SPACE_MAXSIZE_32BIT,        /* maxsize */
            BUS_SPACE_UNRESTRICTED,         /* nsegments */
            BUS_SPACE_MAXSIZE_32BIT,        /* maxsegsize */
            0,                              /* flags */
            NULL, NULL,                     /* lock, lockarg */
            &sc->parent_dmat)) {
        device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
        return (ENOMEM);
    }

    /*
     * DMA tag for normal sized RX frames
     */
    if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
            MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
        device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
        return (ENOMEM);
    }

    /*
     * DMA tag for jumbo sized RX frames.
     */
    if (bus_dma_tag_create(sc->parent_dmat, MJUMPAGESIZE, 0, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, MJUMPAGESIZE, 1, MJUMPAGESIZE,
            BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
        device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
        return (ENOMEM);
    }

    /*
     * DMA tag for TX frames.
     */
    if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
            BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
            TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
            NULL, NULL, &sc->tx_dmat)) {
        device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
        return (ENOMEM);
    }

    return (0);
}

int
t3_sge_free(struct adapter * sc)
{

    if (sc->tx_dmat != NULL)
        bus_dma_tag_destroy(sc->tx_dmat);

    if (sc->rx_jumbo_dmat != NULL)
        bus_dma_tag_destroy(sc->rx_jumbo_dmat);

    if (sc->rx_dmat != NULL)
        bus_dma_tag_destroy(sc->rx_dmat);

    if (sc->parent_dmat != NULL)
        bus_dma_tag_destroy(sc->parent_dmat);

    return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

    qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
    qs->rspq.polling = 0 /* p->polling */;
}

static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
    struct refill_fl_cb_arg *cb_arg = arg;

    cb_arg->error = error;
    cb_arg->seg = segs[0];
    cb_arg->nseg = nseg;

}

/**
 * refill_fl - refill an SGE free-buffer list
 * @sc: the controller softc
 * @q: the free-list to refill
 * @n: the number of new buffers to allocate
 *
 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must assure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
    struct rx_sw_desc *sd = &q->sdesc[q->pidx];
    struct rx_desc *d = &q->desc[q->pidx];
    struct refill_fl_cb_arg cb_arg;
    void *cl;
    int err;

    cb_arg.error = 0;
    while (n--) {
        /*
         * We only allocate a cluster, mbuf allocation happens after rx
         */
        if ((cl = m_cljget(NULL, M_DONTWAIT, q->buf_size)) == NULL) {
            log(LOG_WARNING, "Failed to allocate cluster\n");
            goto done;
        }
        if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
            if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
                log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
                uma_zfree(q->zone, cl);
                goto done;
            }
            sd->flags |= RX_SW_DESC_MAP_CREATED;
        }
        err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size,
            refill_fl_cb, &cb_arg, 0);

        if (err != 0 || cb_arg.error) {
            log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
            /*
             * XXX free cluster
             */
            return;
        }

        sd->flags |= RX_SW_DESC_INUSE;
        sd->cl = cl;
        d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
        d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
        d->len_gen = htobe32(V_FLD_GEN1(q->gen));
        d->gen2 = htobe32(V_FLD_GEN2(q->gen));

        d++;
        sd++;

        if (++q->pidx == q->size) {
            q->pidx = 0;
            q->gen ^= 1;
            sd = q->sdesc;
            d = q->desc;
        }
        q->credits++;
    }

done:
    t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
    u_int cidx = q->cidx;

    while (q->credits--) {
        struct rx_sw_desc *d = &q->sdesc[cidx];

        if (d->flags & RX_SW_DESC_INUSE) {
            bus_dmamap_unload(q->entry_tag, d->map);
            bus_dmamap_destroy(q->entry_tag, d->map);
            uma_zfree(q->zone, d->cl);
        }
        d->cl = NULL;
        if (++cidx == q->size)
            cidx = 0;
    }
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
    refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

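/*
 * Illustrative sketch (not part of the driver): topping up both free lists of
 * a queue set once the rx path has consumed buffers, mirroring what
 * sge_timer_reclaim() does below.  The function name is hypothetical.
 */
#if 0
static void
example_topup(adapter_t *sc, struct sge_qset *qs)
{
    if (qs->fl[0].credits < qs->fl[0].size - 16)
        __refill_fl(sc, &qs->fl[0]);    /* small-buffer free list */
    if (qs->fl[1].credits < qs->fl[1].size - 16)
        __refill_fl(sc, &qs->fl[1]);    /* jumbo free list */
}
#endif
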
#ifndef DISABLE_MBUF_IOVEC
/**
 * recycle_rx_buf - recycle a receive buffer
 * @adapter: the adapter
 * @q: the SGE free list
 * @idx: index of buffer to recycle
 *
 * Recycles the specified buffer on the given free list by adding it at
 * the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
    struct rx_desc *from = &q->desc[idx];
    struct rx_desc *to = &q->desc[q->pidx];

    q->sdesc[q->pidx] = q->sdesc[idx];
    to->addr_lo = from->addr_lo;    // already big endian
    to->addr_hi = from->addr_hi;    // likewise
    wmb();
    to->len_gen = htobe32(V_FLD_GEN1(q->gen));
    to->gen2 = htobe32(V_FLD_GEN2(q->gen));
    q->credits++;

    if (++q->pidx == q->size) {
        q->pidx = 0;
        q->gen ^= 1;
    }
    t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}
#endif

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
    uint32_t *addr;

    addr = arg;
    *addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
    size_t len = nelem * elem_size;
    void *s = NULL;
    void *p = NULL;
    int err;

    if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
            BUS_SPACE_MAXADDR_32BIT,
            BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
            len, 0, NULL, NULL, tag)) != 0) {
        device_printf(sc->dev, "Cannot allocate descriptor tag\n");
        return (ENOMEM);
    }

    if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
            map)) != 0) {
        device_printf(sc->dev, "Cannot allocate descriptor memory\n");
        return (ENOMEM);
    }

    bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
    bzero(p, len);
    *(void **)desc = p;

    if (sw_size) {
        len = nelem * sw_size;
        s = malloc(len, M_DEVBUF, M_WAITOK);
        bzero(s, len);
        *(void **)sdesc = s;
    }
    if (parent_entry_tag == NULL)
        return (0);

    if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
            BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
            NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
            TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
            NULL, NULL, entry_tag)) != 0) {
        device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
        return (ENOMEM);
    }
    return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
    adapter_t *sc = arg;

    t3_slow_intr_handler(sc);
}

/**
 * sge_timer_cb - perform periodic maintenance of an SGE qset
 * @data: the SGE queue set to maintain
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set.  It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while.  We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up).  Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty.  We try to add only a few
 * buffers here, the queue will be replenished fully as these new buffers
 * are used up if memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
 *
 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 * fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
    adapter_t *sc = arg;
    struct port_info *p;
    struct sge_qset *qs;
    struct sge_txq *txq;
    int i, j;
    int reclaim_eth, reclaim_ofl, refill_rx;

    for (i = 0; i < sc->params.nports; i++)
        for (j = 0; j < sc->port[i].nqsets; j++) {
            qs = &sc->sge.qs[i + j];
            txq = &qs->txq[0];
            reclaim_eth = txq[TXQ_ETH].processed - txq[TXQ_ETH].cleaned;
            reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
            refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
                (qs->fl[1].credits < qs->fl[1].size));
            if (reclaim_eth || reclaim_ofl || refill_rx) {
                p = &sc->port[i];
                taskqueue_enqueue(p->tq, &p->timer_reclaim_task);
                break;
            }
        }
    if (sc->params.nports > 2) {
        int i;

        for_each_port(sc, i) {
            struct port_info *pi = &sc->port[i];

            t3_write_reg(sc, A_SG_KDOORBELL,
                F_SELEGRCNTX |
                (FW_TUNNEL_SGEEC_START + pi->first_qset));
        }
    }
    if (sc->open_device_map != 0)
        callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
    callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
    callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
    TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
    return (0);
}

int
t3_sge_init_port(struct port_info *p)
{
    TASK_INIT(&p->timer_reclaim_task, 0, sge_timer_reclaim, p);
    return (0);
}

void
t3_sge_deinit_sw(adapter_t *sc)
{
    int i;

    callout_drain(&sc->sge_timer_ch);
    if (sc->tq)
        taskqueue_drain(sc->tq, &sc->slow_intr_task);
    for (i = 0; i < sc->params.nports; i++)
        if (sc->port[i].tq != NULL)
            taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
}

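/*
 * Note: with TX_RECLAIM_PERIOD defined as (hz >> 1) above, sge_timer_cb()
 * re-arms itself roughly every 500 ms while the device is open, so the
 * timer-driven reclaim and free-list top-up run at most twice per second
 * when the normal tx/rx paths are idle.
 */
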
/**
 * refill_rspq - replenish an SGE response queue
 * @adapter: the adapter
 * @q: the response queue to replenish
 * @credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

    /* mbufs are allocated on demand when a rspq entry is processed. */
    t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
        V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static __inline void
sge_txq_reclaim_(struct sge_txq *txq)
{
    int reclaimable, i, n;
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    struct port_info *p;

    p = txq->port;
reclaim_more:
    n = 0;
    reclaimable = desc_reclaimable(txq);
    if (reclaimable > 0 && mtx_trylock(&txq->lock)) {
        n = reclaim_completed_tx(txq, TX_CLEAN_MAX_DESC, m_vec);
        mtx_unlock(&txq->lock);
    }
    if (n == 0)
        return;

    for (i = 0; i < n; i++) {
        m_freem(m_vec[i]);
    }
    if (p && p->ifp->if_drv_flags & IFF_DRV_OACTIVE &&
        txq->size - txq->in_use >= TX_START_MAX_DESC) {
        txq_fills++;
        p->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
        taskqueue_enqueue(p->tq, &p->start_task);
    }

    if (n)
        goto reclaim_more;
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
    struct sge_txq *q = arg;

    sge_txq_reclaim_(q);
}

static void
sge_timer_reclaim(void *arg, int ncount)
{
    struct port_info *p = arg;
    int i, nqsets = p->nqsets;
    adapter_t *sc = p->adapter;
    struct sge_qset *qs;
    struct sge_txq *txq;
    struct mtx *lock;

    for (i = 0; i < nqsets; i++) {
        qs = &sc->sge.qs[i];
        txq = &qs->txq[TXQ_ETH];
        sge_txq_reclaim_(txq);

        txq = &qs->txq[TXQ_OFLD];
        sge_txq_reclaim_(txq);

        lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
            &sc->sge.qs[0].rspq.lock;

        if (mtx_trylock(lock)) {
            /* XXX currently assume that we are *NOT* polling */
            uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

            if (qs->fl[0].credits < qs->fl[0].size - 16)
                __refill_fl(sc, &qs->fl[0]);
            if (qs->fl[1].credits < qs->fl[1].size - 16)
                __refill_fl(sc, &qs->fl[1]);

            if (status & (1 << qs->rspq.cntxt_id)) {
                if (qs->rspq.credits) {
                    refill_rspq(sc, &qs->rspq, 1);
                    qs->rspq.credits--;
                    t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
                        1 << qs->rspq.cntxt_id);
                }
            }
            mtx_unlock(lock);
        }
    }
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

    qs->rspq.cntxt_id = id;
    qs->fl[0].cntxt_id = 2 * id;
    qs->fl[1].cntxt_id = 2 * id + 1;
    qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
    qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
    qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
    qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
    qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
    txq->in_use += ndesc;
    /*
     * XXX we don't handle stopping of queue
     * presumably start handles this when we bump against the end
     */
    txqs->gen = txq->gen;
    txq->unacked += ndesc;
    txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
    txq->unacked &= 7;
    txqs->pidx = txq->pidx;
    txq->pidx += ndesc;

    if (txq->pidx >= txq->size) {
        txq->pidx -= txq->size;
        txq->gen ^= 1;
    }

}

/**
 * calc_tx_descs - calculate the number of Tx descriptors for a packet
 * @m: the packet mbufs
 * @nsegs: the number of segments
 *
 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
    unsigned int flits;

    if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
        return 1;

    flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
    if (m->m_pkthdr.csum_flags & (CSUM_TSO))
        flits++;
#endif
    return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *stx, bus_dma_segment_t *segs, int *nsegs)
{
    struct mbuf *m0;
    int err, pktlen;

    m0 = *m;
    pktlen = m0->m_pkthdr.len;

    err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
#ifdef DEBUG
    if (err) {
        int n = 0;
        struct mbuf *mtmp = m0;
        while (mtmp) {
            n++;
            mtmp = mtmp->m_next;
        }
        printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n",
            err, m0->m_pkthdr.len, n);
    }
#endif
    if (err == EFBIG) {
        /* Too many segments, try to defrag */
        m0 = m_defrag(m0, M_DONTWAIT);
        if (m0 == NULL) {
            m_freem(*m);
            *m = NULL;
            return (ENOBUFS);
        }
        *m = m0;
        err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0);
    }

    if (err == ENOMEM) {
        return (err);
    }

    if (err) {
        if (cxgb_debug)
            printf("map failure err=%d pktlen=%d\n", err, pktlen);
        m_freem(m0);
        *m = NULL;
        return (err);
    }

    bus_dmamap_sync(txq->entry_tag, stx->map, BUS_DMASYNC_PREWRITE);
    stx->flags |= TX_SW_DESC_MAPPED;

    return (0);
}

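/*
 * Worked example for calc_tx_descs(): a packet mapped to 3 DMA segments that
 * does not fit as immediate data needs sgl_len(3) + 2 = 7 flits (SGL plus WR
 * and CPL headers), and flits_to_desc(7) maps that to a single Tx descriptor.
 */
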
/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet
 * and returns the SGL size in 8-byte words.  The caller must size the SGL
 * appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
    int i, idx;

    for (idx = 0, i = 0; i < nsegs; i++, idx ^= 1) {
        if (i && idx == 0)
            ++sgp;

        sgp->len[idx] = htobe32(segs[i].ds_len);
        sgp->addr[idx] = htobe64(segs[i].ds_addr);
    }

    if (idx)
        sgp->len[idx] = 0;
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race,
 * where the HW could go to sleep just after we checked; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
    clear_bit(TXQ_LAST_PKT_DB, &q->flags);
    if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
        set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
        T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
            q->cntxt_id);
#endif
        t3_write_reg(adap, A_SG_KDOORBELL,
            F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
    }
#else
    wmb();      /* write descriptors before telling HW */
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
    d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}



/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL.  If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header.  Otherwise we distribute the
 * SGL across the number of descriptors it spans.
 */

static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

    struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
    struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

    if (__predict_true(ndesc == 1)) {
        wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
            V_WR_SGLSFLT(flits)) | wr_hi;
        wmb();
        wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
            V_WR_GEN(txqs->gen)) | wr_lo;
        /* XXX gen? */
        wr_gen2(txd, txqs->gen);
    } else {
        unsigned int ogen = txqs->gen;
        const uint64_t *fp = (const uint64_t *)sgl;
        struct work_request_hdr *wp = wrp;

        wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
            V_WR_SGLSFLT(flits)) | wr_hi;

        while (sgl_flits) {
            unsigned int avail = WR_FLITS - flits;

            if (avail > sgl_flits)
                avail = sgl_flits;
            memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
            sgl_flits -= avail;
            ndesc--;
            if (!sgl_flits)
                break;

            fp += avail;
            txd++;
            txsd++;
            if (++txqs->pidx == txq->size) {
                txqs->pidx = 0;
                txqs->gen ^= 1;
                txd = txq->desc;
                txsd = txq->sdesc;
            }

            /*
             * when the head of the mbuf chain
             * is freed all clusters will be freed
             * with it
             */
            txsd->m = NULL;
            wrp = (struct work_request_hdr *)txd;
            wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
                V_WR_SGLSFLT(1)) | wr_hi;
            wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
                sgl_flits + 1)) |
                V_WR_GEN(txqs->gen)) | wr_lo;
            wr_gen2(txd, txqs->gen);
            flits = 1;
        }
        wrp->wr_hi |= htonl(F_WR_EOP);
        wmb();
        wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
        wr_gen2((struct tx_desc *)wp, ogen);
    }
}


/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

int
t3_encap(struct port_info *p, struct mbuf **m, int *free)
{
    adapter_t *sc;
    struct mbuf *m0;
    struct sge_qset *qs;
    struct sge_txq *txq;
    struct tx_sw_desc *stx;
    struct txq_state txqs;
    unsigned int ndesc, flits, cntrl, mlen;
    int err, nsegs, tso_info = 0;

    struct work_request_hdr *wrp;
    struct tx_sw_desc *txsd;
    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    uint32_t wr_hi, wr_lo, sgl_flits;

    struct tx_desc *txd;
    struct cpl_tx_pkt *cpl;

    m0 = *m;
    sc = p->adapter;

    DPRINTF("t3_encap port_id=%d qsidx=%d ", p->port_id, p->first_qset);

    /* port_id=1 qsid=1 txpkt_intf=2 tx_chan=0 */

    qs = &sc->sge.qs[p->first_qset];

    txq = &qs->txq[TXQ_ETH];
    stx = &txq->sdesc[txq->pidx];
    txd = &txq->desc[txq->pidx];
    cpl = (struct cpl_tx_pkt *)txd;
    mlen = m0->m_pkthdr.len;
    cpl->len = htonl(mlen | 0x80000000);

    DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", mlen, p->txpkt_intf, p->tx_chan);
    /*
     * XXX handle checksum, TSO, and VLAN here
     *
     */
    cntrl = V_TXPKT_INTF(p->txpkt_intf);

    /*
     * XXX need to add VLAN support for 6.x
     */
#ifdef VLAN_SUPPORTED
    if (m0->m_flags & M_VLANTAG)
        cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
    if (m0->m_pkthdr.csum_flags & (CSUM_TSO))
        tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
    if (tso_info) {
        int eth_type;
        struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
        struct ip *ip;
        struct tcphdr *tcp;
        char *pkthdr, tmp[TCPPKTHDRSIZE];   /* is this too large for the stack? */
        txd->flit[2] = 0;
        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
        hdr->cntrl = htonl(cntrl);

        if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
            pkthdr = &tmp[0];
            m_copydata(m0, 0, TCPPKTHDRSIZE, pkthdr);
        } else {
            pkthdr = mtod(m0, char *);
        }

        if (__predict_false(m0->m_flags & M_VLANTAG)) {
            eth_type = CPL_ETH_II_VLAN;
            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
                ETHER_VLAN_ENCAP_LEN);
        } else {
            eth_type = CPL_ETH_II;
            ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
        }
        tcp = (struct tcphdr *)((uint8_t *)ip +
            sizeof(*ip));

        tso_info |= V_LSO_ETH_TYPE(eth_type) |
            V_LSO_IPHDR_WORDS(ip->ip_hl) |
            V_LSO_TCPHDR_WORDS(tcp->th_off);
        hdr->lso_info = htonl(tso_info);
        flits = 3;
    } else {
        cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
        cpl->cntrl = htonl(cntrl);

        if (mlen <= WR_LEN - sizeof(*cpl)) {
            txq_prod(txq, 1, &txqs);
            txq->sdesc[txqs.pidx].m = NULL;

            if (m0->m_len == m0->m_pkthdr.len)
                memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
            else
                m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);

            *free = 1;
            flits = (mlen + 7) / 8 + 2;
            cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
                V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
                F_WR_SOP | F_WR_EOP | txqs.compl);
            wmb();
            cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
                V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));

            wr_gen2(txd, txqs.gen);
            check_ring_tx_db(sc, txq);
            return (0);
        }
        flits = 2;
    }

    wrp = (struct work_request_hdr *)txd;

    if ((err = busdma_map_mbufs(m, txq, stx, segs, &nsegs)) != 0) {
        return (err);
    }
    m0 = *m;
    ndesc = calc_tx_descs(m0, nsegs);

    sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
    make_sgl(sgp, segs, nsegs);

    sgl_flits = sgl_len(nsegs);

    DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
    txq_prod(txq, ndesc, &txqs);
    txsd = &txq->sdesc[txqs.pidx];
    wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
    wr_lo = htonl(V_WR_TID(txq->token));
    txsd->m = m0;
    m_set_priority(m0, txqs.pidx);

    write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
    check_ring_tx_db(p->adapter, txq);

    return (0);
}


/**
 * write_imm - write a packet into a Tx descriptor as immediate data
 * @d: the Tx descriptor to write
 * @m: the packet
 * @len: the length of packet data to write as immediate data
 * @gen: the generation bit value to write
 *
 * Writes a packet as immediate data into a Tx descriptor.  The packet
 * contains a work request at its beginning.  We must write the packet
 * carefully so the SGE doesn't read accidentally before it's written in
 * its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
    struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
    struct work_request_hdr *to = (struct work_request_hdr *)d;

    memcpy(&to[1], &from[1], len - sizeof(*from));
    to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
        V_WR_BCNTLFLT(len & 7));
    wmb();
    to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
        V_WR_LEN((len + 7) / 8));
    wr_gen2(d, gen);
    m_freem(m);
}

/**
 * check_desc_avail - check descriptor availability on a send queue
 * @adap: the adapter
 * @q: the TX queue
 * @m: the packet needing the descriptors
 * @ndesc: the number of Tx descriptors needed
 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 * Checks if the requested number of Tx descriptors is available on an
 * SGE send queue.  If the queue is already suspended or not enough
 * descriptors are available the packet is queued for later transmission.
 * Must be called with the Tx queue locked.
 *
 * Returns 0 if enough descriptors are available, 1 if there aren't
 * enough descriptors and the packet has been queued, and 2 if the caller
 * needs to retry because there weren't enough descriptors at the
 * beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
    /*
     * XXX We currently only use this for checking the control queue;
     * the control queue is only used for binding qsets which happens
     * at init time so we are guaranteed enough descriptors.
     */
    if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:  mbufq_tail(&q->sendq, m);
        return 1;
    }
    if (__predict_false(q->size - q->in_use < ndesc)) {

        struct sge_qset *qs = txq_to_qset(q, qid);

        setbit(&qs->txq_stopped, qid);
        smp_mb();

        if (should_restart_tx(q) &&
            test_and_clear_bit(qid, &qs->txq_stopped))
            return 2;

        q->stops++;
        goto addq_exit;
    }
    return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
    unsigned int reclaim = q->processed - q->cleaned;

    mtx_assert(&q->lock, MA_OWNED);

    q->in_use -= reclaim;
    q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
    return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}

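/*
 * Illustrative sketch (not part of the driver): how callers such as
 * ctrl_xmit() below interpret the return value of check_desc_avail().
 * 0 means descriptors are available, 1 means the mbuf was queued on
 * q->sendq for later transmission, and 2 means descriptors freed up in
 * the meantime and the caller should reclaim and retry.
 */
#if 0
again:  reclaim_completed_tx_imm(q);
    ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
    if (ret == 2)
        goto again;         /* resources reappeared, retry */
    if (ret == 1)
        return (-1);        /* queued on q->sendq */
    /* ret == 0: write the descriptor and ring the doorbell */
#endif
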
/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
    int ret;
    struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);

    if (__predict_false(!immediate(m))) {
        m_freem(m);
        return 0;
    }

    wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
    wrp->wr_lo = htonl(V_WR_TID(q->token));

    mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

    ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
    if (__predict_false(ret)) {
        if (ret == 1) {
            mtx_unlock(&q->lock);
            return (-1);
        }
        goto again;
    }

    write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

    q->in_use++;
    if (++q->pidx >= q->size) {
        q->pidx = 0;
        q->gen ^= 1;
    }
    mtx_unlock(&q->lock);
    wmb();
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
    return (0);
}


/**
 * restart_ctrlq - restart a suspended control queue
 * @qs: the queue set containing the control queue
 *
 * Resumes transmission on a suspended Tx control queue.
 */
static void
restart_ctrlq(void *data, int npending)
{
    struct mbuf *m;
    struct sge_qset *qs = (struct sge_qset *)data;
    struct sge_txq *q = &qs->txq[TXQ_CTRL];
    adapter_t *adap = qs->port->adapter;

    mtx_lock(&q->lock);
again:  reclaim_completed_tx_imm(q);

    while (q->in_use < q->size &&
           (m = mbufq_dequeue(&q->sendq)) != NULL) {

        write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

        if (++q->pidx >= q->size) {
            q->pidx = 0;
            q->gen ^= 1;
        }
        q->in_use++;
    }
    if (!mbufq_empty(&q->sendq)) {
        setbit(&qs->txq_stopped, TXQ_CTRL);
        smp_mb();

        if (should_restart_tx(q) &&
            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
            goto again;
        q->stops++;
    }
    mtx_unlock(&q->lock);
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}


/*
 * Send a management message through control queue 0
 */
int
t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
{
    return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
}

/**
 * free_qset - free the resources of an SGE queue set
 * @sc: the controller owning the queue set
 * @q: the queue set
 *
 * Release the HW and SW resources associated with an SGE queue set, such
 * as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 * queue set must be quiesced prior to calling this.
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
    int i;

    for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
        if (q->fl[i].desc) {
            mtx_lock(&sc->sge.reg_lock);
            t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
            mtx_unlock(&sc->sge.reg_lock);
            bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
            bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
                q->fl[i].desc_map);
            bus_dma_tag_destroy(q->fl[i].desc_tag);
            bus_dma_tag_destroy(q->fl[i].entry_tag);
        }
        if (q->fl[i].sdesc) {
            free_rx_bufs(sc, &q->fl[i]);
            free(q->fl[i].sdesc, M_DEVBUF);
        }
    }

    for (i = 0; i < SGE_TXQ_PER_SET; i++) {
        if (q->txq[i].desc) {
            mtx_lock(&sc->sge.reg_lock);
            t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
            mtx_unlock(&sc->sge.reg_lock);
            bus_dmamap_unload(q->txq[i].desc_tag,
                q->txq[i].desc_map);
            bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
                q->txq[i].desc_map);
            bus_dma_tag_destroy(q->txq[i].desc_tag);
            bus_dma_tag_destroy(q->txq[i].entry_tag);
            MTX_DESTROY(&q->txq[i].lock);
        }
        if (q->txq[i].sdesc) {
            free(q->txq[i].sdesc, M_DEVBUF);
        }
    }

    if (q->rspq.desc) {
        mtx_lock(&sc->sge.reg_lock);
        t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
        mtx_unlock(&sc->sge.reg_lock);

        bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
        bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
            q->rspq.desc_map);
        bus_dma_tag_destroy(q->rspq.desc_tag);
        MTX_DESTROY(&q->rspq.lock);
    }

    bzero(q, sizeof(*q));
}

/**
 * t3_free_sge_resources - free SGE resources
 * @sc: the adapter softc
 *
 * Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc)
{
    int i, nqsets;

    for (nqsets = i = 0; i < (sc)->params.nports; i++)
        nqsets += sc->port[i].nqsets;

    for (i = 0; i < nqsets; ++i)
        t3_free_qset(sc, &sc->sge.qs[i]);
}

/**
 * t3_sge_start - enable SGE
 * @sc: the controller softc
 *
 * Enables the SGE for DMAs.  This is the last step in starting packet
 * transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 * t3_sge_stop - disable SGE operation
 * @sc: the adapter
 *
 * Disables the DMA engine.  This can be called in emergencies (e.g.,
 * from error interrupts) or from normal process context.  In the latter
 * case it also disables any pending queue restart tasklets.  Note that
 * if it is called in interrupt context it cannot disable the restart
 * tasklets as it cannot wait, however the tasklets will have no effect
 * since the doorbells are disabled and the driver will call this again
 * later from process context, at which time the tasklets will be stopped
 * if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
    int i, nqsets;

    t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

    if (sc->tq == NULL)
        return;

    for (nqsets = i = 0; i < (sc)->params.nports; i++)
        nqsets += sc->port[i].nqsets;

    for (i = 0; i < nqsets; ++i) {
        struct sge_qset *qs = &sc->sge.qs[i];

        taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
        taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
    }
}


/**
 * free_tx_desc - reclaims Tx descriptors and their buffers
 * @adapter: the adapter
 * @q: the Tx queue to reclaim descriptors from
 * @n: the number of descriptors to reclaim
 *
 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 * Tx buffers.  Called with the Tx queue lock held.
 */
int
free_tx_desc(struct sge_txq *q, int n, struct mbuf **m_vec)
{
    struct tx_sw_desc *d;
    unsigned int cidx = q->cidx;
    int nbufs = 0;

#ifdef T3_TRACE
    T3_TRACE2(sc->tb[q->cntxt_id & 7],
        "reclaiming %u Tx descriptors at cidx %u", n, cidx);
#endif
    d = &q->sdesc[cidx];

    while (n-- > 0) {
        DPRINTF("cidx=%d d=%p\n", cidx, d);
        if (d->m) {
            if (d->flags & TX_SW_DESC_MAPPED) {
                bus_dmamap_unload(q->entry_tag, d->map);
                bus_dmamap_destroy(q->entry_tag, d->map);
                d->flags &= ~TX_SW_DESC_MAPPED;
            }
            if (m_get_priority(d->m) == cidx) {
                m_vec[nbufs] = d->m;
                d->m = NULL;
                nbufs++;
            } else {
                printf("pri=%d cidx=%d\n", (int)m_get_priority(d->m), cidx);
            }
        }
        ++d;
        if (++cidx == q->size) {
            cidx = 0;
            d = q->sdesc;
        }
    }
    q->cidx = cidx;

    return (nbufs);
}

/**
 * is_new_response - check if a response is newly written
 * @r: the response descriptor
 * @q: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
    return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

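/*
 * Note on is_new_response(): the SGE flips F_RSPD_GEN2 in the descriptors it
 * writes each time it wraps the response ring, and the driver flips q->gen
 * when its consumer index wraps, so a descriptor whose generation bit matches
 * q->gen is one the hardware has written since the driver last visited that
 * slot.
 */
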
/**
 * write_ofld_wr - write an offload work request
 * @adap: the adapter
 * @m: the packet to send
 * @q: the Tx queue
 * @pidx: index of the first Tx descriptor to write
 * @gen: the generation value to use
 * @ndesc: number of descriptors the packet will occupy
 *
 * Write an offload work request to send the supplied packet.  The packet
 * data already carry the work request with most fields populated.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m,
    struct sge_txq *q, unsigned int pidx,
    unsigned int gen, unsigned int ndesc,
    bus_dma_segment_t *segs, unsigned int nsegs)
{
    unsigned int sgl_flits, flits;
    struct work_request_hdr *from;
    struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
    struct tx_desc *d = &q->desc[pidx];
    struct txq_state txqs;

    if (immediate(m)) {
        q->sdesc[pidx].m = NULL;
        write_imm(d, m, m->m_len, gen);
        return;
    }

    /* Only TX_DATA builds SGLs */

    from = mtod(m, struct work_request_hdr *);
    memcpy(&d->flit[1], &from[1],
        (uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *) - sizeof(*from));

    flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
    sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;

    make_sgl(sgp, segs, nsegs);
    sgl_flits = sgl_len(nsegs);

    txqs.gen = q->gen;
    txqs.pidx = q->pidx;
    txqs.compl = (q->unacked & 8) << (S_WR_COMPL - 3);
    write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
        from->wr_hi, from->wr_lo);
}

/**
 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 * @m: the packet
 *
 * Returns the number of Tx descriptors needed for the given offload
 * packet.  These packets are already fully constructed.
 */
static __inline unsigned int
calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
{
    unsigned int flits, cnt = 0;


    if (m->m_len <= WR_LEN)
        return 1;   /* packet fits as immediate data */

    if (m->m_flags & M_IOVEC)
        cnt = mtomv(m)->mv_count;

    flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;   /* headers */

    return flits_to_desc(flits + sgl_len(cnt));
}

/**
 * ofld_xmit - send a packet through an offload queue
 * @adap: the adapter
 * @q: the Tx offload queue
 * @m: the packet
 *
 * Send an offload packet through an SGE offload queue.
 */
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
    int ret, nsegs;
    unsigned int ndesc;
    unsigned int pidx, gen;
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    int i, cleaned;
    struct tx_sw_desc *stx = &q->sdesc[q->pidx];

    mtx_lock(&q->lock);
    if ((ret = busdma_map_mbufs(&m, q, stx, segs, &nsegs)) != 0) {
        mtx_unlock(&q->lock);
        return (ret);
    }
    ndesc = calc_tx_descs_ofld(m, nsegs);
again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

    ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
    if (__predict_false(ret)) {
        if (ret == 1) {
            m_set_priority(m, ndesc);   /* save for restart */
            mtx_unlock(&q->lock);
            return EINTR;
        }
        goto again;
    }

    gen = q->gen;
    q->in_use += ndesc;
    pidx = q->pidx;
    q->pidx += ndesc;
    if (q->pidx >= q->size) {
        q->pidx -= q->size;
        q->gen ^= 1;
    }
#ifdef T3_TRACE
    T3_TRACE5(adap->tb[q->cntxt_id & 7],
        "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
        ndesc, pidx, skb->len, skb->len - skb->data_len,
        skb_shinfo(skb)->nr_frags);
#endif
    mtx_unlock(&q->lock);

    write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
    check_ring_tx_db(adap, q);

    for (i = 0; i < cleaned; i++) {
        m_freem(m_vec[i]);
    }
    return (0);
}

/**
 * restart_offloadq - restart a suspended offload queue
 * @qs: the queue set containing the offload queue
 *
 * Resumes transmission on a suspended Tx offload queue.
 */
static void
restart_offloadq(void *data, int npending)
{

    struct mbuf *m;
    struct sge_qset *qs = data;
    struct sge_txq *q = &qs->txq[TXQ_OFLD];
    adapter_t *adap = qs->port->adapter;
    struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
    bus_dma_segment_t segs[TX_MAX_SEGS];
    int nsegs, i, cleaned;
    struct tx_sw_desc *stx = &q->sdesc[q->pidx];

    mtx_lock(&q->lock);
again:  cleaned = reclaim_completed_tx(q, TX_CLEAN_MAX_DESC, m_vec);

    while ((m = mbufq_peek(&q->sendq)) != NULL) {
        unsigned int gen, pidx;
        unsigned int ndesc = m_get_priority(m);

        if (__predict_false(q->size - q->in_use < ndesc)) {
            setbit(&qs->txq_stopped, TXQ_OFLD);
            smp_mb();

            if (should_restart_tx(q) &&
                test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
                goto again;
            q->stops++;
            break;
        }

        gen = q->gen;
        q->in_use += ndesc;
        pidx = q->pidx;
        q->pidx += ndesc;
        if (q->pidx >= q->size) {
            q->pidx -= q->size;
            q->gen ^= 1;
        }

        (void)mbufq_dequeue(&q->sendq);
        busdma_map_mbufs(&m, q, stx, segs, &nsegs);
        mtx_unlock(&q->lock);
        write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
        mtx_lock(&q->lock);
    }
    mtx_unlock(&q->lock);

#if USE_GTS
    set_bit(TXQ_RUNNING, &q->flags);
    set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
    t3_write_reg(adap, A_SG_KDOORBELL,
        F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));

    for (i = 0; i < cleaned; i++) {
        m_freem(m_vec[i]);
    }
}

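/*
 * Worked example for the priority encoding used below: an offload packet
 * with priority 5 (binary 101) has bit 0 set, so is_ctrl_pkt() routes it to
 * the CTRL Tx queue of queue set queue_set(5) = 5 >> 1 = 2.
 */
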
/**
 * queue_set - return the queue set a packet should use
 * @m: the packet
 *
 * Maps a packet to the SGE queue set it should use.  The desired queue
 * set is carried in bits 1-3 in the packet's priority.
 */
static __inline int
queue_set(const struct mbuf *m)
{
    return m_get_priority(m) >> 1;
}

/**
 * is_ctrl_pkt - return whether an offload packet is a control packet
 * @m: the packet
 *
 * Determines whether an offload packet should use an OFLD or a CTRL
 * Tx queue.  This is indicated by bit 0 in the packet's priority.
 */
static __inline int
is_ctrl_pkt(const struct mbuf *m)
{
    return m_get_priority(m) & 1;
}

/**
 * t3_offload_tx - send an offload packet
 * @tdev: the offload device to send to
 * @m: the packet
 *
 * Sends an offload packet.  We use the packet priority to select the
 * appropriate Tx queue as follows: bit 0 indicates whether the packet
 * should be sent as regular or control, bits 1-3 select the queue set.
 */
int
t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
    adapter_t *adap = tdev2adap(tdev);
    struct sge_qset *qs = &adap->sge.qs[queue_set(m)];

    if (__predict_false(is_ctrl_pkt(m)))
        return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);

    return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
}

/**
 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
 * @tdev: the offload device that will be receiving the packets
 * @q: the SGE response queue that assembled the bundle
 * @m: the partial bundle
 * @n: the number of packets in the bundle
 *
 * Delivers a (partial) bundle of Rx offload packets to an offload device.
 */
static __inline void
deliver_partial_bundle(struct t3cdev *tdev,
    struct sge_rspq *q,
    struct mbuf *mbufs[], int n)
{
    if (n) {
        q->offload_bundles++;
        cxgb_ofld_recv(tdev, mbufs, n);
    }
}

static __inline int
rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
    struct mbuf *m, struct mbuf *rx_gather[],
    unsigned int gather_idx)
{
    rq->offload_pkts++;
    m->m_pkthdr.header = mtod(m, void *);

    rx_gather[gather_idx++] = m;
    if (gather_idx == RX_BUNDLE_SIZE) {
        cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
        gather_idx = 0;
        rq->offload_bundles++;
    }
    return (gather_idx);
}

static void
restart_tx(struct sge_qset *qs)
{
    struct adapter *sc = qs->port->adapter;

    if (isset(&qs->txq_stopped, TXQ_OFLD) &&
        should_restart_tx(&qs->txq[TXQ_OFLD]) &&
        test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
        qs->txq[TXQ_OFLD].restarts++;
        taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
    }
    if (isset(&qs->txq_stopped, TXQ_CTRL) &&
        should_restart_tx(&qs->txq[TXQ_CTRL]) &&
        test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
        qs->txq[TXQ_CTRL].restarts++;
        taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
    }
}

/**
 * t3_sge_alloc_qset - initialize an SGE queue set
 * @sc: the controller softc
 * @id: the queue set id
 * @nports: how many Ethernet ports will be using this queue set
 * @irq_vec_idx: the IRQ vector index for response queue interrupts
 * @p: configuration parameters for this queue set
 * @ntxq: number of Tx queues for the queue set
 * @pi: port info for queue set
 *
 * Allocate resources and initialize an SGE queue set.  A queue set
 * comprises a response queue, two Rx free-buffer queues, and up to 3
 * Tx queues.  The Tx queues are assigned roles in the order Ethernet
 * queue, offload queue, and control queue.
 */
The Tx queues are assigned roles in the order Ethernet 2078 * queue, offload queue, and control queue. 2079 */ 2080 int 2081 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2082 const struct qset_params *p, int ntxq, struct port_info *pi) 2083 { 2084 struct sge_qset *q = &sc->sge.qs[id]; 2085 int i, ret = 0; 2086 2087 init_qset_cntxt(q, id); 2088 2089 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2090 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2091 &q->fl[0].desc, &q->fl[0].sdesc, 2092 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2093 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2094 printf("error %d from alloc ring fl0\n", ret); 2095 goto err; 2096 } 2097 2098 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2099 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2100 &q->fl[1].desc, &q->fl[1].sdesc, 2101 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2102 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2103 printf("error %d from alloc ring fl1\n", ret); 2104 goto err; 2105 } 2106 2107 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2108 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2109 &q->rspq.desc_tag, &q->rspq.desc_map, 2110 NULL, NULL)) != 0) { 2111 printf("error %d from alloc ring rspq\n", ret); 2112 goto err; 2113 } 2114 2115 for (i = 0; i < ntxq; ++i) { 2116 /* 2117 * The control queue always uses immediate data so does not 2118 * need to keep track of any mbufs. 2119 * XXX Placeholder for future TOE support. 2120 */ 2121 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); 2122 2123 if ((ret = alloc_ring(sc, p->txq_size[i], 2124 sizeof(struct tx_desc), sz, 2125 &q->txq[i].phys_addr, &q->txq[i].desc, 2126 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2127 &q->txq[i].desc_map, 2128 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2129 printf("error %d from alloc ring tx %i\n", ret, i); 2130 goto err; 2131 } 2132 mbufq_init(&q->txq[i].sendq); 2133 q->txq[i].gen = 1; 2134 q->txq[i].size = p->txq_size[i]; 2135 snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d", 2136 device_get_unit(sc->dev), irq_vec_idx, i); 2137 MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF); 2138 } 2139 2140 q->txq[TXQ_ETH].port = pi; 2141 2142 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2143 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2144 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]); 2145 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]); 2146 2147 q->fl[0].gen = q->fl[1].gen = 1; 2148 q->fl[0].size = p->fl_size; 2149 q->fl[1].size = p->jumbo_size; 2150 2151 q->rspq.gen = 1; 2152 q->rspq.cidx = 0; 2153 q->rspq.size = p->rspq_size; 2154 2155 q->txq[TXQ_ETH].stop_thres = nports * 2156 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2157 2158 q->fl[0].buf_size = MCLBYTES; 2159 q->fl[0].zone = zone_clust; 2160 q->fl[0].type = EXT_CLUSTER; 2161 q->fl[1].buf_size = MJUMPAGESIZE; 2162 q->fl[1].zone = zone_jumbop; 2163 q->fl[1].type = EXT_JUMBOP; 2164 2165 q->lro.enabled = lro_default; 2166 2167 mtx_lock(&sc->sge.reg_lock); 2168 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2169 q->rspq.phys_addr, q->rspq.size, 2170 q->fl[0].buf_size, 1, 0); 2171 if (ret) { 2172 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2173 goto err_unlock; 2174 } 2175 2176 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2177 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2178 q->fl[i].phys_addr, q->fl[i].size, 2179 
q->fl[i].buf_size, p->cong_thres, 1, 2180 0); 2181 if (ret) { 2182 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2183 goto err_unlock; 2184 } 2185 } 2186 2187 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2188 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2189 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2190 1, 0); 2191 if (ret) { 2192 printf("error %d from t3_sge_init_ecntxt\n", ret); 2193 goto err_unlock; 2194 } 2195 2196 if (ntxq > 1) { 2197 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2198 USE_GTS, SGE_CNTXT_OFLD, id, 2199 q->txq[TXQ_OFLD].phys_addr, 2200 q->txq[TXQ_OFLD].size, 0, 1, 0); 2201 if (ret) { 2202 printf("error %d from t3_sge_init_ecntxt\n", ret); 2203 goto err_unlock; 2204 } 2205 } 2206 2207 if (ntxq > 2) { 2208 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2209 SGE_CNTXT_CTRL, id, 2210 q->txq[TXQ_CTRL].phys_addr, 2211 q->txq[TXQ_CTRL].size, 2212 q->txq[TXQ_CTRL].token, 1, 0); 2213 if (ret) { 2214 printf("error %d from t3_sge_init_ecntxt\n", ret); 2215 goto err_unlock; 2216 } 2217 } 2218 2219 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2220 device_get_unit(sc->dev), irq_vec_idx); 2221 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2222 2223 mtx_unlock(&sc->sge.reg_lock); 2224 t3_update_qset_coalesce(q, p); 2225 q->port = pi; 2226 2227 refill_fl(sc, &q->fl[0], q->fl[0].size); 2228 refill_fl(sc, &q->fl[1], q->fl[1].size); 2229 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2230 2231 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2232 V_NEWTIMER(q->rspq.holdoff_tmr)); 2233 2234 return (0); 2235 2236 err_unlock: 2237 mtx_unlock(&sc->sge.reg_lock); 2238 err: 2239 t3_free_qset(sc, q); 2240 2241 return (ret); 2242 } 2243 2244 void 2245 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2246 { 2247 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2248 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2249 struct ifnet *ifp = pi->ifp; 2250 2251 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2252 2253 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2254 cpl->csum_valid && cpl->csum == 0xffff) { 2255 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2256 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2257 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2258 m->m_pkthdr.csum_data = 0xffff; 2259 } 2260 /* 2261 * XXX need to add VLAN support for 6.x 2262 */ 2263 #ifdef VLAN_SUPPORTED 2264 if (__predict_false(cpl->vlan_valid)) { 2265 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2266 m->m_flags |= M_VLANTAG; 2267 } 2268 #endif 2269 2270 m->m_pkthdr.rcvif = ifp; 2271 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2272 m_explode(m); 2273 /* 2274 * adjust after conversion to mbuf chain 2275 */ 2276 m_adj(m, sizeof(*cpl) + ethpad); 2277 2278 (*ifp->if_input)(ifp, m); 2279 } 2280 2281 /** 2282 * get_packet - return the next ingress packet buffer from a free list 2283 * @adap: the adapter that received the packet 2284 * @drop_thres: # of remaining buffers before we start dropping packets 2285 * @qs: the qset that the SGE free list holding the packet belongs to 2286 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2287 * @r: response descriptor 2288 * 2289 * Get the next packet from a free list and complete setup of the 2290 * sk_buff. 
If the packet is small we make a copy and recycle the 2291 * original buffer, otherwise we use the original buffer itself. If a 2292 * positive drop threshold is supplied packets are dropped and their 2293 * buffers recycled if (a) the number of remaining buffers is under the 2294 * threshold and the packet is too big to copy, or (b) the packet should 2295 * be copied but there is no memory for the copy. 2296 */ 2297 #ifdef DISABLE_MBUF_IOVEC 2298 2299 static int 2300 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2301 struct t3_mbuf_hdr *mh, struct rsp_desc *r, struct mbuf *m) 2302 { 2303 2304 unsigned int len_cq = ntohl(r->len_cq); 2305 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; 2306 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2307 uint32_t len = G_RSPD_LEN(len_cq); 2308 uint32_t flags = ntohl(r->flags); 2309 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2310 int ret = 0; 2311 2312 prefetch(sd->cl); 2313 2314 fl->credits--; 2315 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2316 bus_dmamap_unload(fl->entry_tag, sd->map); 2317 2318 m_cljset(m, sd->cl, fl->type); 2319 m->m_len = len; 2320 2321 switch(sopeop) { 2322 case RSPQ_SOP_EOP: 2323 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2324 mh->mh_head = mh->mh_tail = m; 2325 m->m_pkthdr.len = len; 2326 m->m_flags |= M_PKTHDR; 2327 ret = 1; 2328 break; 2329 case RSPQ_NSOP_NEOP: 2330 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2331 m->m_flags &= ~M_PKTHDR; 2332 if (mh->mh_tail == NULL) { 2333 if (cxgb_debug) 2334 printf("discarding intermediate descriptor entry\n"); 2335 m_freem(m); 2336 break; 2337 } 2338 mh->mh_tail->m_next = m; 2339 mh->mh_tail = m; 2340 mh->mh_head->m_pkthdr.len += len; 2341 ret = 0; 2342 break; 2343 case RSPQ_SOP: 2344 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2345 m->m_pkthdr.len = len; 2346 mh->mh_head = mh->mh_tail = m; 2347 m->m_flags |= M_PKTHDR; 2348 ret = 0; 2349 break; 2350 case RSPQ_EOP: 2351 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2352 m->m_flags &= ~M_PKTHDR; 2353 mh->mh_head->m_pkthdr.len += len; 2354 mh->mh_tail->m_next = m; 2355 mh->mh_tail = m; 2356 ret = 1; 2357 break; 2358 } 2359 if (++fl->cidx == fl->size) 2360 fl->cidx = 0; 2361 2362 return (ret); 2363 } 2364 2365 #else 2366 static int 2367 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2368 struct mbuf *m, struct rsp_desc *r) 2369 { 2370 2371 unsigned int len_cq = ntohl(r->len_cq); 2372 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2373 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; 2374 uint32_t len = G_RSPD_LEN(len_cq); 2375 uint32_t flags = ntohl(r->flags); 2376 uint8_t sopeop = G_RSPD_SOP_EOP(flags); 2377 void *cl; 2378 int ret = 0; 2379 2380 prefetch(sd->cl); 2381 2382 fl->credits--; 2383 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2384 2385 if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { 2386 cl = mtod(m, void *); 2387 memcpy(cl, sd->cl, len); 2388 recycle_rx_buf(adap, fl, fl->cidx); 2389 } else { 2390 cl = sd->cl; 2391 bus_dmamap_unload(fl->entry_tag, sd->map); 2392 } 2393 switch(sopeop) { 2394 case RSPQ_SOP_EOP: 2395 DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m)); 2396 if (cl == sd->cl) 2397 m_cljset(m, cl, fl->type); 2398 m->m_len = m->m_pkthdr.len = len; 2399 ret = 1; 2400 goto done; 2401 break; 2402 case RSPQ_NSOP_NEOP: 2403 DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m)); 2404 ret = 0; 2405 break; 2406 case RSPQ_SOP: 2407 DBG(DBG_RX, ("get_packet: SOP m %p\n", m)); 2408 m_iovinit(m); 2409 ret = 0; 2410 break; 2411 case RSPQ_EOP: 2412 DBG(DBG_RX, ("get_packet: EOP m %p\n", m)); 2413 ret = 1; 2414 break; 2415 } 2416 m_iovappend(m, cl, fl->buf_size, len, 0); 2417 2418 done: 2419 if (++fl->cidx == fl->size) 2420 fl->cidx = 0; 2421 2422 return (ret); 2423 } 2424 #endif 2425 /** 2426 * handle_rsp_cntrl_info - handles control information in a response 2427 * @qs: the queue set corresponding to the response 2428 * @flags: the response control flags 2429 * 2430 * Handles the control information of an SGE response, such as GTS 2431 * indications and completion credits for the queue set's Tx queues. 2432 * HW coalesces credits, we don't do any extra SW coalescing. 2433 */ 2434 static __inline void 2435 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2436 { 2437 unsigned int credits; 2438 2439 #if USE_GTS 2440 if (flags & F_RSPD_TXQ0_GTS) 2441 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2442 #endif 2443 credits = G_RSPD_TXQ0_CR(flags); 2444 if (credits) { 2445 qs->txq[TXQ_ETH].processed += credits; 2446 if (desc_reclaimable(&qs->txq[TXQ_ETH]) > TX_START_MAX_DESC) 2447 taskqueue_enqueue(qs->port->adapter->tq, 2448 &qs->port->timer_reclaim_task); 2449 } 2450 2451 credits = G_RSPD_TXQ2_CR(flags); 2452 if (credits) 2453 qs->txq[TXQ_CTRL].processed += credits; 2454 2455 # if USE_GTS 2456 if (flags & F_RSPD_TXQ1_GTS) 2457 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2458 # endif 2459 credits = G_RSPD_TXQ1_CR(flags); 2460 if (credits) 2461 qs->txq[TXQ_OFLD].processed += credits; 2462 } 2463 2464 static void 2465 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2466 unsigned int sleeping) 2467 { 2468 ; 2469 } 2470 2471 /** 2472 * process_responses - process responses from an SGE response queue 2473 * @adap: the adapter 2474 * @qs: the queue set to which the response queue belongs 2475 * @budget: how many responses can be processed in this round 2476 * 2477 * Process responses from an SGE response queue up to the supplied budget. 2478 * Responses include received packets as well as credits and other events 2479 * for the queues that belong to the response queue's queue set. 2480 * A negative budget is effectively unlimited. 2481 * 2482 * Additionally choose the interrupt holdoff time for the next interrupt 2483 * on this queue. If the system is under memory shortage use a fairly 2484 * long delay to help recovery. 
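 *
 * A rough sketch of the typical calling pattern (see process_responses_gts()
 * below, which is what the interrupt handlers actually use): the queue is
 * drained with no budget limit and the GTS register is then written to
 * re-arm the interrupt:
 *
 *	work = process_responses(adap, rspq_to_qset(rspq), -1);
 *	if (work)
 *		t3_write_reg(adap, A_SG_GTS, V_RSPQ(rspq->cntxt_id) |
 *		    V_NEWTIMER(rspq->next_holdoff) | V_NEWINDEX(rspq->cidx));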
2485 */ 2486 static int 2487 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2488 { 2489 struct sge_rspq *rspq = &qs->rspq; 2490 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2491 int budget_left = budget; 2492 unsigned int sleeping = 0; 2493 int lro = qs->lro.enabled; 2494 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2495 int ngathered = 0; 2496 #ifdef DEBUG 2497 static int last_holdoff = 0; 2498 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2499 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2500 last_holdoff = rspq->holdoff_tmr; 2501 } 2502 #endif 2503 rspq->next_holdoff = rspq->holdoff_tmr; 2504 2505 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2506 int eth, eop = 0, ethpad = 0; 2507 uint32_t flags = ntohl(r->flags); 2508 uint32_t rss_csum = *(const uint32_t *)r; 2509 uint32_t rss_hash = r->rss_hdr.rss_hash_val; 2510 2511 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2512 2513 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2514 /* XXX */ 2515 printf("async notification\n"); 2516 2517 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2518 #ifdef DISABLE_MBUF_IOVEC 2519 2520 if (cxgb_debug) 2521 printf("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", r->rss_hdr.opcode, rspq->cidx); 2522 2523 if(get_imm_packet(adap, r, &rspq->rspq_mh) == 0) { 2524 rspq->next_holdoff = NOMEM_INTR_DELAY; 2525 budget_left--; 2526 break; 2527 } else { 2528 eop = 1; 2529 } 2530 #else 2531 struct mbuf *m = NULL; 2532 2533 if (rspq->rspq_mbuf == NULL) 2534 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2535 else 2536 m = m_gethdr(M_DONTWAIT, MT_DATA); 2537 2538 /* 2539 * XXX revisit me 2540 */ 2541 if (rspq->rspq_mbuf == NULL && m == NULL) { 2542 rspq->next_holdoff = NOMEM_INTR_DELAY; 2543 budget_left--; 2544 break; 2545 } 2546 if (get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags)) 2547 goto skip; 2548 eop = 1; 2549 #endif 2550 rspq->imm_data++; 2551 } else if (r->len_cq) { 2552 int drop_thresh = eth ? 
SGE_RX_DROP_THRES : 0; 2553 2554 #ifdef DISABLE_MBUF_IOVEC 2555 struct mbuf *m; 2556 m = m_gethdr(M_NOWAIT, MT_DATA); 2557 2558 if (m == NULL) { 2559 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2560 break; 2561 } 2562 2563 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r, m); 2564 #else 2565 if (rspq->rspq_mbuf == NULL) 2566 rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA); 2567 if (rspq->rspq_mbuf == NULL) { 2568 log(LOG_WARNING, "failed to get mbuf for packet\n"); 2569 break; 2570 } 2571 eop = get_packet(adap, drop_thresh, qs, rspq->rspq_mbuf, r); 2572 #endif 2573 ethpad = 2; 2574 } else { 2575 DPRINTF("pure response\n"); 2576 rspq->pure_rsps++; 2577 } 2578 2579 if (flags & RSPD_CTRL_MASK) { 2580 sleeping |= flags & RSPD_GTS_MASK; 2581 handle_rsp_cntrl_info(qs, flags); 2582 } 2583 #ifndef DISABLE_MBUF_IOVEC 2584 skip: 2585 #endif 2586 r++; 2587 if (__predict_false(++rspq->cidx == rspq->size)) { 2588 rspq->cidx = 0; 2589 rspq->gen ^= 1; 2590 r = rspq->desc; 2591 } 2592 2593 prefetch(r); 2594 if (++rspq->credits >= (rspq->size / 4)) { 2595 refill_rspq(adap, rspq, rspq->credits); 2596 rspq->credits = 0; 2597 } 2598 2599 if (eop) { 2600 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *)); 2601 prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES); 2602 2603 if (eth) { 2604 t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad, 2605 rss_hash, rss_csum, lro); 2606 2607 rspq->rspq_mh.mh_head = NULL; 2608 } else { 2609 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 2610 /* 2611 * XXX size mismatch 2612 */ 2613 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 2614 2615 ngathered = rx_offload(&adap->tdev, rspq, 2616 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 2617 } 2618 __refill_fl(adap, &qs->fl[0]); 2619 __refill_fl(adap, &qs->fl[1]); 2620 2621 } 2622 --budget_left; 2623 } 2624 2625 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 2626 t3_lro_flush(adap, qs, &qs->lro); 2627 2628 if (sleeping) 2629 check_ring_db(adap, qs, sleeping); 2630 2631 smp_mb(); /* commit Tx queue processed updates */ 2632 if (__predict_false(qs->txq_stopped != 0)) 2633 restart_tx(qs); 2634 2635 budget -= budget_left; 2636 return (budget); 2637 } 2638 2639 /* 2640 * A helper function that processes responses and issues GTS. 2641 */ 2642 static __inline int 2643 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 2644 { 2645 int work; 2646 static int last_holdoff = 0; 2647 2648 work = process_responses(adap, rspq_to_qset(rq), -1); 2649 2650 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 2651 printf("next_holdoff=%d\n", rq->next_holdoff); 2652 last_holdoff = rq->next_holdoff; 2653 } 2654 if (work) 2655 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 2656 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 2657 return work; 2658 } 2659 2660 2661 /* 2662 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 2663 * Handles data events from SGE response queues as well as error and other 2664 * async events as they all use the same interrupt pin. We use one SGE 2665 * response queue per port in this mode and protect all response queues with 2666 * queue 0's lock. 
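 *
 * The cause value read from A_SG_DATA_INTR is a bitmap: bit i set means the
 * response queue of port i has new entries, and F_ERRINTR indicates an error
 * or other async event that is deferred to the slow interrupt task.  For
 * instance, a cause with bit 0 and F_ERRINTR set results in port 0's
 * response queue being serviced and the slow interrupt task being scheduled.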
2667 */ 2668 void 2669 t3b_intr(void *data) 2670 { 2671 uint32_t i, map; 2672 adapter_t *adap = data; 2673 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2674 2675 t3_write_reg(adap, A_PL_CLI, 0); 2676 map = t3_read_reg(adap, A_SG_DATA_INTR); 2677 2678 if (!map) 2679 return; 2680 2681 if (__predict_false(map & F_ERRINTR)) 2682 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2683 2684 mtx_lock(&q0->lock); 2685 for_each_port(adap, i) 2686 if (map & (1 << i)) 2687 process_responses_gts(adap, &adap->sge.qs[i].rspq); 2688 mtx_unlock(&q0->lock); 2689 } 2690 2691 /* 2692 * The MSI interrupt handler. This needs to handle data events from SGE 2693 * response queues as well as error and other async events as they all use 2694 * the same MSI vector. We use one SGE response queue per port in this mode 2695 * and protect all response queues with queue 0's lock. 2696 */ 2697 void 2698 t3_intr_msi(void *data) 2699 { 2700 adapter_t *adap = data; 2701 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 2702 int i, new_packets = 0; 2703 2704 mtx_lock(&q0->lock); 2705 2706 for_each_port(adap, i) 2707 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 2708 new_packets = 1; 2709 mtx_unlock(&q0->lock); 2710 if (new_packets == 0) 2711 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 2712 } 2713 2714 void 2715 t3_intr_msix(void *data) 2716 { 2717 struct sge_qset *qs = data; 2718 adapter_t *adap = qs->port->adapter; 2719 struct sge_rspq *rspq = &qs->rspq; 2720 2721 mtx_lock(&rspq->lock); 2722 if (process_responses_gts(adap, rspq) == 0) 2723 rspq->unhandled_irqs++; 2724 mtx_unlock(&rspq->lock); 2725 } 2726 2727 /* 2728 * broken by recent mbuf changes 2729 */ 2730 static int 2731 t3_lro_enable(SYSCTL_HANDLER_ARGS) 2732 { 2733 adapter_t *sc; 2734 int i, j, enabled, err, nqsets = 0; 2735 2736 #ifndef LRO_WORKING 2737 return (0); 2738 #endif 2739 2740 sc = arg1; 2741 enabled = sc->sge.qs[0].lro.enabled; 2742 err = sysctl_handle_int(oidp, &enabled, arg2, req); 2743 2744 if (err != 0) 2745 return (err); 2746 if (enabled == sc->sge.qs[0].lro.enabled) 2747 return (0); 2748 2749 for (i = 0; i < sc->params.nports; i++) 2750 for (j = 0; j < sc->port[i].nqsets; j++) 2751 nqsets++; 2752 2753 for (i = 0; i < nqsets; i++) 2754 sc->sge.qs[i].lro.enabled = enabled; 2755 2756 return (0); 2757 } 2758 2759 static int 2760 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS) 2761 { 2762 adapter_t *sc = arg1; 2763 struct qset_params *qsp = &sc->params.sge.qset[0]; 2764 int coalesce_nsecs; 2765 struct sge_qset *qs; 2766 int i, j, err, nqsets = 0; 2767 struct mtx *lock; 2768 2769 coalesce_nsecs = qsp->coalesce_nsecs; 2770 err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req); 2771 2772 if (err != 0) { 2773 return (err); 2774 } 2775 if (coalesce_nsecs == qsp->coalesce_nsecs) 2776 return (0); 2777 2778 for (i = 0; i < sc->params.nports; i++) 2779 for (j = 0; j < sc->port[i].nqsets; j++) 2780 nqsets++; 2781 2782 coalesce_nsecs = max(100, coalesce_nsecs); 2783 2784 for (i = 0; i < nqsets; i++) { 2785 qs = &sc->sge.qs[i]; 2786 qsp = &sc->params.sge.qset[i]; 2787 qsp->coalesce_nsecs = coalesce_nsecs; 2788 2789 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 2790 &sc->sge.qs[0].rspq.lock; 2791 2792 mtx_lock(lock); 2793 t3_update_qset_coalesce(qs, qsp); 2794 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 2795 V_NEWTIMER(qs->rspq.holdoff_tmr)); 2796 mtx_unlock(lock); 2797 } 2798 2799 return (0); 2800 } 2801 2802 2803 void 2804 t3_add_sysctls(adapter_t *sc) 2805 { 2806 struct sysctl_ctx_list *ctx; 2807 struct sysctl_oid_list *children; 2808 2809 ctx = device_get_sysctl_ctx(sc->dev); 2810 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 2811 2812 /* random information */ 2813 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 2814 "firmware_version", 2815 CTLFLAG_RD, &sc->fw_version, 2816 0, "firmware version"); 2817 2818 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2819 "enable_lro", 2820 CTLTYPE_INT|CTLFLAG_RW, sc, 2821 0, t3_lro_enable, 2822 "I", "enable large receive offload"); 2823 2824 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 2825 "intr_coal", 2826 CTLTYPE_INT|CTLFLAG_RW, sc, 2827 0, t3_set_coalesce_nsecs, 2828 "I", "interrupt coalescing timer (ns)"); 2829 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2830 "enable_debug", 2831 CTLFLAG_RW, &cxgb_debug, 2832 0, "enable verbose debugging output"); 2833 2834 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2835 "collapse_free", 2836 CTLFLAG_RD, &collapse_free, 2837 0, "frees during collapse"); 2838 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2839 "mb_free_vec_free", 2840 CTLFLAG_RD, &mb_free_vec_free, 2841 0, "frees during mb_free_vec"); 2842 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2843 "collapse_mbufs", 2844 CTLFLAG_RW, &collapse_mbufs, 2845 0, "collapse mbuf chains into iovecs"); 2846 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2847 "txq_overrun", 2848 CTLFLAG_RD, &txq_fills, 2849 0, "#times txq overrun"); 2850 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 2851 "bogus_imm", 2852 CTLFLAG_RD, &bogus_imm, 2853 0, "#times a bogus immediate response was seen"); 2854 } 2855 2856 /** 2857 * t3_get_desc - dump an SGE descriptor for debugging purposes 2858 * @qs: the queue set 2859 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 2860 * @idx: the descriptor index in the queue 2861 * @data: where to dump the descriptor contents 2862 * 2863 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 2864 * size of the descriptor. 2865 */ 2866 int 2867 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 2868 unsigned char *data) 2869 { 2870 if (qnum >= 6) 2871 return (EINVAL); 2872 2873 if (qnum < 3) { 2874 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 2875 return -EINVAL; 2876 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 2877 return sizeof(struct tx_desc); 2878 } 2879 2880 if (qnum == 3) { 2881 if (!qs->rspq.desc || idx >= qs->rspq.size) 2882 return (EINVAL); 2883 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 2884 return sizeof(struct rsp_desc); 2885 } 2886 2887 qnum -= 4; 2888 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 2889 return (EINVAL); 2890 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 2891 return sizeof(struct rx_desc); 2892 } 2893
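
/*
 * Illustrative use of t3_get_desc() (not part of the driver proper): a
 * hypothetical debugging path that dumps the first descriptor of queue
 * set 0's Ethernet Tx queue.  The caller's buffer must be large enough
 * for the descriptor type being read, and the return value is the
 * descriptor size on success:
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len;
 *
 *	len = t3_get_desc(&sc->sge.qs[0], 0, 0, buf);
 *	if (len == sizeof(struct tx_desc))
 *		;	// buf now holds a copy of the HW Tx descriptor
 */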