1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/conf.h> 39 #include <machine/bus.h> 40 #include <machine/resource.h> 41 #include <sys/bus_dma.h> 42 #include <sys/rman.h> 43 #include <sys/queue.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 #include <sys/proc.h> 48 #include <sys/sbuf.h> 49 #include <sys/sched.h> 50 #include <sys/smp.h> 51 #include <sys/systm.h> 52 #include <sys/syslog.h> 53 54 #include <net/bpf.h> 55 56 #include <netinet/in_systm.h> 57 #include <netinet/in.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 61 #include <dev/pci/pcireg.h> 62 #include <dev/pci/pcivar.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 67 #include <cxgb_include.h> 68 #include <sys/mvec.h> 69 70 int txq_fills = 0; 71 int multiq_tx_enable = 1; 72 73 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 77 "size of per-queue mbuf ring"); 78 79 static int cxgb_tx_coalesce_force = 0; 80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 82 &cxgb_tx_coalesce_force, 0, 83 "coalesce small packets into a single work request regardless of ring state"); 84 85 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 86 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 87 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 88 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 89 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 90 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 91 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 92 93 94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 96 &cxgb_tx_coalesce_enable_start); 97 
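/*
 * Illustrative sizing note (assuming the usual TX_ETH_Q_SIZE of 1024,
 * which is defined elsewhere): the macros above work out to a coalesce
 * start default of 512 (capped at 896), a coalesce stop default of 256
 * (floor of 32), and a reclaim threshold default of 32 (bounded to
 * 16..256).  The values can also be set at boot via loader.conf, e.g.:
 *
 *	hw.cxgb.tx_coalesce_enable_start="512"
 *	hw.cxgb.tx_coalesce_enable_stop="256"
 *	hw.cxgb.tx_reclaim_threshold="32"
 */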
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, 98 &cxgb_tx_coalesce_enable_start, 0, 99 "coalesce enable threshold"); 100 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); 102 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, 103 &cxgb_tx_coalesce_enable_stop, 0, 104 "coalesce disable threshold"); 105 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 106 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); 107 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, 108 &cxgb_tx_reclaim_threshold, 0, 109 "tx cleaning minimum threshold"); 110 111 /* 112 * XXX don't re-enable this until TOE stops assuming 113 * we have an m_ext 114 */ 115 static int recycle_enable = 0; 116 int cxgb_ext_freed = 0; 117 int cxgb_ext_inited = 0; 118 int fl_q_size = 0; 119 int jumbo_q_size = 0; 120 121 extern int cxgb_use_16k_clusters; 122 extern int nmbjumbo4; 123 extern int nmbjumbo9; 124 extern int nmbjumbo16; 125 126 #define USE_GTS 0 127 128 #define SGE_RX_SM_BUF_SIZE 1536 129 #define SGE_RX_DROP_THRES 16 130 #define SGE_RX_COPY_THRES 128 131 132 /* 133 * Period of the Tx buffer reclaim timer. This timer does not need to run 134 * frequently as Tx buffers are usually reclaimed by new Tx packets. 135 */ 136 #define TX_RECLAIM_PERIOD (hz >> 1) 137 138 /* 139 * Values for sge_txq.flags 140 */ 141 enum { 142 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 143 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 144 }; 145 146 struct tx_desc { 147 uint64_t flit[TX_DESC_FLITS]; 148 } __packed; 149 150 struct rx_desc { 151 uint32_t addr_lo; 152 uint32_t len_gen; 153 uint32_t gen2; 154 uint32_t addr_hi; 155 } __packed; 156 157 struct rsp_desc { /* response queue descriptor */ 158 struct rss_header rss_hdr; 159 uint32_t flags; 160 uint32_t len_cq; 161 uint8_t imm_data[47]; 162 uint8_t intr_gen; 163 } __packed; 164 165 #define RX_SW_DESC_MAP_CREATED (1 << 0) 166 #define TX_SW_DESC_MAP_CREATED (1 << 1) 167 #define RX_SW_DESC_INUSE (1 << 3) 168 #define TX_SW_DESC_MAPPED (1 << 4) 169 170 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 171 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 172 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 173 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 174 175 struct tx_sw_desc { /* SW state per Tx descriptor */ 176 struct mbuf *m; 177 bus_dmamap_t map; 178 int flags; 179 }; 180 181 struct rx_sw_desc { /* SW state per Rx descriptor */ 182 caddr_t rxsd_cl; 183 struct mbuf *m; 184 bus_dmamap_t map; 185 int flags; 186 }; 187 188 struct txq_state { 189 unsigned int compl; 190 unsigned int gen; 191 unsigned int pidx; 192 }; 193 194 struct refill_fl_cb_arg { 195 int error; 196 bus_dma_segment_t seg; 197 int nseg; 198 }; 199 200 201 /* 202 * Maps a number of flits to the number of Tx descriptors that can hold them. 203 * The formula is 204 * 205 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 206 * 207 * HW allows up to 4 descriptors to be combined into a WR. 
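 *
 * Read directly from the table: with one generation bit, 1-16 flits fit
 * in one descriptor, 17-31 in two, 32-46 in three and 47-61 in four
 * (i.e. WR_FLITS - 1 == 15); with two generation bits each range is one
 * flit shorter per descriptor because the last flit of every descriptor
 * is reserved for the second generation word (see wr_gen2()).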
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue has filled past
	 * cxgb_tx_coalesce_enable_start entries we mark it as coalescing;
	 * we drop back out of coalescing once it drains below
	 * cxgb_tx_coalesce_enable_stop and there are no packets enqueued.
	 * This provides some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next ==
NULL))) { 320 *count += 1; 321 *nbytes += m->m_len; 322 return (1); 323 } 324 return (0); 325 } 326 327 static struct mbuf * 328 cxgb_dequeue(struct sge_qset *qs) 329 { 330 struct mbuf *m, *m_head, *m_tail; 331 struct coalesce_info ci; 332 333 334 if (check_pkt_coalesce(qs) == 0) 335 return TXQ_RING_DEQUEUE(qs); 336 337 m_head = m_tail = NULL; 338 ci.count = ci.nbytes = 0; 339 do { 340 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 341 if (m_head == NULL) { 342 m_tail = m_head = m; 343 } else if (m != NULL) { 344 m_tail->m_nextpkt = m; 345 m_tail = m; 346 } 347 } while (m != NULL); 348 if (ci.count > 7) 349 panic("trying to coalesce %d packets in to one WR", ci.count); 350 return (m_head); 351 } 352 353 /** 354 * reclaim_completed_tx - reclaims completed Tx descriptors 355 * @adapter: the adapter 356 * @q: the Tx queue to reclaim completed descriptors from 357 * 358 * Reclaims Tx descriptors that the SGE has indicated it has processed, 359 * and frees the associated buffers if possible. Called with the Tx 360 * queue's lock held. 361 */ 362 static __inline int 363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 364 { 365 struct sge_txq *q = &qs->txq[queue]; 366 int reclaim = desc_reclaimable(q); 367 368 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 369 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 370 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 371 372 if (reclaim < reclaim_min) 373 return (0); 374 375 mtx_assert(&qs->lock, MA_OWNED); 376 if (reclaim > 0) { 377 t3_free_tx_desc(qs, reclaim, queue); 378 q->cleaned += reclaim; 379 q->in_use -= reclaim; 380 } 381 if (isset(&qs->txq_stopped, TXQ_ETH)) 382 clrbit(&qs->txq_stopped, TXQ_ETH); 383 384 return (reclaim); 385 } 386 387 /** 388 * should_restart_tx - are there enough resources to restart a Tx queue? 389 * @q: the Tx queue 390 * 391 * Checks if there are enough descriptors to restart a suspended Tx queue. 392 */ 393 static __inline int 394 should_restart_tx(const struct sge_txq *q) 395 { 396 unsigned int r = q->processed - q->cleaned; 397 398 return q->in_use - r < (q->size >> 1); 399 } 400 401 /** 402 * t3_sge_init - initialize SGE 403 * @adap: the adapter 404 * @p: the SGE parameters 405 * 406 * Performs SGE initialization needed every time after a chip reset. 407 * We do not initialize any of the queue sets here, instead the driver 408 * top-level must request those individually. We also do not enable DMA 409 * here, that should be done after the queues have been set up. 410 */ 411 void 412 t3_sge_init(adapter_t *adap, struct sge_params *p) 413 { 414 u_int ctrl, ups; 415 416 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 417 418 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 419 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 420 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 421 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 422 #if SGE_NUM_GENBITS == 1 423 ctrl |= F_EGRGENCTRL; 424 #endif 425 if (adap->params.rev > 0) { 426 if (!(adap->flags & (USING_MSIX | USING_MSI))) 427 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 428 } 429 t3_write_reg(adap, A_SG_CONTROL, ctrl); 430 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 431 V_LORCQDRBTHRSH(512)); 432 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 433 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 434 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 435 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 436 adap->params.rev < T3_REV_C ? 
1000 : 500); 437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 442 } 443 444 445 /** 446 * sgl_len - calculates the size of an SGL of the given capacity 447 * @n: the number of SGL entries 448 * 449 * Calculates the number of flits needed for a scatter/gather list that 450 * can hold the given number of entries. 451 */ 452 static __inline unsigned int 453 sgl_len(unsigned int n) 454 { 455 return ((3 * n) / 2 + (n & 1)); 456 } 457 458 /** 459 * get_imm_packet - return the next ingress packet buffer from a response 460 * @resp: the response descriptor containing the packet data 461 * 462 * Return a packet containing the immediate data of the given response. 463 */ 464 static int 465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 466 { 467 468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 469 m->m_ext.ext_buf = NULL; 470 m->m_ext.ext_type = 0; 471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 472 return (0); 473 } 474 475 static __inline u_int 476 flits_to_desc(u_int n) 477 { 478 return (flit_desc_map[n]); 479 } 480 481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 482 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 483 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 484 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 485 F_HIRCQPARITYERROR) 486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 488 F_RSPQDISABLED) 489 490 /** 491 * t3_sge_err_intr_handler - SGE async event interrupt handler 492 * @adapter: the adapter 493 * 494 * Interrupt handler for SGE asynchronous (non-data) events. 495 */ 496 void 497 t3_sge_err_intr_handler(adapter_t *adapter) 498 { 499 unsigned int v, status; 500 501 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 502 if (status & SGE_PARERR) 503 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 504 status & SGE_PARERR); 505 if (status & SGE_FRAMINGERR) 506 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 507 status & SGE_FRAMINGERR); 508 if (status & F_RSPQCREDITOVERFOW) 509 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 510 511 if (status & F_RSPQDISABLED) { 512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 513 514 CH_ALERT(adapter, 515 "packet delivered to disabled response queue (0x%x)\n", 516 (v >> S_RSPQ0DISABLED) & 0xff); 517 } 518 519 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 520 if (status & SGE_FATALERR) 521 t3_fatal_err(adapter); 522 } 523 524 void 525 t3_sge_prep(adapter_t *adap, struct sge_params *p) 526 { 527 int i, nqsets; 528 529 nqsets = min(SGE_QSETS, mp_ncpus*4); 530 531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 532 533 while (!powerof2(fl_q_size)) 534 fl_q_size--; 535 #if __FreeBSD_version >= 700111 536 if (cxgb_use_16k_clusters) 537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 538 else 539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 540 #else 541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 542 #endif 543 while (!powerof2(jumbo_q_size)) 544 jumbo_q_size--; 545 546 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 547 device_printf(adap->dev, 548 "Insufficient clusters and/or jumbo buffers.\n"); 549 550 /* XXX Does ETHER_ALIGN need to be accounted for here? 
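 * (fl[1] is the jumbo free list, so the largest buffer the driver posts,
 * less the CPL_RX_DATA header that precedes the payload, is what bounds
 * max_pkt_size here.)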
*/ 551 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 552 553 for (i = 0; i < SGE_QSETS; ++i) { 554 struct qset_params *q = p->qset + i; 555 556 if (adap->params.nports > 2) { 557 q->coalesce_usecs = 50; 558 } else { 559 #ifdef INVARIANTS 560 q->coalesce_usecs = 10; 561 #else 562 q->coalesce_usecs = 5; 563 #endif 564 } 565 q->polling = 0; 566 q->rspq_size = RSPQ_Q_SIZE; 567 q->fl_size = fl_q_size; 568 q->jumbo_size = jumbo_q_size; 569 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 570 q->txq_size[TXQ_OFLD] = 1024; 571 q->txq_size[TXQ_CTRL] = 256; 572 q->cong_thres = 0; 573 } 574 } 575 576 int 577 t3_sge_alloc(adapter_t *sc) 578 { 579 580 /* The parent tag. */ 581 if (bus_dma_tag_create( NULL, /* parent */ 582 1, 0, /* algnmnt, boundary */ 583 BUS_SPACE_MAXADDR, /* lowaddr */ 584 BUS_SPACE_MAXADDR, /* highaddr */ 585 NULL, NULL, /* filter, filterarg */ 586 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 587 BUS_SPACE_UNRESTRICTED, /* nsegments */ 588 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 589 0, /* flags */ 590 NULL, NULL, /* lock, lockarg */ 591 &sc->parent_dmat)) { 592 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 593 return (ENOMEM); 594 } 595 596 /* 597 * DMA tag for normal sized RX frames 598 */ 599 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 600 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 601 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 602 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 603 return (ENOMEM); 604 } 605 606 /* 607 * DMA tag for jumbo sized RX frames. 608 */ 609 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 610 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 611 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 612 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 613 return (ENOMEM); 614 } 615 616 /* 617 * DMA tag for TX frames. 618 */ 619 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 620 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 621 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 622 NULL, NULL, &sc->tx_dmat)) { 623 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 624 return (ENOMEM); 625 } 626 627 return (0); 628 } 629 630 int 631 t3_sge_free(struct adapter * sc) 632 { 633 634 if (sc->tx_dmat != NULL) 635 bus_dma_tag_destroy(sc->tx_dmat); 636 637 if (sc->rx_jumbo_dmat != NULL) 638 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 639 640 if (sc->rx_dmat != NULL) 641 bus_dma_tag_destroy(sc->rx_dmat); 642 643 if (sc->parent_dmat != NULL) 644 bus_dma_tag_destroy(sc->parent_dmat); 645 646 return (0); 647 } 648 649 void 650 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 651 { 652 653 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 654 qs->rspq.polling = 0 /* p->polling */; 655 } 656 657 #if !defined(__i386__) && !defined(__amd64__) 658 static void 659 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 660 { 661 struct refill_fl_cb_arg *cb_arg = arg; 662 663 cb_arg->error = error; 664 cb_arg->seg = segs[0]; 665 cb_arg->nseg = nseg; 666 667 } 668 #endif 669 /** 670 * refill_fl - refill an SGE free-buffer list 671 * @sc: the controller softc 672 * @q: the free-list to refill 673 * @n: the number of new buffers to allocate 674 * 675 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 676 * The caller must assure that @n does not exceed the queue's capacity. 
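 *
 * Buffers come either from zone_pack, where cluster and mbuf are
 * allocated as a unit, or as a bare cluster paired with an uninitialized
 * (MT_NOINIT) mbuf header; either way the mbuf is only initialized once
 * a frame has actually landed in the buffer.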
677 */ 678 static void 679 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 680 { 681 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 682 struct rx_desc *d = &q->desc[q->pidx]; 683 struct refill_fl_cb_arg cb_arg; 684 struct mbuf *m; 685 caddr_t cl; 686 int err, count = 0; 687 688 cb_arg.error = 0; 689 while (n--) { 690 /* 691 * We only allocate a cluster, mbuf allocation happens after rx 692 */ 693 if (q->zone == zone_pack) { 694 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 695 break; 696 cl = m->m_ext.ext_buf; 697 } else { 698 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 699 break; 700 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 701 uma_zfree(q->zone, cl); 702 break; 703 } 704 } 705 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 706 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 707 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 708 uma_zfree(q->zone, cl); 709 goto done; 710 } 711 sd->flags |= RX_SW_DESC_MAP_CREATED; 712 } 713 #if !defined(__i386__) && !defined(__amd64__) 714 err = bus_dmamap_load(q->entry_tag, sd->map, 715 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 716 717 if (err != 0 || cb_arg.error) { 718 if (q->zone == zone_pack) 719 uma_zfree(q->zone, cl); 720 m_free(m); 721 goto done; 722 } 723 #else 724 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 725 #endif 726 sd->flags |= RX_SW_DESC_INUSE; 727 sd->rxsd_cl = cl; 728 sd->m = m; 729 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 730 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 731 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 732 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 733 734 d++; 735 sd++; 736 737 if (++q->pidx == q->size) { 738 q->pidx = 0; 739 q->gen ^= 1; 740 sd = q->sdesc; 741 d = q->desc; 742 } 743 q->credits++; 744 count++; 745 } 746 747 done: 748 if (count) 749 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 750 } 751 752 753 /** 754 * free_rx_bufs - free the Rx buffers on an SGE free list 755 * @sc: the controle softc 756 * @q: the SGE free list to clean up 757 * 758 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 759 * this queue should be stopped before calling this function. 760 */ 761 static void 762 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 763 { 764 u_int cidx = q->cidx; 765 766 while (q->credits--) { 767 struct rx_sw_desc *d = &q->sdesc[cidx]; 768 769 if (d->flags & RX_SW_DESC_INUSE) { 770 bus_dmamap_unload(q->entry_tag, d->map); 771 bus_dmamap_destroy(q->entry_tag, d->map); 772 if (q->zone == zone_pack) { 773 m_init(d->m, zone_pack, MCLBYTES, 774 M_NOWAIT, MT_DATA, M_EXT); 775 uma_zfree(zone_pack, d->m); 776 } else { 777 m_init(d->m, zone_mbuf, MLEN, 778 M_NOWAIT, MT_DATA, 0); 779 uma_zfree(zone_mbuf, d->m); 780 uma_zfree(q->zone, d->rxsd_cl); 781 } 782 } 783 784 d->rxsd_cl = NULL; 785 d->m = NULL; 786 if (++cidx == q->size) 787 cidx = 0; 788 } 789 } 790 791 static __inline void 792 __refill_fl(adapter_t *adap, struct sge_fl *fl) 793 { 794 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 795 } 796 797 static __inline void 798 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 799 { 800 if ((fl->size - fl->credits) < max) 801 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 802 } 803 804 /** 805 * recycle_rx_buf - recycle a receive buffer 806 * @adapter: the adapter 807 * @q: the SGE free list 808 * @idx: index of buffer to recycle 809 * 810 * Recycles the specified buffer on the given free list by adding it at 811 * the next available slot on the list. 
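 *
 * Only the software descriptor and the already byte-swapped DMA address
 * are copied over; the generation words are rewritten for the producer
 * slot, so no new allocation or DMA mapping is required.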
812 */ 813 static void 814 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 815 { 816 struct rx_desc *from = &q->desc[idx]; 817 struct rx_desc *to = &q->desc[q->pidx]; 818 819 q->sdesc[q->pidx] = q->sdesc[idx]; 820 to->addr_lo = from->addr_lo; // already big endian 821 to->addr_hi = from->addr_hi; // likewise 822 wmb(); /* necessary ? */ 823 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 824 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 825 q->credits++; 826 827 if (++q->pidx == q->size) { 828 q->pidx = 0; 829 q->gen ^= 1; 830 } 831 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 832 } 833 834 static void 835 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 836 { 837 uint32_t *addr; 838 839 addr = arg; 840 *addr = segs[0].ds_addr; 841 } 842 843 static int 844 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 845 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 846 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 847 { 848 size_t len = nelem * elem_size; 849 void *s = NULL; 850 void *p = NULL; 851 int err; 852 853 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 854 BUS_SPACE_MAXADDR_32BIT, 855 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 856 len, 0, NULL, NULL, tag)) != 0) { 857 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 858 return (ENOMEM); 859 } 860 861 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 862 map)) != 0) { 863 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 864 return (ENOMEM); 865 } 866 867 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 868 bzero(p, len); 869 *(void **)desc = p; 870 871 if (sw_size) { 872 len = nelem * sw_size; 873 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 874 *(void **)sdesc = s; 875 } 876 if (parent_entry_tag == NULL) 877 return (0); 878 879 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 880 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 881 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 882 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 883 NULL, NULL, entry_tag)) != 0) { 884 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 885 return (ENOMEM); 886 } 887 return (0); 888 } 889 890 static void 891 sge_slow_intr_handler(void *arg, int ncount) 892 { 893 adapter_t *sc = arg; 894 895 t3_slow_intr_handler(sc); 896 } 897 898 /** 899 * sge_timer_cb - perform periodic maintenance of an SGE qset 900 * @data: the SGE queue set to maintain 901 * 902 * Runs periodically from a timer to perform maintenance of an SGE queue 903 * set. It performs two tasks: 904 * 905 * a) Cleans up any completed Tx descriptors that may still be pending. 906 * Normal descriptor cleanup happens when new packets are added to a Tx 907 * queue so this timer is relatively infrequent and does any cleanup only 908 * if the Tx queue has not seen any new packets in a while. We make a 909 * best effort attempt to reclaim descriptors, in that we don't wait 910 * around if we cannot get a queue's lock (which most likely is because 911 * someone else is queueing new packets and so will also handle the clean 912 * up). Since control queues use immediate data exclusively we don't 913 * bother cleaning them up here. 914 * 915 * b) Replenishes Rx queues that have run out due to memory shortage. 916 * Normally new Rx buffers are added when existing ones are consumed but 917 * when out of memory a queue can become empty. 
We try to add only a few 918 * buffers here, the queue will be replenished fully as these new buffers 919 * are used up if memory shortage has subsided. 920 * 921 * c) Return coalesced response queue credits in case a response queue is 922 * starved. 923 * 924 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 925 * fifo overflows and the FW doesn't implement any recovery scheme yet. 926 */ 927 static void 928 sge_timer_cb(void *arg) 929 { 930 adapter_t *sc = arg; 931 if ((sc->flags & USING_MSIX) == 0) { 932 933 struct port_info *pi; 934 struct sge_qset *qs; 935 struct sge_txq *txq; 936 int i, j; 937 int reclaim_ofl, refill_rx; 938 939 if (sc->open_device_map == 0) 940 return; 941 942 for (i = 0; i < sc->params.nports; i++) { 943 pi = &sc->port[i]; 944 for (j = 0; j < pi->nqsets; j++) { 945 qs = &sc->sge.qs[pi->first_qset + j]; 946 txq = &qs->txq[0]; 947 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 948 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 949 (qs->fl[1].credits < qs->fl[1].size)); 950 if (reclaim_ofl || refill_rx) { 951 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 952 break; 953 } 954 } 955 } 956 } 957 958 if (sc->params.nports > 2) { 959 int i; 960 961 for_each_port(sc, i) { 962 struct port_info *pi = &sc->port[i]; 963 964 t3_write_reg(sc, A_SG_KDOORBELL, 965 F_SELEGRCNTX | 966 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 967 } 968 } 969 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 970 sc->open_device_map != 0) 971 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 972 } 973 974 /* 975 * This is meant to be a catch-all function to keep sge state private 976 * to sge.c 977 * 978 */ 979 int 980 t3_sge_init_adapter(adapter_t *sc) 981 { 982 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 983 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 984 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 985 return (0); 986 } 987 988 int 989 t3_sge_reset_adapter(adapter_t *sc) 990 { 991 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 992 return (0); 993 } 994 995 int 996 t3_sge_init_port(struct port_info *pi) 997 { 998 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 999 return (0); 1000 } 1001 1002 /** 1003 * refill_rspq - replenish an SGE response queue 1004 * @adapter: the adapter 1005 * @q: the response queue to replenish 1006 * @credits: how many new responses to make available 1007 * 1008 * Replenishes a response queue by making the supplied number of responses 1009 * available to HW. 1010 */ 1011 static __inline void 1012 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1013 { 1014 1015 /* mbufs are allocated on demand when a rspq entry is processed. 
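 * Returning credits here merely tells the SGE how many response entries
 * it may overwrite again.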
*/ 1016 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1017 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1018 } 1019 1020 static void 1021 sge_txq_reclaim_handler(void *arg, int ncount) 1022 { 1023 struct sge_qset *qs = arg; 1024 int i; 1025 1026 for (i = 0; i < 3; i++) 1027 reclaim_completed_tx(qs, 16, i); 1028 } 1029 1030 static void 1031 sge_timer_reclaim(void *arg, int ncount) 1032 { 1033 struct port_info *pi = arg; 1034 int i, nqsets = pi->nqsets; 1035 adapter_t *sc = pi->adapter; 1036 struct sge_qset *qs; 1037 struct mtx *lock; 1038 1039 KASSERT((sc->flags & USING_MSIX) == 0, 1040 ("can't call timer reclaim for msi-x")); 1041 1042 for (i = 0; i < nqsets; i++) { 1043 qs = &sc->sge.qs[pi->first_qset + i]; 1044 1045 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1046 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1047 &sc->sge.qs[0].rspq.lock; 1048 1049 if (mtx_trylock(lock)) { 1050 /* XXX currently assume that we are *NOT* polling */ 1051 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1052 1053 if (qs->fl[0].credits < qs->fl[0].size - 16) 1054 __refill_fl(sc, &qs->fl[0]); 1055 if (qs->fl[1].credits < qs->fl[1].size - 16) 1056 __refill_fl(sc, &qs->fl[1]); 1057 1058 if (status & (1 << qs->rspq.cntxt_id)) { 1059 if (qs->rspq.credits) { 1060 refill_rspq(sc, &qs->rspq, 1); 1061 qs->rspq.credits--; 1062 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1063 1 << qs->rspq.cntxt_id); 1064 } 1065 } 1066 mtx_unlock(lock); 1067 } 1068 } 1069 } 1070 1071 /** 1072 * init_qset_cntxt - initialize an SGE queue set context info 1073 * @qs: the queue set 1074 * @id: the queue set id 1075 * 1076 * Initializes the TIDs and context ids for the queues of a queue set. 1077 */ 1078 static void 1079 init_qset_cntxt(struct sge_qset *qs, u_int id) 1080 { 1081 1082 qs->rspq.cntxt_id = id; 1083 qs->fl[0].cntxt_id = 2 * id; 1084 qs->fl[1].cntxt_id = 2 * id + 1; 1085 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1086 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1087 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1088 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1089 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1090 1091 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1092 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1093 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1094 } 1095 1096 1097 static void 1098 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1099 { 1100 txq->in_use += ndesc; 1101 /* 1102 * XXX we don't handle stopping of queue 1103 * presumably start handles this when we bump against the end 1104 */ 1105 txqs->gen = txq->gen; 1106 txq->unacked += ndesc; 1107 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1108 txq->unacked &= 31; 1109 txqs->pidx = txq->pidx; 1110 txq->pidx += ndesc; 1111 #ifdef INVARIANTS 1112 if (((txqs->pidx > txq->cidx) && 1113 (txq->pidx < txqs->pidx) && 1114 (txq->pidx >= txq->cidx)) || 1115 ((txqs->pidx < txq->cidx) && 1116 (txq->pidx >= txq-> cidx)) || 1117 ((txqs->pidx < txq->cidx) && 1118 (txq->cidx < txqs->pidx))) 1119 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1120 txqs->pidx, txq->pidx, txq->cidx); 1121 #endif 1122 if (txq->pidx >= txq->size) { 1123 txq->pidx -= txq->size; 1124 txq->gen ^= 1; 1125 } 1126 1127 } 1128 1129 /** 1130 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1131 * @m: the packet mbufs 1132 * @nsegs: the number of segments 1133 * 1134 * Returns the number of Tx descriptors needed for the given Ethernet 1135 * packet. Ethernet packets require addition of WR and CPL headers. 
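 *
 * For example, a 3-segment packet larger than PIO_LEN needs
 * sgl_len(3) + 2 = 7 flits (one more with TSO), which flit_desc_map
 * still resolves to a single descriptor.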
1136 */ 1137 static __inline unsigned int 1138 calc_tx_descs(const struct mbuf *m, int nsegs) 1139 { 1140 unsigned int flits; 1141 1142 if (m->m_pkthdr.len <= PIO_LEN) 1143 return 1; 1144 1145 flits = sgl_len(nsegs) + 2; 1146 #ifdef TSO_SUPPORTED 1147 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1148 flits++; 1149 #endif 1150 return flits_to_desc(flits); 1151 } 1152 1153 static unsigned int 1154 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1155 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1156 { 1157 struct mbuf *m0; 1158 int err, pktlen, pass = 0; 1159 bus_dma_tag_t tag = txq->entry_tag; 1160 1161 retry: 1162 err = 0; 1163 m0 = *m; 1164 pktlen = m0->m_pkthdr.len; 1165 #if defined(__i386__) || defined(__amd64__) 1166 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1167 goto done; 1168 } else 1169 #endif 1170 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1171 1172 if (err == 0) { 1173 goto done; 1174 } 1175 if (err == EFBIG && pass == 0) { 1176 pass = 1; 1177 /* Too many segments, try to defrag */ 1178 m0 = m_defrag(m0, M_DONTWAIT); 1179 if (m0 == NULL) { 1180 m_freem(*m); 1181 *m = NULL; 1182 return (ENOBUFS); 1183 } 1184 *m = m0; 1185 goto retry; 1186 } else if (err == ENOMEM) { 1187 return (err); 1188 } if (err) { 1189 if (cxgb_debug) 1190 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1191 m_freem(m0); 1192 *m = NULL; 1193 return (err); 1194 } 1195 done: 1196 #if !defined(__i386__) && !defined(__amd64__) 1197 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1198 #endif 1199 txsd->flags |= TX_SW_DESC_MAPPED; 1200 1201 return (0); 1202 } 1203 1204 /** 1205 * make_sgl - populate a scatter/gather list for a packet 1206 * @sgp: the SGL to populate 1207 * @segs: the packet dma segments 1208 * @nsegs: the number of segments 1209 * 1210 * Generates a scatter/gather list for the buffers that make up a packet 1211 * and returns the SGL size in 8-byte words. The caller must size the SGL 1212 * appropriately. 1213 */ 1214 static __inline void 1215 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1216 { 1217 int i, idx; 1218 1219 for (idx = 0, i = 0; i < nsegs; i++) { 1220 /* 1221 * firmware doesn't like empty segments 1222 */ 1223 if (segs[i].ds_len == 0) 1224 continue; 1225 if (i && idx == 0) 1226 ++sgp; 1227 1228 sgp->len[idx] = htobe32(segs[i].ds_len); 1229 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1230 idx ^= 1; 1231 } 1232 1233 if (idx) { 1234 sgp->len[idx] = 0; 1235 sgp->addr[idx] = 0; 1236 } 1237 } 1238 1239 /** 1240 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1241 * @adap: the adapter 1242 * @q: the Tx queue 1243 * 1244 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1245 * where the HW is going to sleep just after we checked, however, 1246 * then the interrupt handler will detect the outstanding TX packet 1247 * and ring the doorbell for us. 1248 * 1249 * When GTS is disabled we unconditionally ring the doorbell. 
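 *
 * USE_GTS is defined to 0 in this file, so the driver always takes the
 * unconditional path below.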
1250 */ 1251 static __inline void 1252 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1253 { 1254 #if USE_GTS 1255 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1256 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1257 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1258 #ifdef T3_TRACE 1259 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1260 q->cntxt_id); 1261 #endif 1262 t3_write_reg(adap, A_SG_KDOORBELL, 1263 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1264 } 1265 #else 1266 wmb(); /* write descriptors before telling HW */ 1267 t3_write_reg(adap, A_SG_KDOORBELL, 1268 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1269 #endif 1270 } 1271 1272 static __inline void 1273 wr_gen2(struct tx_desc *d, unsigned int gen) 1274 { 1275 #if SGE_NUM_GENBITS == 2 1276 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1277 #endif 1278 } 1279 1280 /** 1281 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1282 * @ndesc: number of Tx descriptors spanned by the SGL 1283 * @txd: first Tx descriptor to be written 1284 * @txqs: txq state (generation and producer index) 1285 * @txq: the SGE Tx queue 1286 * @sgl: the SGL 1287 * @flits: number of flits to the start of the SGL in the first descriptor 1288 * @sgl_flits: the SGL size in flits 1289 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1290 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1291 * 1292 * Write a work request header and an associated SGL. If the SGL is 1293 * small enough to fit into one Tx descriptor it has already been written 1294 * and we just need to write the WR header. Otherwise we distribute the 1295 * SGL across the number of descriptors it spans. 1296 */ 1297 static void 1298 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1299 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1300 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1301 { 1302 1303 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1304 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1305 1306 if (__predict_true(ndesc == 1)) { 1307 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1308 V_WR_SGLSFLT(flits)) | wr_hi, 1309 htonl(V_WR_LEN(flits + sgl_flits) | 1310 V_WR_GEN(txqs->gen)) | wr_lo); 1311 /* XXX gen? 
*/ 1312 wr_gen2(txd, txqs->gen); 1313 1314 } else { 1315 unsigned int ogen = txqs->gen; 1316 const uint64_t *fp = (const uint64_t *)sgl; 1317 struct work_request_hdr *wp = wrp; 1318 1319 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1320 V_WR_SGLSFLT(flits)) | wr_hi; 1321 1322 while (sgl_flits) { 1323 unsigned int avail = WR_FLITS - flits; 1324 1325 if (avail > sgl_flits) 1326 avail = sgl_flits; 1327 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1328 sgl_flits -= avail; 1329 ndesc--; 1330 if (!sgl_flits) 1331 break; 1332 1333 fp += avail; 1334 txd++; 1335 txsd++; 1336 if (++txqs->pidx == txq->size) { 1337 txqs->pidx = 0; 1338 txqs->gen ^= 1; 1339 txd = txq->desc; 1340 txsd = txq->sdesc; 1341 } 1342 1343 /* 1344 * when the head of the mbuf chain 1345 * is freed all clusters will be freed 1346 * with it 1347 */ 1348 wrp = (struct work_request_hdr *)txd; 1349 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1350 V_WR_SGLSFLT(1)) | wr_hi; 1351 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1352 sgl_flits + 1)) | 1353 V_WR_GEN(txqs->gen)) | wr_lo; 1354 wr_gen2(txd, txqs->gen); 1355 flits = 1; 1356 } 1357 wrp->wrh_hi |= htonl(F_WR_EOP); 1358 wmb(); 1359 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1360 wr_gen2((struct tx_desc *)wp, ogen); 1361 } 1362 } 1363 1364 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1365 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1366 1367 #ifdef VLAN_SUPPORTED 1368 #define GET_VTAG(cntrl, m) \ 1369 do { \ 1370 if ((m)->m_flags & M_VLANTAG) \ 1371 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1372 } while (0) 1373 1374 #else 1375 #define GET_VTAG(cntrl, m) 1376 #endif 1377 1378 static int 1379 t3_encap(struct sge_qset *qs, struct mbuf **m) 1380 { 1381 adapter_t *sc; 1382 struct mbuf *m0; 1383 struct sge_txq *txq; 1384 struct txq_state txqs; 1385 struct port_info *pi; 1386 unsigned int ndesc, flits, cntrl, mlen; 1387 int err, nsegs, tso_info = 0; 1388 1389 struct work_request_hdr *wrp; 1390 struct tx_sw_desc *txsd; 1391 struct sg_ent *sgp, *sgl; 1392 uint32_t wr_hi, wr_lo, sgl_flits; 1393 bus_dma_segment_t segs[TX_MAX_SEGS]; 1394 1395 struct tx_desc *txd; 1396 1397 pi = qs->port; 1398 sc = pi->adapter; 1399 txq = &qs->txq[TXQ_ETH]; 1400 txd = &txq->desc[txq->pidx]; 1401 txsd = &txq->sdesc[txq->pidx]; 1402 sgl = txq->txq_sgl; 1403 1404 prefetch(txd); 1405 m0 = *m; 1406 1407 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1408 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1409 1410 mtx_assert(&qs->lock, MA_OWNED); 1411 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1412 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1413 1414 #ifdef VLAN_SUPPORTED 1415 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1416 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1417 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1418 #endif 1419 if (m0->m_nextpkt != NULL) { 1420 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1421 ndesc = 1; 1422 mlen = 0; 1423 } else { 1424 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1425 &m0, segs, &nsegs))) { 1426 if (cxgb_debug) 1427 printf("failed ... 
err=%d\n", err); 1428 return (err); 1429 } 1430 mlen = m0->m_pkthdr.len; 1431 ndesc = calc_tx_descs(m0, nsegs); 1432 } 1433 txq_prod(txq, ndesc, &txqs); 1434 1435 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1436 txsd->m = m0; 1437 1438 if (m0->m_nextpkt != NULL) { 1439 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1440 int i, fidx; 1441 1442 if (nsegs > 7) 1443 panic("trying to coalesce %d packets in to one WR", nsegs); 1444 txq->txq_coalesced += nsegs; 1445 wrp = (struct work_request_hdr *)txd; 1446 flits = nsegs*2 + 1; 1447 1448 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1449 struct cpl_tx_pkt_batch_entry *cbe; 1450 uint64_t flit; 1451 uint32_t *hflit = (uint32_t *)&flit; 1452 int cflags = m0->m_pkthdr.csum_flags; 1453 1454 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1455 GET_VTAG(cntrl, m0); 1456 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1457 if (__predict_false(!(cflags & CSUM_IP))) 1458 cntrl |= F_TXPKT_IPCSUM_DIS; 1459 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1460 cntrl |= F_TXPKT_L4CSUM_DIS; 1461 1462 hflit[0] = htonl(cntrl); 1463 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1464 flit |= htobe64(1 << 24); 1465 cbe = &cpl_batch->pkt_entry[i]; 1466 cbe->cntrl = hflit[0]; 1467 cbe->len = hflit[1]; 1468 cbe->addr = htobe64(segs[i].ds_addr); 1469 } 1470 1471 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1472 V_WR_SGLSFLT(flits)) | 1473 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1474 wr_lo = htonl(V_WR_LEN(flits) | 1475 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1476 set_wr_hdr(wrp, wr_hi, wr_lo); 1477 wmb(); 1478 wr_gen2(txd, txqs.gen); 1479 check_ring_tx_db(sc, txq); 1480 return (0); 1481 } else if (tso_info) { 1482 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1483 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1484 struct ip *ip; 1485 struct tcphdr *tcp; 1486 char *pkthdr; 1487 1488 txd->flit[2] = 0; 1489 GET_VTAG(cntrl, m0); 1490 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1491 hdr->cntrl = htonl(cntrl); 1492 hdr->len = htonl(mlen | 0x80000000); 1493 1494 DPRINTF("tso buf len=%d\n", mlen); 1495 1496 tagged = m0->m_flags & M_VLANTAG; 1497 if (!tagged) 1498 min_size -= ETHER_VLAN_ENCAP_LEN; 1499 1500 if (__predict_false(mlen < min_size)) { 1501 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1502 m0, mlen, m0->m_pkthdr.tso_segsz, 1503 m0->m_pkthdr.csum_flags, m0->m_flags); 1504 panic("tx tso packet too small"); 1505 } 1506 1507 /* Make sure that ether, ip, tcp headers are all in m0 */ 1508 if (__predict_false(m0->m_len < min_size)) { 1509 m0 = m_pullup(m0, min_size); 1510 if (__predict_false(m0 == NULL)) { 1511 /* XXX panic probably an overreaction */ 1512 panic("couldn't fit header into mbuf"); 1513 } 1514 } 1515 pkthdr = m0->m_data; 1516 1517 if (tagged) { 1518 eth_type = CPL_ETH_II_VLAN; 1519 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1520 ETHER_VLAN_ENCAP_LEN); 1521 } else { 1522 eth_type = CPL_ETH_II; 1523 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1524 } 1525 tcp = (struct tcphdr *)((uint8_t *)ip + 1526 sizeof(*ip)); 1527 1528 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1529 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1530 V_LSO_TCPHDR_WORDS(tcp->th_off); 1531 hdr->lso_info = htonl(tso_info); 1532 1533 if (__predict_false(mlen <= PIO_LEN)) { 1534 /* pkt not undersized but fits in PIO_LEN 1535 * Indicates a TSO bug at the higher levels. 
1536 * 1537 */ 1538 DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1539 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); 1540 txsd->m = NULL; 1541 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1542 flits = (mlen + 7) / 8 + 3; 1543 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1544 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1545 F_WR_SOP | F_WR_EOP | txqs.compl); 1546 wr_lo = htonl(V_WR_LEN(flits) | 1547 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1548 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1549 wmb(); 1550 wr_gen2(txd, txqs.gen); 1551 check_ring_tx_db(sc, txq); 1552 return (0); 1553 } 1554 flits = 3; 1555 } else { 1556 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1557 1558 GET_VTAG(cntrl, m0); 1559 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1560 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1561 cntrl |= F_TXPKT_IPCSUM_DIS; 1562 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1563 cntrl |= F_TXPKT_L4CSUM_DIS; 1564 cpl->cntrl = htonl(cntrl); 1565 cpl->len = htonl(mlen | 0x80000000); 1566 1567 if (mlen <= PIO_LEN) { 1568 txsd->m = NULL; 1569 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1570 flits = (mlen + 7) / 8 + 2; 1571 1572 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1573 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1574 F_WR_SOP | F_WR_EOP | txqs.compl); 1575 wr_lo = htonl(V_WR_LEN(flits) | 1576 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1577 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1578 wmb(); 1579 wr_gen2(txd, txqs.gen); 1580 check_ring_tx_db(sc, txq); 1581 return (0); 1582 } 1583 flits = 2; 1584 } 1585 wrp = (struct work_request_hdr *)txd; 1586 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1587 make_sgl(sgp, segs, nsegs); 1588 1589 sgl_flits = sgl_len(nsegs); 1590 1591 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1592 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1593 wr_lo = htonl(V_WR_TID(txq->token)); 1594 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1595 sgl_flits, wr_hi, wr_lo); 1596 check_ring_tx_db(pi->adapter, txq); 1597 1598 return (0); 1599 } 1600 1601 void 1602 cxgb_tx_watchdog(void *arg) 1603 { 1604 struct sge_qset *qs = arg; 1605 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1606 1607 if (qs->coalescing != 0 && 1608 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1609 TXQ_RING_EMPTY(qs)) 1610 qs->coalescing = 0; 1611 else if (qs->coalescing == 0 && 1612 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1613 qs->coalescing = 1; 1614 if (TXQ_TRYLOCK(qs)) { 1615 qs->qs_flags |= QS_FLUSHING; 1616 cxgb_start_locked(qs); 1617 qs->qs_flags &= ~QS_FLUSHING; 1618 TXQ_UNLOCK(qs); 1619 } 1620 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1621 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1622 qs, txq->txq_watchdog.c_cpu); 1623 } 1624 1625 static void 1626 cxgb_tx_timeout(void *arg) 1627 { 1628 struct sge_qset *qs = arg; 1629 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1630 1631 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1632 qs->coalescing = 1; 1633 if (TXQ_TRYLOCK(qs)) { 1634 qs->qs_flags |= QS_TIMEOUT; 1635 cxgb_start_locked(qs); 1636 qs->qs_flags &= ~QS_TIMEOUT; 1637 TXQ_UNLOCK(qs); 1638 } 1639 } 1640 1641 static void 1642 cxgb_start_locked(struct sge_qset *qs) 1643 { 1644 struct mbuf *m_head = NULL; 1645 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1646 int avail, txmax; 1647 int in_use_init = txq->in_use; 1648 struct port_info *pi = qs->port; 1649 struct ifnet *ifp = pi->ifp; 1650 avail = txq->size - txq->in_use - 4; 1651 txmax = 
min(TX_START_MAX_DESC, avail); 1652 1653 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1654 reclaim_completed_tx(qs, 0, TXQ_ETH); 1655 1656 if (!pi->link_config.link_ok) { 1657 TXQ_RING_FLUSH(qs); 1658 return; 1659 } 1660 TXQ_LOCK_ASSERT(qs); 1661 while ((txq->in_use - in_use_init < txmax) && 1662 !TXQ_RING_EMPTY(qs) && 1663 (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1664 pi->link_config.link_ok) { 1665 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1666 1667 if ((m_head = cxgb_dequeue(qs)) == NULL) 1668 break; 1669 /* 1670 * Encapsulation can modify our pointer, and or make it 1671 * NULL on failure. In that event, we can't requeue. 1672 */ 1673 if (t3_encap(qs, &m_head) || m_head == NULL) 1674 break; 1675 1676 /* Send a copy of the frame to the BPF listener */ 1677 ETHER_BPF_MTAP(ifp, m_head); 1678 1679 /* 1680 * We sent via PIO, no longer need a copy 1681 */ 1682 if (m_head->m_nextpkt == NULL && 1683 m_head->m_pkthdr.len <= PIO_LEN) 1684 m_freem(m_head); 1685 1686 m_head = NULL; 1687 } 1688 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1689 pi->link_config.link_ok) 1690 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1691 qs, txq->txq_timer.c_cpu); 1692 if (m_head != NULL) 1693 m_freem(m_head); 1694 } 1695 1696 static int 1697 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1698 { 1699 struct port_info *pi = qs->port; 1700 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1701 struct buf_ring *br = txq->txq_mr; 1702 int error, avail; 1703 1704 avail = txq->size - txq->in_use; 1705 TXQ_LOCK_ASSERT(qs); 1706 1707 /* 1708 * We can only do a direct transmit if the following are true: 1709 * - we aren't coalescing (ring < 3/4 full) 1710 * - the link is up -- checked in caller 1711 * - there are no packets enqueued already 1712 * - there is space in hardware transmit queue 1713 */ 1714 if (check_pkt_coalesce(qs) == 0 && 1715 TXQ_RING_EMPTY(qs) && avail > 4) { 1716 if (t3_encap(qs, &m)) { 1717 if (m != NULL && 1718 (error = drbr_enqueue(ifp, br, m)) != 0) 1719 return (error); 1720 } else { 1721 /* 1722 * We've bypassed the buf ring so we need to update 1723 * the stats directly 1724 */ 1725 txq->txq_direct_packets++; 1726 txq->txq_direct_bytes += m->m_pkthdr.len; 1727 /* 1728 ** Send a copy of the frame to the BPF 1729 ** listener and set the watchdog on. 
1730 */ 1731 ETHER_BPF_MTAP(ifp, m); 1732 /* 1733 * We sent via PIO, no longer need a copy 1734 */ 1735 if (m->m_pkthdr.len <= PIO_LEN) 1736 m_freem(m); 1737 1738 } 1739 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1740 return (error); 1741 1742 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1743 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1744 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1745 cxgb_start_locked(qs); 1746 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1747 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1748 qs, txq->txq_timer.c_cpu); 1749 return (0); 1750 } 1751 1752 int 1753 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1754 { 1755 struct sge_qset *qs; 1756 struct port_info *pi = ifp->if_softc; 1757 int error, qidx = pi->first_qset; 1758 1759 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1760 ||(!pi->link_config.link_ok)) { 1761 m_freem(m); 1762 return (0); 1763 } 1764 1765 if (m->m_flags & M_FLOWID) 1766 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1767 1768 qs = &pi->adapter->sge.qs[qidx]; 1769 1770 if (TXQ_TRYLOCK(qs)) { 1771 /* XXX running */ 1772 error = cxgb_transmit_locked(ifp, qs, m); 1773 TXQ_UNLOCK(qs); 1774 } else 1775 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1776 return (error); 1777 } 1778 void 1779 cxgb_start(struct ifnet *ifp) 1780 { 1781 struct port_info *pi = ifp->if_softc; 1782 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset]; 1783 1784 if (!pi->link_config.link_ok) 1785 return; 1786 1787 TXQ_LOCK(qs); 1788 cxgb_start_locked(qs); 1789 TXQ_UNLOCK(qs); 1790 } 1791 1792 void 1793 cxgb_qflush(struct ifnet *ifp) 1794 { 1795 /* 1796 * flush any enqueued mbufs in the buf_rings 1797 * and in the transmit queues 1798 * no-op for now 1799 */ 1800 return; 1801 } 1802 1803 /** 1804 * write_imm - write a packet into a Tx descriptor as immediate data 1805 * @d: the Tx descriptor to write 1806 * @m: the packet 1807 * @len: the length of packet data to write as immediate data 1808 * @gen: the generation bit value to write 1809 * 1810 * Writes a packet as immediate data into a Tx descriptor. The packet 1811 * contains a work request at its beginning. We must write the packet 1812 * carefully so the SGE doesn't read accidentally before it's written in 1813 * its entirety. 
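 *
 * The payload is copied into the descriptor first; only then is the work
 * request header, which carries the generation bit the SGE checks,
 * stored by set_wr_hdr(), and wr_gen2() updates the trailing generation
 * word behind a write barrier.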
1814 */ 1815 static __inline void 1816 write_imm(struct tx_desc *d, struct mbuf *m, 1817 unsigned int len, unsigned int gen) 1818 { 1819 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1820 struct work_request_hdr *to = (struct work_request_hdr *)d; 1821 uint32_t wr_hi, wr_lo; 1822 1823 if (len > WR_LEN) 1824 panic("len too big %d\n", len); 1825 if (len < sizeof(*from)) 1826 panic("len too small %d", len); 1827 1828 memcpy(&to[1], &from[1], len - sizeof(*from)); 1829 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1830 V_WR_BCNTLFLT(len & 7)); 1831 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | 1832 V_WR_LEN((len + 7) / 8)); 1833 set_wr_hdr(to, wr_hi, wr_lo); 1834 wmb(); 1835 wr_gen2(d, gen); 1836 1837 /* 1838 * This check is a hack we should really fix the logic so 1839 * that this can't happen 1840 */ 1841 if (m->m_type != MT_DONTFREE) 1842 m_freem(m); 1843 1844 } 1845 1846 /** 1847 * check_desc_avail - check descriptor availability on a send queue 1848 * @adap: the adapter 1849 * @q: the TX queue 1850 * @m: the packet needing the descriptors 1851 * @ndesc: the number of Tx descriptors needed 1852 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1853 * 1854 * Checks if the requested number of Tx descriptors is available on an 1855 * SGE send queue. If the queue is already suspended or not enough 1856 * descriptors are available the packet is queued for later transmission. 1857 * Must be called with the Tx queue locked. 1858 * 1859 * Returns 0 if enough descriptors are available, 1 if there aren't 1860 * enough descriptors and the packet has been queued, and 2 if the caller 1861 * needs to retry because there weren't enough descriptors at the 1862 * beginning of the call but some freed up in the mean time. 1863 */ 1864 static __inline int 1865 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1866 struct mbuf *m, unsigned int ndesc, 1867 unsigned int qid) 1868 { 1869 /* 1870 * XXX We currently only use this for checking the control queue 1871 * the control queue is only used for binding qsets which happens 1872 * at init time so we are guaranteed enough descriptors 1873 */ 1874 if (__predict_false(!mbufq_empty(&q->sendq))) { 1875 addq_exit: mbufq_tail(&q->sendq, m); 1876 return 1; 1877 } 1878 if (__predict_false(q->size - q->in_use < ndesc)) { 1879 1880 struct sge_qset *qs = txq_to_qset(q, qid); 1881 1882 setbit(&qs->txq_stopped, qid); 1883 if (should_restart_tx(q) && 1884 test_and_clear_bit(qid, &qs->txq_stopped)) 1885 return 2; 1886 1887 q->stops++; 1888 goto addq_exit; 1889 } 1890 return 0; 1891 } 1892 1893 1894 /** 1895 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1896 * @q: the SGE control Tx queue 1897 * 1898 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1899 * that send only immediate data (presently just the control queues) and 1900 * thus do not have any mbufs 1901 */ 1902 static __inline void 1903 reclaim_completed_tx_imm(struct sge_txq *q) 1904 { 1905 unsigned int reclaim = q->processed - q->cleaned; 1906 1907 q->in_use -= reclaim; 1908 q->cleaned += reclaim; 1909 } 1910 1911 static __inline int 1912 immediate(const struct mbuf *m) 1913 { 1914 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1915 } 1916 1917 /** 1918 * ctrl_xmit - send a packet through an SGE control Tx queue 1919 * @adap: the adapter 1920 * @q: the control queue 1921 * @m: the packet 1922 * 1923 * Send a packet through an SGE control Tx queue. 
Packets sent through 1924 * a control queue must fit entirely as immediate data in a single Tx 1925 * descriptor and have no page fragments. 1926 */ 1927 static int 1928 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1929 { 1930 int ret; 1931 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1932 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1933 1934 if (__predict_false(!immediate(m))) { 1935 m_freem(m); 1936 return 0; 1937 } 1938 1939 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1940 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1941 1942 TXQ_LOCK(qs); 1943 again: reclaim_completed_tx_imm(q); 1944 1945 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1946 if (__predict_false(ret)) { 1947 if (ret == 1) { 1948 TXQ_UNLOCK(qs); 1949 return (ENOSPC); 1950 } 1951 goto again; 1952 } 1953 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1954 1955 q->in_use++; 1956 if (++q->pidx >= q->size) { 1957 q->pidx = 0; 1958 q->gen ^= 1; 1959 } 1960 TXQ_UNLOCK(qs); 1961 wmb(); 1962 t3_write_reg(adap, A_SG_KDOORBELL, 1963 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1964 return (0); 1965 } 1966 1967 1968 /** 1969 * restart_ctrlq - restart a suspended control queue 1970 * @qs: the queue set cotaining the control queue 1971 * 1972 * Resumes transmission on a suspended Tx control queue. 1973 */ 1974 static void 1975 restart_ctrlq(void *data, int npending) 1976 { 1977 struct mbuf *m; 1978 struct sge_qset *qs = (struct sge_qset *)data; 1979 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1980 adapter_t *adap = qs->port->adapter; 1981 1982 TXQ_LOCK(qs); 1983 again: reclaim_completed_tx_imm(q); 1984 1985 while (q->in_use < q->size && 1986 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1987 1988 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1989 1990 if (++q->pidx >= q->size) { 1991 q->pidx = 0; 1992 q->gen ^= 1; 1993 } 1994 q->in_use++; 1995 } 1996 if (!mbufq_empty(&q->sendq)) { 1997 setbit(&qs->txq_stopped, TXQ_CTRL); 1998 1999 if (should_restart_tx(q) && 2000 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 2001 goto again; 2002 q->stops++; 2003 } 2004 TXQ_UNLOCK(qs); 2005 t3_write_reg(adap, A_SG_KDOORBELL, 2006 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2007 } 2008 2009 2010 /* 2011 * Send a management message through control queue 0 2012 */ 2013 int 2014 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2015 { 2016 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2017 } 2018 2019 /** 2020 * free_qset - free the resources of an SGE queue set 2021 * @sc: the controller owning the queue set 2022 * @q: the queue set 2023 * 2024 * Release the HW and SW resources associated with an SGE queue set, such 2025 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2026 * queue set must be quiesced prior to calling this. 
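 * The caller must hold the queue set lock; it is dropped and destroyed here.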
2027 */ 2028 static void 2029 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2030 { 2031 int i; 2032 2033 reclaim_completed_tx(q, 0, TXQ_ETH); 2034 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2035 if (q->txq[i].txq_mr != NULL) 2036 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); 2037 if (q->txq[i].txq_ifq != NULL) { 2038 ifq_delete(q->txq[i].txq_ifq); 2039 free(q->txq[i].txq_ifq, M_DEVBUF); 2040 } 2041 } 2042 2043 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2044 if (q->fl[i].desc) { 2045 mtx_lock_spin(&sc->sge.reg_lock); 2046 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2047 mtx_unlock_spin(&sc->sge.reg_lock); 2048 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2049 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2050 q->fl[i].desc_map); 2051 bus_dma_tag_destroy(q->fl[i].desc_tag); 2052 bus_dma_tag_destroy(q->fl[i].entry_tag); 2053 } 2054 if (q->fl[i].sdesc) { 2055 free_rx_bufs(sc, &q->fl[i]); 2056 free(q->fl[i].sdesc, M_DEVBUF); 2057 } 2058 } 2059 2060 mtx_unlock(&q->lock); 2061 MTX_DESTROY(&q->lock); 2062 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2063 if (q->txq[i].desc) { 2064 mtx_lock_spin(&sc->sge.reg_lock); 2065 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2066 mtx_unlock_spin(&sc->sge.reg_lock); 2067 bus_dmamap_unload(q->txq[i].desc_tag, 2068 q->txq[i].desc_map); 2069 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2070 q->txq[i].desc_map); 2071 bus_dma_tag_destroy(q->txq[i].desc_tag); 2072 bus_dma_tag_destroy(q->txq[i].entry_tag); 2073 } 2074 if (q->txq[i].sdesc) { 2075 free(q->txq[i].sdesc, M_DEVBUF); 2076 } 2077 } 2078 2079 if (q->rspq.desc) { 2080 mtx_lock_spin(&sc->sge.reg_lock); 2081 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2082 mtx_unlock_spin(&sc->sge.reg_lock); 2083 2084 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2085 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2086 q->rspq.desc_map); 2087 bus_dma_tag_destroy(q->rspq.desc_tag); 2088 MTX_DESTROY(&q->rspq.lock); 2089 } 2090 2091 #ifdef LRO_SUPPORTED 2092 tcp_lro_free(&q->lro.ctrl); 2093 #endif 2094 2095 bzero(q, sizeof(*q)); 2096 } 2097 2098 /** 2099 * t3_free_sge_resources - free SGE resources 2100 * @sc: the adapter softc 2101 * 2102 * Frees resources used by the SGE queue sets. 2103 */ 2104 void 2105 t3_free_sge_resources(adapter_t *sc) 2106 { 2107 int i, nqsets; 2108 2109 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2110 nqsets += sc->port[i].nqsets; 2111 2112 for (i = 0; i < nqsets; ++i) { 2113 TXQ_LOCK(&sc->sge.qs[i]); 2114 t3_free_qset(sc, &sc->sge.qs[i]); 2115 } 2116 2117 } 2118 2119 /** 2120 * t3_sge_start - enable SGE 2121 * @sc: the controller softc 2122 * 2123 * Enables the SGE for DMAs. This is the last step in starting packet 2124 * transfers. 2125 */ 2126 void 2127 t3_sge_start(adapter_t *sc) 2128 { 2129 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2130 } 2131 2132 /** 2133 * t3_sge_stop - disable SGE operation 2134 * @sc: the adapter 2135 * 2136 * Disables the DMA engine. This can be called in emeregencies (e.g., 2137 * from error interrupts) or from normal process context. In the latter 2138 * case it also disables any pending queue restart tasklets. Note that 2139 * if it is called in interrupt context it cannot disable the restart 2140 * tasklets as it cannot wait, however the tasklets will have no effect 2141 * since the doorbells are disabled and the driver will call this again 2142 * later from process context, at which time the tasklets will be stopped 2143 * if they are still running. 
2144 */ 2145 void 2146 t3_sge_stop(adapter_t *sc) 2147 { 2148 int i, nqsets; 2149 2150 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2151 2152 if (sc->tq == NULL) 2153 return; 2154 2155 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2156 nqsets += sc->port[i].nqsets; 2157 #ifdef notyet 2158 /* 2159 * 2160 * XXX 2161 */ 2162 for (i = 0; i < nqsets; ++i) { 2163 struct sge_qset *qs = &sc->sge.qs[i]; 2164 2165 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2166 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2167 } 2168 #endif 2169 } 2170 2171 /** 2172 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2173 * @adapter: the adapter 2174 * @q: the Tx queue to reclaim descriptors from 2175 * @reclaimable: the number of descriptors to reclaim 2176 * @m_vec_size: maximum number of buffers to reclaim 2177 * @desc_reclaimed: returns the number of descriptors reclaimed 2178 * 2179 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2180 * Tx buffers. Called with the Tx queue lock held. 2181 * 2182 * Returns number of buffers of reclaimed 2183 */ 2184 void 2185 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2186 { 2187 struct tx_sw_desc *txsd; 2188 unsigned int cidx, mask; 2189 struct sge_txq *q = &qs->txq[queue]; 2190 2191 #ifdef T3_TRACE 2192 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2193 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2194 #endif 2195 cidx = q->cidx; 2196 mask = q->size - 1; 2197 txsd = &q->sdesc[cidx]; 2198 2199 mtx_assert(&qs->lock, MA_OWNED); 2200 while (reclaimable--) { 2201 prefetch(q->sdesc[(cidx + 1) & mask].m); 2202 prefetch(q->sdesc[(cidx + 2) & mask].m); 2203 2204 if (txsd->m != NULL) { 2205 if (txsd->flags & TX_SW_DESC_MAPPED) { 2206 bus_dmamap_unload(q->entry_tag, txsd->map); 2207 txsd->flags &= ~TX_SW_DESC_MAPPED; 2208 } 2209 m_freem_list(txsd->m); 2210 txsd->m = NULL; 2211 } else 2212 q->txq_skipped++; 2213 2214 ++txsd; 2215 if (++cidx == q->size) { 2216 cidx = 0; 2217 txsd = q->sdesc; 2218 } 2219 } 2220 q->cidx = cidx; 2221 2222 } 2223 2224 /** 2225 * is_new_response - check if a response is newly written 2226 * @r: the response descriptor 2227 * @q: the response queue 2228 * 2229 * Returns true if a response descriptor contains a yet unprocessed 2230 * response. 2231 */ 2232 static __inline int 2233 is_new_response(const struct rsp_desc *r, 2234 const struct sge_rspq *q) 2235 { 2236 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2237 } 2238 2239 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2240 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2241 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2242 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2243 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2244 2245 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2246 #define NOMEM_INTR_DELAY 2500 2247 2248 /** 2249 * write_ofld_wr - write an offload work request 2250 * @adap: the adapter 2251 * @m: the packet to send 2252 * @q: the Tx queue 2253 * @pidx: index of the first Tx descriptor to write 2254 * @gen: the generation value to use 2255 * @ndesc: number of descriptors the packet will occupy 2256 * 2257 * Write an offload work request to send the supplied packet. The packet 2258 * data already carry the work request with most fields populated. 
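 * Packets small enough to go out as immediate data (and carrying no SGL
 * segments) are handed to write_imm(); all others have their WR header and
 * SGL written by write_wr_hdr_sgl().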
2259 */ 2260 static void 2261 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2262 struct sge_txq *q, unsigned int pidx, 2263 unsigned int gen, unsigned int ndesc, 2264 bus_dma_segment_t *segs, unsigned int nsegs) 2265 { 2266 unsigned int sgl_flits, flits; 2267 struct work_request_hdr *from; 2268 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2269 struct tx_desc *d = &q->desc[pidx]; 2270 struct txq_state txqs; 2271 2272 if (immediate(m) && nsegs == 0) { 2273 write_imm(d, m, m->m_len, gen); 2274 return; 2275 } 2276 2277 /* Only TX_DATA builds SGLs */ 2278 from = mtod(m, struct work_request_hdr *); 2279 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2280 2281 flits = m->m_len / 8; 2282 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2283 2284 make_sgl(sgp, segs, nsegs); 2285 sgl_flits = sgl_len(nsegs); 2286 2287 txqs.gen = gen; 2288 txqs.pidx = pidx; 2289 txqs.compl = 0; 2290 2291 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2292 from->wrh_hi, from->wrh_lo); 2293 } 2294 2295 /** 2296 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2297 * @m: the packet 2298 * 2299 * Returns the number of Tx descriptors needed for the given offload 2300 * packet. These packets are already fully constructed. 2301 */ 2302 static __inline unsigned int 2303 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2304 { 2305 unsigned int flits, cnt = 0; 2306 int ndescs; 2307 2308 if (m->m_len <= WR_LEN && nsegs == 0) 2309 return (1); /* packet fits as immediate data */ 2310 2311 /* 2312 * This needs to be re-visited for TOE 2313 */ 2314 2315 cnt = nsegs; 2316 2317 /* headers */ 2318 flits = m->m_len / 8; 2319 2320 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2321 2322 return (ndescs); 2323 } 2324 2325 /** 2326 * ofld_xmit - send a packet through an offload queue 2327 * @adap: the adapter 2328 * @q: the Tx offload queue 2329 * @m: the packet 2330 * 2331 * Send an offload packet through an SGE offload queue. 
2332 */ 2333 static int 2334 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2335 { 2336 int ret, nsegs; 2337 unsigned int ndesc; 2338 unsigned int pidx, gen; 2339 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2340 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2341 struct tx_sw_desc *stx; 2342 2343 nsegs = m_get_sgllen(m); 2344 vsegs = m_get_sgl(m); 2345 ndesc = calc_tx_descs_ofld(m, nsegs); 2346 busdma_map_sgl(vsegs, segs, nsegs); 2347 2348 stx = &q->sdesc[q->pidx]; 2349 2350 TXQ_LOCK(qs); 2351 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2352 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2353 if (__predict_false(ret)) { 2354 if (ret == 1) { 2355 printf("no ofld desc avail\n"); 2356 2357 m_set_priority(m, ndesc); /* save for restart */ 2358 TXQ_UNLOCK(qs); 2359 return (EINTR); 2360 } 2361 goto again; 2362 } 2363 2364 gen = q->gen; 2365 q->in_use += ndesc; 2366 pidx = q->pidx; 2367 q->pidx += ndesc; 2368 if (q->pidx >= q->size) { 2369 q->pidx -= q->size; 2370 q->gen ^= 1; 2371 } 2372 #ifdef T3_TRACE 2373 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2374 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2375 ndesc, pidx, skb->len, skb->len - skb->data_len, 2376 skb_shinfo(skb)->nr_frags); 2377 #endif 2378 TXQ_UNLOCK(qs); 2379 2380 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2381 check_ring_tx_db(adap, q); 2382 return (0); 2383 } 2384 2385 /** 2386 * restart_offloadq - restart a suspended offload queue 2387 * @qs: the queue set cotaining the offload queue 2388 * 2389 * Resumes transmission on a suspended Tx offload queue. 2390 */ 2391 static void 2392 restart_offloadq(void *data, int npending) 2393 { 2394 struct mbuf *m; 2395 struct sge_qset *qs = data; 2396 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2397 adapter_t *adap = qs->port->adapter; 2398 bus_dma_segment_t segs[TX_MAX_SEGS]; 2399 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2400 int nsegs, cleaned; 2401 2402 TXQ_LOCK(qs); 2403 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2404 2405 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2406 unsigned int gen, pidx; 2407 unsigned int ndesc = m_get_priority(m); 2408 2409 if (__predict_false(q->size - q->in_use < ndesc)) { 2410 setbit(&qs->txq_stopped, TXQ_OFLD); 2411 if (should_restart_tx(q) && 2412 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2413 goto again; 2414 q->stops++; 2415 break; 2416 } 2417 2418 gen = q->gen; 2419 q->in_use += ndesc; 2420 pidx = q->pidx; 2421 q->pidx += ndesc; 2422 if (q->pidx >= q->size) { 2423 q->pidx -= q->size; 2424 q->gen ^= 1; 2425 } 2426 2427 (void)mbufq_dequeue(&q->sendq); 2428 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2429 TXQ_UNLOCK(qs); 2430 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2431 TXQ_LOCK(qs); 2432 } 2433 #if USE_GTS 2434 set_bit(TXQ_RUNNING, &q->flags); 2435 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2436 #endif 2437 TXQ_UNLOCK(qs); 2438 wmb(); 2439 t3_write_reg(adap, A_SG_KDOORBELL, 2440 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2441 } 2442 2443 /** 2444 * queue_set - return the queue set a packet should use 2445 * @m: the packet 2446 * 2447 * Maps a packet to the SGE queue set it should use. The desired queue 2448 * set is carried in bits 1-3 in the packet's priority. 2449 */ 2450 static __inline int 2451 queue_set(const struct mbuf *m) 2452 { 2453 return m_get_priority(m) >> 1; 2454 } 2455 2456 /** 2457 * is_ctrl_pkt - return whether an offload packet is a control packet 2458 * @m: the packet 2459 * 2460 * Determines whether an offload packet should use an OFLD or a CTRL 2461 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2462 */ 2463 static __inline int 2464 is_ctrl_pkt(const struct mbuf *m) 2465 { 2466 return m_get_priority(m) & 1; 2467 } 2468 2469 /** 2470 * t3_offload_tx - send an offload packet 2471 * @tdev: the offload device to send to 2472 * @m: the packet 2473 * 2474 * Sends an offload packet. We use the packet priority to select the 2475 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2476 * should be sent as regular or control, bits 1-3 select the queue set. 2477 */ 2478 int 2479 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2480 { 2481 adapter_t *adap = tdev2adap(tdev); 2482 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2483 2484 if (__predict_false(is_ctrl_pkt(m))) 2485 return ctrl_xmit(adap, qs, m); 2486 2487 return ofld_xmit(adap, qs, m); 2488 } 2489 2490 /** 2491 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2492 * @tdev: the offload device that will be receiving the packets 2493 * @q: the SGE response queue that assembled the bundle 2494 * @m: the partial bundle 2495 * @n: the number of packets in the bundle 2496 * 2497 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2498 */ 2499 static __inline void 2500 deliver_partial_bundle(struct t3cdev *tdev, 2501 struct sge_rspq *q, 2502 struct mbuf *mbufs[], int n) 2503 { 2504 if (n) { 2505 q->offload_bundles++; 2506 cxgb_ofld_recv(tdev, mbufs, n); 2507 } 2508 } 2509 2510 static __inline int 2511 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2512 struct mbuf *m, struct mbuf *rx_gather[], 2513 unsigned int gather_idx) 2514 { 2515 2516 rq->offload_pkts++; 2517 m->m_pkthdr.header = mtod(m, void *); 2518 rx_gather[gather_idx++] = m; 2519 if (gather_idx == RX_BUNDLE_SIZE) { 2520 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2521 gather_idx = 0; 2522 rq->offload_bundles++; 2523 } 2524 return (gather_idx); 2525 } 2526 2527 static void 2528 restart_tx(struct sge_qset *qs) 2529 { 2530 struct adapter *sc = qs->port->adapter; 2531 2532 2533 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2534 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2535 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2536 qs->txq[TXQ_OFLD].restarts++; 2537 DPRINTF("restarting TXQ_OFLD\n"); 2538 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2539 } 2540 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2541 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2542 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2543 qs->txq[TXQ_CTRL].in_use); 2544 2545 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2546 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2547 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2548 qs->txq[TXQ_CTRL].restarts++; 2549 DPRINTF("restarting TXQ_CTRL\n"); 2550 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2551 } 2552 } 2553 2554 /** 2555 * t3_sge_alloc_qset - initialize an SGE queue set 2556 * @sc: the controller softc 2557 * @id: the queue set id 2558 * @nports: how many Ethernet ports will be using this queue set 2559 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2560 * @p: configuration parameters for this queue set 2561 * @ntxq: number of Tx queues for the queue set 2562 * @pi: port info for queue set 2563 * 2564 * Allocate resources and initialize an SGE queue set. A queue set 2565 * comprises a response queue, two Rx free-buffer queues, and up to 3 2566 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2567 * queue, offload queue, and control queue. 2568 */ 2569 int 2570 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2571 const struct qset_params *p, int ntxq, struct port_info *pi) 2572 { 2573 struct sge_qset *q = &sc->sge.qs[id]; 2574 int i, ret = 0; 2575 2576 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2577 q->port = pi; 2578 2579 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2580 2581 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2582 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2583 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2584 goto err; 2585 } 2586 if ((q->txq[i].txq_ifq = 2587 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2588 == NULL) { 2589 device_printf(sc->dev, "failed to allocate ifq\n"); 2590 goto err; 2591 } 2592 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2593 callout_init(&q->txq[i].txq_timer, 1); 2594 callout_init(&q->txq[i].txq_watchdog, 1); 2595 q->txq[i].txq_timer.c_cpu = id % mp_ncpus; 2596 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; 2597 } 2598 init_qset_cntxt(q, id); 2599 q->idx = id; 2600 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2601 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2602 &q->fl[0].desc, &q->fl[0].sdesc, 2603 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2604 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2605 printf("error %d from alloc ring fl0\n", ret); 2606 goto err; 2607 } 2608 2609 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2610 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2611 &q->fl[1].desc, &q->fl[1].sdesc, 2612 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2613 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2614 printf("error %d from alloc ring fl1\n", ret); 2615 goto err; 2616 } 2617 2618 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2619 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2620 &q->rspq.desc_tag, &q->rspq.desc_map, 2621 NULL, NULL)) != 0) { 2622 printf("error %d from alloc ring rspq\n", ret); 2623 goto err; 2624 } 2625 2626 for (i = 0; i < ntxq; ++i) { 2627 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2628 2629 if ((ret = alloc_ring(sc, p->txq_size[i], 2630 sizeof(struct tx_desc), sz, 2631 &q->txq[i].phys_addr, &q->txq[i].desc, 2632 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2633 &q->txq[i].desc_map, 2634 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2635 printf("error %d from alloc ring tx %i\n", ret, i); 2636 goto err; 2637 } 2638 mbufq_init(&q->txq[i].sendq); 2639 q->txq[i].gen = 1; 2640 q->txq[i].size = p->txq_size[i]; 2641 } 2642 2643 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2644 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2645 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2646 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2647 2648 q->fl[0].gen = q->fl[1].gen = 1; 2649 q->fl[0].size = p->fl_size; 2650 q->fl[1].size = p->jumbo_size; 2651 2652 q->rspq.gen = 1; 2653 q->rspq.cidx = 0; 2654 q->rspq.size = p->rspq_size; 2655 2656 q->txq[TXQ_ETH].stop_thres = nports * 2657 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2658 2659 q->fl[0].buf_size = MCLBYTES; 2660 q->fl[0].zone = zone_pack; 2661 q->fl[0].type = EXT_PACKET; 2662 #if __FreeBSD_version > 800000 2663 if (cxgb_use_16k_clusters) { 2664 q->fl[1].buf_size = MJUM16BYTES; 2665 q->fl[1].zone = zone_jumbo16; 2666 q->fl[1].type = EXT_JUMBO16; 2667 } else { 2668 q->fl[1].buf_size = MJUM9BYTES; 2669 q->fl[1].zone = zone_jumbo9; 2670 q->fl[1].type = EXT_JUMBO9; 2671 } 2672 #else 2673 q->fl[1].buf_size = MJUMPAGESIZE; 2674 q->fl[1].zone = zone_jumbop; 2675 q->fl[1].type = EXT_JUMBOP; 2676 #endif 2677 2678 #ifdef LRO_SUPPORTED 2679 /* Allocate and setup the lro_ctrl structure */ 2680 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2681 ret = tcp_lro_init(&q->lro.ctrl); 2682 if (ret) { 2683 printf("error %d from tcp_lro_init\n", ret); 2684 goto err; 2685 } 2686 q->lro.ctrl.ifp = pi->ifp; 2687 #endif 2688 2689 mtx_lock_spin(&sc->sge.reg_lock); 2690 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2691 q->rspq.phys_addr, q->rspq.size, 2692 q->fl[0].buf_size, 1, 0); 2693 if (ret) { 2694 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2695 goto err_unlock; 2696 } 2697 2698 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2699 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2700 q->fl[i].phys_addr, q->fl[i].size, 2701 q->fl[i].buf_size, p->cong_thres, 1, 2702 0); 2703 if (ret) { 2704 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2705 goto err_unlock; 2706 } 2707 } 2708 2709 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2710 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2711 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2712 1, 0); 2713 if (ret) { 2714 printf("error %d from t3_sge_init_ecntxt\n", ret); 2715 goto err_unlock; 2716 } 2717 2718 if (ntxq > 1) { 2719 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2720 USE_GTS, SGE_CNTXT_OFLD, id, 2721 q->txq[TXQ_OFLD].phys_addr, 2722 q->txq[TXQ_OFLD].size, 0, 1, 0); 2723 if (ret) { 2724 printf("error %d from t3_sge_init_ecntxt\n", ret); 2725 goto err_unlock; 2726 } 2727 } 2728 2729 if (ntxq > 2) { 2730 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2731 SGE_CNTXT_CTRL, id, 2732 q->txq[TXQ_CTRL].phys_addr, 2733 q->txq[TXQ_CTRL].size, 2734 q->txq[TXQ_CTRL].token, 1, 0); 2735 if (ret) { 2736 printf("error %d from t3_sge_init_ecntxt\n", ret); 2737 goto err_unlock; 2738 } 2739 } 2740 2741 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2742 device_get_unit(sc->dev), irq_vec_idx); 
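	/*
	 * The response queue has its own lock.  Note that in MSI/INTx mode
	 * the interrupt handlers serialize all response processing under
	 * queue 0's rspq lock (see t3b_intr()/t3_intr_msi() below).
	 */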
2743 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2744 2745 mtx_unlock_spin(&sc->sge.reg_lock); 2746 t3_update_qset_coalesce(q, p); 2747 q->port = pi; 2748 2749 refill_fl(sc, &q->fl[0], q->fl[0].size); 2750 refill_fl(sc, &q->fl[1], q->fl[1].size); 2751 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2752 2753 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2754 V_NEWTIMER(q->rspq.holdoff_tmr)); 2755 2756 return (0); 2757 2758 err_unlock: 2759 mtx_unlock_spin(&sc->sge.reg_lock); 2760 err: 2761 TXQ_LOCK(q); 2762 t3_free_qset(sc, q); 2763 2764 return (ret); 2765 } 2766 2767 /* 2768 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2769 * ethernet data. Hardware assistance with various checksums and any vlan tag 2770 * will also be taken into account here. 2771 */ 2772 void 2773 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2774 { 2775 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2776 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2777 struct ifnet *ifp = pi->ifp; 2778 2779 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2780 2781 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2782 cpl->csum_valid && cpl->csum == 0xffff) { 2783 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2784 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2785 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2786 m->m_pkthdr.csum_data = 0xffff; 2787 } 2788 /* 2789 * XXX need to add VLAN support for 6.x 2790 */ 2791 #ifdef VLAN_SUPPORTED 2792 if (__predict_false(cpl->vlan_valid)) { 2793 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2794 m->m_flags |= M_VLANTAG; 2795 } 2796 #endif 2797 2798 m->m_pkthdr.rcvif = ifp; 2799 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2800 /* 2801 * adjust after conversion to mbuf chain 2802 */ 2803 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2804 m->m_len -= (sizeof(*cpl) + ethpad); 2805 m->m_data += (sizeof(*cpl) + ethpad); 2806 } 2807 2808 /** 2809 * get_packet - return the next ingress packet buffer from a free list 2810 * @adap: the adapter that received the packet 2811 * @drop_thres: # of remaining buffers before we start dropping packets 2812 * @qs: the qset that the SGE free list holding the packet belongs to 2813 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2814 * @r: response descriptor 2815 * 2816 * Get the next packet from a free list and complete setup of the 2817 * sk_buff. If the packet is small we make a copy and recycle the 2818 * original buffer, otherwise we use the original buffer itself. If a 2819 * positive drop threshold is supplied packets are dropped and their 2820 * buffers recycled if (a) the number of remaining buffers is under the 2821 * threshold and the packet is too big to copy, or (b) the packet should 2822 * be copied but there is no memory for the copy. 2823 */ 2824 static int 2825 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2826 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2827 { 2828 2829 unsigned int len_cq = ntohl(r->len_cq); 2830 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2831 int mask, cidx = fl->cidx; 2832 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2833 uint32_t len = G_RSPD_LEN(len_cq); 2834 uint32_t flags = M_EXT; 2835 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2836 caddr_t cl; 2837 struct mbuf *m; 2838 int ret = 0; 2839 2840 mask = fl->size - 1; 2841 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2842 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2843 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2844 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2845 2846 fl->credits--; 2847 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2848 2849 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2850 sopeop == RSPQ_SOP_EOP) { 2851 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2852 goto skip_recycle; 2853 cl = mtod(m, void *); 2854 memcpy(cl, sd->rxsd_cl, len); 2855 recycle_rx_buf(adap, fl, fl->cidx); 2856 m->m_pkthdr.len = m->m_len = len; 2857 m->m_flags = 0; 2858 mh->mh_head = mh->mh_tail = m; 2859 ret = 1; 2860 goto done; 2861 } else { 2862 skip_recycle: 2863 bus_dmamap_unload(fl->entry_tag, sd->map); 2864 cl = sd->rxsd_cl; 2865 m = sd->m; 2866 2867 if ((sopeop == RSPQ_SOP_EOP) || 2868 (sopeop == RSPQ_SOP)) 2869 flags |= M_PKTHDR; 2870 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2871 if (fl->zone == zone_pack) { 2872 /* 2873 * restore clobbered data pointer 2874 */ 2875 m->m_data = m->m_ext.ext_buf; 2876 } else { 2877 m_cljset(m, cl, fl->type); 2878 } 2879 m->m_len = len; 2880 } 2881 switch(sopeop) { 2882 case RSPQ_SOP_EOP: 2883 ret = 1; 2884 /* FALLTHROUGH */ 2885 case RSPQ_SOP: 2886 mh->mh_head = mh->mh_tail = m; 2887 m->m_pkthdr.len = len; 2888 break; 2889 case RSPQ_EOP: 2890 ret = 1; 2891 /* FALLTHROUGH */ 2892 case RSPQ_NSOP_NEOP: 2893 if (mh->mh_tail == NULL) { 2894 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2895 m_freem(m); 2896 break; 2897 } 2898 mh->mh_tail->m_next = m; 2899 mh->mh_tail = m; 2900 mh->mh_head->m_pkthdr.len += len; 2901 break; 2902 } 2903 if (cxgb_debug) 2904 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2905 done: 2906 if (++fl->cidx == fl->size) 2907 fl->cidx = 0; 2908 2909 return (ret); 2910 } 2911 2912 /** 2913 * handle_rsp_cntrl_info - handles control information in a response 2914 * @qs: the queue set corresponding to the response 2915 * @flags: the response control flags 2916 * 2917 * Handles the control information of an SGE response, such as GTS 2918 * indications and completion credits for the queue set's Tx queues. 2919 * HW coalesces credits, we don't do any extra SW coalescing. 
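 * The TXQ0/TXQ1/TXQ2 credit fields map to this queue set's TXQ_ETH,
 * TXQ_OFLD, and TXQ_CTRL queues, respectively.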
2920 */ 2921 static __inline void 2922 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2923 { 2924 unsigned int credits; 2925 2926 #if USE_GTS 2927 if (flags & F_RSPD_TXQ0_GTS) 2928 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2929 #endif 2930 credits = G_RSPD_TXQ0_CR(flags); 2931 if (credits) 2932 qs->txq[TXQ_ETH].processed += credits; 2933 2934 credits = G_RSPD_TXQ2_CR(flags); 2935 if (credits) 2936 qs->txq[TXQ_CTRL].processed += credits; 2937 2938 # if USE_GTS 2939 if (flags & F_RSPD_TXQ1_GTS) 2940 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2941 # endif 2942 credits = G_RSPD_TXQ1_CR(flags); 2943 if (credits) 2944 qs->txq[TXQ_OFLD].processed += credits; 2945 2946 } 2947 2948 static void 2949 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2950 unsigned int sleeping) 2951 { 2952 ; 2953 } 2954 2955 /** 2956 * process_responses - process responses from an SGE response queue 2957 * @adap: the adapter 2958 * @qs: the queue set to which the response queue belongs 2959 * @budget: how many responses can be processed in this round 2960 * 2961 * Process responses from an SGE response queue up to the supplied budget. 2962 * Responses include received packets as well as credits and other events 2963 * for the queues that belong to the response queue's queue set. 2964 * A negative budget is effectively unlimited. 2965 * 2966 * Additionally choose the interrupt holdoff time for the next interrupt 2967 * on this queue. If the system is under memory shortage use a fairly 2968 * long delay to help recovery. 2969 */ 2970 static int 2971 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2972 { 2973 struct sge_rspq *rspq = &qs->rspq; 2974 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2975 int budget_left = budget; 2976 unsigned int sleeping = 0; 2977 #ifdef LRO_SUPPORTED 2978 int lro_enabled = qs->lro.enabled; 2979 int skip_lro; 2980 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2981 #endif 2982 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2983 int ngathered = 0; 2984 #ifdef DEBUG 2985 static int last_holdoff = 0; 2986 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2987 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2988 last_holdoff = rspq->holdoff_tmr; 2989 } 2990 #endif 2991 rspq->next_holdoff = rspq->holdoff_tmr; 2992 2993 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2994 int eth, eop = 0, ethpad = 0; 2995 uint32_t flags = ntohl(r->flags); 2996 uint32_t rss_csum = *(const uint32_t *)r; 2997 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2998 2999 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 3000 3001 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 3002 struct mbuf *m; 3003 3004 if (cxgb_debug) 3005 printf("async notification\n"); 3006 3007 if (rspq->rspq_mh.mh_head == NULL) { 3008 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3009 m = rspq->rspq_mh.mh_head; 3010 } else { 3011 m = m_gethdr(M_DONTWAIT, MT_DATA); 3012 } 3013 if (m == NULL) 3014 goto no_mem; 3015 3016 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3017 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3018 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3019 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3020 eop = 1; 3021 rspq->async_notif++; 3022 goto skip; 3023 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3024 struct mbuf *m = NULL; 3025 3026 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3027 r->rss_hdr.opcode, rspq->cidx); 3028 if (rspq->rspq_mh.mh_head == NULL) 3029 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3030 else 3031 m = m_gethdr(M_DONTWAIT, MT_DATA); 3032 
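			/*
			 * If no mbuf could be allocated for the immediate
			 * data, give up on this round: lengthen the next
			 * interrupt holdoff so memory can recover.
			 */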
3033 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 3034 no_mem: 3035 rspq->next_holdoff = NOMEM_INTR_DELAY; 3036 budget_left--; 3037 break; 3038 } 3039 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3040 eop = 1; 3041 rspq->imm_data++; 3042 } else if (r->len_cq) { 3043 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3044 3045 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3046 if (eop) { 3047 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3048 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3049 } 3050 3051 ethpad = 2; 3052 } else { 3053 rspq->pure_rsps++; 3054 } 3055 skip: 3056 if (flags & RSPD_CTRL_MASK) { 3057 sleeping |= flags & RSPD_GTS_MASK; 3058 handle_rsp_cntrl_info(qs, flags); 3059 } 3060 3061 r++; 3062 if (__predict_false(++rspq->cidx == rspq->size)) { 3063 rspq->cidx = 0; 3064 rspq->gen ^= 1; 3065 r = rspq->desc; 3066 } 3067 3068 if (++rspq->credits >= (rspq->size / 4)) { 3069 refill_rspq(adap, rspq, rspq->credits); 3070 rspq->credits = 0; 3071 } 3072 if (!eth && eop) { 3073 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3074 /* 3075 * XXX size mismatch 3076 */ 3077 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3078 3079 3080 ngathered = rx_offload(&adap->tdev, rspq, 3081 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3082 rspq->rspq_mh.mh_head = NULL; 3083 DPRINTF("received offload packet\n"); 3084 3085 } else if (eth && eop) { 3086 struct mbuf *m = rspq->rspq_mh.mh_head; 3087 3088 t3_rx_eth(adap, rspq, m, ethpad); 3089 3090 #ifdef LRO_SUPPORTED 3091 /* 3092 * The T304 sends incoming packets on any qset. If LRO 3093 * is also enabled, we could end up sending packet up 3094 * lro_ctrl->ifp's input. That is incorrect. 3095 * 3096 * The mbuf's rcvif was derived from the cpl header and 3097 * is accurate. Skip LRO and just use that. 3098 */ 3099 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3100 3101 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 3102 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 3103 /* successfully queue'd for LRO */ 3104 } else 3105 #endif 3106 { 3107 /* 3108 * LRO not enabled, packet unsuitable for LRO, 3109 * or unable to queue. Pass it up right now in 3110 * either case. 3111 */ 3112 struct ifnet *ifp = m->m_pkthdr.rcvif; 3113 (*ifp->if_input)(ifp, m); 3114 } 3115 rspq->rspq_mh.mh_head = NULL; 3116 3117 } 3118 __refill_fl_lt(adap, &qs->fl[0], 32); 3119 __refill_fl_lt(adap, &qs->fl[1], 32); 3120 --budget_left; 3121 } 3122 3123 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3124 3125 #ifdef LRO_SUPPORTED 3126 /* Flush LRO */ 3127 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3128 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3129 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3130 tcp_lro_flush(lro_ctrl, queued); 3131 } 3132 #endif 3133 3134 if (sleeping) 3135 check_ring_db(adap, qs, sleeping); 3136 3137 mb(); /* commit Tx queue processed updates */ 3138 if (__predict_false(qs->txq_stopped > 1)) 3139 restart_tx(qs); 3140 3141 __refill_fl_lt(adap, &qs->fl[0], 512); 3142 __refill_fl_lt(adap, &qs->fl[1], 512); 3143 budget -= budget_left; 3144 return (budget); 3145 } 3146 3147 /* 3148 * A helper function that processes responses and issues GTS. 
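 * The updated holdoff timer and consumer index are written back to the
 * A_SG_GTS register once processing completes.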
3149 */ 3150 static __inline int 3151 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3152 { 3153 int work; 3154 static int last_holdoff = 0; 3155 3156 work = process_responses(adap, rspq_to_qset(rq), -1); 3157 3158 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3159 printf("next_holdoff=%d\n", rq->next_holdoff); 3160 last_holdoff = rq->next_holdoff; 3161 } 3162 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3163 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3164 3165 return (work); 3166 } 3167 3168 3169 /* 3170 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3171 * Handles data events from SGE response queues as well as error and other 3172 * async events as they all use the same interrupt pin. We use one SGE 3173 * response queue per port in this mode and protect all response queues with 3174 * queue 0's lock. 3175 */ 3176 void 3177 t3b_intr(void *data) 3178 { 3179 uint32_t i, map; 3180 adapter_t *adap = data; 3181 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3182 3183 t3_write_reg(adap, A_PL_CLI, 0); 3184 map = t3_read_reg(adap, A_SG_DATA_INTR); 3185 3186 if (!map) 3187 return; 3188 3189 if (__predict_false(map & F_ERRINTR)) 3190 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3191 3192 mtx_lock(&q0->lock); 3193 for_each_port(adap, i) 3194 if (map & (1 << i)) 3195 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3196 mtx_unlock(&q0->lock); 3197 } 3198 3199 /* 3200 * The MSI interrupt handler. This needs to handle data events from SGE 3201 * response queues as well as error and other async events as they all use 3202 * the same MSI vector. We use one SGE response queue per port in this mode 3203 * and protect all response queues with queue 0's lock. 3204 */ 3205 void 3206 t3_intr_msi(void *data) 3207 { 3208 adapter_t *adap = data; 3209 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3210 int i, new_packets = 0; 3211 3212 mtx_lock(&q0->lock); 3213 3214 for_each_port(adap, i) 3215 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3216 new_packets = 1; 3217 mtx_unlock(&q0->lock); 3218 if (new_packets == 0) 3219 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3220 } 3221 3222 void 3223 t3_intr_msix(void *data) 3224 { 3225 struct sge_qset *qs = data; 3226 adapter_t *adap = qs->port->adapter; 3227 struct sge_rspq *rspq = &qs->rspq; 3228 3229 if (process_responses_gts(adap, rspq) == 0) 3230 rspq->unhandled_irqs++; 3231 } 3232 3233 #define QDUMP_SBUF_SIZE 32 * 400 3234 static int 3235 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3236 { 3237 struct sge_rspq *rspq; 3238 struct sge_qset *qs; 3239 int i, err, dump_end, idx; 3240 static int multiplier = 1; 3241 struct sbuf *sb; 3242 struct rsp_desc *rspd; 3243 uint32_t data[4]; 3244 3245 rspq = arg1; 3246 qs = rspq_to_qset(rspq); 3247 if (rspq->rspq_dump_count == 0) 3248 return (0); 3249 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3250 log(LOG_WARNING, 3251 "dump count is too large %d\n", rspq->rspq_dump_count); 3252 rspq->rspq_dump_count = 0; 3253 return (EINVAL); 3254 } 3255 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3256 log(LOG_WARNING, 3257 "dump start of %d is greater than queue size\n", 3258 rspq->rspq_dump_start); 3259 rspq->rspq_dump_start = 0; 3260 return (EINVAL); 3261 } 3262 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3263 if (err) 3264 return (err); 3265 retry_sbufops: 3266 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3267 3268 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3269 (data[0] & 
0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3270 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3271 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3272 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3273 3274 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3275 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3276 3277 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3278 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3279 idx = i & (RSPQ_Q_SIZE-1); 3280 3281 rspd = &rspq->desc[idx]; 3282 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3283 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3284 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3285 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3286 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3287 be32toh(rspd->len_cq), rspd->intr_gen); 3288 } 3289 if (sbuf_overflowed(sb)) { 3290 sbuf_delete(sb); 3291 multiplier++; 3292 goto retry_sbufops; 3293 } 3294 sbuf_finish(sb); 3295 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3296 sbuf_delete(sb); 3297 return (err); 3298 } 3299 3300 static int 3301 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3302 { 3303 struct sge_txq *txq; 3304 struct sge_qset *qs; 3305 int i, j, err, dump_end; 3306 static int multiplier = 1; 3307 struct sbuf *sb; 3308 struct tx_desc *txd; 3309 uint32_t *WR, wr_hi, wr_lo, gen; 3310 uint32_t data[4]; 3311 3312 txq = arg1; 3313 qs = txq_to_qset(txq, TXQ_ETH); 3314 if (txq->txq_dump_count == 0) { 3315 return (0); 3316 } 3317 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3318 log(LOG_WARNING, 3319 "dump count is too large %d\n", txq->txq_dump_count); 3320 txq->txq_dump_count = 1; 3321 return (EINVAL); 3322 } 3323 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3324 log(LOG_WARNING, 3325 "dump start of %d is greater than queue size\n", 3326 txq->txq_dump_start); 3327 txq->txq_dump_start = 0; 3328 return (EINVAL); 3329 } 3330 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3331 if (err) 3332 return (err); 3333 3334 3335 retry_sbufops: 3336 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3337 3338 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3339 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3340 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3341 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3342 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3343 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3344 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3345 txq->txq_dump_start, 3346 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3347 3348 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3349 for (i = txq->txq_dump_start; i < dump_end; i++) { 3350 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3351 WR = (uint32_t *)txd->flit; 3352 wr_hi = ntohl(WR[0]); 3353 wr_lo = ntohl(WR[1]); 3354 gen = G_WR_GEN(wr_lo); 3355 3356 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3357 wr_hi, wr_lo, gen); 3358 for (j = 2; j < 30; j += 4) 3359 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3360 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3361 3362 } 3363 if (sbuf_overflowed(sb)) { 3364 sbuf_delete(sb); 3365 multiplier++; 3366 goto retry_sbufops; 3367 } 3368 sbuf_finish(sb); 3369 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3370 sbuf_delete(sb); 3371 return (err); 3372 } 3373 
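/*
 * The qdump sysctl handlers above and below all follow the same fixed-length
 * sbuf pattern: format into a fixed-size buffer and, if it overflows, retry
 * with a larger multiple.  A minimal sketch of that pattern, factored into a
 * hypothetical helper (not compiled in; the formatter callback is
 * illustrative only):
 */
#if 0
static int
qdump_sbuf_out(struct sysctl_req *req, void *arg,
    void (*fmt)(struct sbuf *, void *))
{
	struct sbuf *sb;
	int err, multiplier = 1;

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	fmt(sb, arg);			/* caller formats the dump */
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}
#endif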
3374 static int 3375 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3376 { 3377 struct sge_txq *txq; 3378 struct sge_qset *qs; 3379 int i, j, err, dump_end; 3380 static int multiplier = 1; 3381 struct sbuf *sb; 3382 struct tx_desc *txd; 3383 uint32_t *WR, wr_hi, wr_lo, gen; 3384 3385 txq = arg1; 3386 qs = txq_to_qset(txq, TXQ_CTRL); 3387 if (txq->txq_dump_count == 0) { 3388 return (0); 3389 } 3390 if (txq->txq_dump_count > 256) { 3391 log(LOG_WARNING, 3392 "dump count is too large %d\n", txq->txq_dump_count); 3393 txq->txq_dump_count = 1; 3394 return (EINVAL); 3395 } 3396 if (txq->txq_dump_start > 255) { 3397 log(LOG_WARNING, 3398 "dump start of %d is greater than queue size\n", 3399 txq->txq_dump_start); 3400 txq->txq_dump_start = 0; 3401 return (EINVAL); 3402 } 3403 3404 retry_sbufops: 3405 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3406 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3407 txq->txq_dump_start, 3408 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3409 3410 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3411 for (i = txq->txq_dump_start; i < dump_end; i++) { 3412 txd = &txq->desc[i & (255)]; 3413 WR = (uint32_t *)txd->flit; 3414 wr_hi = ntohl(WR[0]); 3415 wr_lo = ntohl(WR[1]); 3416 gen = G_WR_GEN(wr_lo); 3417 3418 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3419 wr_hi, wr_lo, gen); 3420 for (j = 2; j < 30; j += 4) 3421 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3422 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3423 3424 } 3425 if (sbuf_overflowed(sb)) { 3426 sbuf_delete(sb); 3427 multiplier++; 3428 goto retry_sbufops; 3429 } 3430 sbuf_finish(sb); 3431 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3432 sbuf_delete(sb); 3433 return (err); 3434 } 3435 3436 static int 3437 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3438 { 3439 adapter_t *sc = arg1; 3440 struct qset_params *qsp = &sc->params.sge.qset[0]; 3441 int coalesce_usecs; 3442 struct sge_qset *qs; 3443 int i, j, err, nqsets = 0; 3444 struct mtx *lock; 3445 3446 if ((sc->flags & FULL_INIT_DONE) == 0) 3447 return (ENXIO); 3448 3449 coalesce_usecs = qsp->coalesce_usecs; 3450 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3451 3452 if (err != 0) { 3453 return (err); 3454 } 3455 if (coalesce_usecs == qsp->coalesce_usecs) 3456 return (0); 3457 3458 for (i = 0; i < sc->params.nports; i++) 3459 for (j = 0; j < sc->port[i].nqsets; j++) 3460 nqsets++; 3461 3462 coalesce_usecs = max(1, coalesce_usecs); 3463 3464 for (i = 0; i < nqsets; i++) { 3465 qs = &sc->sge.qs[i]; 3466 qsp = &sc->params.sge.qset[i]; 3467 qsp->coalesce_usecs = coalesce_usecs; 3468 3469 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3470 &sc->sge.qs[0].rspq.lock; 3471 3472 mtx_lock(lock); 3473 t3_update_qset_coalesce(qs, qsp); 3474 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3475 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3476 mtx_unlock(lock); 3477 } 3478 3479 return (0); 3480 } 3481 3482 3483 void 3484 t3_add_attach_sysctls(adapter_t *sc) 3485 { 3486 struct sysctl_ctx_list *ctx; 3487 struct sysctl_oid_list *children; 3488 3489 ctx = device_get_sysctl_ctx(sc->dev); 3490 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3491 3492 /* random information */ 3493 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3494 "firmware_version", 3495 CTLFLAG_RD, &sc->fw_version, 3496 0, "firmware version"); 3497 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3498 "hw_revision", 3499 CTLFLAG_RD, &sc->params.rev, 3500 0, "chip model"); 3501 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3502 "port_types", 3503 CTLFLAG_RD, &sc->port_types, 3504 0, "type of ports"); 3505 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3506 "enable_debug", 3507 CTLFLAG_RW, &cxgb_debug, 3508 0, "enable verbose debugging output"); 3509 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3510 CTLFLAG_RD, &sc->tunq_coalesce, 3511 "#tunneled packets freed"); 3512 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3513 "txq_overrun", 3514 CTLFLAG_RD, &txq_fills, 3515 0, "#times txq overrun"); 3516 } 3517 3518 3519 static const char *rspq_name = "rspq"; 3520 static const char *txq_names[] = 3521 { 3522 "txq_eth", 3523 "txq_ofld", 3524 "txq_ctrl" 3525 }; 3526 3527 static int 3528 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3529 { 3530 struct port_info *p = arg1; 3531 uint64_t *parg; 3532 3533 if (!p) 3534 return (EINVAL); 3535 3536 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3537 PORT_LOCK(p); 3538 t3_mac_update_stats(&p->mac); 3539 PORT_UNLOCK(p); 3540 3541 return (sysctl_handle_quad(oidp, parg, 0, req)); 3542 } 3543 3544 void 3545 t3_add_configured_sysctls(adapter_t *sc) 3546 { 3547 struct sysctl_ctx_list *ctx; 3548 struct sysctl_oid_list *children; 3549 int i, j; 3550 3551 ctx = device_get_sysctl_ctx(sc->dev); 3552 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3553 3554 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3555 "intr_coal", 3556 CTLTYPE_INT|CTLFLAG_RW, sc, 3557 0, t3_set_coalesce_usecs, 3558 "I", "interrupt coalescing timer (us)"); 3559 3560 for (i = 0; i < sc->params.nports; i++) { 3561 struct port_info *pi = &sc->port[i]; 3562 struct sysctl_oid *poid; 3563 struct sysctl_oid_list *poidlist; 3564 struct mac_stats *mstats = &pi->mac.stats; 3565 3566 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3567 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3568 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3569 poidlist = SYSCTL_CHILDREN(poid); 3570 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3571 "nqsets", CTLFLAG_RD, &pi->nqsets, 3572 0, "#queue sets"); 3573 3574 for (j = 0; j < pi->nqsets; j++) { 3575 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3576 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3577 *ctrlqpoid, *lropoid; 3578 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3579 *txqpoidlist, *ctrlqpoidlist, 3580 *lropoidlist; 3581 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3582 3583 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3584 3585 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3586 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3587 qspoidlist = SYSCTL_CHILDREN(qspoid); 3588 3589 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3590 CTLFLAG_RD, &qs->fl[0].empty, 0, 3591 "freelist #0 empty"); 
3592 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3593 CTLFLAG_RD, &qs->fl[1].empty, 0, 3594 "freelist #1 empty"); 3595 3596 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3597 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3598 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3599 3600 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3601 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3602 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3603 3604 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3605 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3606 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3607 3608 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3609 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3610 lropoidlist = SYSCTL_CHILDREN(lropoid); 3611 3612 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3613 CTLFLAG_RD, &qs->rspq.size, 3614 0, "#entries in response queue"); 3615 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3616 CTLFLAG_RD, &qs->rspq.cidx, 3617 0, "consumer index"); 3618 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3619 CTLFLAG_RD, &qs->rspq.credits, 3620 0, "#credits"); 3621 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3622 CTLFLAG_RD, &qs->rspq.phys_addr, 3623 "physical_address_of the queue"); 3624 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3625 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3626 0, "start rspq dump entry"); 3627 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3628 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3629 0, "#rspq entries to dump"); 3630 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3631 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3632 0, t3_dump_rspq, "A", "dump of the response queue"); 3633 3634 3635 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3636 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3637 0, "#tunneled packets dropped"); 3638 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3639 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3640 0, "#tunneled packets waiting to be sent"); 3641 #if 0 3642 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3643 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3644 0, "#tunneled packets queue producer index"); 3645 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3646 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3647 0, "#tunneled packets queue consumer index"); 3648 #endif 3649 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3650 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3651 0, "#tunneled packets processed by the card"); 3652 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3653 CTLFLAG_RD, &txq->cleaned, 3654 0, "#tunneled packets cleaned"); 3655 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3656 CTLFLAG_RD, &txq->in_use, 3657 0, "#tunneled packet slots in use"); 3658 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3659 CTLFLAG_RD, &txq->txq_frees, 3660 "#tunneled packets freed"); 3661 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3662 CTLFLAG_RD, &txq->txq_skipped, 3663 0, "#tunneled packet descriptors skipped"); 3664 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3665 CTLFLAG_RD, &txq->txq_coalesced, 3666 "#tunneled packets coalesced"); 3667 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3668 CTLFLAG_RD, &txq->txq_enqueued, 3669 0, "#tunneled packets enqueued to hardware"); 3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3671 CTLFLAG_RD, &qs->txq_stopped, 3672 0, "tx queues 
stopped"); 3673 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3674 CTLFLAG_RD, &txq->phys_addr, 3675 "physical_address_of the queue"); 3676 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3677 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3678 0, "txq generation"); 3679 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3680 CTLFLAG_RD, &txq->cidx, 3681 0, "hardware queue cidx"); 3682 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3683 CTLFLAG_RD, &txq->pidx, 3684 0, "hardware queue pidx"); 3685 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3686 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3687 0, "txq start idx for dump"); 3688 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3689 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3690 0, "txq #entries to dump"); 3691 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3692 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3693 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3694 3695 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3696 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3697 0, "ctrlq start idx for dump"); 3698 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3699 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3700 0, "ctrl #entries to dump"); 3701 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3702 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3703 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3704 3705 #ifdef LRO_SUPPORTED 3706 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3707 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3708 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3709 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3710 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3711 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3712 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3713 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3714 #endif 3715 } 3716 3717 /* Now add a node for mac stats. */ 3718 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3719 CTLFLAG_RD, NULL, "MAC statistics"); 3720 poidlist = SYSCTL_CHILDREN(poid); 3721 3722 /* 3723 * We (ab)use the length argument (arg2) to pass on the offset 3724 * of the data that we are interested in. This is only required 3725 * for the quad counters that are updated from the hardware (we 3726 * make sure that we return the latest value). 3727 * sysctl_handle_macstat first updates *all* the counters from 3728 * the hardware, and then returns the latest value of the 3729 * requested counter. Best would be to update only the 3730 * requested counter from hardware, but t3_mac_update_stats() 3731 * hides all the register details and we don't want to dive into 3732 * all that here. 
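 * For example, CXGB_SYSCTL_ADD_QUAD(tx_octets) below registers a node whose
 * arg2 is offsetof(struct mac_stats, tx_octets).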
3733 */ 3734 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3735 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3736 sysctl_handle_macstat, "QU", 0) 3737 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3738 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3739 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3740 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3741 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3742 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3743 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3744 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3745 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3746 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3747 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3748 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3749 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3750 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3751 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3752 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3753 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3755 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3756 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3757 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3758 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3759 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3760 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3761 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3762 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3763 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3764 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3765 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3766 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3767 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3768 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3769 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3770 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3771 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3772 CXGB_SYSCTL_ADD_QUAD(rx_short); 3773 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3774 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3775 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3776 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3777 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3779 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3780 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3781 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3782 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3783 #undef CXGB_SYSCTL_ADD_QUAD 3784 3785 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3786 CTLFLAG_RD, &mstats->a, 0) 3787 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3788 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3789 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3790 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3791 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3792 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3793 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3794 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3795 CXGB_SYSCTL_ADD_ULONG(num_resets); 3796 CXGB_SYSCTL_ADD_ULONG(link_faults); 3797 #undef CXGB_SYSCTL_ADD_ULONG 3798 } 3799 } 3800 3801 /** 3802 * t3_get_desc - dump an SGE descriptor for debugging purposes 3803 * @qs: the queue set 3804 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3805 * @idx: the descriptor index in the queue 3806 * @data: where to dump the descriptor contents 3807 * 3808 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3809 * size of the descriptor. 
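 * EINVAL is returned for an invalid queue number or descriptor index.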
3810  */
3811 int
3812 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3813 		unsigned char *data)
3814 {
3815 	if (qnum >= 6)
3816 		return (EINVAL);
3817 
3818 	if (qnum < 3) {
3819 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3820 			return (EINVAL);
3821 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3822 		return sizeof(struct tx_desc);
3823 	}
3824 
3825 	if (qnum == 3) {
3826 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3827 			return (EINVAL);
3828 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3829 		return sizeof(struct rsp_desc);
3830 	}
3831 
3832 	qnum -= 4;
3833 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3834 		return (EINVAL);
3835 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3836 	return sizeof(struct rx_desc);
3837 }
3838 
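#if 0
/*
 * Minimal usage sketch for t3_get_desc() (not compiled in).  The caller
 * supplies storage large enough for any of the three descriptor types and
 * selects the ring with qnum: 0..2 Tx, 3 response, 4..5 free list.
 * dump_one_desc() is a hypothetical helper shown for illustration only.
 */
static void
dump_one_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx)
{
	union {
		struct tx_desc txd;
		struct rsp_desc rspd;
		struct rx_desc rxd;
	} buf;
	unsigned char *p = (unsigned char *)&buf;
	int i, len;

	len = t3_get_desc(qs, qnum, idx, p);
	if (len <= 0 || len == EINVAL)
		return;		/* invalid queue/index (simplified check) */
	for (i = 0; i < len; i++)
		printf("%02x%c", p[i], ((i & 15) == 15) ? '\n' : ' ');
	printf("\n");
}
#endif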