/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/syslog.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <cxgb_include.h>
#include <sys/mvec.h>

int txq_fills = 0;
int multiq_tx_enable = 1;

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");

#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)


static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
    &cxgb_tx_coalesce_enable_start);
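/*
 * The coalescing and reclaim thresholds above are all derived from the
 * Ethernet Tx queue depth.  As a worked example, if TX_ETH_Q_SIZE is 1024,
 * the coalesce-enable threshold defaults to 512 descriptors in use (capped
 * at 896), the disable threshold to 256 (floored at 32), and the reclaim
 * threshold to 32 (valid range 16 to 256).  check_pkt_coalesce() and
 * reclaim_completed_tx() sanity-check the sysctl-visible values below on
 * every call.
 */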
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
int fl_q_size = 0;
int jumbo_q_size = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {		/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {		/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {		/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int		error;
	bus_dma_segment_t seg;
	int		nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
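 *
 * As a worked example, with both generation bits in use one flit of each
 * descriptor is reserved for them, so WR_FLITS works out to 15: 15 flits
 * still fit in a single descriptor (1 + 13/14), 16 flits need two
 * (1 + 14/14), and the table below tops out at 4 descriptors, the most the
 * HW will combine.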
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * Once the hardware transmit queue fills past the enable threshold
	 * we mark it as coalescing; we drop back from coalescing when it
	 * drains below the disable threshold and there are no packets
	 * enqueued.  This provides us with some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next ==
NULL))) { 320 *count += 1; 321 *nbytes += m->m_len; 322 return (1); 323 } 324 return (0); 325 } 326 327 static struct mbuf * 328 cxgb_dequeue(struct sge_qset *qs) 329 { 330 struct mbuf *m, *m_head, *m_tail; 331 struct coalesce_info ci; 332 333 334 if (check_pkt_coalesce(qs) == 0) 335 return TXQ_RING_DEQUEUE(qs); 336 337 m_head = m_tail = NULL; 338 ci.count = ci.nbytes = 0; 339 do { 340 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 341 if (m_head == NULL) { 342 m_tail = m_head = m; 343 } else if (m != NULL) { 344 m_tail->m_nextpkt = m; 345 m_tail = m; 346 } 347 } while (m != NULL); 348 if (ci.count > 7) 349 panic("trying to coalesce %d packets in to one WR", ci.count); 350 return (m_head); 351 } 352 353 /** 354 * reclaim_completed_tx - reclaims completed Tx descriptors 355 * @adapter: the adapter 356 * @q: the Tx queue to reclaim completed descriptors from 357 * 358 * Reclaims Tx descriptors that the SGE has indicated it has processed, 359 * and frees the associated buffers if possible. Called with the Tx 360 * queue's lock held. 361 */ 362 static __inline int 363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 364 { 365 struct sge_txq *q = &qs->txq[queue]; 366 int reclaim = desc_reclaimable(q); 367 368 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 369 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 370 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 371 372 if (reclaim < reclaim_min) 373 return (0); 374 375 mtx_assert(&qs->lock, MA_OWNED); 376 if (reclaim > 0) { 377 t3_free_tx_desc(qs, reclaim, queue); 378 q->cleaned += reclaim; 379 q->in_use -= reclaim; 380 } 381 if (isset(&qs->txq_stopped, TXQ_ETH)) 382 clrbit(&qs->txq_stopped, TXQ_ETH); 383 384 return (reclaim); 385 } 386 387 /** 388 * should_restart_tx - are there enough resources to restart a Tx queue? 389 * @q: the Tx queue 390 * 391 * Checks if there are enough descriptors to restart a suspended Tx queue. 392 */ 393 static __inline int 394 should_restart_tx(const struct sge_txq *q) 395 { 396 unsigned int r = q->processed - q->cleaned; 397 398 return q->in_use - r < (q->size >> 1); 399 } 400 401 /** 402 * t3_sge_init - initialize SGE 403 * @adap: the adapter 404 * @p: the SGE parameters 405 * 406 * Performs SGE initialization needed every time after a chip reset. 407 * We do not initialize any of the queue sets here, instead the driver 408 * top-level must request those individually. We also do not enable DMA 409 * here, that should be done after the queues have been set up. 410 */ 411 void 412 t3_sge_init(adapter_t *adap, struct sge_params *p) 413 { 414 u_int ctrl, ups; 415 416 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 417 418 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 419 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 420 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 421 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; 422 #if SGE_NUM_GENBITS == 1 423 ctrl |= F_EGRGENCTRL; 424 #endif 425 if (adap->params.rev > 0) { 426 if (!(adap->flags & (USING_MSIX | USING_MSI))) 427 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 428 } 429 t3_write_reg(adap, A_SG_CONTROL, ctrl); 430 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 431 V_LORCQDRBTHRSH(512)); 432 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 433 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 434 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 435 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 436 adap->params.rev < T3_REV_C ? 
1000 : 500); 437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 442 } 443 444 445 /** 446 * sgl_len - calculates the size of an SGL of the given capacity 447 * @n: the number of SGL entries 448 * 449 * Calculates the number of flits needed for a scatter/gather list that 450 * can hold the given number of entries. 451 */ 452 static __inline unsigned int 453 sgl_len(unsigned int n) 454 { 455 return ((3 * n) / 2 + (n & 1)); 456 } 457 458 /** 459 * get_imm_packet - return the next ingress packet buffer from a response 460 * @resp: the response descriptor containing the packet data 461 * 462 * Return a packet containing the immediate data of the given response. 463 */ 464 static int 465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 466 { 467 468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 469 m->m_ext.ext_buf = NULL; 470 m->m_ext.ext_type = 0; 471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 472 return (0); 473 } 474 475 static __inline u_int 476 flits_to_desc(u_int n) 477 { 478 return (flit_desc_map[n]); 479 } 480 481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 482 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 483 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 484 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 485 F_HIRCQPARITYERROR) 486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 488 F_RSPQDISABLED) 489 490 /** 491 * t3_sge_err_intr_handler - SGE async event interrupt handler 492 * @adapter: the adapter 493 * 494 * Interrupt handler for SGE asynchronous (non-data) events. 495 */ 496 void 497 t3_sge_err_intr_handler(adapter_t *adapter) 498 { 499 unsigned int v, status; 500 501 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 502 if (status & SGE_PARERR) 503 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 504 status & SGE_PARERR); 505 if (status & SGE_FRAMINGERR) 506 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 507 status & SGE_FRAMINGERR); 508 if (status & F_RSPQCREDITOVERFOW) 509 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 510 511 if (status & F_RSPQDISABLED) { 512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 513 514 CH_ALERT(adapter, 515 "packet delivered to disabled response queue (0x%x)\n", 516 (v >> S_RSPQ0DISABLED) & 0xff); 517 } 518 519 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 520 if (status & SGE_FATALERR) 521 t3_fatal_err(adapter); 522 } 523 524 void 525 t3_sge_prep(adapter_t *adap, struct sge_params *p) 526 { 527 int i, nqsets; 528 529 nqsets = min(SGE_QSETS, mp_ncpus*4); 530 531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 532 533 while (!powerof2(fl_q_size)) 534 fl_q_size--; 535 #if __FreeBSD_version >= 700111 536 if (cxgb_use_16k_clusters) 537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 538 else 539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 540 #else 541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 542 #endif 543 while (!powerof2(jumbo_q_size)) 544 jumbo_q_size--; 545 546 /* XXX Does ETHER_ALIGN need to be accounted for here? 
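	 * (max_pkt_size below is the jumbo free-list (fl[1]) buffer size less
	 * the CPL_RX_DATA header that precedes each ingress payload.)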
*/ 547 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 548 549 for (i = 0; i < SGE_QSETS; ++i) { 550 struct qset_params *q = p->qset + i; 551 552 if (adap->params.nports > 2) { 553 q->coalesce_usecs = 50; 554 } else { 555 #ifdef INVARIANTS 556 q->coalesce_usecs = 10; 557 #else 558 q->coalesce_usecs = 5; 559 #endif 560 } 561 q->polling = 0; 562 q->rspq_size = RSPQ_Q_SIZE; 563 q->fl_size = fl_q_size; 564 q->jumbo_size = jumbo_q_size; 565 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 566 q->txq_size[TXQ_OFLD] = 1024; 567 q->txq_size[TXQ_CTRL] = 256; 568 q->cong_thres = 0; 569 } 570 } 571 572 int 573 t3_sge_alloc(adapter_t *sc) 574 { 575 576 /* The parent tag. */ 577 if (bus_dma_tag_create( NULL, /* parent */ 578 1, 0, /* algnmnt, boundary */ 579 BUS_SPACE_MAXADDR, /* lowaddr */ 580 BUS_SPACE_MAXADDR, /* highaddr */ 581 NULL, NULL, /* filter, filterarg */ 582 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 583 BUS_SPACE_UNRESTRICTED, /* nsegments */ 584 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 585 0, /* flags */ 586 NULL, NULL, /* lock, lockarg */ 587 &sc->parent_dmat)) { 588 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 589 return (ENOMEM); 590 } 591 592 /* 593 * DMA tag for normal sized RX frames 594 */ 595 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 596 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 597 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 598 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 599 return (ENOMEM); 600 } 601 602 /* 603 * DMA tag for jumbo sized RX frames. 604 */ 605 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 606 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 607 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 608 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 609 return (ENOMEM); 610 } 611 612 /* 613 * DMA tag for TX frames. 614 */ 615 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 616 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 617 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 618 NULL, NULL, &sc->tx_dmat)) { 619 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 620 return (ENOMEM); 621 } 622 623 return (0); 624 } 625 626 int 627 t3_sge_free(struct adapter * sc) 628 { 629 630 if (sc->tx_dmat != NULL) 631 bus_dma_tag_destroy(sc->tx_dmat); 632 633 if (sc->rx_jumbo_dmat != NULL) 634 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 635 636 if (sc->rx_dmat != NULL) 637 bus_dma_tag_destroy(sc->rx_dmat); 638 639 if (sc->parent_dmat != NULL) 640 bus_dma_tag_destroy(sc->parent_dmat); 641 642 return (0); 643 } 644 645 void 646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 647 { 648 649 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 650 qs->rspq.polling = 0 /* p->polling */; 651 } 652 653 #if !defined(__i386__) && !defined(__amd64__) 654 static void 655 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 656 { 657 struct refill_fl_cb_arg *cb_arg = arg; 658 659 cb_arg->error = error; 660 cb_arg->seg = segs[0]; 661 cb_arg->nseg = nseg; 662 663 } 664 #endif 665 /** 666 * refill_fl - refill an SGE free-buffer list 667 * @sc: the controller softc 668 * @q: the free-list to refill 669 * @n: the number of new buffers to allocate 670 * 671 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 672 * The caller must assure that @n does not exceed the queue's capacity. 
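 *
 * On i386/amd64 the cluster's bus address is taken directly from
 * pmap_kextract(); other platforms go through bus_dmamap_load() and the
 * refill_fl_cb() callback.  Either way the descriptor is written with the
 * address split into addr_lo/addr_hi plus the current generation bits,
 * and the free-list doorbell is rung once at the end for the whole batch.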
673 */ 674 static void 675 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 676 { 677 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 678 struct rx_desc *d = &q->desc[q->pidx]; 679 struct refill_fl_cb_arg cb_arg; 680 struct mbuf *m; 681 caddr_t cl; 682 int err, count = 0; 683 684 cb_arg.error = 0; 685 while (n--) { 686 /* 687 * We only allocate a cluster, mbuf allocation happens after rx 688 */ 689 if (q->zone == zone_pack) { 690 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 691 break; 692 cl = m->m_ext.ext_buf; 693 } else { 694 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 695 break; 696 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 697 uma_zfree(q->zone, cl); 698 break; 699 } 700 } 701 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 702 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 703 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 704 uma_zfree(q->zone, cl); 705 goto done; 706 } 707 sd->flags |= RX_SW_DESC_MAP_CREATED; 708 } 709 #if !defined(__i386__) && !defined(__amd64__) 710 err = bus_dmamap_load(q->entry_tag, sd->map, 711 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 712 713 if (err != 0 || cb_arg.error) { 714 if (q->zone == zone_pack) 715 uma_zfree(q->zone, cl); 716 m_free(m); 717 goto done; 718 } 719 #else 720 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 721 #endif 722 sd->flags |= RX_SW_DESC_INUSE; 723 sd->rxsd_cl = cl; 724 sd->m = m; 725 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 726 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 727 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 728 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 729 730 d++; 731 sd++; 732 733 if (++q->pidx == q->size) { 734 q->pidx = 0; 735 q->gen ^= 1; 736 sd = q->sdesc; 737 d = q->desc; 738 } 739 q->credits++; 740 count++; 741 } 742 743 done: 744 if (count) 745 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 746 } 747 748 749 /** 750 * free_rx_bufs - free the Rx buffers on an SGE free list 751 * @sc: the controle softc 752 * @q: the SGE free list to clean up 753 * 754 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 755 * this queue should be stopped before calling this function. 756 */ 757 static void 758 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 759 { 760 u_int cidx = q->cidx; 761 762 while (q->credits--) { 763 struct rx_sw_desc *d = &q->sdesc[cidx]; 764 765 if (d->flags & RX_SW_DESC_INUSE) { 766 bus_dmamap_unload(q->entry_tag, d->map); 767 bus_dmamap_destroy(q->entry_tag, d->map); 768 if (q->zone == zone_pack) { 769 m_init(d->m, zone_pack, MCLBYTES, 770 M_NOWAIT, MT_DATA, M_EXT); 771 uma_zfree(zone_pack, d->m); 772 } else { 773 m_init(d->m, zone_mbuf, MLEN, 774 M_NOWAIT, MT_DATA, 0); 775 uma_zfree(zone_mbuf, d->m); 776 uma_zfree(q->zone, d->rxsd_cl); 777 } 778 } 779 780 d->rxsd_cl = NULL; 781 d->m = NULL; 782 if (++cidx == q->size) 783 cidx = 0; 784 } 785 } 786 787 static __inline void 788 __refill_fl(adapter_t *adap, struct sge_fl *fl) 789 { 790 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 791 } 792 793 static __inline void 794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 795 { 796 if ((fl->size - fl->credits) < max) 797 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 798 } 799 800 /** 801 * recycle_rx_buf - recycle a receive buffer 802 * @adapter: the adapter 803 * @q: the SGE free list 804 * @idx: index of buffer to recycle 805 * 806 * Recycles the specified buffer on the given free list by adding it at 807 * the next available slot on the list. 
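 *
 * The buffer's existing DMA address is reused as-is (it is already in
 * big-endian form), so only the generation bits need to be rewritten and
 * no new allocation or mapping is required.  (The recycle_enable knob
 * above suggests this path is currently kept disabled.)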
808 */ 809 static void 810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 811 { 812 struct rx_desc *from = &q->desc[idx]; 813 struct rx_desc *to = &q->desc[q->pidx]; 814 815 q->sdesc[q->pidx] = q->sdesc[idx]; 816 to->addr_lo = from->addr_lo; // already big endian 817 to->addr_hi = from->addr_hi; // likewise 818 wmb(); /* necessary ? */ 819 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 820 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 821 q->credits++; 822 823 if (++q->pidx == q->size) { 824 q->pidx = 0; 825 q->gen ^= 1; 826 } 827 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 828 } 829 830 static void 831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 832 { 833 uint32_t *addr; 834 835 addr = arg; 836 *addr = segs[0].ds_addr; 837 } 838 839 static int 840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 841 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 842 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 843 { 844 size_t len = nelem * elem_size; 845 void *s = NULL; 846 void *p = NULL; 847 int err; 848 849 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 850 BUS_SPACE_MAXADDR_32BIT, 851 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 852 len, 0, NULL, NULL, tag)) != 0) { 853 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 854 return (ENOMEM); 855 } 856 857 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 858 map)) != 0) { 859 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 860 return (ENOMEM); 861 } 862 863 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 864 bzero(p, len); 865 *(void **)desc = p; 866 867 if (sw_size) { 868 len = nelem * sw_size; 869 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 870 *(void **)sdesc = s; 871 } 872 if (parent_entry_tag == NULL) 873 return (0); 874 875 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 876 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 877 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 878 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 879 NULL, NULL, entry_tag)) != 0) { 880 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 881 return (ENOMEM); 882 } 883 return (0); 884 } 885 886 static void 887 sge_slow_intr_handler(void *arg, int ncount) 888 { 889 adapter_t *sc = arg; 890 891 t3_slow_intr_handler(sc); 892 } 893 894 /** 895 * sge_timer_cb - perform periodic maintenance of an SGE qset 896 * @data: the SGE queue set to maintain 897 * 898 * Runs periodically from a timer to perform maintenance of an SGE queue 899 * set. It performs two tasks: 900 * 901 * a) Cleans up any completed Tx descriptors that may still be pending. 902 * Normal descriptor cleanup happens when new packets are added to a Tx 903 * queue so this timer is relatively infrequent and does any cleanup only 904 * if the Tx queue has not seen any new packets in a while. We make a 905 * best effort attempt to reclaim descriptors, in that we don't wait 906 * around if we cannot get a queue's lock (which most likely is because 907 * someone else is queueing new packets and so will also handle the clean 908 * up). Since control queues use immediate data exclusively we don't 909 * bother cleaning them up here. 910 * 911 * b) Replenishes Rx queues that have run out due to memory shortage. 912 * Normally new Rx buffers are added when existing ones are consumed but 913 * when out of memory a queue can become empty. 
We try to add only a few 914 * buffers here, the queue will be replenished fully as these new buffers 915 * are used up if memory shortage has subsided. 916 * 917 * c) Return coalesced response queue credits in case a response queue is 918 * starved. 919 * 920 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 921 * fifo overflows and the FW doesn't implement any recovery scheme yet. 922 */ 923 static void 924 sge_timer_cb(void *arg) 925 { 926 adapter_t *sc = arg; 927 if ((sc->flags & USING_MSIX) == 0) { 928 929 struct port_info *pi; 930 struct sge_qset *qs; 931 struct sge_txq *txq; 932 int i, j; 933 int reclaim_ofl, refill_rx; 934 935 if (sc->open_device_map == 0) 936 return; 937 938 for (i = 0; i < sc->params.nports; i++) { 939 pi = &sc->port[i]; 940 for (j = 0; j < pi->nqsets; j++) { 941 qs = &sc->sge.qs[pi->first_qset + j]; 942 txq = &qs->txq[0]; 943 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 944 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 945 (qs->fl[1].credits < qs->fl[1].size)); 946 if (reclaim_ofl || refill_rx) { 947 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 948 break; 949 } 950 } 951 } 952 } 953 954 if (sc->params.nports > 2) { 955 int i; 956 957 for_each_port(sc, i) { 958 struct port_info *pi = &sc->port[i]; 959 960 t3_write_reg(sc, A_SG_KDOORBELL, 961 F_SELEGRCNTX | 962 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 963 } 964 } 965 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 966 sc->open_device_map != 0) 967 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 968 } 969 970 /* 971 * This is meant to be a catch-all function to keep sge state private 972 * to sge.c 973 * 974 */ 975 int 976 t3_sge_init_adapter(adapter_t *sc) 977 { 978 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 979 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 980 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 981 return (0); 982 } 983 984 int 985 t3_sge_reset_adapter(adapter_t *sc) 986 { 987 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 988 return (0); 989 } 990 991 int 992 t3_sge_init_port(struct port_info *pi) 993 { 994 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 995 return (0); 996 } 997 998 /** 999 * refill_rspq - replenish an SGE response queue 1000 * @adapter: the adapter 1001 * @q: the response queue to replenish 1002 * @credits: how many new responses to make available 1003 * 1004 * Replenishes a response queue by making the supplied number of responses 1005 * available to HW. 1006 */ 1007 static __inline void 1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1009 { 1010 1011 /* mbufs are allocated on demand when a rspq entry is processed. 
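	 * Returning credits is a single register write: the response queue's
	 * context id and the credit count are packed into
	 * A_SG_RSPQ_CREDIT_RETURN.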
*/ 1012 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1013 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1014 } 1015 1016 static void 1017 sge_txq_reclaim_handler(void *arg, int ncount) 1018 { 1019 struct sge_qset *qs = arg; 1020 int i; 1021 1022 for (i = 0; i < 3; i++) 1023 reclaim_completed_tx(qs, 16, i); 1024 } 1025 1026 static void 1027 sge_timer_reclaim(void *arg, int ncount) 1028 { 1029 struct port_info *pi = arg; 1030 int i, nqsets = pi->nqsets; 1031 adapter_t *sc = pi->adapter; 1032 struct sge_qset *qs; 1033 struct mtx *lock; 1034 1035 KASSERT((sc->flags & USING_MSIX) == 0, 1036 ("can't call timer reclaim for msi-x")); 1037 1038 for (i = 0; i < nqsets; i++) { 1039 qs = &sc->sge.qs[pi->first_qset + i]; 1040 1041 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1042 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1043 &sc->sge.qs[0].rspq.lock; 1044 1045 if (mtx_trylock(lock)) { 1046 /* XXX currently assume that we are *NOT* polling */ 1047 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1048 1049 if (qs->fl[0].credits < qs->fl[0].size - 16) 1050 __refill_fl(sc, &qs->fl[0]); 1051 if (qs->fl[1].credits < qs->fl[1].size - 16) 1052 __refill_fl(sc, &qs->fl[1]); 1053 1054 if (status & (1 << qs->rspq.cntxt_id)) { 1055 if (qs->rspq.credits) { 1056 refill_rspq(sc, &qs->rspq, 1); 1057 qs->rspq.credits--; 1058 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1059 1 << qs->rspq.cntxt_id); 1060 } 1061 } 1062 mtx_unlock(lock); 1063 } 1064 } 1065 } 1066 1067 /** 1068 * init_qset_cntxt - initialize an SGE queue set context info 1069 * @qs: the queue set 1070 * @id: the queue set id 1071 * 1072 * Initializes the TIDs and context ids for the queues of a queue set. 1073 */ 1074 static void 1075 init_qset_cntxt(struct sge_qset *qs, u_int id) 1076 { 1077 1078 qs->rspq.cntxt_id = id; 1079 qs->fl[0].cntxt_id = 2 * id; 1080 qs->fl[1].cntxt_id = 2 * id + 1; 1081 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1082 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1083 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1084 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1085 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1086 1087 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1088 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1089 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1090 } 1091 1092 1093 static void 1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1095 { 1096 txq->in_use += ndesc; 1097 /* 1098 * XXX we don't handle stopping of queue 1099 * presumably start handles this when we bump against the end 1100 */ 1101 txqs->gen = txq->gen; 1102 txq->unacked += ndesc; 1103 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1104 txq->unacked &= 31; 1105 txqs->pidx = txq->pidx; 1106 txq->pidx += ndesc; 1107 #ifdef INVARIANTS 1108 if (((txqs->pidx > txq->cidx) && 1109 (txq->pidx < txqs->pidx) && 1110 (txq->pidx >= txq->cidx)) || 1111 ((txqs->pidx < txq->cidx) && 1112 (txq->pidx >= txq-> cidx)) || 1113 ((txqs->pidx < txq->cidx) && 1114 (txq->cidx < txqs->pidx))) 1115 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1116 txqs->pidx, txq->pidx, txq->cidx); 1117 #endif 1118 if (txq->pidx >= txq->size) { 1119 txq->pidx -= txq->size; 1120 txq->gen ^= 1; 1121 } 1122 1123 } 1124 1125 /** 1126 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1127 * @m: the packet mbufs 1128 * @nsegs: the number of segments 1129 * 1130 * Returns the number of Tx descriptors needed for the given Ethernet 1131 * packet. Ethernet packets require addition of WR and CPL headers. 
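 *
 * As a sketch of the arithmetic: packets of at most PIO_LEN bytes are sent
 * as immediate data and always take a single descriptor; larger packets
 * need sgl_len(nsegs) flits for the gather list plus 2 flits of WR and CPL
 * headers (one more when TSO is in use), and flits_to_desc() maps that
 * flit count onto 1-4 descriptors.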
1132 */ 1133 static __inline unsigned int 1134 calc_tx_descs(const struct mbuf *m, int nsegs) 1135 { 1136 unsigned int flits; 1137 1138 if (m->m_pkthdr.len <= PIO_LEN) 1139 return 1; 1140 1141 flits = sgl_len(nsegs) + 2; 1142 #ifdef TSO_SUPPORTED 1143 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1144 flits++; 1145 #endif 1146 return flits_to_desc(flits); 1147 } 1148 1149 static unsigned int 1150 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1151 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1152 { 1153 struct mbuf *m0; 1154 int err, pktlen, pass = 0; 1155 bus_dma_tag_t tag = txq->entry_tag; 1156 1157 retry: 1158 err = 0; 1159 m0 = *m; 1160 pktlen = m0->m_pkthdr.len; 1161 #if defined(__i386__) || defined(__amd64__) 1162 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1163 goto done; 1164 } else 1165 #endif 1166 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1167 1168 if (err == 0) { 1169 goto done; 1170 } 1171 if (err == EFBIG && pass == 0) { 1172 pass = 1; 1173 /* Too many segments, try to defrag */ 1174 m0 = m_defrag(m0, M_DONTWAIT); 1175 if (m0 == NULL) { 1176 m_freem(*m); 1177 *m = NULL; 1178 return (ENOBUFS); 1179 } 1180 *m = m0; 1181 goto retry; 1182 } else if (err == ENOMEM) { 1183 return (err); 1184 } if (err) { 1185 if (cxgb_debug) 1186 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1187 m_freem(m0); 1188 *m = NULL; 1189 return (err); 1190 } 1191 done: 1192 #if !defined(__i386__) && !defined(__amd64__) 1193 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1194 #endif 1195 txsd->flags |= TX_SW_DESC_MAPPED; 1196 1197 return (0); 1198 } 1199 1200 /** 1201 * make_sgl - populate a scatter/gather list for a packet 1202 * @sgp: the SGL to populate 1203 * @segs: the packet dma segments 1204 * @nsegs: the number of segments 1205 * 1206 * Generates a scatter/gather list for the buffers that make up a packet 1207 * and returns the SGL size in 8-byte words. The caller must size the SGL 1208 * appropriately. 1209 */ 1210 static __inline void 1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1212 { 1213 int i, idx; 1214 1215 for (idx = 0, i = 0; i < nsegs; i++) { 1216 /* 1217 * firmware doesn't like empty segments 1218 */ 1219 if (segs[i].ds_len == 0) 1220 continue; 1221 if (i && idx == 0) 1222 ++sgp; 1223 1224 sgp->len[idx] = htobe32(segs[i].ds_len); 1225 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1226 idx ^= 1; 1227 } 1228 1229 if (idx) { 1230 sgp->len[idx] = 0; 1231 sgp->addr[idx] = 0; 1232 } 1233 } 1234 1235 /** 1236 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1237 * @adap: the adapter 1238 * @q: the Tx queue 1239 * 1240 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1241 * where the HW is going to sleep just after we checked, however, 1242 * then the interrupt handler will detect the outstanding TX packet 1243 * and ring the doorbell for us. 1244 * 1245 * When GTS is disabled we unconditionally ring the doorbell. 
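 *
 * This driver is built with USE_GTS set to 0, so the unconditional path is
 * the one taken: a write barrier to make the descriptors visible, followed
 * by a single write of F_SELEGRCNTX | V_EGRCNTX(cntxt_id) to the kernel
 * doorbell register.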
1246 */ 1247 static __inline void 1248 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1249 { 1250 #if USE_GTS 1251 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1252 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1253 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1254 #ifdef T3_TRACE 1255 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1256 q->cntxt_id); 1257 #endif 1258 t3_write_reg(adap, A_SG_KDOORBELL, 1259 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1260 } 1261 #else 1262 wmb(); /* write descriptors before telling HW */ 1263 t3_write_reg(adap, A_SG_KDOORBELL, 1264 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1265 #endif 1266 } 1267 1268 static __inline void 1269 wr_gen2(struct tx_desc *d, unsigned int gen) 1270 { 1271 #if SGE_NUM_GENBITS == 2 1272 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1273 #endif 1274 } 1275 1276 /** 1277 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1278 * @ndesc: number of Tx descriptors spanned by the SGL 1279 * @txd: first Tx descriptor to be written 1280 * @txqs: txq state (generation and producer index) 1281 * @txq: the SGE Tx queue 1282 * @sgl: the SGL 1283 * @flits: number of flits to the start of the SGL in the first descriptor 1284 * @sgl_flits: the SGL size in flits 1285 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1286 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1287 * 1288 * Write a work request header and an associated SGL. If the SGL is 1289 * small enough to fit into one Tx descriptor it has already been written 1290 * and we just need to write the WR header. Otherwise we distribute the 1291 * SGL across the number of descriptors it spans. 1292 */ 1293 static void 1294 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1295 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1296 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1297 { 1298 1299 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1300 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1301 1302 if (__predict_true(ndesc == 1)) { 1303 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1304 V_WR_SGLSFLT(flits)) | wr_hi, 1305 htonl(V_WR_LEN(flits + sgl_flits) | 1306 V_WR_GEN(txqs->gen)) | wr_lo); 1307 /* XXX gen? 
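		 * (wr_gen2() below is a no-op unless SGE_NUM_GENBITS is 2, in
		 * which case it stamps the generation value into the last
		 * flit of the descriptor.)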
*/ 1308 wr_gen2(txd, txqs->gen); 1309 1310 } else { 1311 unsigned int ogen = txqs->gen; 1312 const uint64_t *fp = (const uint64_t *)sgl; 1313 struct work_request_hdr *wp = wrp; 1314 1315 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1316 V_WR_SGLSFLT(flits)) | wr_hi; 1317 1318 while (sgl_flits) { 1319 unsigned int avail = WR_FLITS - flits; 1320 1321 if (avail > sgl_flits) 1322 avail = sgl_flits; 1323 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1324 sgl_flits -= avail; 1325 ndesc--; 1326 if (!sgl_flits) 1327 break; 1328 1329 fp += avail; 1330 txd++; 1331 txsd++; 1332 if (++txqs->pidx == txq->size) { 1333 txqs->pidx = 0; 1334 txqs->gen ^= 1; 1335 txd = txq->desc; 1336 txsd = txq->sdesc; 1337 } 1338 1339 /* 1340 * when the head of the mbuf chain 1341 * is freed all clusters will be freed 1342 * with it 1343 */ 1344 wrp = (struct work_request_hdr *)txd; 1345 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1346 V_WR_SGLSFLT(1)) | wr_hi; 1347 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1348 sgl_flits + 1)) | 1349 V_WR_GEN(txqs->gen)) | wr_lo; 1350 wr_gen2(txd, txqs->gen); 1351 flits = 1; 1352 } 1353 wrp->wrh_hi |= htonl(F_WR_EOP); 1354 wmb(); 1355 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1356 wr_gen2((struct tx_desc *)wp, ogen); 1357 } 1358 } 1359 1360 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1361 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1362 1363 #ifdef VLAN_SUPPORTED 1364 #define GET_VTAG(cntrl, m) \ 1365 do { \ 1366 if ((m)->m_flags & M_VLANTAG) \ 1367 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1368 } while (0) 1369 1370 #else 1371 #define GET_VTAG(cntrl, m) 1372 #endif 1373 1374 static int 1375 t3_encap(struct sge_qset *qs, struct mbuf **m) 1376 { 1377 adapter_t *sc; 1378 struct mbuf *m0; 1379 struct sge_txq *txq; 1380 struct txq_state txqs; 1381 struct port_info *pi; 1382 unsigned int ndesc, flits, cntrl, mlen; 1383 int err, nsegs, tso_info = 0; 1384 1385 struct work_request_hdr *wrp; 1386 struct tx_sw_desc *txsd; 1387 struct sg_ent *sgp, *sgl; 1388 uint32_t wr_hi, wr_lo, sgl_flits; 1389 bus_dma_segment_t segs[TX_MAX_SEGS]; 1390 1391 struct tx_desc *txd; 1392 1393 pi = qs->port; 1394 sc = pi->adapter; 1395 txq = &qs->txq[TXQ_ETH]; 1396 txd = &txq->desc[txq->pidx]; 1397 txsd = &txq->sdesc[txq->pidx]; 1398 sgl = txq->txq_sgl; 1399 1400 prefetch(txd); 1401 m0 = *m; 1402 1403 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1404 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1405 1406 mtx_assert(&qs->lock, MA_OWNED); 1407 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1408 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1409 1410 #ifdef VLAN_SUPPORTED 1411 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1412 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1413 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1414 #endif 1415 if (m0->m_nextpkt != NULL) { 1416 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1417 ndesc = 1; 1418 mlen = 0; 1419 } else { 1420 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1421 &m0, segs, &nsegs))) { 1422 if (cxgb_debug) 1423 printf("failed ... 
err=%d\n", err); 1424 return (err); 1425 } 1426 mlen = m0->m_pkthdr.len; 1427 ndesc = calc_tx_descs(m0, nsegs); 1428 } 1429 txq_prod(txq, ndesc, &txqs); 1430 1431 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1432 txsd->m = m0; 1433 1434 if (m0->m_nextpkt != NULL) { 1435 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1436 int i, fidx; 1437 1438 if (nsegs > 7) 1439 panic("trying to coalesce %d packets in to one WR", nsegs); 1440 txq->txq_coalesced += nsegs; 1441 wrp = (struct work_request_hdr *)txd; 1442 flits = nsegs*2 + 1; 1443 1444 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1445 struct cpl_tx_pkt_batch_entry *cbe; 1446 uint64_t flit; 1447 uint32_t *hflit = (uint32_t *)&flit; 1448 int cflags = m0->m_pkthdr.csum_flags; 1449 1450 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1451 GET_VTAG(cntrl, m0); 1452 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1453 if (__predict_false(!(cflags & CSUM_IP))) 1454 cntrl |= F_TXPKT_IPCSUM_DIS; 1455 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1456 cntrl |= F_TXPKT_L4CSUM_DIS; 1457 1458 hflit[0] = htonl(cntrl); 1459 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1460 flit |= htobe64(1 << 24); 1461 cbe = &cpl_batch->pkt_entry[i]; 1462 cbe->cntrl = hflit[0]; 1463 cbe->len = hflit[1]; 1464 cbe->addr = htobe64(segs[i].ds_addr); 1465 } 1466 1467 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1468 V_WR_SGLSFLT(flits)) | 1469 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1470 wr_lo = htonl(V_WR_LEN(flits) | 1471 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1472 set_wr_hdr(wrp, wr_hi, wr_lo); 1473 wmb(); 1474 wr_gen2(txd, txqs.gen); 1475 check_ring_tx_db(sc, txq); 1476 return (0); 1477 } else if (tso_info) { 1478 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1479 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1480 struct ip *ip; 1481 struct tcphdr *tcp; 1482 char *pkthdr; 1483 1484 txd->flit[2] = 0; 1485 GET_VTAG(cntrl, m0); 1486 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1487 hdr->cntrl = htonl(cntrl); 1488 hdr->len = htonl(mlen | 0x80000000); 1489 1490 DPRINTF("tso buf len=%d\n", mlen); 1491 1492 tagged = m0->m_flags & M_VLANTAG; 1493 if (!tagged) 1494 min_size -= ETHER_VLAN_ENCAP_LEN; 1495 1496 if (__predict_false(mlen < min_size)) { 1497 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1498 m0, mlen, m0->m_pkthdr.tso_segsz, 1499 m0->m_pkthdr.csum_flags, m0->m_flags); 1500 panic("tx tso packet too small"); 1501 } 1502 1503 /* Make sure that ether, ip, tcp headers are all in m0 */ 1504 if (__predict_false(m0->m_len < min_size)) { 1505 m0 = m_pullup(m0, min_size); 1506 if (__predict_false(m0 == NULL)) { 1507 /* XXX panic probably an overreaction */ 1508 panic("couldn't fit header into mbuf"); 1509 } 1510 } 1511 pkthdr = m0->m_data; 1512 1513 if (tagged) { 1514 eth_type = CPL_ETH_II_VLAN; 1515 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1516 ETHER_VLAN_ENCAP_LEN); 1517 } else { 1518 eth_type = CPL_ETH_II; 1519 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1520 } 1521 tcp = (struct tcphdr *)((uint8_t *)ip + 1522 sizeof(*ip)); 1523 1524 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1525 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1526 V_LSO_TCPHDR_WORDS(tcp->th_off); 1527 hdr->lso_info = htonl(tso_info); 1528 1529 if (__predict_false(mlen <= PIO_LEN)) { 1530 /* pkt not undersized but fits in PIO_LEN 1531 * Indicates a TSO bug at the higher levels. 
1532 * 1533 */ 1534 DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1535 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); 1536 txsd->m = NULL; 1537 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1538 flits = (mlen + 7) / 8 + 3; 1539 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1540 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1541 F_WR_SOP | F_WR_EOP | txqs.compl); 1542 wr_lo = htonl(V_WR_LEN(flits) | 1543 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1544 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1545 wmb(); 1546 wr_gen2(txd, txqs.gen); 1547 check_ring_tx_db(sc, txq); 1548 return (0); 1549 } 1550 flits = 3; 1551 } else { 1552 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1553 1554 GET_VTAG(cntrl, m0); 1555 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1556 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1557 cntrl |= F_TXPKT_IPCSUM_DIS; 1558 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1559 cntrl |= F_TXPKT_L4CSUM_DIS; 1560 cpl->cntrl = htonl(cntrl); 1561 cpl->len = htonl(mlen | 0x80000000); 1562 1563 if (mlen <= PIO_LEN) { 1564 txsd->m = NULL; 1565 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1566 flits = (mlen + 7) / 8 + 2; 1567 1568 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1569 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1570 F_WR_SOP | F_WR_EOP | txqs.compl); 1571 wr_lo = htonl(V_WR_LEN(flits) | 1572 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1573 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1574 wmb(); 1575 wr_gen2(txd, txqs.gen); 1576 check_ring_tx_db(sc, txq); 1577 return (0); 1578 } 1579 flits = 2; 1580 } 1581 wrp = (struct work_request_hdr *)txd; 1582 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1583 make_sgl(sgp, segs, nsegs); 1584 1585 sgl_flits = sgl_len(nsegs); 1586 1587 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1588 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1589 wr_lo = htonl(V_WR_TID(txq->token)); 1590 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1591 sgl_flits, wr_hi, wr_lo); 1592 check_ring_tx_db(pi->adapter, txq); 1593 1594 return (0); 1595 } 1596 1597 void 1598 cxgb_tx_watchdog(void *arg) 1599 { 1600 struct sge_qset *qs = arg; 1601 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1602 1603 if (qs->coalescing != 0 && 1604 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1605 TXQ_RING_EMPTY(qs)) 1606 qs->coalescing = 0; 1607 else if (qs->coalescing == 0 && 1608 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1609 qs->coalescing = 1; 1610 if (TXQ_TRYLOCK(qs)) { 1611 qs->qs_flags |= QS_FLUSHING; 1612 cxgb_start_locked(qs); 1613 qs->qs_flags &= ~QS_FLUSHING; 1614 TXQ_UNLOCK(qs); 1615 } 1616 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1617 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1618 qs, txq->txq_watchdog.c_cpu); 1619 } 1620 1621 static void 1622 cxgb_tx_timeout(void *arg) 1623 { 1624 struct sge_qset *qs = arg; 1625 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1626 1627 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1628 qs->coalescing = 1; 1629 if (TXQ_TRYLOCK(qs)) { 1630 qs->qs_flags |= QS_TIMEOUT; 1631 cxgb_start_locked(qs); 1632 qs->qs_flags &= ~QS_TIMEOUT; 1633 TXQ_UNLOCK(qs); 1634 } 1635 } 1636 1637 static void 1638 cxgb_start_locked(struct sge_qset *qs) 1639 { 1640 struct mbuf *m_head = NULL; 1641 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1642 int avail, txmax; 1643 int in_use_init = txq->in_use; 1644 struct port_info *pi = qs->port; 1645 struct ifnet *ifp = pi->ifp; 1646 avail = txq->size - txq->in_use - 4; 1647 txmax = 
min(TX_START_MAX_DESC, avail); 1648 1649 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1650 reclaim_completed_tx(qs, 0, TXQ_ETH); 1651 1652 if (!pi->link_config.link_ok) { 1653 TXQ_RING_FLUSH(qs); 1654 return; 1655 } 1656 TXQ_LOCK_ASSERT(qs); 1657 while ((txq->in_use - in_use_init < txmax) && 1658 !TXQ_RING_EMPTY(qs) && 1659 (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1660 pi->link_config.link_ok) { 1661 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1662 1663 if ((m_head = cxgb_dequeue(qs)) == NULL) 1664 break; 1665 /* 1666 * Encapsulation can modify our pointer, and or make it 1667 * NULL on failure. In that event, we can't requeue. 1668 */ 1669 if (t3_encap(qs, &m_head) || m_head == NULL) 1670 break; 1671 1672 /* Send a copy of the frame to the BPF listener */ 1673 ETHER_BPF_MTAP(ifp, m_head); 1674 1675 /* 1676 * We sent via PIO, no longer need a copy 1677 */ 1678 if (m_head->m_nextpkt == NULL && 1679 m_head->m_pkthdr.len <= PIO_LEN) 1680 m_freem(m_head); 1681 1682 m_head = NULL; 1683 } 1684 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1685 pi->link_config.link_ok) 1686 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1687 qs, txq->txq_timer.c_cpu); 1688 if (m_head != NULL) 1689 m_freem(m_head); 1690 } 1691 1692 static int 1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1694 { 1695 struct port_info *pi = qs->port; 1696 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1697 struct buf_ring *br = txq->txq_mr; 1698 int error, avail; 1699 1700 avail = txq->size - txq->in_use; 1701 TXQ_LOCK_ASSERT(qs); 1702 1703 /* 1704 * We can only do a direct transmit if the following are true: 1705 * - we aren't coalescing (ring < 3/4 full) 1706 * - the link is up -- checked in caller 1707 * - there are no packets enqueued already 1708 * - there is space in hardware transmit queue 1709 */ 1710 if (check_pkt_coalesce(qs) == 0 && 1711 TXQ_RING_EMPTY(qs) && avail > 4) { 1712 if (t3_encap(qs, &m)) { 1713 if (m != NULL && 1714 (error = drbr_enqueue(ifp, br, m)) != 0) 1715 return (error); 1716 } else { 1717 /* 1718 * We've bypassed the buf ring so we need to update 1719 * the stats directly 1720 */ 1721 txq->txq_direct_packets++; 1722 txq->txq_direct_bytes += m->m_pkthdr.len; 1723 /* 1724 ** Send a copy of the frame to the BPF 1725 ** listener and set the watchdog on. 
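			** (Frames of at most PIO_LEN bytes were copied into
			** the descriptor ring by t3_encap(), so the mbuf can
			** be freed immediately below.)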
1726 */ 1727 ETHER_BPF_MTAP(ifp, m); 1728 /* 1729 * We sent via PIO, no longer need a copy 1730 */ 1731 if (m->m_pkthdr.len <= PIO_LEN) 1732 m_freem(m); 1733 1734 } 1735 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1736 return (error); 1737 1738 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1739 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1740 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1741 cxgb_start_locked(qs); 1742 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1743 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1744 qs, txq->txq_timer.c_cpu); 1745 return (0); 1746 } 1747 1748 int 1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1750 { 1751 struct sge_qset *qs; 1752 struct port_info *pi = ifp->if_softc; 1753 int error, qidx = pi->first_qset; 1754 1755 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1756 ||(!pi->link_config.link_ok)) { 1757 m_freem(m); 1758 return (0); 1759 } 1760 1761 if (m->m_flags & M_FLOWID) 1762 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1763 1764 qs = &pi->adapter->sge.qs[qidx]; 1765 1766 if (TXQ_TRYLOCK(qs)) { 1767 /* XXX running */ 1768 error = cxgb_transmit_locked(ifp, qs, m); 1769 TXQ_UNLOCK(qs); 1770 } else 1771 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1772 return (error); 1773 } 1774 void 1775 cxgb_start(struct ifnet *ifp) 1776 { 1777 struct port_info *pi = ifp->if_softc; 1778 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset]; 1779 1780 if (!pi->link_config.link_ok) 1781 return; 1782 1783 TXQ_LOCK(qs); 1784 cxgb_start_locked(qs); 1785 TXQ_UNLOCK(qs); 1786 } 1787 1788 void 1789 cxgb_qflush(struct ifnet *ifp) 1790 { 1791 /* 1792 * flush any enqueued mbufs in the buf_rings 1793 * and in the transmit queues 1794 * no-op for now 1795 */ 1796 return; 1797 } 1798 1799 /** 1800 * write_imm - write a packet into a Tx descriptor as immediate data 1801 * @d: the Tx descriptor to write 1802 * @m: the packet 1803 * @len: the length of packet data to write as immediate data 1804 * @gen: the generation bit value to write 1805 * 1806 * Writes a packet as immediate data into a Tx descriptor. The packet 1807 * contains a work request at its beginning. We must write the packet 1808 * carefully so the SGE doesn't read accidentally before it's written in 1809 * its entirety. 
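 *
 * Concretely: the body of the work request is copied first, then
 * set_wr_hdr() fills in the header with the SOP/EOP flags and byte count,
 * and a write barrier is issued before wr_gen2() publishes the generation
 * bit in the descriptor's last flit.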
1810 */ 1811 static __inline void 1812 write_imm(struct tx_desc *d, struct mbuf *m, 1813 unsigned int len, unsigned int gen) 1814 { 1815 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1816 struct work_request_hdr *to = (struct work_request_hdr *)d; 1817 uint32_t wr_hi, wr_lo; 1818 1819 if (len > WR_LEN) 1820 panic("len too big %d\n", len); 1821 if (len < sizeof(*from)) 1822 panic("len too small %d", len); 1823 1824 memcpy(&to[1], &from[1], len - sizeof(*from)); 1825 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1826 V_WR_BCNTLFLT(len & 7)); 1827 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | 1828 V_WR_LEN((len + 7) / 8)); 1829 set_wr_hdr(to, wr_hi, wr_lo); 1830 wmb(); 1831 wr_gen2(d, gen); 1832 1833 /* 1834 * This check is a hack we should really fix the logic so 1835 * that this can't happen 1836 */ 1837 if (m->m_type != MT_DONTFREE) 1838 m_freem(m); 1839 1840 } 1841 1842 /** 1843 * check_desc_avail - check descriptor availability on a send queue 1844 * @adap: the adapter 1845 * @q: the TX queue 1846 * @m: the packet needing the descriptors 1847 * @ndesc: the number of Tx descriptors needed 1848 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1849 * 1850 * Checks if the requested number of Tx descriptors is available on an 1851 * SGE send queue. If the queue is already suspended or not enough 1852 * descriptors are available the packet is queued for later transmission. 1853 * Must be called with the Tx queue locked. 1854 * 1855 * Returns 0 if enough descriptors are available, 1 if there aren't 1856 * enough descriptors and the packet has been queued, and 2 if the caller 1857 * needs to retry because there weren't enough descriptors at the 1858 * beginning of the call but some freed up in the mean time. 1859 */ 1860 static __inline int 1861 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1862 struct mbuf *m, unsigned int ndesc, 1863 unsigned int qid) 1864 { 1865 /* 1866 * XXX We currently only use this for checking the control queue 1867 * the control queue is only used for binding qsets which happens 1868 * at init time so we are guaranteed enough descriptors 1869 */ 1870 if (__predict_false(!mbufq_empty(&q->sendq))) { 1871 addq_exit: mbufq_tail(&q->sendq, m); 1872 return 1; 1873 } 1874 if (__predict_false(q->size - q->in_use < ndesc)) { 1875 1876 struct sge_qset *qs = txq_to_qset(q, qid); 1877 1878 setbit(&qs->txq_stopped, qid); 1879 if (should_restart_tx(q) && 1880 test_and_clear_bit(qid, &qs->txq_stopped)) 1881 return 2; 1882 1883 q->stops++; 1884 goto addq_exit; 1885 } 1886 return 0; 1887 } 1888 1889 1890 /** 1891 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1892 * @q: the SGE control Tx queue 1893 * 1894 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1895 * that send only immediate data (presently just the control queues) and 1896 * thus do not have any mbufs 1897 */ 1898 static __inline void 1899 reclaim_completed_tx_imm(struct sge_txq *q) 1900 { 1901 unsigned int reclaim = q->processed - q->cleaned; 1902 1903 q->in_use -= reclaim; 1904 q->cleaned += reclaim; 1905 } 1906 1907 static __inline int 1908 immediate(const struct mbuf *m) 1909 { 1910 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1911 } 1912 1913 /** 1914 * ctrl_xmit - send a packet through an SGE control Tx queue 1915 * @adap: the adapter 1916 * @q: the control queue 1917 * @m: the packet 1918 * 1919 * Send a packet through an SGE control Tx queue. 
Packets sent through 1920 * a control queue must fit entirely as immediate data in a single Tx 1921 * descriptor and have no page fragments. 1922 */ 1923 static int 1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1925 { 1926 int ret; 1927 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1928 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1929 1930 if (__predict_false(!immediate(m))) { 1931 m_freem(m); 1932 return 0; 1933 } 1934 1935 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1936 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1937 1938 TXQ_LOCK(qs); 1939 again: reclaim_completed_tx_imm(q); 1940 1941 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1942 if (__predict_false(ret)) { 1943 if (ret == 1) { 1944 TXQ_UNLOCK(qs); 1945 return (ENOSPC); 1946 } 1947 goto again; 1948 } 1949 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1950 1951 q->in_use++; 1952 if (++q->pidx >= q->size) { 1953 q->pidx = 0; 1954 q->gen ^= 1; 1955 } 1956 TXQ_UNLOCK(qs); 1957 wmb(); 1958 t3_write_reg(adap, A_SG_KDOORBELL, 1959 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1960 return (0); 1961 } 1962 1963 1964 /** 1965 * restart_ctrlq - restart a suspended control queue 1966 * @qs: the queue set cotaining the control queue 1967 * 1968 * Resumes transmission on a suspended Tx control queue. 1969 */ 1970 static void 1971 restart_ctrlq(void *data, int npending) 1972 { 1973 struct mbuf *m; 1974 struct sge_qset *qs = (struct sge_qset *)data; 1975 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1976 adapter_t *adap = qs->port->adapter; 1977 1978 TXQ_LOCK(qs); 1979 again: reclaim_completed_tx_imm(q); 1980 1981 while (q->in_use < q->size && 1982 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1983 1984 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1985 1986 if (++q->pidx >= q->size) { 1987 q->pidx = 0; 1988 q->gen ^= 1; 1989 } 1990 q->in_use++; 1991 } 1992 if (!mbufq_empty(&q->sendq)) { 1993 setbit(&qs->txq_stopped, TXQ_CTRL); 1994 1995 if (should_restart_tx(q) && 1996 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1997 goto again; 1998 q->stops++; 1999 } 2000 TXQ_UNLOCK(qs); 2001 t3_write_reg(adap, A_SG_KDOORBELL, 2002 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2003 } 2004 2005 2006 /* 2007 * Send a management message through control queue 0 2008 */ 2009 int 2010 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2011 { 2012 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2013 } 2014 2015 /** 2016 * free_qset - free the resources of an SGE queue set 2017 * @sc: the controller owning the queue set 2018 * @q: the queue set 2019 * 2020 * Release the HW and SW resources associated with an SGE queue set, such 2021 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2022 * queue set must be quiesced prior to calling this. 
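 *
 * Illustrative call pattern (this mirrors t3_free_sge_resources()
 * below; the caller acquires the queue set lock and this routine
 * unlocks and destroys it during teardown):
 *
 *	TXQ_LOCK(qs);
 *	t3_free_qset(sc, qs);	(qs->lock no longer exists on return)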
2023 */ 2024 static void 2025 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2026 { 2027 int i; 2028 2029 reclaim_completed_tx(q, 0, TXQ_ETH); 2030 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2031 if (q->txq[i].txq_mr != NULL) 2032 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); 2033 if (q->txq[i].txq_ifq != NULL) { 2034 ifq_delete(q->txq[i].txq_ifq); 2035 free(q->txq[i].txq_ifq, M_DEVBUF); 2036 } 2037 } 2038 2039 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2040 if (q->fl[i].desc) { 2041 mtx_lock_spin(&sc->sge.reg_lock); 2042 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2043 mtx_unlock_spin(&sc->sge.reg_lock); 2044 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2045 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2046 q->fl[i].desc_map); 2047 bus_dma_tag_destroy(q->fl[i].desc_tag); 2048 bus_dma_tag_destroy(q->fl[i].entry_tag); 2049 } 2050 if (q->fl[i].sdesc) { 2051 free_rx_bufs(sc, &q->fl[i]); 2052 free(q->fl[i].sdesc, M_DEVBUF); 2053 } 2054 } 2055 2056 mtx_unlock(&q->lock); 2057 MTX_DESTROY(&q->lock); 2058 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2059 if (q->txq[i].desc) { 2060 mtx_lock_spin(&sc->sge.reg_lock); 2061 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2062 mtx_unlock_spin(&sc->sge.reg_lock); 2063 bus_dmamap_unload(q->txq[i].desc_tag, 2064 q->txq[i].desc_map); 2065 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2066 q->txq[i].desc_map); 2067 bus_dma_tag_destroy(q->txq[i].desc_tag); 2068 bus_dma_tag_destroy(q->txq[i].entry_tag); 2069 } 2070 if (q->txq[i].sdesc) { 2071 free(q->txq[i].sdesc, M_DEVBUF); 2072 } 2073 } 2074 2075 if (q->rspq.desc) { 2076 mtx_lock_spin(&sc->sge.reg_lock); 2077 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2078 mtx_unlock_spin(&sc->sge.reg_lock); 2079 2080 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2081 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2082 q->rspq.desc_map); 2083 bus_dma_tag_destroy(q->rspq.desc_tag); 2084 MTX_DESTROY(&q->rspq.lock); 2085 } 2086 2087 #ifdef LRO_SUPPORTED 2088 tcp_lro_free(&q->lro.ctrl); 2089 #endif 2090 2091 bzero(q, sizeof(*q)); 2092 } 2093 2094 /** 2095 * t3_free_sge_resources - free SGE resources 2096 * @sc: the adapter softc 2097 * 2098 * Frees resources used by the SGE queue sets. 2099 */ 2100 void 2101 t3_free_sge_resources(adapter_t *sc) 2102 { 2103 int i, nqsets; 2104 2105 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2106 nqsets += sc->port[i].nqsets; 2107 2108 for (i = 0; i < nqsets; ++i) { 2109 TXQ_LOCK(&sc->sge.qs[i]); 2110 t3_free_qset(sc, &sc->sge.qs[i]); 2111 } 2112 2113 } 2114 2115 /** 2116 * t3_sge_start - enable SGE 2117 * @sc: the controller softc 2118 * 2119 * Enables the SGE for DMAs. This is the last step in starting packet 2120 * transfers. 2121 */ 2122 void 2123 t3_sge_start(adapter_t *sc) 2124 { 2125 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2126 } 2127 2128 /** 2129 * t3_sge_stop - disable SGE operation 2130 * @sc: the adapter 2131 * 2132 * Disables the DMA engine. This can be called in emeregencies (e.g., 2133 * from error interrupts) or from normal process context. In the latter 2134 * case it also disables any pending queue restart tasklets. Note that 2135 * if it is called in interrupt context it cannot disable the restart 2136 * tasklets as it cannot wait, however the tasklets will have no effect 2137 * since the doorbells are disabled and the driver will call this again 2138 * later from process context, at which time the tasklets will be stopped 2139 * if they are still running. 
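 *
 * At the register level this is simply the inverse of t3_sge_start():
 *
 *	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
 *	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
 *
 * where the first form enables the SGE and the second (used below)
 * disables it.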
2140 */ 2141 void 2142 t3_sge_stop(adapter_t *sc) 2143 { 2144 int i, nqsets; 2145 2146 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2147 2148 if (sc->tq == NULL) 2149 return; 2150 2151 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2152 nqsets += sc->port[i].nqsets; 2153 #ifdef notyet 2154 /* 2155 * 2156 * XXX 2157 */ 2158 for (i = 0; i < nqsets; ++i) { 2159 struct sge_qset *qs = &sc->sge.qs[i]; 2160 2161 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2162 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2163 } 2164 #endif 2165 } 2166 2167 /** 2168 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2169 * @adapter: the adapter 2170 * @q: the Tx queue to reclaim descriptors from 2171 * @reclaimable: the number of descriptors to reclaim 2172 * @m_vec_size: maximum number of buffers to reclaim 2173 * @desc_reclaimed: returns the number of descriptors reclaimed 2174 * 2175 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2176 * Tx buffers. Called with the Tx queue lock held. 2177 * 2178 * Returns number of buffers of reclaimed 2179 */ 2180 void 2181 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2182 { 2183 struct tx_sw_desc *txsd; 2184 unsigned int cidx, mask; 2185 struct sge_txq *q = &qs->txq[queue]; 2186 2187 #ifdef T3_TRACE 2188 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2189 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2190 #endif 2191 cidx = q->cidx; 2192 mask = q->size - 1; 2193 txsd = &q->sdesc[cidx]; 2194 2195 mtx_assert(&qs->lock, MA_OWNED); 2196 while (reclaimable--) { 2197 prefetch(q->sdesc[(cidx + 1) & mask].m); 2198 prefetch(q->sdesc[(cidx + 2) & mask].m); 2199 2200 if (txsd->m != NULL) { 2201 if (txsd->flags & TX_SW_DESC_MAPPED) { 2202 bus_dmamap_unload(q->entry_tag, txsd->map); 2203 txsd->flags &= ~TX_SW_DESC_MAPPED; 2204 } 2205 m_freem_list(txsd->m); 2206 txsd->m = NULL; 2207 } else 2208 q->txq_skipped++; 2209 2210 ++txsd; 2211 if (++cidx == q->size) { 2212 cidx = 0; 2213 txsd = q->sdesc; 2214 } 2215 } 2216 q->cidx = cidx; 2217 2218 } 2219 2220 /** 2221 * is_new_response - check if a response is newly written 2222 * @r: the response descriptor 2223 * @q: the response queue 2224 * 2225 * Returns true if a response descriptor contains a yet unprocessed 2226 * response. 2227 */ 2228 static __inline int 2229 is_new_response(const struct rsp_desc *r, 2230 const struct sge_rspq *q) 2231 { 2232 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2233 } 2234 2235 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2236 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2237 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2238 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2239 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2240 2241 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2242 #define NOMEM_INTR_DELAY 2500 2243 2244 /** 2245 * write_ofld_wr - write an offload work request 2246 * @adap: the adapter 2247 * @m: the packet to send 2248 * @q: the Tx queue 2249 * @pidx: index of the first Tx descriptor to write 2250 * @gen: the generation value to use 2251 * @ndesc: number of descriptors the packet will occupy 2252 * 2253 * Write an offload work request to send the supplied packet. The packet 2254 * data already carry the work request with most fields populated. 
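 *
 * A sketch of the decision made in the body below:
 *
 *	if (immediate(m) && nsegs == 0)
 *		write_imm(d, m, m->m_len, gen);	(whole WR in one descriptor)
 *	else
 *		copy the WR header, build the SGL (inline when ndesc == 1,
 *		otherwise in the local sgl[] array) and finish via
 *		write_wr_hdr_sgl()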
2255 */ 2256 static void 2257 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2258 struct sge_txq *q, unsigned int pidx, 2259 unsigned int gen, unsigned int ndesc, 2260 bus_dma_segment_t *segs, unsigned int nsegs) 2261 { 2262 unsigned int sgl_flits, flits; 2263 struct work_request_hdr *from; 2264 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2265 struct tx_desc *d = &q->desc[pidx]; 2266 struct txq_state txqs; 2267 2268 if (immediate(m) && nsegs == 0) { 2269 write_imm(d, m, m->m_len, gen); 2270 return; 2271 } 2272 2273 /* Only TX_DATA builds SGLs */ 2274 from = mtod(m, struct work_request_hdr *); 2275 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2276 2277 flits = m->m_len / 8; 2278 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2279 2280 make_sgl(sgp, segs, nsegs); 2281 sgl_flits = sgl_len(nsegs); 2282 2283 txqs.gen = gen; 2284 txqs.pidx = pidx; 2285 txqs.compl = 0; 2286 2287 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2288 from->wrh_hi, from->wrh_lo); 2289 } 2290 2291 /** 2292 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2293 * @m: the packet 2294 * 2295 * Returns the number of Tx descriptors needed for the given offload 2296 * packet. These packets are already fully constructed. 2297 */ 2298 static __inline unsigned int 2299 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2300 { 2301 unsigned int flits, cnt = 0; 2302 int ndescs; 2303 2304 if (m->m_len <= WR_LEN && nsegs == 0) 2305 return (1); /* packet fits as immediate data */ 2306 2307 /* 2308 * This needs to be re-visited for TOE 2309 */ 2310 2311 cnt = nsegs; 2312 2313 /* headers */ 2314 flits = m->m_len / 8; 2315 2316 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2317 2318 return (ndescs); 2319 } 2320 2321 /** 2322 * ofld_xmit - send a packet through an offload queue 2323 * @adap: the adapter 2324 * @q: the Tx offload queue 2325 * @m: the packet 2326 * 2327 * Send an offload packet through an SGE offload queue. 
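 *
 * Producer-index bookkeeping example (hypothetical numbers): with
 * q->size = 1024, q->pidx = 1022 and ndesc = 4, the code below adds 4
 * to q->in_use, wraps q->pidx to 1026 - 1024 = 2, and flips q->gen so
 * the hardware can distinguish the freshly written descriptors from
 * those of the previous pass over the ring.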
2328 */ 2329 static int 2330 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2331 { 2332 int ret, nsegs; 2333 unsigned int ndesc; 2334 unsigned int pidx, gen; 2335 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2336 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2337 struct tx_sw_desc *stx; 2338 2339 nsegs = m_get_sgllen(m); 2340 vsegs = m_get_sgl(m); 2341 ndesc = calc_tx_descs_ofld(m, nsegs); 2342 busdma_map_sgl(vsegs, segs, nsegs); 2343 2344 stx = &q->sdesc[q->pidx]; 2345 2346 TXQ_LOCK(qs); 2347 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2348 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2349 if (__predict_false(ret)) { 2350 if (ret == 1) { 2351 printf("no ofld desc avail\n"); 2352 2353 m_set_priority(m, ndesc); /* save for restart */ 2354 TXQ_UNLOCK(qs); 2355 return (EINTR); 2356 } 2357 goto again; 2358 } 2359 2360 gen = q->gen; 2361 q->in_use += ndesc; 2362 pidx = q->pidx; 2363 q->pidx += ndesc; 2364 if (q->pidx >= q->size) { 2365 q->pidx -= q->size; 2366 q->gen ^= 1; 2367 } 2368 #ifdef T3_TRACE 2369 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2370 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2371 ndesc, pidx, skb->len, skb->len - skb->data_len, 2372 skb_shinfo(skb)->nr_frags); 2373 #endif 2374 TXQ_UNLOCK(qs); 2375 2376 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2377 check_ring_tx_db(adap, q); 2378 return (0); 2379 } 2380 2381 /** 2382 * restart_offloadq - restart a suspended offload queue 2383 * @qs: the queue set cotaining the offload queue 2384 * 2385 * Resumes transmission on a suspended Tx offload queue. 2386 */ 2387 static void 2388 restart_offloadq(void *data, int npending) 2389 { 2390 struct mbuf *m; 2391 struct sge_qset *qs = data; 2392 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2393 adapter_t *adap = qs->port->adapter; 2394 bus_dma_segment_t segs[TX_MAX_SEGS]; 2395 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2396 int nsegs, cleaned; 2397 2398 TXQ_LOCK(qs); 2399 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2400 2401 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2402 unsigned int gen, pidx; 2403 unsigned int ndesc = m_get_priority(m); 2404 2405 if (__predict_false(q->size - q->in_use < ndesc)) { 2406 setbit(&qs->txq_stopped, TXQ_OFLD); 2407 if (should_restart_tx(q) && 2408 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2409 goto again; 2410 q->stops++; 2411 break; 2412 } 2413 2414 gen = q->gen; 2415 q->in_use += ndesc; 2416 pidx = q->pidx; 2417 q->pidx += ndesc; 2418 if (q->pidx >= q->size) { 2419 q->pidx -= q->size; 2420 q->gen ^= 1; 2421 } 2422 2423 (void)mbufq_dequeue(&q->sendq); 2424 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2425 TXQ_UNLOCK(qs); 2426 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2427 TXQ_LOCK(qs); 2428 } 2429 #if USE_GTS 2430 set_bit(TXQ_RUNNING, &q->flags); 2431 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2432 #endif 2433 TXQ_UNLOCK(qs); 2434 wmb(); 2435 t3_write_reg(adap, A_SG_KDOORBELL, 2436 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2437 } 2438 2439 /** 2440 * queue_set - return the queue set a packet should use 2441 * @m: the packet 2442 * 2443 * Maps a packet to the SGE queue set it should use. The desired queue 2444 * set is carried in bits 1-3 in the packet's priority. 2445 */ 2446 static __inline int 2447 queue_set(const struct mbuf *m) 2448 { 2449 return m_get_priority(m) >> 1; 2450 } 2451 2452 /** 2453 * is_ctrl_pkt - return whether an offload packet is a control packet 2454 * @m: the packet 2455 * 2456 * Determines whether an offload packet should use an OFLD or a CTRL 2457 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2458 */ 2459 static __inline int 2460 is_ctrl_pkt(const struct mbuf *m) 2461 { 2462 return m_get_priority(m) & 1; 2463 } 2464 2465 /** 2466 * t3_offload_tx - send an offload packet 2467 * @tdev: the offload device to send to 2468 * @m: the packet 2469 * 2470 * Sends an offload packet. We use the packet priority to select the 2471 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2472 * should be sent as regular or control, bits 1-3 select the queue set. 2473 */ 2474 int 2475 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2476 { 2477 adapter_t *adap = tdev2adap(tdev); 2478 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2479 2480 if (__predict_false(is_ctrl_pkt(m))) 2481 return ctrl_xmit(adap, qs, m); 2482 2483 return ofld_xmit(adap, qs, m); 2484 } 2485 2486 /** 2487 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2488 * @tdev: the offload device that will be receiving the packets 2489 * @q: the SGE response queue that assembled the bundle 2490 * @m: the partial bundle 2491 * @n: the number of packets in the bundle 2492 * 2493 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2494 */ 2495 static __inline void 2496 deliver_partial_bundle(struct t3cdev *tdev, 2497 struct sge_rspq *q, 2498 struct mbuf *mbufs[], int n) 2499 { 2500 if (n) { 2501 q->offload_bundles++; 2502 cxgb_ofld_recv(tdev, mbufs, n); 2503 } 2504 } 2505 2506 static __inline int 2507 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2508 struct mbuf *m, struct mbuf *rx_gather[], 2509 unsigned int gather_idx) 2510 { 2511 2512 rq->offload_pkts++; 2513 m->m_pkthdr.header = mtod(m, void *); 2514 rx_gather[gather_idx++] = m; 2515 if (gather_idx == RX_BUNDLE_SIZE) { 2516 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2517 gather_idx = 0; 2518 rq->offload_bundles++; 2519 } 2520 return (gather_idx); 2521 } 2522 2523 static void 2524 restart_tx(struct sge_qset *qs) 2525 { 2526 struct adapter *sc = qs->port->adapter; 2527 2528 2529 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2530 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2531 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2532 qs->txq[TXQ_OFLD].restarts++; 2533 DPRINTF("restarting TXQ_OFLD\n"); 2534 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2535 } 2536 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2537 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2538 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2539 qs->txq[TXQ_CTRL].in_use); 2540 2541 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2542 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2543 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2544 qs->txq[TXQ_CTRL].restarts++; 2545 DPRINTF("restarting TXQ_CTRL\n"); 2546 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2547 } 2548 } 2549 2550 /** 2551 * t3_sge_alloc_qset - initialize an SGE queue set 2552 * @sc: the controller softc 2553 * @id: the queue set id 2554 * @nports: how many Ethernet ports will be using this queue set 2555 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2556 * @p: configuration parameters for this queue set 2557 * @ntxq: number of Tx queues for the queue set 2558 * @pi: port info for queue set 2559 * 2560 * Allocate resources and initialize an SGE queue set. A queue set 2561 * comprises a response queue, two Rx free-buffer queues, and up to 3 2562 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2563 * queue, offload queue, and control queue. 2564 */ 2565 int 2566 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2567 const struct qset_params *p, int ntxq, struct port_info *pi) 2568 { 2569 struct sge_qset *q = &sc->sge.qs[id]; 2570 int i, ret = 0; 2571 2572 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2573 q->port = pi; 2574 2575 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2576 2577 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2578 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2579 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2580 goto err; 2581 } 2582 if ((q->txq[i].txq_ifq = 2583 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2584 == NULL) { 2585 device_printf(sc->dev, "failed to allocate ifq\n"); 2586 goto err; 2587 } 2588 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2589 callout_init(&q->txq[i].txq_timer, 1); 2590 callout_init(&q->txq[i].txq_watchdog, 1); 2591 q->txq[i].txq_timer.c_cpu = id % mp_ncpus; 2592 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; 2593 } 2594 init_qset_cntxt(q, id); 2595 q->idx = id; 2596 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2597 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2598 &q->fl[0].desc, &q->fl[0].sdesc, 2599 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2600 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2601 printf("error %d from alloc ring fl0\n", ret); 2602 goto err; 2603 } 2604 2605 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2606 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2607 &q->fl[1].desc, &q->fl[1].sdesc, 2608 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2609 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2610 printf("error %d from alloc ring fl1\n", ret); 2611 goto err; 2612 } 2613 2614 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2615 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2616 &q->rspq.desc_tag, &q->rspq.desc_map, 2617 NULL, NULL)) != 0) { 2618 printf("error %d from alloc ring rspq\n", ret); 2619 goto err; 2620 } 2621 2622 for (i = 0; i < ntxq; ++i) { 2623 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2624 2625 if ((ret = alloc_ring(sc, p->txq_size[i], 2626 sizeof(struct tx_desc), sz, 2627 &q->txq[i].phys_addr, &q->txq[i].desc, 2628 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2629 &q->txq[i].desc_map, 2630 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2631 printf("error %d from alloc ring tx %i\n", ret, i); 2632 goto err; 2633 } 2634 mbufq_init(&q->txq[i].sendq); 2635 q->txq[i].gen = 1; 2636 q->txq[i].size = p->txq_size[i]; 2637 } 2638 2639 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2640 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2641 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2642 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2643 2644 q->fl[0].gen = q->fl[1].gen = 1; 2645 q->fl[0].size = p->fl_size; 2646 q->fl[1].size = p->jumbo_size; 2647 2648 q->rspq.gen = 1; 2649 q->rspq.cidx = 0; 2650 q->rspq.size = p->rspq_size; 2651 2652 q->txq[TXQ_ETH].stop_thres = nports * 2653 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2654 2655 q->fl[0].buf_size = MCLBYTES; 2656 q->fl[0].zone = zone_pack; 2657 q->fl[0].type = EXT_PACKET; 2658 #if __FreeBSD_version > 800000 2659 if (cxgb_use_16k_clusters) { 2660 q->fl[1].buf_size = MJUM16BYTES; 2661 q->fl[1].zone = zone_jumbo16; 2662 q->fl[1].type = EXT_JUMBO16; 2663 } else { 2664 q->fl[1].buf_size = MJUM9BYTES; 2665 q->fl[1].zone = zone_jumbo9; 2666 q->fl[1].type = EXT_JUMBO9; 2667 } 2668 #else 2669 q->fl[1].buf_size = MJUMPAGESIZE; 2670 q->fl[1].zone = zone_jumbop; 2671 q->fl[1].type = EXT_JUMBOP; 2672 #endif 2673 2674 #ifdef LRO_SUPPORTED 2675 /* Allocate and setup the lro_ctrl structure */ 2676 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2677 ret = tcp_lro_init(&q->lro.ctrl); 2678 if (ret) { 2679 printf("error %d from tcp_lro_init\n", ret); 2680 goto err; 2681 } 2682 q->lro.ctrl.ifp = pi->ifp; 2683 #endif 2684 2685 mtx_lock_spin(&sc->sge.reg_lock); 2686 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2687 q->rspq.phys_addr, q->rspq.size, 2688 q->fl[0].buf_size, 1, 0); 2689 if (ret) { 2690 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2691 goto err_unlock; 2692 } 2693 2694 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2695 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2696 q->fl[i].phys_addr, q->fl[i].size, 2697 q->fl[i].buf_size, p->cong_thres, 1, 2698 0); 2699 if (ret) { 2700 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2701 goto err_unlock; 2702 } 2703 } 2704 2705 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2706 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2707 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2708 1, 0); 2709 if (ret) { 2710 printf("error %d from t3_sge_init_ecntxt\n", ret); 2711 goto err_unlock; 2712 } 2713 2714 if (ntxq > 1) { 2715 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2716 USE_GTS, SGE_CNTXT_OFLD, id, 2717 q->txq[TXQ_OFLD].phys_addr, 2718 q->txq[TXQ_OFLD].size, 0, 1, 0); 2719 if (ret) { 2720 printf("error %d from t3_sge_init_ecntxt\n", ret); 2721 goto err_unlock; 2722 } 2723 } 2724 2725 if (ntxq > 2) { 2726 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2727 SGE_CNTXT_CTRL, id, 2728 q->txq[TXQ_CTRL].phys_addr, 2729 q->txq[TXQ_CTRL].size, 2730 q->txq[TXQ_CTRL].token, 1, 0); 2731 if (ret) { 2732 printf("error %d from t3_sge_init_ecntxt\n", ret); 2733 goto err_unlock; 2734 } 2735 } 2736 2737 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2738 device_get_unit(sc->dev), irq_vec_idx); 
2739 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2740 2741 mtx_unlock_spin(&sc->sge.reg_lock); 2742 t3_update_qset_coalesce(q, p); 2743 q->port = pi; 2744 2745 refill_fl(sc, &q->fl[0], q->fl[0].size); 2746 refill_fl(sc, &q->fl[1], q->fl[1].size); 2747 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2748 2749 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2750 V_NEWTIMER(q->rspq.holdoff_tmr)); 2751 2752 return (0); 2753 2754 err_unlock: 2755 mtx_unlock_spin(&sc->sge.reg_lock); 2756 err: 2757 TXQ_LOCK(q); 2758 t3_free_qset(sc, q); 2759 2760 return (ret); 2761 } 2762 2763 /* 2764 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2765 * ethernet data. Hardware assistance with various checksums and any vlan tag 2766 * will also be taken into account here. 2767 */ 2768 void 2769 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2770 { 2771 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2772 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2773 struct ifnet *ifp = pi->ifp; 2774 2775 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2776 2777 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2778 cpl->csum_valid && cpl->csum == 0xffff) { 2779 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2780 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2781 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2782 m->m_pkthdr.csum_data = 0xffff; 2783 } 2784 /* 2785 * XXX need to add VLAN support for 6.x 2786 */ 2787 #ifdef VLAN_SUPPORTED 2788 if (__predict_false(cpl->vlan_valid)) { 2789 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2790 m->m_flags |= M_VLANTAG; 2791 } 2792 #endif 2793 2794 m->m_pkthdr.rcvif = ifp; 2795 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2796 /* 2797 * adjust after conversion to mbuf chain 2798 */ 2799 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2800 m->m_len -= (sizeof(*cpl) + ethpad); 2801 m->m_data += (sizeof(*cpl) + ethpad); 2802 } 2803 2804 /** 2805 * get_packet - return the next ingress packet buffer from a free list 2806 * @adap: the adapter that received the packet 2807 * @drop_thres: # of remaining buffers before we start dropping packets 2808 * @qs: the qset that the SGE free list holding the packet belongs to 2809 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2810 * @r: response descriptor 2811 * 2812 * Get the next packet from a free list and complete setup of the 2813 * sk_buff. If the packet is small we make a copy and recycle the 2814 * original buffer, otherwise we use the original buffer itself. If a 2815 * positive drop threshold is supplied packets are dropped and their 2816 * buffers recycled if (a) the number of remaining buffers is under the 2817 * threshold and the packet is too big to copy, or (b) the packet should 2818 * be copied but there is no memory for the copy. 2819 */ 2820 static int 2821 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2822 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2823 { 2824 2825 unsigned int len_cq = ntohl(r->len_cq); 2826 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2827 int mask, cidx = fl->cidx; 2828 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2829 uint32_t len = G_RSPD_LEN(len_cq); 2830 uint32_t flags = M_EXT; 2831 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2832 caddr_t cl; 2833 struct mbuf *m; 2834 int ret = 0; 2835 2836 mask = fl->size - 1; 2837 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2838 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2839 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2840 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2841 2842 fl->credits--; 2843 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2844 2845 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2846 sopeop == RSPQ_SOP_EOP) { 2847 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2848 goto skip_recycle; 2849 cl = mtod(m, void *); 2850 memcpy(cl, sd->rxsd_cl, len); 2851 recycle_rx_buf(adap, fl, fl->cidx); 2852 m->m_pkthdr.len = m->m_len = len; 2853 m->m_flags = 0; 2854 mh->mh_head = mh->mh_tail = m; 2855 ret = 1; 2856 goto done; 2857 } else { 2858 skip_recycle: 2859 bus_dmamap_unload(fl->entry_tag, sd->map); 2860 cl = sd->rxsd_cl; 2861 m = sd->m; 2862 2863 if ((sopeop == RSPQ_SOP_EOP) || 2864 (sopeop == RSPQ_SOP)) 2865 flags |= M_PKTHDR; 2866 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2867 if (fl->zone == zone_pack) { 2868 /* 2869 * restore clobbered data pointer 2870 */ 2871 m->m_data = m->m_ext.ext_buf; 2872 } else { 2873 m_cljset(m, cl, fl->type); 2874 } 2875 m->m_len = len; 2876 } 2877 switch(sopeop) { 2878 case RSPQ_SOP_EOP: 2879 ret = 1; 2880 /* FALLTHROUGH */ 2881 case RSPQ_SOP: 2882 mh->mh_head = mh->mh_tail = m; 2883 m->m_pkthdr.len = len; 2884 break; 2885 case RSPQ_EOP: 2886 ret = 1; 2887 /* FALLTHROUGH */ 2888 case RSPQ_NSOP_NEOP: 2889 if (mh->mh_tail == NULL) { 2890 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2891 m_freem(m); 2892 break; 2893 } 2894 mh->mh_tail->m_next = m; 2895 mh->mh_tail = m; 2896 mh->mh_head->m_pkthdr.len += len; 2897 break; 2898 } 2899 if (cxgb_debug) 2900 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2901 done: 2902 if (++fl->cidx == fl->size) 2903 fl->cidx = 0; 2904 2905 return (ret); 2906 } 2907 2908 /** 2909 * handle_rsp_cntrl_info - handles control information in a response 2910 * @qs: the queue set corresponding to the response 2911 * @flags: the response control flags 2912 * 2913 * Handles the control information of an SGE response, such as GTS 2914 * indications and completion credits for the queue set's Tx queues. 2915 * HW coalesces credits, we don't do any extra SW coalescing. 
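 *
 * The credit fields in the response flags map onto the Tx queues as
 * follows (see the body below):
 *
 *	G_RSPD_TXQ0_CR(flags)	->	qs->txq[TXQ_ETH].processed
 *	G_RSPD_TXQ1_CR(flags)	->	qs->txq[TXQ_OFLD].processed
 *	G_RSPD_TXQ2_CR(flags)	->	qs->txq[TXQ_CTRL].processed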
2916 */ 2917 static __inline void 2918 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2919 { 2920 unsigned int credits; 2921 2922 #if USE_GTS 2923 if (flags & F_RSPD_TXQ0_GTS) 2924 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2925 #endif 2926 credits = G_RSPD_TXQ0_CR(flags); 2927 if (credits) 2928 qs->txq[TXQ_ETH].processed += credits; 2929 2930 credits = G_RSPD_TXQ2_CR(flags); 2931 if (credits) 2932 qs->txq[TXQ_CTRL].processed += credits; 2933 2934 # if USE_GTS 2935 if (flags & F_RSPD_TXQ1_GTS) 2936 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2937 # endif 2938 credits = G_RSPD_TXQ1_CR(flags); 2939 if (credits) 2940 qs->txq[TXQ_OFLD].processed += credits; 2941 2942 } 2943 2944 static void 2945 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2946 unsigned int sleeping) 2947 { 2948 ; 2949 } 2950 2951 /** 2952 * process_responses - process responses from an SGE response queue 2953 * @adap: the adapter 2954 * @qs: the queue set to which the response queue belongs 2955 * @budget: how many responses can be processed in this round 2956 * 2957 * Process responses from an SGE response queue up to the supplied budget. 2958 * Responses include received packets as well as credits and other events 2959 * for the queues that belong to the response queue's queue set. 2960 * A negative budget is effectively unlimited. 2961 * 2962 * Additionally choose the interrupt holdoff time for the next interrupt 2963 * on this queue. If the system is under memory shortage use a fairly 2964 * long delay to help recovery. 2965 */ 2966 static int 2967 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2968 { 2969 struct sge_rspq *rspq = &qs->rspq; 2970 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2971 int budget_left = budget; 2972 unsigned int sleeping = 0; 2973 #ifdef LRO_SUPPORTED 2974 int lro_enabled = qs->lro.enabled; 2975 int skip_lro; 2976 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2977 #endif 2978 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2979 int ngathered = 0; 2980 #ifdef DEBUG 2981 static int last_holdoff = 0; 2982 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2983 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2984 last_holdoff = rspq->holdoff_tmr; 2985 } 2986 #endif 2987 rspq->next_holdoff = rspq->holdoff_tmr; 2988 2989 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2990 int eth, eop = 0, ethpad = 0; 2991 uint32_t flags = ntohl(r->flags); 2992 uint32_t rss_csum = *(const uint32_t *)r; 2993 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2994 2995 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2996 2997 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2998 struct mbuf *m; 2999 3000 if (cxgb_debug) 3001 printf("async notification\n"); 3002 3003 if (rspq->rspq_mh.mh_head == NULL) { 3004 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3005 m = rspq->rspq_mh.mh_head; 3006 } else { 3007 m = m_gethdr(M_DONTWAIT, MT_DATA); 3008 } 3009 if (m == NULL) 3010 goto no_mem; 3011 3012 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3013 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3014 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3015 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3016 eop = 1; 3017 rspq->async_notif++; 3018 goto skip; 3019 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3020 struct mbuf *m = NULL; 3021 3022 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3023 r->rss_hdr.opcode, rspq->cidx); 3024 if (rspq->rspq_mh.mh_head == NULL) 3025 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3026 else 3027 m = m_gethdr(M_DONTWAIT, MT_DATA); 3028 
3029 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 3030 no_mem: 3031 rspq->next_holdoff = NOMEM_INTR_DELAY; 3032 budget_left--; 3033 break; 3034 } 3035 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3036 eop = 1; 3037 rspq->imm_data++; 3038 } else if (r->len_cq) { 3039 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3040 3041 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3042 if (eop) { 3043 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3044 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3045 } 3046 3047 ethpad = 2; 3048 } else { 3049 rspq->pure_rsps++; 3050 } 3051 skip: 3052 if (flags & RSPD_CTRL_MASK) { 3053 sleeping |= flags & RSPD_GTS_MASK; 3054 handle_rsp_cntrl_info(qs, flags); 3055 } 3056 3057 r++; 3058 if (__predict_false(++rspq->cidx == rspq->size)) { 3059 rspq->cidx = 0; 3060 rspq->gen ^= 1; 3061 r = rspq->desc; 3062 } 3063 3064 if (++rspq->credits >= (rspq->size / 4)) { 3065 refill_rspq(adap, rspq, rspq->credits); 3066 rspq->credits = 0; 3067 } 3068 if (!eth && eop) { 3069 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3070 /* 3071 * XXX size mismatch 3072 */ 3073 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3074 3075 3076 ngathered = rx_offload(&adap->tdev, rspq, 3077 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3078 rspq->rspq_mh.mh_head = NULL; 3079 DPRINTF("received offload packet\n"); 3080 3081 } else if (eth && eop) { 3082 struct mbuf *m = rspq->rspq_mh.mh_head; 3083 3084 t3_rx_eth(adap, rspq, m, ethpad); 3085 3086 #ifdef LRO_SUPPORTED 3087 /* 3088 * The T304 sends incoming packets on any qset. If LRO 3089 * is also enabled, we could end up sending packet up 3090 * lro_ctrl->ifp's input. That is incorrect. 3091 * 3092 * The mbuf's rcvif was derived from the cpl header and 3093 * is accurate. Skip LRO and just use that. 3094 */ 3095 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3096 3097 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 3098 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 3099 /* successfully queue'd for LRO */ 3100 } else 3101 #endif 3102 { 3103 /* 3104 * LRO not enabled, packet unsuitable for LRO, 3105 * or unable to queue. Pass it up right now in 3106 * either case. 3107 */ 3108 struct ifnet *ifp = m->m_pkthdr.rcvif; 3109 (*ifp->if_input)(ifp, m); 3110 } 3111 rspq->rspq_mh.mh_head = NULL; 3112 3113 } 3114 __refill_fl_lt(adap, &qs->fl[0], 32); 3115 __refill_fl_lt(adap, &qs->fl[1], 32); 3116 --budget_left; 3117 } 3118 3119 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3120 3121 #ifdef LRO_SUPPORTED 3122 /* Flush LRO */ 3123 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3124 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3125 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3126 tcp_lro_flush(lro_ctrl, queued); 3127 } 3128 #endif 3129 3130 if (sleeping) 3131 check_ring_db(adap, qs, sleeping); 3132 3133 mb(); /* commit Tx queue processed updates */ 3134 if (__predict_false(qs->txq_stopped > 1)) 3135 restart_tx(qs); 3136 3137 __refill_fl_lt(adap, &qs->fl[0], 512); 3138 __refill_fl_lt(adap, &qs->fl[1], 512); 3139 budget -= budget_left; 3140 return (budget); 3141 } 3142 3143 /* 3144 * A helper function that processes responses and issues GTS. 
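 * It runs process_responses() with an unlimited budget (-1) and then
 * performs a single A_SG_GTS write that packs the queue's context id
 * (V_RSPQ), the chosen holdoff timer (V_NEWTIMER) and the updated
 * consumer index (V_NEWINDEX) into one doorbell.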
3145 */ 3146 static __inline int 3147 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3148 { 3149 int work; 3150 static int last_holdoff = 0; 3151 3152 work = process_responses(adap, rspq_to_qset(rq), -1); 3153 3154 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3155 printf("next_holdoff=%d\n", rq->next_holdoff); 3156 last_holdoff = rq->next_holdoff; 3157 } 3158 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3159 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3160 3161 return (work); 3162 } 3163 3164 3165 /* 3166 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3167 * Handles data events from SGE response queues as well as error and other 3168 * async events as they all use the same interrupt pin. We use one SGE 3169 * response queue per port in this mode and protect all response queues with 3170 * queue 0's lock. 3171 */ 3172 void 3173 t3b_intr(void *data) 3174 { 3175 uint32_t i, map; 3176 adapter_t *adap = data; 3177 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3178 3179 t3_write_reg(adap, A_PL_CLI, 0); 3180 map = t3_read_reg(adap, A_SG_DATA_INTR); 3181 3182 if (!map) 3183 return; 3184 3185 if (__predict_false(map & F_ERRINTR)) 3186 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3187 3188 mtx_lock(&q0->lock); 3189 for_each_port(adap, i) 3190 if (map & (1 << i)) 3191 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3192 mtx_unlock(&q0->lock); 3193 } 3194 3195 /* 3196 * The MSI interrupt handler. This needs to handle data events from SGE 3197 * response queues as well as error and other async events as they all use 3198 * the same MSI vector. We use one SGE response queue per port in this mode 3199 * and protect all response queues with queue 0's lock. 3200 */ 3201 void 3202 t3_intr_msi(void *data) 3203 { 3204 adapter_t *adap = data; 3205 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3206 int i, new_packets = 0; 3207 3208 mtx_lock(&q0->lock); 3209 3210 for_each_port(adap, i) 3211 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3212 new_packets = 1; 3213 mtx_unlock(&q0->lock); 3214 if (new_packets == 0) 3215 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3216 } 3217 3218 void 3219 t3_intr_msix(void *data) 3220 { 3221 struct sge_qset *qs = data; 3222 adapter_t *adap = qs->port->adapter; 3223 struct sge_rspq *rspq = &qs->rspq; 3224 3225 if (process_responses_gts(adap, rspq) == 0) 3226 rspq->unhandled_irqs++; 3227 } 3228 3229 #define QDUMP_SBUF_SIZE 32 * 400 3230 static int 3231 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3232 { 3233 struct sge_rspq *rspq; 3234 struct sge_qset *qs; 3235 int i, err, dump_end, idx; 3236 static int multiplier = 1; 3237 struct sbuf *sb; 3238 struct rsp_desc *rspd; 3239 uint32_t data[4]; 3240 3241 rspq = arg1; 3242 qs = rspq_to_qset(rspq); 3243 if (rspq->rspq_dump_count == 0) 3244 return (0); 3245 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3246 log(LOG_WARNING, 3247 "dump count is too large %d\n", rspq->rspq_dump_count); 3248 rspq->rspq_dump_count = 0; 3249 return (EINVAL); 3250 } 3251 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3252 log(LOG_WARNING, 3253 "dump start of %d is greater than queue size\n", 3254 rspq->rspq_dump_start); 3255 rspq->rspq_dump_start = 0; 3256 return (EINVAL); 3257 } 3258 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3259 if (err) 3260 return (err); 3261 retry_sbufops: 3262 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3263 3264 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3265 (data[0] & 
0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3266 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3267 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3268 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3269 3270 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3271 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3272 3273 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3274 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3275 idx = i & (RSPQ_Q_SIZE-1); 3276 3277 rspd = &rspq->desc[idx]; 3278 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3279 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3280 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3281 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3282 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3283 be32toh(rspd->len_cq), rspd->intr_gen); 3284 } 3285 if (sbuf_overflowed(sb)) { 3286 sbuf_delete(sb); 3287 multiplier++; 3288 goto retry_sbufops; 3289 } 3290 sbuf_finish(sb); 3291 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3292 sbuf_delete(sb); 3293 return (err); 3294 } 3295 3296 static int 3297 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3298 { 3299 struct sge_txq *txq; 3300 struct sge_qset *qs; 3301 int i, j, err, dump_end; 3302 static int multiplier = 1; 3303 struct sbuf *sb; 3304 struct tx_desc *txd; 3305 uint32_t *WR, wr_hi, wr_lo, gen; 3306 uint32_t data[4]; 3307 3308 txq = arg1; 3309 qs = txq_to_qset(txq, TXQ_ETH); 3310 if (txq->txq_dump_count == 0) { 3311 return (0); 3312 } 3313 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3314 log(LOG_WARNING, 3315 "dump count is too large %d\n", txq->txq_dump_count); 3316 txq->txq_dump_count = 1; 3317 return (EINVAL); 3318 } 3319 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3320 log(LOG_WARNING, 3321 "dump start of %d is greater than queue size\n", 3322 txq->txq_dump_start); 3323 txq->txq_dump_start = 0; 3324 return (EINVAL); 3325 } 3326 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3327 if (err) 3328 return (err); 3329 3330 3331 retry_sbufops: 3332 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3333 3334 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3335 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3336 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3337 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3338 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3339 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3340 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3341 txq->txq_dump_start, 3342 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3343 3344 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3345 for (i = txq->txq_dump_start; i < dump_end; i++) { 3346 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3347 WR = (uint32_t *)txd->flit; 3348 wr_hi = ntohl(WR[0]); 3349 wr_lo = ntohl(WR[1]); 3350 gen = G_WR_GEN(wr_lo); 3351 3352 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3353 wr_hi, wr_lo, gen); 3354 for (j = 2; j < 30; j += 4) 3355 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3356 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3357 3358 } 3359 if (sbuf_overflowed(sb)) { 3360 sbuf_delete(sb); 3361 multiplier++; 3362 goto retry_sbufops; 3363 } 3364 sbuf_finish(sb); 3365 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3366 sbuf_delete(sb); 3367 return (err); 3368 } 3369 
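/*
 * The queue dump handlers above and below share the same sbuf pattern:
 * format into a fixed-size buffer and, on overflow, grow the buffer and
 * start over.  A minimal sketch of that pattern follows; it is
 * illustrative only (the function name is made up and the block is not
 * compiled), with the descriptor formatting elided.
 */
#if 0
static int
qdump_sbuf_pattern(struct sysctl_req *req)
{
	static int multiplier = 1;
	struct sbuf *sb;
	int err;

retry_sbufops:
	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
	/* ... sbuf_printf() the descriptors here ... */
	if (sbuf_overflowed(sb)) {
		sbuf_delete(sb);
		multiplier++;
		goto retry_sbufops;
	}
	sbuf_finish(sb);
	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
	sbuf_delete(sb);
	return (err);
}
#endif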
3370 static int 3371 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3372 { 3373 struct sge_txq *txq; 3374 struct sge_qset *qs; 3375 int i, j, err, dump_end; 3376 static int multiplier = 1; 3377 struct sbuf *sb; 3378 struct tx_desc *txd; 3379 uint32_t *WR, wr_hi, wr_lo, gen; 3380 3381 txq = arg1; 3382 qs = txq_to_qset(txq, TXQ_CTRL); 3383 if (txq->txq_dump_count == 0) { 3384 return (0); 3385 } 3386 if (txq->txq_dump_count > 256) { 3387 log(LOG_WARNING, 3388 "dump count is too large %d\n", txq->txq_dump_count); 3389 txq->txq_dump_count = 1; 3390 return (EINVAL); 3391 } 3392 if (txq->txq_dump_start > 255) { 3393 log(LOG_WARNING, 3394 "dump start of %d is greater than queue size\n", 3395 txq->txq_dump_start); 3396 txq->txq_dump_start = 0; 3397 return (EINVAL); 3398 } 3399 3400 retry_sbufops: 3401 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3402 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3403 txq->txq_dump_start, 3404 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3405 3406 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3407 for (i = txq->txq_dump_start; i < dump_end; i++) { 3408 txd = &txq->desc[i & (255)]; 3409 WR = (uint32_t *)txd->flit; 3410 wr_hi = ntohl(WR[0]); 3411 wr_lo = ntohl(WR[1]); 3412 gen = G_WR_GEN(wr_lo); 3413 3414 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3415 wr_hi, wr_lo, gen); 3416 for (j = 2; j < 30; j += 4) 3417 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3418 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3419 3420 } 3421 if (sbuf_overflowed(sb)) { 3422 sbuf_delete(sb); 3423 multiplier++; 3424 goto retry_sbufops; 3425 } 3426 sbuf_finish(sb); 3427 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3428 sbuf_delete(sb); 3429 return (err); 3430 } 3431 3432 static int 3433 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3434 { 3435 adapter_t *sc = arg1; 3436 struct qset_params *qsp = &sc->params.sge.qset[0]; 3437 int coalesce_usecs; 3438 struct sge_qset *qs; 3439 int i, j, err, nqsets = 0; 3440 struct mtx *lock; 3441 3442 if ((sc->flags & FULL_INIT_DONE) == 0) 3443 return (ENXIO); 3444 3445 coalesce_usecs = qsp->coalesce_usecs; 3446 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3447 3448 if (err != 0) { 3449 return (err); 3450 } 3451 if (coalesce_usecs == qsp->coalesce_usecs) 3452 return (0); 3453 3454 for (i = 0; i < sc->params.nports; i++) 3455 for (j = 0; j < sc->port[i].nqsets; j++) 3456 nqsets++; 3457 3458 coalesce_usecs = max(1, coalesce_usecs); 3459 3460 for (i = 0; i < nqsets; i++) { 3461 qs = &sc->sge.qs[i]; 3462 qsp = &sc->params.sge.qset[i]; 3463 qsp->coalesce_usecs = coalesce_usecs; 3464 3465 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3466 &sc->sge.qs[0].rspq.lock; 3467 3468 mtx_lock(lock); 3469 t3_update_qset_coalesce(qs, qsp); 3470 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3471 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3472 mtx_unlock(lock); 3473 } 3474 3475 return (0); 3476 } 3477 3478 3479 void 3480 t3_add_attach_sysctls(adapter_t *sc) 3481 { 3482 struct sysctl_ctx_list *ctx; 3483 struct sysctl_oid_list *children; 3484 3485 ctx = device_get_sysctl_ctx(sc->dev); 3486 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3487 3488 /* random information */ 3489 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3490 "firmware_version", 3491 CTLFLAG_RD, &sc->fw_version, 3492 0, "firmware version"); 3493 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3494 "hw_revision", 3495 CTLFLAG_RD, &sc->params.rev, 3496 0, "chip model"); 3497 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3498 "port_types", 3499 CTLFLAG_RD, &sc->port_types, 3500 0, "type of ports"); 3501 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3502 "enable_debug", 3503 CTLFLAG_RW, &cxgb_debug, 3504 0, "enable verbose debugging output"); 3505 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3506 CTLFLAG_RD, &sc->tunq_coalesce, 3507 "#tunneled packets freed"); 3508 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3509 "txq_overrun", 3510 CTLFLAG_RD, &txq_fills, 3511 0, "#times txq overrun"); 3512 } 3513 3514 3515 static const char *rspq_name = "rspq"; 3516 static const char *txq_names[] = 3517 { 3518 "txq_eth", 3519 "txq_ofld", 3520 "txq_ctrl" 3521 }; 3522 3523 static int 3524 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3525 { 3526 struct port_info *p = arg1; 3527 uint64_t *parg; 3528 3529 if (!p) 3530 return (EINVAL); 3531 3532 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3533 PORT_LOCK(p); 3534 t3_mac_update_stats(&p->mac); 3535 PORT_UNLOCK(p); 3536 3537 return (sysctl_handle_quad(oidp, parg, 0, req)); 3538 } 3539 3540 void 3541 t3_add_configured_sysctls(adapter_t *sc) 3542 { 3543 struct sysctl_ctx_list *ctx; 3544 struct sysctl_oid_list *children; 3545 int i, j; 3546 3547 ctx = device_get_sysctl_ctx(sc->dev); 3548 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3549 3550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3551 "intr_coal", 3552 CTLTYPE_INT|CTLFLAG_RW, sc, 3553 0, t3_set_coalesce_usecs, 3554 "I", "interrupt coalescing timer (us)"); 3555 3556 for (i = 0; i < sc->params.nports; i++) { 3557 struct port_info *pi = &sc->port[i]; 3558 struct sysctl_oid *poid; 3559 struct sysctl_oid_list *poidlist; 3560 struct mac_stats *mstats = &pi->mac.stats; 3561 3562 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3563 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3564 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3565 poidlist = SYSCTL_CHILDREN(poid); 3566 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3567 "nqsets", CTLFLAG_RD, &pi->nqsets, 3568 0, "#queue sets"); 3569 3570 for (j = 0; j < pi->nqsets; j++) { 3571 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3572 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3573 *ctrlqpoid, *lropoid; 3574 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3575 *txqpoidlist, *ctrlqpoidlist, 3576 *lropoidlist; 3577 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3578 3579 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3580 3581 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3582 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3583 qspoidlist = SYSCTL_CHILDREN(qspoid); 3584 3585 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3586 CTLFLAG_RD, &qs->fl[0].empty, 0, 3587 "freelist #0 empty"); 
3588 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3589 CTLFLAG_RD, &qs->fl[1].empty, 0, 3590 "freelist #1 empty"); 3591 3592 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3593 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3594 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3595 3596 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3597 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3598 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3599 3600 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3601 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3602 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3603 3604 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3605 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3606 lropoidlist = SYSCTL_CHILDREN(lropoid); 3607 3608 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3609 CTLFLAG_RD, &qs->rspq.size, 3610 0, "#entries in response queue"); 3611 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3612 CTLFLAG_RD, &qs->rspq.cidx, 3613 0, "consumer index"); 3614 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3615 CTLFLAG_RD, &qs->rspq.credits, 3616 0, "#credits"); 3617 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3618 CTLFLAG_RD, &qs->rspq.phys_addr, 3619 "physical_address_of the queue"); 3620 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3621 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3622 0, "start rspq dump entry"); 3623 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3624 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3625 0, "#rspq entries to dump"); 3626 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3627 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3628 0, t3_dump_rspq, "A", "dump of the response queue"); 3629 3630 3631 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3632 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3633 0, "#tunneled packets dropped"); 3634 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3635 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3636 0, "#tunneled packets waiting to be sent"); 3637 #if 0 3638 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3639 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3640 0, "#tunneled packets queue producer index"); 3641 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3642 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3643 0, "#tunneled packets queue consumer index"); 3644 #endif 3645 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3646 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3647 0, "#tunneled packets processed by the card"); 3648 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3649 CTLFLAG_RD, &txq->cleaned, 3650 0, "#tunneled packets cleaned"); 3651 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3652 CTLFLAG_RD, &txq->in_use, 3653 0, "#tunneled packet slots in use"); 3654 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3655 CTLFLAG_RD, &txq->txq_frees, 3656 "#tunneled packets freed"); 3657 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3658 CTLFLAG_RD, &txq->txq_skipped, 3659 0, "#tunneled packet descriptors skipped"); 3660 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3661 CTLFLAG_RD, &txq->txq_coalesced, 3662 "#tunneled packets coalesced"); 3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3664 CTLFLAG_RD, &txq->txq_enqueued, 3665 0, "#tunneled packets enqueued to hardware"); 3666 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3667 CTLFLAG_RD, &qs->txq_stopped, 3668 0, "tx queues 
stopped"); 3669 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3670 CTLFLAG_RD, &txq->phys_addr, 3671 "physical_address_of the queue"); 3672 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3673 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3674 0, "txq generation"); 3675 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3676 CTLFLAG_RD, &txq->cidx, 3677 0, "hardware queue cidx"); 3678 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3679 CTLFLAG_RD, &txq->pidx, 3680 0, "hardware queue pidx"); 3681 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3682 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3683 0, "txq start idx for dump"); 3684 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3685 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3686 0, "txq #entries to dump"); 3687 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3688 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3689 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3690 3691 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3692 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3693 0, "ctrlq start idx for dump"); 3694 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3695 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3696 0, "ctrl #entries to dump"); 3697 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3698 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3699 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3700 3701 #ifdef LRO_SUPPORTED 3702 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3703 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3704 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3705 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3706 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3707 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3708 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3709 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3710 #endif 3711 } 3712 3713 /* Now add a node for mac stats. */ 3714 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3715 CTLFLAG_RD, NULL, "MAC statistics"); 3716 poidlist = SYSCTL_CHILDREN(poid); 3717 3718 /* 3719 * We (ab)use the length argument (arg2) to pass on the offset 3720 * of the data that we are interested in. This is only required 3721 * for the quad counters that are updated from the hardware (we 3722 * make sure that we return the latest value). 3723 * sysctl_handle_macstat first updates *all* the counters from 3724 * the hardware, and then returns the latest value of the 3725 * requested counter. Best would be to update only the 3726 * requested counter from hardware, but t3_mac_update_stats() 3727 * hides all the register details and we don't want to dive into 3728 * all that here. 
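 *
 * For example, the CXGB_SYSCTL_ADD_QUAD(tx_octets) entry below passes
 * offsetof(struct mac_stats, tx_octets) as arg2, and the handler then
 * locates the freshly updated counter with
 *
 *	parg = (uint64_t *)((uint8_t *)&p->mac.stats + arg2);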
3729 */ 3730 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3731 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3732 sysctl_handle_macstat, "QU", 0) 3733 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3734 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3735 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3736 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3737 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3738 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3739 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3740 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3741 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3742 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3743 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3744 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3745 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3746 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3747 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3748 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3749 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3750 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3751 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3752 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3753 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3755 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3756 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3757 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3758 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3759 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3760 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3761 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3762 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3763 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3764 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3765 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3766 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3767 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3768 CXGB_SYSCTL_ADD_QUAD(rx_short); 3769 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3770 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3771 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3772 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3773 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3774 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3775 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3776 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3777 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3779 #undef CXGB_SYSCTL_ADD_QUAD 3780 3781 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3782 CTLFLAG_RD, &mstats->a, 0) 3783 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3784 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3785 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3786 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3787 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3788 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3789 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3790 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3791 CXGB_SYSCTL_ADD_ULONG(num_resets); 3792 CXGB_SYSCTL_ADD_ULONG(link_faults); 3793 #undef CXGB_SYSCTL_ADD_ULONG 3794 } 3795 } 3796 3797 /** 3798 * t3_get_desc - dump an SGE descriptor for debugging purposes 3799 * @qs: the queue set 3800 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3801 * @idx: the descriptor index in the queue 3802 * @data: where to dump the descriptor contents 3803 * 3804 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3805 * size of the descriptor. 
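 *
 * qnum 0-2 select qs->txq[qnum], 3 selects the response queue, and 4-5
 * select the free lists qs->fl[0] and qs->fl[1].  For example
 * (illustrative only), t3_get_desc(qs, 3, 0, buf) copies
 * qs->rspq.desc[0] into buf and returns sizeof(struct rsp_desc).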
3806  */
3807 int
3808 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3809 		unsigned char *data)
3810 {
3811 	if (qnum >= 6)
3812 		return (EINVAL);
3813 
3814 	if (qnum < 3) {
3815 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3816 			return (EINVAL);
3817 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3818 		return sizeof(struct tx_desc);
3819 	}
3820 
3821 	if (qnum == 3) {
3822 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3823 			return (EINVAL);
3824 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3825 		return sizeof(struct rsp_desc);
3826 	}
3827 
3828 	qnum -= 4;
3829 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3830 		return (EINVAL);
3831 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3832 	return sizeof(struct rx_desc);
3833 }
3834 
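#if 0
/*
 * Example use of t3_get_desc() (a sketch for illustration only; the
 * function name below is made up and the block is not compiled): copy
 * out the first response-queue descriptor of queue set 0.
 */
static void
t3_get_desc_example(adapter_t *sc)
{
	unsigned char buf[sizeof(struct rsp_desc)];

	if (t3_get_desc(&sc->sge.qs[0], 3, 0, buf) == sizeof(struct rsp_desc))
		printf("copied %zu bytes of rspq descriptor 0\n", sizeof(buf));
}
#endif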