1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/conf.h> 39 #include <machine/bus.h> 40 #include <machine/resource.h> 41 #include <sys/bus_dma.h> 42 #include <sys/rman.h> 43 #include <sys/queue.h> 44 #include <sys/sysctl.h> 45 #include <sys/taskqueue.h> 46 47 #include <sys/proc.h> 48 #include <sys/sbuf.h> 49 #include <sys/sched.h> 50 #include <sys/smp.h> 51 #include <sys/systm.h> 52 #include <sys/syslog.h> 53 54 #include <net/bpf.h> 55 56 #include <netinet/in_systm.h> 57 #include <netinet/in.h> 58 #include <netinet/ip.h> 59 #include <netinet/tcp.h> 60 61 #include <dev/pci/pcireg.h> 62 #include <dev/pci/pcivar.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 67 #include <cxgb_include.h> 68 #include <sys/mvec.h> 69 70 int txq_fills = 0; 71 int multiq_tx_enable = 1; 72 73 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 77 "size of per-queue mbuf ring"); 78 79 static int cxgb_tx_coalesce_force = 0; 80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 82 &cxgb_tx_coalesce_force, 0, 83 "coalesce small packets into a single work request regardless of ring state"); 84 85 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 86 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 87 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 88 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 89 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 90 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 91 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 92 93 94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 96 &cxgb_tx_coalesce_enable_start); 97 
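/*
 * Illustrative summary of the thresholds above (not used by the code, and
 * assuming TX_ETH_Q_SIZE is 1024, its usual definition in cxgb_adapter.h):
 *
 *	COALESCE_START_DEFAULT = 1024 >> 1 = 512   (begin coalescing)
 *	COALESCE_START_MAX     = 1024 - 128 = 896
 *	COALESCE_STOP_DEFAULT  = 1024 >> 2 = 256   (stop coalescing)
 *	COALESCE_STOP_MIN      = 1024 >> 5 = 32
 *	TX_RECLAIM_DEFAULT     = 1024 >> 5 = 32
 *	TX_RECLAIM_MAX         = 1024 >> 2 = 256
 *	TX_RECLAIM_MIN         = 1024 >> 6 = 16
 *
 * The hw.cxgb.* tunables declared in this block override these compiled-in
 * defaults at boot or run time.
 */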
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
int fl_q_size = 0;
int jumbo_q_size = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int		error;
	bus_dma_segment_t seg;
	int		nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
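/*
 * Worked example of the mapping, using the values the table below implies:
 * with two generation bits WR_FLITS is 15, so a work request carrying 20
 * flits needs 1 + (20 - 2) / (15 - 1) = 2 descriptors; with one generation
 * bit WR_FLITS is 16 and the same 20-flit request also fits in 2
 * descriptors.  The map never returns more than 4, matching the
 * 4-descriptor-per-WR hardware limit noted above.
 */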
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we do now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * Once the hardware transmit queue fills past the enable-start
	 * threshold we mark it as coalescing; we drop back out of coalescing
	 * when it drains below the enable-stop threshold and there are no
	 * packets enqueued.  This provides some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next == NULL))) {
		*count += 1;
		*nbytes += m->m_len;
		return (1);
	}
	return (0);
}
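/*
 * Illustrative example of the coalescing path (not additional driver
 * logic): once check_pkt_coalesce() reports coalescing, cxgb_dequeue()
 * below keeps pulling packets off the buf_ring only while coalesce_check()
 * accepts them, i.e. while the batch stays within 7 single-mbuf packets
 * and roughly 10500 bytes.  For example, three 1500-byte frames queued
 * while the ring is busy are chained via m_nextpkt and later handed to
 * t3_encap() as one batched tunnel work request.
 */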
static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;


	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets into one WR", ci.count);
	return (m_head);
}

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ?
1000 : 500); 437 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 438 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 439 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 440 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 441 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 442 } 443 444 445 /** 446 * sgl_len - calculates the size of an SGL of the given capacity 447 * @n: the number of SGL entries 448 * 449 * Calculates the number of flits needed for a scatter/gather list that 450 * can hold the given number of entries. 451 */ 452 static __inline unsigned int 453 sgl_len(unsigned int n) 454 { 455 return ((3 * n) / 2 + (n & 1)); 456 } 457 458 /** 459 * get_imm_packet - return the next ingress packet buffer from a response 460 * @resp: the response descriptor containing the packet data 461 * 462 * Return a packet containing the immediate data of the given response. 463 */ 464 static int 465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 466 { 467 468 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 469 m->m_ext.ext_buf = NULL; 470 m->m_ext.ext_type = 0; 471 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 472 return (0); 473 } 474 475 static __inline u_int 476 flits_to_desc(u_int n) 477 { 478 return (flit_desc_map[n]); 479 } 480 481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 482 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 483 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 484 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 485 F_HIRCQPARITYERROR) 486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 488 F_RSPQDISABLED) 489 490 /** 491 * t3_sge_err_intr_handler - SGE async event interrupt handler 492 * @adapter: the adapter 493 * 494 * Interrupt handler for SGE asynchronous (non-data) events. 495 */ 496 void 497 t3_sge_err_intr_handler(adapter_t *adapter) 498 { 499 unsigned int v, status; 500 501 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 502 if (status & SGE_PARERR) 503 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 504 status & SGE_PARERR); 505 if (status & SGE_FRAMINGERR) 506 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 507 status & SGE_FRAMINGERR); 508 if (status & F_RSPQCREDITOVERFOW) 509 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 510 511 if (status & F_RSPQDISABLED) { 512 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 513 514 CH_ALERT(adapter, 515 "packet delivered to disabled response queue (0x%x)\n", 516 (v >> S_RSPQ0DISABLED) & 0xff); 517 } 518 519 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 520 if (status & SGE_FATALERR) 521 t3_fatal_err(adapter); 522 } 523 524 void 525 t3_sge_prep(adapter_t *adap, struct sge_params *p) 526 { 527 int i, nqsets; 528 529 nqsets = min(SGE_QSETS, mp_ncpus*4); 530 531 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 532 533 while (!powerof2(fl_q_size)) 534 fl_q_size--; 535 #if __FreeBSD_version >= 700111 536 if (cxgb_use_16k_clusters) 537 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 538 else 539 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 540 #else 541 jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE); 542 #endif 543 while (!powerof2(jumbo_q_size)) 544 jumbo_q_size--; 545 546 /* XXX Does ETHER_ALIGN need to be accounted for here? 
*/ 547 p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data); 548 549 for (i = 0; i < SGE_QSETS; ++i) { 550 struct qset_params *q = p->qset + i; 551 552 if (adap->params.nports > 2) { 553 q->coalesce_usecs = 50; 554 } else { 555 #ifdef INVARIANTS 556 q->coalesce_usecs = 10; 557 #else 558 q->coalesce_usecs = 5; 559 #endif 560 } 561 q->polling = 0; 562 q->rspq_size = RSPQ_Q_SIZE; 563 q->fl_size = fl_q_size; 564 q->jumbo_size = jumbo_q_size; 565 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 566 q->txq_size[TXQ_OFLD] = 1024; 567 q->txq_size[TXQ_CTRL] = 256; 568 q->cong_thres = 0; 569 } 570 } 571 572 int 573 t3_sge_alloc(adapter_t *sc) 574 { 575 576 /* The parent tag. */ 577 if (bus_dma_tag_create( NULL, /* parent */ 578 1, 0, /* algnmnt, boundary */ 579 BUS_SPACE_MAXADDR, /* lowaddr */ 580 BUS_SPACE_MAXADDR, /* highaddr */ 581 NULL, NULL, /* filter, filterarg */ 582 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 583 BUS_SPACE_UNRESTRICTED, /* nsegments */ 584 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 585 0, /* flags */ 586 NULL, NULL, /* lock, lockarg */ 587 &sc->parent_dmat)) { 588 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 589 return (ENOMEM); 590 } 591 592 /* 593 * DMA tag for normal sized RX frames 594 */ 595 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 596 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 597 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 598 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 599 return (ENOMEM); 600 } 601 602 /* 603 * DMA tag for jumbo sized RX frames. 604 */ 605 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 606 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 607 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 608 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 609 return (ENOMEM); 610 } 611 612 /* 613 * DMA tag for TX frames. 614 */ 615 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 616 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 617 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 618 NULL, NULL, &sc->tx_dmat)) { 619 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 620 return (ENOMEM); 621 } 622 623 return (0); 624 } 625 626 int 627 t3_sge_free(struct adapter * sc) 628 { 629 630 if (sc->tx_dmat != NULL) 631 bus_dma_tag_destroy(sc->tx_dmat); 632 633 if (sc->rx_jumbo_dmat != NULL) 634 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 635 636 if (sc->rx_dmat != NULL) 637 bus_dma_tag_destroy(sc->rx_dmat); 638 639 if (sc->parent_dmat != NULL) 640 bus_dma_tag_destroy(sc->parent_dmat); 641 642 return (0); 643 } 644 645 void 646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 647 { 648 649 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 650 qs->rspq.polling = 0 /* p->polling */; 651 } 652 653 #if !defined(__i386__) && !defined(__amd64__) 654 static void 655 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 656 { 657 struct refill_fl_cb_arg *cb_arg = arg; 658 659 cb_arg->error = error; 660 cb_arg->seg = segs[0]; 661 cb_arg->nseg = nseg; 662 663 } 664 #endif 665 /** 666 * refill_fl - refill an SGE free-buffer list 667 * @sc: the controller softc 668 * @q: the free-list to refill 669 * @n: the number of new buffers to allocate 670 * 671 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 672 * The caller must assure that @n does not exceed the queue's capacity. 
673 */ 674 static void 675 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 676 { 677 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 678 struct rx_desc *d = &q->desc[q->pidx]; 679 struct refill_fl_cb_arg cb_arg; 680 struct mbuf *m; 681 caddr_t cl; 682 int err, count = 0; 683 684 cb_arg.error = 0; 685 while (n--) { 686 /* 687 * We only allocate a cluster, mbuf allocation happens after rx 688 */ 689 if (q->zone == zone_pack) { 690 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 691 break; 692 cl = m->m_ext.ext_buf; 693 } else { 694 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 695 break; 696 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 697 uma_zfree(q->zone, cl); 698 break; 699 } 700 } 701 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 702 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 703 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 704 uma_zfree(q->zone, cl); 705 goto done; 706 } 707 sd->flags |= RX_SW_DESC_MAP_CREATED; 708 } 709 #if !defined(__i386__) && !defined(__amd64__) 710 err = bus_dmamap_load(q->entry_tag, sd->map, 711 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 712 713 if (err != 0 || cb_arg.error) { 714 if (q->zone == zone_pack) 715 uma_zfree(q->zone, cl); 716 m_free(m); 717 goto done; 718 } 719 #else 720 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 721 #endif 722 sd->flags |= RX_SW_DESC_INUSE; 723 sd->rxsd_cl = cl; 724 sd->m = m; 725 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 726 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 727 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 728 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 729 730 d++; 731 sd++; 732 733 if (++q->pidx == q->size) { 734 q->pidx = 0; 735 q->gen ^= 1; 736 sd = q->sdesc; 737 d = q->desc; 738 } 739 q->credits++; 740 count++; 741 } 742 743 done: 744 if (count) 745 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 746 } 747 748 749 /** 750 * free_rx_bufs - free the Rx buffers on an SGE free list 751 * @sc: the controle softc 752 * @q: the SGE free list to clean up 753 * 754 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 755 * this queue should be stopped before calling this function. 756 */ 757 static void 758 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 759 { 760 u_int cidx = q->cidx; 761 762 while (q->credits--) { 763 struct rx_sw_desc *d = &q->sdesc[cidx]; 764 765 if (d->flags & RX_SW_DESC_INUSE) { 766 bus_dmamap_unload(q->entry_tag, d->map); 767 bus_dmamap_destroy(q->entry_tag, d->map); 768 if (q->zone == zone_pack) { 769 m_init(d->m, zone_pack, MCLBYTES, 770 M_NOWAIT, MT_DATA, M_EXT); 771 uma_zfree(zone_pack, d->m); 772 } else { 773 m_init(d->m, zone_mbuf, MLEN, 774 M_NOWAIT, MT_DATA, 0); 775 uma_zfree(zone_mbuf, d->m); 776 uma_zfree(q->zone, d->rxsd_cl); 777 } 778 } 779 780 d->rxsd_cl = NULL; 781 d->m = NULL; 782 if (++cidx == q->size) 783 cidx = 0; 784 } 785 } 786 787 static __inline void 788 __refill_fl(adapter_t *adap, struct sge_fl *fl) 789 { 790 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 791 } 792 793 static __inline void 794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 795 { 796 if ((fl->size - fl->credits) < max) 797 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 798 } 799 800 /** 801 * recycle_rx_buf - recycle a receive buffer 802 * @adapter: the adapter 803 * @q: the SGE free list 804 * @idx: index of buffer to recycle 805 * 806 * Recycles the specified buffer on the given free list by adding it at 807 * the next available slot on the list. 
808 */ 809 static void 810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 811 { 812 struct rx_desc *from = &q->desc[idx]; 813 struct rx_desc *to = &q->desc[q->pidx]; 814 815 q->sdesc[q->pidx] = q->sdesc[idx]; 816 to->addr_lo = from->addr_lo; // already big endian 817 to->addr_hi = from->addr_hi; // likewise 818 wmb(); /* necessary ? */ 819 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 820 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 821 q->credits++; 822 823 if (++q->pidx == q->size) { 824 q->pidx = 0; 825 q->gen ^= 1; 826 } 827 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 828 } 829 830 static void 831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 832 { 833 uint32_t *addr; 834 835 addr = arg; 836 *addr = segs[0].ds_addr; 837 } 838 839 static int 840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 841 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 842 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 843 { 844 size_t len = nelem * elem_size; 845 void *s = NULL; 846 void *p = NULL; 847 int err; 848 849 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 850 BUS_SPACE_MAXADDR_32BIT, 851 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 852 len, 0, NULL, NULL, tag)) != 0) { 853 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 854 return (ENOMEM); 855 } 856 857 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 858 map)) != 0) { 859 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 860 return (ENOMEM); 861 } 862 863 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 864 bzero(p, len); 865 *(void **)desc = p; 866 867 if (sw_size) { 868 len = nelem * sw_size; 869 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 870 *(void **)sdesc = s; 871 } 872 if (parent_entry_tag == NULL) 873 return (0); 874 875 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 876 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 877 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 878 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 879 NULL, NULL, entry_tag)) != 0) { 880 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 881 return (ENOMEM); 882 } 883 return (0); 884 } 885 886 static void 887 sge_slow_intr_handler(void *arg, int ncount) 888 { 889 adapter_t *sc = arg; 890 891 t3_slow_intr_handler(sc); 892 } 893 894 /** 895 * sge_timer_cb - perform periodic maintenance of an SGE qset 896 * @data: the SGE queue set to maintain 897 * 898 * Runs periodically from a timer to perform maintenance of an SGE queue 899 * set. It performs two tasks: 900 * 901 * a) Cleans up any completed Tx descriptors that may still be pending. 902 * Normal descriptor cleanup happens when new packets are added to a Tx 903 * queue so this timer is relatively infrequent and does any cleanup only 904 * if the Tx queue has not seen any new packets in a while. We make a 905 * best effort attempt to reclaim descriptors, in that we don't wait 906 * around if we cannot get a queue's lock (which most likely is because 907 * someone else is queueing new packets and so will also handle the clean 908 * up). Since control queues use immediate data exclusively we don't 909 * bother cleaning them up here. 910 * 911 * b) Replenishes Rx queues that have run out due to memory shortage. 912 * Normally new Rx buffers are added when existing ones are consumed but 913 * when out of memory a queue can become empty. 
We try to add only a few 914 * buffers here, the queue will be replenished fully as these new buffers 915 * are used up if memory shortage has subsided. 916 * 917 * c) Return coalesced response queue credits in case a response queue is 918 * starved. 919 * 920 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 921 * fifo overflows and the FW doesn't implement any recovery scheme yet. 922 */ 923 static void 924 sge_timer_cb(void *arg) 925 { 926 adapter_t *sc = arg; 927 if ((sc->flags & USING_MSIX) == 0) { 928 929 struct port_info *pi; 930 struct sge_qset *qs; 931 struct sge_txq *txq; 932 int i, j; 933 int reclaim_ofl, refill_rx; 934 935 if (sc->open_device_map == 0) 936 return; 937 938 for (i = 0; i < sc->params.nports; i++) { 939 pi = &sc->port[i]; 940 for (j = 0; j < pi->nqsets; j++) { 941 qs = &sc->sge.qs[pi->first_qset + j]; 942 txq = &qs->txq[0]; 943 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 944 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 945 (qs->fl[1].credits < qs->fl[1].size)); 946 if (reclaim_ofl || refill_rx) { 947 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 948 break; 949 } 950 } 951 } 952 } 953 954 if (sc->params.nports > 2) { 955 int i; 956 957 for_each_port(sc, i) { 958 struct port_info *pi = &sc->port[i]; 959 960 t3_write_reg(sc, A_SG_KDOORBELL, 961 F_SELEGRCNTX | 962 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 963 } 964 } 965 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 966 sc->open_device_map != 0) 967 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 968 } 969 970 /* 971 * This is meant to be a catch-all function to keep sge state private 972 * to sge.c 973 * 974 */ 975 int 976 t3_sge_init_adapter(adapter_t *sc) 977 { 978 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 979 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 980 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 981 return (0); 982 } 983 984 int 985 t3_sge_reset_adapter(adapter_t *sc) 986 { 987 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 988 return (0); 989 } 990 991 int 992 t3_sge_init_port(struct port_info *pi) 993 { 994 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 995 return (0); 996 } 997 998 /** 999 * refill_rspq - replenish an SGE response queue 1000 * @adapter: the adapter 1001 * @q: the response queue to replenish 1002 * @credits: how many new responses to make available 1003 * 1004 * Replenishes a response queue by making the supplied number of responses 1005 * available to HW. 1006 */ 1007 static __inline void 1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1009 { 1010 1011 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 1012 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1013 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1014 } 1015 1016 static void 1017 sge_txq_reclaim_handler(void *arg, int ncount) 1018 { 1019 struct sge_qset *qs = arg; 1020 int i; 1021 1022 for (i = 0; i < 3; i++) 1023 reclaim_completed_tx(qs, 16, i); 1024 } 1025 1026 static void 1027 sge_timer_reclaim(void *arg, int ncount) 1028 { 1029 struct port_info *pi = arg; 1030 int i, nqsets = pi->nqsets; 1031 adapter_t *sc = pi->adapter; 1032 struct sge_qset *qs; 1033 struct mtx *lock; 1034 1035 KASSERT((sc->flags & USING_MSIX) == 0, 1036 ("can't call timer reclaim for msi-x")); 1037 1038 for (i = 0; i < nqsets; i++) { 1039 qs = &sc->sge.qs[pi->first_qset + i]; 1040 1041 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1042 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1043 &sc->sge.qs[0].rspq.lock; 1044 1045 if (mtx_trylock(lock)) { 1046 /* XXX currently assume that we are *NOT* polling */ 1047 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1048 1049 if (qs->fl[0].credits < qs->fl[0].size - 16) 1050 __refill_fl(sc, &qs->fl[0]); 1051 if (qs->fl[1].credits < qs->fl[1].size - 16) 1052 __refill_fl(sc, &qs->fl[1]); 1053 1054 if (status & (1 << qs->rspq.cntxt_id)) { 1055 if (qs->rspq.credits) { 1056 refill_rspq(sc, &qs->rspq, 1); 1057 qs->rspq.credits--; 1058 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1059 1 << qs->rspq.cntxt_id); 1060 } 1061 } 1062 mtx_unlock(lock); 1063 } 1064 } 1065 } 1066 1067 /** 1068 * init_qset_cntxt - initialize an SGE queue set context info 1069 * @qs: the queue set 1070 * @id: the queue set id 1071 * 1072 * Initializes the TIDs and context ids for the queues of a queue set. 1073 */ 1074 static void 1075 init_qset_cntxt(struct sge_qset *qs, u_int id) 1076 { 1077 1078 qs->rspq.cntxt_id = id; 1079 qs->fl[0].cntxt_id = 2 * id; 1080 qs->fl[1].cntxt_id = 2 * id + 1; 1081 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1082 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1083 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1084 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1085 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1086 1087 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1088 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1089 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1090 } 1091 1092 1093 static void 1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1095 { 1096 txq->in_use += ndesc; 1097 /* 1098 * XXX we don't handle stopping of queue 1099 * presumably start handles this when we bump against the end 1100 */ 1101 txqs->gen = txq->gen; 1102 txq->unacked += ndesc; 1103 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1104 txq->unacked &= 31; 1105 txqs->pidx = txq->pidx; 1106 txq->pidx += ndesc; 1107 #ifdef INVARIANTS 1108 if (((txqs->pidx > txq->cidx) && 1109 (txq->pidx < txqs->pidx) && 1110 (txq->pidx >= txq->cidx)) || 1111 ((txqs->pidx < txq->cidx) && 1112 (txq->pidx >= txq-> cidx)) || 1113 ((txqs->pidx < txq->cidx) && 1114 (txq->cidx < txqs->pidx))) 1115 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1116 txqs->pidx, txq->pidx, txq->cidx); 1117 #endif 1118 if (txq->pidx >= txq->size) { 1119 txq->pidx -= txq->size; 1120 txq->gen ^= 1; 1121 } 1122 1123 } 1124 1125 /** 1126 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1127 * @m: the packet mbufs 1128 * @nsegs: the number of segments 1129 * 1130 * Returns the number of Tx descriptors needed for the given Ethernet 1131 * packet. Ethernet packets require addition of WR and CPL headers. 
1132 */ 1133 static __inline unsigned int 1134 calc_tx_descs(const struct mbuf *m, int nsegs) 1135 { 1136 unsigned int flits; 1137 1138 if (m->m_pkthdr.len <= PIO_LEN) 1139 return 1; 1140 1141 flits = sgl_len(nsegs) + 2; 1142 #ifdef TSO_SUPPORTED 1143 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1144 flits++; 1145 #endif 1146 return flits_to_desc(flits); 1147 } 1148 1149 static unsigned int 1150 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1151 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1152 { 1153 struct mbuf *m0; 1154 int err, pktlen, pass = 0; 1155 bus_dma_tag_t tag = txq->entry_tag; 1156 1157 retry: 1158 err = 0; 1159 m0 = *m; 1160 pktlen = m0->m_pkthdr.len; 1161 #if defined(__i386__) || defined(__amd64__) 1162 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1163 goto done; 1164 } else 1165 #endif 1166 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1167 1168 if (err == 0) { 1169 goto done; 1170 } 1171 if (err == EFBIG && pass == 0) { 1172 pass = 1; 1173 /* Too many segments, try to defrag */ 1174 m0 = m_defrag(m0, M_DONTWAIT); 1175 if (m0 == NULL) { 1176 m_freem(*m); 1177 *m = NULL; 1178 return (ENOBUFS); 1179 } 1180 *m = m0; 1181 goto retry; 1182 } else if (err == ENOMEM) { 1183 return (err); 1184 } if (err) { 1185 if (cxgb_debug) 1186 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1187 m_freem(m0); 1188 *m = NULL; 1189 return (err); 1190 } 1191 done: 1192 #if !defined(__i386__) && !defined(__amd64__) 1193 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1194 #endif 1195 txsd->flags |= TX_SW_DESC_MAPPED; 1196 1197 return (0); 1198 } 1199 1200 /** 1201 * make_sgl - populate a scatter/gather list for a packet 1202 * @sgp: the SGL to populate 1203 * @segs: the packet dma segments 1204 * @nsegs: the number of segments 1205 * 1206 * Generates a scatter/gather list for the buffers that make up a packet 1207 * and returns the SGL size in 8-byte words. The caller must size the SGL 1208 * appropriately. 1209 */ 1210 static __inline void 1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1212 { 1213 int i, idx; 1214 1215 for (idx = 0, i = 0; i < nsegs; i++) { 1216 /* 1217 * firmware doesn't like empty segments 1218 */ 1219 if (segs[i].ds_len == 0) 1220 continue; 1221 if (i && idx == 0) 1222 ++sgp; 1223 1224 sgp->len[idx] = htobe32(segs[i].ds_len); 1225 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1226 idx ^= 1; 1227 } 1228 1229 if (idx) { 1230 sgp->len[idx] = 0; 1231 sgp->addr[idx] = 0; 1232 } 1233 } 1234 1235 /** 1236 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1237 * @adap: the adapter 1238 * @q: the Tx queue 1239 * 1240 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1241 * where the HW is going to sleep just after we checked, however, 1242 * then the interrupt handler will detect the outstanding TX packet 1243 * and ring the doorbell for us. 1244 * 1245 * When GTS is disabled we unconditionally ring the doorbell. 
1246 */ 1247 static __inline void 1248 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1249 { 1250 #if USE_GTS 1251 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1252 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1253 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1254 #ifdef T3_TRACE 1255 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1256 q->cntxt_id); 1257 #endif 1258 t3_write_reg(adap, A_SG_KDOORBELL, 1259 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1260 } 1261 #else 1262 wmb(); /* write descriptors before telling HW */ 1263 t3_write_reg(adap, A_SG_KDOORBELL, 1264 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1265 #endif 1266 } 1267 1268 static __inline void 1269 wr_gen2(struct tx_desc *d, unsigned int gen) 1270 { 1271 #if SGE_NUM_GENBITS == 2 1272 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1273 #endif 1274 } 1275 1276 /** 1277 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1278 * @ndesc: number of Tx descriptors spanned by the SGL 1279 * @txd: first Tx descriptor to be written 1280 * @txqs: txq state (generation and producer index) 1281 * @txq: the SGE Tx queue 1282 * @sgl: the SGL 1283 * @flits: number of flits to the start of the SGL in the first descriptor 1284 * @sgl_flits: the SGL size in flits 1285 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1286 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1287 * 1288 * Write a work request header and an associated SGL. If the SGL is 1289 * small enough to fit into one Tx descriptor it has already been written 1290 * and we just need to write the WR header. Otherwise we distribute the 1291 * SGL across the number of descriptors it spans. 1292 */ 1293 static void 1294 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1295 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1296 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1297 { 1298 1299 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1300 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1301 1302 if (__predict_true(ndesc == 1)) { 1303 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1304 V_WR_SGLSFLT(flits)) | wr_hi, 1305 htonl(V_WR_LEN(flits + sgl_flits) | 1306 V_WR_GEN(txqs->gen)) | wr_lo); 1307 /* XXX gen? 
*/ 1308 wr_gen2(txd, txqs->gen); 1309 1310 } else { 1311 unsigned int ogen = txqs->gen; 1312 const uint64_t *fp = (const uint64_t *)sgl; 1313 struct work_request_hdr *wp = wrp; 1314 1315 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1316 V_WR_SGLSFLT(flits)) | wr_hi; 1317 1318 while (sgl_flits) { 1319 unsigned int avail = WR_FLITS - flits; 1320 1321 if (avail > sgl_flits) 1322 avail = sgl_flits; 1323 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1324 sgl_flits -= avail; 1325 ndesc--; 1326 if (!sgl_flits) 1327 break; 1328 1329 fp += avail; 1330 txd++; 1331 txsd++; 1332 if (++txqs->pidx == txq->size) { 1333 txqs->pidx = 0; 1334 txqs->gen ^= 1; 1335 txd = txq->desc; 1336 txsd = txq->sdesc; 1337 } 1338 1339 /* 1340 * when the head of the mbuf chain 1341 * is freed all clusters will be freed 1342 * with it 1343 */ 1344 wrp = (struct work_request_hdr *)txd; 1345 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1346 V_WR_SGLSFLT(1)) | wr_hi; 1347 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1348 sgl_flits + 1)) | 1349 V_WR_GEN(txqs->gen)) | wr_lo; 1350 wr_gen2(txd, txqs->gen); 1351 flits = 1; 1352 } 1353 wrp->wrh_hi |= htonl(F_WR_EOP); 1354 wmb(); 1355 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1356 wr_gen2((struct tx_desc *)wp, ogen); 1357 } 1358 } 1359 1360 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */ 1361 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20) 1362 1363 #ifdef VLAN_SUPPORTED 1364 #define GET_VTAG(cntrl, m) \ 1365 do { \ 1366 if ((m)->m_flags & M_VLANTAG) \ 1367 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1368 } while (0) 1369 1370 #else 1371 #define GET_VTAG(cntrl, m) 1372 #endif 1373 1374 static int 1375 t3_encap(struct sge_qset *qs, struct mbuf **m) 1376 { 1377 adapter_t *sc; 1378 struct mbuf *m0; 1379 struct sge_txq *txq; 1380 struct txq_state txqs; 1381 struct port_info *pi; 1382 unsigned int ndesc, flits, cntrl, mlen; 1383 int err, nsegs, tso_info = 0; 1384 1385 struct work_request_hdr *wrp; 1386 struct tx_sw_desc *txsd; 1387 struct sg_ent *sgp, *sgl; 1388 uint32_t wr_hi, wr_lo, sgl_flits; 1389 bus_dma_segment_t segs[TX_MAX_SEGS]; 1390 1391 struct tx_desc *txd; 1392 1393 pi = qs->port; 1394 sc = pi->adapter; 1395 txq = &qs->txq[TXQ_ETH]; 1396 txd = &txq->desc[txq->pidx]; 1397 txsd = &txq->sdesc[txq->pidx]; 1398 sgl = txq->txq_sgl; 1399 1400 prefetch(txd); 1401 m0 = *m; 1402 1403 DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset); 1404 DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan); 1405 1406 mtx_assert(&qs->lock, MA_OWNED); 1407 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1408 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1409 1410 #ifdef VLAN_SUPPORTED 1411 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1412 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1413 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1414 #endif 1415 if (m0->m_nextpkt != NULL) { 1416 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1417 ndesc = 1; 1418 mlen = 0; 1419 } else { 1420 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1421 &m0, segs, &nsegs))) { 1422 if (cxgb_debug) 1423 printf("failed ... 
err=%d\n", err); 1424 return (err); 1425 } 1426 mlen = m0->m_pkthdr.len; 1427 ndesc = calc_tx_descs(m0, nsegs); 1428 } 1429 txq_prod(txq, ndesc, &txqs); 1430 1431 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1432 txsd->m = m0; 1433 1434 if (m0->m_nextpkt != NULL) { 1435 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1436 int i, fidx; 1437 1438 if (nsegs > 7) 1439 panic("trying to coalesce %d packets in to one WR", nsegs); 1440 txq->txq_coalesced += nsegs; 1441 wrp = (struct work_request_hdr *)txd; 1442 flits = nsegs*2 + 1; 1443 1444 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1445 struct cpl_tx_pkt_batch_entry *cbe; 1446 uint64_t flit; 1447 uint32_t *hflit = (uint32_t *)&flit; 1448 int cflags = m0->m_pkthdr.csum_flags; 1449 1450 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1451 GET_VTAG(cntrl, m0); 1452 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1453 if (__predict_false(!(cflags & CSUM_IP))) 1454 cntrl |= F_TXPKT_IPCSUM_DIS; 1455 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1456 cntrl |= F_TXPKT_L4CSUM_DIS; 1457 1458 hflit[0] = htonl(cntrl); 1459 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1460 flit |= htobe64(1 << 24); 1461 cbe = &cpl_batch->pkt_entry[i]; 1462 cbe->cntrl = hflit[0]; 1463 cbe->len = hflit[1]; 1464 cbe->addr = htobe64(segs[i].ds_addr); 1465 } 1466 1467 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1468 V_WR_SGLSFLT(flits)) | 1469 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1470 wr_lo = htonl(V_WR_LEN(flits) | 1471 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1472 set_wr_hdr(wrp, wr_hi, wr_lo); 1473 wmb(); 1474 wr_gen2(txd, txqs.gen); 1475 check_ring_tx_db(sc, txq); 1476 return (0); 1477 } else if (tso_info) { 1478 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1479 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1480 struct ip *ip; 1481 struct tcphdr *tcp; 1482 char *pkthdr; 1483 1484 txd->flit[2] = 0; 1485 GET_VTAG(cntrl, m0); 1486 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1487 hdr->cntrl = htonl(cntrl); 1488 hdr->len = htonl(mlen | 0x80000000); 1489 1490 DPRINTF("tso buf len=%d\n", mlen); 1491 1492 tagged = m0->m_flags & M_VLANTAG; 1493 if (!tagged) 1494 min_size -= ETHER_VLAN_ENCAP_LEN; 1495 1496 if (__predict_false(mlen < min_size)) { 1497 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1498 m0, mlen, m0->m_pkthdr.tso_segsz, 1499 m0->m_pkthdr.csum_flags, m0->m_flags); 1500 panic("tx tso packet too small"); 1501 } 1502 1503 /* Make sure that ether, ip, tcp headers are all in m0 */ 1504 if (__predict_false(m0->m_len < min_size)) { 1505 m0 = m_pullup(m0, min_size); 1506 if (__predict_false(m0 == NULL)) { 1507 /* XXX panic probably an overreaction */ 1508 panic("couldn't fit header into mbuf"); 1509 } 1510 } 1511 pkthdr = m0->m_data; 1512 1513 if (tagged) { 1514 eth_type = CPL_ETH_II_VLAN; 1515 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1516 ETHER_VLAN_ENCAP_LEN); 1517 } else { 1518 eth_type = CPL_ETH_II; 1519 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1520 } 1521 tcp = (struct tcphdr *)((uint8_t *)ip + 1522 sizeof(*ip)); 1523 1524 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1525 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1526 V_LSO_TCPHDR_WORDS(tcp->th_off); 1527 hdr->lso_info = htonl(tso_info); 1528 1529 if (__predict_false(mlen <= PIO_LEN)) { 1530 /* pkt not undersized but fits in PIO_LEN 1531 * Indicates a TSO bug at the higher levels. 
1532 * 1533 */ 1534 DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1535 m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags); 1536 txsd->m = NULL; 1537 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1538 flits = (mlen + 7) / 8 + 3; 1539 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1540 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1541 F_WR_SOP | F_WR_EOP | txqs.compl); 1542 wr_lo = htonl(V_WR_LEN(flits) | 1543 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1544 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1545 wmb(); 1546 wr_gen2(txd, txqs.gen); 1547 check_ring_tx_db(sc, txq); 1548 return (0); 1549 } 1550 flits = 3; 1551 } else { 1552 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1553 1554 GET_VTAG(cntrl, m0); 1555 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1556 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1557 cntrl |= F_TXPKT_IPCSUM_DIS; 1558 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1559 cntrl |= F_TXPKT_L4CSUM_DIS; 1560 cpl->cntrl = htonl(cntrl); 1561 cpl->len = htonl(mlen | 0x80000000); 1562 1563 if (mlen <= PIO_LEN) { 1564 txsd->m = NULL; 1565 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1566 flits = (mlen + 7) / 8 + 2; 1567 1568 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1569 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1570 F_WR_SOP | F_WR_EOP | txqs.compl); 1571 wr_lo = htonl(V_WR_LEN(flits) | 1572 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1573 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1574 wmb(); 1575 wr_gen2(txd, txqs.gen); 1576 check_ring_tx_db(sc, txq); 1577 return (0); 1578 } 1579 flits = 2; 1580 } 1581 wrp = (struct work_request_hdr *)txd; 1582 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1583 make_sgl(sgp, segs, nsegs); 1584 1585 sgl_flits = sgl_len(nsegs); 1586 1587 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1588 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1589 wr_lo = htonl(V_WR_TID(txq->token)); 1590 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1591 sgl_flits, wr_hi, wr_lo); 1592 check_ring_tx_db(pi->adapter, txq); 1593 1594 return (0); 1595 } 1596 1597 void 1598 cxgb_tx_watchdog(void *arg) 1599 { 1600 struct sge_qset *qs = arg; 1601 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1602 1603 if (qs->coalescing != 0 && 1604 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1605 TXQ_RING_EMPTY(qs)) 1606 qs->coalescing = 0; 1607 else if (qs->coalescing == 0 && 1608 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1609 qs->coalescing = 1; 1610 if (TXQ_TRYLOCK(qs)) { 1611 qs->qs_flags |= QS_FLUSHING; 1612 cxgb_start_locked(qs); 1613 qs->qs_flags &= ~QS_FLUSHING; 1614 TXQ_UNLOCK(qs); 1615 } 1616 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1617 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1618 qs, txq->txq_watchdog.c_cpu); 1619 } 1620 1621 static void 1622 cxgb_tx_timeout(void *arg) 1623 { 1624 struct sge_qset *qs = arg; 1625 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1626 1627 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1628 qs->coalescing = 1; 1629 if (TXQ_TRYLOCK(qs)) { 1630 qs->qs_flags |= QS_TIMEOUT; 1631 cxgb_start_locked(qs); 1632 qs->qs_flags &= ~QS_TIMEOUT; 1633 TXQ_UNLOCK(qs); 1634 } 1635 } 1636 1637 static void 1638 cxgb_start_locked(struct sge_qset *qs) 1639 { 1640 struct mbuf *m_head = NULL; 1641 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1642 int avail, txmax; 1643 int in_use_init = txq->in_use; 1644 struct port_info *pi = qs->port; 1645 struct ifnet *ifp = pi->ifp; 1646 avail = txq->size - txq->in_use - 4; 1647 txmax = 
min(TX_START_MAX_DESC, avail); 1648 1649 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1650 reclaim_completed_tx(qs, 0, TXQ_ETH); 1651 1652 if (!pi->link_config.link_ok) { 1653 TXQ_RING_FLUSH(qs); 1654 return; 1655 } 1656 TXQ_LOCK_ASSERT(qs); 1657 while ((txq->in_use - in_use_init < txmax) && 1658 !TXQ_RING_EMPTY(qs) && 1659 (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1660 pi->link_config.link_ok) { 1661 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1662 1663 if ((m_head = cxgb_dequeue(qs)) == NULL) 1664 break; 1665 /* 1666 * Encapsulation can modify our pointer, and or make it 1667 * NULL on failure. In that event, we can't requeue. 1668 */ 1669 if (t3_encap(qs, &m_head) || m_head == NULL) 1670 break; 1671 1672 /* Send a copy of the frame to the BPF listener */ 1673 ETHER_BPF_MTAP(ifp, m_head); 1674 1675 /* 1676 * We sent via PIO, no longer need a copy 1677 */ 1678 if (m_head->m_nextpkt == NULL && 1679 m_head->m_pkthdr.len <= PIO_LEN) 1680 m_freem(m_head); 1681 1682 m_head = NULL; 1683 } 1684 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1685 pi->link_config.link_ok) 1686 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1687 qs, txq->txq_timer.c_cpu); 1688 if (m_head != NULL) 1689 m_freem(m_head); 1690 } 1691 1692 static int 1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1694 { 1695 struct port_info *pi = qs->port; 1696 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1697 struct buf_ring *br = txq->txq_mr; 1698 int error, avail; 1699 1700 avail = txq->size - txq->in_use; 1701 TXQ_LOCK_ASSERT(qs); 1702 1703 /* 1704 * We can only do a direct transmit if the following are true: 1705 * - we aren't coalescing (ring < 3/4 full) 1706 * - the link is up -- checked in caller 1707 * - there are no packets enqueued already 1708 * - there is space in hardware transmit queue 1709 */ 1710 if (check_pkt_coalesce(qs) == 0 && 1711 TXQ_RING_EMPTY(qs) && avail > 4) { 1712 if (t3_encap(qs, &m)) { 1713 if (m != NULL && 1714 (error = drbr_enqueue(ifp, br, m)) != 0) 1715 return (error); 1716 } else { 1717 /* 1718 * We've bypassed the buf ring so we need to update 1719 * the stats directly 1720 */ 1721 txq->txq_direct_packets++; 1722 txq->txq_direct_bytes += m->m_pkthdr.len; 1723 /* 1724 ** Send a copy of the frame to the BPF 1725 ** listener and set the watchdog on. 
1726 */ 1727 ETHER_BPF_MTAP(ifp, m); 1728 /* 1729 * We sent via PIO, no longer need a copy 1730 */ 1731 if (m->m_pkthdr.len <= PIO_LEN) 1732 m_freem(m); 1733 1734 } 1735 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1736 return (error); 1737 1738 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1739 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1740 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1741 cxgb_start_locked(qs); 1742 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1743 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1744 qs, txq->txq_timer.c_cpu); 1745 return (0); 1746 } 1747 1748 int 1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1750 { 1751 struct sge_qset *qs; 1752 struct port_info *pi = ifp->if_softc; 1753 int error, qidx = pi->first_qset; 1754 1755 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1756 ||(!pi->link_config.link_ok)) { 1757 m_freem(m); 1758 return (0); 1759 } 1760 1761 if (m->m_flags & M_FLOWID) 1762 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1763 1764 qs = &pi->adapter->sge.qs[qidx]; 1765 1766 if (TXQ_TRYLOCK(qs)) { 1767 /* XXX running */ 1768 error = cxgb_transmit_locked(ifp, qs, m); 1769 TXQ_UNLOCK(qs); 1770 } else 1771 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1772 return (error); 1773 } 1774 void 1775 cxgb_start(struct ifnet *ifp) 1776 { 1777 struct port_info *pi = ifp->if_softc; 1778 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset]; 1779 1780 if (!pi->link_config.link_ok) 1781 return; 1782 1783 TXQ_LOCK(qs); 1784 cxgb_start_locked(qs); 1785 TXQ_UNLOCK(qs); 1786 } 1787 1788 void 1789 cxgb_qflush(struct ifnet *ifp) 1790 { 1791 /* 1792 * flush any enqueued mbufs in the buf_rings 1793 * and in the transmit queues 1794 * no-op for now 1795 */ 1796 return; 1797 } 1798 1799 /** 1800 * write_imm - write a packet into a Tx descriptor as immediate data 1801 * @d: the Tx descriptor to write 1802 * @m: the packet 1803 * @len: the length of packet data to write as immediate data 1804 * @gen: the generation bit value to write 1805 * 1806 * Writes a packet as immediate data into a Tx descriptor. The packet 1807 * contains a work request at its beginning. We must write the packet 1808 * carefully so the SGE doesn't read accidentally before it's written in 1809 * its entirety. 
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
	  unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
					V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
					V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);

	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);

}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
		 struct mbuf *m, unsigned int ndesc,
		 unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}
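/*
 * Illustrative note on check_desc_avail()'s tri-state return (it mirrors
 * how ctrl_xmit() below uses it; this is not additional driver logic):
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 *		if (ret == 1)
 *			return (ENOSPC);	// mbuf was queued on sendq
 *		else if (ret == 2)
 *			goto again;		// descriptors freed up, retry
 *		// ret == 0: enough room, write the descriptor
 *
 * A return of 2 must always be followed by another reclaim/check pass.
 */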
Packets sent through 1920 * a control queue must fit entirely as immediate data in a single Tx 1921 * descriptor and have no page fragments. 1922 */ 1923 static int 1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1925 { 1926 int ret; 1927 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1928 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1929 1930 if (__predict_false(!immediate(m))) { 1931 m_freem(m); 1932 return 0; 1933 } 1934 1935 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1936 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1937 1938 TXQ_LOCK(qs); 1939 again: reclaim_completed_tx_imm(q); 1940 1941 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1942 if (__predict_false(ret)) { 1943 if (ret == 1) { 1944 TXQ_UNLOCK(qs); 1945 log(LOG_ERR, "no desc available\n"); 1946 return (ENOSPC); 1947 } 1948 goto again; 1949 } 1950 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1951 1952 q->in_use++; 1953 if (++q->pidx >= q->size) { 1954 q->pidx = 0; 1955 q->gen ^= 1; 1956 } 1957 TXQ_UNLOCK(qs); 1958 t3_write_reg(adap, A_SG_KDOORBELL, 1959 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1960 return (0); 1961 } 1962 1963 1964 /** 1965 * restart_ctrlq - restart a suspended control queue 1966 * @qs: the queue set cotaining the control queue 1967 * 1968 * Resumes transmission on a suspended Tx control queue. 1969 */ 1970 static void 1971 restart_ctrlq(void *data, int npending) 1972 { 1973 struct mbuf *m; 1974 struct sge_qset *qs = (struct sge_qset *)data; 1975 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1976 adapter_t *adap = qs->port->adapter; 1977 1978 log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use); 1979 1980 TXQ_LOCK(qs); 1981 again: reclaim_completed_tx_imm(q); 1982 1983 while (q->in_use < q->size && 1984 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1985 1986 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1987 1988 if (++q->pidx >= q->size) { 1989 q->pidx = 0; 1990 q->gen ^= 1; 1991 } 1992 q->in_use++; 1993 } 1994 if (!mbufq_empty(&q->sendq)) { 1995 setbit(&qs->txq_stopped, TXQ_CTRL); 1996 1997 if (should_restart_tx(q) && 1998 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1999 goto again; 2000 q->stops++; 2001 } 2002 TXQ_UNLOCK(qs); 2003 t3_write_reg(adap, A_SG_KDOORBELL, 2004 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2005 } 2006 2007 2008 /* 2009 * Send a management message through control queue 0 2010 */ 2011 int 2012 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2013 { 2014 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2015 } 2016 2017 /** 2018 * free_qset - free the resources of an SGE queue set 2019 * @sc: the controller owning the queue set 2020 * @q: the queue set 2021 * 2022 * Release the HW and SW resources associated with an SGE queue set, such 2023 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2024 * queue set must be quiesced prior to calling this. 
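 *	The caller must hold the queue set's TXQ lock; this routine drops
 *	that lock and destroys it as part of the teardown (see
 *	t3_free_sge_resources() below).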
2025 */ 2026 static void 2027 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2028 { 2029 int i; 2030 2031 reclaim_completed_tx(q, 0, TXQ_ETH); 2032 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2033 if (q->txq[i].txq_mr != NULL) 2034 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); 2035 if (q->txq[i].txq_ifq != NULL) { 2036 ifq_delete(q->txq[i].txq_ifq); 2037 free(q->txq[i].txq_ifq, M_DEVBUF); 2038 } 2039 } 2040 2041 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2042 if (q->fl[i].desc) { 2043 mtx_lock_spin(&sc->sge.reg_lock); 2044 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2045 mtx_unlock_spin(&sc->sge.reg_lock); 2046 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2047 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2048 q->fl[i].desc_map); 2049 bus_dma_tag_destroy(q->fl[i].desc_tag); 2050 bus_dma_tag_destroy(q->fl[i].entry_tag); 2051 } 2052 if (q->fl[i].sdesc) { 2053 free_rx_bufs(sc, &q->fl[i]); 2054 free(q->fl[i].sdesc, M_DEVBUF); 2055 } 2056 } 2057 2058 mtx_unlock(&q->lock); 2059 MTX_DESTROY(&q->lock); 2060 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2061 if (q->txq[i].desc) { 2062 mtx_lock_spin(&sc->sge.reg_lock); 2063 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2064 mtx_unlock_spin(&sc->sge.reg_lock); 2065 bus_dmamap_unload(q->txq[i].desc_tag, 2066 q->txq[i].desc_map); 2067 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2068 q->txq[i].desc_map); 2069 bus_dma_tag_destroy(q->txq[i].desc_tag); 2070 bus_dma_tag_destroy(q->txq[i].entry_tag); 2071 } 2072 if (q->txq[i].sdesc) { 2073 free(q->txq[i].sdesc, M_DEVBUF); 2074 } 2075 } 2076 2077 if (q->rspq.desc) { 2078 mtx_lock_spin(&sc->sge.reg_lock); 2079 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2080 mtx_unlock_spin(&sc->sge.reg_lock); 2081 2082 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2083 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2084 q->rspq.desc_map); 2085 bus_dma_tag_destroy(q->rspq.desc_tag); 2086 MTX_DESTROY(&q->rspq.lock); 2087 } 2088 2089 #ifdef LRO_SUPPORTED 2090 tcp_lro_free(&q->lro.ctrl); 2091 #endif 2092 2093 bzero(q, sizeof(*q)); 2094 } 2095 2096 /** 2097 * t3_free_sge_resources - free SGE resources 2098 * @sc: the adapter softc 2099 * 2100 * Frees resources used by the SGE queue sets. 2101 */ 2102 void 2103 t3_free_sge_resources(adapter_t *sc) 2104 { 2105 int i, nqsets; 2106 2107 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2108 nqsets += sc->port[i].nqsets; 2109 2110 for (i = 0; i < nqsets; ++i) { 2111 TXQ_LOCK(&sc->sge.qs[i]); 2112 t3_free_qset(sc, &sc->sge.qs[i]); 2113 } 2114 2115 } 2116 2117 /** 2118 * t3_sge_start - enable SGE 2119 * @sc: the controller softc 2120 * 2121 * Enables the SGE for DMAs. This is the last step in starting packet 2122 * transfers. 2123 */ 2124 void 2125 t3_sge_start(adapter_t *sc) 2126 { 2127 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2128 } 2129 2130 /** 2131 * t3_sge_stop - disable SGE operation 2132 * @sc: the adapter 2133 * 2134 * Disables the DMA engine. This can be called in emeregencies (e.g., 2135 * from error interrupts) or from normal process context. In the latter 2136 * case it also disables any pending queue restart tasklets. Note that 2137 * if it is called in interrupt context it cannot disable the restart 2138 * tasklets as it cannot wait, however the tasklets will have no effect 2139 * since the doorbells are disabled and the driver will call this again 2140 * later from process context, at which time the tasklets will be stopped 2141 * if they are still running. 
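 *	(The taskqueue_drain() calls that would stop the restart tasks from
 *	process context are currently disabled under "#ifdef notyet" below.)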
2142 */ 2143 void 2144 t3_sge_stop(adapter_t *sc) 2145 { 2146 int i, nqsets; 2147 2148 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2149 2150 if (sc->tq == NULL) 2151 return; 2152 2153 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2154 nqsets += sc->port[i].nqsets; 2155 #ifdef notyet 2156 /* 2157 * 2158 * XXX 2159 */ 2160 for (i = 0; i < nqsets; ++i) { 2161 struct sge_qset *qs = &sc->sge.qs[i]; 2162 2163 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2164 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2165 } 2166 #endif 2167 } 2168 2169 /** 2170 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2171 * @adapter: the adapter 2172 * @q: the Tx queue to reclaim descriptors from 2173 * @reclaimable: the number of descriptors to reclaim 2174 * @m_vec_size: maximum number of buffers to reclaim 2175 * @desc_reclaimed: returns the number of descriptors reclaimed 2176 * 2177 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2178 * Tx buffers. Called with the Tx queue lock held. 2179 * 2180 * Returns number of buffers of reclaimed 2181 */ 2182 void 2183 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2184 { 2185 struct tx_sw_desc *txsd; 2186 unsigned int cidx, mask; 2187 struct sge_txq *q = &qs->txq[queue]; 2188 2189 #ifdef T3_TRACE 2190 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2191 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2192 #endif 2193 cidx = q->cidx; 2194 mask = q->size - 1; 2195 txsd = &q->sdesc[cidx]; 2196 2197 mtx_assert(&qs->lock, MA_OWNED); 2198 while (reclaimable--) { 2199 prefetch(q->sdesc[(cidx + 1) & mask].m); 2200 prefetch(q->sdesc[(cidx + 2) & mask].m); 2201 2202 if (txsd->m != NULL) { 2203 if (txsd->flags & TX_SW_DESC_MAPPED) { 2204 bus_dmamap_unload(q->entry_tag, txsd->map); 2205 txsd->flags &= ~TX_SW_DESC_MAPPED; 2206 } 2207 m_freem_list(txsd->m); 2208 txsd->m = NULL; 2209 } else 2210 q->txq_skipped++; 2211 2212 ++txsd; 2213 if (++cidx == q->size) { 2214 cidx = 0; 2215 txsd = q->sdesc; 2216 } 2217 } 2218 q->cidx = cidx; 2219 2220 } 2221 2222 /** 2223 * is_new_response - check if a response is newly written 2224 * @r: the response descriptor 2225 * @q: the response queue 2226 * 2227 * Returns true if a response descriptor contains a yet unprocessed 2228 * response. 2229 */ 2230 static __inline int 2231 is_new_response(const struct rsp_desc *r, 2232 const struct sge_rspq *q) 2233 { 2234 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2235 } 2236 2237 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2238 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2239 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2240 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2241 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2242 2243 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2244 #define NOMEM_INTR_DELAY 2500 2245 2246 /** 2247 * write_ofld_wr - write an offload work request 2248 * @adap: the adapter 2249 * @m: the packet to send 2250 * @q: the Tx queue 2251 * @pidx: index of the first Tx descriptor to write 2252 * @gen: the generation value to use 2253 * @ndesc: number of descriptors the packet will occupy 2254 * 2255 * Write an offload work request to send the supplied packet. The packet 2256 * data already carry the work request with most fields populated. 
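 *	The portion of the mbuf past the work request header is copied into
 *	the leading flits of the first descriptor; the header flit itself is
 *	rewritten by write_wr_hdr_sgl(), and any DMA segments are appended
 *	as a scatter/gather list built by make_sgl().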
2257 */ 2258 static void 2259 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2260 struct sge_txq *q, unsigned int pidx, 2261 unsigned int gen, unsigned int ndesc, 2262 bus_dma_segment_t *segs, unsigned int nsegs) 2263 { 2264 unsigned int sgl_flits, flits; 2265 struct work_request_hdr *from; 2266 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2267 struct tx_desc *d = &q->desc[pidx]; 2268 struct txq_state txqs; 2269 2270 if (immediate(m) && nsegs == 0) { 2271 write_imm(d, m, m->m_len, gen); 2272 return; 2273 } 2274 2275 /* Only TX_DATA builds SGLs */ 2276 from = mtod(m, struct work_request_hdr *); 2277 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2278 2279 flits = m->m_len / 8; 2280 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2281 2282 make_sgl(sgp, segs, nsegs); 2283 sgl_flits = sgl_len(nsegs); 2284 2285 txqs.gen = gen; 2286 txqs.pidx = pidx; 2287 txqs.compl = 0; 2288 2289 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2290 from->wrh_hi, from->wrh_lo); 2291 } 2292 2293 /** 2294 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2295 * @m: the packet 2296 * 2297 * Returns the number of Tx descriptors needed for the given offload 2298 * packet. These packets are already fully constructed. 2299 */ 2300 static __inline unsigned int 2301 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2302 { 2303 unsigned int flits, cnt = 0; 2304 int ndescs; 2305 2306 if (m->m_len <= WR_LEN && nsegs == 0) 2307 return (1); /* packet fits as immediate data */ 2308 2309 /* 2310 * This needs to be re-visited for TOE 2311 */ 2312 2313 cnt = nsegs; 2314 2315 /* headers */ 2316 flits = m->m_len / 8; 2317 2318 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2319 2320 return (ndescs); 2321 } 2322 2323 /** 2324 * ofld_xmit - send a packet through an offload queue 2325 * @adap: the adapter 2326 * @q: the Tx offload queue 2327 * @m: the packet 2328 * 2329 * Send an offload packet through an SGE offload queue. 
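 *	If the queue runs out of descriptors the mbuf is left on the sendq
 *	with the computed descriptor count stashed in its priority (see
 *	m_set_priority() below) so that restart_offloadq() can resume it
 *	without recalculating.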
2330 */ 2331 static int 2332 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2333 { 2334 int ret, nsegs; 2335 unsigned int ndesc; 2336 unsigned int pidx, gen; 2337 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2338 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2339 struct tx_sw_desc *stx; 2340 2341 nsegs = m_get_sgllen(m); 2342 vsegs = m_get_sgl(m); 2343 ndesc = calc_tx_descs_ofld(m, nsegs); 2344 busdma_map_sgl(vsegs, segs, nsegs); 2345 2346 stx = &q->sdesc[q->pidx]; 2347 2348 TXQ_LOCK(qs); 2349 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2350 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2351 if (__predict_false(ret)) { 2352 if (ret == 1) { 2353 printf("no ofld desc avail\n"); 2354 2355 m_set_priority(m, ndesc); /* save for restart */ 2356 TXQ_UNLOCK(qs); 2357 return (EINTR); 2358 } 2359 goto again; 2360 } 2361 2362 gen = q->gen; 2363 q->in_use += ndesc; 2364 pidx = q->pidx; 2365 q->pidx += ndesc; 2366 if (q->pidx >= q->size) { 2367 q->pidx -= q->size; 2368 q->gen ^= 1; 2369 } 2370 #ifdef T3_TRACE 2371 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2372 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2373 ndesc, pidx, skb->len, skb->len - skb->data_len, 2374 skb_shinfo(skb)->nr_frags); 2375 #endif 2376 TXQ_UNLOCK(qs); 2377 2378 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2379 check_ring_tx_db(adap, q); 2380 return (0); 2381 } 2382 2383 /** 2384 * restart_offloadq - restart a suspended offload queue 2385 * @qs: the queue set cotaining the offload queue 2386 * 2387 * Resumes transmission on a suspended Tx offload queue. 2388 */ 2389 static void 2390 restart_offloadq(void *data, int npending) 2391 { 2392 struct mbuf *m; 2393 struct sge_qset *qs = data; 2394 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2395 adapter_t *adap = qs->port->adapter; 2396 bus_dma_segment_t segs[TX_MAX_SEGS]; 2397 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2398 int nsegs, cleaned; 2399 2400 TXQ_LOCK(qs); 2401 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2402 2403 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2404 unsigned int gen, pidx; 2405 unsigned int ndesc = m_get_priority(m); 2406 2407 if (__predict_false(q->size - q->in_use < ndesc)) { 2408 setbit(&qs->txq_stopped, TXQ_OFLD); 2409 if (should_restart_tx(q) && 2410 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2411 goto again; 2412 q->stops++; 2413 break; 2414 } 2415 2416 gen = q->gen; 2417 q->in_use += ndesc; 2418 pidx = q->pidx; 2419 q->pidx += ndesc; 2420 if (q->pidx >= q->size) { 2421 q->pidx -= q->size; 2422 q->gen ^= 1; 2423 } 2424 2425 (void)mbufq_dequeue(&q->sendq); 2426 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2427 TXQ_UNLOCK(qs); 2428 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2429 TXQ_LOCK(qs); 2430 } 2431 #if USE_GTS 2432 set_bit(TXQ_RUNNING, &q->flags); 2433 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2434 #endif 2435 TXQ_UNLOCK(qs); 2436 wmb(); 2437 t3_write_reg(adap, A_SG_KDOORBELL, 2438 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2439 } 2440 2441 /** 2442 * queue_set - return the queue set a packet should use 2443 * @m: the packet 2444 * 2445 * Maps a packet to the SGE queue set it should use. The desired queue 2446 * set is carried in bits 1-3 in the packet's priority. 2447 */ 2448 static __inline int 2449 queue_set(const struct mbuf *m) 2450 { 2451 return m_get_priority(m) >> 1; 2452 } 2453 2454 /** 2455 * is_ctrl_pkt - return whether an offload packet is a control packet 2456 * @m: the packet 2457 * 2458 * Determines whether an offload packet should use an OFLD or a CTRL 2459 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2460 */ 2461 static __inline int 2462 is_ctrl_pkt(const struct mbuf *m) 2463 { 2464 return m_get_priority(m) & 1; 2465 } 2466 2467 /** 2468 * t3_offload_tx - send an offload packet 2469 * @tdev: the offload device to send to 2470 * @m: the packet 2471 * 2472 * Sends an offload packet. We use the packet priority to select the 2473 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2474 * should be sent as regular or control, bits 1-3 select the queue set. 2475 */ 2476 int 2477 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2478 { 2479 adapter_t *adap = tdev2adap(tdev); 2480 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2481 2482 if (__predict_false(is_ctrl_pkt(m))) 2483 return ctrl_xmit(adap, qs, m); 2484 2485 return ofld_xmit(adap, qs, m); 2486 } 2487 2488 /** 2489 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2490 * @tdev: the offload device that will be receiving the packets 2491 * @q: the SGE response queue that assembled the bundle 2492 * @m: the partial bundle 2493 * @n: the number of packets in the bundle 2494 * 2495 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2496 */ 2497 static __inline void 2498 deliver_partial_bundle(struct t3cdev *tdev, 2499 struct sge_rspq *q, 2500 struct mbuf *mbufs[], int n) 2501 { 2502 if (n) { 2503 q->offload_bundles++; 2504 cxgb_ofld_recv(tdev, mbufs, n); 2505 } 2506 } 2507 2508 static __inline int 2509 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2510 struct mbuf *m, struct mbuf *rx_gather[], 2511 unsigned int gather_idx) 2512 { 2513 2514 rq->offload_pkts++; 2515 m->m_pkthdr.header = mtod(m, void *); 2516 rx_gather[gather_idx++] = m; 2517 if (gather_idx == RX_BUNDLE_SIZE) { 2518 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2519 gather_idx = 0; 2520 rq->offload_bundles++; 2521 } 2522 return (gather_idx); 2523 } 2524 2525 static void 2526 restart_tx(struct sge_qset *qs) 2527 { 2528 struct adapter *sc = qs->port->adapter; 2529 2530 2531 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2532 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2533 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2534 qs->txq[TXQ_OFLD].restarts++; 2535 DPRINTF("restarting TXQ_OFLD\n"); 2536 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2537 } 2538 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2539 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2540 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2541 qs->txq[TXQ_CTRL].in_use); 2542 2543 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2544 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2545 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2546 qs->txq[TXQ_CTRL].restarts++; 2547 DPRINTF("restarting TXQ_CTRL\n"); 2548 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2549 } 2550 } 2551 2552 /** 2553 * t3_sge_alloc_qset - initialize an SGE queue set 2554 * @sc: the controller softc 2555 * @id: the queue set id 2556 * @nports: how many Ethernet ports will be using this queue set 2557 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2558 * @p: configuration parameters for this queue set 2559 * @ntxq: number of Tx queues for the queue set 2560 * @pi: port info for queue set 2561 * 2562 * Allocate resources and initialize an SGE queue set. A queue set 2563 * comprises a response queue, two Rx free-buffer queues, and up to 3 2564 * Tx queues. 
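 *	(The ntxq argument selects how many of those Tx queues actually get
 *	descriptor rings and hardware egress contexts.)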
The Tx queues are assigned roles in the order Ethernet 2565 * queue, offload queue, and control queue. 2566 */ 2567 int 2568 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2569 const struct qset_params *p, int ntxq, struct port_info *pi) 2570 { 2571 struct sge_qset *q = &sc->sge.qs[id]; 2572 int i, ret = 0; 2573 2574 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2575 q->port = pi; 2576 2577 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2578 2579 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2580 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2581 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2582 goto err; 2583 } 2584 if ((q->txq[i].txq_ifq = 2585 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2586 == NULL) { 2587 device_printf(sc->dev, "failed to allocate ifq\n"); 2588 goto err; 2589 } 2590 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2591 callout_init(&q->txq[i].txq_timer, 1); 2592 callout_init(&q->txq[i].txq_watchdog, 1); 2593 q->txq[i].txq_timer.c_cpu = id % mp_ncpus; 2594 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; 2595 } 2596 init_qset_cntxt(q, id); 2597 q->idx = id; 2598 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2599 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2600 &q->fl[0].desc, &q->fl[0].sdesc, 2601 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2602 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2603 printf("error %d from alloc ring fl0\n", ret); 2604 goto err; 2605 } 2606 2607 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2608 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2609 &q->fl[1].desc, &q->fl[1].sdesc, 2610 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2611 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2612 printf("error %d from alloc ring fl1\n", ret); 2613 goto err; 2614 } 2615 2616 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2617 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2618 &q->rspq.desc_tag, &q->rspq.desc_map, 2619 NULL, NULL)) != 0) { 2620 printf("error %d from alloc ring rspq\n", ret); 2621 goto err; 2622 } 2623 2624 for (i = 0; i < ntxq; ++i) { 2625 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2626 2627 if ((ret = alloc_ring(sc, p->txq_size[i], 2628 sizeof(struct tx_desc), sz, 2629 &q->txq[i].phys_addr, &q->txq[i].desc, 2630 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2631 &q->txq[i].desc_map, 2632 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2633 printf("error %d from alloc ring tx %i\n", ret, i); 2634 goto err; 2635 } 2636 mbufq_init(&q->txq[i].sendq); 2637 q->txq[i].gen = 1; 2638 q->txq[i].size = p->txq_size[i]; 2639 } 2640 2641 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2642 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2643 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2644 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2645 2646 q->fl[0].gen = q->fl[1].gen = 1; 2647 q->fl[0].size = p->fl_size; 2648 q->fl[1].size = p->jumbo_size; 2649 2650 q->rspq.gen = 1; 2651 q->rspq.cidx = 0; 2652 q->rspq.size = p->rspq_size; 2653 2654 q->txq[TXQ_ETH].stop_thres = nports * 2655 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2656 2657 q->fl[0].buf_size = MCLBYTES; 2658 q->fl[0].zone = zone_pack; 2659 q->fl[0].type = EXT_PACKET; 2660 #if __FreeBSD_version > 800000 2661 if (cxgb_use_16k_clusters) { 2662 q->fl[1].buf_size = MJUM16BYTES; 2663 q->fl[1].zone = zone_jumbo16; 2664 q->fl[1].type = EXT_JUMBO16; 2665 } else { 2666 q->fl[1].buf_size = MJUM9BYTES; 2667 q->fl[1].zone = zone_jumbo9; 2668 q->fl[1].type = EXT_JUMBO9; 2669 } 2670 #else 2671 q->fl[1].buf_size = MJUMPAGESIZE; 2672 q->fl[1].zone = zone_jumbop; 2673 q->fl[1].type = EXT_JUMBOP; 2674 #endif 2675 2676 #ifdef LRO_SUPPORTED 2677 /* Allocate and setup the lro_ctrl structure */ 2678 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2679 ret = tcp_lro_init(&q->lro.ctrl); 2680 if (ret) { 2681 printf("error %d from tcp_lro_init\n", ret); 2682 goto err; 2683 } 2684 q->lro.ctrl.ifp = pi->ifp; 2685 #endif 2686 2687 mtx_lock_spin(&sc->sge.reg_lock); 2688 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2689 q->rspq.phys_addr, q->rspq.size, 2690 q->fl[0].buf_size, 1, 0); 2691 if (ret) { 2692 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2693 goto err_unlock; 2694 } 2695 2696 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2697 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2698 q->fl[i].phys_addr, q->fl[i].size, 2699 q->fl[i].buf_size, p->cong_thres, 1, 2700 0); 2701 if (ret) { 2702 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2703 goto err_unlock; 2704 } 2705 } 2706 2707 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2708 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2709 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2710 1, 0); 2711 if (ret) { 2712 printf("error %d from t3_sge_init_ecntxt\n", ret); 2713 goto err_unlock; 2714 } 2715 2716 if (ntxq > 1) { 2717 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2718 USE_GTS, SGE_CNTXT_OFLD, id, 2719 q->txq[TXQ_OFLD].phys_addr, 2720 q->txq[TXQ_OFLD].size, 0, 1, 0); 2721 if (ret) { 2722 printf("error %d from t3_sge_init_ecntxt\n", ret); 2723 goto err_unlock; 2724 } 2725 } 2726 2727 if (ntxq > 2) { 2728 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2729 SGE_CNTXT_CTRL, id, 2730 q->txq[TXQ_CTRL].phys_addr, 2731 q->txq[TXQ_CTRL].size, 2732 q->txq[TXQ_CTRL].token, 1, 0); 2733 if (ret) { 2734 printf("error %d from t3_sge_init_ecntxt\n", ret); 2735 goto err_unlock; 2736 } 2737 } 2738 2739 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2740 device_get_unit(sc->dev), irq_vec_idx); 
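	/*
	 * Give the response queue its own mutex; the name built above
	 * (controller unit plus IRQ vector index) is what identifies it in
	 * witness and lock-profiling output.
	 */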
2741 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2742 2743 mtx_unlock_spin(&sc->sge.reg_lock); 2744 t3_update_qset_coalesce(q, p); 2745 q->port = pi; 2746 2747 refill_fl(sc, &q->fl[0], q->fl[0].size); 2748 refill_fl(sc, &q->fl[1], q->fl[1].size); 2749 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2750 2751 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2752 V_NEWTIMER(q->rspq.holdoff_tmr)); 2753 2754 return (0); 2755 2756 err_unlock: 2757 mtx_unlock_spin(&sc->sge.reg_lock); 2758 err: 2759 TXQ_LOCK(q); 2760 t3_free_qset(sc, q); 2761 2762 return (ret); 2763 } 2764 2765 /* 2766 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2767 * ethernet data. Hardware assistance with various checksums and any vlan tag 2768 * will also be taken into account here. 2769 */ 2770 void 2771 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2772 { 2773 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2774 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2775 struct ifnet *ifp = pi->ifp; 2776 2777 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2778 2779 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2780 cpl->csum_valid && cpl->csum == 0xffff) { 2781 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2782 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2783 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2784 m->m_pkthdr.csum_data = 0xffff; 2785 } 2786 /* 2787 * XXX need to add VLAN support for 6.x 2788 */ 2789 #ifdef VLAN_SUPPORTED 2790 if (__predict_false(cpl->vlan_valid)) { 2791 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2792 m->m_flags |= M_VLANTAG; 2793 } 2794 #endif 2795 2796 m->m_pkthdr.rcvif = ifp; 2797 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2798 /* 2799 * adjust after conversion to mbuf chain 2800 */ 2801 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2802 m->m_len -= (sizeof(*cpl) + ethpad); 2803 m->m_data += (sizeof(*cpl) + ethpad); 2804 } 2805 2806 /** 2807 * get_packet - return the next ingress packet buffer from a free list 2808 * @adap: the adapter that received the packet 2809 * @drop_thres: # of remaining buffers before we start dropping packets 2810 * @qs: the qset that the SGE free list holding the packet belongs to 2811 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2812 * @r: response descriptor 2813 * 2814 * Get the next packet from a free list and complete setup of the 2815 * sk_buff. If the packet is small we make a copy and recycle the 2816 * original buffer, otherwise we use the original buffer itself. If a 2817 * positive drop threshold is supplied packets are dropped and their 2818 * buffers recycled if (a) the number of remaining buffers is under the 2819 * threshold and the packet is too big to copy, or (b) the packet should 2820 * be copied but there is no memory for the copy. 2821 */ 2822 static int 2823 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2824 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2825 { 2826 2827 unsigned int len_cq = ntohl(r->len_cq); 2828 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2829 int mask, cidx = fl->cidx; 2830 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2831 uint32_t len = G_RSPD_LEN(len_cq); 2832 uint32_t flags = M_EXT; 2833 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2834 caddr_t cl; 2835 struct mbuf *m; 2836 int ret = 0; 2837 2838 mask = fl->size - 1; 2839 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2840 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2841 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2842 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2843 2844 fl->credits--; 2845 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2846 2847 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2848 sopeop == RSPQ_SOP_EOP) { 2849 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2850 goto skip_recycle; 2851 cl = mtod(m, void *); 2852 memcpy(cl, sd->rxsd_cl, len); 2853 recycle_rx_buf(adap, fl, fl->cidx); 2854 m->m_pkthdr.len = m->m_len = len; 2855 m->m_flags = 0; 2856 mh->mh_head = mh->mh_tail = m; 2857 ret = 1; 2858 goto done; 2859 } else { 2860 skip_recycle: 2861 bus_dmamap_unload(fl->entry_tag, sd->map); 2862 cl = sd->rxsd_cl; 2863 m = sd->m; 2864 2865 if ((sopeop == RSPQ_SOP_EOP) || 2866 (sopeop == RSPQ_SOP)) 2867 flags |= M_PKTHDR; 2868 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2869 if (fl->zone == zone_pack) { 2870 /* 2871 * restore clobbered data pointer 2872 */ 2873 m->m_data = m->m_ext.ext_buf; 2874 } else { 2875 m_cljset(m, cl, fl->type); 2876 } 2877 m->m_len = len; 2878 } 2879 switch(sopeop) { 2880 case RSPQ_SOP_EOP: 2881 ret = 1; 2882 /* FALLTHROUGH */ 2883 case RSPQ_SOP: 2884 mh->mh_head = mh->mh_tail = m; 2885 m->m_pkthdr.len = len; 2886 break; 2887 case RSPQ_EOP: 2888 ret = 1; 2889 /* FALLTHROUGH */ 2890 case RSPQ_NSOP_NEOP: 2891 if (mh->mh_tail == NULL) { 2892 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2893 m_freem(m); 2894 break; 2895 } 2896 mh->mh_tail->m_next = m; 2897 mh->mh_tail = m; 2898 mh->mh_head->m_pkthdr.len += len; 2899 break; 2900 } 2901 if (cxgb_debug) 2902 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2903 done: 2904 if (++fl->cidx == fl->size) 2905 fl->cidx = 0; 2906 2907 return (ret); 2908 } 2909 2910 /** 2911 * handle_rsp_cntrl_info - handles control information in a response 2912 * @qs: the queue set corresponding to the response 2913 * @flags: the response control flags 2914 * 2915 * Handles the control information of an SGE response, such as GTS 2916 * indications and completion credits for the queue set's Tx queues. 2917 * HW coalesces credits, we don't do any extra SW coalescing. 
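 *	The credit fields map onto the queue set's Tx queues as follows:
 *	TXQ0 credits go to the Ethernet queue, TXQ1 to the offload queue,
 *	and TXQ2 to the control queue.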
2918 */ 2919 static __inline void 2920 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2921 { 2922 unsigned int credits; 2923 2924 #if USE_GTS 2925 if (flags & F_RSPD_TXQ0_GTS) 2926 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2927 #endif 2928 credits = G_RSPD_TXQ0_CR(flags); 2929 if (credits) 2930 qs->txq[TXQ_ETH].processed += credits; 2931 2932 credits = G_RSPD_TXQ2_CR(flags); 2933 if (credits) 2934 qs->txq[TXQ_CTRL].processed += credits; 2935 2936 # if USE_GTS 2937 if (flags & F_RSPD_TXQ1_GTS) 2938 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2939 # endif 2940 credits = G_RSPD_TXQ1_CR(flags); 2941 if (credits) 2942 qs->txq[TXQ_OFLD].processed += credits; 2943 2944 } 2945 2946 static void 2947 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2948 unsigned int sleeping) 2949 { 2950 ; 2951 } 2952 2953 /** 2954 * process_responses - process responses from an SGE response queue 2955 * @adap: the adapter 2956 * @qs: the queue set to which the response queue belongs 2957 * @budget: how many responses can be processed in this round 2958 * 2959 * Process responses from an SGE response queue up to the supplied budget. 2960 * Responses include received packets as well as credits and other events 2961 * for the queues that belong to the response queue's queue set. 2962 * A negative budget is effectively unlimited. 2963 * 2964 * Additionally choose the interrupt holdoff time for the next interrupt 2965 * on this queue. If the system is under memory shortage use a fairly 2966 * long delay to help recovery. 2967 */ 2968 static int 2969 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2970 { 2971 struct sge_rspq *rspq = &qs->rspq; 2972 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2973 int budget_left = budget; 2974 unsigned int sleeping = 0; 2975 #ifdef LRO_SUPPORTED 2976 int lro_enabled = qs->lro.enabled; 2977 int skip_lro; 2978 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2979 #endif 2980 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2981 int ngathered = 0; 2982 #ifdef DEBUG 2983 static int last_holdoff = 0; 2984 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2985 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2986 last_holdoff = rspq->holdoff_tmr; 2987 } 2988 #endif 2989 rspq->next_holdoff = rspq->holdoff_tmr; 2990 2991 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2992 int eth, eop = 0, ethpad = 0; 2993 uint32_t flags = ntohl(r->flags); 2994 uint32_t rss_csum = *(const uint32_t *)r; 2995 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2996 2997 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2998 2999 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 3000 struct mbuf *m; 3001 3002 if (cxgb_debug) 3003 printf("async notification\n"); 3004 3005 if (rspq->rspq_mh.mh_head == NULL) { 3006 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3007 m = rspq->rspq_mh.mh_head; 3008 } else { 3009 m = m_gethdr(M_DONTWAIT, MT_DATA); 3010 } 3011 if (m == NULL) 3012 goto no_mem; 3013 3014 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3015 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3016 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3017 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3018 eop = 1; 3019 rspq->async_notif++; 3020 goto skip; 3021 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3022 struct mbuf *m = NULL; 3023 3024 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3025 r->rss_hdr.opcode, rspq->cidx); 3026 if (rspq->rspq_mh.mh_head == NULL) 3027 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3028 else 3029 m = m_gethdr(M_DONTWAIT, MT_DATA); 3030 
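			/*
			 * If neither an existing chain head nor a fresh mbuf
			 * is available we are out of memory: lengthen the
			 * next interrupt holdoff to NOMEM_INTR_DELAY and
			 * stop processing this round.
			 */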
3031 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 3032 no_mem: 3033 rspq->next_holdoff = NOMEM_INTR_DELAY; 3034 budget_left--; 3035 break; 3036 } 3037 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3038 eop = 1; 3039 rspq->imm_data++; 3040 } else if (r->len_cq) { 3041 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3042 3043 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3044 if (eop) { 3045 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3046 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3047 } 3048 3049 ethpad = 2; 3050 } else { 3051 rspq->pure_rsps++; 3052 } 3053 skip: 3054 if (flags & RSPD_CTRL_MASK) { 3055 sleeping |= flags & RSPD_GTS_MASK; 3056 handle_rsp_cntrl_info(qs, flags); 3057 } 3058 3059 r++; 3060 if (__predict_false(++rspq->cidx == rspq->size)) { 3061 rspq->cidx = 0; 3062 rspq->gen ^= 1; 3063 r = rspq->desc; 3064 } 3065 3066 if (++rspq->credits >= (rspq->size / 4)) { 3067 refill_rspq(adap, rspq, rspq->credits); 3068 rspq->credits = 0; 3069 } 3070 if (!eth && eop) { 3071 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3072 /* 3073 * XXX size mismatch 3074 */ 3075 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3076 3077 3078 ngathered = rx_offload(&adap->tdev, rspq, 3079 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3080 rspq->rspq_mh.mh_head = NULL; 3081 DPRINTF("received offload packet\n"); 3082 3083 } else if (eth && eop) { 3084 struct mbuf *m = rspq->rspq_mh.mh_head; 3085 3086 t3_rx_eth(adap, rspq, m, ethpad); 3087 3088 #ifdef LRO_SUPPORTED 3089 /* 3090 * The T304 sends incoming packets on any qset. If LRO 3091 * is also enabled, we could end up sending packet up 3092 * lro_ctrl->ifp's input. That is incorrect. 3093 * 3094 * The mbuf's rcvif was derived from the cpl header and 3095 * is accurate. Skip LRO and just use that. 3096 */ 3097 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3098 3099 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 3100 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 3101 /* successfully queue'd for LRO */ 3102 } else 3103 #endif 3104 { 3105 /* 3106 * LRO not enabled, packet unsuitable for LRO, 3107 * or unable to queue. Pass it up right now in 3108 * either case. 3109 */ 3110 struct ifnet *ifp = m->m_pkthdr.rcvif; 3111 (*ifp->if_input)(ifp, m); 3112 } 3113 rspq->rspq_mh.mh_head = NULL; 3114 3115 } 3116 __refill_fl_lt(adap, &qs->fl[0], 32); 3117 __refill_fl_lt(adap, &qs->fl[1], 32); 3118 --budget_left; 3119 } 3120 3121 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3122 3123 #ifdef LRO_SUPPORTED 3124 /* Flush LRO */ 3125 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3126 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3127 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3128 tcp_lro_flush(lro_ctrl, queued); 3129 } 3130 #endif 3131 3132 if (sleeping) 3133 check_ring_db(adap, qs, sleeping); 3134 3135 mb(); /* commit Tx queue processed updates */ 3136 if (__predict_false(qs->txq_stopped > 1)) { 3137 printf("restarting tx on %p\n", qs); 3138 3139 restart_tx(qs); 3140 } 3141 3142 __refill_fl_lt(adap, &qs->fl[0], 512); 3143 __refill_fl_lt(adap, &qs->fl[1], 512); 3144 budget -= budget_left; 3145 return (budget); 3146 } 3147 3148 /* 3149 * A helper function that processes responses and issues GTS. 
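 * After the response queue has been drained it re-arms it by writing
 * A_SG_GTS with the updated holdoff timer and consumer index.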
3150 */ 3151 static __inline int 3152 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3153 { 3154 int work; 3155 static int last_holdoff = 0; 3156 3157 work = process_responses(adap, rspq_to_qset(rq), -1); 3158 3159 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3160 printf("next_holdoff=%d\n", rq->next_holdoff); 3161 last_holdoff = rq->next_holdoff; 3162 } 3163 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3164 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3165 3166 return (work); 3167 } 3168 3169 3170 /* 3171 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3172 * Handles data events from SGE response queues as well as error and other 3173 * async events as they all use the same interrupt pin. We use one SGE 3174 * response queue per port in this mode and protect all response queues with 3175 * queue 0's lock. 3176 */ 3177 void 3178 t3b_intr(void *data) 3179 { 3180 uint32_t i, map; 3181 adapter_t *adap = data; 3182 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3183 3184 t3_write_reg(adap, A_PL_CLI, 0); 3185 map = t3_read_reg(adap, A_SG_DATA_INTR); 3186 3187 if (!map) 3188 return; 3189 3190 if (__predict_false(map & F_ERRINTR)) 3191 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3192 3193 mtx_lock(&q0->lock); 3194 for_each_port(adap, i) 3195 if (map & (1 << i)) 3196 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3197 mtx_unlock(&q0->lock); 3198 } 3199 3200 /* 3201 * The MSI interrupt handler. This needs to handle data events from SGE 3202 * response queues as well as error and other async events as they all use 3203 * the same MSI vector. We use one SGE response queue per port in this mode 3204 * and protect all response queues with queue 0's lock. 3205 */ 3206 void 3207 t3_intr_msi(void *data) 3208 { 3209 adapter_t *adap = data; 3210 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3211 int i, new_packets = 0; 3212 3213 mtx_lock(&q0->lock); 3214 3215 for_each_port(adap, i) 3216 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3217 new_packets = 1; 3218 mtx_unlock(&q0->lock); 3219 if (new_packets == 0) 3220 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3221 } 3222 3223 void 3224 t3_intr_msix(void *data) 3225 { 3226 struct sge_qset *qs = data; 3227 adapter_t *adap = qs->port->adapter; 3228 struct sge_rspq *rspq = &qs->rspq; 3229 3230 if (process_responses_gts(adap, rspq) == 0) 3231 rspq->unhandled_irqs++; 3232 } 3233 3234 #define QDUMP_SBUF_SIZE 32 * 400 3235 static int 3236 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3237 { 3238 struct sge_rspq *rspq; 3239 struct sge_qset *qs; 3240 int i, err, dump_end, idx; 3241 static int multiplier = 1; 3242 struct sbuf *sb; 3243 struct rsp_desc *rspd; 3244 uint32_t data[4]; 3245 3246 rspq = arg1; 3247 qs = rspq_to_qset(rspq); 3248 if (rspq->rspq_dump_count == 0) 3249 return (0); 3250 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3251 log(LOG_WARNING, 3252 "dump count is too large %d\n", rspq->rspq_dump_count); 3253 rspq->rspq_dump_count = 0; 3254 return (EINVAL); 3255 } 3256 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3257 log(LOG_WARNING, 3258 "dump start of %d is greater than queue size\n", 3259 rspq->rspq_dump_start); 3260 rspq->rspq_dump_start = 0; 3261 return (EINVAL); 3262 } 3263 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3264 if (err) 3265 return (err); 3266 retry_sbufops: 3267 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3268 3269 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3270 (data[0] & 
0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3271 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3272 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3273 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3274 3275 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3276 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3277 3278 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3279 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3280 idx = i & (RSPQ_Q_SIZE-1); 3281 3282 rspd = &rspq->desc[idx]; 3283 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3284 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3285 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3286 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3287 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3288 be32toh(rspd->len_cq), rspd->intr_gen); 3289 } 3290 if (sbuf_overflowed(sb)) { 3291 sbuf_delete(sb); 3292 multiplier++; 3293 goto retry_sbufops; 3294 } 3295 sbuf_finish(sb); 3296 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3297 sbuf_delete(sb); 3298 return (err); 3299 } 3300 3301 static int 3302 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3303 { 3304 struct sge_txq *txq; 3305 struct sge_qset *qs; 3306 int i, j, err, dump_end; 3307 static int multiplier = 1; 3308 struct sbuf *sb; 3309 struct tx_desc *txd; 3310 uint32_t *WR, wr_hi, wr_lo, gen; 3311 uint32_t data[4]; 3312 3313 txq = arg1; 3314 qs = txq_to_qset(txq, TXQ_ETH); 3315 if (txq->txq_dump_count == 0) { 3316 return (0); 3317 } 3318 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3319 log(LOG_WARNING, 3320 "dump count is too large %d\n", txq->txq_dump_count); 3321 txq->txq_dump_count = 1; 3322 return (EINVAL); 3323 } 3324 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3325 log(LOG_WARNING, 3326 "dump start of %d is greater than queue size\n", 3327 txq->txq_dump_start); 3328 txq->txq_dump_start = 0; 3329 return (EINVAL); 3330 } 3331 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3332 if (err) 3333 return (err); 3334 3335 3336 retry_sbufops: 3337 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3338 3339 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3340 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3341 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3342 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3343 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3344 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3345 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3346 txq->txq_dump_start, 3347 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3348 3349 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3350 for (i = txq->txq_dump_start; i < dump_end; i++) { 3351 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3352 WR = (uint32_t *)txd->flit; 3353 wr_hi = ntohl(WR[0]); 3354 wr_lo = ntohl(WR[1]); 3355 gen = G_WR_GEN(wr_lo); 3356 3357 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3358 wr_hi, wr_lo, gen); 3359 for (j = 2; j < 30; j += 4) 3360 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3361 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3362 3363 } 3364 if (sbuf_overflowed(sb)) { 3365 sbuf_delete(sb); 3366 multiplier++; 3367 goto retry_sbufops; 3368 } 3369 sbuf_finish(sb); 3370 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3371 sbuf_delete(sb); 3372 return (err); 3373 } 3374 
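/*
 * The dump handlers above and below are driven entirely through sysctl:
 * set a queue's dump_start and dump_count nodes, then read its qdump node.
 * The nodes are created per queue set in t3_add_configured_sysctls(); as a
 * rough illustration (assuming the controller device shows up as cxgbc0,
 * which depends on how the adapter attached):
 *
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.dump_start=0
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.dump_count=32
 *	# sysctl dev.cxgbc.0.port0.qs0.txq_eth.qdump
 */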
3375 static int 3376 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3377 { 3378 struct sge_txq *txq; 3379 struct sge_qset *qs; 3380 int i, j, err, dump_end; 3381 static int multiplier = 1; 3382 struct sbuf *sb; 3383 struct tx_desc *txd; 3384 uint32_t *WR, wr_hi, wr_lo, gen; 3385 3386 txq = arg1; 3387 qs = txq_to_qset(txq, TXQ_CTRL); 3388 if (txq->txq_dump_count == 0) { 3389 return (0); 3390 } 3391 if (txq->txq_dump_count > 256) { 3392 log(LOG_WARNING, 3393 "dump count is too large %d\n", txq->txq_dump_count); 3394 txq->txq_dump_count = 1; 3395 return (EINVAL); 3396 } 3397 if (txq->txq_dump_start > 255) { 3398 log(LOG_WARNING, 3399 "dump start of %d is greater than queue size\n", 3400 txq->txq_dump_start); 3401 txq->txq_dump_start = 0; 3402 return (EINVAL); 3403 } 3404 3405 retry_sbufops: 3406 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3407 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3408 txq->txq_dump_start, 3409 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3410 3411 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3412 for (i = txq->txq_dump_start; i < dump_end; i++) { 3413 txd = &txq->desc[i & (255)]; 3414 WR = (uint32_t *)txd->flit; 3415 wr_hi = ntohl(WR[0]); 3416 wr_lo = ntohl(WR[1]); 3417 gen = G_WR_GEN(wr_lo); 3418 3419 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3420 wr_hi, wr_lo, gen); 3421 for (j = 2; j < 30; j += 4) 3422 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3423 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3424 3425 } 3426 if (sbuf_overflowed(sb)) { 3427 sbuf_delete(sb); 3428 multiplier++; 3429 goto retry_sbufops; 3430 } 3431 sbuf_finish(sb); 3432 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3433 sbuf_delete(sb); 3434 return (err); 3435 } 3436 3437 static int 3438 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3439 { 3440 adapter_t *sc = arg1; 3441 struct qset_params *qsp = &sc->params.sge.qset[0]; 3442 int coalesce_usecs; 3443 struct sge_qset *qs; 3444 int i, j, err, nqsets = 0; 3445 struct mtx *lock; 3446 3447 if ((sc->flags & FULL_INIT_DONE) == 0) 3448 return (ENXIO); 3449 3450 coalesce_usecs = qsp->coalesce_usecs; 3451 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3452 3453 if (err != 0) { 3454 return (err); 3455 } 3456 if (coalesce_usecs == qsp->coalesce_usecs) 3457 return (0); 3458 3459 for (i = 0; i < sc->params.nports; i++) 3460 for (j = 0; j < sc->port[i].nqsets; j++) 3461 nqsets++; 3462 3463 coalesce_usecs = max(1, coalesce_usecs); 3464 3465 for (i = 0; i < nqsets; i++) { 3466 qs = &sc->sge.qs[i]; 3467 qsp = &sc->params.sge.qset[i]; 3468 qsp->coalesce_usecs = coalesce_usecs; 3469 3470 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3471 &sc->sge.qs[0].rspq.lock; 3472 3473 mtx_lock(lock); 3474 t3_update_qset_coalesce(qs, qsp); 3475 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3476 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3477 mtx_unlock(lock); 3478 } 3479 3480 return (0); 3481 } 3482 3483 3484 void 3485 t3_add_attach_sysctls(adapter_t *sc) 3486 { 3487 struct sysctl_ctx_list *ctx; 3488 struct sysctl_oid_list *children; 3489 3490 ctx = device_get_sysctl_ctx(sc->dev); 3491 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3492 3493 /* random information */ 3494 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3495 "firmware_version", 3496 CTLFLAG_RD, &sc->fw_version, 3497 0, "firmware version"); 3498 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3499 "hw_revision", 3500 CTLFLAG_RD, &sc->params.rev, 3501 0, "chip model"); 3502 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3503 "port_types", 3504 CTLFLAG_RD, &sc->port_types, 3505 0, "type of ports"); 3506 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3507 "enable_debug", 3508 CTLFLAG_RW, &cxgb_debug, 3509 0, "enable verbose debugging output"); 3510 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3511 CTLFLAG_RD, &sc->tunq_coalesce, 3512 "#tunneled packets freed"); 3513 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3514 "txq_overrun", 3515 CTLFLAG_RD, &txq_fills, 3516 0, "#times txq overrun"); 3517 } 3518 3519 3520 static const char *rspq_name = "rspq"; 3521 static const char *txq_names[] = 3522 { 3523 "txq_eth", 3524 "txq_ofld", 3525 "txq_ctrl" 3526 }; 3527 3528 static int 3529 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3530 { 3531 struct port_info *p = arg1; 3532 uint64_t *parg; 3533 3534 if (!p) 3535 return (EINVAL); 3536 3537 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3538 PORT_LOCK(p); 3539 t3_mac_update_stats(&p->mac); 3540 PORT_UNLOCK(p); 3541 3542 return (sysctl_handle_quad(oidp, parg, 0, req)); 3543 } 3544 3545 void 3546 t3_add_configured_sysctls(adapter_t *sc) 3547 { 3548 struct sysctl_ctx_list *ctx; 3549 struct sysctl_oid_list *children; 3550 int i, j; 3551 3552 ctx = device_get_sysctl_ctx(sc->dev); 3553 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3554 3555 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3556 "intr_coal", 3557 CTLTYPE_INT|CTLFLAG_RW, sc, 3558 0, t3_set_coalesce_usecs, 3559 "I", "interrupt coalescing timer (us)"); 3560 3561 for (i = 0; i < sc->params.nports; i++) { 3562 struct port_info *pi = &sc->port[i]; 3563 struct sysctl_oid *poid; 3564 struct sysctl_oid_list *poidlist; 3565 struct mac_stats *mstats = &pi->mac.stats; 3566 3567 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3568 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3569 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3570 poidlist = SYSCTL_CHILDREN(poid); 3571 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3572 "nqsets", CTLFLAG_RD, &pi->nqsets, 3573 0, "#queue sets"); 3574 3575 for (j = 0; j < pi->nqsets; j++) { 3576 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3577 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3578 *ctrlqpoid, *lropoid; 3579 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3580 *txqpoidlist, *ctrlqpoidlist, 3581 *lropoidlist; 3582 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3583 3584 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3585 3586 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3587 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3588 qspoidlist = SYSCTL_CHILDREN(qspoid); 3589 3590 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3591 CTLFLAG_RD, &qs->fl[0].empty, 0, 3592 "freelist #0 empty"); 
3593 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3594 CTLFLAG_RD, &qs->fl[1].empty, 0, 3595 "freelist #1 empty"); 3596 3597 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3598 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3599 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3600 3601 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3602 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3603 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3604 3605 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3606 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3607 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3608 3609 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3610 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3611 lropoidlist = SYSCTL_CHILDREN(lropoid); 3612 3613 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3614 CTLFLAG_RD, &qs->rspq.size, 3615 0, "#entries in response queue"); 3616 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3617 CTLFLAG_RD, &qs->rspq.cidx, 3618 0, "consumer index"); 3619 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3620 CTLFLAG_RD, &qs->rspq.credits, 3621 0, "#credits"); 3622 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3623 CTLFLAG_RD, &qs->rspq.phys_addr, 3624 "physical_address_of the queue"); 3625 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3626 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3627 0, "start rspq dump entry"); 3628 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3629 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3630 0, "#rspq entries to dump"); 3631 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3632 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3633 0, t3_dump_rspq, "A", "dump of the response queue"); 3634 3635 3636 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3637 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3638 0, "#tunneled packets dropped"); 3639 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3640 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3641 0, "#tunneled packets waiting to be sent"); 3642 #if 0 3643 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3644 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3645 0, "#tunneled packets queue producer index"); 3646 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3647 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3648 0, "#tunneled packets queue consumer index"); 3649 #endif 3650 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3651 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3652 0, "#tunneled packets processed by the card"); 3653 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3654 CTLFLAG_RD, &txq->cleaned, 3655 0, "#tunneled packets cleaned"); 3656 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3657 CTLFLAG_RD, &txq->in_use, 3658 0, "#tunneled packet slots in use"); 3659 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3660 CTLFLAG_RD, &txq->txq_frees, 3661 "#tunneled packets freed"); 3662 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3663 CTLFLAG_RD, &txq->txq_skipped, 3664 0, "#tunneled packet descriptors skipped"); 3665 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3666 CTLFLAG_RD, &txq->txq_coalesced, 3667 "#tunneled packets coalesced"); 3668 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3669 CTLFLAG_RD, &txq->txq_enqueued, 3670 0, "#tunneled packets enqueued to hardware"); 3671 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3672 CTLFLAG_RD, &qs->txq_stopped, 3673 0, "tx queues 
stopped"); 3674 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3675 CTLFLAG_RD, &txq->phys_addr, 3676 "physical_address_of the queue"); 3677 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3678 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3679 0, "txq generation"); 3680 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3681 CTLFLAG_RD, &txq->cidx, 3682 0, "hardware queue cidx"); 3683 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3684 CTLFLAG_RD, &txq->pidx, 3685 0, "hardware queue pidx"); 3686 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3687 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3688 0, "txq start idx for dump"); 3689 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3690 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3691 0, "txq #entries to dump"); 3692 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3693 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3694 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3695 3696 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3697 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3698 0, "ctrlq start idx for dump"); 3699 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3700 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3701 0, "ctrl #entries to dump"); 3702 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3703 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3704 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3705 3706 #ifdef LRO_SUPPORTED 3707 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3708 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3709 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3710 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3711 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3712 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3713 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3714 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3715 #endif 3716 } 3717 3718 /* Now add a node for mac stats. */ 3719 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3720 CTLFLAG_RD, NULL, "MAC statistics"); 3721 poidlist = SYSCTL_CHILDREN(poid); 3722 3723 /* 3724 * We (ab)use the length argument (arg2) to pass on the offset 3725 * of the data that we are interested in. This is only required 3726 * for the quad counters that are updated from the hardware (we 3727 * make sure that we return the latest value). 3728 * sysctl_handle_macstat first updates *all* the counters from 3729 * the hardware, and then returns the latest value of the 3730 * requested counter. Best would be to update only the 3731 * requested counter from hardware, but t3_mac_update_stats() 3732 * hides all the register details and we don't want to dive into 3733 * all that here. 
3734 */ 3735 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3736 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3737 sysctl_handle_macstat, "QU", 0) 3738 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3739 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3740 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3741 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3742 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3743 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3744 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3745 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3746 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3747 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3748 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3749 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3750 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3751 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3752 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3753 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3755 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3756 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3757 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3758 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3759 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3760 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3761 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3762 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3763 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3764 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3765 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3766 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3767 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3768 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3769 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3770 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3771 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3772 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3773 CXGB_SYSCTL_ADD_QUAD(rx_short); 3774 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3775 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3776 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3777 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3779 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3780 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3781 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3782 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3783 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3784 #undef CXGB_SYSCTL_ADD_QUAD 3785 3786 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3787 CTLFLAG_RD, &mstats->a, 0) 3788 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3789 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3790 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3791 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3792 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3793 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3794 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3795 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3796 CXGB_SYSCTL_ADD_ULONG(num_resets); 3797 CXGB_SYSCTL_ADD_ULONG(link_faults); 3798 #undef CXGB_SYSCTL_ADD_ULONG 3799 } 3800 } 3801 3802 /** 3803 * t3_get_desc - dump an SGE descriptor for debugging purposes 3804 * @qs: the queue set 3805 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3806 * @idx: the descriptor index in the queue 3807 * @data: where to dump the descriptor contents 3808 * 3809 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3810 * size of the descriptor. 
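 *	If the queue number is unknown, the ring has not been allocated, or
 *	the index is out of range, an error is returned instead.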
3811  */
3812 int
3813 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3814 	    unsigned char *data)
3815 {
3816 	if (qnum >= 6)
3817 		return (-EINVAL);
3818 
3819 	if (qnum < 3) {
3820 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3821 			return (-EINVAL);
3822 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3823 		return (sizeof(struct tx_desc));
3824 	}
3825 
3826 	if (qnum == 3) {
3827 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3828 			return (-EINVAL);
3829 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3830 		return (sizeof(struct rsp_desc));
3831 	}
3832 
3833 	qnum -= 4;
3834 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3835 		return (-EINVAL);
3836 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3837 	return (sizeof(struct rx_desc));
3838 }
3839 