/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

2. Neither the name of the Chelsio Corporation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/syslog.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <cxgb_include.h>
#include <sys/mvec.h>

int txq_fills = 0;
int multiq_tx_enable = 1;

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");

#define	COALESCE_START_DEFAULT	(TX_ETH_Q_SIZE >> 1)
#define	COALESCE_START_MAX	(TX_ETH_Q_SIZE - (TX_ETH_Q_SIZE >> 3))
#define	COALESCE_STOP_DEFAULT	(TX_ETH_Q_SIZE >> 2)
#define	COALESCE_STOP_MIN	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_DEFAULT	(TX_ETH_Q_SIZE >> 5)
#define	TX_RECLAIM_MAX		(TX_ETH_Q_SIZE >> 2)
#define	TX_RECLAIM_MIN		(TX_ETH_Q_SIZE >> 6)


static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
    &cxgb_tx_coalesce_enable_start);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;
int cxgb_ext_freed = 0;
int cxgb_ext_inited = 0;
int fl_q_size = 0;
int jumbo_q_size = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbo4;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {			/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {			/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {			/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
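 *
 * Worked example (derived from the formula above, added for clarity): a
 * request of up to 2 flits fits in a single descriptor, and each further
 * (WR_FLITS - 1) flits consumes one more descriptor, which is why the
 * lookup table below never exceeds 4.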
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 *
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * if the hardware transmit queue is more than 1/8 full
	 * we mark it as coalescing - we drop back from coalescing
	 * when we go below 1/32 full and there are no packets enqueued,
	 * this provides us with some degree of hysteresis
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next == NULL))) {
		*count += 1;
		*nbytes += m->m_len;
		return (1);
	}
	return (0);
}

static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;


	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (m_head);
}

/**
 *	reclaim_completed_tx - reclaims completed Tx descriptors
 *	@adapter: the adapter
 *	@q: the Tx queue to reclaim completed descriptors from
 *
 *	Reclaims Tx descriptors that the SGE has indicated it has processed,
 *	and frees the associated buffers if possible.  Called with the Tx
 *	queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

/**
 *	should_restart_tx - are there enough resources to restart a Tx queue?
 *	@q: the Tx queue
 *
 *	Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 *	t3_sge_init - initialize SGE
 *	@adap: the adapter
 *	@p: the SGE parameters
 *
 *	Performs SGE initialization needed every time after a chip reset.
 *	We do not initialize any of the queue sets here, instead the driver
 *	top-level must request those individually.  We also do not enable DMA
 *	here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
		     V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
		     adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}

/**
 *	get_imm_packet - return the next ingress packet buffer from a response
 *	@resp: the response descriptor containing the packet data
 *
 *	Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 *	t3_sge_err_intr_handler - SGE async event interrupt handler
 *	@adapter: the adapter
 *
 *	Interrupt handler for SGE asynchronous (non-data) events.
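 *
 *	The handler below decodes A_SG_INT_CAUSE, logs parity, framing,
 *	credit-overflow and disabled-response-queue conditions, acknowledges
 *	the cause bits, and escalates anything in SGE_FATALERR to
 *	t3_fatal_err().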
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
			 status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
			 status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
			 "packet delivered to disabled response queue (0x%x)\n",
			 (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets;

	nqsets = min(SGE_QSETS, mp_ncpus*4);

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;
#if __FreeBSD_version >= 700111
	if (cxgb_use_16k_clusters)
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
	else
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
#else
	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	/* XXX Does ETHER_ALIGN need to be accounted for here? */
	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->polling = 0;
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = 1024;
		q->txq_size[TXQ_CTRL] = 256;
		q->cong_thres = 0;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create(NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
/**
 *	refill_fl - refill an SGE free-buffer list
 *	@sc: the controller softc
 *	@q: the free-list to refill
 *	@n: the number of new buffers to allocate
 *
 *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
 *	The caller must ensure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err, count = 0;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster, mbuf allocation happens after rx
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone == zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		count++;
	}

done:
	if (count)
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}


/**
 *	free_rx_bufs - free the Rx buffers on an SGE free list
 *	@sc: the controller softc
 *	@q: the SGE free list to clean up
 *
 *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 *	this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			if (q->zone == zone_pack) {
				m_init(d->m, zone_pack, MCLBYTES,
				    M_NOWAIT, MT_DATA, M_EXT);
				uma_zfree(zone_pack, d->m);
			} else {
				m_init(d->m, zone_mbuf, MLEN,
				    M_NOWAIT, MT_DATA, 0);
				uma_zfree(zone_mbuf, d->m);
				uma_zfree(q->zone, d->rxsd_cl);
			}
		}

		d->rxsd_cl = NULL;
		d->m = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	if ((fl->size - fl->credits) < max)
		refill_fl(adap, fl, min(max, fl->size - fl->credits));
}

/**
 *	recycle_rx_buf - recycle a receive buffer
 *	@adapter: the adapter
 *	@q: the SGE free list
 *	@idx: index of buffer to recycle
 *
 *	Recycles the specified buffer on the given free list by adding it at
 *	the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to   = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();	/* necessary? */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
				      BUS_SPACE_MAXADDR_32BIT,
				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
				      len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
				    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
				      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
				      NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
}

/**
 *	sge_timer_cb - perform periodic maintenance of an SGE qset
 *	@data: the SGE queue set to maintain
 *
 *	Runs periodically from a timer to perform maintenance of an SGE queue
 *	set.  It performs the following tasks:
 *
 *	a) Cleans up any completed Tx descriptors that may still be pending.
 *	Normal descriptor cleanup happens when new packets are added to a Tx
 *	queue so this timer is relatively infrequent and does any cleanup only
 *	if the Tx queue has not seen any new packets in a while.  We make a
 *	best effort attempt to reclaim descriptors, in that we don't wait
 *	around if we cannot get a queue's lock (which most likely is because
 *	someone else is queueing new packets and so will also handle the clean
 *	up).  Since control queues use immediate data exclusively we don't
 *	bother cleaning them up here.
 *
 *	b) Replenishes Rx queues that have run out due to memory shortage.
 *	Normally new Rx buffers are added when existing ones are consumed but
 *	when out of memory a queue can become empty.  We try to add only a few
 *	buffers here, the queue will be replenished fully as these new buffers
 *	are used up if memory shortage has subsided.
 *
 *	c) Return coalesced response queue credits in case a response queue is
 *	starved.
 *
 *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 *	fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;
	if ((sc->flags & USING_MSIX) == 0) {

		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq  *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
				     F_SELEGRCNTX |
				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c
 *
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

/**
 *	refill_rspq - replenish an SGE response queue
 *	@adapter: the adapter
 *	@q: the response queue to replenish
 *	@credits: how many new responses to make available
 *
 *	Replenishes a response queue by making the supplied number of responses
 *	available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}

static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
						  &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 *	init_qset_cntxt - initialize an SGE queue set context info
 *	@qs: the queue set
 *	@id: the queue set id
 *
 *	Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
		(txq->pidx < txqs->pidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
		(txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}

/**
 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
 *	@m: the packet mbufs
 *	@nsegs: the number of segments
 *
 *	Returns the number of Tx descriptors needed for the given Ethernet
 *	packet.  Ethernet packets require addition of WR and CPL headers.
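 *
 *	As implemented below: a frame no larger than PIO_LEN is written as
 *	immediate data and needs exactly one descriptor; otherwise the WR
 *	takes sgl_len(nsegs) + 2 flits (one more with TSO), which
 *	flits_to_desc() converts into a descriptor count.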
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
#ifdef TSO_SUPPORTED
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;
#endif
	return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;
	bus_dma_tag_t tag = txq->entry_tag;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	} else if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 *	make_sgl - populate a scatter/gather list for a packet
 *	@sgp: the SGL to populate
 *	@segs: the packet dma segments
 *	@nsegs: the number of segments
 *
 *	Generates a scatter/gather list for the buffers that make up a packet
 *	and returns the SGL size in 8-byte words.  The caller must size the SGL
 *	appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
 *	@q: the Tx queue
 *
 *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
 *	where the HW may go to sleep just after we check; in that case the
 *	interrupt handler will detect the outstanding TX packet and ring the
 *	doorbell for us.
 *
 *	When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
			  q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	wmb();		/* write descriptors before telling HW */
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo);
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)

#ifdef VLAN_SUPPORTED
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

#else
#define GET_VTAG(cntrl, m)
#endif

static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

#ifdef VLAN_SUPPORTED
	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
#endif
	if (m0->m_nextpkt != NULL) {
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
err=%d\n", err); 1430 return (err); 1431 } 1432 mlen = m0->m_pkthdr.len; 1433 ndesc = calc_tx_descs(m0, nsegs); 1434 } 1435 txq_prod(txq, ndesc, &txqs); 1436 1437 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1438 txsd->m = m0; 1439 1440 if (m0->m_nextpkt != NULL) { 1441 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1442 int i, fidx; 1443 1444 if (nsegs > 7) 1445 panic("trying to coalesce %d packets in to one WR", nsegs); 1446 txq->txq_coalesced += nsegs; 1447 wrp = (struct work_request_hdr *)txd; 1448 flits = nsegs*2 + 1; 1449 1450 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1451 struct cpl_tx_pkt_batch_entry *cbe; 1452 uint64_t flit; 1453 uint32_t *hflit = (uint32_t *)&flit; 1454 int cflags = m0->m_pkthdr.csum_flags; 1455 1456 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1457 GET_VTAG(cntrl, m0); 1458 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1459 if (__predict_false(!(cflags & CSUM_IP))) 1460 cntrl |= F_TXPKT_IPCSUM_DIS; 1461 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1462 cntrl |= F_TXPKT_L4CSUM_DIS; 1463 1464 hflit[0] = htonl(cntrl); 1465 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1466 flit |= htobe64(1 << 24); 1467 cbe = &cpl_batch->pkt_entry[i]; 1468 cbe->cntrl = hflit[0]; 1469 cbe->len = hflit[1]; 1470 cbe->addr = htobe64(segs[i].ds_addr); 1471 } 1472 1473 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1474 V_WR_SGLSFLT(flits)) | 1475 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1476 wr_lo = htonl(V_WR_LEN(flits) | 1477 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1478 set_wr_hdr(wrp, wr_hi, wr_lo); 1479 wmb(); 1480 wr_gen2(txd, txqs.gen); 1481 check_ring_tx_db(sc, txq); 1482 return (0); 1483 } else if (tso_info) { 1484 int min_size = TCPPKTHDRSIZE, eth_type, tagged; 1485 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1486 struct ip *ip; 1487 struct tcphdr *tcp; 1488 char *pkthdr; 1489 1490 txd->flit[2] = 0; 1491 GET_VTAG(cntrl, m0); 1492 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1493 hdr->cntrl = htonl(cntrl); 1494 hdr->len = htonl(mlen | 0x80000000); 1495 1496 DPRINTF("tso buf len=%d\n", mlen); 1497 1498 tagged = m0->m_flags & M_VLANTAG; 1499 if (!tagged) 1500 min_size -= ETHER_VLAN_ENCAP_LEN; 1501 1502 if (__predict_false(mlen < min_size)) { 1503 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1504 m0, mlen, m0->m_pkthdr.tso_segsz, 1505 m0->m_pkthdr.csum_flags, m0->m_flags); 1506 panic("tx tso packet too small"); 1507 } 1508 1509 /* Make sure that ether, ip, tcp headers are all in m0 */ 1510 if (__predict_false(m0->m_len < min_size)) { 1511 m0 = m_pullup(m0, min_size); 1512 if (__predict_false(m0 == NULL)) { 1513 /* XXX panic probably an overreaction */ 1514 panic("couldn't fit header into mbuf"); 1515 } 1516 } 1517 pkthdr = m0->m_data; 1518 1519 if (tagged) { 1520 eth_type = CPL_ETH_II_VLAN; 1521 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN + 1522 ETHER_VLAN_ENCAP_LEN); 1523 } else { 1524 eth_type = CPL_ETH_II; 1525 ip = (struct ip *)(pkthdr + ETHER_HDR_LEN); 1526 } 1527 tcp = (struct tcphdr *)((uint8_t *)ip + 1528 sizeof(*ip)); 1529 1530 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1531 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1532 V_LSO_TCPHDR_WORDS(tcp->th_off); 1533 hdr->lso_info = htonl(tso_info); 1534 1535 if (__predict_false(mlen <= PIO_LEN)) { 1536 /* pkt not undersized but fits in PIO_LEN 1537 * Indicates a TSO bug at the higher levels. 
			 *
			 */
			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(pi->adapter, txq);

	return (0);
}

void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
		qs->coalescing = 0;
	else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}

static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}

static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	int avail, txmax;
	int in_use_init = txq->in_use;
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;
	avail = txq->size - txq->in_use - 4;
	txmax = min(TX_START_MAX_DESC, avail);

	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while ((txq->in_use - in_use_init < txmax) &&
	    !TXQ_RING_EMPTY(qs) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/*
		 * We sent via PIO, no longer need a copy
		 */
		if (m_head->m_nextpkt == NULL &&
		    m_head->m_pkthdr.len <= PIO_LEN)
			m_freem(m_head);

		m_head = NULL;
	}
	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}

static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > 4) {
		if (t3_encap(qs, &m)) {
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
			/*
			** Send a copy of the frame to the BPF
			** listener and set the watchdog on.
			*/
			ETHER_BPF_MTAP(ifp, m);
			/*
			 * We sent via PIO, no longer need a copy
			 */
			if (m->m_pkthdr.len <= PIO_LEN)
				m_freem(m);

		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}

int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    ||(!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	if (m->m_flags & M_FLOWID)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}
void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];

	if (!pi->link_config.link_ok)
		return;

	TXQ_LOCK(qs);
	cxgb_start_locked(qs);
	TXQ_UNLOCK(qs);
}

void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * flush any enqueued mbufs in the buf_rings
	 * and in the transmit queues
	 * no-op for now
	 */
	return;
}

/**
 *	write_imm - write a packet into a Tx descriptor as immediate data
 *	@d: the Tx descriptor to write
 *	@m: the packet
 *	@len: the length of packet data to write as immediate data
 *	@gen: the generation bit value to write
 *
 *	Writes a packet as immediate data into a Tx descriptor.  The packet
 *	contains a work request at its beginning.  We must write the packet
 *	carefully so the SGE doesn't read accidentally before it's written in
 *	its entirety.
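 *
 *	To that end the body of the request is copied first, the WR header is
 *	written afterwards via set_wr_hdr(), and the generation bit is written
 *	last (wr_gen2) behind a write barrier, so the hardware never sees a
 *	partially written descriptor.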
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
	    V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);

	/*
	 * This check is a hack we should really fix the logic so
	 * that this can't happen
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);

}

/**
 *	check_desc_avail - check descriptor availability on a send queue
 *	@adap: the adapter
 *	@q: the TX queue
 *	@m: the packet needing the descriptors
 *	@ndesc: the number of Tx descriptors needed
 *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 *	Checks if the requested number of Tx descriptors is available on an
 *	SGE send queue.  If the queue is already suspended or not enough
 *	descriptors are available the packet is queued for later transmission.
 *	Must be called with the Tx queue locked.
 *
 *	Returns 0 if enough descriptors are available, 1 if there aren't
 *	enough descriptors and the packet has been queued, and 2 if the caller
 *	needs to retry because there weren't enough descriptors at the
 *	beginning of the call but some freed up in the mean time.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *	@q: the SGE control Tx queue
 *
 *	This is a variant of reclaim_completed_tx() that is used for Tx queues
 *	that send only immediate data (presently just the control queues) and
 *	thus do not have any mbufs
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}

/**
 *	ctrl_xmit - send a packet through an SGE control Tx queue
 *	@adap: the adapter
 *	@q: the control queue
 *	@m: the packet
 *
Packets sent through 1926 * a control queue must fit entirely as immediate data in a single Tx 1927 * descriptor and have no page fragments. 1928 */ 1929 static int 1930 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1931 { 1932 int ret; 1933 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1934 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1935 1936 if (__predict_false(!immediate(m))) { 1937 m_freem(m); 1938 return 0; 1939 } 1940 1941 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1942 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1943 1944 TXQ_LOCK(qs); 1945 again: reclaim_completed_tx_imm(q); 1946 1947 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1948 if (__predict_false(ret)) { 1949 if (ret == 1) { 1950 TXQ_UNLOCK(qs); 1951 return (ENOSPC); 1952 } 1953 goto again; 1954 } 1955 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1956 1957 q->in_use++; 1958 if (++q->pidx >= q->size) { 1959 q->pidx = 0; 1960 q->gen ^= 1; 1961 } 1962 TXQ_UNLOCK(qs); 1963 wmb(); 1964 t3_write_reg(adap, A_SG_KDOORBELL, 1965 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1966 return (0); 1967 } 1968 1969 1970 /** 1971 * restart_ctrlq - restart a suspended control queue 1972 * @qs: the queue set cotaining the control queue 1973 * 1974 * Resumes transmission on a suspended Tx control queue. 1975 */ 1976 static void 1977 restart_ctrlq(void *data, int npending) 1978 { 1979 struct mbuf *m; 1980 struct sge_qset *qs = (struct sge_qset *)data; 1981 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1982 adapter_t *adap = qs->port->adapter; 1983 1984 TXQ_LOCK(qs); 1985 again: reclaim_completed_tx_imm(q); 1986 1987 while (q->in_use < q->size && 1988 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1989 1990 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1991 1992 if (++q->pidx >= q->size) { 1993 q->pidx = 0; 1994 q->gen ^= 1; 1995 } 1996 q->in_use++; 1997 } 1998 if (!mbufq_empty(&q->sendq)) { 1999 setbit(&qs->txq_stopped, TXQ_CTRL); 2000 2001 if (should_restart_tx(q) && 2002 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 2003 goto again; 2004 q->stops++; 2005 } 2006 TXQ_UNLOCK(qs); 2007 t3_write_reg(adap, A_SG_KDOORBELL, 2008 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2009 } 2010 2011 2012 /* 2013 * Send a management message through control queue 0 2014 */ 2015 int 2016 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2017 { 2018 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2019 } 2020 2021 /** 2022 * free_qset - free the resources of an SGE queue set 2023 * @sc: the controller owning the queue set 2024 * @q: the queue set 2025 * 2026 * Release the HW and SW resources associated with an SGE queue set, such 2027 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2028 * queue set must be quiesced prior to calling this. 
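 *
 *	The caller must hold the queue set lock (both t3_free_sge_resources()
 *	and the error path of t3_sge_alloc_qset() take it before calling in);
 *	the lock is dropped and destroyed here as part of the teardown.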
2029 */ 2030 static void 2031 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2032 { 2033 int i; 2034 2035 reclaim_completed_tx(q, 0, TXQ_ETH); 2036 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2037 if (q->txq[i].txq_mr != NULL) 2038 buf_ring_free(q->txq[i].txq_mr, M_DEVBUF); 2039 if (q->txq[i].txq_ifq != NULL) { 2040 ifq_delete(q->txq[i].txq_ifq); 2041 free(q->txq[i].txq_ifq, M_DEVBUF); 2042 } 2043 } 2044 2045 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2046 if (q->fl[i].desc) { 2047 mtx_lock_spin(&sc->sge.reg_lock); 2048 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2049 mtx_unlock_spin(&sc->sge.reg_lock); 2050 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2051 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2052 q->fl[i].desc_map); 2053 bus_dma_tag_destroy(q->fl[i].desc_tag); 2054 bus_dma_tag_destroy(q->fl[i].entry_tag); 2055 } 2056 if (q->fl[i].sdesc) { 2057 free_rx_bufs(sc, &q->fl[i]); 2058 free(q->fl[i].sdesc, M_DEVBUF); 2059 } 2060 } 2061 2062 mtx_unlock(&q->lock); 2063 MTX_DESTROY(&q->lock); 2064 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2065 if (q->txq[i].desc) { 2066 mtx_lock_spin(&sc->sge.reg_lock); 2067 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2068 mtx_unlock_spin(&sc->sge.reg_lock); 2069 bus_dmamap_unload(q->txq[i].desc_tag, 2070 q->txq[i].desc_map); 2071 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2072 q->txq[i].desc_map); 2073 bus_dma_tag_destroy(q->txq[i].desc_tag); 2074 bus_dma_tag_destroy(q->txq[i].entry_tag); 2075 } 2076 if (q->txq[i].sdesc) { 2077 free(q->txq[i].sdesc, M_DEVBUF); 2078 } 2079 } 2080 2081 if (q->rspq.desc) { 2082 mtx_lock_spin(&sc->sge.reg_lock); 2083 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2084 mtx_unlock_spin(&sc->sge.reg_lock); 2085 2086 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2087 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2088 q->rspq.desc_map); 2089 bus_dma_tag_destroy(q->rspq.desc_tag); 2090 MTX_DESTROY(&q->rspq.lock); 2091 } 2092 2093 #ifdef LRO_SUPPORTED 2094 tcp_lro_free(&q->lro.ctrl); 2095 #endif 2096 2097 bzero(q, sizeof(*q)); 2098 } 2099 2100 /** 2101 * t3_free_sge_resources - free SGE resources 2102 * @sc: the adapter softc 2103 * 2104 * Frees resources used by the SGE queue sets. 2105 */ 2106 void 2107 t3_free_sge_resources(adapter_t *sc) 2108 { 2109 int i, nqsets; 2110 2111 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2112 nqsets += sc->port[i].nqsets; 2113 2114 for (i = 0; i < nqsets; ++i) { 2115 TXQ_LOCK(&sc->sge.qs[i]); 2116 t3_free_qset(sc, &sc->sge.qs[i]); 2117 } 2118 2119 } 2120 2121 /** 2122 * t3_sge_start - enable SGE 2123 * @sc: the controller softc 2124 * 2125 * Enables the SGE for DMAs. This is the last step in starting packet 2126 * transfers. 2127 */ 2128 void 2129 t3_sge_start(adapter_t *sc) 2130 { 2131 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2132 } 2133 2134 /** 2135 * t3_sge_stop - disable SGE operation 2136 * @sc: the adapter 2137 * 2138 * Disables the DMA engine. This can be called in emeregencies (e.g., 2139 * from error interrupts) or from normal process context. In the latter 2140 * case it also disables any pending queue restart tasklets. Note that 2141 * if it is called in interrupt context it cannot disable the restart 2142 * tasklets as it cannot wait, however the tasklets will have no effect 2143 * since the doorbells are disabled and the driver will call this again 2144 * later from process context, at which time the tasklets will be stopped 2145 * if they are still running. 
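 *
 *	In this version the taskqueue_drain() calls are compiled out (the
 *	"notyet" block below), so only the global enable bit is cleared and
 *	any queued restart tasks are simply left to run to completion.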
2146 */ 2147 void 2148 t3_sge_stop(adapter_t *sc) 2149 { 2150 int i, nqsets; 2151 2152 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2153 2154 if (sc->tq == NULL) 2155 return; 2156 2157 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2158 nqsets += sc->port[i].nqsets; 2159 #ifdef notyet 2160 /* 2161 * 2162 * XXX 2163 */ 2164 for (i = 0; i < nqsets; ++i) { 2165 struct sge_qset *qs = &sc->sge.qs[i]; 2166 2167 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2168 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2169 } 2170 #endif 2171 } 2172 2173 /** 2174 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2175 * @adapter: the adapter 2176 * @q: the Tx queue to reclaim descriptors from 2177 * @reclaimable: the number of descriptors to reclaim 2178 * @m_vec_size: maximum number of buffers to reclaim 2179 * @desc_reclaimed: returns the number of descriptors reclaimed 2180 * 2181 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2182 * Tx buffers. Called with the Tx queue lock held. 2183 * 2184 * Returns number of buffers of reclaimed 2185 */ 2186 void 2187 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2188 { 2189 struct tx_sw_desc *txsd; 2190 unsigned int cidx, mask; 2191 struct sge_txq *q = &qs->txq[queue]; 2192 2193 #ifdef T3_TRACE 2194 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2195 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2196 #endif 2197 cidx = q->cidx; 2198 mask = q->size - 1; 2199 txsd = &q->sdesc[cidx]; 2200 2201 mtx_assert(&qs->lock, MA_OWNED); 2202 while (reclaimable--) { 2203 prefetch(q->sdesc[(cidx + 1) & mask].m); 2204 prefetch(q->sdesc[(cidx + 2) & mask].m); 2205 2206 if (txsd->m != NULL) { 2207 if (txsd->flags & TX_SW_DESC_MAPPED) { 2208 bus_dmamap_unload(q->entry_tag, txsd->map); 2209 txsd->flags &= ~TX_SW_DESC_MAPPED; 2210 } 2211 m_freem_list(txsd->m); 2212 txsd->m = NULL; 2213 } else 2214 q->txq_skipped++; 2215 2216 ++txsd; 2217 if (++cidx == q->size) { 2218 cidx = 0; 2219 txsd = q->sdesc; 2220 } 2221 } 2222 q->cidx = cidx; 2223 2224 } 2225 2226 /** 2227 * is_new_response - check if a response is newly written 2228 * @r: the response descriptor 2229 * @q: the response queue 2230 * 2231 * Returns true if a response descriptor contains a yet unprocessed 2232 * response. 2233 */ 2234 static __inline int 2235 is_new_response(const struct rsp_desc *r, 2236 const struct sge_rspq *q) 2237 { 2238 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2239 } 2240 2241 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2242 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2243 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2244 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2245 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2246 2247 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2248 #define NOMEM_INTR_DELAY 2500 2249 2250 /** 2251 * write_ofld_wr - write an offload work request 2252 * @adap: the adapter 2253 * @m: the packet to send 2254 * @q: the Tx queue 2255 * @pidx: index of the first Tx descriptor to write 2256 * @gen: the generation value to use 2257 * @ndesc: number of descriptors the packet will occupy 2258 * 2259 * Write an offload work request to send the supplied packet. The packet 2260 * data already carry the work request with most fields populated. 
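 *
 *	Packets that qualify as immediate data and carry no DMA segments are
 *	handed to write_imm(); otherwise the work request header flits are
 *	copied from the mbuf and a scatter/gather list built from @segs is
 *	appended: it is written directly into the descriptor when everything
 *	fits in one (@ndesc == 1), or staged in a local array for
 *	write_wr_hdr_sgl() to spread across descriptors otherwise.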
2261 */ 2262 static void 2263 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2264 struct sge_txq *q, unsigned int pidx, 2265 unsigned int gen, unsigned int ndesc, 2266 bus_dma_segment_t *segs, unsigned int nsegs) 2267 { 2268 unsigned int sgl_flits, flits; 2269 struct work_request_hdr *from; 2270 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2271 struct tx_desc *d = &q->desc[pidx]; 2272 struct txq_state txqs; 2273 2274 if (immediate(m) && nsegs == 0) { 2275 write_imm(d, m, m->m_len, gen); 2276 return; 2277 } 2278 2279 /* Only TX_DATA builds SGLs */ 2280 from = mtod(m, struct work_request_hdr *); 2281 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2282 2283 flits = m->m_len / 8; 2284 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2285 2286 make_sgl(sgp, segs, nsegs); 2287 sgl_flits = sgl_len(nsegs); 2288 2289 txqs.gen = gen; 2290 txqs.pidx = pidx; 2291 txqs.compl = 0; 2292 2293 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2294 from->wrh_hi, from->wrh_lo); 2295 } 2296 2297 /** 2298 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2299 * @m: the packet 2300 * 2301 * Returns the number of Tx descriptors needed for the given offload 2302 * packet. These packets are already fully constructed. 2303 */ 2304 static __inline unsigned int 2305 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2306 { 2307 unsigned int flits, cnt = 0; 2308 int ndescs; 2309 2310 if (m->m_len <= WR_LEN && nsegs == 0) 2311 return (1); /* packet fits as immediate data */ 2312 2313 /* 2314 * This needs to be re-visited for TOE 2315 */ 2316 2317 cnt = nsegs; 2318 2319 /* headers */ 2320 flits = m->m_len / 8; 2321 2322 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2323 2324 return (ndescs); 2325 } 2326 2327 /** 2328 * ofld_xmit - send a packet through an offload queue 2329 * @adap: the adapter 2330 * @q: the Tx offload queue 2331 * @m: the packet 2332 * 2333 * Send an offload packet through an SGE offload queue. 
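 *
 *	Returns 0 on success.  If no descriptors are available the packet is
 *	left on the queue's pending list with its descriptor count stashed in
 *	the mbuf priority (for restart_offloadq() to pick up) and EINTR is
 *	returned.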
2334 */ 2335 static int 2336 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2337 { 2338 int ret, nsegs; 2339 unsigned int ndesc; 2340 unsigned int pidx, gen; 2341 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2342 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2343 struct tx_sw_desc *stx; 2344 2345 nsegs = m_get_sgllen(m); 2346 vsegs = m_get_sgl(m); 2347 ndesc = calc_tx_descs_ofld(m, nsegs); 2348 busdma_map_sgl(vsegs, segs, nsegs); 2349 2350 stx = &q->sdesc[q->pidx]; 2351 2352 TXQ_LOCK(qs); 2353 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2354 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2355 if (__predict_false(ret)) { 2356 if (ret == 1) { 2357 printf("no ofld desc avail\n"); 2358 2359 m_set_priority(m, ndesc); /* save for restart */ 2360 TXQ_UNLOCK(qs); 2361 return (EINTR); 2362 } 2363 goto again; 2364 } 2365 2366 gen = q->gen; 2367 q->in_use += ndesc; 2368 pidx = q->pidx; 2369 q->pidx += ndesc; 2370 if (q->pidx >= q->size) { 2371 q->pidx -= q->size; 2372 q->gen ^= 1; 2373 } 2374 #ifdef T3_TRACE 2375 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2376 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2377 ndesc, pidx, skb->len, skb->len - skb->data_len, 2378 skb_shinfo(skb)->nr_frags); 2379 #endif 2380 TXQ_UNLOCK(qs); 2381 2382 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2383 check_ring_tx_db(adap, q); 2384 return (0); 2385 } 2386 2387 /** 2388 * restart_offloadq - restart a suspended offload queue 2389 * @qs: the queue set cotaining the offload queue 2390 * 2391 * Resumes transmission on a suspended Tx offload queue. 2392 */ 2393 static void 2394 restart_offloadq(void *data, int npending) 2395 { 2396 struct mbuf *m; 2397 struct sge_qset *qs = data; 2398 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2399 adapter_t *adap = qs->port->adapter; 2400 bus_dma_segment_t segs[TX_MAX_SEGS]; 2401 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2402 int nsegs, cleaned; 2403 2404 TXQ_LOCK(qs); 2405 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2406 2407 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2408 unsigned int gen, pidx; 2409 unsigned int ndesc = m_get_priority(m); 2410 2411 if (__predict_false(q->size - q->in_use < ndesc)) { 2412 setbit(&qs->txq_stopped, TXQ_OFLD); 2413 if (should_restart_tx(q) && 2414 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2415 goto again; 2416 q->stops++; 2417 break; 2418 } 2419 2420 gen = q->gen; 2421 q->in_use += ndesc; 2422 pidx = q->pidx; 2423 q->pidx += ndesc; 2424 if (q->pidx >= q->size) { 2425 q->pidx -= q->size; 2426 q->gen ^= 1; 2427 } 2428 2429 (void)mbufq_dequeue(&q->sendq); 2430 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2431 TXQ_UNLOCK(qs); 2432 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2433 TXQ_LOCK(qs); 2434 } 2435 #if USE_GTS 2436 set_bit(TXQ_RUNNING, &q->flags); 2437 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2438 #endif 2439 TXQ_UNLOCK(qs); 2440 wmb(); 2441 t3_write_reg(adap, A_SG_KDOORBELL, 2442 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2443 } 2444 2445 /** 2446 * queue_set - return the queue set a packet should use 2447 * @m: the packet 2448 * 2449 * Maps a packet to the SGE queue set it should use. The desired queue 2450 * set is carried in bits 1-3 in the packet's priority. 2451 */ 2452 static __inline int 2453 queue_set(const struct mbuf *m) 2454 { 2455 return m_get_priority(m) >> 1; 2456 } 2457 2458 /** 2459 * is_ctrl_pkt - return whether an offload packet is a control packet 2460 * @m: the packet 2461 * 2462 * Determines whether an offload packet should use an OFLD or a CTRL 2463 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2464 */ 2465 static __inline int 2466 is_ctrl_pkt(const struct mbuf *m) 2467 { 2468 return m_get_priority(m) & 1; 2469 } 2470 2471 /** 2472 * t3_offload_tx - send an offload packet 2473 * @tdev: the offload device to send to 2474 * @m: the packet 2475 * 2476 * Sends an offload packet. We use the packet priority to select the 2477 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2478 * should be sent as regular or control, bits 1-3 select the queue set. 2479 */ 2480 int 2481 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2482 { 2483 adapter_t *adap = tdev2adap(tdev); 2484 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2485 2486 if (__predict_false(is_ctrl_pkt(m))) 2487 return ctrl_xmit(adap, qs, m); 2488 2489 return ofld_xmit(adap, qs, m); 2490 } 2491 2492 /** 2493 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2494 * @tdev: the offload device that will be receiving the packets 2495 * @q: the SGE response queue that assembled the bundle 2496 * @m: the partial bundle 2497 * @n: the number of packets in the bundle 2498 * 2499 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2500 */ 2501 static __inline void 2502 deliver_partial_bundle(struct t3cdev *tdev, 2503 struct sge_rspq *q, 2504 struct mbuf *mbufs[], int n) 2505 { 2506 if (n) { 2507 q->offload_bundles++; 2508 cxgb_ofld_recv(tdev, mbufs, n); 2509 } 2510 } 2511 2512 static __inline int 2513 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2514 struct mbuf *m, struct mbuf *rx_gather[], 2515 unsigned int gather_idx) 2516 { 2517 2518 rq->offload_pkts++; 2519 m->m_pkthdr.header = mtod(m, void *); 2520 rx_gather[gather_idx++] = m; 2521 if (gather_idx == RX_BUNDLE_SIZE) { 2522 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2523 gather_idx = 0; 2524 rq->offload_bundles++; 2525 } 2526 return (gather_idx); 2527 } 2528 2529 static void 2530 restart_tx(struct sge_qset *qs) 2531 { 2532 struct adapter *sc = qs->port->adapter; 2533 2534 2535 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2536 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2537 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2538 qs->txq[TXQ_OFLD].restarts++; 2539 DPRINTF("restarting TXQ_OFLD\n"); 2540 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2541 } 2542 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2543 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2544 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2545 qs->txq[TXQ_CTRL].in_use); 2546 2547 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2548 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2549 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2550 qs->txq[TXQ_CTRL].restarts++; 2551 DPRINTF("restarting TXQ_CTRL\n"); 2552 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2553 } 2554 } 2555 2556 /** 2557 * t3_sge_alloc_qset - initialize an SGE queue set 2558 * @sc: the controller softc 2559 * @id: the queue set id 2560 * @nports: how many Ethernet ports will be using this queue set 2561 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2562 * @p: configuration parameters for this queue set 2563 * @ntxq: number of Tx queues for the queue set 2564 * @pi: port info for queue set 2565 * 2566 * Allocate resources and initialize an SGE queue set. A queue set 2567 * comprises a response queue, two Rx free-buffer queues, and up to 3 2568 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2569 * queue, offload queue, and control queue. 2570 */ 2571 int 2572 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2573 const struct qset_params *p, int ntxq, struct port_info *pi) 2574 { 2575 struct sge_qset *q = &sc->sge.qs[id]; 2576 int i, ret = 0; 2577 2578 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2579 q->port = pi; 2580 2581 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2582 2583 if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2584 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2585 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2586 goto err; 2587 } 2588 if ((q->txq[i].txq_ifq = 2589 malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO)) 2590 == NULL) { 2591 device_printf(sc->dev, "failed to allocate ifq\n"); 2592 goto err; 2593 } 2594 ifq_init(q->txq[i].txq_ifq, pi->ifp); 2595 callout_init(&q->txq[i].txq_timer, 1); 2596 callout_init(&q->txq[i].txq_watchdog, 1); 2597 q->txq[i].txq_timer.c_cpu = id % mp_ncpus; 2598 q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus; 2599 } 2600 init_qset_cntxt(q, id); 2601 q->idx = id; 2602 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2603 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2604 &q->fl[0].desc, &q->fl[0].sdesc, 2605 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2606 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2607 printf("error %d from alloc ring fl0\n", ret); 2608 goto err; 2609 } 2610 2611 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2612 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2613 &q->fl[1].desc, &q->fl[1].sdesc, 2614 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2615 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2616 printf("error %d from alloc ring fl1\n", ret); 2617 goto err; 2618 } 2619 2620 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2621 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2622 &q->rspq.desc_tag, &q->rspq.desc_map, 2623 NULL, NULL)) != 0) { 2624 printf("error %d from alloc ring rspq\n", ret); 2625 goto err; 2626 } 2627 2628 for (i = 0; i < ntxq; ++i) { 2629 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2630 2631 if ((ret = alloc_ring(sc, p->txq_size[i], 2632 sizeof(struct tx_desc), sz, 2633 &q->txq[i].phys_addr, &q->txq[i].desc, 2634 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2635 &q->txq[i].desc_map, 2636 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2637 printf("error %d from alloc ring tx %i\n", ret, i); 2638 goto err; 2639 } 2640 mbufq_init(&q->txq[i].sendq); 2641 q->txq[i].gen = 1; 2642 q->txq[i].size = p->txq_size[i]; 2643 } 2644 2645 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2646 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2647 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2648 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2649 2650 q->fl[0].gen = q->fl[1].gen = 1; 2651 q->fl[0].size = p->fl_size; 2652 q->fl[1].size = p->jumbo_size; 2653 2654 q->rspq.gen = 1; 2655 q->rspq.cidx = 0; 2656 q->rspq.size = p->rspq_size; 2657 2658 q->txq[TXQ_ETH].stop_thres = nports * 2659 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2660 2661 q->fl[0].buf_size = MCLBYTES; 2662 q->fl[0].zone = zone_pack; 2663 q->fl[0].type = EXT_PACKET; 2664 #if __FreeBSD_version > 800000 2665 if (cxgb_use_16k_clusters) { 2666 q->fl[1].buf_size = MJUM16BYTES; 2667 q->fl[1].zone = zone_jumbo16; 2668 q->fl[1].type = EXT_JUMBO16; 2669 } else { 2670 q->fl[1].buf_size = MJUM9BYTES; 2671 q->fl[1].zone = zone_jumbo9; 2672 q->fl[1].type = EXT_JUMBO9; 2673 } 2674 #else 2675 q->fl[1].buf_size = MJUMPAGESIZE; 2676 q->fl[1].zone = zone_jumbop; 2677 q->fl[1].type = EXT_JUMBOP; 2678 #endif 2679 2680 #ifdef LRO_SUPPORTED 2681 /* Allocate and setup the lro_ctrl structure */ 2682 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2683 ret = tcp_lro_init(&q->lro.ctrl); 2684 if (ret) { 2685 printf("error %d from tcp_lro_init\n", ret); 2686 goto err; 2687 } 2688 q->lro.ctrl.ifp = pi->ifp; 2689 #endif 2690 2691 mtx_lock_spin(&sc->sge.reg_lock); 2692 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2693 q->rspq.phys_addr, q->rspq.size, 2694 q->fl[0].buf_size, 1, 0); 2695 if (ret) { 2696 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2697 goto err_unlock; 2698 } 2699 2700 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2701 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2702 q->fl[i].phys_addr, q->fl[i].size, 2703 q->fl[i].buf_size, p->cong_thres, 1, 2704 0); 2705 if (ret) { 2706 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2707 goto err_unlock; 2708 } 2709 } 2710 2711 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2712 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2713 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2714 1, 0); 2715 if (ret) { 2716 printf("error %d from t3_sge_init_ecntxt\n", ret); 2717 goto err_unlock; 2718 } 2719 2720 if (ntxq > 1) { 2721 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2722 USE_GTS, SGE_CNTXT_OFLD, id, 2723 q->txq[TXQ_OFLD].phys_addr, 2724 q->txq[TXQ_OFLD].size, 0, 1, 0); 2725 if (ret) { 2726 printf("error %d from t3_sge_init_ecntxt\n", ret); 2727 goto err_unlock; 2728 } 2729 } 2730 2731 if (ntxq > 2) { 2732 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2733 SGE_CNTXT_CTRL, id, 2734 q->txq[TXQ_CTRL].phys_addr, 2735 q->txq[TXQ_CTRL].size, 2736 q->txq[TXQ_CTRL].token, 1, 0); 2737 if (ret) { 2738 printf("error %d from t3_sge_init_ecntxt\n", ret); 2739 goto err_unlock; 2740 } 2741 } 2742 2743 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2744 device_get_unit(sc->dev), irq_vec_idx); 
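	/*
	 * Each response queue gets its own lock, named after the unit and
	 * IRQ vector so contention is easy to attribute; in MSI/INTx mode
	 * the interrupt handlers still serialize on queue 0's response
	 * queue lock.
	 */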
2745 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2746 2747 mtx_unlock_spin(&sc->sge.reg_lock); 2748 t3_update_qset_coalesce(q, p); 2749 q->port = pi; 2750 2751 refill_fl(sc, &q->fl[0], q->fl[0].size); 2752 refill_fl(sc, &q->fl[1], q->fl[1].size); 2753 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2754 2755 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2756 V_NEWTIMER(q->rspq.holdoff_tmr)); 2757 2758 return (0); 2759 2760 err_unlock: 2761 mtx_unlock_spin(&sc->sge.reg_lock); 2762 err: 2763 TXQ_LOCK(q); 2764 t3_free_qset(sc, q); 2765 2766 return (ret); 2767 } 2768 2769 /* 2770 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2771 * ethernet data. Hardware assistance with various checksums and any vlan tag 2772 * will also be taken into account here. 2773 */ 2774 void 2775 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2776 { 2777 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2778 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2779 struct ifnet *ifp = pi->ifp; 2780 2781 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2782 2783 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2784 cpl->csum_valid && cpl->csum == 0xffff) { 2785 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2786 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2787 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2788 m->m_pkthdr.csum_data = 0xffff; 2789 } 2790 /* 2791 * XXX need to add VLAN support for 6.x 2792 */ 2793 #ifdef VLAN_SUPPORTED 2794 if (__predict_false(cpl->vlan_valid)) { 2795 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2796 m->m_flags |= M_VLANTAG; 2797 } 2798 #endif 2799 2800 m->m_pkthdr.rcvif = ifp; 2801 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2802 /* 2803 * adjust after conversion to mbuf chain 2804 */ 2805 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2806 m->m_len -= (sizeof(*cpl) + ethpad); 2807 m->m_data += (sizeof(*cpl) + ethpad); 2808 } 2809 2810 /** 2811 * get_packet - return the next ingress packet buffer from a free list 2812 * @adap: the adapter that received the packet 2813 * @drop_thres: # of remaining buffers before we start dropping packets 2814 * @qs: the qset that the SGE free list holding the packet belongs to 2815 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2816 * @r: response descriptor 2817 * 2818 * Get the next packet from a free list and complete setup of the 2819 * sk_buff. If the packet is small we make a copy and recycle the 2820 * original buffer, otherwise we use the original buffer itself. If a 2821 * positive drop threshold is supplied packets are dropped and their 2822 * buffers recycled if (a) the number of remaining buffers is under the 2823 * threshold and the packet is too big to copy, or (b) the packet should 2824 * be copied but there is no memory for the copy. 2825 */ 2826 static int 2827 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2828 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2829 { 2830 2831 unsigned int len_cq = ntohl(r->len_cq); 2832 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2833 int mask, cidx = fl->cidx; 2834 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2835 uint32_t len = G_RSPD_LEN(len_cq); 2836 uint32_t flags = M_EXT; 2837 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2838 caddr_t cl; 2839 struct mbuf *m; 2840 int ret = 0; 2841 2842 mask = fl->size - 1; 2843 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2844 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2845 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2846 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2847 2848 fl->credits--; 2849 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2850 2851 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2852 sopeop == RSPQ_SOP_EOP) { 2853 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2854 goto skip_recycle; 2855 cl = mtod(m, void *); 2856 memcpy(cl, sd->rxsd_cl, len); 2857 recycle_rx_buf(adap, fl, fl->cidx); 2858 m->m_pkthdr.len = m->m_len = len; 2859 m->m_flags = 0; 2860 mh->mh_head = mh->mh_tail = m; 2861 ret = 1; 2862 goto done; 2863 } else { 2864 skip_recycle: 2865 bus_dmamap_unload(fl->entry_tag, sd->map); 2866 cl = sd->rxsd_cl; 2867 m = sd->m; 2868 2869 if ((sopeop == RSPQ_SOP_EOP) || 2870 (sopeop == RSPQ_SOP)) 2871 flags |= M_PKTHDR; 2872 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2873 if (fl->zone == zone_pack) { 2874 /* 2875 * restore clobbered data pointer 2876 */ 2877 m->m_data = m->m_ext.ext_buf; 2878 } else { 2879 m_cljset(m, cl, fl->type); 2880 } 2881 m->m_len = len; 2882 } 2883 switch(sopeop) { 2884 case RSPQ_SOP_EOP: 2885 ret = 1; 2886 /* FALLTHROUGH */ 2887 case RSPQ_SOP: 2888 mh->mh_head = mh->mh_tail = m; 2889 m->m_pkthdr.len = len; 2890 break; 2891 case RSPQ_EOP: 2892 ret = 1; 2893 /* FALLTHROUGH */ 2894 case RSPQ_NSOP_NEOP: 2895 if (mh->mh_tail == NULL) { 2896 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2897 m_freem(m); 2898 break; 2899 } 2900 mh->mh_tail->m_next = m; 2901 mh->mh_tail = m; 2902 mh->mh_head->m_pkthdr.len += len; 2903 break; 2904 } 2905 if (cxgb_debug) 2906 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2907 done: 2908 if (++fl->cidx == fl->size) 2909 fl->cidx = 0; 2910 2911 return (ret); 2912 } 2913 2914 /** 2915 * handle_rsp_cntrl_info - handles control information in a response 2916 * @qs: the queue set corresponding to the response 2917 * @flags: the response control flags 2918 * 2919 * Handles the control information of an SGE response, such as GTS 2920 * indications and completion credits for the queue set's Tx queues. 2921 * HW coalesces credits, we don't do any extra SW coalescing. 
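 *
 *	Completion credits are reported per hardware Tx queue: TXQ0 credits
 *	are added to the Ethernet queue's processed count, TXQ1 to the
 *	offload queue's, and TXQ2 to the control queue's, for the reclaim
 *	path to consume later.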
2922 */ 2923 static __inline void 2924 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2925 { 2926 unsigned int credits; 2927 2928 #if USE_GTS 2929 if (flags & F_RSPD_TXQ0_GTS) 2930 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2931 #endif 2932 credits = G_RSPD_TXQ0_CR(flags); 2933 if (credits) 2934 qs->txq[TXQ_ETH].processed += credits; 2935 2936 credits = G_RSPD_TXQ2_CR(flags); 2937 if (credits) 2938 qs->txq[TXQ_CTRL].processed += credits; 2939 2940 # if USE_GTS 2941 if (flags & F_RSPD_TXQ1_GTS) 2942 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2943 # endif 2944 credits = G_RSPD_TXQ1_CR(flags); 2945 if (credits) 2946 qs->txq[TXQ_OFLD].processed += credits; 2947 2948 } 2949 2950 static void 2951 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2952 unsigned int sleeping) 2953 { 2954 ; 2955 } 2956 2957 /** 2958 * process_responses - process responses from an SGE response queue 2959 * @adap: the adapter 2960 * @qs: the queue set to which the response queue belongs 2961 * @budget: how many responses can be processed in this round 2962 * 2963 * Process responses from an SGE response queue up to the supplied budget. 2964 * Responses include received packets as well as credits and other events 2965 * for the queues that belong to the response queue's queue set. 2966 * A negative budget is effectively unlimited. 2967 * 2968 * Additionally choose the interrupt holdoff time for the next interrupt 2969 * on this queue. If the system is under memory shortage use a fairly 2970 * long delay to help recovery. 2971 */ 2972 static int 2973 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2974 { 2975 struct sge_rspq *rspq = &qs->rspq; 2976 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2977 int budget_left = budget; 2978 unsigned int sleeping = 0; 2979 #ifdef LRO_SUPPORTED 2980 int lro_enabled = qs->lro.enabled; 2981 int skip_lro; 2982 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2983 #endif 2984 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2985 int ngathered = 0; 2986 #ifdef DEBUG 2987 static int last_holdoff = 0; 2988 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2989 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2990 last_holdoff = rspq->holdoff_tmr; 2991 } 2992 #endif 2993 rspq->next_holdoff = rspq->holdoff_tmr; 2994 2995 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2996 int eth, eop = 0, ethpad = 0; 2997 uint32_t flags = ntohl(r->flags); 2998 uint32_t rss_csum = *(const uint32_t *)r; 2999 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 3000 3001 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 3002 3003 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 3004 struct mbuf *m; 3005 3006 if (cxgb_debug) 3007 printf("async notification\n"); 3008 3009 if (rspq->rspq_mh.mh_head == NULL) { 3010 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3011 m = rspq->rspq_mh.mh_head; 3012 } else { 3013 m = m_gethdr(M_DONTWAIT, MT_DATA); 3014 } 3015 if (m == NULL) 3016 goto no_mem; 3017 3018 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3019 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3020 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3021 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3022 eop = 1; 3023 rspq->async_notif++; 3024 goto skip; 3025 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3026 struct mbuf *m = NULL; 3027 3028 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3029 r->rss_hdr.opcode, rspq->cidx); 3030 if (rspq->rspq_mh.mh_head == NULL) 3031 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3032 else 3033 m = m_gethdr(M_DONTWAIT, MT_DATA); 3034 
3035 if (rspq->rspq_mh.mh_head == NULL && m == NULL) { 3036 no_mem: 3037 rspq->next_holdoff = NOMEM_INTR_DELAY; 3038 budget_left--; 3039 break; 3040 } 3041 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3042 eop = 1; 3043 rspq->imm_data++; 3044 } else if (r->len_cq) { 3045 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3046 3047 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3048 if (eop) { 3049 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3050 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3051 } 3052 3053 ethpad = 2; 3054 } else { 3055 rspq->pure_rsps++; 3056 } 3057 skip: 3058 if (flags & RSPD_CTRL_MASK) { 3059 sleeping |= flags & RSPD_GTS_MASK; 3060 handle_rsp_cntrl_info(qs, flags); 3061 } 3062 3063 r++; 3064 if (__predict_false(++rspq->cidx == rspq->size)) { 3065 rspq->cidx = 0; 3066 rspq->gen ^= 1; 3067 r = rspq->desc; 3068 } 3069 3070 if (++rspq->credits >= (rspq->size / 4)) { 3071 refill_rspq(adap, rspq, rspq->credits); 3072 rspq->credits = 0; 3073 } 3074 if (!eth && eop) { 3075 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3076 /* 3077 * XXX size mismatch 3078 */ 3079 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3080 3081 3082 ngathered = rx_offload(&adap->tdev, rspq, 3083 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3084 rspq->rspq_mh.mh_head = NULL; 3085 DPRINTF("received offload packet\n"); 3086 3087 } else if (eth && eop) { 3088 struct mbuf *m = rspq->rspq_mh.mh_head; 3089 3090 t3_rx_eth(adap, rspq, m, ethpad); 3091 3092 #ifdef LRO_SUPPORTED 3093 /* 3094 * The T304 sends incoming packets on any qset. If LRO 3095 * is also enabled, we could end up sending packet up 3096 * lro_ctrl->ifp's input. That is incorrect. 3097 * 3098 * The mbuf's rcvif was derived from the cpl header and 3099 * is accurate. Skip LRO and just use that. 3100 */ 3101 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3102 3103 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && 3104 (tcp_lro_rx(lro_ctrl, m, 0) == 0)) { 3105 /* successfully queue'd for LRO */ 3106 } else 3107 #endif 3108 { 3109 /* 3110 * LRO not enabled, packet unsuitable for LRO, 3111 * or unable to queue. Pass it up right now in 3112 * either case. 3113 */ 3114 struct ifnet *ifp = m->m_pkthdr.rcvif; 3115 (*ifp->if_input)(ifp, m); 3116 } 3117 rspq->rspq_mh.mh_head = NULL; 3118 3119 } 3120 __refill_fl_lt(adap, &qs->fl[0], 32); 3121 __refill_fl_lt(adap, &qs->fl[1], 32); 3122 --budget_left; 3123 } 3124 3125 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3126 3127 #ifdef LRO_SUPPORTED 3128 /* Flush LRO */ 3129 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3130 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3131 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3132 tcp_lro_flush(lro_ctrl, queued); 3133 } 3134 #endif 3135 3136 if (sleeping) 3137 check_ring_db(adap, qs, sleeping); 3138 3139 mb(); /* commit Tx queue processed updates */ 3140 if (__predict_false(qs->txq_stopped > 1)) 3141 restart_tx(qs); 3142 3143 __refill_fl_lt(adap, &qs->fl[0], 512); 3144 __refill_fl_lt(adap, &qs->fl[1], 512); 3145 budget -= budget_left; 3146 return (budget); 3147 } 3148 3149 /* 3150 * A helper function that processes responses and issues GTS. 
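 *	The GTS write returns the updated consumer index and the next
 *	interrupt holdoff timer to the hardware in a single register access.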
3151 */ 3152 static __inline int 3153 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3154 { 3155 int work; 3156 static int last_holdoff = 0; 3157 3158 work = process_responses(adap, rspq_to_qset(rq), -1); 3159 3160 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3161 printf("next_holdoff=%d\n", rq->next_holdoff); 3162 last_holdoff = rq->next_holdoff; 3163 } 3164 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3165 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3166 3167 return (work); 3168 } 3169 3170 3171 /* 3172 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3173 * Handles data events from SGE response queues as well as error and other 3174 * async events as they all use the same interrupt pin. We use one SGE 3175 * response queue per port in this mode and protect all response queues with 3176 * queue 0's lock. 3177 */ 3178 void 3179 t3b_intr(void *data) 3180 { 3181 uint32_t i, map; 3182 adapter_t *adap = data; 3183 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3184 3185 t3_write_reg(adap, A_PL_CLI, 0); 3186 map = t3_read_reg(adap, A_SG_DATA_INTR); 3187 3188 if (!map) 3189 return; 3190 3191 if (__predict_false(map & F_ERRINTR)) 3192 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3193 3194 mtx_lock(&q0->lock); 3195 for_each_port(adap, i) 3196 if (map & (1 << i)) 3197 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3198 mtx_unlock(&q0->lock); 3199 } 3200 3201 /* 3202 * The MSI interrupt handler. This needs to handle data events from SGE 3203 * response queues as well as error and other async events as they all use 3204 * the same MSI vector. We use one SGE response queue per port in this mode 3205 * and protect all response queues with queue 0's lock. 3206 */ 3207 void 3208 t3_intr_msi(void *data) 3209 { 3210 adapter_t *adap = data; 3211 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3212 int i, new_packets = 0; 3213 3214 mtx_lock(&q0->lock); 3215 3216 for_each_port(adap, i) 3217 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3218 new_packets = 1; 3219 mtx_unlock(&q0->lock); 3220 if (new_packets == 0) 3221 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3222 } 3223 3224 void 3225 t3_intr_msix(void *data) 3226 { 3227 struct sge_qset *qs = data; 3228 adapter_t *adap = qs->port->adapter; 3229 struct sge_rspq *rspq = &qs->rspq; 3230 3231 if (process_responses_gts(adap, rspq) == 0) 3232 rspq->unhandled_irqs++; 3233 } 3234 3235 #define QDUMP_SBUF_SIZE 32 * 400 3236 static int 3237 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3238 { 3239 struct sge_rspq *rspq; 3240 struct sge_qset *qs; 3241 int i, err, dump_end, idx; 3242 static int multiplier = 1; 3243 struct sbuf *sb; 3244 struct rsp_desc *rspd; 3245 uint32_t data[4]; 3246 3247 rspq = arg1; 3248 qs = rspq_to_qset(rspq); 3249 if (rspq->rspq_dump_count == 0) 3250 return (0); 3251 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3252 log(LOG_WARNING, 3253 "dump count is too large %d\n", rspq->rspq_dump_count); 3254 rspq->rspq_dump_count = 0; 3255 return (EINVAL); 3256 } 3257 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3258 log(LOG_WARNING, 3259 "dump start of %d is greater than queue size\n", 3260 rspq->rspq_dump_start); 3261 rspq->rspq_dump_start = 0; 3262 return (EINVAL); 3263 } 3264 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3265 if (err) 3266 return (err); 3267 retry_sbufops: 3268 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3269 3270 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3271 (data[0] & 
0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3272 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3273 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3274 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3275 3276 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3277 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3278 3279 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3280 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3281 idx = i & (RSPQ_Q_SIZE-1); 3282 3283 rspd = &rspq->desc[idx]; 3284 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3285 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3286 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3287 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3288 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3289 be32toh(rspd->len_cq), rspd->intr_gen); 3290 } 3291 if (sbuf_overflowed(sb)) { 3292 sbuf_delete(sb); 3293 multiplier++; 3294 goto retry_sbufops; 3295 } 3296 sbuf_finish(sb); 3297 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3298 sbuf_delete(sb); 3299 return (err); 3300 } 3301 3302 static int 3303 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3304 { 3305 struct sge_txq *txq; 3306 struct sge_qset *qs; 3307 int i, j, err, dump_end; 3308 static int multiplier = 1; 3309 struct sbuf *sb; 3310 struct tx_desc *txd; 3311 uint32_t *WR, wr_hi, wr_lo, gen; 3312 uint32_t data[4]; 3313 3314 txq = arg1; 3315 qs = txq_to_qset(txq, TXQ_ETH); 3316 if (txq->txq_dump_count == 0) { 3317 return (0); 3318 } 3319 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3320 log(LOG_WARNING, 3321 "dump count is too large %d\n", txq->txq_dump_count); 3322 txq->txq_dump_count = 1; 3323 return (EINVAL); 3324 } 3325 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3326 log(LOG_WARNING, 3327 "dump start of %d is greater than queue size\n", 3328 txq->txq_dump_start); 3329 txq->txq_dump_start = 0; 3330 return (EINVAL); 3331 } 3332 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3333 if (err) 3334 return (err); 3335 3336 3337 retry_sbufops: 3338 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3339 3340 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3341 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3342 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3343 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3344 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3345 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3346 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3347 txq->txq_dump_start, 3348 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3349 3350 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3351 for (i = txq->txq_dump_start; i < dump_end; i++) { 3352 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3353 WR = (uint32_t *)txd->flit; 3354 wr_hi = ntohl(WR[0]); 3355 wr_lo = ntohl(WR[1]); 3356 gen = G_WR_GEN(wr_lo); 3357 3358 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3359 wr_hi, wr_lo, gen); 3360 for (j = 2; j < 30; j += 4) 3361 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3362 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3363 3364 } 3365 if (sbuf_overflowed(sb)) { 3366 sbuf_delete(sb); 3367 multiplier++; 3368 goto retry_sbufops; 3369 } 3370 sbuf_finish(sb); 3371 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3372 sbuf_delete(sb); 3373 return (err); 3374 } 3375 
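/*
 * The queue dump handlers above and below share the same sbuf idiom:
 * format into a fixed-length buffer and, if sbuf_overflowed() reports
 * truncation, enlarge the buffer by bumping a static multiplier and
 * retrying, e.g.:
 *
 * retry_sbufops:
 *	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
 *	... sbuf_printf(sb, ...) ...
 *	if (sbuf_overflowed(sb)) {
 *		sbuf_delete(sb);
 *		multiplier++;
 *		goto retry_sbufops;
 *	}
 *	sbuf_finish(sb);
 */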
3376 static int 3377 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3378 { 3379 struct sge_txq *txq; 3380 struct sge_qset *qs; 3381 int i, j, err, dump_end; 3382 static int multiplier = 1; 3383 struct sbuf *sb; 3384 struct tx_desc *txd; 3385 uint32_t *WR, wr_hi, wr_lo, gen; 3386 3387 txq = arg1; 3388 qs = txq_to_qset(txq, TXQ_CTRL); 3389 if (txq->txq_dump_count == 0) { 3390 return (0); 3391 } 3392 if (txq->txq_dump_count > 256) { 3393 log(LOG_WARNING, 3394 "dump count is too large %d\n", txq->txq_dump_count); 3395 txq->txq_dump_count = 1; 3396 return (EINVAL); 3397 } 3398 if (txq->txq_dump_start > 255) { 3399 log(LOG_WARNING, 3400 "dump start of %d is greater than queue size\n", 3401 txq->txq_dump_start); 3402 txq->txq_dump_start = 0; 3403 return (EINVAL); 3404 } 3405 3406 retry_sbufops: 3407 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3408 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3409 txq->txq_dump_start, 3410 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3411 3412 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3413 for (i = txq->txq_dump_start; i < dump_end; i++) { 3414 txd = &txq->desc[i & (255)]; 3415 WR = (uint32_t *)txd->flit; 3416 wr_hi = ntohl(WR[0]); 3417 wr_lo = ntohl(WR[1]); 3418 gen = G_WR_GEN(wr_lo); 3419 3420 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3421 wr_hi, wr_lo, gen); 3422 for (j = 2; j < 30; j += 4) 3423 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3424 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3425 3426 } 3427 if (sbuf_overflowed(sb)) { 3428 sbuf_delete(sb); 3429 multiplier++; 3430 goto retry_sbufops; 3431 } 3432 sbuf_finish(sb); 3433 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3434 sbuf_delete(sb); 3435 return (err); 3436 } 3437 3438 static int 3439 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3440 { 3441 adapter_t *sc = arg1; 3442 struct qset_params *qsp = &sc->params.sge.qset[0]; 3443 int coalesce_usecs; 3444 struct sge_qset *qs; 3445 int i, j, err, nqsets = 0; 3446 struct mtx *lock; 3447 3448 if ((sc->flags & FULL_INIT_DONE) == 0) 3449 return (ENXIO); 3450 3451 coalesce_usecs = qsp->coalesce_usecs; 3452 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3453 3454 if (err != 0) { 3455 return (err); 3456 } 3457 if (coalesce_usecs == qsp->coalesce_usecs) 3458 return (0); 3459 3460 for (i = 0; i < sc->params.nports; i++) 3461 for (j = 0; j < sc->port[i].nqsets; j++) 3462 nqsets++; 3463 3464 coalesce_usecs = max(1, coalesce_usecs); 3465 3466 for (i = 0; i < nqsets; i++) { 3467 qs = &sc->sge.qs[i]; 3468 qsp = &sc->params.sge.qset[i]; 3469 qsp->coalesce_usecs = coalesce_usecs; 3470 3471 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3472 &sc->sge.qs[0].rspq.lock; 3473 3474 mtx_lock(lock); 3475 t3_update_qset_coalesce(qs, qsp); 3476 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3477 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3478 mtx_unlock(lock); 3479 } 3480 3481 return (0); 3482 } 3483 3484 3485 void 3486 t3_add_attach_sysctls(adapter_t *sc) 3487 { 3488 struct sysctl_ctx_list *ctx; 3489 struct sysctl_oid_list *children; 3490 3491 ctx = device_get_sysctl_ctx(sc->dev); 3492 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3493 3494 /* random information */ 3495 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3496 "firmware_version", 3497 CTLFLAG_RD, &sc->fw_version, 3498 0, "firmware version"); 3499 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3500 "hw_revision", 3501 CTLFLAG_RD, &sc->params.rev, 3502 0, "chip model"); 3503 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3504 "port_types", 3505 CTLFLAG_RD, &sc->port_types, 3506 0, "type of ports"); 3507 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3508 "enable_debug", 3509 CTLFLAG_RW, &cxgb_debug, 3510 0, "enable verbose debugging output"); 3511 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3512 CTLFLAG_RD, &sc->tunq_coalesce, 3513 "#tunneled packets freed"); 3514 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3515 "txq_overrun", 3516 CTLFLAG_RD, &txq_fills, 3517 0, "#times txq overrun"); 3518 } 3519 3520 3521 static const char *rspq_name = "rspq"; 3522 static const char *txq_names[] = 3523 { 3524 "txq_eth", 3525 "txq_ofld", 3526 "txq_ctrl" 3527 }; 3528 3529 static int 3530 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3531 { 3532 struct port_info *p = arg1; 3533 uint64_t *parg; 3534 3535 if (!p) 3536 return (EINVAL); 3537 3538 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3539 PORT_LOCK(p); 3540 t3_mac_update_stats(&p->mac); 3541 PORT_UNLOCK(p); 3542 3543 return (sysctl_handle_quad(oidp, parg, 0, req)); 3544 } 3545 3546 void 3547 t3_add_configured_sysctls(adapter_t *sc) 3548 { 3549 struct sysctl_ctx_list *ctx; 3550 struct sysctl_oid_list *children; 3551 int i, j; 3552 3553 ctx = device_get_sysctl_ctx(sc->dev); 3554 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3555 3556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3557 "intr_coal", 3558 CTLTYPE_INT|CTLFLAG_RW, sc, 3559 0, t3_set_coalesce_usecs, 3560 "I", "interrupt coalescing timer (us)"); 3561 3562 for (i = 0; i < sc->params.nports; i++) { 3563 struct port_info *pi = &sc->port[i]; 3564 struct sysctl_oid *poid; 3565 struct sysctl_oid_list *poidlist; 3566 struct mac_stats *mstats = &pi->mac.stats; 3567 3568 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3569 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3570 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3571 poidlist = SYSCTL_CHILDREN(poid); 3572 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3573 "nqsets", CTLFLAG_RD, &pi->nqsets, 3574 0, "#queue sets"); 3575 3576 for (j = 0; j < pi->nqsets; j++) { 3577 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3578 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3579 *ctrlqpoid, *lropoid; 3580 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3581 *txqpoidlist, *ctrlqpoidlist, 3582 *lropoidlist; 3583 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3584 3585 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3586 3587 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3588 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3589 qspoidlist = SYSCTL_CHILDREN(qspoid); 3590 3591 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3592 CTLFLAG_RD, &qs->fl[0].empty, 0, 3593 "freelist #0 empty"); 
3594 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3595 CTLFLAG_RD, &qs->fl[1].empty, 0, 3596 "freelist #1 empty"); 3597 3598 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3599 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3600 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3601 3602 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3603 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3604 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3605 3606 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3607 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3608 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3609 3610 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3611 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3612 lropoidlist = SYSCTL_CHILDREN(lropoid); 3613 3614 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3615 CTLFLAG_RD, &qs->rspq.size, 3616 0, "#entries in response queue"); 3617 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3618 CTLFLAG_RD, &qs->rspq.cidx, 3619 0, "consumer index"); 3620 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3621 CTLFLAG_RD, &qs->rspq.credits, 3622 0, "#credits"); 3623 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3624 CTLFLAG_RD, &qs->rspq.phys_addr, 3625 "physical_address_of the queue"); 3626 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3627 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3628 0, "start rspq dump entry"); 3629 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3630 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3631 0, "#rspq entries to dump"); 3632 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3633 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3634 0, t3_dump_rspq, "A", "dump of the response queue"); 3635 3636 3637 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped", 3638 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops, 3639 0, "#tunneled packets dropped"); 3640 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3641 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3642 0, "#tunneled packets waiting to be sent"); 3643 #if 0 3644 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3645 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3646 0, "#tunneled packets queue producer index"); 3647 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3648 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3649 0, "#tunneled packets queue consumer index"); 3650 #endif 3651 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3652 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3653 0, "#tunneled packets processed by the card"); 3654 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3655 CTLFLAG_RD, &txq->cleaned, 3656 0, "#tunneled packets cleaned"); 3657 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3658 CTLFLAG_RD, &txq->in_use, 3659 0, "#tunneled packet slots in use"); 3660 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3661 CTLFLAG_RD, &txq->txq_frees, 3662 "#tunneled packets freed"); 3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3664 CTLFLAG_RD, &txq->txq_skipped, 3665 0, "#tunneled packet descriptors skipped"); 3666 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3667 CTLFLAG_RD, &txq->txq_coalesced, 3668 "#tunneled packets coalesced"); 3669 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3670 CTLFLAG_RD, &txq->txq_enqueued, 3671 0, "#tunneled packets enqueued to hardware"); 3672 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3673 CTLFLAG_RD, &qs->txq_stopped, 3674 0, "tx queues 
stopped"); 3675 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3676 CTLFLAG_RD, &txq->phys_addr, 3677 "physical_address_of the queue"); 3678 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3679 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3680 0, "txq generation"); 3681 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3682 CTLFLAG_RD, &txq->cidx, 3683 0, "hardware queue cidx"); 3684 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3685 CTLFLAG_RD, &txq->pidx, 3686 0, "hardware queue pidx"); 3687 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3688 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3689 0, "txq start idx for dump"); 3690 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3691 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3692 0, "txq #entries to dump"); 3693 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3694 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3695 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3696 3697 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3698 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3699 0, "ctrlq start idx for dump"); 3700 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3701 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3702 0, "ctrl #entries to dump"); 3703 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3704 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3705 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3706 3707 #ifdef LRO_SUPPORTED 3708 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3709 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3710 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3711 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3712 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3713 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3714 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3715 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3716 #endif 3717 } 3718 3719 /* Now add a node for mac stats. */ 3720 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3721 CTLFLAG_RD, NULL, "MAC statistics"); 3722 poidlist = SYSCTL_CHILDREN(poid); 3723 3724 /* 3725 * We (ab)use the length argument (arg2) to pass on the offset 3726 * of the data that we are interested in. This is only required 3727 * for the quad counters that are updated from the hardware (we 3728 * make sure that we return the latest value). 3729 * sysctl_handle_macstat first updates *all* the counters from 3730 * the hardware, and then returns the latest value of the 3731 * requested counter. Best would be to update only the 3732 * requested counter from hardware, but t3_mac_update_stats() 3733 * hides all the register details and we don't want to dive into 3734 * all that here. 
3735 */ 3736 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3737 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3738 sysctl_handle_macstat, "QU", 0) 3739 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3740 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3741 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3742 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3743 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3744 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3745 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3746 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3747 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3748 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3749 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3750 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3751 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3752 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3753 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3754 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3755 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3756 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3757 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3758 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3759 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3760 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3761 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3762 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3763 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3764 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3765 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3766 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3767 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3768 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3769 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3770 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3771 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3772 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3773 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3774 CXGB_SYSCTL_ADD_QUAD(rx_short); 3775 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3776 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3777 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3778 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3779 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3780 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3781 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3782 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3783 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3784 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3785 #undef CXGB_SYSCTL_ADD_QUAD 3786 3787 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3788 CTLFLAG_RD, &mstats->a, 0) 3789 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3790 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3791 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3792 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3793 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3794 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3795 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3796 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3797 CXGB_SYSCTL_ADD_ULONG(num_resets); 3798 CXGB_SYSCTL_ADD_ULONG(link_faults); 3799 #undef CXGB_SYSCTL_ADD_ULONG 3800 } 3801 } 3802 3803 /** 3804 * t3_get_desc - dump an SGE descriptor for debugging purposes 3805 * @qs: the queue set 3806 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3807 * @idx: the descriptor index in the queue 3808 * @data: where to dump the descriptor contents 3809 * 3810 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3811 * size of the descriptor. 
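 *
 *	EINVAL is returned if @qnum or @idx is out of range or the selected
 *	queue has not been allocated; otherwise the descriptor is copied into
 *	@data and its size is the return value.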
3812 */
3813 int
3814 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3815 		unsigned char *data)
3816 {
3817 	if (qnum >= 6)
3818 		return (EINVAL);
3819 
3820 	if (qnum < 3) {
3821 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3822 			return (EINVAL);
3823 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3824 		return sizeof(struct tx_desc);
3825 	}
3826 
3827 	if (qnum == 3) {
3828 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3829 			return (EINVAL);
3830 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3831 		return sizeof(struct rsp_desc);
3832 	}
3833 
3834 	qnum -= 4;
3835 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3836 		return (EINVAL);
3837 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3838 	return sizeof(struct rx_desc);
3839 }
3840 