1 /************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2007-2009, Chelsio Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Chelsio Corporation nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_inet6.h" 35 #include "opt_inet.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/rman.h> 46 #include <sys/queue.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 50 #include <sys/proc.h> 51 #include <sys/sbuf.h> 52 #include <sys/sched.h> 53 #include <sys/smp.h> 54 #include <sys/systm.h> 55 #include <sys/syslog.h> 56 #include <sys/socket.h> 57 #include <sys/sglist.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/bpf.h> 62 #include <net/ethernet.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 71 #include <dev/pci/pcireg.h> 72 #include <dev/pci/pcivar.h> 73 74 #include <vm/vm.h> 75 #include <vm/pmap.h> 76 77 #include <cxgb_include.h> 78 #include <sys/mvec.h> 79 80 int txq_fills = 0; 81 int multiq_tx_enable = 1; 82 83 #ifdef TCP_OFFLOAD 84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); 85 #endif 86 87 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 90 "size of per-queue mbuf ring"); 91 92 static int cxgb_tx_coalesce_force = 0; 93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, 94 &cxgb_tx_coalesce_force, 0, 95 "coalesce small packets into a single work request regardless of ring state"); 96 97 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 98 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 99 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 100 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 101 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 102 #define TX_RECLAIM_MAX 
TX_ETH_Q_SIZE>>2 103 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 104 105 106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, 108 &cxgb_tx_coalesce_enable_start, 0, 109 "coalesce enable threshold"); 110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, 112 &cxgb_tx_coalesce_enable_stop, 0, 113 "coalesce disable threshold"); 114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, 116 &cxgb_tx_reclaim_threshold, 0, 117 "tx cleaning minimum threshold"); 118 119 /* 120 * XXX don't re-enable this until TOE stops assuming 121 * we have an m_ext 122 */ 123 static int recycle_enable = 0; 124 125 extern int cxgb_use_16k_clusters; 126 extern int nmbjumbop; 127 extern int nmbjumbo9; 128 extern int nmbjumbo16; 129 130 #define USE_GTS 0 131 132 #define SGE_RX_SM_BUF_SIZE 1536 133 #define SGE_RX_DROP_THRES 16 134 #define SGE_RX_COPY_THRES 128 135 136 /* 137 * Period of the Tx buffer reclaim timer. This timer does not need to run 138 * frequently as Tx buffers are usually reclaimed by new Tx packets. 139 */ 140 #define TX_RECLAIM_PERIOD (hz >> 1) 141 142 /* 143 * Values for sge_txq.flags 144 */ 145 enum { 146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 148 }; 149 150 struct tx_desc { 151 uint64_t flit[TX_DESC_FLITS]; 152 } __packed; 153 154 struct rx_desc { 155 uint32_t addr_lo; 156 uint32_t len_gen; 157 uint32_t gen2; 158 uint32_t addr_hi; 159 } __packed; 160 161 struct rsp_desc { /* response queue descriptor */ 162 struct rss_header rss_hdr; 163 uint32_t flags; 164 uint32_t len_cq; 165 uint8_t imm_data[47]; 166 uint8_t intr_gen; 167 } __packed; 168 169 #define RX_SW_DESC_MAP_CREATED (1 << 0) 170 #define TX_SW_DESC_MAP_CREATED (1 << 1) 171 #define RX_SW_DESC_INUSE (1 << 3) 172 #define TX_SW_DESC_MAPPED (1 << 4) 173 174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 178 179 struct tx_sw_desc { /* SW state per Tx descriptor */ 180 struct mbuf *m; 181 bus_dmamap_t map; 182 int flags; 183 }; 184 185 struct rx_sw_desc { /* SW state per Rx descriptor */ 186 caddr_t rxsd_cl; 187 struct mbuf *m; 188 bus_dmamap_t map; 189 int flags; 190 }; 191 192 struct txq_state { 193 unsigned int compl; 194 unsigned int gen; 195 unsigned int pidx; 196 }; 197 198 struct refill_fl_cb_arg { 199 int error; 200 bus_dma_segment_t seg; 201 int nseg; 202 }; 203 204 205 /* 206 * Maps a number of flits to the number of Tx descriptors that can hold them. 207 * The formula is 208 * 209 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 210 * 211 * HW allows up to 4 descriptors to be combined into a WR. 
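 *
 * For example, in the SGE_NUM_GENBITS == 2 table below 15 flits still fit
 * in a single descriptor while 16 flits need two; that matches the formula
 * above with WR_FLITS - 1 == 14, i.e. 1 + (16 - 2) / 14 = 2.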
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)	mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)	mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)	mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)	\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)	\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue occupancy rises above the
	 * coalesce-enable threshold we mark the queue as coalescing; we
	 * drop back out of coalescing once occupancy falls below the
	 * disable threshold and there are no packets enqueued.  This
	 * provides some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes =
&ci->nbytes; 323 324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 325 (*count < 7) && (m->m_next == NULL))) { 326 *count += 1; 327 *nbytes += m->m_len; 328 return (1); 329 } 330 return (0); 331 } 332 333 static struct mbuf * 334 cxgb_dequeue(struct sge_qset *qs) 335 { 336 struct mbuf *m, *m_head, *m_tail; 337 struct coalesce_info ci; 338 339 340 if (check_pkt_coalesce(qs) == 0) 341 return TXQ_RING_DEQUEUE(qs); 342 343 m_head = m_tail = NULL; 344 ci.count = ci.nbytes = 0; 345 do { 346 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 347 if (m_head == NULL) { 348 m_tail = m_head = m; 349 } else if (m != NULL) { 350 m_tail->m_nextpkt = m; 351 m_tail = m; 352 } 353 } while (m != NULL); 354 if (ci.count > 7) 355 panic("trying to coalesce %d packets in to one WR", ci.count); 356 return (m_head); 357 } 358 359 /** 360 * reclaim_completed_tx - reclaims completed Tx descriptors 361 * @adapter: the adapter 362 * @q: the Tx queue to reclaim completed descriptors from 363 * 364 * Reclaims Tx descriptors that the SGE has indicated it has processed, 365 * and frees the associated buffers if possible. Called with the Tx 366 * queue's lock held. 367 */ 368 static __inline int 369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 370 { 371 struct sge_txq *q = &qs->txq[queue]; 372 int reclaim = desc_reclaimable(q); 373 374 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 375 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 376 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 377 378 if (reclaim < reclaim_min) 379 return (0); 380 381 mtx_assert(&qs->lock, MA_OWNED); 382 if (reclaim > 0) { 383 t3_free_tx_desc(qs, reclaim, queue); 384 q->cleaned += reclaim; 385 q->in_use -= reclaim; 386 } 387 if (isset(&qs->txq_stopped, TXQ_ETH)) 388 clrbit(&qs->txq_stopped, TXQ_ETH); 389 390 return (reclaim); 391 } 392 393 #ifdef DEBUGNET 394 int 395 cxgb_debugnet_poll_tx(struct sge_qset *qs) 396 { 397 398 return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); 399 } 400 #endif 401 402 /** 403 * should_restart_tx - are there enough resources to restart a Tx queue? 404 * @q: the Tx queue 405 * 406 * Checks if there are enough descriptors to restart a suspended Tx queue. 407 */ 408 static __inline int 409 should_restart_tx(const struct sge_txq *q) 410 { 411 unsigned int r = q->processed - q->cleaned; 412 413 return q->in_use - r < (q->size >> 1); 414 } 415 416 /** 417 * t3_sge_init - initialize SGE 418 * @adap: the adapter 419 * @p: the SGE parameters 420 * 421 * Performs SGE initialization needed every time after a chip reset. 422 * We do not initialize any of the queue sets here, instead the driver 423 * top-level must request those individually. We also do not enable DMA 424 * here, that should be done after the queues have been set up. 425 */ 426 void 427 t3_sge_init(adapter_t *adap, struct sge_params *p) 428 { 429 u_int ctrl, ups; 430 431 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 432 433 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 434 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 435 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 436 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 437 #if SGE_NUM_GENBITS == 1 438 ctrl |= F_EGRGENCTRL; 439 #endif 440 if (adap->params.rev > 0) { 441 if (!(adap->flags & (USING_MSIX | USING_MSI))) 442 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 443 } 444 t3_write_reg(adap, A_SG_CONTROL, ctrl); 445 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 446 V_LORCQDRBTHRSH(512)); 447 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 448 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 449 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 450 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 451 adap->params.rev < T3_REV_C ? 1000 : 500); 452 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 453 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 454 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 455 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 456 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 457 } 458 459 460 /** 461 * sgl_len - calculates the size of an SGL of the given capacity 462 * @n: the number of SGL entries 463 * 464 * Calculates the number of flits needed for a scatter/gather list that 465 * can hold the given number of entries. 466 */ 467 static __inline unsigned int 468 sgl_len(unsigned int n) 469 { 470 return ((3 * n) / 2 + (n & 1)); 471 } 472 473 /** 474 * get_imm_packet - return the next ingress packet buffer from a response 475 * @resp: the response descriptor containing the packet data 476 * 477 * Return a packet containing the immediate data of the given response. 478 */ 479 static int 480 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 481 { 482 483 if (resp->rss_hdr.opcode == CPL_RX_DATA) { 484 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; 485 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 486 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { 487 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; 488 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 489 } else 490 m->m_len = IMMED_PKT_SIZE; 491 m->m_ext.ext_buf = NULL; 492 m->m_ext.ext_type = 0; 493 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 494 return (0); 495 } 496 497 static __inline u_int 498 flits_to_desc(u_int n) 499 { 500 return (flit_desc_map[n]); 501 } 502 503 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 504 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 505 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 506 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 507 F_HIRCQPARITYERROR) 508 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 509 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 510 F_RSPQDISABLED) 511 512 /** 513 * t3_sge_err_intr_handler - SGE async event interrupt handler 514 * @adapter: the adapter 515 * 516 * Interrupt handler for SGE asynchronous (non-data) events. 
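 * Parity, framing, and response-queue problems are logged, the interrupt
 * cause register is then cleared, and any condition covered by
 * SGE_FATALERR escalates to t3_fatal_err().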
517 */ 518 void 519 t3_sge_err_intr_handler(adapter_t *adapter) 520 { 521 unsigned int v, status; 522 523 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 524 if (status & SGE_PARERR) 525 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 526 status & SGE_PARERR); 527 if (status & SGE_FRAMINGERR) 528 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 529 status & SGE_FRAMINGERR); 530 if (status & F_RSPQCREDITOVERFOW) 531 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 532 533 if (status & F_RSPQDISABLED) { 534 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 535 536 CH_ALERT(adapter, 537 "packet delivered to disabled response queue (0x%x)\n", 538 (v >> S_RSPQ0DISABLED) & 0xff); 539 } 540 541 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 542 if (status & SGE_FATALERR) 543 t3_fatal_err(adapter); 544 } 545 546 void 547 t3_sge_prep(adapter_t *adap, struct sge_params *p) 548 { 549 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 550 551 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 552 nqsets *= adap->params.nports; 553 554 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 555 556 while (!powerof2(fl_q_size)) 557 fl_q_size--; 558 559 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 560 is_offload(adap); 561 562 #if __FreeBSD_version >= 700111 563 if (use_16k) { 564 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 565 jumbo_buf_size = MJUM16BYTES; 566 } else { 567 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 568 jumbo_buf_size = MJUM9BYTES; 569 } 570 #else 571 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); 572 jumbo_buf_size = MJUMPAGESIZE; 573 #endif 574 while (!powerof2(jumbo_q_size)) 575 jumbo_q_size--; 576 577 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 578 device_printf(adap->dev, 579 "Insufficient clusters and/or jumbo buffers.\n"); 580 581 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 582 583 for (i = 0; i < SGE_QSETS; ++i) { 584 struct qset_params *q = p->qset + i; 585 586 if (adap->params.nports > 2) { 587 q->coalesce_usecs = 50; 588 } else { 589 #ifdef INVARIANTS 590 q->coalesce_usecs = 10; 591 #else 592 q->coalesce_usecs = 5; 593 #endif 594 } 595 q->polling = 0; 596 q->rspq_size = RSPQ_Q_SIZE; 597 q->fl_size = fl_q_size; 598 q->jumbo_size = jumbo_q_size; 599 q->jumbo_buf_size = jumbo_buf_size; 600 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 601 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 602 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 603 q->cong_thres = 0; 604 } 605 } 606 607 int 608 t3_sge_alloc(adapter_t *sc) 609 { 610 611 /* The parent tag. */ 612 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 613 1, 0, /* algnmnt, boundary */ 614 BUS_SPACE_MAXADDR, /* lowaddr */ 615 BUS_SPACE_MAXADDR, /* highaddr */ 616 NULL, NULL, /* filter, filterarg */ 617 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 618 BUS_SPACE_UNRESTRICTED, /* nsegments */ 619 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 620 0, /* flags */ 621 NULL, NULL, /* lock, lockarg */ 622 &sc->parent_dmat)) { 623 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 624 return (ENOMEM); 625 } 626 627 /* 628 * DMA tag for normal sized RX frames 629 */ 630 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 631 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 632 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 633 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 634 return (ENOMEM); 635 } 636 637 /* 638 * DMA tag for jumbo sized RX frames. 
639 */ 640 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 641 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 642 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 643 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 644 return (ENOMEM); 645 } 646 647 /* 648 * DMA tag for TX frames. 649 */ 650 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 651 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 652 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 653 NULL, NULL, &sc->tx_dmat)) { 654 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 655 return (ENOMEM); 656 } 657 658 return (0); 659 } 660 661 int 662 t3_sge_free(struct adapter * sc) 663 { 664 665 if (sc->tx_dmat != NULL) 666 bus_dma_tag_destroy(sc->tx_dmat); 667 668 if (sc->rx_jumbo_dmat != NULL) 669 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 670 671 if (sc->rx_dmat != NULL) 672 bus_dma_tag_destroy(sc->rx_dmat); 673 674 if (sc->parent_dmat != NULL) 675 bus_dma_tag_destroy(sc->parent_dmat); 676 677 return (0); 678 } 679 680 void 681 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 682 { 683 684 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 685 qs->rspq.polling = 0 /* p->polling */; 686 } 687 688 #if !defined(__i386__) && !defined(__amd64__) 689 static void 690 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 691 { 692 struct refill_fl_cb_arg *cb_arg = arg; 693 694 cb_arg->error = error; 695 cb_arg->seg = segs[0]; 696 cb_arg->nseg = nseg; 697 698 } 699 #endif 700 /** 701 * refill_fl - refill an SGE free-buffer list 702 * @sc: the controller softc 703 * @q: the free-list to refill 704 * @n: the number of new buffers to allocate 705 * 706 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 707 * The caller must assure that @n does not exceed the queue's capacity. 708 */ 709 static void 710 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 711 { 712 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 713 struct rx_desc *d = &q->desc[q->pidx]; 714 struct refill_fl_cb_arg cb_arg; 715 struct mbuf *m; 716 caddr_t cl; 717 int err; 718 719 cb_arg.error = 0; 720 while (n--) { 721 /* 722 * We allocate an uninitialized mbuf + cluster, mbuf is 723 * initialized after rx. 
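		 * Allocating with MT_NOINIT skips the usual mbuf header
		 * setup here; the rx path is expected to initialize the
		 * header only when the buffer is actually handed up.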
724 */ 725 if (q->zone == zone_pack) { 726 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 727 break; 728 cl = m->m_ext.ext_buf; 729 } else { 730 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 731 break; 732 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 733 uma_zfree(q->zone, cl); 734 break; 735 } 736 } 737 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 738 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 739 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 740 uma_zfree(q->zone, cl); 741 goto done; 742 } 743 sd->flags |= RX_SW_DESC_MAP_CREATED; 744 } 745 #if !defined(__i386__) && !defined(__amd64__) 746 err = bus_dmamap_load(q->entry_tag, sd->map, 747 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 748 749 if (err != 0 || cb_arg.error) { 750 if (q->zone != zone_pack) 751 uma_zfree(q->zone, cl); 752 m_free(m); 753 goto done; 754 } 755 #else 756 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 757 #endif 758 sd->flags |= RX_SW_DESC_INUSE; 759 sd->rxsd_cl = cl; 760 sd->m = m; 761 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 762 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 763 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 764 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 765 766 d++; 767 sd++; 768 769 if (++q->pidx == q->size) { 770 q->pidx = 0; 771 q->gen ^= 1; 772 sd = q->sdesc; 773 d = q->desc; 774 } 775 q->credits++; 776 q->db_pending++; 777 } 778 779 done: 780 if (q->db_pending >= 32) { 781 q->db_pending = 0; 782 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 783 } 784 } 785 786 787 /** 788 * free_rx_bufs - free the Rx buffers on an SGE free list 789 * @sc: the controle softc 790 * @q: the SGE free list to clean up 791 * 792 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 793 * this queue should be stopped before calling this function. 794 */ 795 static void 796 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 797 { 798 u_int cidx = q->cidx; 799 800 while (q->credits--) { 801 struct rx_sw_desc *d = &q->sdesc[cidx]; 802 803 if (d->flags & RX_SW_DESC_INUSE) { 804 bus_dmamap_unload(q->entry_tag, d->map); 805 bus_dmamap_destroy(q->entry_tag, d->map); 806 if (q->zone == zone_pack) { 807 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT); 808 uma_zfree(zone_pack, d->m); 809 } else { 810 m_init(d->m, M_NOWAIT, MT_DATA, 0); 811 uma_zfree(zone_mbuf, d->m); 812 uma_zfree(q->zone, d->rxsd_cl); 813 } 814 } 815 816 d->rxsd_cl = NULL; 817 d->m = NULL; 818 if (++cidx == q->size) 819 cidx = 0; 820 } 821 } 822 823 static __inline void 824 __refill_fl(adapter_t *adap, struct sge_fl *fl) 825 { 826 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 827 } 828 829 static __inline void 830 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 831 { 832 uint32_t reclaimable = fl->size - fl->credits; 833 834 if (reclaimable > 0) 835 refill_fl(adap, fl, min(max, reclaimable)); 836 } 837 838 /** 839 * recycle_rx_buf - recycle a receive buffer 840 * @adapter: the adapter 841 * @q: the SGE free list 842 * @idx: index of buffer to recycle 843 * 844 * Recycles the specified buffer on the given free list by adding it at 845 * the next available slot on the list. 846 */ 847 static void 848 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 849 { 850 struct rx_desc *from = &q->desc[idx]; 851 struct rx_desc *to = &q->desc[q->pidx]; 852 853 q->sdesc[q->pidx] = q->sdesc[idx]; 854 to->addr_lo = from->addr_lo; // already big endian 855 to->addr_hi = from->addr_hi; // likewise 856 wmb(); /* necessary ? 
*/ 857 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 858 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 859 q->credits++; 860 861 if (++q->pidx == q->size) { 862 q->pidx = 0; 863 q->gen ^= 1; 864 } 865 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 866 } 867 868 static void 869 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 870 { 871 uint32_t *addr; 872 873 addr = arg; 874 *addr = segs[0].ds_addr; 875 } 876 877 static int 878 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 879 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 880 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 881 { 882 size_t len = nelem * elem_size; 883 void *s = NULL; 884 void *p = NULL; 885 int err; 886 887 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 888 BUS_SPACE_MAXADDR_32BIT, 889 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 890 len, 0, NULL, NULL, tag)) != 0) { 891 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 892 return (ENOMEM); 893 } 894 895 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 896 map)) != 0) { 897 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 898 return (ENOMEM); 899 } 900 901 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 902 bzero(p, len); 903 *(void **)desc = p; 904 905 if (sw_size) { 906 len = nelem * sw_size; 907 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 908 *(void **)sdesc = s; 909 } 910 if (parent_entry_tag == NULL) 911 return (0); 912 913 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 914 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 915 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 916 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 917 NULL, NULL, entry_tag)) != 0) { 918 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 919 return (ENOMEM); 920 } 921 return (0); 922 } 923 924 static void 925 sge_slow_intr_handler(void *arg, int ncount) 926 { 927 adapter_t *sc = arg; 928 929 t3_slow_intr_handler(sc); 930 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); 931 (void) t3_read_reg(sc, A_PL_INT_ENABLE0); 932 } 933 934 /** 935 * sge_timer_cb - perform periodic maintenance of an SGE qset 936 * @data: the SGE queue set to maintain 937 * 938 * Runs periodically from a timer to perform maintenance of an SGE queue 939 * set. It performs two tasks: 940 * 941 * a) Cleans up any completed Tx descriptors that may still be pending. 942 * Normal descriptor cleanup happens when new packets are added to a Tx 943 * queue so this timer is relatively infrequent and does any cleanup only 944 * if the Tx queue has not seen any new packets in a while. We make a 945 * best effort attempt to reclaim descriptors, in that we don't wait 946 * around if we cannot get a queue's lock (which most likely is because 947 * someone else is queueing new packets and so will also handle the clean 948 * up). Since control queues use immediate data exclusively we don't 949 * bother cleaning them up here. 950 * 951 * b) Replenishes Rx queues that have run out due to memory shortage. 952 * Normally new Rx buffers are added when existing ones are consumed but 953 * when out of memory a queue can become empty. We try to add only a few 954 * buffers here, the queue will be replenished fully as these new buffers 955 * are used up if memory shortage has subsided. 956 * 957 * c) Return coalesced response queue credits in case a response queue is 958 * starved. 
959 * 960 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 961 * fifo overflows and the FW doesn't implement any recovery scheme yet. 962 */ 963 static void 964 sge_timer_cb(void *arg) 965 { 966 adapter_t *sc = arg; 967 if ((sc->flags & USING_MSIX) == 0) { 968 969 struct port_info *pi; 970 struct sge_qset *qs; 971 struct sge_txq *txq; 972 int i, j; 973 int reclaim_ofl, refill_rx; 974 975 if (sc->open_device_map == 0) 976 return; 977 978 for (i = 0; i < sc->params.nports; i++) { 979 pi = &sc->port[i]; 980 for (j = 0; j < pi->nqsets; j++) { 981 qs = &sc->sge.qs[pi->first_qset + j]; 982 txq = &qs->txq[0]; 983 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 984 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 985 (qs->fl[1].credits < qs->fl[1].size)); 986 if (reclaim_ofl || refill_rx) { 987 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 988 break; 989 } 990 } 991 } 992 } 993 994 if (sc->params.nports > 2) { 995 int i; 996 997 for_each_port(sc, i) { 998 struct port_info *pi = &sc->port[i]; 999 1000 t3_write_reg(sc, A_SG_KDOORBELL, 1001 F_SELEGRCNTX | 1002 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 1003 } 1004 } 1005 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 1006 sc->open_device_map != 0) 1007 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1008 } 1009 1010 /* 1011 * This is meant to be a catch-all function to keep sge state private 1012 * to sge.c 1013 * 1014 */ 1015 int 1016 t3_sge_init_adapter(adapter_t *sc) 1017 { 1018 callout_init(&sc->sge_timer_ch, 1); 1019 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1020 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 1021 return (0); 1022 } 1023 1024 int 1025 t3_sge_reset_adapter(adapter_t *sc) 1026 { 1027 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1028 return (0); 1029 } 1030 1031 int 1032 t3_sge_init_port(struct port_info *pi) 1033 { 1034 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1035 return (0); 1036 } 1037 1038 /** 1039 * refill_rspq - replenish an SGE response queue 1040 * @adapter: the adapter 1041 * @q: the response queue to replenish 1042 * @credits: how many new responses to make available 1043 * 1044 * Replenishes a response queue by making the supplied number of responses 1045 * available to HW. 1046 */ 1047 static __inline void 1048 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1049 { 1050 1051 /* mbufs are allocated on demand when a rspq entry is processed. */ 1052 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1053 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1054 } 1055 1056 static void 1057 sge_txq_reclaim_handler(void *arg, int ncount) 1058 { 1059 struct sge_qset *qs = arg; 1060 int i; 1061 1062 for (i = 0; i < 3; i++) 1063 reclaim_completed_tx(qs, 16, i); 1064 } 1065 1066 static void 1067 sge_timer_reclaim(void *arg, int ncount) 1068 { 1069 struct port_info *pi = arg; 1070 int i, nqsets = pi->nqsets; 1071 adapter_t *sc = pi->adapter; 1072 struct sge_qset *qs; 1073 struct mtx *lock; 1074 1075 KASSERT((sc->flags & USING_MSIX) == 0, 1076 ("can't call timer reclaim for msi-x")); 1077 1078 for (i = 0; i < nqsets; i++) { 1079 qs = &sc->sge.qs[pi->first_qset + i]; 1080 1081 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1082 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 1083 &sc->sge.qs[0].rspq.lock; 1084 1085 if (mtx_trylock(lock)) { 1086 /* XXX currently assume that we are *NOT* polling */ 1087 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1088 1089 if (qs->fl[0].credits < qs->fl[0].size - 16) 1090 __refill_fl(sc, &qs->fl[0]); 1091 if (qs->fl[1].credits < qs->fl[1].size - 16) 1092 __refill_fl(sc, &qs->fl[1]); 1093 1094 if (status & (1 << qs->rspq.cntxt_id)) { 1095 if (qs->rspq.credits) { 1096 refill_rspq(sc, &qs->rspq, 1); 1097 qs->rspq.credits--; 1098 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1099 1 << qs->rspq.cntxt_id); 1100 } 1101 } 1102 mtx_unlock(lock); 1103 } 1104 } 1105 } 1106 1107 /** 1108 * init_qset_cntxt - initialize an SGE queue set context info 1109 * @qs: the queue set 1110 * @id: the queue set id 1111 * 1112 * Initializes the TIDs and context ids for the queues of a queue set. 1113 */ 1114 static void 1115 init_qset_cntxt(struct sge_qset *qs, u_int id) 1116 { 1117 1118 qs->rspq.cntxt_id = id; 1119 qs->fl[0].cntxt_id = 2 * id; 1120 qs->fl[1].cntxt_id = 2 * id + 1; 1121 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1122 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1123 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1124 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1125 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1126 1127 /* XXX: a sane limit is needed instead of INT_MAX */ 1128 mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX); 1129 mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX); 1130 mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX); 1131 } 1132 1133 1134 static void 1135 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1136 { 1137 txq->in_use += ndesc; 1138 /* 1139 * XXX we don't handle stopping of queue 1140 * presumably start handles this when we bump against the end 1141 */ 1142 txqs->gen = txq->gen; 1143 txq->unacked += ndesc; 1144 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1145 txq->unacked &= 31; 1146 txqs->pidx = txq->pidx; 1147 txq->pidx += ndesc; 1148 #ifdef INVARIANTS 1149 if (((txqs->pidx > txq->cidx) && 1150 (txq->pidx < txqs->pidx) && 1151 (txq->pidx >= txq->cidx)) || 1152 ((txqs->pidx < txq->cidx) && 1153 (txq->pidx >= txq-> cidx)) || 1154 ((txqs->pidx < txq->cidx) && 1155 (txq->cidx < txqs->pidx))) 1156 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1157 txqs->pidx, txq->pidx, txq->cidx); 1158 #endif 1159 if (txq->pidx >= txq->size) { 1160 txq->pidx -= txq->size; 1161 txq->gen ^= 1; 1162 } 1163 1164 } 1165 1166 /** 1167 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1168 * @m: the packet mbufs 1169 * @nsegs: the number of segments 1170 * 1171 * Returns the number of Tx descriptors needed for the given Ethernet 1172 * packet. Ethernet packets require addition of WR and CPL headers. 1173 */ 1174 static __inline unsigned int 1175 calc_tx_descs(const struct mbuf *m, int nsegs) 1176 { 1177 unsigned int flits; 1178 1179 if (m->m_pkthdr.len <= PIO_LEN) 1180 return 1; 1181 1182 flits = sgl_len(nsegs) + 2; 1183 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1184 flits++; 1185 1186 return flits_to_desc(flits); 1187 } 1188 1189 /** 1190 * make_sgl - populate a scatter/gather list for a packet 1191 * @sgp: the SGL to populate 1192 * @segs: the packet dma segments 1193 * @nsegs: the number of segments 1194 * 1195 * Generates a scatter/gather list for the buffers that make up a packet 1196 * and returns the SGL size in 8-byte words. The caller must size the SGL 1197 * appropriately. 
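 *
 * Each struct sg_ent packs two length/address pairs into three flits, so
 * nsegs segments take sgl_len(nsegs) = 3 * nsegs / 2 + (nsegs & 1) flits;
 * three segments, for example, come to 3 * 3 / 2 + 1 = 5 flits.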
1198 */ 1199 static __inline void 1200 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1201 { 1202 int i, idx; 1203 1204 for (idx = 0, i = 0; i < nsegs; i++) { 1205 /* 1206 * firmware doesn't like empty segments 1207 */ 1208 if (segs[i].ds_len == 0) 1209 continue; 1210 if (i && idx == 0) 1211 ++sgp; 1212 1213 sgp->len[idx] = htobe32(segs[i].ds_len); 1214 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1215 idx ^= 1; 1216 } 1217 1218 if (idx) { 1219 sgp->len[idx] = 0; 1220 sgp->addr[idx] = 0; 1221 } 1222 } 1223 1224 /** 1225 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1226 * @adap: the adapter 1227 * @q: the Tx queue 1228 * 1229 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1230 * where the HW is going to sleep just after we checked, however, 1231 * then the interrupt handler will detect the outstanding TX packet 1232 * and ring the doorbell for us. 1233 * 1234 * When GTS is disabled we unconditionally ring the doorbell. 1235 */ 1236 static __inline void 1237 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) 1238 { 1239 #if USE_GTS 1240 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1241 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1242 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1243 #ifdef T3_TRACE 1244 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1245 q->cntxt_id); 1246 #endif 1247 t3_write_reg(adap, A_SG_KDOORBELL, 1248 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1249 } 1250 #else 1251 if (mustring || ++q->db_pending >= 32) { 1252 wmb(); /* write descriptors before telling HW */ 1253 t3_write_reg(adap, A_SG_KDOORBELL, 1254 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1255 q->db_pending = 0; 1256 } 1257 #endif 1258 } 1259 1260 static __inline void 1261 wr_gen2(struct tx_desc *d, unsigned int gen) 1262 { 1263 #if SGE_NUM_GENBITS == 2 1264 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1265 #endif 1266 } 1267 1268 /** 1269 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1270 * @ndesc: number of Tx descriptors spanned by the SGL 1271 * @txd: first Tx descriptor to be written 1272 * @txqs: txq state (generation and producer index) 1273 * @txq: the SGE Tx queue 1274 * @sgl: the SGL 1275 * @flits: number of flits to the start of the SGL in the first descriptor 1276 * @sgl_flits: the SGL size in flits 1277 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1278 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1279 * 1280 * Write a work request header and an associated SGL. If the SGL is 1281 * small enough to fit into one Tx descriptor it has already been written 1282 * and we just need to write the WR header. Otherwise we distribute the 1283 * SGL across the number of descriptors it spans. 
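 *
 * In the multi-descriptor case each continuation descriptor keeps its own
 * header in flit 0 (hence V_WR_SGLSFLT(1)), and the first descriptor's low
 * header word, which carries the original generation, is only stored after
 * a memory barrier so the SGE never sees a partially built chain.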
1284 */ 1285 static void 1286 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1287 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1288 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1289 { 1290 1291 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1292 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1293 1294 if (__predict_true(ndesc == 1)) { 1295 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1296 V_WR_SGLSFLT(flits)) | wr_hi, 1297 htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | 1298 wr_lo); 1299 1300 wr_gen2(txd, txqs->gen); 1301 1302 } else { 1303 unsigned int ogen = txqs->gen; 1304 const uint64_t *fp = (const uint64_t *)sgl; 1305 struct work_request_hdr *wp = wrp; 1306 1307 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1308 V_WR_SGLSFLT(flits)) | wr_hi; 1309 1310 while (sgl_flits) { 1311 unsigned int avail = WR_FLITS - flits; 1312 1313 if (avail > sgl_flits) 1314 avail = sgl_flits; 1315 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1316 sgl_flits -= avail; 1317 ndesc--; 1318 if (!sgl_flits) 1319 break; 1320 1321 fp += avail; 1322 txd++; 1323 txsd++; 1324 if (++txqs->pidx == txq->size) { 1325 txqs->pidx = 0; 1326 txqs->gen ^= 1; 1327 txd = txq->desc; 1328 txsd = txq->sdesc; 1329 } 1330 1331 /* 1332 * when the head of the mbuf chain 1333 * is freed all clusters will be freed 1334 * with it 1335 */ 1336 wrp = (struct work_request_hdr *)txd; 1337 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1338 V_WR_SGLSFLT(1)) | wr_hi; 1339 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1340 sgl_flits + 1)) | 1341 V_WR_GEN(txqs->gen)) | wr_lo; 1342 wr_gen2(txd, txqs->gen); 1343 flits = 1; 1344 } 1345 wrp->wrh_hi |= htonl(F_WR_EOP); 1346 wmb(); 1347 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1348 wr_gen2((struct tx_desc *)wp, ogen); 1349 } 1350 } 1351 1352 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1353 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1354 1355 #define GET_VTAG(cntrl, m) \ 1356 do { \ 1357 if ((m)->m_flags & M_VLANTAG) \ 1358 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1359 } while (0) 1360 1361 static int 1362 t3_encap(struct sge_qset *qs, struct mbuf **m) 1363 { 1364 adapter_t *sc; 1365 struct mbuf *m0; 1366 struct sge_txq *txq; 1367 struct txq_state txqs; 1368 struct port_info *pi; 1369 unsigned int ndesc, flits, cntrl, mlen; 1370 int err, nsegs, tso_info = 0; 1371 1372 struct work_request_hdr *wrp; 1373 struct tx_sw_desc *txsd; 1374 struct sg_ent *sgp, *sgl; 1375 uint32_t wr_hi, wr_lo, sgl_flits; 1376 bus_dma_segment_t segs[TX_MAX_SEGS]; 1377 1378 struct tx_desc *txd; 1379 1380 pi = qs->port; 1381 sc = pi->adapter; 1382 txq = &qs->txq[TXQ_ETH]; 1383 txd = &txq->desc[txq->pidx]; 1384 txsd = &txq->sdesc[txq->pidx]; 1385 sgl = txq->txq_sgl; 1386 1387 prefetch(txd); 1388 m0 = *m; 1389 1390 mtx_assert(&qs->lock, MA_OWNED); 1391 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1392 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1393 1394 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1395 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1396 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1397 1398 if (m0->m_nextpkt != NULL) { 1399 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1400 ndesc = 1; 1401 mlen = 0; 1402 } else { 1403 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1404 &m0, segs, &nsegs))) { 1405 if (cxgb_debug) 1406 printf("failed ... 
err=%d\n", err); 1407 return (err); 1408 } 1409 mlen = m0->m_pkthdr.len; 1410 ndesc = calc_tx_descs(m0, nsegs); 1411 } 1412 txq_prod(txq, ndesc, &txqs); 1413 1414 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1415 txsd->m = m0; 1416 1417 if (m0->m_nextpkt != NULL) { 1418 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1419 int i, fidx; 1420 1421 if (nsegs > 7) 1422 panic("trying to coalesce %d packets in to one WR", nsegs); 1423 txq->txq_coalesced += nsegs; 1424 wrp = (struct work_request_hdr *)txd; 1425 flits = nsegs*2 + 1; 1426 1427 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1428 struct cpl_tx_pkt_batch_entry *cbe; 1429 uint64_t flit; 1430 uint32_t *hflit = (uint32_t *)&flit; 1431 int cflags = m0->m_pkthdr.csum_flags; 1432 1433 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1434 GET_VTAG(cntrl, m0); 1435 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1436 if (__predict_false(!(cflags & CSUM_IP))) 1437 cntrl |= F_TXPKT_IPCSUM_DIS; 1438 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | 1439 CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1440 cntrl |= F_TXPKT_L4CSUM_DIS; 1441 1442 hflit[0] = htonl(cntrl); 1443 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1444 flit |= htobe64(1 << 24); 1445 cbe = &cpl_batch->pkt_entry[i]; 1446 cbe->cntrl = hflit[0]; 1447 cbe->len = hflit[1]; 1448 cbe->addr = htobe64(segs[i].ds_addr); 1449 } 1450 1451 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1452 V_WR_SGLSFLT(flits)) | 1453 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1454 wr_lo = htonl(V_WR_LEN(flits) | 1455 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1456 set_wr_hdr(wrp, wr_hi, wr_lo); 1457 wmb(); 1458 ETHER_BPF_MTAP(pi->ifp, m0); 1459 wr_gen2(txd, txqs.gen); 1460 check_ring_tx_db(sc, txq, 0); 1461 return (0); 1462 } else if (tso_info) { 1463 uint16_t eth_type; 1464 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1465 struct ether_header *eh; 1466 void *l3hdr; 1467 struct tcphdr *tcp; 1468 1469 txd->flit[2] = 0; 1470 GET_VTAG(cntrl, m0); 1471 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1472 hdr->cntrl = htonl(cntrl); 1473 hdr->len = htonl(mlen | 0x80000000); 1474 1475 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1476 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", 1477 m0, mlen, m0->m_pkthdr.tso_segsz, 1478 (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); 1479 panic("tx tso packet too small"); 1480 } 1481 1482 /* Make sure that ether, ip, tcp headers are all in m0 */ 1483 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1484 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1485 if (__predict_false(m0 == NULL)) { 1486 /* XXX panic probably an overreaction */ 1487 panic("couldn't fit header into mbuf"); 1488 } 1489 } 1490 1491 eh = mtod(m0, struct ether_header *); 1492 eth_type = eh->ether_type; 1493 if (eth_type == htons(ETHERTYPE_VLAN)) { 1494 struct ether_vlan_header *evh = (void *)eh; 1495 1496 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); 1497 l3hdr = evh + 1; 1498 eth_type = evh->evl_proto; 1499 } else { 1500 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); 1501 l3hdr = eh + 1; 1502 } 1503 1504 if (eth_type == htons(ETHERTYPE_IP)) { 1505 struct ip *ip = l3hdr; 1506 1507 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); 1508 tcp = (struct tcphdr *)(ip + 1); 1509 } else if (eth_type == htons(ETHERTYPE_IPV6)) { 1510 struct ip6_hdr *ip6 = l3hdr; 1511 1512 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, 1513 ("%s: CSUM_TSO with ip6_nxt %d", 1514 __func__, ip6->ip6_nxt)); 1515 1516 tso_info |= F_LSO_IPV6; 1517 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); 1518 tcp = 
(struct tcphdr *)(ip6 + 1); 1519 } else 1520 panic("%s: CSUM_TSO but neither ip nor ip6", __func__); 1521 1522 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); 1523 hdr->lso_info = htonl(tso_info); 1524 1525 if (__predict_false(mlen <= PIO_LEN)) { 1526 /* 1527 * pkt not undersized but fits in PIO_LEN 1528 * Indicates a TSO bug at the higher levels. 1529 */ 1530 txsd->m = NULL; 1531 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1532 flits = (mlen + 7) / 8 + 3; 1533 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1534 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1535 F_WR_SOP | F_WR_EOP | txqs.compl); 1536 wr_lo = htonl(V_WR_LEN(flits) | 1537 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1538 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1539 wmb(); 1540 ETHER_BPF_MTAP(pi->ifp, m0); 1541 wr_gen2(txd, txqs.gen); 1542 check_ring_tx_db(sc, txq, 0); 1543 m_freem(m0); 1544 return (0); 1545 } 1546 flits = 3; 1547 } else { 1548 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1549 1550 GET_VTAG(cntrl, m0); 1551 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1552 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1553 cntrl |= F_TXPKT_IPCSUM_DIS; 1554 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | 1555 CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1556 cntrl |= F_TXPKT_L4CSUM_DIS; 1557 cpl->cntrl = htonl(cntrl); 1558 cpl->len = htonl(mlen | 0x80000000); 1559 1560 if (mlen <= PIO_LEN) { 1561 txsd->m = NULL; 1562 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1563 flits = (mlen + 7) / 8 + 2; 1564 1565 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1566 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1567 F_WR_SOP | F_WR_EOP | txqs.compl); 1568 wr_lo = htonl(V_WR_LEN(flits) | 1569 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1570 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1571 wmb(); 1572 ETHER_BPF_MTAP(pi->ifp, m0); 1573 wr_gen2(txd, txqs.gen); 1574 check_ring_tx_db(sc, txq, 0); 1575 m_freem(m0); 1576 return (0); 1577 } 1578 flits = 2; 1579 } 1580 wrp = (struct work_request_hdr *)txd; 1581 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1582 make_sgl(sgp, segs, nsegs); 1583 1584 sgl_flits = sgl_len(nsegs); 1585 1586 ETHER_BPF_MTAP(pi->ifp, m0); 1587 1588 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1589 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1590 wr_lo = htonl(V_WR_TID(txq->token)); 1591 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1592 sgl_flits, wr_hi, wr_lo); 1593 check_ring_tx_db(sc, txq, 0); 1594 1595 return (0); 1596 } 1597 1598 #ifdef DEBUGNET 1599 int 1600 cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m) 1601 { 1602 int error; 1603 1604 error = t3_encap(qs, m); 1605 if (error == 0) 1606 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1); 1607 else if (*m != NULL) { 1608 m_freem(*m); 1609 *m = NULL; 1610 } 1611 return (error); 1612 } 1613 #endif 1614 1615 void 1616 cxgb_tx_watchdog(void *arg) 1617 { 1618 struct sge_qset *qs = arg; 1619 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1620 1621 if (qs->coalescing != 0 && 1622 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1623 TXQ_RING_EMPTY(qs)) 1624 qs->coalescing = 0; 1625 else if (qs->coalescing == 0 && 1626 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1627 qs->coalescing = 1; 1628 if (TXQ_TRYLOCK(qs)) { 1629 qs->qs_flags |= QS_FLUSHING; 1630 cxgb_start_locked(qs); 1631 qs->qs_flags &= ~QS_FLUSHING; 1632 TXQ_UNLOCK(qs); 1633 } 1634 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1635 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1636 qs, txq->txq_watchdog.c_cpu); 1637 } 1638 1639 static void 1640 cxgb_tx_timeout(void *arg) 1641 { 1642 struct sge_qset *qs = arg; 1643 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1644 1645 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1646 qs->coalescing = 1; 1647 if (TXQ_TRYLOCK(qs)) { 1648 qs->qs_flags |= QS_TIMEOUT; 1649 cxgb_start_locked(qs); 1650 qs->qs_flags &= ~QS_TIMEOUT; 1651 TXQ_UNLOCK(qs); 1652 } 1653 } 1654 1655 static void 1656 cxgb_start_locked(struct sge_qset *qs) 1657 { 1658 struct mbuf *m_head = NULL; 1659 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1660 struct port_info *pi = qs->port; 1661 struct ifnet *ifp = pi->ifp; 1662 1663 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1664 reclaim_completed_tx(qs, 0, TXQ_ETH); 1665 1666 if (!pi->link_config.link_ok) { 1667 TXQ_RING_FLUSH(qs); 1668 return; 1669 } 1670 TXQ_LOCK_ASSERT(qs); 1671 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1672 pi->link_config.link_ok) { 1673 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1674 1675 if (txq->size - txq->in_use <= TX_MAX_DESC) 1676 break; 1677 1678 if ((m_head = cxgb_dequeue(qs)) == NULL) 1679 break; 1680 /* 1681 * Encapsulation can modify our pointer, and or make it 1682 * NULL on failure. In that event, we can't requeue. 
1683 */ 1684 if (t3_encap(qs, &m_head) || m_head == NULL) 1685 break; 1686 1687 m_head = NULL; 1688 } 1689 1690 if (txq->db_pending) 1691 check_ring_tx_db(pi->adapter, txq, 1); 1692 1693 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1694 pi->link_config.link_ok) 1695 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1696 qs, txq->txq_timer.c_cpu); 1697 if (m_head != NULL) 1698 m_freem(m_head); 1699 } 1700 1701 static int 1702 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1703 { 1704 struct port_info *pi = qs->port; 1705 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1706 struct buf_ring *br = txq->txq_mr; 1707 int error, avail; 1708 1709 avail = txq->size - txq->in_use; 1710 TXQ_LOCK_ASSERT(qs); 1711 1712 /* 1713 * We can only do a direct transmit if the following are true: 1714 * - we aren't coalescing (ring < 3/4 full) 1715 * - the link is up -- checked in caller 1716 * - there are no packets enqueued already 1717 * - there is space in hardware transmit queue 1718 */ 1719 if (check_pkt_coalesce(qs) == 0 && 1720 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1721 if (t3_encap(qs, &m)) { 1722 if (m != NULL && 1723 (error = drbr_enqueue(ifp, br, m)) != 0) 1724 return (error); 1725 } else { 1726 if (txq->db_pending) 1727 check_ring_tx_db(pi->adapter, txq, 1); 1728 1729 /* 1730 * We've bypassed the buf ring so we need to update 1731 * the stats directly 1732 */ 1733 txq->txq_direct_packets++; 1734 txq->txq_direct_bytes += m->m_pkthdr.len; 1735 } 1736 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1737 return (error); 1738 1739 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1740 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1741 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1742 cxgb_start_locked(qs); 1743 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1744 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1745 qs, txq->txq_timer.c_cpu); 1746 return (0); 1747 } 1748 1749 int 1750 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1751 { 1752 struct sge_qset *qs; 1753 struct port_info *pi = ifp->if_softc; 1754 int error, qidx = pi->first_qset; 1755 1756 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1757 ||(!pi->link_config.link_ok)) { 1758 m_freem(m); 1759 return (0); 1760 } 1761 1762 /* check if flowid is set */ 1763 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 1764 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1765 1766 qs = &pi->adapter->sge.qs[qidx]; 1767 1768 if (TXQ_TRYLOCK(qs)) { 1769 /* XXX running */ 1770 error = cxgb_transmit_locked(ifp, qs, m); 1771 TXQ_UNLOCK(qs); 1772 } else 1773 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1774 return (error); 1775 } 1776 1777 void 1778 cxgb_qflush(struct ifnet *ifp) 1779 { 1780 /* 1781 * flush any enqueued mbufs in the buf_rings 1782 * and in the transmit queues 1783 * no-op for now 1784 */ 1785 return; 1786 } 1787 1788 /** 1789 * write_imm - write a packet into a Tx descriptor as immediate data 1790 * @d: the Tx descriptor to write 1791 * @m: the packet 1792 * @len: the length of packet data to write as immediate data 1793 * @gen: the generation bit value to write 1794 * 1795 * Writes a packet as immediate data into a Tx descriptor. The packet 1796 * contains a work request at its beginning. We must write the packet 1797 * carefully so the SGE doesn't read accidentally before it's written in 1798 * its entirety. 
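 * Concretely, the body is copied in first, the WR header is stored next,
 * and the trailing generation flit (written by wr_gen2() when two
 * generation bits are in use) goes in last, after a write barrier.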
1799 */ 1800 static __inline void 1801 write_imm(struct tx_desc *d, caddr_t src, 1802 unsigned int len, unsigned int gen) 1803 { 1804 struct work_request_hdr *from = (struct work_request_hdr *)src; 1805 struct work_request_hdr *to = (struct work_request_hdr *)d; 1806 uint32_t wr_hi, wr_lo; 1807 1808 KASSERT(len <= WR_LEN && len >= sizeof(*from), 1809 ("%s: invalid len %d", __func__, len)); 1810 1811 memcpy(&to[1], &from[1], len - sizeof(*from)); 1812 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1813 V_WR_BCNTLFLT(len & 7)); 1814 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); 1815 set_wr_hdr(to, wr_hi, wr_lo); 1816 wmb(); 1817 wr_gen2(d, gen); 1818 } 1819 1820 /** 1821 * check_desc_avail - check descriptor availability on a send queue 1822 * @adap: the adapter 1823 * @q: the TX queue 1824 * @m: the packet needing the descriptors 1825 * @ndesc: the number of Tx descriptors needed 1826 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1827 * 1828 * Checks if the requested number of Tx descriptors is available on an 1829 * SGE send queue. If the queue is already suspended or not enough 1830 * descriptors are available the packet is queued for later transmission. 1831 * Must be called with the Tx queue locked. 1832 * 1833 * Returns 0 if enough descriptors are available, 1 if there aren't 1834 * enough descriptors and the packet has been queued, and 2 if the caller 1835 * needs to retry because there weren't enough descriptors at the 1836 * beginning of the call but some freed up in the mean time. 1837 */ 1838 static __inline int 1839 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1840 struct mbuf *m, unsigned int ndesc, 1841 unsigned int qid) 1842 { 1843 /* 1844 * XXX We currently only use this for checking the control queue 1845 * the control queue is only used for binding qsets which happens 1846 * at init time so we are guaranteed enough descriptors 1847 */ 1848 if (__predict_false(mbufq_len(&q->sendq))) { 1849 addq_exit: (void )mbufq_enqueue(&q->sendq, m); 1850 return 1; 1851 } 1852 if (__predict_false(q->size - q->in_use < ndesc)) { 1853 1854 struct sge_qset *qs = txq_to_qset(q, qid); 1855 1856 setbit(&qs->txq_stopped, qid); 1857 if (should_restart_tx(q) && 1858 test_and_clear_bit(qid, &qs->txq_stopped)) 1859 return 2; 1860 1861 q->stops++; 1862 goto addq_exit; 1863 } 1864 return 0; 1865 } 1866 1867 1868 /** 1869 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1870 * @q: the SGE control Tx queue 1871 * 1872 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1873 * that send only immediate data (presently just the control queues) and 1874 * thus do not have any mbufs 1875 */ 1876 static __inline void 1877 reclaim_completed_tx_imm(struct sge_txq *q) 1878 { 1879 unsigned int reclaim = q->processed - q->cleaned; 1880 1881 q->in_use -= reclaim; 1882 q->cleaned += reclaim; 1883 } 1884 1885 /** 1886 * ctrl_xmit - send a packet through an SGE control Tx queue 1887 * @adap: the adapter 1888 * @q: the control queue 1889 * @m: the packet 1890 * 1891 * Send a packet through an SGE control Tx queue. Packets sent through 1892 * a control queue must fit entirely as immediate data in a single Tx 1893 * descriptor and have no page fragments. 
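 * In practice that means m->m_len may not exceed WR_LEN; the routine
 * asserts this, and if no descriptor is currently free the mbuf is left
 * on the queue's sendq and ENOSPC is returned to the caller.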
1894 */ 1895 static int 1896 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1897 { 1898 int ret; 1899 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1900 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1901 1902 KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__)); 1903 1904 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1905 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1906 1907 TXQ_LOCK(qs); 1908 again: reclaim_completed_tx_imm(q); 1909 1910 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1911 if (__predict_false(ret)) { 1912 if (ret == 1) { 1913 TXQ_UNLOCK(qs); 1914 return (ENOSPC); 1915 } 1916 goto again; 1917 } 1918 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); 1919 1920 q->in_use++; 1921 if (++q->pidx >= q->size) { 1922 q->pidx = 0; 1923 q->gen ^= 1; 1924 } 1925 TXQ_UNLOCK(qs); 1926 wmb(); 1927 t3_write_reg(adap, A_SG_KDOORBELL, 1928 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1929 1930 m_free(m); 1931 return (0); 1932 } 1933 1934 1935 /** 1936 * restart_ctrlq - restart a suspended control queue 1937 * @qs: the queue set containing the control queue 1938 * 1939 * Resumes transmission on a suspended Tx control queue. 1940 */ 1941 static void 1942 restart_ctrlq(void *data, int npending) 1943 { 1944 struct mbuf *m; 1945 struct sge_qset *qs = (struct sge_qset *)data; 1946 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1947 adapter_t *adap = qs->port->adapter; 1948 1949 TXQ_LOCK(qs); 1950 again: reclaim_completed_tx_imm(q); 1951 1952 while (q->in_use < q->size && 1953 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1954 1955 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); 1956 m_free(m); 1957 1958 if (++q->pidx >= q->size) { 1959 q->pidx = 0; 1960 q->gen ^= 1; 1961 } 1962 q->in_use++; 1963 } 1964 if (mbufq_len(&q->sendq)) { 1965 setbit(&qs->txq_stopped, TXQ_CTRL); 1966 1967 if (should_restart_tx(q) && 1968 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1969 goto again; 1970 q->stops++; 1971 } 1972 TXQ_UNLOCK(qs); 1973 t3_write_reg(adap, A_SG_KDOORBELL, 1974 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1975 } 1976 1977 1978 /* 1979 * Send a management message through control queue 0 1980 */ 1981 int 1982 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1983 { 1984 return ctrl_xmit(adap, &adap->sge.qs[0], m); 1985 } 1986 1987 /** 1988 * free_qset - free the resources of an SGE queue set 1989 * @sc: the controller owning the queue set 1990 * @q: the queue set 1991 * 1992 * Release the HW and SW resources associated with an SGE queue set, such 1993 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1994 * queue set must be quiesced prior to calling this.
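 * The queue set's lock must be held by the caller; it is released and
 * destroyed here.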
1995 */ 1996 static void 1997 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1998 { 1999 int i; 2000 2001 reclaim_completed_tx(q, 0, TXQ_ETH); 2002 if (q->txq[TXQ_ETH].txq_mr != NULL) 2003 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); 2004 if (q->txq[TXQ_ETH].txq_ifq != NULL) { 2005 ifq_delete(q->txq[TXQ_ETH].txq_ifq); 2006 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); 2007 } 2008 2009 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2010 if (q->fl[i].desc) { 2011 mtx_lock_spin(&sc->sge.reg_lock); 2012 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2013 mtx_unlock_spin(&sc->sge.reg_lock); 2014 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2015 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2016 q->fl[i].desc_map); 2017 bus_dma_tag_destroy(q->fl[i].desc_tag); 2018 bus_dma_tag_destroy(q->fl[i].entry_tag); 2019 } 2020 if (q->fl[i].sdesc) { 2021 free_rx_bufs(sc, &q->fl[i]); 2022 free(q->fl[i].sdesc, M_DEVBUF); 2023 } 2024 } 2025 2026 mtx_unlock(&q->lock); 2027 MTX_DESTROY(&q->lock); 2028 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2029 if (q->txq[i].desc) { 2030 mtx_lock_spin(&sc->sge.reg_lock); 2031 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2032 mtx_unlock_spin(&sc->sge.reg_lock); 2033 bus_dmamap_unload(q->txq[i].desc_tag, 2034 q->txq[i].desc_map); 2035 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2036 q->txq[i].desc_map); 2037 bus_dma_tag_destroy(q->txq[i].desc_tag); 2038 bus_dma_tag_destroy(q->txq[i].entry_tag); 2039 } 2040 if (q->txq[i].sdesc) { 2041 free(q->txq[i].sdesc, M_DEVBUF); 2042 } 2043 } 2044 2045 if (q->rspq.desc) { 2046 mtx_lock_spin(&sc->sge.reg_lock); 2047 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2048 mtx_unlock_spin(&sc->sge.reg_lock); 2049 2050 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2051 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2052 q->rspq.desc_map); 2053 bus_dma_tag_destroy(q->rspq.desc_tag); 2054 MTX_DESTROY(&q->rspq.lock); 2055 } 2056 2057 #if defined(INET6) || defined(INET) 2058 tcp_lro_free(&q->lro.ctrl); 2059 #endif 2060 2061 bzero(q, sizeof(*q)); 2062 } 2063 2064 /** 2065 * t3_free_sge_resources - free SGE resources 2066 * @sc: the adapter softc 2067 * 2068 * Frees resources used by the SGE queue sets. 2069 */ 2070 void 2071 t3_free_sge_resources(adapter_t *sc, int nqsets) 2072 { 2073 int i; 2074 2075 for (i = 0; i < nqsets; ++i) { 2076 TXQ_LOCK(&sc->sge.qs[i]); 2077 t3_free_qset(sc, &sc->sge.qs[i]); 2078 } 2079 } 2080 2081 /** 2082 * t3_sge_start - enable SGE 2083 * @sc: the controller softc 2084 * 2085 * Enables the SGE for DMAs. This is the last step in starting packet 2086 * transfers. 2087 */ 2088 void 2089 t3_sge_start(adapter_t *sc) 2090 { 2091 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2092 } 2093 2094 /** 2095 * t3_sge_stop - disable SGE operation 2096 * @sc: the adapter 2097 * 2098 * Disables the DMA engine. This can be called in emergencies (e.g., 2099 * from error interrupts) or from normal process context. In the latter 2100 * case it also disables any pending queue restart tasklets. Note that 2101 * if it is called in interrupt context it cannot disable the restart 2102 * tasklets as it cannot wait, however the tasklets will have no effect 2103 * since the doorbells are disabled and the driver will call this again 2104 * later from process context, at which time the tasklets will be stopped 2105 * if they are still running.
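 * In this driver the restart handlers are taskqueue tasks; the code that
 * would drain them here is currently compiled out under #ifdef notyet.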
2106 */ 2107 void 2108 t3_sge_stop(adapter_t *sc) 2109 { 2110 int i, nqsets; 2111 2112 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2113 2114 if (sc->tq == NULL) 2115 return; 2116 2117 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2118 nqsets += sc->port[i].nqsets; 2119 #ifdef notyet 2120 /* 2121 * 2122 * XXX 2123 */ 2124 for (i = 0; i < nqsets; ++i) { 2125 struct sge_qset *qs = &sc->sge.qs[i]; 2126 2127 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2128 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2129 } 2130 #endif 2131 } 2132 2133 /** 2134 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2135 * @qs: the queue set that owns the Tx queue 2136 * @reclaimable: the number of descriptors to reclaim 2137 * @queue: the Tx queue index within the queue set (TXQ_ETH, TXQ_OFLD or 2138 * TXQ_CTRL) 2139 * 2140 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2141 * Tx buffers; any DMA map still loaded for a reclaimed buffer is unloaded 2142 * before the mbuf chain is freed. 2143 * 2144 * Called with the Tx queue lock held. 2145 */ 2146 void 2147 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2148 { 2149 struct tx_sw_desc *txsd; 2150 unsigned int cidx, mask; 2151 struct sge_txq *q = &qs->txq[queue]; 2152 2153 #ifdef T3_TRACE 2154 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2155 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2156 #endif 2157 cidx = q->cidx; 2158 mask = q->size - 1; 2159 txsd = &q->sdesc[cidx]; 2160 2161 mtx_assert(&qs->lock, MA_OWNED); 2162 while (reclaimable--) { 2163 prefetch(q->sdesc[(cidx + 1) & mask].m); 2164 prefetch(q->sdesc[(cidx + 2) & mask].m); 2165 2166 if (txsd->m != NULL) { 2167 if (txsd->flags & TX_SW_DESC_MAPPED) { 2168 bus_dmamap_unload(q->entry_tag, txsd->map); 2169 txsd->flags &= ~TX_SW_DESC_MAPPED; 2170 } 2171 m_freem_list(txsd->m); 2172 txsd->m = NULL; 2173 } else 2174 q->txq_skipped++; 2175 2176 ++txsd; 2177 if (++cidx == q->size) { 2178 cidx = 0; 2179 txsd = q->sdesc; 2180 } 2181 } 2182 q->cidx = cidx; 2183 2184 } 2185 2186 /** 2187 * is_new_response - check if a response is newly written 2188 * @r: the response descriptor 2189 * @q: the response queue 2190 * 2191 * Returns true if a response descriptor contains a yet unprocessed 2192 * response. 2193 */ 2194 static __inline int 2195 is_new_response(const struct rsp_desc *r, 2196 const struct sge_rspq *q) 2197 { 2198 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2199 } 2200 2201 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2202 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2203 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2204 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2205 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2206 2207 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2208 #define NOMEM_INTR_DELAY 2500 2209 2210 #ifdef TCP_OFFLOAD 2211 /** 2212 * write_ofld_wr - write an offload work request 2213 * @adap: the adapter 2214 * @m: the packet to send 2215 * @q: the Tx queue 2216 * @pidx: index of the first Tx descriptor to write 2217 * @gen: the generation value to use 2218 * @ndesc: number of descriptors the packet will occupy 2219 * 2220 * Write an offload work request to send the supplied packet. The packet 2221 * data already carry the work request with most fields populated.
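 * Requests flagged with F_HDR_SGL carry a scatter/gather list describing
 * the payload; all others are written as immediate data via write_imm().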
2222 */ 2223 static void 2224 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, 2225 unsigned int pidx, unsigned int gen, unsigned int ndesc) 2226 { 2227 unsigned int sgl_flits, flits; 2228 int i, idx, nsegs, wrlen; 2229 struct work_request_hdr *from; 2230 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; 2231 struct tx_desc *d = &q->desc[pidx]; 2232 struct txq_state txqs; 2233 struct sglist_seg *segs; 2234 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2235 struct sglist *sgl; 2236 2237 from = (void *)(oh + 1); /* Start of WR within mbuf */ 2238 wrlen = m->m_len - sizeof(*oh); 2239 2240 if (!(oh->flags & F_HDR_SGL)) { 2241 write_imm(d, (caddr_t)from, wrlen, gen); 2242 2243 /* 2244 * mbuf with "real" immediate tx data will be enqueue_wr'd by 2245 * t3_push_frames and freed in wr_ack. Others, like those sent 2246 * down by close_conn, t3_send_reset, etc. should be freed here. 2247 */ 2248 if (!(oh->flags & F_HDR_DF)) 2249 m_free(m); 2250 return; 2251 } 2252 2253 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); 2254 2255 sgl = oh->sgl; 2256 flits = wrlen / 8; 2257 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; 2258 2259 nsegs = sgl->sg_nseg; 2260 segs = sgl->sg_segs; 2261 for (idx = 0, i = 0; i < nsegs; i++) { 2262 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); 2263 if (i && idx == 0) 2264 ++sgp; 2265 sgp->len[idx] = htobe32(segs[i].ss_len); 2266 sgp->addr[idx] = htobe64(segs[i].ss_paddr); 2267 idx ^= 1; 2268 } 2269 if (idx) { 2270 sgp->len[idx] = 0; 2271 sgp->addr[idx] = 0; 2272 } 2273 2274 sgl_flits = sgl_len(nsegs); 2275 txqs.gen = gen; 2276 txqs.pidx = pidx; 2277 txqs.compl = 0; 2278 2279 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, 2280 from->wrh_hi, from->wrh_lo); 2281 } 2282 2283 /** 2284 * ofld_xmit - send a packet through an offload queue 2285 * @adap: the adapter 2286 * @q: the Tx offload queue 2287 * @m: the packet 2288 * 2289 * Send an offload packet through an SGE offload queue. 2290 */ 2291 static int 2292 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2293 { 2294 int ret; 2295 unsigned int ndesc; 2296 unsigned int pidx, gen; 2297 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2298 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2299 2300 ndesc = G_HDR_NDESC(oh->flags); 2301 2302 TXQ_LOCK(qs); 2303 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2304 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2305 if (__predict_false(ret)) { 2306 if (ret == 1) { 2307 TXQ_UNLOCK(qs); 2308 return (EINTR); 2309 } 2310 goto again; 2311 } 2312 2313 gen = q->gen; 2314 q->in_use += ndesc; 2315 pidx = q->pidx; 2316 q->pidx += ndesc; 2317 if (q->pidx >= q->size) { 2318 q->pidx -= q->size; 2319 q->gen ^= 1; 2320 } 2321 2322 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2323 check_ring_tx_db(adap, q, 1); 2324 TXQ_UNLOCK(qs); 2325 2326 return (0); 2327 } 2328 2329 /** 2330 * restart_offloadq - restart a suspended offload queue 2331 * @qs: the queue set containing the offload queue 2332 * 2333 * Resumes transmission on a suspended Tx offload queue.
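 * This runs from the adapter's taskqueue as the TXQ_OFLD qresume task,
 * scheduled by restart_tx() once enough descriptors have been reclaimed.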
2334 */ 2335 static void 2336 restart_offloadq(void *data, int npending) 2337 { 2338 struct mbuf *m; 2339 struct sge_qset *qs = data; 2340 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2341 adapter_t *adap = qs->port->adapter; 2342 int cleaned; 2343 2344 TXQ_LOCK(qs); 2345 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2346 2347 while ((m = mbufq_first(&q->sendq)) != NULL) { 2348 unsigned int gen, pidx; 2349 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2350 unsigned int ndesc = G_HDR_NDESC(oh->flags); 2351 2352 if (__predict_false(q->size - q->in_use < ndesc)) { 2353 setbit(&qs->txq_stopped, TXQ_OFLD); 2354 if (should_restart_tx(q) && 2355 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2356 goto again; 2357 q->stops++; 2358 break; 2359 } 2360 2361 gen = q->gen; 2362 q->in_use += ndesc; 2363 pidx = q->pidx; 2364 q->pidx += ndesc; 2365 if (q->pidx >= q->size) { 2366 q->pidx -= q->size; 2367 q->gen ^= 1; 2368 } 2369 2370 (void)mbufq_dequeue(&q->sendq); 2371 TXQ_UNLOCK(qs); 2372 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2373 TXQ_LOCK(qs); 2374 } 2375 #if USE_GTS 2376 set_bit(TXQ_RUNNING, &q->flags); 2377 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2378 #endif 2379 TXQ_UNLOCK(qs); 2380 wmb(); 2381 t3_write_reg(adap, A_SG_KDOORBELL, 2382 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2383 } 2384 2385 /** 2386 * t3_offload_tx - send an offload packet 2387 * @m: the packet 2388 * 2389 * Sends an offload packet. We use the packet priority to select the 2390 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2391 * should be sent as regular or control, bits 1-3 select the queue set. 2392 */ 2393 int 2394 t3_offload_tx(struct adapter *sc, struct mbuf *m) 2395 { 2396 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2397 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; 2398 2399 if (oh->flags & F_HDR_CTRL) { 2400 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ 2401 return (ctrl_xmit(sc, qs, m)); 2402 } else 2403 return (ofld_xmit(sc, qs, m)); 2404 } 2405 #endif 2406 2407 static void 2408 restart_tx(struct sge_qset *qs) 2409 { 2410 struct adapter *sc = qs->port->adapter; 2411 2412 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2413 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2414 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2415 qs->txq[TXQ_OFLD].restarts++; 2416 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2417 } 2418 2419 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2420 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2421 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2422 qs->txq[TXQ_CTRL].restarts++; 2423 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2424 } 2425 } 2426 2427 /** 2428 * t3_sge_alloc_qset - initialize an SGE queue set 2429 * @sc: the controller softc 2430 * @id: the queue set id 2431 * @nports: how many Ethernet ports will be using this queue set 2432 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2433 * @p: configuration parameters for this queue set 2434 * @ntxq: number of Tx queues for the queue set 2435 * @pi: port info for queue set 2436 * 2437 * Allocate resources and initialize an SGE queue set. A queue set 2438 * comprises a response queue, two Rx free-buffer queues, and up to 3 2439 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2440 * queue, offload queue, and control queue. 
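 *
 * Returns 0 on success or an error number; on failure any partially
 * allocated resources are released again via t3_free_qset().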
2441 */ 2442 int 2443 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2444 const struct qset_params *p, int ntxq, struct port_info *pi) 2445 { 2446 struct sge_qset *q = &sc->sge.qs[id]; 2447 int i, ret = 0; 2448 2449 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2450 q->port = pi; 2451 q->adap = sc; 2452 2453 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2454 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2455 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2456 goto err; 2457 } 2458 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2459 M_NOWAIT | M_ZERO)) == NULL) { 2460 device_printf(sc->dev, "failed to allocate ifq\n"); 2461 goto err; 2462 } 2463 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2464 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2465 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2466 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2467 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2468 2469 init_qset_cntxt(q, id); 2470 q->idx = id; 2471 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2472 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2473 &q->fl[0].desc, &q->fl[0].sdesc, 2474 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2475 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2476 printf("error %d from alloc ring fl0\n", ret); 2477 goto err; 2478 } 2479 2480 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2481 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2482 &q->fl[1].desc, &q->fl[1].sdesc, 2483 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2484 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2485 printf("error %d from alloc ring fl1\n", ret); 2486 goto err; 2487 } 2488 2489 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2490 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2491 &q->rspq.desc_tag, &q->rspq.desc_map, 2492 NULL, NULL)) != 0) { 2493 printf("error %d from alloc ring rspq\n", ret); 2494 goto err; 2495 } 2496 2497 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2498 device_get_unit(sc->dev), irq_vec_idx); 2499 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2500 2501 for (i = 0; i < ntxq; ++i) { 2502 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2503 2504 if ((ret = alloc_ring(sc, p->txq_size[i], 2505 sizeof(struct tx_desc), sz, 2506 &q->txq[i].phys_addr, &q->txq[i].desc, 2507 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2508 &q->txq[i].desc_map, 2509 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2510 printf("error %d from alloc ring tx %i\n", ret, i); 2511 goto err; 2512 } 2513 mbufq_init(&q->txq[i].sendq, INT_MAX); 2514 q->txq[i].gen = 1; 2515 q->txq[i].size = p->txq_size[i]; 2516 } 2517 2518 #ifdef TCP_OFFLOAD 2519 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2520 #endif 2521 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2522 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2523 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2524 2525 q->fl[0].gen = q->fl[1].gen = 1; 2526 q->fl[0].size = p->fl_size; 2527 q->fl[1].size = p->jumbo_size; 2528 2529 q->rspq.gen = 1; 2530 q->rspq.cidx = 0; 2531 q->rspq.size = p->rspq_size; 2532 2533 q->txq[TXQ_ETH].stop_thres = nports * 2534 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2535 2536 q->fl[0].buf_size = MCLBYTES; 2537 q->fl[0].zone = zone_pack; 2538 q->fl[0].type = EXT_PACKET; 2539 2540 if (p->jumbo_buf_size == MJUM16BYTES) { 2541 q->fl[1].zone = zone_jumbo16; 2542 q->fl[1].type = EXT_JUMBO16; 2543 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2544 q->fl[1].zone = zone_jumbo9; 2545 q->fl[1].type = EXT_JUMBO9; 2546 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2547 q->fl[1].zone = zone_jumbop; 2548 q->fl[1].type = EXT_JUMBOP; 2549 } else { 2550 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2551 ret = EDOOFUS; 2552 goto err; 2553 } 2554 q->fl[1].buf_size = p->jumbo_buf_size; 2555 2556 /* Allocate and setup the lro_ctrl structure */ 2557 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2558 #if defined(INET6) || defined(INET) 2559 ret = tcp_lro_init(&q->lro.ctrl); 2560 if (ret) { 2561 printf("error %d from tcp_lro_init\n", ret); 2562 goto err; 2563 } 2564 #endif 2565 q->lro.ctrl.ifp = pi->ifp; 2566 2567 mtx_lock_spin(&sc->sge.reg_lock); 2568 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2569 q->rspq.phys_addr, q->rspq.size, 2570 q->fl[0].buf_size, 1, 0); 2571 if (ret) { 2572 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2573 goto err_unlock; 2574 } 2575 2576 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2577 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2578 q->fl[i].phys_addr, q->fl[i].size, 2579 q->fl[i].buf_size, p->cong_thres, 1, 2580 0); 2581 if (ret) { 2582 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2583 goto err_unlock; 2584 } 2585 } 2586 2587 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2588 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2589 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2590 1, 0); 2591 if (ret) { 2592 printf("error %d from t3_sge_init_ecntxt\n", ret); 2593 goto err_unlock; 2594 } 2595 2596 if (ntxq > 1) { 2597 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2598 USE_GTS, SGE_CNTXT_OFLD, id, 2599 q->txq[TXQ_OFLD].phys_addr, 2600 q->txq[TXQ_OFLD].size, 0, 1, 0); 2601 if (ret) { 2602 printf("error %d from t3_sge_init_ecntxt\n", ret); 2603 goto err_unlock; 2604 } 2605 } 2606 2607 if (ntxq > 2) { 2608 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2609 SGE_CNTXT_CTRL, id, 2610 q->txq[TXQ_CTRL].phys_addr, 2611 q->txq[TXQ_CTRL].size, 2612 q->txq[TXQ_CTRL].token, 1, 0); 2613 if (ret) { 2614 printf("error %d from t3_sge_init_ecntxt\n", 
ret); 2615 goto err_unlock; 2616 } 2617 } 2618 2619 mtx_unlock_spin(&sc->sge.reg_lock); 2620 t3_update_qset_coalesce(q, p); 2621 2622 refill_fl(sc, &q->fl[0], q->fl[0].size); 2623 refill_fl(sc, &q->fl[1], q->fl[1].size); 2624 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2625 2626 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2627 V_NEWTIMER(q->rspq.holdoff_tmr)); 2628 2629 return (0); 2630 2631 err_unlock: 2632 mtx_unlock_spin(&sc->sge.reg_lock); 2633 err: 2634 TXQ_LOCK(q); 2635 t3_free_qset(sc, q); 2636 2637 return (ret); 2638 } 2639 2640 /* 2641 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2642 * ethernet data. Hardware assistance with various checksums and any vlan tag 2643 * will also be taken into account here. 2644 */ 2645 void 2646 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) 2647 { 2648 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2649 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2650 struct ifnet *ifp = pi->ifp; 2651 2652 if (cpl->vlan_valid) { 2653 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2654 m->m_flags |= M_VLANTAG; 2655 } 2656 2657 m->m_pkthdr.rcvif = ifp; 2658 /* 2659 * adjust after conversion to mbuf chain 2660 */ 2661 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2662 m->m_len -= (sizeof(*cpl) + ethpad); 2663 m->m_data += (sizeof(*cpl) + ethpad); 2664 2665 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { 2666 struct ether_header *eh = mtod(m, void *); 2667 uint16_t eh_type; 2668 2669 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2670 struct ether_vlan_header *evh = mtod(m, void *); 2671 2672 eh_type = evh->evl_proto; 2673 } else 2674 eh_type = eh->ether_type; 2675 2676 if (ifp->if_capenable & IFCAP_RXCSUM && 2677 eh_type == htons(ETHERTYPE_IP)) { 2678 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 2679 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2680 m->m_pkthdr.csum_data = 0xffff; 2681 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2682 eh_type == htons(ETHERTYPE_IPV6)) { 2683 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 2684 CSUM_PSEUDO_HDR); 2685 m->m_pkthdr.csum_data = 0xffff; 2686 } 2687 } 2688 } 2689 2690 /** 2691 * get_packet - return the next ingress packet buffer from a free list 2692 * @adap: the adapter that received the packet 2693 * @drop_thres: # of remaining buffers before we start dropping packets 2694 * @qs: the qset that the SGE free list holding the packet belongs to 2695 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2696 * @r: response descriptor 2697 * 2698 * Get the next packet from a free list and complete setup of the 2699 * sk_buff. If the packet is small we make a copy and recycle the 2700 * original buffer, otherwise we use the original buffer itself. If a 2701 * positive drop threshold is supplied packets are dropped and their 2702 * buffers recycled if (a) the number of remaining buffers is under the 2703 * threshold and the packet is too big to copy, or (b) the packet should 2704 * be copied but there is no memory for the copy. 2705 */ 2706 static int 2707 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2708 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2709 { 2710 2711 unsigned int len_cq = ntohl(r->len_cq); 2712 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2713 int mask, cidx = fl->cidx; 2714 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2715 uint32_t len = G_RSPD_LEN(len_cq); 2716 uint32_t flags = M_EXT; 2717 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2718 caddr_t cl; 2719 struct mbuf *m; 2720 int ret = 0; 2721 2722 mask = fl->size - 1; 2723 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2724 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2725 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2726 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2727 2728 fl->credits--; 2729 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2730 2731 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2732 sopeop == RSPQ_SOP_EOP) { 2733 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 2734 goto skip_recycle; 2735 cl = mtod(m, void *); 2736 memcpy(cl, sd->rxsd_cl, len); 2737 recycle_rx_buf(adap, fl, fl->cidx); 2738 m->m_pkthdr.len = m->m_len = len; 2739 m->m_flags = 0; 2740 mh->mh_head = mh->mh_tail = m; 2741 ret = 1; 2742 goto done; 2743 } else { 2744 skip_recycle: 2745 bus_dmamap_unload(fl->entry_tag, sd->map); 2746 cl = sd->rxsd_cl; 2747 m = sd->m; 2748 2749 if ((sopeop == RSPQ_SOP_EOP) || 2750 (sopeop == RSPQ_SOP)) 2751 flags |= M_PKTHDR; 2752 m_init(m, M_NOWAIT, MT_DATA, flags); 2753 if (fl->zone == zone_pack) { 2754 /* 2755 * restore clobbered data pointer 2756 */ 2757 m->m_data = m->m_ext.ext_buf; 2758 } else { 2759 m_cljset(m, cl, fl->type); 2760 } 2761 m->m_len = len; 2762 } 2763 switch(sopeop) { 2764 case RSPQ_SOP_EOP: 2765 ret = 1; 2766 /* FALLTHROUGH */ 2767 case RSPQ_SOP: 2768 mh->mh_head = mh->mh_tail = m; 2769 m->m_pkthdr.len = len; 2770 break; 2771 case RSPQ_EOP: 2772 ret = 1; 2773 /* FALLTHROUGH */ 2774 case RSPQ_NSOP_NEOP: 2775 if (mh->mh_tail == NULL) { 2776 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2777 m_freem(m); 2778 break; 2779 } 2780 mh->mh_tail->m_next = m; 2781 mh->mh_tail = m; 2782 mh->mh_head->m_pkthdr.len += len; 2783 break; 2784 } 2785 if (cxgb_debug) 2786 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2787 done: 2788 if (++fl->cidx == fl->size) 2789 fl->cidx = 0; 2790 2791 return (ret); 2792 } 2793 2794 /** 2795 * handle_rsp_cntrl_info - handles control information in a response 2796 * @qs: the queue set corresponding to the response 2797 * @flags: the response control flags 2798 * 2799 * Handles the control information of an SGE response, such as GTS 2800 * indications and completion credits for the queue set's Tx queues. 2801 * HW coalesces credits, we don't do any extra SW coalescing. 
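 * The TXQ0, TXQ1 and TXQ2 credit fields correspond to the Ethernet,
 * offload and control Tx queues, respectively.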
2802 */ 2803 static __inline void 2804 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2805 { 2806 unsigned int credits; 2807 2808 #if USE_GTS 2809 if (flags & F_RSPD_TXQ0_GTS) 2810 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2811 #endif 2812 credits = G_RSPD_TXQ0_CR(flags); 2813 if (credits) 2814 qs->txq[TXQ_ETH].processed += credits; 2815 2816 credits = G_RSPD_TXQ2_CR(flags); 2817 if (credits) 2818 qs->txq[TXQ_CTRL].processed += credits; 2819 2820 # if USE_GTS 2821 if (flags & F_RSPD_TXQ1_GTS) 2822 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2823 # endif 2824 credits = G_RSPD_TXQ1_CR(flags); 2825 if (credits) 2826 qs->txq[TXQ_OFLD].processed += credits; 2827 2828 } 2829 2830 static void 2831 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2832 unsigned int sleeping) 2833 { 2834 ; 2835 } 2836 2837 /** 2838 * process_responses - process responses from an SGE response queue 2839 * @adap: the adapter 2840 * @qs: the queue set to which the response queue belongs 2841 * @budget: how many responses can be processed in this round 2842 * 2843 * Process responses from an SGE response queue up to the supplied budget. 2844 * Responses include received packets as well as credits and other events 2845 * for the queues that belong to the response queue's queue set. 2846 * A negative budget is effectively unlimited. 2847 * 2848 * Additionally choose the interrupt holdoff time for the next interrupt 2849 * on this queue. If the system is under memory shortage use a fairly 2850 * long delay to help recovery. 2851 */ 2852 static int 2853 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2854 { 2855 struct sge_rspq *rspq = &qs->rspq; 2856 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2857 int budget_left = budget; 2858 unsigned int sleeping = 0; 2859 #if defined(INET6) || defined(INET) 2860 int lro_enabled = qs->lro.enabled; 2861 int skip_lro; 2862 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2863 #endif 2864 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2865 #ifdef DEBUG 2866 static int last_holdoff = 0; 2867 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2868 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2869 last_holdoff = rspq->holdoff_tmr; 2870 } 2871 #endif 2872 rspq->next_holdoff = rspq->holdoff_tmr; 2873 2874 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2875 int eth, eop = 0, ethpad = 0; 2876 uint32_t flags = ntohl(r->flags); 2877 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2878 uint8_t opcode = r->rss_hdr.opcode; 2879 2880 eth = (opcode == CPL_RX_PKT); 2881 2882 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2883 struct mbuf *m; 2884 2885 if (cxgb_debug) 2886 printf("async notification\n"); 2887 2888 if (mh->mh_head == NULL) { 2889 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); 2890 m = mh->mh_head; 2891 } else { 2892 m = m_gethdr(M_NOWAIT, MT_DATA); 2893 } 2894 if (m == NULL) 2895 goto no_mem; 2896 2897 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2898 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2899 *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; 2900 opcode = CPL_ASYNC_NOTIF; 2901 eop = 1; 2902 rspq->async_notif++; 2903 goto skip; 2904 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2905 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 2906 2907 if (m == NULL) { 2908 no_mem: 2909 rspq->next_holdoff = NOMEM_INTR_DELAY; 2910 budget_left--; 2911 break; 2912 } 2913 if (mh->mh_head == NULL) 2914 mh->mh_head = m; 2915 else 2916 mh->mh_tail->m_next = m; 2917 mh->mh_tail = m; 2918 2919 get_imm_packet(adap, r, m); 2920 mh->mh_head->m_pkthdr.len += 
m->m_len; 2921 eop = 1; 2922 rspq->imm_data++; 2923 } else if (r->len_cq) { 2924 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 2925 2926 eop = get_packet(adap, drop_thresh, qs, mh, r); 2927 if (eop) { 2928 if (r->rss_hdr.hash_type && !adap->timestamp) { 2929 M_HASHTYPE_SET(mh->mh_head, 2930 M_HASHTYPE_OPAQUE_HASH); 2931 mh->mh_head->m_pkthdr.flowid = rss_hash; 2932 } 2933 } 2934 2935 ethpad = 2; 2936 } else { 2937 rspq->pure_rsps++; 2938 } 2939 skip: 2940 if (flags & RSPD_CTRL_MASK) { 2941 sleeping |= flags & RSPD_GTS_MASK; 2942 handle_rsp_cntrl_info(qs, flags); 2943 } 2944 2945 if (!eth && eop) { 2946 rspq->offload_pkts++; 2947 #ifdef TCP_OFFLOAD 2948 adap->cpl_handler[opcode](qs, r, mh->mh_head); 2949 #else 2950 m_freem(mh->mh_head); 2951 #endif 2952 mh->mh_head = NULL; 2953 } else if (eth && eop) { 2954 struct mbuf *m = mh->mh_head; 2955 2956 t3_rx_eth(adap, m, ethpad); 2957 2958 /* 2959 * The T304 sends incoming packets on any qset. If LRO 2960 * is also enabled, we could end up sending packet up 2961 * lro_ctrl->ifp's input. That is incorrect. 2962 * 2963 * The mbuf's rcvif was derived from the cpl header and 2964 * is accurate. Skip LRO and just use that. 2965 */ 2966 #if defined(INET6) || defined(INET) 2967 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 2968 2969 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 2970 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 2971 ) { 2972 /* successfully queue'd for LRO */ 2973 } else 2974 #endif 2975 { 2976 /* 2977 * LRO not enabled, packet unsuitable for LRO, 2978 * or unable to queue. Pass it up right now in 2979 * either case. 2980 */ 2981 struct ifnet *ifp = m->m_pkthdr.rcvif; 2982 (*ifp->if_input)(ifp, m); 2983 } 2984 mh->mh_head = NULL; 2985 2986 } 2987 2988 r++; 2989 if (__predict_false(++rspq->cidx == rspq->size)) { 2990 rspq->cidx = 0; 2991 rspq->gen ^= 1; 2992 r = rspq->desc; 2993 } 2994 2995 if (++rspq->credits >= 64) { 2996 refill_rspq(adap, rspq, rspq->credits); 2997 rspq->credits = 0; 2998 } 2999 __refill_fl_lt(adap, &qs->fl[0], 32); 3000 __refill_fl_lt(adap, &qs->fl[1], 32); 3001 --budget_left; 3002 } 3003 3004 #if defined(INET6) || defined(INET) 3005 /* Flush LRO */ 3006 tcp_lro_flush_all(lro_ctrl); 3007 #endif 3008 3009 if (sleeping) 3010 check_ring_db(adap, qs, sleeping); 3011 3012 mb(); /* commit Tx queue processed updates */ 3013 if (__predict_false(qs->txq_stopped > 1)) 3014 restart_tx(qs); 3015 3016 __refill_fl_lt(adap, &qs->fl[0], 512); 3017 __refill_fl_lt(adap, &qs->fl[1], 512); 3018 budget -= budget_left; 3019 return (budget); 3020 } 3021 3022 /* 3023 * A helper function that processes responses and issues GTS. 3024 */ 3025 static __inline int 3026 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3027 { 3028 int work; 3029 static int last_holdoff = 0; 3030 3031 work = process_responses(adap, rspq_to_qset(rq), -1); 3032 3033 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3034 printf("next_holdoff=%d\n", rq->next_holdoff); 3035 last_holdoff = rq->next_holdoff; 3036 } 3037 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3038 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3039 3040 return (work); 3041 } 3042 3043 #ifdef DEBUGNET 3044 int 3045 cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs) 3046 { 3047 3048 return (process_responses_gts(adap, &qs->rspq)); 3049 } 3050 #endif 3051 3052 /* 3053 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 
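 * (The handler acknowledges the INTx interrupt by writing A_PL_CLI before
 * reading the interrupt cause map from A_SG_DATA_INTR.)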
3054 * Handles data events from SGE response queues as well as error and other 3055 * async events as they all use the same interrupt pin. We use one SGE 3056 * response queue per port in this mode and protect all response queues with 3057 * queue 0's lock. 3058 */ 3059 void 3060 t3b_intr(void *data) 3061 { 3062 uint32_t i, map; 3063 adapter_t *adap = data; 3064 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3065 3066 t3_write_reg(adap, A_PL_CLI, 0); 3067 map = t3_read_reg(adap, A_SG_DATA_INTR); 3068 3069 if (!map) 3070 return; 3071 3072 if (__predict_false(map & F_ERRINTR)) { 3073 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3074 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3075 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3076 } 3077 3078 mtx_lock(&q0->lock); 3079 for_each_port(adap, i) 3080 if (map & (1 << i)) 3081 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3082 mtx_unlock(&q0->lock); 3083 } 3084 3085 /* 3086 * The MSI interrupt handler. This needs to handle data events from SGE 3087 * response queues as well as error and other async events as they all use 3088 * the same MSI vector. We use one SGE response queue per port in this mode 3089 * and protect all response queues with queue 0's lock. 3090 */ 3091 void 3092 t3_intr_msi(void *data) 3093 { 3094 adapter_t *adap = data; 3095 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3096 int i, new_packets = 0; 3097 3098 mtx_lock(&q0->lock); 3099 3100 for_each_port(adap, i) 3101 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3102 new_packets = 1; 3103 mtx_unlock(&q0->lock); 3104 if (new_packets == 0) { 3105 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3106 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3107 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3108 } 3109 } 3110 3111 void 3112 t3_intr_msix(void *data) 3113 { 3114 struct sge_qset *qs = data; 3115 adapter_t *adap = qs->port->adapter; 3116 struct sge_rspq *rspq = &qs->rspq; 3117 3118 if (process_responses_gts(adap, rspq) == 0) 3119 rspq->unhandled_irqs++; 3120 } 3121 3122 #define QDUMP_SBUF_SIZE 32 * 400 3123 static int 3124 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3125 { 3126 struct sge_rspq *rspq; 3127 struct sge_qset *qs; 3128 int i, err, dump_end, idx; 3129 struct sbuf *sb; 3130 struct rsp_desc *rspd; 3131 uint32_t data[4]; 3132 3133 rspq = arg1; 3134 qs = rspq_to_qset(rspq); 3135 if (rspq->rspq_dump_count == 0) 3136 return (0); 3137 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3138 log(LOG_WARNING, 3139 "dump count is too large %d\n", rspq->rspq_dump_count); 3140 rspq->rspq_dump_count = 0; 3141 return (EINVAL); 3142 } 3143 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3144 log(LOG_WARNING, 3145 "dump start of %d is greater than queue size\n", 3146 rspq->rspq_dump_start); 3147 rspq->rspq_dump_start = 0; 3148 return (EINVAL); 3149 } 3150 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3151 if (err) 3152 return (err); 3153 err = sysctl_wire_old_buffer(req, 0); 3154 if (err) 3155 return (err); 3156 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3157 3158 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3159 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3160 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3161 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3162 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3163 3164 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3165 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3166 3167 dump_end = 
rspq->rspq_dump_start + rspq->rspq_dump_count; 3168 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3169 idx = i & (RSPQ_Q_SIZE-1); 3170 3171 rspd = &rspq->desc[idx]; 3172 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3173 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3174 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3175 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3176 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3177 be32toh(rspd->len_cq), rspd->intr_gen); 3178 } 3179 3180 err = sbuf_finish(sb); 3181 sbuf_delete(sb); 3182 return (err); 3183 } 3184 3185 static int 3186 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3187 { 3188 struct sge_txq *txq; 3189 struct sge_qset *qs; 3190 int i, j, err, dump_end; 3191 struct sbuf *sb; 3192 struct tx_desc *txd; 3193 uint32_t *WR, wr_hi, wr_lo, gen; 3194 uint32_t data[4]; 3195 3196 txq = arg1; 3197 qs = txq_to_qset(txq, TXQ_ETH); 3198 if (txq->txq_dump_count == 0) { 3199 return (0); 3200 } 3201 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3202 log(LOG_WARNING, 3203 "dump count is too large %d\n", txq->txq_dump_count); 3204 txq->txq_dump_count = 1; 3205 return (EINVAL); 3206 } 3207 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3208 log(LOG_WARNING, 3209 "dump start of %d is greater than queue size\n", 3210 txq->txq_dump_start); 3211 txq->txq_dump_start = 0; 3212 return (EINVAL); 3213 } 3214 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3215 if (err) 3216 return (err); 3217 err = sysctl_wire_old_buffer(req, 0); 3218 if (err) 3219 return (err); 3220 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3221 3222 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3223 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3224 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3225 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3226 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3227 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3228 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3229 txq->txq_dump_start, 3230 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3231 3232 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3233 for (i = txq->txq_dump_start; i < dump_end; i++) { 3234 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3235 WR = (uint32_t *)txd->flit; 3236 wr_hi = ntohl(WR[0]); 3237 wr_lo = ntohl(WR[1]); 3238 gen = G_WR_GEN(wr_lo); 3239 3240 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3241 wr_hi, wr_lo, gen); 3242 for (j = 2; j < 30; j += 4) 3243 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3244 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3245 3246 } 3247 err = sbuf_finish(sb); 3248 sbuf_delete(sb); 3249 return (err); 3250 } 3251 3252 static int 3253 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3254 { 3255 struct sge_txq *txq; 3256 struct sge_qset *qs; 3257 int i, j, err, dump_end; 3258 struct sbuf *sb; 3259 struct tx_desc *txd; 3260 uint32_t *WR, wr_hi, wr_lo, gen; 3261 3262 txq = arg1; 3263 qs = txq_to_qset(txq, TXQ_CTRL); 3264 if (txq->txq_dump_count == 0) { 3265 return (0); 3266 } 3267 if (txq->txq_dump_count > 256) { 3268 log(LOG_WARNING, 3269 "dump count is too large %d\n", txq->txq_dump_count); 3270 txq->txq_dump_count = 1; 3271 return (EINVAL); 3272 } 3273 if (txq->txq_dump_start > 255) { 3274 log(LOG_WARNING, 3275 "dump start of %d is greater than queue size\n", 3276 txq->txq_dump_start); 3277 txq->txq_dump_start = 0; 3278 
return (EINVAL); 3279 } 3280 3281 err = sysctl_wire_old_buffer(req, 0); 3282 if (err != 0) 3283 return (err); 3284 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3285 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3286 txq->txq_dump_start, 3287 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3288 3289 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3290 for (i = txq->txq_dump_start; i < dump_end; i++) { 3291 txd = &txq->desc[i & (255)]; 3292 WR = (uint32_t *)txd->flit; 3293 wr_hi = ntohl(WR[0]); 3294 wr_lo = ntohl(WR[1]); 3295 gen = G_WR_GEN(wr_lo); 3296 3297 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3298 wr_hi, wr_lo, gen); 3299 for (j = 2; j < 30; j += 4) 3300 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3301 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3302 3303 } 3304 err = sbuf_finish(sb); 3305 sbuf_delete(sb); 3306 return (err); 3307 } 3308 3309 static int 3310 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3311 { 3312 adapter_t *sc = arg1; 3313 struct qset_params *qsp = &sc->params.sge.qset[0]; 3314 int coalesce_usecs; 3315 struct sge_qset *qs; 3316 int i, j, err, nqsets = 0; 3317 struct mtx *lock; 3318 3319 if ((sc->flags & FULL_INIT_DONE) == 0) 3320 return (ENXIO); 3321 3322 coalesce_usecs = qsp->coalesce_usecs; 3323 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3324 3325 if (err != 0) { 3326 return (err); 3327 } 3328 if (coalesce_usecs == qsp->coalesce_usecs) 3329 return (0); 3330 3331 for (i = 0; i < sc->params.nports; i++) 3332 for (j = 0; j < sc->port[i].nqsets; j++) 3333 nqsets++; 3334 3335 coalesce_usecs = max(1, coalesce_usecs); 3336 3337 for (i = 0; i < nqsets; i++) { 3338 qs = &sc->sge.qs[i]; 3339 qsp = &sc->params.sge.qset[i]; 3340 qsp->coalesce_usecs = coalesce_usecs; 3341 3342 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3343 &sc->sge.qs[0].rspq.lock; 3344 3345 mtx_lock(lock); 3346 t3_update_qset_coalesce(qs, qsp); 3347 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3348 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3349 mtx_unlock(lock); 3350 } 3351 3352 return (0); 3353 } 3354 3355 static int 3356 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3357 { 3358 adapter_t *sc = arg1; 3359 int rc, timestamp; 3360 3361 if ((sc->flags & FULL_INIT_DONE) == 0) 3362 return (ENXIO); 3363 3364 timestamp = sc->timestamp; 3365 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3366 3367 if (rc != 0) 3368 return (rc); 3369 3370 if (timestamp != sc->timestamp) { 3371 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3372 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3373 sc->timestamp = timestamp; 3374 } 3375 3376 return (0); 3377 } 3378 3379 void 3380 t3_add_attach_sysctls(adapter_t *sc) 3381 { 3382 struct sysctl_ctx_list *ctx; 3383 struct sysctl_oid_list *children; 3384 3385 ctx = device_get_sysctl_ctx(sc->dev); 3386 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3387 3388 /* random information */ 3389 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3390 "firmware_version", 3391 CTLFLAG_RD, sc->fw_version, 3392 0, "firmware version"); 3393 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3394 "hw_revision", 3395 CTLFLAG_RD, &sc->params.rev, 3396 0, "chip model"); 3397 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3398 "port_types", 3399 CTLFLAG_RD, sc->port_types, 3400 0, "type of ports"); 3401 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3402 "enable_debug", 3403 CTLFLAG_RW, &cxgb_debug, 3404 0, "enable verbose debugging output"); 3405 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3406 CTLFLAG_RD, &sc->tunq_coalesce, 3407 "#tunneled packets freed"); 3408 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3409 "txq_overrun", 3410 CTLFLAG_RD, &txq_fills, 3411 0, "#times txq overrun"); 3412 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3413 "core_clock", 3414 CTLFLAG_RD, &sc->params.vpd.cclk, 3415 0, "core clock frequency (in KHz)"); 3416 } 3417 3418 3419 static const char *rspq_name = "rspq"; 3420 static const char *txq_names[] = 3421 { 3422 "txq_eth", 3423 "txq_ofld", 3424 "txq_ctrl" 3425 }; 3426 3427 static int 3428 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3429 { 3430 struct port_info *p = arg1; 3431 uint64_t *parg; 3432 3433 if (!p) 3434 return (EINVAL); 3435 3436 cxgb_refresh_stats(p); 3437 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3438 3439 return (sysctl_handle_64(oidp, parg, 0, req)); 3440 } 3441 3442 void 3443 t3_add_configured_sysctls(adapter_t *sc) 3444 { 3445 struct sysctl_ctx_list *ctx; 3446 struct sysctl_oid_list *children; 3447 int i, j; 3448 3449 ctx = device_get_sysctl_ctx(sc->dev); 3450 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3451 3452 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3453 "intr_coal", 3454 CTLTYPE_INT|CTLFLAG_RW, sc, 3455 0, t3_set_coalesce_usecs, 3456 "I", "interrupt coalescing timer (us)"); 3457 3458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3459 "pkt_timestamp", 3460 CTLTYPE_INT | CTLFLAG_RW, sc, 3461 0, t3_pkt_timestamp, 3462 "I", "provide packet timestamp instead of connection hash"); 3463 3464 for (i = 0; i < sc->params.nports; i++) { 3465 struct port_info *pi = &sc->port[i]; 3466 struct sysctl_oid *poid; 3467 struct sysctl_oid_list *poidlist; 3468 struct mac_stats *mstats = &pi->mac.stats; 3469 3470 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3471 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3472 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3473 poidlist = SYSCTL_CHILDREN(poid); 3474 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3475 "nqsets", CTLFLAG_RD, &pi->nqsets, 3476 0, "#queue sets"); 3477 3478 for (j = 0; j < pi->nqsets; j++) { 3479 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3480 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3481 *ctrlqpoid, *lropoid; 3482 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3483 *txqpoidlist, *ctrlqpoidlist, 3484 *lropoidlist; 3485 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3486 3487 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3488 3489 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3490 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3491 qspoidlist = SYSCTL_CHILDREN(qspoid); 3492 3493 
SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3494 CTLFLAG_RD, &qs->fl[0].empty, 0, 3495 "freelist #0 empty"); 3496 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3497 CTLFLAG_RD, &qs->fl[1].empty, 0, 3498 "freelist #1 empty"); 3499 3500 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3501 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3502 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3503 3504 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3505 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3506 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3507 3508 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3509 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3510 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3511 3512 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3513 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3514 lropoidlist = SYSCTL_CHILDREN(lropoid); 3515 3516 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3517 CTLFLAG_RD, &qs->rspq.size, 3518 0, "#entries in response queue"); 3519 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3520 CTLFLAG_RD, &qs->rspq.cidx, 3521 0, "consumer index"); 3522 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3523 CTLFLAG_RD, &qs->rspq.credits, 3524 0, "#credits"); 3525 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3526 CTLFLAG_RD, &qs->rspq.starved, 3527 0, "#times starved"); 3528 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3529 CTLFLAG_RD, &qs->rspq.phys_addr, 3530 "physical_address_of the queue"); 3531 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3532 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3533 0, "start rspq dump entry"); 3534 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3535 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3536 0, "#rspq entries to dump"); 3537 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3538 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3539 0, t3_dump_rspq, "A", "dump of the response queue"); 3540 3541 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3542 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3543 "#tunneled packets dropped"); 3544 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3545 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 3546 0, "#tunneled packets waiting to be sent"); 3547 #if 0 3548 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3549 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3550 0, "#tunneled packets queue producer index"); 3551 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3552 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3553 0, "#tunneled packets queue consumer index"); 3554 #endif 3555 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3556 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3557 0, "#tunneled packets processed by the card"); 3558 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3559 CTLFLAG_RD, &txq->cleaned, 3560 0, "#tunneled packets cleaned"); 3561 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3562 CTLFLAG_RD, &txq->in_use, 3563 0, "#tunneled packet slots in use"); 3564 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", 3565 CTLFLAG_RD, &txq->txq_frees, 3566 "#tunneled packets freed"); 3567 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3568 CTLFLAG_RD, &txq->txq_skipped, 3569 0, "#tunneled packet descriptors skipped"); 3570 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3571 CTLFLAG_RD, &txq->txq_coalesced, 3572 "#tunneled packets coalesced"); 3573 
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3574 CTLFLAG_RD, &txq->txq_enqueued, 3575 0, "#tunneled packets enqueued to hardware"); 3576 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3577 CTLFLAG_RD, &qs->txq_stopped, 3578 0, "tx queues stopped"); 3579 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3580 CTLFLAG_RD, &txq->phys_addr, 3581 "physical_address_of the queue"); 3582 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3583 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3584 0, "txq generation"); 3585 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3586 CTLFLAG_RD, &txq->cidx, 3587 0, "hardware queue cidx"); 3588 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3589 CTLFLAG_RD, &txq->pidx, 3590 0, "hardware queue pidx"); 3591 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3592 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3593 0, "txq start idx for dump"); 3594 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3595 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3596 0, "txq #entries to dump"); 3597 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3598 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3599 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3600 3601 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3602 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3603 0, "ctrlq start idx for dump"); 3604 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3605 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3606 0, "ctrl #entries to dump"); 3607 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3608 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3609 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3610 3611 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", 3612 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3613 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3614 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3615 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3616 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3617 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3618 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3619 } 3620 3621 /* Now add a node for mac stats. */ 3622 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3623 CTLFLAG_RD, NULL, "MAC statistics"); 3624 poidlist = SYSCTL_CHILDREN(poid); 3625 3626 /* 3627 * We (ab)use the length argument (arg2) to pass on the offset 3628 * of the data that we are interested in. This is only required 3629 * for the quad counters that are updated from the hardware (we 3630 * make sure that we return the latest value). 3631 * sysctl_handle_macstat first updates *all* the counters from 3632 * the hardware, and then returns the latest value of the 3633 * requested counter. Best would be to update only the 3634 * requested counter from hardware, but t3_mac_update_stats() 3635 * hides all the register details and we don't want to dive into 3636 * all that here. 
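 * sysctl_handle_macstat() simply adds that offset to the base of the
 * port's mac.stats structure to locate the 64-bit counter to return.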
3637 */ 3638 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3639 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3640 sysctl_handle_macstat, "QU", 0) 3641 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3642 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3643 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3644 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3645 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3646 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3647 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3648 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3649 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3650 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3651 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3652 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3653 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3654 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3655 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3656 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3657 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3658 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3659 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3660 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3661 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3662 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3663 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3664 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3665 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3666 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3667 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3668 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3669 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3670 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3671 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3672 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3673 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3674 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3675 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3676 CXGB_SYSCTL_ADD_QUAD(rx_short); 3677 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3678 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3679 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3680 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3681 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3682 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3683 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3684 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3685 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3686 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3687 #undef CXGB_SYSCTL_ADD_QUAD 3688 3689 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3690 CTLFLAG_RD, &mstats->a, 0) 3691 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3692 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3693 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3694 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3695 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3696 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3697 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3698 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3699 CXGB_SYSCTL_ADD_ULONG(num_resets); 3700 CXGB_SYSCTL_ADD_ULONG(link_faults); 3701 #undef CXGB_SYSCTL_ADD_ULONG 3702 } 3703 } 3704 3705 /** 3706 * t3_get_desc - dump an SGE descriptor for debugging purposes 3707 * @qs: the queue set 3708 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3709 * @idx: the descriptor index in the queue 3710 * @data: where to dump the descriptor contents 3711 * 3712 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3713 * size of the descriptor. 
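 * Returns EINVAL if the queue number or descriptor index is out of range,
 * or if the selected queue has no descriptor ring.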
3714 */ 3715 int 3716 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3717 unsigned char *data) 3718 { 3719 if (qnum >= 6) 3720 return (EINVAL); 3721 3722 if (qnum < 3) { 3723 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3724 return (EINVAL); 3725 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3726 return sizeof(struct tx_desc); 3727 } 3728 3729 if (qnum == 3) { 3730 if (!qs->rspq.desc || idx >= qs->rspq.size) 3731 return (EINVAL); 3732 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3733 return sizeof(struct rsp_desc); 3734 } 3735 3736 qnum -= 4; 3737 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3738 return (EINVAL); 3739 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3740 return sizeof(struct rx_desc); 3741 } 3742