1 /************************************************************************** 2 SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 4 Copyright (c) 2007-2009, Chelsio Inc. 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without 8 modification, are permitted provided that the following conditions are met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Neither the name of the Chelsio Corporation nor the names of its 14 contributors may be used to endorse or promote products derived from 15 this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 29 ***************************************************************************/ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_inet6.h" 35 #include "opt_inet.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/module.h> 41 #include <sys/bus.h> 42 #include <sys/conf.h> 43 #include <machine/bus.h> 44 #include <machine/resource.h> 45 #include <sys/rman.h> 46 #include <sys/queue.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 50 #include <sys/proc.h> 51 #include <sys/sbuf.h> 52 #include <sys/sched.h> 53 #include <sys/smp.h> 54 #include <sys/systm.h> 55 #include <sys/syslog.h> 56 #include <sys/socket.h> 57 #include <sys/sglist.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/bpf.h> 62 #include <net/ethernet.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 71 #include <dev/pci/pcireg.h> 72 #include <dev/pci/pcivar.h> 73 74 #include <vm/vm.h> 75 #include <vm/pmap.h> 76 77 #include <cxgb_include.h> 78 #include <sys/mvec.h> 79 80 int txq_fills = 0; 81 int multiq_tx_enable = 1; 82 83 #ifdef TCP_OFFLOAD 84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); 85 #endif 86 87 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 90 "size of per-queue mbuf ring"); 91 92 static int cxgb_tx_coalesce_force = 0; 93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, 94 &cxgb_tx_coalesce_force, 0, 95 "coalesce small packets into a single work request regardless of ring state"); 96 97 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 98 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 99 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 100 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 101 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 102 #define TX_RECLAIM_MAX 
TX_ETH_Q_SIZE>>2 103 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 104 105 106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, 108 &cxgb_tx_coalesce_enable_start, 0, 109 "coalesce enable threshold"); 110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, 112 &cxgb_tx_coalesce_enable_stop, 0, 113 "coalesce disable threshold"); 114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, 116 &cxgb_tx_reclaim_threshold, 0, 117 "tx cleaning minimum threshold"); 118 119 /* 120 * XXX don't re-enable this until TOE stops assuming 121 * we have an m_ext 122 */ 123 static int recycle_enable = 0; 124 125 extern int cxgb_use_16k_clusters; 126 extern int nmbjumbop; 127 extern int nmbjumbo9; 128 extern int nmbjumbo16; 129 130 #define USE_GTS 0 131 132 #define SGE_RX_SM_BUF_SIZE 1536 133 #define SGE_RX_DROP_THRES 16 134 #define SGE_RX_COPY_THRES 128 135 136 /* 137 * Period of the Tx buffer reclaim timer. This timer does not need to run 138 * frequently as Tx buffers are usually reclaimed by new Tx packets. 139 */ 140 #define TX_RECLAIM_PERIOD (hz >> 1) 141 142 /* 143 * Values for sge_txq.flags 144 */ 145 enum { 146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 148 }; 149 150 struct tx_desc { 151 uint64_t flit[TX_DESC_FLITS]; 152 } __packed; 153 154 struct rx_desc { 155 uint32_t addr_lo; 156 uint32_t len_gen; 157 uint32_t gen2; 158 uint32_t addr_hi; 159 } __packed; 160 161 struct rsp_desc { /* response queue descriptor */ 162 struct rss_header rss_hdr; 163 uint32_t flags; 164 uint32_t len_cq; 165 uint8_t imm_data[47]; 166 uint8_t intr_gen; 167 } __packed; 168 169 #define RX_SW_DESC_MAP_CREATED (1 << 0) 170 #define TX_SW_DESC_MAP_CREATED (1 << 1) 171 #define RX_SW_DESC_INUSE (1 << 3) 172 #define TX_SW_DESC_MAPPED (1 << 4) 173 174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 178 179 struct tx_sw_desc { /* SW state per Tx descriptor */ 180 struct mbuf *m; 181 bus_dmamap_t map; 182 int flags; 183 }; 184 185 struct rx_sw_desc { /* SW state per Rx descriptor */ 186 caddr_t rxsd_cl; 187 struct mbuf *m; 188 bus_dmamap_t map; 189 int flags; 190 }; 191 192 struct txq_state { 193 unsigned int compl; 194 unsigned int gen; 195 unsigned int pidx; 196 }; 197 198 struct refill_fl_cb_arg { 199 int error; 200 bus_dma_segment_t seg; 201 int nseg; 202 }; 203 204 205 /* 206 * Maps a number of flits to the number of Tx descriptors that can hold them. 207 * The formula is 208 * 209 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 210 * 211 * HW allows up to 4 descriptors to be combined into a WR. 
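 *
 * For example, assuming WR_FLITS is 16, a 20-flit work request needs
 * 1 + (20 - 2) / 15 = 2 descriptors, matching flit_desc_map[20] below.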
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)	mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)	mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)	mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)	\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)	\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs)	\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we are now for determining the need for coalescing
 *
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue has at least
	 * cxgb_tx_coalesce_enable_start entries in use we mark it as
	 * coalescing - we drop back from coalescing when it drains below
	 * cxgb_tx_coalesce_enable_stop and there are no packets enqueued.
	 * This provides us with some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes =
&ci->nbytes; 323 324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 325 (*count < 7) && (m->m_next == NULL) && 326 ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len <= PAGE_SIZE))) { 327 *count += 1; 328 *nbytes += m->m_len; 329 return (1); 330 } 331 return (0); 332 } 333 334 static struct mbuf * 335 cxgb_dequeue(struct sge_qset *qs) 336 { 337 struct mbuf *m, *m_head, *m_tail; 338 struct coalesce_info ci; 339 340 341 if (check_pkt_coalesce(qs) == 0) 342 return TXQ_RING_DEQUEUE(qs); 343 344 m_head = m_tail = NULL; 345 ci.count = ci.nbytes = 0; 346 do { 347 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 348 if (m_head == NULL) { 349 m_tail = m_head = m; 350 } else if (m != NULL) { 351 m_tail->m_nextpkt = m; 352 m_tail = m; 353 } 354 } while (m != NULL); 355 if (ci.count > 7) 356 panic("trying to coalesce %d packets in to one WR", ci.count); 357 return (m_head); 358 } 359 360 /** 361 * reclaim_completed_tx - reclaims completed Tx descriptors 362 * @adapter: the adapter 363 * @q: the Tx queue to reclaim completed descriptors from 364 * 365 * Reclaims Tx descriptors that the SGE has indicated it has processed, 366 * and frees the associated buffers if possible. Called with the Tx 367 * queue's lock held. 368 */ 369 static __inline int 370 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 371 { 372 struct sge_txq *q = &qs->txq[queue]; 373 int reclaim = desc_reclaimable(q); 374 375 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 376 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 377 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 378 379 if (reclaim < reclaim_min) 380 return (0); 381 382 mtx_assert(&qs->lock, MA_OWNED); 383 if (reclaim > 0) { 384 t3_free_tx_desc(qs, reclaim, queue); 385 q->cleaned += reclaim; 386 q->in_use -= reclaim; 387 } 388 if (isset(&qs->txq_stopped, TXQ_ETH)) 389 clrbit(&qs->txq_stopped, TXQ_ETH); 390 391 return (reclaim); 392 } 393 394 #ifdef DEBUGNET 395 int 396 cxgb_debugnet_poll_tx(struct sge_qset *qs) 397 { 398 399 return (reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH)); 400 } 401 #endif 402 403 /** 404 * should_restart_tx - are there enough resources to restart a Tx queue? 405 * @q: the Tx queue 406 * 407 * Checks if there are enough descriptors to restart a suspended Tx queue. 408 */ 409 static __inline int 410 should_restart_tx(const struct sge_txq *q) 411 { 412 unsigned int r = q->processed - q->cleaned; 413 414 return q->in_use - r < (q->size >> 1); 415 } 416 417 /** 418 * t3_sge_init - initialize SGE 419 * @adap: the adapter 420 * @p: the SGE parameters 421 * 422 * Performs SGE initialization needed every time after a chip reset. 423 * We do not initialize any of the queue sets here, instead the driver 424 * top-level must request those individually. We also do not enable DMA 425 * here, that should be done after the queues have been set up. 426 */ 427 void 428 t3_sge_init(adapter_t *adap, struct sge_params *p) 429 { 430 u_int ctrl, ups; 431 432 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 433 434 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 435 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 436 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 437 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 438 #if SGE_NUM_GENBITS == 1 439 ctrl |= F_EGRGENCTRL; 440 #endif 441 if (adap->params.rev > 0) { 442 if (!(adap->flags & (USING_MSIX | USING_MSI))) 443 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 444 } 445 t3_write_reg(adap, A_SG_CONTROL, ctrl); 446 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 447 V_LORCQDRBTHRSH(512)); 448 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 449 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 450 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 451 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 452 adap->params.rev < T3_REV_C ? 1000 : 500); 453 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 454 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 455 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 456 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 457 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 458 } 459 460 461 /** 462 * sgl_len - calculates the size of an SGL of the given capacity 463 * @n: the number of SGL entries 464 * 465 * Calculates the number of flits needed for a scatter/gather list that 466 * can hold the given number of entries. 467 */ 468 static __inline unsigned int 469 sgl_len(unsigned int n) 470 { 471 return ((3 * n) / 2 + (n & 1)); 472 } 473 474 /** 475 * get_imm_packet - return the next ingress packet buffer from a response 476 * @resp: the response descriptor containing the packet data 477 * 478 * Return a packet containing the immediate data of the given response. 479 */ 480 static int 481 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 482 { 483 484 if (resp->rss_hdr.opcode == CPL_RX_DATA) { 485 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; 486 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 487 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { 488 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; 489 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 490 } else 491 m->m_len = IMMED_PKT_SIZE; 492 m->m_ext.ext_buf = NULL; 493 m->m_ext.ext_type = 0; 494 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 495 return (0); 496 } 497 498 static __inline u_int 499 flits_to_desc(u_int n) 500 { 501 return (flit_desc_map[n]); 502 } 503 504 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 505 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 506 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 507 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 508 F_HIRCQPARITYERROR) 509 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 510 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 511 F_RSPQDISABLED) 512 513 /** 514 * t3_sge_err_intr_handler - SGE async event interrupt handler 515 * @adapter: the adapter 516 * 517 * Interrupt handler for SGE asynchronous (non-data) events. 
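 * Non-fatal causes are only logged; any cause covered by SGE_FATALERR
 * additionally escalates to t3_fatal_err() once the interrupt cause
 * register has been cleared.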
518 */ 519 void 520 t3_sge_err_intr_handler(adapter_t *adapter) 521 { 522 unsigned int v, status; 523 524 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 525 if (status & SGE_PARERR) 526 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 527 status & SGE_PARERR); 528 if (status & SGE_FRAMINGERR) 529 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 530 status & SGE_FRAMINGERR); 531 if (status & F_RSPQCREDITOVERFOW) 532 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 533 534 if (status & F_RSPQDISABLED) { 535 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 536 537 CH_ALERT(adapter, 538 "packet delivered to disabled response queue (0x%x)\n", 539 (v >> S_RSPQ0DISABLED) & 0xff); 540 } 541 542 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 543 if (status & SGE_FATALERR) 544 t3_fatal_err(adapter); 545 } 546 547 void 548 t3_sge_prep(adapter_t *adap, struct sge_params *p) 549 { 550 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 551 552 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 553 nqsets *= adap->params.nports; 554 555 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 556 557 while (!powerof2(fl_q_size)) 558 fl_q_size--; 559 560 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 561 is_offload(adap); 562 563 if (use_16k) { 564 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 565 jumbo_buf_size = MJUM16BYTES; 566 } else { 567 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 568 jumbo_buf_size = MJUM9BYTES; 569 } 570 while (!powerof2(jumbo_q_size)) 571 jumbo_q_size--; 572 573 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 574 device_printf(adap->dev, 575 "Insufficient clusters and/or jumbo buffers.\n"); 576 577 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 578 579 for (i = 0; i < SGE_QSETS; ++i) { 580 struct qset_params *q = p->qset + i; 581 582 if (adap->params.nports > 2) { 583 q->coalesce_usecs = 50; 584 } else { 585 #ifdef INVARIANTS 586 q->coalesce_usecs = 10; 587 #else 588 q->coalesce_usecs = 5; 589 #endif 590 } 591 q->polling = 0; 592 q->rspq_size = RSPQ_Q_SIZE; 593 q->fl_size = fl_q_size; 594 q->jumbo_size = jumbo_q_size; 595 q->jumbo_buf_size = jumbo_buf_size; 596 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 597 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 598 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 599 q->cong_thres = 0; 600 } 601 } 602 603 int 604 t3_sge_alloc(adapter_t *sc) 605 { 606 607 /* The parent tag. */ 608 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 609 1, 0, /* algnmnt, boundary */ 610 BUS_SPACE_MAXADDR, /* lowaddr */ 611 BUS_SPACE_MAXADDR, /* highaddr */ 612 NULL, NULL, /* filter, filterarg */ 613 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 614 BUS_SPACE_UNRESTRICTED, /* nsegments */ 615 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 616 0, /* flags */ 617 NULL, NULL, /* lock, lockarg */ 618 &sc->parent_dmat)) { 619 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 620 return (ENOMEM); 621 } 622 623 /* 624 * DMA tag for normal sized RX frames 625 */ 626 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 627 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 628 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 629 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 630 return (ENOMEM); 631 } 632 633 /* 634 * DMA tag for jumbo sized RX frames. 
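 * The tag is sized for MJUM16BYTES, the largest cluster size that
 * t3_sge_prep() may select for the jumbo free list, so it also covers
 * the 9k case.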
635 */ 636 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 637 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 638 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 639 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 640 return (ENOMEM); 641 } 642 643 /* 644 * DMA tag for TX frames. 645 */ 646 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 647 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 648 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 649 NULL, NULL, &sc->tx_dmat)) { 650 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 651 return (ENOMEM); 652 } 653 654 return (0); 655 } 656 657 int 658 t3_sge_free(struct adapter * sc) 659 { 660 661 if (sc->tx_dmat != NULL) 662 bus_dma_tag_destroy(sc->tx_dmat); 663 664 if (sc->rx_jumbo_dmat != NULL) 665 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 666 667 if (sc->rx_dmat != NULL) 668 bus_dma_tag_destroy(sc->rx_dmat); 669 670 if (sc->parent_dmat != NULL) 671 bus_dma_tag_destroy(sc->parent_dmat); 672 673 return (0); 674 } 675 676 void 677 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 678 { 679 680 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 681 qs->rspq.polling = 0 /* p->polling */; 682 } 683 684 #if !defined(__i386__) && !defined(__amd64__) 685 static void 686 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 687 { 688 struct refill_fl_cb_arg *cb_arg = arg; 689 690 cb_arg->error = error; 691 cb_arg->seg = segs[0]; 692 cb_arg->nseg = nseg; 693 694 } 695 #endif 696 /** 697 * refill_fl - refill an SGE free-buffer list 698 * @sc: the controller softc 699 * @q: the free-list to refill 700 * @n: the number of new buffers to allocate 701 * 702 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 703 * The caller must assure that @n does not exceed the queue's capacity. 704 */ 705 static void 706 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 707 { 708 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 709 struct rx_desc *d = &q->desc[q->pidx]; 710 struct refill_fl_cb_arg cb_arg; 711 struct mbuf *m; 712 caddr_t cl; 713 int err; 714 715 cb_arg.error = 0; 716 while (n--) { 717 /* 718 * We allocate an uninitialized mbuf + cluster, mbuf is 719 * initialized after rx. 
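 * For zone_pack the mbuf and cluster come from a single packet-zone
 * allocation; otherwise the cluster and the mbuf header are allocated
 * separately and paired up here.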
720 */ 721 if (q->zone == zone_pack) { 722 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 723 break; 724 cl = m->m_ext.ext_buf; 725 } else { 726 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 727 break; 728 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 729 uma_zfree(q->zone, cl); 730 break; 731 } 732 } 733 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 734 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 735 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 736 uma_zfree(q->zone, cl); 737 goto done; 738 } 739 sd->flags |= RX_SW_DESC_MAP_CREATED; 740 } 741 #if !defined(__i386__) && !defined(__amd64__) 742 err = bus_dmamap_load(q->entry_tag, sd->map, 743 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 744 745 if (err != 0 || cb_arg.error) { 746 if (q->zone != zone_pack) 747 uma_zfree(q->zone, cl); 748 m_free(m); 749 goto done; 750 } 751 #else 752 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 753 #endif 754 sd->flags |= RX_SW_DESC_INUSE; 755 sd->rxsd_cl = cl; 756 sd->m = m; 757 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 758 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 759 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 760 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 761 762 d++; 763 sd++; 764 765 if (++q->pidx == q->size) { 766 q->pidx = 0; 767 q->gen ^= 1; 768 sd = q->sdesc; 769 d = q->desc; 770 } 771 q->credits++; 772 q->db_pending++; 773 } 774 775 done: 776 if (q->db_pending >= 32) { 777 q->db_pending = 0; 778 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 779 } 780 } 781 782 783 /** 784 * free_rx_bufs - free the Rx buffers on an SGE free list 785 * @sc: the controle softc 786 * @q: the SGE free list to clean up 787 * 788 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 789 * this queue should be stopped before calling this function. 790 */ 791 static void 792 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 793 { 794 u_int cidx = q->cidx; 795 796 while (q->credits--) { 797 struct rx_sw_desc *d = &q->sdesc[cidx]; 798 799 if (d->flags & RX_SW_DESC_INUSE) { 800 bus_dmamap_unload(q->entry_tag, d->map); 801 bus_dmamap_destroy(q->entry_tag, d->map); 802 if (q->zone == zone_pack) { 803 m_init(d->m, M_NOWAIT, MT_DATA, M_EXT); 804 uma_zfree(zone_pack, d->m); 805 } else { 806 m_init(d->m, M_NOWAIT, MT_DATA, 0); 807 uma_zfree(zone_mbuf, d->m); 808 uma_zfree(q->zone, d->rxsd_cl); 809 } 810 } 811 812 d->rxsd_cl = NULL; 813 d->m = NULL; 814 if (++cidx == q->size) 815 cidx = 0; 816 } 817 } 818 819 static __inline void 820 __refill_fl(adapter_t *adap, struct sge_fl *fl) 821 { 822 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 823 } 824 825 static __inline void 826 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 827 { 828 uint32_t reclaimable = fl->size - fl->credits; 829 830 if (reclaimable > 0) 831 refill_fl(adap, fl, min(max, reclaimable)); 832 } 833 834 /** 835 * recycle_rx_buf - recycle a receive buffer 836 * @adapter: the adapter 837 * @q: the SGE free list 838 * @idx: index of buffer to recycle 839 * 840 * Recycles the specified buffer on the given free list by adding it at 841 * the next available slot on the list. 842 */ 843 static void 844 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 845 { 846 struct rx_desc *from = &q->desc[idx]; 847 struct rx_desc *to = &q->desc[q->pidx]; 848 849 q->sdesc[q->pidx] = q->sdesc[idx]; 850 to->addr_lo = from->addr_lo; // already big endian 851 to->addr_hi = from->addr_hi; // likewise 852 wmb(); /* necessary ? 
 */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
	    len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
	    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
	    NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
	    TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
	    NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}

/**
 * sge_timer_cb - perform periodic maintenance of the SGE queue sets
 * @arg: the adapter
 *
 * Runs periodically from a timer to perform maintenance of the SGE queue
 * sets. It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while. We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up). Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty. We try to add only a few
 * buffers here, the queue will be replenished fully as these new buffers
 * are used up if memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
955 * 956 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 957 * fifo overflows and the FW doesn't implement any recovery scheme yet. 958 */ 959 static void 960 sge_timer_cb(void *arg) 961 { 962 adapter_t *sc = arg; 963 if ((sc->flags & USING_MSIX) == 0) { 964 965 struct port_info *pi; 966 struct sge_qset *qs; 967 struct sge_txq *txq; 968 int i, j; 969 int reclaim_ofl, refill_rx; 970 971 if (sc->open_device_map == 0) 972 return; 973 974 for (i = 0; i < sc->params.nports; i++) { 975 pi = &sc->port[i]; 976 for (j = 0; j < pi->nqsets; j++) { 977 qs = &sc->sge.qs[pi->first_qset + j]; 978 txq = &qs->txq[0]; 979 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 980 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 981 (qs->fl[1].credits < qs->fl[1].size)); 982 if (reclaim_ofl || refill_rx) { 983 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 984 break; 985 } 986 } 987 } 988 } 989 990 if (sc->params.nports > 2) { 991 int i; 992 993 for_each_port(sc, i) { 994 struct port_info *pi = &sc->port[i]; 995 996 t3_write_reg(sc, A_SG_KDOORBELL, 997 F_SELEGRCNTX | 998 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 999 } 1000 } 1001 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 1002 sc->open_device_map != 0) 1003 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1004 } 1005 1006 /* 1007 * This is meant to be a catch-all function to keep sge state private 1008 * to sge.c 1009 * 1010 */ 1011 int 1012 t3_sge_init_adapter(adapter_t *sc) 1013 { 1014 callout_init(&sc->sge_timer_ch, 1); 1015 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1016 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 1017 return (0); 1018 } 1019 1020 int 1021 t3_sge_reset_adapter(adapter_t *sc) 1022 { 1023 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1024 return (0); 1025 } 1026 1027 int 1028 t3_sge_init_port(struct port_info *pi) 1029 { 1030 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1031 return (0); 1032 } 1033 1034 /** 1035 * refill_rspq - replenish an SGE response queue 1036 * @adapter: the adapter 1037 * @q: the response queue to replenish 1038 * @credits: how many new responses to make available 1039 * 1040 * Replenishes a response queue by making the supplied number of responses 1041 * available to HW. 1042 */ 1043 static __inline void 1044 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1045 { 1046 1047 /* mbufs are allocated on demand when a rspq entry is processed. */ 1048 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1049 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1050 } 1051 1052 static void 1053 sge_txq_reclaim_handler(void *arg, int ncount) 1054 { 1055 struct sge_qset *qs = arg; 1056 int i; 1057 1058 for (i = 0; i < 3; i++) 1059 reclaim_completed_tx(qs, 16, i); 1060 } 1061 1062 static void 1063 sge_timer_reclaim(void *arg, int ncount) 1064 { 1065 struct port_info *pi = arg; 1066 int i, nqsets = pi->nqsets; 1067 adapter_t *sc = pi->adapter; 1068 struct sge_qset *qs; 1069 struct mtx *lock; 1070 1071 KASSERT((sc->flags & USING_MSIX) == 0, 1072 ("can't call timer reclaim for msi-x")); 1073 1074 for (i = 0; i < nqsets; i++) { 1075 qs = &sc->sge.qs[pi->first_qset + i]; 1076 1077 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1078 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 1079 &sc->sge.qs[0].rspq.lock; 1080 1081 if (mtx_trylock(lock)) { 1082 /* XXX currently assume that we are *NOT* polling */ 1083 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1084 1085 if (qs->fl[0].credits < qs->fl[0].size - 16) 1086 __refill_fl(sc, &qs->fl[0]); 1087 if (qs->fl[1].credits < qs->fl[1].size - 16) 1088 __refill_fl(sc, &qs->fl[1]); 1089 1090 if (status & (1 << qs->rspq.cntxt_id)) { 1091 if (qs->rspq.credits) { 1092 refill_rspq(sc, &qs->rspq, 1); 1093 qs->rspq.credits--; 1094 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1095 1 << qs->rspq.cntxt_id); 1096 } 1097 } 1098 mtx_unlock(lock); 1099 } 1100 } 1101 } 1102 1103 /** 1104 * init_qset_cntxt - initialize an SGE queue set context info 1105 * @qs: the queue set 1106 * @id: the queue set id 1107 * 1108 * Initializes the TIDs and context ids for the queues of a queue set. 1109 */ 1110 static void 1111 init_qset_cntxt(struct sge_qset *qs, u_int id) 1112 { 1113 1114 qs->rspq.cntxt_id = id; 1115 qs->fl[0].cntxt_id = 2 * id; 1116 qs->fl[1].cntxt_id = 2 * id + 1; 1117 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1118 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1119 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1120 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1121 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1122 1123 /* XXX: a sane limit is needed instead of INT_MAX */ 1124 mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX); 1125 mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX); 1126 mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX); 1127 } 1128 1129 1130 static void 1131 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1132 { 1133 txq->in_use += ndesc; 1134 /* 1135 * XXX we don't handle stopping of queue 1136 * presumably start handles this when we bump against the end 1137 */ 1138 txqs->gen = txq->gen; 1139 txq->unacked += ndesc; 1140 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1141 txq->unacked &= 31; 1142 txqs->pidx = txq->pidx; 1143 txq->pidx += ndesc; 1144 #ifdef INVARIANTS 1145 if (((txqs->pidx > txq->cidx) && 1146 (txq->pidx < txqs->pidx) && 1147 (txq->pidx >= txq->cidx)) || 1148 ((txqs->pidx < txq->cidx) && 1149 (txq->pidx >= txq-> cidx)) || 1150 ((txqs->pidx < txq->cidx) && 1151 (txq->cidx < txqs->pidx))) 1152 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1153 txqs->pidx, txq->pidx, txq->cidx); 1154 #endif 1155 if (txq->pidx >= txq->size) { 1156 txq->pidx -= txq->size; 1157 txq->gen ^= 1; 1158 } 1159 1160 } 1161 1162 /** 1163 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1164 * @m: the packet mbufs 1165 * @nsegs: the number of segments 1166 * 1167 * Returns the number of Tx descriptors needed for the given Ethernet 1168 * packet. Ethernet packets require addition of WR and CPL headers. 1169 */ 1170 static __inline unsigned int 1171 calc_tx_descs(const struct mbuf *m, int nsegs) 1172 { 1173 unsigned int flits; 1174 1175 if (m->m_pkthdr.len <= PIO_LEN) 1176 return 1; 1177 1178 flits = sgl_len(nsegs) + 2; 1179 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1180 flits++; 1181 1182 return flits_to_desc(flits); 1183 } 1184 1185 /** 1186 * make_sgl - populate a scatter/gather list for a packet 1187 * @sgp: the SGL to populate 1188 * @segs: the packet dma segments 1189 * @nsegs: the number of segments 1190 * 1191 * Generates a scatter/gather list for the buffers that make up a packet 1192 * and returns the SGL size in 8-byte words. The caller must size the SGL 1193 * appropriately. 
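 * Each sg_ent holds two length/address pairs, which is why sgl_len()
 * above charges three flits for every two segments.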
1194 */ 1195 static __inline void 1196 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1197 { 1198 int i, idx; 1199 1200 for (idx = 0, i = 0; i < nsegs; i++) { 1201 /* 1202 * firmware doesn't like empty segments 1203 */ 1204 if (segs[i].ds_len == 0) 1205 continue; 1206 if (i && idx == 0) 1207 ++sgp; 1208 1209 sgp->len[idx] = htobe32(segs[i].ds_len); 1210 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1211 idx ^= 1; 1212 } 1213 1214 if (idx) { 1215 sgp->len[idx] = 0; 1216 sgp->addr[idx] = 0; 1217 } 1218 } 1219 1220 /** 1221 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1222 * @adap: the adapter 1223 * @q: the Tx queue 1224 * 1225 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1226 * where the HW is going to sleep just after we checked, however, 1227 * then the interrupt handler will detect the outstanding TX packet 1228 * and ring the doorbell for us. 1229 * 1230 * When GTS is disabled we unconditionally ring the doorbell. 1231 */ 1232 static __inline void 1233 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) 1234 { 1235 #if USE_GTS 1236 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1237 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1238 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1239 #ifdef T3_TRACE 1240 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1241 q->cntxt_id); 1242 #endif 1243 t3_write_reg(adap, A_SG_KDOORBELL, 1244 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1245 } 1246 #else 1247 if (mustring || ++q->db_pending >= 32) { 1248 wmb(); /* write descriptors before telling HW */ 1249 t3_write_reg(adap, A_SG_KDOORBELL, 1250 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1251 q->db_pending = 0; 1252 } 1253 #endif 1254 } 1255 1256 static __inline void 1257 wr_gen2(struct tx_desc *d, unsigned int gen) 1258 { 1259 #if SGE_NUM_GENBITS == 2 1260 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1261 #endif 1262 } 1263 1264 /** 1265 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1266 * @ndesc: number of Tx descriptors spanned by the SGL 1267 * @txd: first Tx descriptor to be written 1268 * @txqs: txq state (generation and producer index) 1269 * @txq: the SGE Tx queue 1270 * @sgl: the SGL 1271 * @flits: number of flits to the start of the SGL in the first descriptor 1272 * @sgl_flits: the SGL size in flits 1273 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1274 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1275 * 1276 * Write a work request header and an associated SGL. If the SGL is 1277 * small enough to fit into one Tx descriptor it has already been written 1278 * and we just need to write the WR header. Otherwise we distribute the 1279 * SGL across the number of descriptors it spans. 
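 * For a multi-descriptor WR the first descriptor's low header word is
 * written last, carrying the original generation value, so the SGE
 * never sees a partially written work request.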
1280 */ 1281 static void 1282 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1283 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1284 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1285 { 1286 1287 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1288 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1289 1290 if (__predict_true(ndesc == 1)) { 1291 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1292 V_WR_SGLSFLT(flits)) | wr_hi, 1293 htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | 1294 wr_lo); 1295 1296 wr_gen2(txd, txqs->gen); 1297 1298 } else { 1299 unsigned int ogen = txqs->gen; 1300 const uint64_t *fp = (const uint64_t *)sgl; 1301 struct work_request_hdr *wp = wrp; 1302 1303 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1304 V_WR_SGLSFLT(flits)) | wr_hi; 1305 1306 while (sgl_flits) { 1307 unsigned int avail = WR_FLITS - flits; 1308 1309 if (avail > sgl_flits) 1310 avail = sgl_flits; 1311 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1312 sgl_flits -= avail; 1313 ndesc--; 1314 if (!sgl_flits) 1315 break; 1316 1317 fp += avail; 1318 txd++; 1319 txsd++; 1320 if (++txqs->pidx == txq->size) { 1321 txqs->pidx = 0; 1322 txqs->gen ^= 1; 1323 txd = txq->desc; 1324 txsd = txq->sdesc; 1325 } 1326 1327 /* 1328 * when the head of the mbuf chain 1329 * is freed all clusters will be freed 1330 * with it 1331 */ 1332 wrp = (struct work_request_hdr *)txd; 1333 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1334 V_WR_SGLSFLT(1)) | wr_hi; 1335 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1336 sgl_flits + 1)) | 1337 V_WR_GEN(txqs->gen)) | wr_lo; 1338 wr_gen2(txd, txqs->gen); 1339 flits = 1; 1340 } 1341 wrp->wrh_hi |= htonl(F_WR_EOP); 1342 wmb(); 1343 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1344 wr_gen2((struct tx_desc *)wp, ogen); 1345 } 1346 } 1347 1348 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1349 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1350 1351 #define GET_VTAG(cntrl, m) \ 1352 do { \ 1353 if ((m)->m_flags & M_VLANTAG) \ 1354 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1355 } while (0) 1356 1357 static int 1358 t3_encap(struct sge_qset *qs, struct mbuf **m) 1359 { 1360 adapter_t *sc; 1361 struct mbuf *m0; 1362 struct sge_txq *txq; 1363 struct txq_state txqs; 1364 struct port_info *pi; 1365 unsigned int ndesc, flits, cntrl, mlen; 1366 int err, nsegs, tso_info = 0; 1367 1368 struct work_request_hdr *wrp; 1369 struct tx_sw_desc *txsd; 1370 struct sg_ent *sgp, *sgl; 1371 uint32_t wr_hi, wr_lo, sgl_flits; 1372 bus_dma_segment_t segs[TX_MAX_SEGS]; 1373 1374 struct tx_desc *txd; 1375 1376 pi = qs->port; 1377 sc = pi->adapter; 1378 txq = &qs->txq[TXQ_ETH]; 1379 txd = &txq->desc[txq->pidx]; 1380 txsd = &txq->sdesc[txq->pidx]; 1381 sgl = txq->txq_sgl; 1382 1383 prefetch(txd); 1384 m0 = *m; 1385 1386 mtx_assert(&qs->lock, MA_OWNED); 1387 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1388 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1389 1390 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1391 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1392 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1393 1394 if (m0->m_nextpkt != NULL) { 1395 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1396 ndesc = 1; 1397 mlen = 0; 1398 } else { 1399 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1400 &m0, segs, &nsegs))) { 1401 if (cxgb_debug) 1402 printf("failed ... 
err=%d\n", err); 1403 return (err); 1404 } 1405 mlen = m0->m_pkthdr.len; 1406 ndesc = calc_tx_descs(m0, nsegs); 1407 } 1408 txq_prod(txq, ndesc, &txqs); 1409 1410 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1411 txsd->m = m0; 1412 1413 if (m0->m_nextpkt != NULL) { 1414 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1415 int i, fidx; 1416 1417 if (nsegs > 7) 1418 panic("trying to coalesce %d packets in to one WR", nsegs); 1419 txq->txq_coalesced += nsegs; 1420 wrp = (struct work_request_hdr *)txd; 1421 flits = nsegs*2 + 1; 1422 1423 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1424 struct cpl_tx_pkt_batch_entry *cbe; 1425 uint64_t flit; 1426 uint32_t *hflit = (uint32_t *)&flit; 1427 int cflags = m0->m_pkthdr.csum_flags; 1428 1429 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1430 GET_VTAG(cntrl, m0); 1431 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1432 if (__predict_false(!(cflags & CSUM_IP))) 1433 cntrl |= F_TXPKT_IPCSUM_DIS; 1434 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | 1435 CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1436 cntrl |= F_TXPKT_L4CSUM_DIS; 1437 1438 hflit[0] = htonl(cntrl); 1439 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1440 flit |= htobe64(1 << 24); 1441 cbe = &cpl_batch->pkt_entry[i]; 1442 cbe->cntrl = hflit[0]; 1443 cbe->len = hflit[1]; 1444 cbe->addr = htobe64(segs[i].ds_addr); 1445 } 1446 1447 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1448 V_WR_SGLSFLT(flits)) | 1449 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1450 wr_lo = htonl(V_WR_LEN(flits) | 1451 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1452 set_wr_hdr(wrp, wr_hi, wr_lo); 1453 wmb(); 1454 ETHER_BPF_MTAP(pi->ifp, m0); 1455 wr_gen2(txd, txqs.gen); 1456 check_ring_tx_db(sc, txq, 0); 1457 return (0); 1458 } else if (tso_info) { 1459 uint16_t eth_type; 1460 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1461 struct ether_header *eh; 1462 void *l3hdr; 1463 struct tcphdr *tcp; 1464 1465 txd->flit[2] = 0; 1466 GET_VTAG(cntrl, m0); 1467 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1468 hdr->cntrl = htonl(cntrl); 1469 hdr->len = htonl(mlen | 0x80000000); 1470 1471 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1472 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", 1473 m0, mlen, m0->m_pkthdr.tso_segsz, 1474 (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); 1475 panic("tx tso packet too small"); 1476 } 1477 1478 /* Make sure that ether, ip, tcp headers are all in m0 */ 1479 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1480 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1481 if (__predict_false(m0 == NULL)) { 1482 /* XXX panic probably an overreaction */ 1483 panic("couldn't fit header into mbuf"); 1484 } 1485 } 1486 1487 eh = mtod(m0, struct ether_header *); 1488 eth_type = eh->ether_type; 1489 if (eth_type == htons(ETHERTYPE_VLAN)) { 1490 struct ether_vlan_header *evh = (void *)eh; 1491 1492 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); 1493 l3hdr = evh + 1; 1494 eth_type = evh->evl_proto; 1495 } else { 1496 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); 1497 l3hdr = eh + 1; 1498 } 1499 1500 if (eth_type == htons(ETHERTYPE_IP)) { 1501 struct ip *ip = l3hdr; 1502 1503 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); 1504 tcp = (struct tcphdr *)(ip + 1); 1505 } else if (eth_type == htons(ETHERTYPE_IPV6)) { 1506 struct ip6_hdr *ip6 = l3hdr; 1507 1508 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, 1509 ("%s: CSUM_TSO with ip6_nxt %d", 1510 __func__, ip6->ip6_nxt)); 1511 1512 tso_info |= F_LSO_IPV6; 1513 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); 1514 tcp = 
(struct tcphdr *)(ip6 + 1); 1515 } else 1516 panic("%s: CSUM_TSO but neither ip nor ip6", __func__); 1517 1518 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); 1519 hdr->lso_info = htonl(tso_info); 1520 1521 if (__predict_false(mlen <= PIO_LEN)) { 1522 /* 1523 * pkt not undersized but fits in PIO_LEN 1524 * Indicates a TSO bug at the higher levels. 1525 */ 1526 txsd->m = NULL; 1527 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1528 flits = (mlen + 7) / 8 + 3; 1529 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1530 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1531 F_WR_SOP | F_WR_EOP | txqs.compl); 1532 wr_lo = htonl(V_WR_LEN(flits) | 1533 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1534 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1535 wmb(); 1536 ETHER_BPF_MTAP(pi->ifp, m0); 1537 wr_gen2(txd, txqs.gen); 1538 check_ring_tx_db(sc, txq, 0); 1539 m_freem(m0); 1540 return (0); 1541 } 1542 flits = 3; 1543 } else { 1544 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1545 1546 GET_VTAG(cntrl, m0); 1547 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1548 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1549 cntrl |= F_TXPKT_IPCSUM_DIS; 1550 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | 1551 CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1552 cntrl |= F_TXPKT_L4CSUM_DIS; 1553 cpl->cntrl = htonl(cntrl); 1554 cpl->len = htonl(mlen | 0x80000000); 1555 1556 if (mlen <= PIO_LEN) { 1557 txsd->m = NULL; 1558 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1559 flits = (mlen + 7) / 8 + 2; 1560 1561 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1562 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1563 F_WR_SOP | F_WR_EOP | txqs.compl); 1564 wr_lo = htonl(V_WR_LEN(flits) | 1565 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1566 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1567 wmb(); 1568 ETHER_BPF_MTAP(pi->ifp, m0); 1569 wr_gen2(txd, txqs.gen); 1570 check_ring_tx_db(sc, txq, 0); 1571 m_freem(m0); 1572 return (0); 1573 } 1574 flits = 2; 1575 } 1576 wrp = (struct work_request_hdr *)txd; 1577 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1578 make_sgl(sgp, segs, nsegs); 1579 1580 sgl_flits = sgl_len(nsegs); 1581 1582 ETHER_BPF_MTAP(pi->ifp, m0); 1583 1584 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1585 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1586 wr_lo = htonl(V_WR_TID(txq->token)); 1587 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1588 sgl_flits, wr_hi, wr_lo); 1589 check_ring_tx_db(sc, txq, 0); 1590 1591 return (0); 1592 } 1593 1594 #ifdef DEBUGNET 1595 int 1596 cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m) 1597 { 1598 int error; 1599 1600 error = t3_encap(qs, m); 1601 if (error == 0) 1602 check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1); 1603 else if (*m != NULL) { 1604 m_freem(*m); 1605 *m = NULL; 1606 } 1607 return (error); 1608 } 1609 #endif 1610 1611 void 1612 cxgb_tx_watchdog(void *arg) 1613 { 1614 struct sge_qset *qs = arg; 1615 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1616 1617 if (qs->coalescing != 0 && 1618 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1619 TXQ_RING_EMPTY(qs)) 1620 qs->coalescing = 0; 1621 else if (qs->coalescing == 0 && 1622 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1623 qs->coalescing = 1; 1624 if (TXQ_TRYLOCK(qs)) { 1625 qs->qs_flags |= QS_FLUSHING; 1626 cxgb_start_locked(qs); 1627 qs->qs_flags &= ~QS_FLUSHING; 1628 TXQ_UNLOCK(qs); 1629 } 1630 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1631 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1632 qs, txq->txq_watchdog.c_cpu); 1633 } 1634 1635 static void 1636 cxgb_tx_timeout(void *arg) 1637 { 1638 struct sge_qset *qs = arg; 1639 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1640 1641 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1642 qs->coalescing = 1; 1643 if (TXQ_TRYLOCK(qs)) { 1644 qs->qs_flags |= QS_TIMEOUT; 1645 cxgb_start_locked(qs); 1646 qs->qs_flags &= ~QS_TIMEOUT; 1647 TXQ_UNLOCK(qs); 1648 } 1649 } 1650 1651 static void 1652 cxgb_start_locked(struct sge_qset *qs) 1653 { 1654 struct mbuf *m_head = NULL; 1655 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1656 struct port_info *pi = qs->port; 1657 struct ifnet *ifp = pi->ifp; 1658 1659 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1660 reclaim_completed_tx(qs, 0, TXQ_ETH); 1661 1662 if (!pi->link_config.link_ok) { 1663 TXQ_RING_FLUSH(qs); 1664 return; 1665 } 1666 TXQ_LOCK_ASSERT(qs); 1667 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1668 pi->link_config.link_ok) { 1669 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1670 1671 if (txq->size - txq->in_use <= TX_MAX_DESC) 1672 break; 1673 1674 if ((m_head = cxgb_dequeue(qs)) == NULL) 1675 break; 1676 /* 1677 * Encapsulation can modify our pointer, and or make it 1678 * NULL on failure. In that event, we can't requeue. 
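 * We simply stop here; anything left in m_head is freed after the loop.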
1679 */ 1680 if (t3_encap(qs, &m_head) || m_head == NULL) 1681 break; 1682 1683 m_head = NULL; 1684 } 1685 1686 if (txq->db_pending) 1687 check_ring_tx_db(pi->adapter, txq, 1); 1688 1689 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1690 pi->link_config.link_ok) 1691 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1692 qs, txq->txq_timer.c_cpu); 1693 if (m_head != NULL) 1694 m_freem(m_head); 1695 } 1696 1697 static int 1698 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1699 { 1700 struct port_info *pi = qs->port; 1701 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1702 struct buf_ring *br = txq->txq_mr; 1703 int error, avail; 1704 1705 avail = txq->size - txq->in_use; 1706 TXQ_LOCK_ASSERT(qs); 1707 1708 /* 1709 * We can only do a direct transmit if the following are true: 1710 * - we aren't coalescing (ring < 3/4 full) 1711 * - the link is up -- checked in caller 1712 * - there are no packets enqueued already 1713 * - there is space in hardware transmit queue 1714 */ 1715 if (check_pkt_coalesce(qs) == 0 && 1716 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1717 if (t3_encap(qs, &m)) { 1718 if (m != NULL && 1719 (error = drbr_enqueue(ifp, br, m)) != 0) 1720 return (error); 1721 } else { 1722 if (txq->db_pending) 1723 check_ring_tx_db(pi->adapter, txq, 1); 1724 1725 /* 1726 * We've bypassed the buf ring so we need to update 1727 * the stats directly 1728 */ 1729 txq->txq_direct_packets++; 1730 txq->txq_direct_bytes += m->m_pkthdr.len; 1731 } 1732 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1733 return (error); 1734 1735 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1736 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1737 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1738 cxgb_start_locked(qs); 1739 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1740 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1741 qs, txq->txq_timer.c_cpu); 1742 return (0); 1743 } 1744 1745 int 1746 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1747 { 1748 struct sge_qset *qs; 1749 struct port_info *pi = ifp->if_softc; 1750 int error, qidx = pi->first_qset; 1751 1752 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1753 ||(!pi->link_config.link_ok)) { 1754 m_freem(m); 1755 return (0); 1756 } 1757 1758 /* check if flowid is set */ 1759 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 1760 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1761 1762 qs = &pi->adapter->sge.qs[qidx]; 1763 1764 if (TXQ_TRYLOCK(qs)) { 1765 /* XXX running */ 1766 error = cxgb_transmit_locked(ifp, qs, m); 1767 TXQ_UNLOCK(qs); 1768 } else 1769 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1770 return (error); 1771 } 1772 1773 void 1774 cxgb_qflush(struct ifnet *ifp) 1775 { 1776 /* 1777 * flush any enqueued mbufs in the buf_rings 1778 * and in the transmit queues 1779 * no-op for now 1780 */ 1781 return; 1782 } 1783 1784 /** 1785 * write_imm - write a packet into a Tx descriptor as immediate data 1786 * @d: the Tx descriptor to write 1787 * @m: the packet 1788 * @len: the length of packet data to write as immediate data 1789 * @gen: the generation bit value to write 1790 * 1791 * Writes a packet as immediate data into a Tx descriptor. The packet 1792 * contains a work request at its beginning. We must write the packet 1793 * carefully so the SGE doesn't read accidentally before it's written in 1794 * its entirety. 
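 * The body is copied in first and the WR header second; when two
 * generation bits are in use a write barrier precedes the final
 * generation flit written by wr_gen2().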
1795 */ 1796 static __inline void 1797 write_imm(struct tx_desc *d, caddr_t src, 1798 unsigned int len, unsigned int gen) 1799 { 1800 struct work_request_hdr *from = (struct work_request_hdr *)src; 1801 struct work_request_hdr *to = (struct work_request_hdr *)d; 1802 uint32_t wr_hi, wr_lo; 1803 1804 KASSERT(len <= WR_LEN && len >= sizeof(*from), 1805 ("%s: invalid len %d", __func__, len)); 1806 1807 memcpy(&to[1], &from[1], len - sizeof(*from)); 1808 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1809 V_WR_BCNTLFLT(len & 7)); 1810 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); 1811 set_wr_hdr(to, wr_hi, wr_lo); 1812 wmb(); 1813 wr_gen2(d, gen); 1814 } 1815 1816 /** 1817 * check_desc_avail - check descriptor availability on a send queue 1818 * @adap: the adapter 1819 * @q: the TX queue 1820 * @m: the packet needing the descriptors 1821 * @ndesc: the number of Tx descriptors needed 1822 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1823 * 1824 * Checks if the requested number of Tx descriptors is available on an 1825 * SGE send queue. If the queue is already suspended or not enough 1826 * descriptors are available the packet is queued for later transmission. 1827 * Must be called with the Tx queue locked. 1828 * 1829 * Returns 0 if enough descriptors are available, 1 if there aren't 1830 * enough descriptors and the packet has been queued, and 2 if the caller 1831 * needs to retry because there weren't enough descriptors at the 1832 * beginning of the call but some freed up in the mean time. 1833 */ 1834 static __inline int 1835 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1836 struct mbuf *m, unsigned int ndesc, 1837 unsigned int qid) 1838 { 1839 /* 1840 * XXX We currently only use this for checking the control queue 1841 * the control queue is only used for binding qsets which happens 1842 * at init time so we are guaranteed enough descriptors 1843 */ 1844 if (__predict_false(mbufq_len(&q->sendq))) { 1845 addq_exit: (void )mbufq_enqueue(&q->sendq, m); 1846 return 1; 1847 } 1848 if (__predict_false(q->size - q->in_use < ndesc)) { 1849 1850 struct sge_qset *qs = txq_to_qset(q, qid); 1851 1852 setbit(&qs->txq_stopped, qid); 1853 if (should_restart_tx(q) && 1854 test_and_clear_bit(qid, &qs->txq_stopped)) 1855 return 2; 1856 1857 q->stops++; 1858 goto addq_exit; 1859 } 1860 return 0; 1861 } 1862 1863 1864 /** 1865 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1866 * @q: the SGE control Tx queue 1867 * 1868 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1869 * that send only immediate data (presently just the control queues) and 1870 * thus do not have any mbufs 1871 */ 1872 static __inline void 1873 reclaim_completed_tx_imm(struct sge_txq *q) 1874 { 1875 unsigned int reclaim = q->processed - q->cleaned; 1876 1877 q->in_use -= reclaim; 1878 q->cleaned += reclaim; 1879 } 1880 1881 /** 1882 * ctrl_xmit - send a packet through an SGE control Tx queue 1883 * @adap: the adapter 1884 * @q: the control queue 1885 * @m: the packet 1886 * 1887 * Send a packet through an SGE control Tx queue. Packets sent through 1888 * a control queue must fit entirely as immediate data in a single Tx 1889 * descriptor and have no page fragments. 
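 * In practice this means m->m_len may not exceed WR_LEN, as asserted
 * below.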
1890 */
1891 static int
1892 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1893 {
1894 int ret;
1895 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1896 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1897
1898 KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1899
1900 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1901 wrp->wrh_lo = htonl(V_WR_TID(q->token));
1902
1903 TXQ_LOCK(qs);
1904 again: reclaim_completed_tx_imm(q);
1905
1906 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1907 if (__predict_false(ret)) {
1908 if (ret == 1) {
1909 TXQ_UNLOCK(qs);
1910 return (ENOSPC);
1911 }
1912 goto again;
1913 }
1914 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1915
1916 q->in_use++;
1917 if (++q->pidx >= q->size) {
1918 q->pidx = 0;
1919 q->gen ^= 1;
1920 }
1921 TXQ_UNLOCK(qs);
1922 wmb();
1923 t3_write_reg(adap, A_SG_KDOORBELL,
1924 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1925
1926 m_free(m);
1927 return (0);
1928 }
1929
1930
1931 /**
1932 * restart_ctrlq - restart a suspended control queue
1933 * @qs: the queue set containing the control queue
1934 *
1935 * Resumes transmission on a suspended Tx control queue.
1936 */
1937 static void
1938 restart_ctrlq(void *data, int npending)
1939 {
1940 struct mbuf *m;
1941 struct sge_qset *qs = (struct sge_qset *)data;
1942 struct sge_txq *q = &qs->txq[TXQ_CTRL];
1943 adapter_t *adap = qs->port->adapter;
1944
1945 TXQ_LOCK(qs);
1946 again: reclaim_completed_tx_imm(q);
1947
1948 while (q->in_use < q->size &&
1949 (m = mbufq_dequeue(&q->sendq)) != NULL) {
1950
1951 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1952 m_free(m);
1953
1954 if (++q->pidx >= q->size) {
1955 q->pidx = 0;
1956 q->gen ^= 1;
1957 }
1958 q->in_use++;
1959 }
1960 if (mbufq_len(&q->sendq)) {
1961 setbit(&qs->txq_stopped, TXQ_CTRL);
1962
1963 if (should_restart_tx(q) &&
1964 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1965 goto again;
1966 q->stops++;
1967 }
1968 TXQ_UNLOCK(qs);
1969 t3_write_reg(adap, A_SG_KDOORBELL,
1970 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1971 }
1972
1973
1974 /*
1975 * Send a management message through control queue 0.
1976 */
1977 int
1978 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1979 {
1980 return ctrl_xmit(adap, &adap->sge.qs[0], m);
1981 }
1982
1983 /**
1984 * t3_free_qset - free the resources of an SGE queue set
1985 * @sc: the controller owning the queue set
1986 * @q: the queue set
1987 *
1988 * Release the HW and SW resources associated with an SGE queue set, such
1989 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
1990 * queue set must be quiesced prior to calling this.
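 *
 * The caller is expected to hold the queue set's Tx lock on entry;
 * t3_free_qset() itself drops and destroys that lock, so the call
 * pattern used by t3_free_sge_resources() below is simply:
 *
 *	TXQ_LOCK(&sc->sge.qs[i]);
 *	t3_free_qset(sc, &sc->sge.qs[i]);
 *
 * with no unlock afterwards.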
1991 */
1992 static void
1993 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1994 {
1995 int i;
1996
1997 reclaim_completed_tx(q, 0, TXQ_ETH);
1998 if (q->txq[TXQ_ETH].txq_mr != NULL)
1999 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2000 if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2001 ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2002 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2003 }
2004
2005 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2006 if (q->fl[i].desc) {
2007 mtx_lock_spin(&sc->sge.reg_lock);
2008 t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2009 mtx_unlock_spin(&sc->sge.reg_lock);
2010 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2011 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2012 q->fl[i].desc_map);
2013 bus_dma_tag_destroy(q->fl[i].desc_tag);
2014 bus_dma_tag_destroy(q->fl[i].entry_tag);
2015 }
2016 if (q->fl[i].sdesc) {
2017 free_rx_bufs(sc, &q->fl[i]);
2018 free(q->fl[i].sdesc, M_DEVBUF);
2019 }
2020 }
2021
2022 mtx_unlock(&q->lock);
2023 MTX_DESTROY(&q->lock);
2024 for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2025 if (q->txq[i].desc) {
2026 mtx_lock_spin(&sc->sge.reg_lock);
2027 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2028 mtx_unlock_spin(&sc->sge.reg_lock);
2029 bus_dmamap_unload(q->txq[i].desc_tag,
2030 q->txq[i].desc_map);
2031 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2032 q->txq[i].desc_map);
2033 bus_dma_tag_destroy(q->txq[i].desc_tag);
2034 bus_dma_tag_destroy(q->txq[i].entry_tag);
2035 }
2036 if (q->txq[i].sdesc) {
2037 free(q->txq[i].sdesc, M_DEVBUF);
2038 }
2039 }
2040
2041 if (q->rspq.desc) {
2042 mtx_lock_spin(&sc->sge.reg_lock);
2043 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2044 mtx_unlock_spin(&sc->sge.reg_lock);
2045
2046 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2047 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2048 q->rspq.desc_map);
2049 bus_dma_tag_destroy(q->rspq.desc_tag);
2050 MTX_DESTROY(&q->rspq.lock);
2051 }
2052
2053 #if defined(INET6) || defined(INET)
2054 tcp_lro_free(&q->lro.ctrl);
2055 #endif
2056
2057 bzero(q, sizeof(*q));
2058 }
2059
2060 /**
2061 * t3_free_sge_resources - free SGE resources
2062 * @sc: the adapter softc
* @nqsets: the number of queue sets to free
2063 *
2064 * Frees resources used by the SGE queue sets.
2065 */
2066 void
2067 t3_free_sge_resources(adapter_t *sc, int nqsets)
2068 {
2069 int i;
2070
2071 for (i = 0; i < nqsets; ++i) {
2072 TXQ_LOCK(&sc->sge.qs[i]);
2073 t3_free_qset(sc, &sc->sge.qs[i]);
2074 }
2075 }
2076
2077 /**
2078 * t3_sge_start - enable SGE
2079 * @sc: the controller softc
2080 *
2081 * Enables the SGE for DMAs. This is the last step in starting packet
2082 * transfers.
2083 */
2084 void
2085 t3_sge_start(adapter_t *sc)
2086 {
2087 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2088 }
2089
2090 /**
2091 * t3_sge_stop - disable SGE operation
2092 * @sc: the adapter
2093 *
2094 * Disables the DMA engine. This can be called in emergencies (e.g.,
2095 * from error interrupts) or from normal process context. In the latter
2096 * case it also disables any pending queue restart tasklets. Note that
2097 * if it is called in interrupt context it cannot disable the restart
2098 * tasklets as it cannot wait; however, the tasklets will have no effect
2099 * since the doorbells are disabled and the driver will call this again
2100 * later from process context, at which time the tasklets will be stopped
2101 * if they are still running.
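 *
 * In the process-context case, stopping the restart tasks would amount
 * to draining them on the adapter taskqueue, as in the currently
 * disabled ("notyet") block in the function body:
 *
 *	taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 *	taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);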
2102 */
2103 void
2104 t3_sge_stop(adapter_t *sc)
2105 {
2106 int i, nqsets;
2107
2108 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2109
2110 if (sc->tq == NULL)
2111 return;
2112
2113 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2114 nqsets += sc->port[i].nqsets;
2115 #ifdef notyet
2116 /*
2117 *
2118 * XXX
2119 */
2120 for (i = 0; i < nqsets; ++i) {
2121 struct sge_qset *qs = &sc->sge.qs[i];
2122
2123 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2124 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2125 }
2126 #endif
2127 }
2128
2129 /**
2130 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2131 * @qs: the queue set owning the Tx queue
2132 * @reclaimable: the number of descriptors to reclaim
2133 * @queue: the index of the Tx queue within the queue set
2134 *
2135 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2136 * Tx buffers. Called with the Tx queue lock held.
2137 *
2138 * Descriptors that have no mbuf attached are simply skipped and
2139 * counted in the queue's txq_skipped statistic rather than being
2140 * freed.
2141 */
2142 void
2143 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2144 {
2145 struct tx_sw_desc *txsd;
2146 unsigned int cidx, mask;
2147 struct sge_txq *q = &qs->txq[queue];
2148
2149 #ifdef T3_TRACE
2150 T3_TRACE2(sc->tb[q->cntxt_id & 7],
2151 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2152 #endif
2153 cidx = q->cidx;
2154 mask = q->size - 1;
2155 txsd = &q->sdesc[cidx];
2156
2157 mtx_assert(&qs->lock, MA_OWNED);
2158 while (reclaimable--) {
2159 prefetch(q->sdesc[(cidx + 1) & mask].m);
2160 prefetch(q->sdesc[(cidx + 2) & mask].m);
2161
2162 if (txsd->m != NULL) {
2163 if (txsd->flags & TX_SW_DESC_MAPPED) {
2164 bus_dmamap_unload(q->entry_tag, txsd->map);
2165 txsd->flags &= ~TX_SW_DESC_MAPPED;
2166 }
2167 m_freem_list(txsd->m);
2168 txsd->m = NULL;
2169 } else
2170 q->txq_skipped++;
2171
2172 ++txsd;
2173 if (++cidx == q->size) {
2174 cidx = 0;
2175 txsd = q->sdesc;
2176 }
2177 }
2178 q->cidx = cidx;
2179
2180 }
2181
2182 /**
2183 * is_new_response - check if a response is newly written
2184 * @r: the response descriptor
2185 * @q: the response queue
2186 *
2187 * Returns true if a response descriptor contains a yet unprocessed
2188 * response.
2189 */
2190 static __inline int
2191 is_new_response(const struct rsp_desc *r,
2192 const struct sge_rspq *q)
2193 {
2194 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2195 }
2196
2197 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2198 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2199 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2200 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2201 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2202
2203 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2204 #define NOMEM_INTR_DELAY 2500
2205
2206 #ifdef TCP_OFFLOAD
2207 /**
2208 * write_ofld_wr - write an offload work request
2209 * @adap: the adapter
2210 * @m: the packet to send
2211 * @q: the Tx queue
2212 * @pidx: index of the first Tx descriptor to write
2213 * @gen: the generation value to use
2214 * @ndesc: number of descriptors the packet will occupy
2215 *
2216 * Write an offload work request to send the supplied packet. The packet
2217 * data already carry the work request with most fields populated.
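 *
 * The leading mbuf is expected to look roughly like this (see ofld_xmit()
 * and t3_offload_tx() below for how the header is consumed):
 *
 *	struct ofld_hdr	  at mtod(m, struct ofld_hdr *); flags carry
 *			  F_HDR_SGL, F_HDR_DF, F_HDR_CTRL, G_HDR_NDESC()
 *			  and G_HDR_QSET(); sgl points to the payload's
 *			  sglist when F_HDR_SGL is set
 *	work request	  the remaining m->m_len - sizeof(struct ofld_hdr)
 *			  bytes, starting at (oh + 1)
 *
 * Without F_HDR_SGL the whole work request is written as immediate data;
 * with it, the work request is copied into the descriptor and the payload
 * segments are appended as a scatter/gather list built from oh->sgl.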
2218 */
2219 static void
2220 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
2221 unsigned int pidx, unsigned int gen, unsigned int ndesc)
2222 {
2223 unsigned int sgl_flits, flits;
2224 int i, idx, nsegs, wrlen;
2225 struct work_request_hdr *from;
2226 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
2227 struct tx_desc *d = &q->desc[pidx];
2228 struct txq_state txqs;
2229 struct sglist_seg *segs;
2230 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2231 struct sglist *sgl;
2232
2233 from = (void *)(oh + 1); /* Start of WR within mbuf */
2234 wrlen = m->m_len - sizeof(*oh);
2235
2236 if (!(oh->flags & F_HDR_SGL)) {
2237 write_imm(d, (caddr_t)from, wrlen, gen);
2238
2239 /*
2240 * mbuf with "real" immediate tx data will be enqueue_wr'd by
2241 * t3_push_frames and freed in wr_ack. Others, like those sent
2242 * down by close_conn, t3_send_reset, etc. should be freed here.
2243 */
2244 if (!(oh->flags & F_HDR_DF))
2245 m_free(m);
2246 return;
2247 }
2248
2249 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
2250
2251 sgl = oh->sgl;
2252 flits = wrlen / 8;
2253 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
2254
2255 nsegs = sgl->sg_nseg;
2256 segs = sgl->sg_segs;
2257 for (idx = 0, i = 0; i < nsegs; i++) {
2258 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
2259 if (i && idx == 0)
2260 ++sgp;
2261 sgp->len[idx] = htobe32(segs[i].ss_len);
2262 sgp->addr[idx] = htobe64(segs[i].ss_paddr);
2263 idx ^= 1;
2264 }
2265 if (idx) {
2266 sgp->len[idx] = 0;
2267 sgp->addr[idx] = 0;
2268 }
2269
2270 sgl_flits = sgl_len(nsegs);
2271 txqs.gen = gen;
2272 txqs.pidx = pidx;
2273 txqs.compl = 0;
2274
2275 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
2276 from->wrh_hi, from->wrh_lo);
2277 }
2278
2279 /**
2280 * ofld_xmit - send a packet through an offload queue
2281 * @adap: the adapter
2282 * @qs: the queue set containing the Tx offload queue
2283 * @m: the packet
2284 *
2285 * Send an offload packet through an SGE offload queue.
2286 */
2287 static int
2288 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2289 {
2290 int ret;
2291 unsigned int ndesc;
2292 unsigned int pidx, gen;
2293 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2294 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2295
2296 ndesc = G_HDR_NDESC(oh->flags);
2297
2298 TXQ_LOCK(qs);
2299 again: reclaim_completed_tx(qs, 16, TXQ_OFLD);
2300 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2301 if (__predict_false(ret)) {
2302 if (ret == 1) {
2303 TXQ_UNLOCK(qs);
2304 return (EINTR);
2305 }
2306 goto again;
2307 }
2308
2309 gen = q->gen;
2310 q->in_use += ndesc;
2311 pidx = q->pidx;
2312 q->pidx += ndesc;
2313 if (q->pidx >= q->size) {
2314 q->pidx -= q->size;
2315 q->gen ^= 1;
2316 }
2317
2318 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2319 check_ring_tx_db(adap, q, 1);
2320 TXQ_UNLOCK(qs);
2321
2322 return (0);
2323 }
2324
2325 /**
2326 * restart_offloadq - restart a suspended offload queue
2327 * @qs: the queue set containing the offload queue
2328 *
2329 * Resumes transmission on a suspended Tx offload queue.
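 *
 * This runs from the adapter taskqueue; restart_tx() schedules it with
 *
 *	taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 *
 * once should_restart_tx() reports that enough descriptors have been
 * freed, and the handler is wired up via TASK_INIT() in
 * t3_sge_alloc_qset().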
2330 */
2331 static void
2332 restart_offloadq(void *data, int npending)
2333 {
2334 struct mbuf *m;
2335 struct sge_qset *qs = data;
2336 struct sge_txq *q = &qs->txq[TXQ_OFLD];
2337 adapter_t *adap = qs->port->adapter;
2338 int cleaned;
2339
2340 TXQ_LOCK(qs);
2341 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2342
2343 while ((m = mbufq_first(&q->sendq)) != NULL) {
2344 unsigned int gen, pidx;
2345 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2346 unsigned int ndesc = G_HDR_NDESC(oh->flags);
2347
2348 if (__predict_false(q->size - q->in_use < ndesc)) {
2349 setbit(&qs->txq_stopped, TXQ_OFLD);
2350 if (should_restart_tx(q) &&
2351 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2352 goto again;
2353 q->stops++;
2354 break;
2355 }
2356
2357 gen = q->gen;
2358 q->in_use += ndesc;
2359 pidx = q->pidx;
2360 q->pidx += ndesc;
2361 if (q->pidx >= q->size) {
2362 q->pidx -= q->size;
2363 q->gen ^= 1;
2364 }
2365
2366 (void)mbufq_dequeue(&q->sendq);
2367 TXQ_UNLOCK(qs);
2368 write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2369 TXQ_LOCK(qs);
2370 }
2371 #if USE_GTS
2372 set_bit(TXQ_RUNNING, &q->flags);
2373 set_bit(TXQ_LAST_PKT_DB, &q->flags);
2374 #endif
2375 TXQ_UNLOCK(qs);
2376 wmb();
2377 t3_write_reg(adap, A_SG_KDOORBELL,
2378 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2379 }
2380
2381 /**
2382 * t3_offload_tx - send an offload packet
2383 * @sc: the adapter
* @m: the packet
2384 *
2385 * Sends an offload packet. The ofld_hdr at the front of the mbuf selects
2386 * the destination: G_HDR_QSET() picks the queue set, and F_HDR_CTRL
2387 * directs the packet to the control queue instead of the offload queue.
2388 */
2389 int
2390 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2391 {
2392 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2393 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2394
2395 if (oh->flags & F_HDR_CTRL) {
2396 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */
2397 return (ctrl_xmit(sc, qs, m));
2398 } else
2399 return (ofld_xmit(sc, qs, m));
2400 }
2401 #endif
2402
2403 static void
2404 restart_tx(struct sge_qset *qs)
2405 {
2406 struct adapter *sc = qs->port->adapter;
2407
2408 if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2409 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2410 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2411 qs->txq[TXQ_OFLD].restarts++;
2412 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2413 }
2414
2415 if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2416 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2417 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2418 qs->txq[TXQ_CTRL].restarts++;
2419 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2420 }
2421 }
2422
2423 /**
2424 * t3_sge_alloc_qset - initialize an SGE queue set
2425 * @sc: the controller softc
2426 * @id: the queue set id
2427 * @nports: how many Ethernet ports will be using this queue set
2428 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2429 * @p: configuration parameters for this queue set
2430 * @ntxq: number of Tx queues for the queue set
2431 * @pi: port info for queue set
2432 *
2433 * Allocate resources and initialize an SGE queue set. A queue set
2434 * comprises a response queue, two Rx free-buffer queues, and up to 3
2435 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2436 * queue, offload queue, and control queue.
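 *
 * A hedged sketch of how a caller might allocate one queue set per
 * configured ring during attach ("irq_idx" and "ntxq" here are
 * placeholders for values chosen by the attach code, not names from
 * this file):
 *
 *	for (j = 0; j < pi->nqsets; j++) {
 *		int qset_idx = pi->first_qset + j;
 *
 *		err = t3_sge_alloc_qset(sc, qset_idx, sc->params.nports,
 *		    irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi);
 *		if (err != 0)
 *			break;
 *	}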
2437 */ 2438 int 2439 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2440 const struct qset_params *p, int ntxq, struct port_info *pi) 2441 { 2442 struct sge_qset *q = &sc->sge.qs[id]; 2443 int i, ret = 0; 2444 2445 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2446 q->port = pi; 2447 q->adap = sc; 2448 2449 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2450 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2451 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2452 goto err; 2453 } 2454 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2455 M_NOWAIT | M_ZERO)) == NULL) { 2456 device_printf(sc->dev, "failed to allocate ifq\n"); 2457 goto err; 2458 } 2459 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2460 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2461 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2462 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2463 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2464 2465 init_qset_cntxt(q, id); 2466 q->idx = id; 2467 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2468 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2469 &q->fl[0].desc, &q->fl[0].sdesc, 2470 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2471 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2472 printf("error %d from alloc ring fl0\n", ret); 2473 goto err; 2474 } 2475 2476 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2477 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2478 &q->fl[1].desc, &q->fl[1].sdesc, 2479 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2480 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2481 printf("error %d from alloc ring fl1\n", ret); 2482 goto err; 2483 } 2484 2485 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2486 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2487 &q->rspq.desc_tag, &q->rspq.desc_map, 2488 NULL, NULL)) != 0) { 2489 printf("error %d from alloc ring rspq\n", ret); 2490 goto err; 2491 } 2492 2493 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2494 device_get_unit(sc->dev), irq_vec_idx); 2495 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2496 2497 for (i = 0; i < ntxq; ++i) { 2498 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2499 2500 if ((ret = alloc_ring(sc, p->txq_size[i], 2501 sizeof(struct tx_desc), sz, 2502 &q->txq[i].phys_addr, &q->txq[i].desc, 2503 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2504 &q->txq[i].desc_map, 2505 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2506 printf("error %d from alloc ring tx %i\n", ret, i); 2507 goto err; 2508 } 2509 mbufq_init(&q->txq[i].sendq, INT_MAX); 2510 q->txq[i].gen = 1; 2511 q->txq[i].size = p->txq_size[i]; 2512 } 2513 2514 #ifdef TCP_OFFLOAD 2515 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2516 #endif 2517 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2518 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2519 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2520 2521 q->fl[0].gen = q->fl[1].gen = 1; 2522 q->fl[0].size = p->fl_size; 2523 q->fl[1].size = p->jumbo_size; 2524 2525 q->rspq.gen = 1; 2526 q->rspq.cidx = 0; 2527 q->rspq.size = p->rspq_size; 2528 2529 q->txq[TXQ_ETH].stop_thres = nports * 2530 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2531 2532 q->fl[0].buf_size = MCLBYTES; 2533 q->fl[0].zone = zone_pack; 2534 q->fl[0].type = EXT_PACKET; 2535 2536 if (p->jumbo_buf_size == MJUM16BYTES) { 2537 q->fl[1].zone = zone_jumbo16; 2538 q->fl[1].type = EXT_JUMBO16; 2539 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2540 q->fl[1].zone = zone_jumbo9; 2541 q->fl[1].type = EXT_JUMBO9; 2542 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2543 q->fl[1].zone = zone_jumbop; 2544 q->fl[1].type = EXT_JUMBOP; 2545 } else { 2546 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2547 ret = EDOOFUS; 2548 goto err; 2549 } 2550 q->fl[1].buf_size = p->jumbo_buf_size; 2551 2552 /* Allocate and setup the lro_ctrl structure */ 2553 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2554 #if defined(INET6) || defined(INET) 2555 ret = tcp_lro_init(&q->lro.ctrl); 2556 if (ret) { 2557 printf("error %d from tcp_lro_init\n", ret); 2558 goto err; 2559 } 2560 #endif 2561 q->lro.ctrl.ifp = pi->ifp; 2562 2563 mtx_lock_spin(&sc->sge.reg_lock); 2564 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2565 q->rspq.phys_addr, q->rspq.size, 2566 q->fl[0].buf_size, 1, 0); 2567 if (ret) { 2568 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2569 goto err_unlock; 2570 } 2571 2572 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2573 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2574 q->fl[i].phys_addr, q->fl[i].size, 2575 q->fl[i].buf_size, p->cong_thres, 1, 2576 0); 2577 if (ret) { 2578 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2579 goto err_unlock; 2580 } 2581 } 2582 2583 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2584 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2585 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2586 1, 0); 2587 if (ret) { 2588 printf("error %d from t3_sge_init_ecntxt\n", ret); 2589 goto err_unlock; 2590 } 2591 2592 if (ntxq > 1) { 2593 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2594 USE_GTS, SGE_CNTXT_OFLD, id, 2595 q->txq[TXQ_OFLD].phys_addr, 2596 q->txq[TXQ_OFLD].size, 0, 1, 0); 2597 if (ret) { 2598 printf("error %d from t3_sge_init_ecntxt\n", ret); 2599 goto err_unlock; 2600 } 2601 } 2602 2603 if (ntxq > 2) { 2604 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2605 SGE_CNTXT_CTRL, id, 2606 q->txq[TXQ_CTRL].phys_addr, 2607 q->txq[TXQ_CTRL].size, 2608 q->txq[TXQ_CTRL].token, 1, 0); 2609 if (ret) { 2610 printf("error %d from t3_sge_init_ecntxt\n", 
ret); 2611 goto err_unlock; 2612 } 2613 } 2614 2615 mtx_unlock_spin(&sc->sge.reg_lock); 2616 t3_update_qset_coalesce(q, p); 2617 2618 refill_fl(sc, &q->fl[0], q->fl[0].size); 2619 refill_fl(sc, &q->fl[1], q->fl[1].size); 2620 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2621 2622 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2623 V_NEWTIMER(q->rspq.holdoff_tmr)); 2624 2625 return (0); 2626 2627 err_unlock: 2628 mtx_unlock_spin(&sc->sge.reg_lock); 2629 err: 2630 TXQ_LOCK(q); 2631 t3_free_qset(sc, q); 2632 2633 return (ret); 2634 } 2635 2636 /* 2637 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2638 * ethernet data. Hardware assistance with various checksums and any vlan tag 2639 * will also be taken into account here. 2640 */ 2641 void 2642 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) 2643 { 2644 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2645 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2646 struct ifnet *ifp = pi->ifp; 2647 2648 if (cpl->vlan_valid) { 2649 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2650 m->m_flags |= M_VLANTAG; 2651 } 2652 2653 m->m_pkthdr.rcvif = ifp; 2654 /* 2655 * adjust after conversion to mbuf chain 2656 */ 2657 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2658 m->m_len -= (sizeof(*cpl) + ethpad); 2659 m->m_data += (sizeof(*cpl) + ethpad); 2660 2661 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { 2662 struct ether_header *eh = mtod(m, void *); 2663 uint16_t eh_type; 2664 2665 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2666 struct ether_vlan_header *evh = mtod(m, void *); 2667 2668 eh_type = evh->evl_proto; 2669 } else 2670 eh_type = eh->ether_type; 2671 2672 if (ifp->if_capenable & IFCAP_RXCSUM && 2673 eh_type == htons(ETHERTYPE_IP)) { 2674 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 2675 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2676 m->m_pkthdr.csum_data = 0xffff; 2677 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2678 eh_type == htons(ETHERTYPE_IPV6)) { 2679 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 2680 CSUM_PSEUDO_HDR); 2681 m->m_pkthdr.csum_data = 0xffff; 2682 } 2683 } 2684 } 2685 2686 /** 2687 * get_packet - return the next ingress packet buffer from a free list 2688 * @adap: the adapter that received the packet 2689 * @drop_thres: # of remaining buffers before we start dropping packets 2690 * @qs: the qset that the SGE free list holding the packet belongs to 2691 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2692 * @r: response descriptor 2693 * 2694 * Get the next packet from a free list and complete setup of the 2695 * sk_buff. If the packet is small we make a copy and recycle the 2696 * original buffer, otherwise we use the original buffer itself. If a 2697 * positive drop threshold is supplied packets are dropped and their 2698 * buffers recycled if (a) the number of remaining buffers is under the 2699 * threshold and the packet is too big to copy, or (b) the packet should 2700 * be copied but there is no memory for the copy. 2701 */ 2702 static int 2703 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2704 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2705 { 2706 2707 unsigned int len_cq = ntohl(r->len_cq); 2708 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2709 int mask, cidx = fl->cidx; 2710 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2711 uint32_t len = G_RSPD_LEN(len_cq); 2712 uint32_t flags = M_EXT; 2713 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2714 caddr_t cl; 2715 struct mbuf *m; 2716 int ret = 0; 2717 2718 mask = fl->size - 1; 2719 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2720 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2721 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2722 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2723 2724 fl->credits--; 2725 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2726 2727 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2728 sopeop == RSPQ_SOP_EOP) { 2729 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 2730 goto skip_recycle; 2731 cl = mtod(m, void *); 2732 memcpy(cl, sd->rxsd_cl, len); 2733 recycle_rx_buf(adap, fl, fl->cidx); 2734 m->m_pkthdr.len = m->m_len = len; 2735 m->m_flags = 0; 2736 mh->mh_head = mh->mh_tail = m; 2737 ret = 1; 2738 goto done; 2739 } else { 2740 skip_recycle: 2741 bus_dmamap_unload(fl->entry_tag, sd->map); 2742 cl = sd->rxsd_cl; 2743 m = sd->m; 2744 2745 if ((sopeop == RSPQ_SOP_EOP) || 2746 (sopeop == RSPQ_SOP)) 2747 flags |= M_PKTHDR; 2748 m_init(m, M_NOWAIT, MT_DATA, flags); 2749 if (fl->zone == zone_pack) { 2750 /* 2751 * restore clobbered data pointer 2752 */ 2753 m->m_data = m->m_ext.ext_buf; 2754 } else { 2755 m_cljset(m, cl, fl->type); 2756 } 2757 m->m_len = len; 2758 } 2759 switch(sopeop) { 2760 case RSPQ_SOP_EOP: 2761 ret = 1; 2762 /* FALLTHROUGH */ 2763 case RSPQ_SOP: 2764 mh->mh_head = mh->mh_tail = m; 2765 m->m_pkthdr.len = len; 2766 break; 2767 case RSPQ_EOP: 2768 ret = 1; 2769 /* FALLTHROUGH */ 2770 case RSPQ_NSOP_NEOP: 2771 if (mh->mh_tail == NULL) { 2772 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2773 m_freem(m); 2774 break; 2775 } 2776 mh->mh_tail->m_next = m; 2777 mh->mh_tail = m; 2778 mh->mh_head->m_pkthdr.len += len; 2779 break; 2780 } 2781 if (cxgb_debug) 2782 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2783 done: 2784 if (++fl->cidx == fl->size) 2785 fl->cidx = 0; 2786 2787 return (ret); 2788 } 2789 2790 /** 2791 * handle_rsp_cntrl_info - handles control information in a response 2792 * @qs: the queue set corresponding to the response 2793 * @flags: the response control flags 2794 * 2795 * Handles the control information of an SGE response, such as GTS 2796 * indications and completion credits for the queue set's Tx queues. 2797 * HW coalesces credits, we don't do any extra SW coalescing. 
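 *
 * For example, a response whose flags include V_RSPD_TXQ0_CR(3) adds 3
 * completion credits to the Ethernet Tx queue's "processed" count, and
 * (when GTS support is compiled in) F_RSPD_TXQ0_GTS clears that queue's
 * TXQ_RUNNING flag.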
2798 */ 2799 static __inline void 2800 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2801 { 2802 unsigned int credits; 2803 2804 #if USE_GTS 2805 if (flags & F_RSPD_TXQ0_GTS) 2806 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2807 #endif 2808 credits = G_RSPD_TXQ0_CR(flags); 2809 if (credits) 2810 qs->txq[TXQ_ETH].processed += credits; 2811 2812 credits = G_RSPD_TXQ2_CR(flags); 2813 if (credits) 2814 qs->txq[TXQ_CTRL].processed += credits; 2815 2816 # if USE_GTS 2817 if (flags & F_RSPD_TXQ1_GTS) 2818 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2819 # endif 2820 credits = G_RSPD_TXQ1_CR(flags); 2821 if (credits) 2822 qs->txq[TXQ_OFLD].processed += credits; 2823 2824 } 2825 2826 static void 2827 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2828 unsigned int sleeping) 2829 { 2830 ; 2831 } 2832 2833 /** 2834 * process_responses - process responses from an SGE response queue 2835 * @adap: the adapter 2836 * @qs: the queue set to which the response queue belongs 2837 * @budget: how many responses can be processed in this round 2838 * 2839 * Process responses from an SGE response queue up to the supplied budget. 2840 * Responses include received packets as well as credits and other events 2841 * for the queues that belong to the response queue's queue set. 2842 * A negative budget is effectively unlimited. 2843 * 2844 * Additionally choose the interrupt holdoff time for the next interrupt 2845 * on this queue. If the system is under memory shortage use a fairly 2846 * long delay to help recovery. 2847 */ 2848 static int 2849 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2850 { 2851 struct sge_rspq *rspq = &qs->rspq; 2852 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2853 int budget_left = budget; 2854 unsigned int sleeping = 0; 2855 #if defined(INET6) || defined(INET) 2856 int lro_enabled = qs->lro.enabled; 2857 int skip_lro; 2858 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2859 #endif 2860 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2861 #ifdef DEBUG 2862 static int last_holdoff = 0; 2863 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2864 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2865 last_holdoff = rspq->holdoff_tmr; 2866 } 2867 #endif 2868 rspq->next_holdoff = rspq->holdoff_tmr; 2869 2870 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2871 int eth, eop = 0, ethpad = 0; 2872 uint32_t flags = ntohl(r->flags); 2873 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2874 uint8_t opcode = r->rss_hdr.opcode; 2875 2876 eth = (opcode == CPL_RX_PKT); 2877 2878 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2879 struct mbuf *m; 2880 2881 if (cxgb_debug) 2882 printf("async notification\n"); 2883 2884 if (mh->mh_head == NULL) { 2885 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); 2886 m = mh->mh_head; 2887 } else { 2888 m = m_gethdr(M_NOWAIT, MT_DATA); 2889 } 2890 if (m == NULL) 2891 goto no_mem; 2892 2893 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2894 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2895 *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; 2896 opcode = CPL_ASYNC_NOTIF; 2897 eop = 1; 2898 rspq->async_notif++; 2899 goto skip; 2900 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2901 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 2902 2903 if (m == NULL) { 2904 no_mem: 2905 rspq->next_holdoff = NOMEM_INTR_DELAY; 2906 budget_left--; 2907 break; 2908 } 2909 if (mh->mh_head == NULL) 2910 mh->mh_head = m; 2911 else 2912 mh->mh_tail->m_next = m; 2913 mh->mh_tail = m; 2914 2915 get_imm_packet(adap, r, m); 2916 mh->mh_head->m_pkthdr.len += 
m->m_len; 2917 eop = 1; 2918 rspq->imm_data++; 2919 } else if (r->len_cq) { 2920 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 2921 2922 eop = get_packet(adap, drop_thresh, qs, mh, r); 2923 if (eop) { 2924 if (r->rss_hdr.hash_type && !adap->timestamp) { 2925 M_HASHTYPE_SET(mh->mh_head, 2926 M_HASHTYPE_OPAQUE_HASH); 2927 mh->mh_head->m_pkthdr.flowid = rss_hash; 2928 } 2929 } 2930 2931 ethpad = 2; 2932 } else { 2933 rspq->pure_rsps++; 2934 } 2935 skip: 2936 if (flags & RSPD_CTRL_MASK) { 2937 sleeping |= flags & RSPD_GTS_MASK; 2938 handle_rsp_cntrl_info(qs, flags); 2939 } 2940 2941 if (!eth && eop) { 2942 rspq->offload_pkts++; 2943 #ifdef TCP_OFFLOAD 2944 adap->cpl_handler[opcode](qs, r, mh->mh_head); 2945 #else 2946 m_freem(mh->mh_head); 2947 #endif 2948 mh->mh_head = NULL; 2949 } else if (eth && eop) { 2950 struct mbuf *m = mh->mh_head; 2951 2952 t3_rx_eth(adap, m, ethpad); 2953 2954 /* 2955 * The T304 sends incoming packets on any qset. If LRO 2956 * is also enabled, we could end up sending packet up 2957 * lro_ctrl->ifp's input. That is incorrect. 2958 * 2959 * The mbuf's rcvif was derived from the cpl header and 2960 * is accurate. Skip LRO and just use that. 2961 */ 2962 #if defined(INET6) || defined(INET) 2963 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 2964 2965 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 2966 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 2967 ) { 2968 /* successfully queue'd for LRO */ 2969 } else 2970 #endif 2971 { 2972 /* 2973 * LRO not enabled, packet unsuitable for LRO, 2974 * or unable to queue. Pass it up right now in 2975 * either case. 2976 */ 2977 struct ifnet *ifp = m->m_pkthdr.rcvif; 2978 (*ifp->if_input)(ifp, m); 2979 } 2980 mh->mh_head = NULL; 2981 2982 } 2983 2984 r++; 2985 if (__predict_false(++rspq->cidx == rspq->size)) { 2986 rspq->cidx = 0; 2987 rspq->gen ^= 1; 2988 r = rspq->desc; 2989 } 2990 2991 if (++rspq->credits >= 64) { 2992 refill_rspq(adap, rspq, rspq->credits); 2993 rspq->credits = 0; 2994 } 2995 __refill_fl_lt(adap, &qs->fl[0], 32); 2996 __refill_fl_lt(adap, &qs->fl[1], 32); 2997 --budget_left; 2998 } 2999 3000 #if defined(INET6) || defined(INET) 3001 /* Flush LRO */ 3002 tcp_lro_flush_all(lro_ctrl); 3003 #endif 3004 3005 if (sleeping) 3006 check_ring_db(adap, qs, sleeping); 3007 3008 mb(); /* commit Tx queue processed updates */ 3009 if (__predict_false(qs->txq_stopped > 1)) 3010 restart_tx(qs); 3011 3012 __refill_fl_lt(adap, &qs->fl[0], 512); 3013 __refill_fl_lt(adap, &qs->fl[1], 512); 3014 budget -= budget_left; 3015 return (budget); 3016 } 3017 3018 /* 3019 * A helper function that processes responses and issues GTS. 3020 */ 3021 static __inline int 3022 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3023 { 3024 int work; 3025 static int last_holdoff = 0; 3026 3027 work = process_responses(adap, rspq_to_qset(rq), -1); 3028 3029 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3030 printf("next_holdoff=%d\n", rq->next_holdoff); 3031 last_holdoff = rq->next_holdoff; 3032 } 3033 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3034 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3035 3036 return (work); 3037 } 3038 3039 #ifdef DEBUGNET 3040 int 3041 cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs) 3042 { 3043 3044 return (process_responses_gts(adap, &qs->rspq)); 3045 } 3046 #endif 3047 3048 /* 3049 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 
3050 * Handles data events from SGE response queues as well as error and other 3051 * async events as they all use the same interrupt pin. We use one SGE 3052 * response queue per port in this mode and protect all response queues with 3053 * queue 0's lock. 3054 */ 3055 void 3056 t3b_intr(void *data) 3057 { 3058 uint32_t i, map; 3059 adapter_t *adap = data; 3060 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3061 3062 t3_write_reg(adap, A_PL_CLI, 0); 3063 map = t3_read_reg(adap, A_SG_DATA_INTR); 3064 3065 if (!map) 3066 return; 3067 3068 if (__predict_false(map & F_ERRINTR)) { 3069 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3070 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3071 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3072 } 3073 3074 mtx_lock(&q0->lock); 3075 for_each_port(adap, i) 3076 if (map & (1 << i)) 3077 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3078 mtx_unlock(&q0->lock); 3079 } 3080 3081 /* 3082 * The MSI interrupt handler. This needs to handle data events from SGE 3083 * response queues as well as error and other async events as they all use 3084 * the same MSI vector. We use one SGE response queue per port in this mode 3085 * and protect all response queues with queue 0's lock. 3086 */ 3087 void 3088 t3_intr_msi(void *data) 3089 { 3090 adapter_t *adap = data; 3091 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3092 int i, new_packets = 0; 3093 3094 mtx_lock(&q0->lock); 3095 3096 for_each_port(adap, i) 3097 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3098 new_packets = 1; 3099 mtx_unlock(&q0->lock); 3100 if (new_packets == 0) { 3101 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3102 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3103 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3104 } 3105 } 3106 3107 void 3108 t3_intr_msix(void *data) 3109 { 3110 struct sge_qset *qs = data; 3111 adapter_t *adap = qs->port->adapter; 3112 struct sge_rspq *rspq = &qs->rspq; 3113 3114 if (process_responses_gts(adap, rspq) == 0) 3115 rspq->unhandled_irqs++; 3116 } 3117 3118 #define QDUMP_SBUF_SIZE 32 * 400 3119 static int 3120 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3121 { 3122 struct sge_rspq *rspq; 3123 struct sge_qset *qs; 3124 int i, err, dump_end, idx; 3125 struct sbuf *sb; 3126 struct rsp_desc *rspd; 3127 uint32_t data[4]; 3128 3129 rspq = arg1; 3130 qs = rspq_to_qset(rspq); 3131 if (rspq->rspq_dump_count == 0) 3132 return (0); 3133 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3134 log(LOG_WARNING, 3135 "dump count is too large %d\n", rspq->rspq_dump_count); 3136 rspq->rspq_dump_count = 0; 3137 return (EINVAL); 3138 } 3139 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3140 log(LOG_WARNING, 3141 "dump start of %d is greater than queue size\n", 3142 rspq->rspq_dump_start); 3143 rspq->rspq_dump_start = 0; 3144 return (EINVAL); 3145 } 3146 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3147 if (err) 3148 return (err); 3149 err = sysctl_wire_old_buffer(req, 0); 3150 if (err) 3151 return (err); 3152 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3153 3154 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3155 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3156 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3157 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3158 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3159 3160 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3161 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3162 3163 dump_end = 
rspq->rspq_dump_start + rspq->rspq_dump_count; 3164 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3165 idx = i & (RSPQ_Q_SIZE-1); 3166 3167 rspd = &rspq->desc[idx]; 3168 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3169 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3170 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3171 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3172 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3173 be32toh(rspd->len_cq), rspd->intr_gen); 3174 } 3175 3176 err = sbuf_finish(sb); 3177 sbuf_delete(sb); 3178 return (err); 3179 } 3180 3181 static int 3182 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3183 { 3184 struct sge_txq *txq; 3185 struct sge_qset *qs; 3186 int i, j, err, dump_end; 3187 struct sbuf *sb; 3188 struct tx_desc *txd; 3189 uint32_t *WR, wr_hi, wr_lo, gen; 3190 uint32_t data[4]; 3191 3192 txq = arg1; 3193 qs = txq_to_qset(txq, TXQ_ETH); 3194 if (txq->txq_dump_count == 0) { 3195 return (0); 3196 } 3197 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3198 log(LOG_WARNING, 3199 "dump count is too large %d\n", txq->txq_dump_count); 3200 txq->txq_dump_count = 1; 3201 return (EINVAL); 3202 } 3203 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3204 log(LOG_WARNING, 3205 "dump start of %d is greater than queue size\n", 3206 txq->txq_dump_start); 3207 txq->txq_dump_start = 0; 3208 return (EINVAL); 3209 } 3210 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3211 if (err) 3212 return (err); 3213 err = sysctl_wire_old_buffer(req, 0); 3214 if (err) 3215 return (err); 3216 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3217 3218 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3219 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3220 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3221 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3222 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3223 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3224 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3225 txq->txq_dump_start, 3226 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3227 3228 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3229 for (i = txq->txq_dump_start; i < dump_end; i++) { 3230 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3231 WR = (uint32_t *)txd->flit; 3232 wr_hi = ntohl(WR[0]); 3233 wr_lo = ntohl(WR[1]); 3234 gen = G_WR_GEN(wr_lo); 3235 3236 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3237 wr_hi, wr_lo, gen); 3238 for (j = 2; j < 30; j += 4) 3239 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3240 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3241 3242 } 3243 err = sbuf_finish(sb); 3244 sbuf_delete(sb); 3245 return (err); 3246 } 3247 3248 static int 3249 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3250 { 3251 struct sge_txq *txq; 3252 struct sge_qset *qs; 3253 int i, j, err, dump_end; 3254 struct sbuf *sb; 3255 struct tx_desc *txd; 3256 uint32_t *WR, wr_hi, wr_lo, gen; 3257 3258 txq = arg1; 3259 qs = txq_to_qset(txq, TXQ_CTRL); 3260 if (txq->txq_dump_count == 0) { 3261 return (0); 3262 } 3263 if (txq->txq_dump_count > 256) { 3264 log(LOG_WARNING, 3265 "dump count is too large %d\n", txq->txq_dump_count); 3266 txq->txq_dump_count = 1; 3267 return (EINVAL); 3268 } 3269 if (txq->txq_dump_start > 255) { 3270 log(LOG_WARNING, 3271 "dump start of %d is greater than queue size\n", 3272 txq->txq_dump_start); 3273 txq->txq_dump_start = 0; 3274 
return (EINVAL); 3275 } 3276 3277 err = sysctl_wire_old_buffer(req, 0); 3278 if (err != 0) 3279 return (err); 3280 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3281 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3282 txq->txq_dump_start, 3283 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3284 3285 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3286 for (i = txq->txq_dump_start; i < dump_end; i++) { 3287 txd = &txq->desc[i & (255)]; 3288 WR = (uint32_t *)txd->flit; 3289 wr_hi = ntohl(WR[0]); 3290 wr_lo = ntohl(WR[1]); 3291 gen = G_WR_GEN(wr_lo); 3292 3293 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3294 wr_hi, wr_lo, gen); 3295 for (j = 2; j < 30; j += 4) 3296 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3297 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3298 3299 } 3300 err = sbuf_finish(sb); 3301 sbuf_delete(sb); 3302 return (err); 3303 } 3304 3305 static int 3306 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3307 { 3308 adapter_t *sc = arg1; 3309 struct qset_params *qsp = &sc->params.sge.qset[0]; 3310 int coalesce_usecs; 3311 struct sge_qset *qs; 3312 int i, j, err, nqsets = 0; 3313 struct mtx *lock; 3314 3315 if ((sc->flags & FULL_INIT_DONE) == 0) 3316 return (ENXIO); 3317 3318 coalesce_usecs = qsp->coalesce_usecs; 3319 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3320 3321 if (err != 0) { 3322 return (err); 3323 } 3324 if (coalesce_usecs == qsp->coalesce_usecs) 3325 return (0); 3326 3327 for (i = 0; i < sc->params.nports; i++) 3328 for (j = 0; j < sc->port[i].nqsets; j++) 3329 nqsets++; 3330 3331 coalesce_usecs = max(1, coalesce_usecs); 3332 3333 for (i = 0; i < nqsets; i++) { 3334 qs = &sc->sge.qs[i]; 3335 qsp = &sc->params.sge.qset[i]; 3336 qsp->coalesce_usecs = coalesce_usecs; 3337 3338 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3339 &sc->sge.qs[0].rspq.lock; 3340 3341 mtx_lock(lock); 3342 t3_update_qset_coalesce(qs, qsp); 3343 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3344 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3345 mtx_unlock(lock); 3346 } 3347 3348 return (0); 3349 } 3350 3351 static int 3352 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3353 { 3354 adapter_t *sc = arg1; 3355 int rc, timestamp; 3356 3357 if ((sc->flags & FULL_INIT_DONE) == 0) 3358 return (ENXIO); 3359 3360 timestamp = sc->timestamp; 3361 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3362 3363 if (rc != 0) 3364 return (rc); 3365 3366 if (timestamp != sc->timestamp) { 3367 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3368 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3369 sc->timestamp = timestamp; 3370 } 3371 3372 return (0); 3373 } 3374 3375 void 3376 t3_add_attach_sysctls(adapter_t *sc) 3377 { 3378 struct sysctl_ctx_list *ctx; 3379 struct sysctl_oid_list *children; 3380 3381 ctx = device_get_sysctl_ctx(sc->dev); 3382 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3383 3384 /* random information */ 3385 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3386 "firmware_version", 3387 CTLFLAG_RD, sc->fw_version, 3388 0, "firmware version"); 3389 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3390 "hw_revision", 3391 CTLFLAG_RD, &sc->params.rev, 3392 0, "chip model"); 3393 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3394 "port_types", 3395 CTLFLAG_RD, sc->port_types, 3396 0, "type of ports"); 3397 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3398 "enable_debug", 3399 CTLFLAG_RW, &cxgb_debug, 3400 0, "enable verbose debugging output"); 3401 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3402 CTLFLAG_RD, &sc->tunq_coalesce, 3403 "#tunneled packets freed"); 3404 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3405 "txq_overrun", 3406 CTLFLAG_RD, &txq_fills, 3407 0, "#times txq overrun"); 3408 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3409 "core_clock", 3410 CTLFLAG_RD, &sc->params.vpd.cclk, 3411 0, "core clock frequency (in KHz)"); 3412 } 3413 3414 3415 static const char *rspq_name = "rspq"; 3416 static const char *txq_names[] = 3417 { 3418 "txq_eth", 3419 "txq_ofld", 3420 "txq_ctrl" 3421 }; 3422 3423 static int 3424 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3425 { 3426 struct port_info *p = arg1; 3427 uint64_t *parg; 3428 3429 if (!p) 3430 return (EINVAL); 3431 3432 cxgb_refresh_stats(p); 3433 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3434 3435 return (sysctl_handle_64(oidp, parg, 0, req)); 3436 } 3437 3438 void 3439 t3_add_configured_sysctls(adapter_t *sc) 3440 { 3441 struct sysctl_ctx_list *ctx; 3442 struct sysctl_oid_list *children; 3443 int i, j; 3444 3445 ctx = device_get_sysctl_ctx(sc->dev); 3446 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3447 3448 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3449 "intr_coal", 3450 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 3451 0, t3_set_coalesce_usecs, 3452 "I", "interrupt coalescing timer (us)"); 3453 3454 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3455 "pkt_timestamp", 3456 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 3457 0, t3_pkt_timestamp, 3458 "I", "provide packet timestamp instead of connection hash"); 3459 3460 for (i = 0; i < sc->params.nports; i++) { 3461 struct port_info *pi = &sc->port[i]; 3462 struct sysctl_oid *poid; 3463 struct sysctl_oid_list *poidlist; 3464 struct mac_stats *mstats = &pi->mac.stats; 3465 3466 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3467 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3468 pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3469 "port statistics"); 3470 poidlist = SYSCTL_CHILDREN(poid); 3471 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3472 "nqsets", CTLFLAG_RD, &pi->nqsets, 3473 0, "#queue sets"); 3474 3475 for (j = 0; j < pi->nqsets; j++) { 3476 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3477 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3478 *ctrlqpoid, *lropoid; 3479 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3480 *txqpoidlist, *ctrlqpoidlist, 3481 *lropoidlist; 3482 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3483 3484 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3485 3486 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3487 qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 
3488 "qset statistics"); 3489 qspoidlist = SYSCTL_CHILDREN(qspoid); 3490 3491 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3492 CTLFLAG_RD, &qs->fl[0].empty, 0, 3493 "freelist #0 empty"); 3494 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3495 CTLFLAG_RD, &qs->fl[1].empty, 0, 3496 "freelist #1 empty"); 3497 3498 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3499 rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3500 "rspq statistics"); 3501 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3502 3503 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3504 txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3505 "txq statistics"); 3506 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3507 3508 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3509 txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3510 "ctrlq statistics"); 3511 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3512 3513 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3514 "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3515 "LRO statistics"); 3516 lropoidlist = SYSCTL_CHILDREN(lropoid); 3517 3518 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3519 CTLFLAG_RD, &qs->rspq.size, 3520 0, "#entries in response queue"); 3521 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3522 CTLFLAG_RD, &qs->rspq.cidx, 3523 0, "consumer index"); 3524 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3525 CTLFLAG_RD, &qs->rspq.credits, 3526 0, "#credits"); 3527 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3528 CTLFLAG_RD, &qs->rspq.starved, 3529 0, "#times starved"); 3530 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3531 CTLFLAG_RD, &qs->rspq.phys_addr, 3532 "physical_address_of the queue"); 3533 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3534 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3535 0, "start rspq dump entry"); 3536 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3537 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3538 0, "#rspq entries to dump"); 3539 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3540 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 3541 &qs->rspq, 0, t3_dump_rspq, "A", 3542 "dump of the response queue"); 3543 3544 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3545 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3546 "#tunneled packets dropped"); 3547 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3548 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 3549 0, "#tunneled packets waiting to be sent"); 3550 #if 0 3551 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3552 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3553 0, "#tunneled packets queue producer index"); 3554 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3555 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3556 0, "#tunneled packets queue consumer index"); 3557 #endif 3558 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3559 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3560 0, "#tunneled packets processed by the card"); 3561 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3562 CTLFLAG_RD, &txq->cleaned, 3563 0, "#tunneled packets cleaned"); 3564 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3565 CTLFLAG_RD, &txq->in_use, 3566 0, "#tunneled packet slots in use"); 3567 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", 3568 CTLFLAG_RD, &txq->txq_frees, 3569 "#tunneled packets freed"); 3570 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3571 CTLFLAG_RD, &txq->txq_skipped, 3572 0, 
"#tunneled packet descriptors skipped"); 3573 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3574 CTLFLAG_RD, &txq->txq_coalesced, 3575 "#tunneled packets coalesced"); 3576 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3577 CTLFLAG_RD, &txq->txq_enqueued, 3578 0, "#tunneled packets enqueued to hardware"); 3579 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3580 CTLFLAG_RD, &qs->txq_stopped, 3581 0, "tx queues stopped"); 3582 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3583 CTLFLAG_RD, &txq->phys_addr, 3584 "physical_address_of the queue"); 3585 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3586 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3587 0, "txq generation"); 3588 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3589 CTLFLAG_RD, &txq->cidx, 3590 0, "hardware queue cidx"); 3591 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3592 CTLFLAG_RD, &txq->pidx, 3593 0, "hardware queue pidx"); 3594 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3595 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3596 0, "txq start idx for dump"); 3597 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3598 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3599 0, "txq #entries to dump"); 3600 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3601 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 3602 &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A", 3603 "dump of the transmit queue"); 3604 3605 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3606 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3607 0, "ctrlq start idx for dump"); 3608 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3609 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3610 0, "ctrl #entries to dump"); 3611 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3612 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 3613 &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A", 3614 "dump of the transmit queue"); 3615 3616 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", 3617 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3618 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3619 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3620 SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3621 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3622 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3623 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3624 } 3625 3626 /* Now add a node for mac stats. */ 3627 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3628 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics"); 3629 poidlist = SYSCTL_CHILDREN(poid); 3630 3631 /* 3632 * We (ab)use the length argument (arg2) to pass on the offset 3633 * of the data that we are interested in. This is only required 3634 * for the quad counters that are updated from the hardware (we 3635 * make sure that we return the latest value). 3636 * sysctl_handle_macstat first updates *all* the counters from 3637 * the hardware, and then returns the latest value of the 3638 * requested counter. Best would be to update only the 3639 * requested counter from hardware, but t3_mac_update_stats() 3640 * hides all the register details and we don't want to dive into 3641 * all that here. 
3642 */ 3643 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3644 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \ 3645 offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0) 3646 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3647 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3648 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3649 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3650 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3651 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3652 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3653 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3654 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3655 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3656 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3657 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3658 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3659 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3660 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3661 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3662 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3663 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3664 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3665 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3666 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3667 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3668 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3669 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3670 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3671 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3672 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3673 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3674 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3675 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3676 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3677 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3678 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3679 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3680 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3681 CXGB_SYSCTL_ADD_QUAD(rx_short); 3682 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3683 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3684 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3685 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3686 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3687 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3688 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3689 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3690 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3691 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3692 #undef CXGB_SYSCTL_ADD_QUAD 3693 3694 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3695 CTLFLAG_RD, &mstats->a, 0) 3696 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3697 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3698 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3699 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3700 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3701 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3702 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3703 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3704 CXGB_SYSCTL_ADD_ULONG(num_resets); 3705 CXGB_SYSCTL_ADD_ULONG(link_faults); 3706 #undef CXGB_SYSCTL_ADD_ULONG 3707 } 3708 } 3709 3710 /** 3711 * t3_get_desc - dump an SGE descriptor for debugging purposes 3712 * @qs: the queue set 3713 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3714 * @idx: the descriptor index in the queue 3715 * @data: where to dump the descriptor contents 3716 * 3717 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3718 * size of the descriptor. 
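 *
 * A hypothetical debugging sketch dumping the current response-queue
 * descriptor (qnum 3); the success check compares against the size the
 * function returns for that queue type:
 *
 *	unsigned char buf[sizeof(struct rsp_desc)];
 *
 *	if (t3_get_desc(qs, 3, qs->rspq.cidx, buf) == sizeof(struct rsp_desc))
 *		log(LOG_DEBUG, "rspq descriptor copied for inspection\n");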
3719 */ 3720 int 3721 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3722 unsigned char *data) 3723 { 3724 if (qnum >= 6) 3725 return (EINVAL); 3726 3727 if (qnum < 3) { 3728 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3729 return -EINVAL; 3730 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3731 return sizeof(struct tx_desc); 3732 } 3733 3734 if (qnum == 3) { 3735 if (!qs->rspq.desc || idx >= qs->rspq.size) 3736 return (EINVAL); 3737 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3738 return sizeof(struct rsp_desc); 3739 } 3740 3741 qnum -= 4; 3742 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3743 return (EINVAL); 3744 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3745 return sizeof(struct rx_desc); 3746 } 3747