1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/kernel.h> 39 #include <sys/module.h> 40 #include <sys/bus.h> 41 #include <sys/conf.h> 42 #include <machine/bus.h> 43 #include <machine/resource.h> 44 #include <sys/bus_dma.h> 45 #include <sys/rman.h> 46 #include <sys/queue.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 50 #include <sys/proc.h> 51 #include <sys/sbuf.h> 52 #include <sys/sched.h> 53 #include <sys/smp.h> 54 #include <sys/systm.h> 55 #include <sys/syslog.h> 56 #include <sys/socket.h> 57 #include <sys/sglist.h> 58 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/bpf.h> 62 #include <net/ethernet.h> 63 #include <net/if_vlan_var.h> 64 65 #include <netinet/in_systm.h> 66 #include <netinet/in.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip6.h> 69 #include <netinet/tcp.h> 70 71 #include <dev/pci/pcireg.h> 72 #include <dev/pci/pcivar.h> 73 74 #include <vm/vm.h> 75 #include <vm/pmap.h> 76 77 #include <cxgb_include.h> 78 #include <sys/mvec.h> 79 80 int txq_fills = 0; 81 int multiq_tx_enable = 1; 82 83 #ifdef TCP_OFFLOAD 84 CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); 85 #endif 86 87 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 88 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 89 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 90 "size of per-queue mbuf ring"); 91 92 static int cxgb_tx_coalesce_force = 0; 93 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, 94 &cxgb_tx_coalesce_force, 0, 95 "coalesce small packets into a single work request regardless of ring state"); 96 97 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 98 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 99 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 100 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 101 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 102 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 103 #define 
TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 104 105 106 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 107 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, 108 &cxgb_tx_coalesce_enable_start, 0, 109 "coalesce enable threshold"); 110 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 111 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, 112 &cxgb_tx_coalesce_enable_stop, 0, 113 "coalesce disable threshold"); 114 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, 116 &cxgb_tx_reclaim_threshold, 0, 117 "tx cleaning minimum threshold"); 118 119 /* 120 * XXX don't re-enable this until TOE stops assuming 121 * we have an m_ext 122 */ 123 static int recycle_enable = 0; 124 125 extern int cxgb_use_16k_clusters; 126 extern int nmbjumbop; 127 extern int nmbjumbo9; 128 extern int nmbjumbo16; 129 130 #define USE_GTS 0 131 132 #define SGE_RX_SM_BUF_SIZE 1536 133 #define SGE_RX_DROP_THRES 16 134 #define SGE_RX_COPY_THRES 128 135 136 /* 137 * Period of the Tx buffer reclaim timer. This timer does not need to run 138 * frequently as Tx buffers are usually reclaimed by new Tx packets. 139 */ 140 #define TX_RECLAIM_PERIOD (hz >> 1) 141 142 /* 143 * Values for sge_txq.flags 144 */ 145 enum { 146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 148 }; 149 150 struct tx_desc { 151 uint64_t flit[TX_DESC_FLITS]; 152 } __packed; 153 154 struct rx_desc { 155 uint32_t addr_lo; 156 uint32_t len_gen; 157 uint32_t gen2; 158 uint32_t addr_hi; 159 } __packed; 160 161 struct rsp_desc { /* response queue descriptor */ 162 struct rss_header rss_hdr; 163 uint32_t flags; 164 uint32_t len_cq; 165 uint8_t imm_data[47]; 166 uint8_t intr_gen; 167 } __packed; 168 169 #define RX_SW_DESC_MAP_CREATED (1 << 0) 170 #define TX_SW_DESC_MAP_CREATED (1 << 1) 171 #define RX_SW_DESC_INUSE (1 << 3) 172 #define TX_SW_DESC_MAPPED (1 << 4) 173 174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 178 179 struct tx_sw_desc { /* SW state per Tx descriptor */ 180 struct mbuf *m; 181 bus_dmamap_t map; 182 int flags; 183 }; 184 185 struct rx_sw_desc { /* SW state per Rx descriptor */ 186 caddr_t rxsd_cl; 187 struct mbuf *m; 188 bus_dmamap_t map; 189 int flags; 190 }; 191 192 struct txq_state { 193 unsigned int compl; 194 unsigned int gen; 195 unsigned int pidx; 196 }; 197 198 struct refill_fl_cb_arg { 199 int error; 200 bus_dma_segment_t seg; 201 int nseg; 202 }; 203 204 205 /* 206 * Maps a number of flits to the number of Tx descriptors that can hold them. 207 * The formula is 208 * 209 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 210 * 211 * HW allows up to 4 descriptors to be combined into a WR. 
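 * As a worked example of the formula above (assuming WR_FLITS is 15, which
 * is what the SGE_NUM_GENBITS == 2 table below encodes): a 16-flit request
 * needs 1 + (16 - 2) / 14 = 2 descriptors and a 30-flit request needs
 * 1 + (30 - 2) / 14 = 3, matching flit_desc_map[16] == 2 and
 * flit_desc_map[30] == 3.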
212 */ 213 static uint8_t flit_desc_map[] = { 214 0, 215 #if SGE_NUM_GENBITS == 1 216 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 218 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 219 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 220 #elif SGE_NUM_GENBITS == 2 221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 222 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 223 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 224 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 225 #else 226 # error "SGE_NUM_GENBITS must be 1 or 2" 227 #endif 228 }; 229 230 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) 231 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) 232 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) 233 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) 234 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 235 #define TXQ_RING_NEEDS_ENQUEUE(qs) \ 236 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 237 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 238 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ 239 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) 240 #define TXQ_RING_DEQUEUE(qs) \ 241 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 242 243 int cxgb_debug = 0; 244 245 static void sge_timer_cb(void *arg); 246 static void sge_timer_reclaim(void *arg, int ncount); 247 static void sge_txq_reclaim_handler(void *arg, int ncount); 248 static void cxgb_start_locked(struct sge_qset *qs); 249 250 /* 251 * XXX need to cope with bursty scheduling by looking at a wider 252 * window than we are now for determining the need for coalescing 253 * 254 */ 255 static __inline uint64_t 256 check_pkt_coalesce(struct sge_qset *qs) 257 { 258 struct adapter *sc; 259 struct sge_txq *txq; 260 uint8_t *fill; 261 262 if (__predict_false(cxgb_tx_coalesce_force)) 263 return (1); 264 txq = &qs->txq[TXQ_ETH]; 265 sc = qs->port->adapter; 266 fill = &sc->tunq_fill[qs->idx]; 267 268 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) 269 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; 270 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) 271 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; 272 /* 273 * if the hardware transmit queue is more than 1/8 full 274 * we mark it as coalescing - we drop back from coalescing 275 * when we go below 1/32 full and there are no packets enqueued, 276 * this provides us with some degree of hysteresis 277 */ 278 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 279 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) 280 *fill = 0; 281 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) 282 *fill = 1; 283 284 return (sc->tunq_coalesce); 285 } 286 287 #ifdef __LP64__ 288 static void 289 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 290 { 291 uint64_t wr_hilo; 292 #if _BYTE_ORDER == _LITTLE_ENDIAN 293 wr_hilo = wr_hi; 294 wr_hilo |= (((uint64_t)wr_lo)<<32); 295 #else 296 wr_hilo = wr_lo; 297 wr_hilo |= (((uint64_t)wr_hi)<<32); 298 #endif 299 wrp->wrh_hilo = wr_hilo; 300 } 301 #else 302 static void 303 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 304 { 305 306 wrp->wrh_hi = wr_hi; 307 wmb(); 308 wrp->wrh_lo = wr_lo; 309 } 310 #endif 311 312 struct coalesce_info { 313 int count; 314 int nbytes; 315 }; 316 317 static int 318 coalesce_check(struct mbuf *m, void *arg) 319 { 320 struct coalesce_info *ci = arg; 321 int *count = &ci->count; 322 int *nbytes = 
&ci->nbytes; 323 324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 325 (*count < 7) && (m->m_next == NULL))) { 326 *count += 1; 327 *nbytes += m->m_len; 328 return (1); 329 } 330 return (0); 331 } 332 333 static struct mbuf * 334 cxgb_dequeue(struct sge_qset *qs) 335 { 336 struct mbuf *m, *m_head, *m_tail; 337 struct coalesce_info ci; 338 339 340 if (check_pkt_coalesce(qs) == 0) 341 return TXQ_RING_DEQUEUE(qs); 342 343 m_head = m_tail = NULL; 344 ci.count = ci.nbytes = 0; 345 do { 346 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 347 if (m_head == NULL) { 348 m_tail = m_head = m; 349 } else if (m != NULL) { 350 m_tail->m_nextpkt = m; 351 m_tail = m; 352 } 353 } while (m != NULL); 354 if (ci.count > 7) 355 panic("trying to coalesce %d packets in to one WR", ci.count); 356 return (m_head); 357 } 358 359 /** 360 * reclaim_completed_tx - reclaims completed Tx descriptors 361 * @adapter: the adapter 362 * @q: the Tx queue to reclaim completed descriptors from 363 * 364 * Reclaims Tx descriptors that the SGE has indicated it has processed, 365 * and frees the associated buffers if possible. Called with the Tx 366 * queue's lock held. 367 */ 368 static __inline int 369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 370 { 371 struct sge_txq *q = &qs->txq[queue]; 372 int reclaim = desc_reclaimable(q); 373 374 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 375 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 376 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 377 378 if (reclaim < reclaim_min) 379 return (0); 380 381 mtx_assert(&qs->lock, MA_OWNED); 382 if (reclaim > 0) { 383 t3_free_tx_desc(qs, reclaim, queue); 384 q->cleaned += reclaim; 385 q->in_use -= reclaim; 386 } 387 if (isset(&qs->txq_stopped, TXQ_ETH)) 388 clrbit(&qs->txq_stopped, TXQ_ETH); 389 390 return (reclaim); 391 } 392 393 /** 394 * should_restart_tx - are there enough resources to restart a Tx queue? 395 * @q: the Tx queue 396 * 397 * Checks if there are enough descriptors to restart a suspended Tx queue. 398 */ 399 static __inline int 400 should_restart_tx(const struct sge_txq *q) 401 { 402 unsigned int r = q->processed - q->cleaned; 403 404 return q->in_use - r < (q->size >> 1); 405 } 406 407 /** 408 * t3_sge_init - initialize SGE 409 * @adap: the adapter 410 * @p: the SGE parameters 411 * 412 * Performs SGE initialization needed every time after a chip reset. 413 * We do not initialize any of the queue sets here, instead the driver 414 * top-level must request those individually. We also do not enable DMA 415 * here, that should be done after the queues have been set up. 416 */ 417 void 418 t3_sge_init(adapter_t *adap, struct sge_params *p) 419 { 420 u_int ctrl, ups; 421 422 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 423 424 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 425 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 426 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 427 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 428 #if SGE_NUM_GENBITS == 1 429 ctrl |= F_EGRGENCTRL; 430 #endif 431 if (adap->params.rev > 0) { 432 if (!(adap->flags & (USING_MSIX | USING_MSI))) 433 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 434 } 435 t3_write_reg(adap, A_SG_CONTROL, ctrl); 436 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 437 V_LORCQDRBTHRSH(512)); 438 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 439 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 440 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 441 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 442 adap->params.rev < T3_REV_C ? 1000 : 500); 443 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 444 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 445 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 446 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 447 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 448 } 449 450 451 /** 452 * sgl_len - calculates the size of an SGL of the given capacity 453 * @n: the number of SGL entries 454 * 455 * Calculates the number of flits needed for a scatter/gather list that 456 * can hold the given number of entries. 457 */ 458 static __inline unsigned int 459 sgl_len(unsigned int n) 460 { 461 return ((3 * n) / 2 + (n & 1)); 462 } 463 464 /** 465 * get_imm_packet - return the next ingress packet buffer from a response 466 * @resp: the response descriptor containing the packet data 467 * 468 * Return a packet containing the immediate data of the given response. 469 */ 470 static int 471 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 472 { 473 474 if (resp->rss_hdr.opcode == CPL_RX_DATA) { 475 const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; 476 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 477 } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { 478 const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; 479 m->m_len = sizeof(*cpl) + ntohs(cpl->len); 480 } else 481 m->m_len = IMMED_PKT_SIZE; 482 m->m_ext.ext_buf = NULL; 483 m->m_ext.ext_type = 0; 484 memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); 485 return (0); 486 } 487 488 static __inline u_int 489 flits_to_desc(u_int n) 490 { 491 return (flit_desc_map[n]); 492 } 493 494 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 495 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 496 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 497 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 498 F_HIRCQPARITYERROR) 499 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 500 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 501 F_RSPQDISABLED) 502 503 /** 504 * t3_sge_err_intr_handler - SGE async event interrupt handler 505 * @adapter: the adapter 506 * 507 * Interrupt handler for SGE asynchronous (non-data) events. 
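 * Parity errors, framing errors, response queue credit overflow, and
 * delivery to a disabled response queue are each logged; any condition
 * covered by SGE_FATALERR escalates to t3_fatal_err() after the interrupt
 * cause register has been cleared.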
508 */ 509 void 510 t3_sge_err_intr_handler(adapter_t *adapter) 511 { 512 unsigned int v, status; 513 514 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 515 if (status & SGE_PARERR) 516 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 517 status & SGE_PARERR); 518 if (status & SGE_FRAMINGERR) 519 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 520 status & SGE_FRAMINGERR); 521 if (status & F_RSPQCREDITOVERFOW) 522 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 523 524 if (status & F_RSPQDISABLED) { 525 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 526 527 CH_ALERT(adapter, 528 "packet delivered to disabled response queue (0x%x)\n", 529 (v >> S_RSPQ0DISABLED) & 0xff); 530 } 531 532 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 533 if (status & SGE_FATALERR) 534 t3_fatal_err(adapter); 535 } 536 537 void 538 t3_sge_prep(adapter_t *adap, struct sge_params *p) 539 { 540 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 541 542 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 543 nqsets *= adap->params.nports; 544 545 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 546 547 while (!powerof2(fl_q_size)) 548 fl_q_size--; 549 550 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 551 is_offload(adap); 552 553 #if __FreeBSD_version >= 700111 554 if (use_16k) { 555 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 556 jumbo_buf_size = MJUM16BYTES; 557 } else { 558 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 559 jumbo_buf_size = MJUM9BYTES; 560 } 561 #else 562 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); 563 jumbo_buf_size = MJUMPAGESIZE; 564 #endif 565 while (!powerof2(jumbo_q_size)) 566 jumbo_q_size--; 567 568 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 569 device_printf(adap->dev, 570 "Insufficient clusters and/or jumbo buffers.\n"); 571 572 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 573 574 for (i = 0; i < SGE_QSETS; ++i) { 575 struct qset_params *q = p->qset + i; 576 577 if (adap->params.nports > 2) { 578 q->coalesce_usecs = 50; 579 } else { 580 #ifdef INVARIANTS 581 q->coalesce_usecs = 10; 582 #else 583 q->coalesce_usecs = 5; 584 #endif 585 } 586 q->polling = 0; 587 q->rspq_size = RSPQ_Q_SIZE; 588 q->fl_size = fl_q_size; 589 q->jumbo_size = jumbo_q_size; 590 q->jumbo_buf_size = jumbo_buf_size; 591 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 592 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 593 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 594 q->cong_thres = 0; 595 } 596 } 597 598 int 599 t3_sge_alloc(adapter_t *sc) 600 { 601 602 /* The parent tag. */ 603 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 604 1, 0, /* algnmnt, boundary */ 605 BUS_SPACE_MAXADDR, /* lowaddr */ 606 BUS_SPACE_MAXADDR, /* highaddr */ 607 NULL, NULL, /* filter, filterarg */ 608 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 609 BUS_SPACE_UNRESTRICTED, /* nsegments */ 610 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 611 0, /* flags */ 612 NULL, NULL, /* lock, lockarg */ 613 &sc->parent_dmat)) { 614 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 615 return (ENOMEM); 616 } 617 618 /* 619 * DMA tag for normal sized RX frames 620 */ 621 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 622 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 623 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 624 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 625 return (ENOMEM); 626 } 627 628 /* 629 * DMA tag for jumbo sized RX frames. 
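 * The tag is sized for MJUM16BYTES, the largest cluster the driver may
 * select in t3_sge_prep(); smaller per-qset jumbo buffer sizes still fit
 * within that limit.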
630 */ 631 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 632 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 633 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 634 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 635 return (ENOMEM); 636 } 637 638 /* 639 * DMA tag for TX frames. 640 */ 641 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 642 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 643 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 644 NULL, NULL, &sc->tx_dmat)) { 645 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 646 return (ENOMEM); 647 } 648 649 return (0); 650 } 651 652 int 653 t3_sge_free(struct adapter * sc) 654 { 655 656 if (sc->tx_dmat != NULL) 657 bus_dma_tag_destroy(sc->tx_dmat); 658 659 if (sc->rx_jumbo_dmat != NULL) 660 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 661 662 if (sc->rx_dmat != NULL) 663 bus_dma_tag_destroy(sc->rx_dmat); 664 665 if (sc->parent_dmat != NULL) 666 bus_dma_tag_destroy(sc->parent_dmat); 667 668 return (0); 669 } 670 671 void 672 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 673 { 674 675 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 676 qs->rspq.polling = 0 /* p->polling */; 677 } 678 679 #if !defined(__i386__) && !defined(__amd64__) 680 static void 681 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 682 { 683 struct refill_fl_cb_arg *cb_arg = arg; 684 685 cb_arg->error = error; 686 cb_arg->seg = segs[0]; 687 cb_arg->nseg = nseg; 688 689 } 690 #endif 691 /** 692 * refill_fl - refill an SGE free-buffer list 693 * @sc: the controller softc 694 * @q: the free-list to refill 695 * @n: the number of new buffers to allocate 696 * 697 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 698 * The caller must assure that @n does not exceed the queue's capacity. 699 */ 700 static void 701 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 702 { 703 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 704 struct rx_desc *d = &q->desc[q->pidx]; 705 struct refill_fl_cb_arg cb_arg; 706 struct mbuf *m; 707 caddr_t cl; 708 int err; 709 710 cb_arg.error = 0; 711 while (n--) { 712 /* 713 * We allocate an uninitialized mbuf + cluster, mbuf is 714 * initialized after rx. 
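		 * MT_NOINIT skips the usual mbuf setup here; the header
		 * is only filled in when the buffer is handed up the
		 * stack after a receive completes.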
715 */ 716 if (q->zone == zone_pack) { 717 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 718 break; 719 cl = m->m_ext.ext_buf; 720 } else { 721 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 722 break; 723 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 724 uma_zfree(q->zone, cl); 725 break; 726 } 727 } 728 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 729 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 730 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 731 uma_zfree(q->zone, cl); 732 goto done; 733 } 734 sd->flags |= RX_SW_DESC_MAP_CREATED; 735 } 736 #if !defined(__i386__) && !defined(__amd64__) 737 err = bus_dmamap_load(q->entry_tag, sd->map, 738 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 739 740 if (err != 0 || cb_arg.error) { 741 if (q->zone == zone_pack) 742 uma_zfree(q->zone, cl); 743 m_free(m); 744 goto done; 745 } 746 #else 747 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 748 #endif 749 sd->flags |= RX_SW_DESC_INUSE; 750 sd->rxsd_cl = cl; 751 sd->m = m; 752 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 753 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 754 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 755 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 756 757 d++; 758 sd++; 759 760 if (++q->pidx == q->size) { 761 q->pidx = 0; 762 q->gen ^= 1; 763 sd = q->sdesc; 764 d = q->desc; 765 } 766 q->credits++; 767 q->db_pending++; 768 } 769 770 done: 771 if (q->db_pending >= 32) { 772 q->db_pending = 0; 773 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 774 } 775 } 776 777 778 /** 779 * free_rx_bufs - free the Rx buffers on an SGE free list 780 * @sc: the controle softc 781 * @q: the SGE free list to clean up 782 * 783 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 784 * this queue should be stopped before calling this function. 785 */ 786 static void 787 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 788 { 789 u_int cidx = q->cidx; 790 791 while (q->credits--) { 792 struct rx_sw_desc *d = &q->sdesc[cidx]; 793 794 if (d->flags & RX_SW_DESC_INUSE) { 795 bus_dmamap_unload(q->entry_tag, d->map); 796 bus_dmamap_destroy(q->entry_tag, d->map); 797 if (q->zone == zone_pack) { 798 m_init(d->m, zone_pack, MCLBYTES, 799 M_NOWAIT, MT_DATA, M_EXT); 800 uma_zfree(zone_pack, d->m); 801 } else { 802 m_init(d->m, zone_mbuf, MLEN, 803 M_NOWAIT, MT_DATA, 0); 804 uma_zfree(zone_mbuf, d->m); 805 uma_zfree(q->zone, d->rxsd_cl); 806 } 807 } 808 809 d->rxsd_cl = NULL; 810 d->m = NULL; 811 if (++cidx == q->size) 812 cidx = 0; 813 } 814 } 815 816 static __inline void 817 __refill_fl(adapter_t *adap, struct sge_fl *fl) 818 { 819 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 820 } 821 822 static __inline void 823 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 824 { 825 uint32_t reclaimable = fl->size - fl->credits; 826 827 if (reclaimable > 0) 828 refill_fl(adap, fl, min(max, reclaimable)); 829 } 830 831 /** 832 * recycle_rx_buf - recycle a receive buffer 833 * @adapter: the adapter 834 * @q: the SGE free list 835 * @idx: index of buffer to recycle 836 * 837 * Recycles the specified buffer on the given free list by adding it at 838 * the next available slot on the list. 
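 * The address flits are copied verbatim (they are already big endian);
 * only the generation words are rewritten for the new slot. Note that
 * buffer recycling is currently disabled by default via the
 * recycle_enable tunable above.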
839 */ 840 static void 841 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 842 { 843 struct rx_desc *from = &q->desc[idx]; 844 struct rx_desc *to = &q->desc[q->pidx]; 845 846 q->sdesc[q->pidx] = q->sdesc[idx]; 847 to->addr_lo = from->addr_lo; // already big endian 848 to->addr_hi = from->addr_hi; // likewise 849 wmb(); /* necessary ? */ 850 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 851 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 852 q->credits++; 853 854 if (++q->pidx == q->size) { 855 q->pidx = 0; 856 q->gen ^= 1; 857 } 858 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 859 } 860 861 static void 862 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 863 { 864 uint32_t *addr; 865 866 addr = arg; 867 *addr = segs[0].ds_addr; 868 } 869 870 static int 871 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 872 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 873 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 874 { 875 size_t len = nelem * elem_size; 876 void *s = NULL; 877 void *p = NULL; 878 int err; 879 880 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 881 BUS_SPACE_MAXADDR_32BIT, 882 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 883 len, 0, NULL, NULL, tag)) != 0) { 884 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 885 return (ENOMEM); 886 } 887 888 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 889 map)) != 0) { 890 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 891 return (ENOMEM); 892 } 893 894 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 895 bzero(p, len); 896 *(void **)desc = p; 897 898 if (sw_size) { 899 len = nelem * sw_size; 900 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 901 *(void **)sdesc = s; 902 } 903 if (parent_entry_tag == NULL) 904 return (0); 905 906 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 907 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 908 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 909 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 910 NULL, NULL, entry_tag)) != 0) { 911 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 912 return (ENOMEM); 913 } 914 return (0); 915 } 916 917 static void 918 sge_slow_intr_handler(void *arg, int ncount) 919 { 920 adapter_t *sc = arg; 921 922 t3_slow_intr_handler(sc); 923 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); 924 (void) t3_read_reg(sc, A_PL_INT_ENABLE0); 925 } 926 927 /** 928 * sge_timer_cb - perform periodic maintenance of an SGE qset 929 * @data: the SGE queue set to maintain 930 * 931 * Runs periodically from a timer to perform maintenance of an SGE queue 932 * set. It performs two tasks: 933 * 934 * a) Cleans up any completed Tx descriptors that may still be pending. 935 * Normal descriptor cleanup happens when new packets are added to a Tx 936 * queue so this timer is relatively infrequent and does any cleanup only 937 * if the Tx queue has not seen any new packets in a while. We make a 938 * best effort attempt to reclaim descriptors, in that we don't wait 939 * around if we cannot get a queue's lock (which most likely is because 940 * someone else is queueing new packets and so will also handle the clean 941 * up). Since control queues use immediate data exclusively we don't 942 * bother cleaning them up here. 943 * 944 * b) Replenishes Rx queues that have run out due to memory shortage. 945 * Normally new Rx buffers are added when existing ones are consumed but 946 * when out of memory a queue can become empty. 
We try to add only a few 947 * buffers here, the queue will be replenished fully as these new buffers 948 * are used up if memory shortage has subsided. 949 * 950 * c) Return coalesced response queue credits in case a response queue is 951 * starved. 952 * 953 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 954 * fifo overflows and the FW doesn't implement any recovery scheme yet. 955 */ 956 static void 957 sge_timer_cb(void *arg) 958 { 959 adapter_t *sc = arg; 960 if ((sc->flags & USING_MSIX) == 0) { 961 962 struct port_info *pi; 963 struct sge_qset *qs; 964 struct sge_txq *txq; 965 int i, j; 966 int reclaim_ofl, refill_rx; 967 968 if (sc->open_device_map == 0) 969 return; 970 971 for (i = 0; i < sc->params.nports; i++) { 972 pi = &sc->port[i]; 973 for (j = 0; j < pi->nqsets; j++) { 974 qs = &sc->sge.qs[pi->first_qset + j]; 975 txq = &qs->txq[0]; 976 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 977 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 978 (qs->fl[1].credits < qs->fl[1].size)); 979 if (reclaim_ofl || refill_rx) { 980 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 981 break; 982 } 983 } 984 } 985 } 986 987 if (sc->params.nports > 2) { 988 int i; 989 990 for_each_port(sc, i) { 991 struct port_info *pi = &sc->port[i]; 992 993 t3_write_reg(sc, A_SG_KDOORBELL, 994 F_SELEGRCNTX | 995 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 996 } 997 } 998 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 999 sc->open_device_map != 0) 1000 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1001 } 1002 1003 /* 1004 * This is meant to be a catch-all function to keep sge state private 1005 * to sge.c 1006 * 1007 */ 1008 int 1009 t3_sge_init_adapter(adapter_t *sc) 1010 { 1011 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 1012 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1013 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 1014 return (0); 1015 } 1016 1017 int 1018 t3_sge_reset_adapter(adapter_t *sc) 1019 { 1020 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1021 return (0); 1022 } 1023 1024 int 1025 t3_sge_init_port(struct port_info *pi) 1026 { 1027 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1028 return (0); 1029 } 1030 1031 /** 1032 * refill_rspq - replenish an SGE response queue 1033 * @adapter: the adapter 1034 * @q: the response queue to replenish 1035 * @credits: how many new responses to make available 1036 * 1037 * Replenishes a response queue by making the supplied number of responses 1038 * available to HW. 1039 */ 1040 static __inline void 1041 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1042 { 1043 1044 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 1045 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1046 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1047 } 1048 1049 static void 1050 sge_txq_reclaim_handler(void *arg, int ncount) 1051 { 1052 struct sge_qset *qs = arg; 1053 int i; 1054 1055 for (i = 0; i < 3; i++) 1056 reclaim_completed_tx(qs, 16, i); 1057 } 1058 1059 static void 1060 sge_timer_reclaim(void *arg, int ncount) 1061 { 1062 struct port_info *pi = arg; 1063 int i, nqsets = pi->nqsets; 1064 adapter_t *sc = pi->adapter; 1065 struct sge_qset *qs; 1066 struct mtx *lock; 1067 1068 KASSERT((sc->flags & USING_MSIX) == 0, 1069 ("can't call timer reclaim for msi-x")); 1070 1071 for (i = 0; i < nqsets; i++) { 1072 qs = &sc->sge.qs[pi->first_qset + i]; 1073 1074 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1075 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1076 &sc->sge.qs[0].rspq.lock; 1077 1078 if (mtx_trylock(lock)) { 1079 /* XXX currently assume that we are *NOT* polling */ 1080 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1081 1082 if (qs->fl[0].credits < qs->fl[0].size - 16) 1083 __refill_fl(sc, &qs->fl[0]); 1084 if (qs->fl[1].credits < qs->fl[1].size - 16) 1085 __refill_fl(sc, &qs->fl[1]); 1086 1087 if (status & (1 << qs->rspq.cntxt_id)) { 1088 if (qs->rspq.credits) { 1089 refill_rspq(sc, &qs->rspq, 1); 1090 qs->rspq.credits--; 1091 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1092 1 << qs->rspq.cntxt_id); 1093 } 1094 } 1095 mtx_unlock(lock); 1096 } 1097 } 1098 } 1099 1100 /** 1101 * init_qset_cntxt - initialize an SGE queue set context info 1102 * @qs: the queue set 1103 * @id: the queue set id 1104 * 1105 * Initializes the TIDs and context ids for the queues of a queue set. 1106 */ 1107 static void 1108 init_qset_cntxt(struct sge_qset *qs, u_int id) 1109 { 1110 1111 qs->rspq.cntxt_id = id; 1112 qs->fl[0].cntxt_id = 2 * id; 1113 qs->fl[1].cntxt_id = 2 * id + 1; 1114 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1115 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1116 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1117 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1118 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1119 1120 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1121 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1122 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1123 } 1124 1125 1126 static void 1127 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1128 { 1129 txq->in_use += ndesc; 1130 /* 1131 * XXX we don't handle stopping of queue 1132 * presumably start handles this when we bump against the end 1133 */ 1134 txqs->gen = txq->gen; 1135 txq->unacked += ndesc; 1136 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1137 txq->unacked &= 31; 1138 txqs->pidx = txq->pidx; 1139 txq->pidx += ndesc; 1140 #ifdef INVARIANTS 1141 if (((txqs->pidx > txq->cidx) && 1142 (txq->pidx < txqs->pidx) && 1143 (txq->pidx >= txq->cidx)) || 1144 ((txqs->pidx < txq->cidx) && 1145 (txq->pidx >= txq-> cidx)) || 1146 ((txqs->pidx < txq->cidx) && 1147 (txq->cidx < txqs->pidx))) 1148 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1149 txqs->pidx, txq->pidx, txq->cidx); 1150 #endif 1151 if (txq->pidx >= txq->size) { 1152 txq->pidx -= txq->size; 1153 txq->gen ^= 1; 1154 } 1155 1156 } 1157 1158 /** 1159 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1160 * @m: the packet mbufs 1161 * @nsegs: the number of segments 1162 * 1163 * Returns the number of Tx descriptors needed for the given Ethernet 1164 * packet. Ethernet packets require addition of WR and CPL headers. 
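 * For example (ignoring the PIO_LEN short-packet case handled below): a
 * packet mapped to 3 DMA segments needs sgl_len(3) + 2 = 7 flits, one more
 * if TSO is in use, and flits_to_desc() then converts that flit count to a
 * descriptor count via flit_desc_map[].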
1165 */ 1166 static __inline unsigned int 1167 calc_tx_descs(const struct mbuf *m, int nsegs) 1168 { 1169 unsigned int flits; 1170 1171 if (m->m_pkthdr.len <= PIO_LEN) 1172 return 1; 1173 1174 flits = sgl_len(nsegs) + 2; 1175 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1176 flits++; 1177 1178 return flits_to_desc(flits); 1179 } 1180 1181 /** 1182 * make_sgl - populate a scatter/gather list for a packet 1183 * @sgp: the SGL to populate 1184 * @segs: the packet dma segments 1185 * @nsegs: the number of segments 1186 * 1187 * Generates a scatter/gather list for the buffers that make up a packet 1188 * and returns the SGL size in 8-byte words. The caller must size the SGL 1189 * appropriately. 1190 */ 1191 static __inline void 1192 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1193 { 1194 int i, idx; 1195 1196 for (idx = 0, i = 0; i < nsegs; i++) { 1197 /* 1198 * firmware doesn't like empty segments 1199 */ 1200 if (segs[i].ds_len == 0) 1201 continue; 1202 if (i && idx == 0) 1203 ++sgp; 1204 1205 sgp->len[idx] = htobe32(segs[i].ds_len); 1206 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1207 idx ^= 1; 1208 } 1209 1210 if (idx) { 1211 sgp->len[idx] = 0; 1212 sgp->addr[idx] = 0; 1213 } 1214 } 1215 1216 /** 1217 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1218 * @adap: the adapter 1219 * @q: the Tx queue 1220 * 1221 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1222 * where the HW is going to sleep just after we checked, however, 1223 * then the interrupt handler will detect the outstanding TX packet 1224 * and ring the doorbell for us. 1225 * 1226 * When GTS is disabled we unconditionally ring the doorbell. 1227 */ 1228 static __inline void 1229 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) 1230 { 1231 #if USE_GTS 1232 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1233 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1234 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1235 #ifdef T3_TRACE 1236 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1237 q->cntxt_id); 1238 #endif 1239 t3_write_reg(adap, A_SG_KDOORBELL, 1240 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1241 } 1242 #else 1243 if (mustring || ++q->db_pending >= 32) { 1244 wmb(); /* write descriptors before telling HW */ 1245 t3_write_reg(adap, A_SG_KDOORBELL, 1246 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1247 q->db_pending = 0; 1248 } 1249 #endif 1250 } 1251 1252 static __inline void 1253 wr_gen2(struct tx_desc *d, unsigned int gen) 1254 { 1255 #if SGE_NUM_GENBITS == 2 1256 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1257 #endif 1258 } 1259 1260 /** 1261 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1262 * @ndesc: number of Tx descriptors spanned by the SGL 1263 * @txd: first Tx descriptor to be written 1264 * @txqs: txq state (generation and producer index) 1265 * @txq: the SGE Tx queue 1266 * @sgl: the SGL 1267 * @flits: number of flits to the start of the SGL in the first descriptor 1268 * @sgl_flits: the SGL size in flits 1269 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1270 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1271 * 1272 * Write a work request header and an associated SGL. If the SGL is 1273 * small enough to fit into one Tx descriptor it has already been written 1274 * and we just need to write the WR header. Otherwise we distribute the 1275 * SGL across the number of descriptors it spans. 
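 * In the multi-descriptor case each continuation descriptor begins with a
 * one-flit header and therefore carries at most WR_FLITS - 1 SGL flits;
 * the first descriptor's low header word, which holds the original
 * generation bit, is written last, after a wmb(), so the hardware never
 * sees a partially written request.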
1276 */ 1277 static void 1278 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1279 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1280 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1281 { 1282 1283 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1284 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1285 1286 if (__predict_true(ndesc == 1)) { 1287 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1288 V_WR_SGLSFLT(flits)) | wr_hi, 1289 htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | 1290 wr_lo); 1291 1292 wr_gen2(txd, txqs->gen); 1293 1294 } else { 1295 unsigned int ogen = txqs->gen; 1296 const uint64_t *fp = (const uint64_t *)sgl; 1297 struct work_request_hdr *wp = wrp; 1298 1299 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1300 V_WR_SGLSFLT(flits)) | wr_hi; 1301 1302 while (sgl_flits) { 1303 unsigned int avail = WR_FLITS - flits; 1304 1305 if (avail > sgl_flits) 1306 avail = sgl_flits; 1307 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1308 sgl_flits -= avail; 1309 ndesc--; 1310 if (!sgl_flits) 1311 break; 1312 1313 fp += avail; 1314 txd++; 1315 txsd++; 1316 if (++txqs->pidx == txq->size) { 1317 txqs->pidx = 0; 1318 txqs->gen ^= 1; 1319 txd = txq->desc; 1320 txsd = txq->sdesc; 1321 } 1322 1323 /* 1324 * when the head of the mbuf chain 1325 * is freed all clusters will be freed 1326 * with it 1327 */ 1328 wrp = (struct work_request_hdr *)txd; 1329 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1330 V_WR_SGLSFLT(1)) | wr_hi; 1331 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1332 sgl_flits + 1)) | 1333 V_WR_GEN(txqs->gen)) | wr_lo; 1334 wr_gen2(txd, txqs->gen); 1335 flits = 1; 1336 } 1337 wrp->wrh_hi |= htonl(F_WR_EOP); 1338 wmb(); 1339 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1340 wr_gen2((struct tx_desc *)wp, ogen); 1341 } 1342 } 1343 1344 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1345 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1346 1347 #define GET_VTAG(cntrl, m) \ 1348 do { \ 1349 if ((m)->m_flags & M_VLANTAG) \ 1350 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1351 } while (0) 1352 1353 static int 1354 t3_encap(struct sge_qset *qs, struct mbuf **m) 1355 { 1356 adapter_t *sc; 1357 struct mbuf *m0; 1358 struct sge_txq *txq; 1359 struct txq_state txqs; 1360 struct port_info *pi; 1361 unsigned int ndesc, flits, cntrl, mlen; 1362 int err, nsegs, tso_info = 0; 1363 1364 struct work_request_hdr *wrp; 1365 struct tx_sw_desc *txsd; 1366 struct sg_ent *sgp, *sgl; 1367 uint32_t wr_hi, wr_lo, sgl_flits; 1368 bus_dma_segment_t segs[TX_MAX_SEGS]; 1369 1370 struct tx_desc *txd; 1371 1372 pi = qs->port; 1373 sc = pi->adapter; 1374 txq = &qs->txq[TXQ_ETH]; 1375 txd = &txq->desc[txq->pidx]; 1376 txsd = &txq->sdesc[txq->pidx]; 1377 sgl = txq->txq_sgl; 1378 1379 prefetch(txd); 1380 m0 = *m; 1381 1382 mtx_assert(&qs->lock, MA_OWNED); 1383 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1384 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1385 1386 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1387 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1388 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1389 1390 if (m0->m_nextpkt != NULL) { 1391 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1392 ndesc = 1; 1393 mlen = 0; 1394 } else { 1395 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1396 &m0, segs, &nsegs))) { 1397 if (cxgb_debug) 1398 printf("failed ... 
err=%d\n", err); 1399 return (err); 1400 } 1401 mlen = m0->m_pkthdr.len; 1402 ndesc = calc_tx_descs(m0, nsegs); 1403 } 1404 txq_prod(txq, ndesc, &txqs); 1405 1406 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1407 txsd->m = m0; 1408 1409 if (m0->m_nextpkt != NULL) { 1410 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1411 int i, fidx; 1412 1413 if (nsegs > 7) 1414 panic("trying to coalesce %d packets in to one WR", nsegs); 1415 txq->txq_coalesced += nsegs; 1416 wrp = (struct work_request_hdr *)txd; 1417 flits = nsegs*2 + 1; 1418 1419 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1420 struct cpl_tx_pkt_batch_entry *cbe; 1421 uint64_t flit; 1422 uint32_t *hflit = (uint32_t *)&flit; 1423 int cflags = m0->m_pkthdr.csum_flags; 1424 1425 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1426 GET_VTAG(cntrl, m0); 1427 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1428 if (__predict_false(!(cflags & CSUM_IP))) 1429 cntrl |= F_TXPKT_IPCSUM_DIS; 1430 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | 1431 CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1432 cntrl |= F_TXPKT_L4CSUM_DIS; 1433 1434 hflit[0] = htonl(cntrl); 1435 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1436 flit |= htobe64(1 << 24); 1437 cbe = &cpl_batch->pkt_entry[i]; 1438 cbe->cntrl = hflit[0]; 1439 cbe->len = hflit[1]; 1440 cbe->addr = htobe64(segs[i].ds_addr); 1441 } 1442 1443 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1444 V_WR_SGLSFLT(flits)) | 1445 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1446 wr_lo = htonl(V_WR_LEN(flits) | 1447 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1448 set_wr_hdr(wrp, wr_hi, wr_lo); 1449 wmb(); 1450 ETHER_BPF_MTAP(pi->ifp, m0); 1451 wr_gen2(txd, txqs.gen); 1452 check_ring_tx_db(sc, txq, 0); 1453 return (0); 1454 } else if (tso_info) { 1455 uint16_t eth_type; 1456 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1457 struct ether_header *eh; 1458 void *l3hdr; 1459 struct tcphdr *tcp; 1460 1461 txd->flit[2] = 0; 1462 GET_VTAG(cntrl, m0); 1463 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1464 hdr->cntrl = htonl(cntrl); 1465 hdr->len = htonl(mlen | 0x80000000); 1466 1467 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1468 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", 1469 m0, mlen, m0->m_pkthdr.tso_segsz, 1470 (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); 1471 panic("tx tso packet too small"); 1472 } 1473 1474 /* Make sure that ether, ip, tcp headers are all in m0 */ 1475 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1476 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1477 if (__predict_false(m0 == NULL)) { 1478 /* XXX panic probably an overreaction */ 1479 panic("couldn't fit header into mbuf"); 1480 } 1481 } 1482 1483 eh = mtod(m0, struct ether_header *); 1484 eth_type = eh->ether_type; 1485 if (eth_type == htons(ETHERTYPE_VLAN)) { 1486 struct ether_vlan_header *evh = (void *)eh; 1487 1488 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); 1489 l3hdr = evh + 1; 1490 eth_type = evh->evl_proto; 1491 } else { 1492 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); 1493 l3hdr = eh + 1; 1494 } 1495 1496 if (eth_type == htons(ETHERTYPE_IP)) { 1497 struct ip *ip = l3hdr; 1498 1499 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); 1500 tcp = (struct tcphdr *)(ip + 1); 1501 } else if (eth_type == htons(ETHERTYPE_IPV6)) { 1502 struct ip6_hdr *ip6 = l3hdr; 1503 1504 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, 1505 ("%s: CSUM_TSO with ip6_nxt %d", 1506 __func__, ip6->ip6_nxt)); 1507 1508 tso_info |= F_LSO_IPV6; 1509 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); 1510 tcp = 
(struct tcphdr *)(ip6 + 1); 1511 } else 1512 panic("%s: CSUM_TSO but neither ip nor ip6", __func__); 1513 1514 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); 1515 hdr->lso_info = htonl(tso_info); 1516 1517 if (__predict_false(mlen <= PIO_LEN)) { 1518 /* 1519 * pkt not undersized but fits in PIO_LEN 1520 * Indicates a TSO bug at the higher levels. 1521 */ 1522 txsd->m = NULL; 1523 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1524 flits = (mlen + 7) / 8 + 3; 1525 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1526 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1527 F_WR_SOP | F_WR_EOP | txqs.compl); 1528 wr_lo = htonl(V_WR_LEN(flits) | 1529 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1530 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1531 wmb(); 1532 ETHER_BPF_MTAP(pi->ifp, m0); 1533 wr_gen2(txd, txqs.gen); 1534 check_ring_tx_db(sc, txq, 0); 1535 m_freem(m0); 1536 return (0); 1537 } 1538 flits = 3; 1539 } else { 1540 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1541 1542 GET_VTAG(cntrl, m0); 1543 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1544 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1545 cntrl |= F_TXPKT_IPCSUM_DIS; 1546 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | 1547 CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) 1548 cntrl |= F_TXPKT_L4CSUM_DIS; 1549 cpl->cntrl = htonl(cntrl); 1550 cpl->len = htonl(mlen | 0x80000000); 1551 1552 if (mlen <= PIO_LEN) { 1553 txsd->m = NULL; 1554 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1555 flits = (mlen + 7) / 8 + 2; 1556 1557 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1558 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1559 F_WR_SOP | F_WR_EOP | txqs.compl); 1560 wr_lo = htonl(V_WR_LEN(flits) | 1561 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1562 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1563 wmb(); 1564 ETHER_BPF_MTAP(pi->ifp, m0); 1565 wr_gen2(txd, txqs.gen); 1566 check_ring_tx_db(sc, txq, 0); 1567 m_freem(m0); 1568 return (0); 1569 } 1570 flits = 2; 1571 } 1572 wrp = (struct work_request_hdr *)txd; 1573 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1574 make_sgl(sgp, segs, nsegs); 1575 1576 sgl_flits = sgl_len(nsegs); 1577 1578 ETHER_BPF_MTAP(pi->ifp, m0); 1579 1580 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1581 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1582 wr_lo = htonl(V_WR_TID(txq->token)); 1583 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1584 sgl_flits, wr_hi, wr_lo); 1585 check_ring_tx_db(sc, txq, 0); 1586 1587 return (0); 1588 } 1589 1590 void 1591 cxgb_tx_watchdog(void *arg) 1592 { 1593 struct sge_qset *qs = arg; 1594 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1595 1596 if (qs->coalescing != 0 && 1597 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1598 TXQ_RING_EMPTY(qs)) 1599 qs->coalescing = 0; 1600 else if (qs->coalescing == 0 && 1601 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1602 qs->coalescing = 1; 1603 if (TXQ_TRYLOCK(qs)) { 1604 qs->qs_flags |= QS_FLUSHING; 1605 cxgb_start_locked(qs); 1606 qs->qs_flags &= ~QS_FLUSHING; 1607 TXQ_UNLOCK(qs); 1608 } 1609 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1610 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1611 qs, txq->txq_watchdog.c_cpu); 1612 } 1613 1614 static void 1615 cxgb_tx_timeout(void *arg) 1616 { 1617 struct sge_qset *qs = arg; 1618 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1619 1620 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1621 qs->coalescing = 1; 1622 if (TXQ_TRYLOCK(qs)) { 1623 qs->qs_flags |= QS_TIMEOUT; 1624 cxgb_start_locked(qs); 1625 qs->qs_flags &= ~QS_TIMEOUT; 1626 TXQ_UNLOCK(qs); 1627 } 1628 } 1629 1630 static void 1631 cxgb_start_locked(struct sge_qset *qs) 1632 { 1633 struct mbuf *m_head = NULL; 1634 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1635 struct port_info *pi = qs->port; 1636 struct ifnet *ifp = pi->ifp; 1637 1638 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1639 reclaim_completed_tx(qs, 0, TXQ_ETH); 1640 1641 if (!pi->link_config.link_ok) { 1642 TXQ_RING_FLUSH(qs); 1643 return; 1644 } 1645 TXQ_LOCK_ASSERT(qs); 1646 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1647 pi->link_config.link_ok) { 1648 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1649 1650 if (txq->size - txq->in_use <= TX_MAX_DESC) 1651 break; 1652 1653 if ((m_head = cxgb_dequeue(qs)) == NULL) 1654 break; 1655 /* 1656 * Encapsulation can modify our pointer, and or make it 1657 * NULL on failure. In that event, we can't requeue. 
1658 */ 1659 if (t3_encap(qs, &m_head) || m_head == NULL) 1660 break; 1661 1662 m_head = NULL; 1663 } 1664 1665 if (txq->db_pending) 1666 check_ring_tx_db(pi->adapter, txq, 1); 1667 1668 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1669 pi->link_config.link_ok) 1670 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1671 qs, txq->txq_timer.c_cpu); 1672 if (m_head != NULL) 1673 m_freem(m_head); 1674 } 1675 1676 static int 1677 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1678 { 1679 struct port_info *pi = qs->port; 1680 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1681 struct buf_ring *br = txq->txq_mr; 1682 int error, avail; 1683 1684 avail = txq->size - txq->in_use; 1685 TXQ_LOCK_ASSERT(qs); 1686 1687 /* 1688 * We can only do a direct transmit if the following are true: 1689 * - we aren't coalescing (ring < 3/4 full) 1690 * - the link is up -- checked in caller 1691 * - there are no packets enqueued already 1692 * - there is space in hardware transmit queue 1693 */ 1694 if (check_pkt_coalesce(qs) == 0 && 1695 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1696 if (t3_encap(qs, &m)) { 1697 if (m != NULL && 1698 (error = drbr_enqueue(ifp, br, m)) != 0) 1699 return (error); 1700 } else { 1701 if (txq->db_pending) 1702 check_ring_tx_db(pi->adapter, txq, 1); 1703 1704 /* 1705 * We've bypassed the buf ring so we need to update 1706 * the stats directly 1707 */ 1708 txq->txq_direct_packets++; 1709 txq->txq_direct_bytes += m->m_pkthdr.len; 1710 } 1711 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1712 return (error); 1713 1714 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1715 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1716 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1717 cxgb_start_locked(qs); 1718 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1719 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1720 qs, txq->txq_timer.c_cpu); 1721 return (0); 1722 } 1723 1724 int 1725 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1726 { 1727 struct sge_qset *qs; 1728 struct port_info *pi = ifp->if_softc; 1729 int error, qidx = pi->first_qset; 1730 1731 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1732 ||(!pi->link_config.link_ok)) { 1733 m_freem(m); 1734 return (0); 1735 } 1736 1737 /* check if flowid is set */ 1738 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 1739 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1740 1741 qs = &pi->adapter->sge.qs[qidx]; 1742 1743 if (TXQ_TRYLOCK(qs)) { 1744 /* XXX running */ 1745 error = cxgb_transmit_locked(ifp, qs, m); 1746 TXQ_UNLOCK(qs); 1747 } else 1748 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1749 return (error); 1750 } 1751 1752 void 1753 cxgb_qflush(struct ifnet *ifp) 1754 { 1755 /* 1756 * flush any enqueued mbufs in the buf_rings 1757 * and in the transmit queues 1758 * no-op for now 1759 */ 1760 return; 1761 } 1762 1763 /** 1764 * write_imm - write a packet into a Tx descriptor as immediate data 1765 * @d: the Tx descriptor to write 1766 * @m: the packet 1767 * @len: the length of packet data to write as immediate data 1768 * @gen: the generation bit value to write 1769 * 1770 * Writes a packet as immediate data into a Tx descriptor. The packet 1771 * contains a work request at its beginning. We must write the packet 1772 * carefully so the SGE doesn't read accidentally before it's written in 1773 * its entirety. 
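 * Concretely, the body of the request is copied first and the header words
 * carrying SOP/EOP, the length, and the generation bit are stored
 * afterwards via set_wr_hdr(), followed by a wmb() before wr_gen2()
 * updates the generation flit.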
1774 */ 1775 static __inline void 1776 write_imm(struct tx_desc *d, caddr_t src, 1777 unsigned int len, unsigned int gen) 1778 { 1779 struct work_request_hdr *from = (struct work_request_hdr *)src; 1780 struct work_request_hdr *to = (struct work_request_hdr *)d; 1781 uint32_t wr_hi, wr_lo; 1782 1783 KASSERT(len <= WR_LEN && len >= sizeof(*from), 1784 ("%s: invalid len %d", __func__, len)); 1785 1786 memcpy(&to[1], &from[1], len - sizeof(*from)); 1787 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1788 V_WR_BCNTLFLT(len & 7)); 1789 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); 1790 set_wr_hdr(to, wr_hi, wr_lo); 1791 wmb(); 1792 wr_gen2(d, gen); 1793 } 1794 1795 /** 1796 * check_desc_avail - check descriptor availability on a send queue 1797 * @adap: the adapter 1798 * @q: the TX queue 1799 * @m: the packet needing the descriptors 1800 * @ndesc: the number of Tx descriptors needed 1801 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1802 * 1803 * Checks if the requested number of Tx descriptors is available on an 1804 * SGE send queue. If the queue is already suspended or not enough 1805 * descriptors are available the packet is queued for later transmission. 1806 * Must be called with the Tx queue locked. 1807 * 1808 * Returns 0 if enough descriptors are available, 1 if there aren't 1809 * enough descriptors and the packet has been queued, and 2 if the caller 1810 * needs to retry because there weren't enough descriptors at the 1811 * beginning of the call but some freed up in the mean time. 1812 */ 1813 static __inline int 1814 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1815 struct mbuf *m, unsigned int ndesc, 1816 unsigned int qid) 1817 { 1818 /* 1819 * XXX We currently only use this for checking the control queue 1820 * the control queue is only used for binding qsets which happens 1821 * at init time so we are guaranteed enough descriptors 1822 */ 1823 if (__predict_false(!mbufq_empty(&q->sendq))) { 1824 addq_exit: mbufq_tail(&q->sendq, m); 1825 return 1; 1826 } 1827 if (__predict_false(q->size - q->in_use < ndesc)) { 1828 1829 struct sge_qset *qs = txq_to_qset(q, qid); 1830 1831 setbit(&qs->txq_stopped, qid); 1832 if (should_restart_tx(q) && 1833 test_and_clear_bit(qid, &qs->txq_stopped)) 1834 return 2; 1835 1836 q->stops++; 1837 goto addq_exit; 1838 } 1839 return 0; 1840 } 1841 1842 1843 /** 1844 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1845 * @q: the SGE control Tx queue 1846 * 1847 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1848 * that send only immediate data (presently just the control queues) and 1849 * thus do not have any mbufs 1850 */ 1851 static __inline void 1852 reclaim_completed_tx_imm(struct sge_txq *q) 1853 { 1854 unsigned int reclaim = q->processed - q->cleaned; 1855 1856 q->in_use -= reclaim; 1857 q->cleaned += reclaim; 1858 } 1859 1860 /** 1861 * ctrl_xmit - send a packet through an SGE control Tx queue 1862 * @adap: the adapter 1863 * @q: the control queue 1864 * @m: the packet 1865 * 1866 * Send a packet through an SGE control Tx queue. Packets sent through 1867 * a control queue must fit entirely as immediate data in a single Tx 1868 * descriptor and have no page fragments. 
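 * If no descriptor is available the mbuf is placed on the control queue's
 * sendq and the caller sees ENOSPC; restart_ctrlq() drains that queue once
 * descriptors free up.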
1869 */ 1870 static int 1871 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1872 { 1873 int ret; 1874 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1875 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1876 1877 KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__)); 1878 1879 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1880 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1881 1882 TXQ_LOCK(qs); 1883 again: reclaim_completed_tx_imm(q); 1884 1885 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1886 if (__predict_false(ret)) { 1887 if (ret == 1) { 1888 TXQ_UNLOCK(qs); 1889 return (ENOSPC); 1890 } 1891 goto again; 1892 } 1893 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); 1894 1895 q->in_use++; 1896 if (++q->pidx >= q->size) { 1897 q->pidx = 0; 1898 q->gen ^= 1; 1899 } 1900 TXQ_UNLOCK(qs); 1901 wmb(); 1902 t3_write_reg(adap, A_SG_KDOORBELL, 1903 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1904 1905 m_free(m); 1906 return (0); 1907 } 1908 1909 1910 /** 1911 * restart_ctrlq - restart a suspended control queue 1912 * @qs: the queue set cotaining the control queue 1913 * 1914 * Resumes transmission on a suspended Tx control queue. 1915 */ 1916 static void 1917 restart_ctrlq(void *data, int npending) 1918 { 1919 struct mbuf *m; 1920 struct sge_qset *qs = (struct sge_qset *)data; 1921 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1922 adapter_t *adap = qs->port->adapter; 1923 1924 TXQ_LOCK(qs); 1925 again: reclaim_completed_tx_imm(q); 1926 1927 while (q->in_use < q->size && 1928 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1929 1930 write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); 1931 m_free(m); 1932 1933 if (++q->pidx >= q->size) { 1934 q->pidx = 0; 1935 q->gen ^= 1; 1936 } 1937 q->in_use++; 1938 } 1939 if (!mbufq_empty(&q->sendq)) { 1940 setbit(&qs->txq_stopped, TXQ_CTRL); 1941 1942 if (should_restart_tx(q) && 1943 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1944 goto again; 1945 q->stops++; 1946 } 1947 TXQ_UNLOCK(qs); 1948 t3_write_reg(adap, A_SG_KDOORBELL, 1949 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1950 } 1951 1952 1953 /* 1954 * Send a management message through control queue 0 1955 */ 1956 int 1957 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1958 { 1959 return ctrl_xmit(adap, &adap->sge.qs[0], m); 1960 } 1961 1962 /** 1963 * free_qset - free the resources of an SGE queue set 1964 * @sc: the controller owning the queue set 1965 * @q: the queue set 1966 * 1967 * Release the HW and SW resources associated with an SGE queue set, such 1968 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 1969 * queue set must be quiesced prior to calling this. 
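 * The caller is expected to hold the queue set lock; it is released and
 * destroyed here as part of the teardown.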
1970 */ 1971 static void 1972 t3_free_qset(adapter_t *sc, struct sge_qset *q) 1973 { 1974 int i; 1975 1976 reclaim_completed_tx(q, 0, TXQ_ETH); 1977 if (q->txq[TXQ_ETH].txq_mr != NULL) 1978 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); 1979 if (q->txq[TXQ_ETH].txq_ifq != NULL) { 1980 ifq_delete(q->txq[TXQ_ETH].txq_ifq); 1981 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); 1982 } 1983 1984 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 1985 if (q->fl[i].desc) { 1986 mtx_lock_spin(&sc->sge.reg_lock); 1987 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 1988 mtx_unlock_spin(&sc->sge.reg_lock); 1989 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 1990 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 1991 q->fl[i].desc_map); 1992 bus_dma_tag_destroy(q->fl[i].desc_tag); 1993 bus_dma_tag_destroy(q->fl[i].entry_tag); 1994 } 1995 if (q->fl[i].sdesc) { 1996 free_rx_bufs(sc, &q->fl[i]); 1997 free(q->fl[i].sdesc, M_DEVBUF); 1998 } 1999 } 2000 2001 mtx_unlock(&q->lock); 2002 MTX_DESTROY(&q->lock); 2003 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2004 if (q->txq[i].desc) { 2005 mtx_lock_spin(&sc->sge.reg_lock); 2006 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2007 mtx_unlock_spin(&sc->sge.reg_lock); 2008 bus_dmamap_unload(q->txq[i].desc_tag, 2009 q->txq[i].desc_map); 2010 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2011 q->txq[i].desc_map); 2012 bus_dma_tag_destroy(q->txq[i].desc_tag); 2013 bus_dma_tag_destroy(q->txq[i].entry_tag); 2014 } 2015 if (q->txq[i].sdesc) { 2016 free(q->txq[i].sdesc, M_DEVBUF); 2017 } 2018 } 2019 2020 if (q->rspq.desc) { 2021 mtx_lock_spin(&sc->sge.reg_lock); 2022 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2023 mtx_unlock_spin(&sc->sge.reg_lock); 2024 2025 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2026 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2027 q->rspq.desc_map); 2028 bus_dma_tag_destroy(q->rspq.desc_tag); 2029 MTX_DESTROY(&q->rspq.lock); 2030 } 2031 2032 #if defined(INET6) || defined(INET) 2033 tcp_lro_free(&q->lro.ctrl); 2034 #endif 2035 2036 bzero(q, sizeof(*q)); 2037 } 2038 2039 /** 2040 * t3_free_sge_resources - free SGE resources 2041 * @sc: the adapter softc 2042 * 2043 * Frees resources used by the SGE queue sets. 2044 */ 2045 void 2046 t3_free_sge_resources(adapter_t *sc, int nqsets) 2047 { 2048 int i; 2049 2050 for (i = 0; i < nqsets; ++i) { 2051 TXQ_LOCK(&sc->sge.qs[i]); 2052 t3_free_qset(sc, &sc->sge.qs[i]); 2053 } 2054 } 2055 2056 /** 2057 * t3_sge_start - enable SGE 2058 * @sc: the controller softc 2059 * 2060 * Enables the SGE for DMAs. This is the last step in starting packet 2061 * transfers. 2062 */ 2063 void 2064 t3_sge_start(adapter_t *sc) 2065 { 2066 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); 2067 } 2068 2069 /** 2070 * t3_sge_stop - disable SGE operation 2071 * @sc: the adapter 2072 * 2073 * Disables the DMA engine. This can be called in emeregencies (e.g., 2074 * from error interrupts) or from normal process context. In the latter 2075 * case it also disables any pending queue restart tasklets. Note that 2076 * if it is called in interrupt context it cannot disable the restart 2077 * tasklets as it cannot wait, however the tasklets will have no effect 2078 * since the doorbells are disabled and the driver will call this again 2079 * later from process context, at which time the tasklets will be stopped 2080 * if they are still running. 
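 *
 *	A sketch of the process-context cleanup described above (mirroring
 *	the currently disabled "notyet" block in the body) would drain the
 *	per-queue-set restart tasks once the doorbells are off:
 *
 *		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
 *		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);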
2081 */ 2082 void 2083 t3_sge_stop(adapter_t *sc) 2084 { 2085 int i, nqsets; 2086 2087 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); 2088 2089 if (sc->tq == NULL) 2090 return; 2091 2092 for (nqsets = i = 0; i < (sc)->params.nports; i++) 2093 nqsets += sc->port[i].nqsets; 2094 #ifdef notyet 2095 /* 2096 * 2097 * XXX 2098 */ 2099 for (i = 0; i < nqsets; ++i) { 2100 struct sge_qset *qs = &sc->sge.qs[i]; 2101 2102 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2103 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2104 } 2105 #endif 2106 } 2107 2108 /** 2109 * t3_free_tx_desc - reclaims Tx descriptors and their buffers 2110 * @adapter: the adapter 2111 * @q: the Tx queue to reclaim descriptors from 2112 * @reclaimable: the number of descriptors to reclaim 2113 * @m_vec_size: maximum number of buffers to reclaim 2114 * @desc_reclaimed: returns the number of descriptors reclaimed 2115 * 2116 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated 2117 * Tx buffers. Called with the Tx queue lock held. 2118 * 2119 * Returns number of buffers of reclaimed 2120 */ 2121 void 2122 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2123 { 2124 struct tx_sw_desc *txsd; 2125 unsigned int cidx, mask; 2126 struct sge_txq *q = &qs->txq[queue]; 2127 2128 #ifdef T3_TRACE 2129 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2130 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2131 #endif 2132 cidx = q->cidx; 2133 mask = q->size - 1; 2134 txsd = &q->sdesc[cidx]; 2135 2136 mtx_assert(&qs->lock, MA_OWNED); 2137 while (reclaimable--) { 2138 prefetch(q->sdesc[(cidx + 1) & mask].m); 2139 prefetch(q->sdesc[(cidx + 2) & mask].m); 2140 2141 if (txsd->m != NULL) { 2142 if (txsd->flags & TX_SW_DESC_MAPPED) { 2143 bus_dmamap_unload(q->entry_tag, txsd->map); 2144 txsd->flags &= ~TX_SW_DESC_MAPPED; 2145 } 2146 m_freem_list(txsd->m); 2147 txsd->m = NULL; 2148 } else 2149 q->txq_skipped++; 2150 2151 ++txsd; 2152 if (++cidx == q->size) { 2153 cidx = 0; 2154 txsd = q->sdesc; 2155 } 2156 } 2157 q->cidx = cidx; 2158 2159 } 2160 2161 /** 2162 * is_new_response - check if a response is newly written 2163 * @r: the response descriptor 2164 * @q: the response queue 2165 * 2166 * Returns true if a response descriptor contains a yet unprocessed 2167 * response. 2168 */ 2169 static __inline int 2170 is_new_response(const struct rsp_desc *r, 2171 const struct sge_rspq *q) 2172 { 2173 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2174 } 2175 2176 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2177 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2178 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2179 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2180 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2181 2182 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2183 #define NOMEM_INTR_DELAY 2500 2184 2185 #ifdef TCP_OFFLOAD 2186 /** 2187 * write_ofld_wr - write an offload work request 2188 * @adap: the adapter 2189 * @m: the packet to send 2190 * @q: the Tx queue 2191 * @pidx: index of the first Tx descriptor to write 2192 * @gen: the generation value to use 2193 * @ndesc: number of descriptors the packet will occupy 2194 * 2195 * Write an offload work request to send the supplied packet. The packet 2196 * data already carry the work request with most fields populated. 
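 *
 *	The layout consumed below is an ofld_hdr followed immediately by the
 *	work request, with the header flags describing the rest:
 *
 *		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
 *		struct work_request_hdr *wr =
 *		    (struct work_request_hdr *)(oh + 1);
 *		unsigned int ndesc = G_HDR_NDESC(oh->flags);
 *		struct sglist *sgl =
 *		    (oh->flags & F_HDR_SGL) ? oh->sgl : NULL;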
2197 */ 2198 static void 2199 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, 2200 unsigned int pidx, unsigned int gen, unsigned int ndesc) 2201 { 2202 unsigned int sgl_flits, flits; 2203 int i, idx, nsegs, wrlen; 2204 struct work_request_hdr *from; 2205 struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; 2206 struct tx_desc *d = &q->desc[pidx]; 2207 struct txq_state txqs; 2208 struct sglist_seg *segs; 2209 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2210 struct sglist *sgl; 2211 2212 from = (void *)(oh + 1); /* Start of WR within mbuf */ 2213 wrlen = m->m_len - sizeof(*oh); 2214 2215 if (!(oh->flags & F_HDR_SGL)) { 2216 write_imm(d, (caddr_t)from, wrlen, gen); 2217 2218 /* 2219 * mbuf with "real" immediate tx data will be enqueue_wr'd by 2220 * t3_push_frames and freed in wr_ack. Others, like those sent 2221 * down by close_conn, t3_send_reset, etc. should be freed here. 2222 */ 2223 if (!(oh->flags & F_HDR_DF)) 2224 m_free(m); 2225 return; 2226 } 2227 2228 memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); 2229 2230 sgl = oh->sgl; 2231 flits = wrlen / 8; 2232 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; 2233 2234 nsegs = sgl->sg_nseg; 2235 segs = sgl->sg_segs; 2236 for (idx = 0, i = 0; i < nsegs; i++) { 2237 KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); 2238 if (i && idx == 0) 2239 ++sgp; 2240 sgp->len[idx] = htobe32(segs[i].ss_len); 2241 sgp->addr[idx] = htobe64(segs[i].ss_paddr); 2242 idx ^= 1; 2243 } 2244 if (idx) { 2245 sgp->len[idx] = 0; 2246 sgp->addr[idx] = 0; 2247 } 2248 2249 sgl_flits = sgl_len(nsegs); 2250 txqs.gen = gen; 2251 txqs.pidx = pidx; 2252 txqs.compl = 0; 2253 2254 write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, 2255 from->wrh_hi, from->wrh_lo); 2256 } 2257 2258 /** 2259 * ofld_xmit - send a packet through an offload queue 2260 * @adap: the adapter 2261 * @q: the Tx offload queue 2262 * @m: the packet 2263 * 2264 * Send an offload packet through an SGE offload queue. 2265 */ 2266 static int 2267 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2268 { 2269 int ret; 2270 unsigned int ndesc; 2271 unsigned int pidx, gen; 2272 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2273 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2274 2275 ndesc = G_HDR_NDESC(oh->flags); 2276 2277 TXQ_LOCK(qs); 2278 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2279 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2280 if (__predict_false(ret)) { 2281 if (ret == 1) { 2282 TXQ_UNLOCK(qs); 2283 return (EINTR); 2284 } 2285 goto again; 2286 } 2287 2288 gen = q->gen; 2289 q->in_use += ndesc; 2290 pidx = q->pidx; 2291 q->pidx += ndesc; 2292 if (q->pidx >= q->size) { 2293 q->pidx -= q->size; 2294 q->gen ^= 1; 2295 } 2296 2297 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2298 check_ring_tx_db(adap, q, 1); 2299 TXQ_UNLOCK(qs); 2300 2301 return (0); 2302 } 2303 2304 /** 2305 * restart_offloadq - restart a suspended offload queue 2306 * @qs: the queue set cotaining the offload queue 2307 * 2308 * Resumes transmission on a suspended Tx offload queue. 
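 *
 *	This runs as the queue set's qresume task rather than being called
 *	directly; restart_tx() schedules it once the queue has drained
 *	enough:
 *
 *		if (isset(&qs->txq_stopped, TXQ_OFLD) &&
 *		    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
 *		    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
 *			taskqueue_enqueue(sc->tq,
 *			    &qs->txq[TXQ_OFLD].qresume_task);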
2309 */ 2310 static void 2311 restart_offloadq(void *data, int npending) 2312 { 2313 struct mbuf *m; 2314 struct sge_qset *qs = data; 2315 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2316 adapter_t *adap = qs->port->adapter; 2317 int cleaned; 2318 2319 TXQ_LOCK(qs); 2320 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2321 2322 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2323 unsigned int gen, pidx; 2324 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2325 unsigned int ndesc = G_HDR_NDESC(oh->flags); 2326 2327 if (__predict_false(q->size - q->in_use < ndesc)) { 2328 setbit(&qs->txq_stopped, TXQ_OFLD); 2329 if (should_restart_tx(q) && 2330 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2331 goto again; 2332 q->stops++; 2333 break; 2334 } 2335 2336 gen = q->gen; 2337 q->in_use += ndesc; 2338 pidx = q->pidx; 2339 q->pidx += ndesc; 2340 if (q->pidx >= q->size) { 2341 q->pidx -= q->size; 2342 q->gen ^= 1; 2343 } 2344 2345 (void)mbufq_dequeue(&q->sendq); 2346 TXQ_UNLOCK(qs); 2347 write_ofld_wr(adap, m, q, pidx, gen, ndesc); 2348 TXQ_LOCK(qs); 2349 } 2350 #if USE_GTS 2351 set_bit(TXQ_RUNNING, &q->flags); 2352 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2353 #endif 2354 TXQ_UNLOCK(qs); 2355 wmb(); 2356 t3_write_reg(adap, A_SG_KDOORBELL, 2357 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2358 } 2359 2360 /** 2361 * t3_offload_tx - send an offload packet 2362 * @m: the packet 2363 * 2364 * Sends an offload packet. We use the packet priority to select the 2365 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2366 * should be sent as regular or control, bits 1-3 select the queue set. 2367 */ 2368 int 2369 t3_offload_tx(struct adapter *sc, struct mbuf *m) 2370 { 2371 struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); 2372 struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; 2373 2374 if (oh->flags & F_HDR_CTRL) { 2375 m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ 2376 return (ctrl_xmit(sc, qs, m)); 2377 } else 2378 return (ofld_xmit(sc, qs, m)); 2379 } 2380 #endif 2381 2382 static void 2383 restart_tx(struct sge_qset *qs) 2384 { 2385 struct adapter *sc = qs->port->adapter; 2386 2387 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2388 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2389 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2390 qs->txq[TXQ_OFLD].restarts++; 2391 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2392 } 2393 2394 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2395 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2396 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2397 qs->txq[TXQ_CTRL].restarts++; 2398 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2399 } 2400 } 2401 2402 /** 2403 * t3_sge_alloc_qset - initialize an SGE queue set 2404 * @sc: the controller softc 2405 * @id: the queue set id 2406 * @nports: how many Ethernet ports will be using this queue set 2407 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2408 * @p: configuration parameters for this queue set 2409 * @ntxq: number of Tx queues for the queue set 2410 * @pi: port info for queue set 2411 * 2412 * Allocate resources and initialize an SGE queue set. A queue set 2413 * comprises a response queue, two Rx free-buffer queues, and up to 3 2414 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2415 * queue, offload queue, and control queue. 
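 *
 *	Schematically (the exact attach-time caller is not shown here), one
 *	queue set is allocated per configured ring using the parameters kept
 *	in sc->params.sge:
 *
 *		ret = t3_sge_alloc_qset(sc, qset_idx, nports, irq_vec_idx,
 *		    &sc->params.sge.qset[qset_idx], SGE_TXQ_PER_SET, pi);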
2416 */ 2417 int 2418 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2419 const struct qset_params *p, int ntxq, struct port_info *pi) 2420 { 2421 struct sge_qset *q = &sc->sge.qs[id]; 2422 int i, ret = 0; 2423 2424 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2425 q->port = pi; 2426 q->adap = sc; 2427 2428 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2429 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2430 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2431 goto err; 2432 } 2433 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2434 M_NOWAIT | M_ZERO)) == NULL) { 2435 device_printf(sc->dev, "failed to allocate ifq\n"); 2436 goto err; 2437 } 2438 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2439 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2440 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2441 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2442 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2443 2444 init_qset_cntxt(q, id); 2445 q->idx = id; 2446 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2447 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2448 &q->fl[0].desc, &q->fl[0].sdesc, 2449 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2450 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2451 printf("error %d from alloc ring fl0\n", ret); 2452 goto err; 2453 } 2454 2455 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2456 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2457 &q->fl[1].desc, &q->fl[1].sdesc, 2458 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2459 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2460 printf("error %d from alloc ring fl1\n", ret); 2461 goto err; 2462 } 2463 2464 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2465 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2466 &q->rspq.desc_tag, &q->rspq.desc_map, 2467 NULL, NULL)) != 0) { 2468 printf("error %d from alloc ring rspq\n", ret); 2469 goto err; 2470 } 2471 2472 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2473 device_get_unit(sc->dev), irq_vec_idx); 2474 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2475 2476 for (i = 0; i < ntxq; ++i) { 2477 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2478 2479 if ((ret = alloc_ring(sc, p->txq_size[i], 2480 sizeof(struct tx_desc), sz, 2481 &q->txq[i].phys_addr, &q->txq[i].desc, 2482 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2483 &q->txq[i].desc_map, 2484 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2485 printf("error %d from alloc ring tx %i\n", ret, i); 2486 goto err; 2487 } 2488 mbufq_init(&q->txq[i].sendq); 2489 q->txq[i].gen = 1; 2490 q->txq[i].size = p->txq_size[i]; 2491 } 2492 2493 #ifdef TCP_OFFLOAD 2494 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2495 #endif 2496 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2497 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2498 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2499 2500 q->fl[0].gen = q->fl[1].gen = 1; 2501 q->fl[0].size = p->fl_size; 2502 q->fl[1].size = p->jumbo_size; 2503 2504 q->rspq.gen = 1; 2505 q->rspq.cidx = 0; 2506 q->rspq.size = p->rspq_size; 2507 2508 q->txq[TXQ_ETH].stop_thres = nports * 2509 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2510 2511 q->fl[0].buf_size = MCLBYTES; 2512 q->fl[0].zone = zone_pack; 2513 q->fl[0].type = EXT_PACKET; 2514 2515 if (p->jumbo_buf_size == MJUM16BYTES) { 2516 q->fl[1].zone = zone_jumbo16; 2517 q->fl[1].type = EXT_JUMBO16; 2518 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2519 q->fl[1].zone = zone_jumbo9; 2520 q->fl[1].type = EXT_JUMBO9; 2521 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2522 q->fl[1].zone = zone_jumbop; 2523 q->fl[1].type = EXT_JUMBOP; 2524 } else { 2525 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2526 ret = EDOOFUS; 2527 goto err; 2528 } 2529 q->fl[1].buf_size = p->jumbo_buf_size; 2530 2531 /* Allocate and setup the lro_ctrl structure */ 2532 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2533 #if defined(INET6) || defined(INET) 2534 ret = tcp_lro_init(&q->lro.ctrl); 2535 if (ret) { 2536 printf("error %d from tcp_lro_init\n", ret); 2537 goto err; 2538 } 2539 #endif 2540 q->lro.ctrl.ifp = pi->ifp; 2541 2542 mtx_lock_spin(&sc->sge.reg_lock); 2543 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2544 q->rspq.phys_addr, q->rspq.size, 2545 q->fl[0].buf_size, 1, 0); 2546 if (ret) { 2547 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2548 goto err_unlock; 2549 } 2550 2551 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2552 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2553 q->fl[i].phys_addr, q->fl[i].size, 2554 q->fl[i].buf_size, p->cong_thres, 1, 2555 0); 2556 if (ret) { 2557 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2558 goto err_unlock; 2559 } 2560 } 2561 2562 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2563 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2564 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2565 1, 0); 2566 if (ret) { 2567 printf("error %d from t3_sge_init_ecntxt\n", ret); 2568 goto err_unlock; 2569 } 2570 2571 if (ntxq > 1) { 2572 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2573 USE_GTS, SGE_CNTXT_OFLD, id, 2574 q->txq[TXQ_OFLD].phys_addr, 2575 q->txq[TXQ_OFLD].size, 0, 1, 0); 2576 if (ret) { 2577 printf("error %d from t3_sge_init_ecntxt\n", ret); 2578 goto err_unlock; 2579 } 2580 } 2581 2582 if (ntxq > 2) { 2583 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2584 SGE_CNTXT_CTRL, id, 2585 q->txq[TXQ_CTRL].phys_addr, 2586 q->txq[TXQ_CTRL].size, 2587 q->txq[TXQ_CTRL].token, 1, 0); 2588 if (ret) { 2589 printf("error %d from t3_sge_init_ecntxt\n", ret); 
2590 goto err_unlock; 2591 } 2592 } 2593 2594 mtx_unlock_spin(&sc->sge.reg_lock); 2595 t3_update_qset_coalesce(q, p); 2596 2597 refill_fl(sc, &q->fl[0], q->fl[0].size); 2598 refill_fl(sc, &q->fl[1], q->fl[1].size); 2599 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2600 2601 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2602 V_NEWTIMER(q->rspq.holdoff_tmr)); 2603 2604 return (0); 2605 2606 err_unlock: 2607 mtx_unlock_spin(&sc->sge.reg_lock); 2608 err: 2609 TXQ_LOCK(q); 2610 t3_free_qset(sc, q); 2611 2612 return (ret); 2613 } 2614 2615 /* 2616 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2617 * ethernet data. Hardware assistance with various checksums and any vlan tag 2618 * will also be taken into account here. 2619 */ 2620 void 2621 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) 2622 { 2623 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2624 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2625 struct ifnet *ifp = pi->ifp; 2626 2627 if (cpl->vlan_valid) { 2628 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2629 m->m_flags |= M_VLANTAG; 2630 } 2631 2632 m->m_pkthdr.rcvif = ifp; 2633 /* 2634 * adjust after conversion to mbuf chain 2635 */ 2636 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2637 m->m_len -= (sizeof(*cpl) + ethpad); 2638 m->m_data += (sizeof(*cpl) + ethpad); 2639 2640 if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { 2641 struct ether_header *eh = mtod(m, void *); 2642 uint16_t eh_type; 2643 2644 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2645 struct ether_vlan_header *evh = mtod(m, void *); 2646 2647 eh_type = evh->evl_proto; 2648 } else 2649 eh_type = eh->ether_type; 2650 2651 if (ifp->if_capenable & IFCAP_RXCSUM && 2652 eh_type == htons(ETHERTYPE_IP)) { 2653 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 2654 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2655 m->m_pkthdr.csum_data = 0xffff; 2656 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2657 eh_type == htons(ETHERTYPE_IPV6)) { 2658 m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 2659 CSUM_PSEUDO_HDR); 2660 m->m_pkthdr.csum_data = 0xffff; 2661 } 2662 } 2663 } 2664 2665 /** 2666 * get_packet - return the next ingress packet buffer from a free list 2667 * @adap: the adapter that received the packet 2668 * @drop_thres: # of remaining buffers before we start dropping packets 2669 * @qs: the qset that the SGE free list holding the packet belongs to 2670 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2671 * @r: response descriptor 2672 * 2673 * Get the next packet from a free list and complete setup of the 2674 * sk_buff. If the packet is small we make a copy and recycle the 2675 * original buffer, otherwise we use the original buffer itself. If a 2676 * positive drop threshold is supplied packets are dropped and their 2677 * buffers recycled if (a) the number of remaining buffers is under the 2678 * threshold and the packet is too big to copy, or (b) the packet should 2679 * be copied but there is no memory for the copy. 2680 */ 2681 static int 2682 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2683 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2684 { 2685 2686 unsigned int len_cq = ntohl(r->len_cq); 2687 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2688 int mask, cidx = fl->cidx; 2689 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2690 uint32_t len = G_RSPD_LEN(len_cq); 2691 uint32_t flags = M_EXT; 2692 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2693 caddr_t cl; 2694 struct mbuf *m; 2695 int ret = 0; 2696 2697 mask = fl->size - 1; 2698 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2699 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2700 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2701 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2702 2703 fl->credits--; 2704 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2705 2706 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2707 sopeop == RSPQ_SOP_EOP) { 2708 if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) 2709 goto skip_recycle; 2710 cl = mtod(m, void *); 2711 memcpy(cl, sd->rxsd_cl, len); 2712 recycle_rx_buf(adap, fl, fl->cidx); 2713 m->m_pkthdr.len = m->m_len = len; 2714 m->m_flags = 0; 2715 mh->mh_head = mh->mh_tail = m; 2716 ret = 1; 2717 goto done; 2718 } else { 2719 skip_recycle: 2720 bus_dmamap_unload(fl->entry_tag, sd->map); 2721 cl = sd->rxsd_cl; 2722 m = sd->m; 2723 2724 if ((sopeop == RSPQ_SOP_EOP) || 2725 (sopeop == RSPQ_SOP)) 2726 flags |= M_PKTHDR; 2727 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2728 if (fl->zone == zone_pack) { 2729 /* 2730 * restore clobbered data pointer 2731 */ 2732 m->m_data = m->m_ext.ext_buf; 2733 } else { 2734 m_cljset(m, cl, fl->type); 2735 } 2736 m->m_len = len; 2737 } 2738 switch(sopeop) { 2739 case RSPQ_SOP_EOP: 2740 ret = 1; 2741 /* FALLTHROUGH */ 2742 case RSPQ_SOP: 2743 mh->mh_head = mh->mh_tail = m; 2744 m->m_pkthdr.len = len; 2745 break; 2746 case RSPQ_EOP: 2747 ret = 1; 2748 /* FALLTHROUGH */ 2749 case RSPQ_NSOP_NEOP: 2750 if (mh->mh_tail == NULL) { 2751 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2752 m_freem(m); 2753 break; 2754 } 2755 mh->mh_tail->m_next = m; 2756 mh->mh_tail = m; 2757 mh->mh_head->m_pkthdr.len += len; 2758 break; 2759 } 2760 if (cxgb_debug) 2761 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2762 done: 2763 if (++fl->cidx == fl->size) 2764 fl->cidx = 0; 2765 2766 return (ret); 2767 } 2768 2769 /** 2770 * handle_rsp_cntrl_info - handles control information in a response 2771 * @qs: the queue set corresponding to the response 2772 * @flags: the response control flags 2773 * 2774 * Handles the control information of an SGE response, such as GTS 2775 * indications and completion credits for the queue set's Tx queues. 2776 * HW coalesces credits, we don't do any extra SW coalescing. 
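 *
 *	As an illustrative example, a response whose flags encode
 *	V_RSPD_TXQ0_CR(2) and V_RSPD_TXQ1_CR(1) results in:
 *
 *		qs->txq[TXQ_ETH].processed  += 2;	(G_RSPD_TXQ0_CR)
 *		qs->txq[TXQ_OFLD].processed += 1;	(G_RSPD_TXQ1_CR)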
2777 */ 2778 static __inline void 2779 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2780 { 2781 unsigned int credits; 2782 2783 #if USE_GTS 2784 if (flags & F_RSPD_TXQ0_GTS) 2785 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2786 #endif 2787 credits = G_RSPD_TXQ0_CR(flags); 2788 if (credits) 2789 qs->txq[TXQ_ETH].processed += credits; 2790 2791 credits = G_RSPD_TXQ2_CR(flags); 2792 if (credits) 2793 qs->txq[TXQ_CTRL].processed += credits; 2794 2795 # if USE_GTS 2796 if (flags & F_RSPD_TXQ1_GTS) 2797 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2798 # endif 2799 credits = G_RSPD_TXQ1_CR(flags); 2800 if (credits) 2801 qs->txq[TXQ_OFLD].processed += credits; 2802 2803 } 2804 2805 static void 2806 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2807 unsigned int sleeping) 2808 { 2809 ; 2810 } 2811 2812 /** 2813 * process_responses - process responses from an SGE response queue 2814 * @adap: the adapter 2815 * @qs: the queue set to which the response queue belongs 2816 * @budget: how many responses can be processed in this round 2817 * 2818 * Process responses from an SGE response queue up to the supplied budget. 2819 * Responses include received packets as well as credits and other events 2820 * for the queues that belong to the response queue's queue set. 2821 * A negative budget is effectively unlimited. 2822 * 2823 * Additionally choose the interrupt holdoff time for the next interrupt 2824 * on this queue. If the system is under memory shortage use a fairly 2825 * long delay to help recovery. 2826 */ 2827 static int 2828 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2829 { 2830 struct sge_rspq *rspq = &qs->rspq; 2831 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2832 int budget_left = budget; 2833 unsigned int sleeping = 0; 2834 #if defined(INET6) || defined(INET) 2835 int lro_enabled = qs->lro.enabled; 2836 int skip_lro; 2837 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2838 #endif 2839 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2840 #ifdef DEBUG 2841 static int last_holdoff = 0; 2842 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2843 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2844 last_holdoff = rspq->holdoff_tmr; 2845 } 2846 #endif 2847 rspq->next_holdoff = rspq->holdoff_tmr; 2848 2849 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2850 int eth, eop = 0, ethpad = 0; 2851 uint32_t flags = ntohl(r->flags); 2852 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2853 uint8_t opcode = r->rss_hdr.opcode; 2854 2855 eth = (opcode == CPL_RX_PKT); 2856 2857 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2858 struct mbuf *m; 2859 2860 if (cxgb_debug) 2861 printf("async notification\n"); 2862 2863 if (mh->mh_head == NULL) { 2864 mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); 2865 m = mh->mh_head; 2866 } else { 2867 m = m_gethdr(M_NOWAIT, MT_DATA); 2868 } 2869 if (m == NULL) 2870 goto no_mem; 2871 2872 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2873 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2874 *mtod(m, char *) = CPL_ASYNC_NOTIF; 2875 opcode = CPL_ASYNC_NOTIF; 2876 eop = 1; 2877 rspq->async_notif++; 2878 goto skip; 2879 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2880 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 2881 2882 if (m == NULL) { 2883 no_mem: 2884 rspq->next_holdoff = NOMEM_INTR_DELAY; 2885 budget_left--; 2886 break; 2887 } 2888 if (mh->mh_head == NULL) 2889 mh->mh_head = m; 2890 else 2891 mh->mh_tail->m_next = m; 2892 mh->mh_tail = m; 2893 2894 get_imm_packet(adap, r, m); 2895 mh->mh_head->m_pkthdr.len += 
m->m_len; 2896 eop = 1; 2897 rspq->imm_data++; 2898 } else if (r->len_cq) { 2899 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 2900 2901 eop = get_packet(adap, drop_thresh, qs, mh, r); 2902 if (eop) { 2903 if (r->rss_hdr.hash_type && !adap->timestamp) { 2904 M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE); 2905 mh->mh_head->m_pkthdr.flowid = rss_hash; 2906 } 2907 } 2908 2909 ethpad = 2; 2910 } else { 2911 rspq->pure_rsps++; 2912 } 2913 skip: 2914 if (flags & RSPD_CTRL_MASK) { 2915 sleeping |= flags & RSPD_GTS_MASK; 2916 handle_rsp_cntrl_info(qs, flags); 2917 } 2918 2919 if (!eth && eop) { 2920 rspq->offload_pkts++; 2921 #ifdef TCP_OFFLOAD 2922 adap->cpl_handler[opcode](qs, r, mh->mh_head); 2923 #else 2924 m_freem(mh->mh_head); 2925 #endif 2926 mh->mh_head = NULL; 2927 } else if (eth && eop) { 2928 struct mbuf *m = mh->mh_head; 2929 2930 t3_rx_eth(adap, m, ethpad); 2931 2932 /* 2933 * The T304 sends incoming packets on any qset. If LRO 2934 * is also enabled, we could end up sending packet up 2935 * lro_ctrl->ifp's input. That is incorrect. 2936 * 2937 * The mbuf's rcvif was derived from the cpl header and 2938 * is accurate. Skip LRO and just use that. 2939 */ 2940 #if defined(INET6) || defined(INET) 2941 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 2942 2943 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 2944 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 2945 ) { 2946 /* successfully queue'd for LRO */ 2947 } else 2948 #endif 2949 { 2950 /* 2951 * LRO not enabled, packet unsuitable for LRO, 2952 * or unable to queue. Pass it up right now in 2953 * either case. 2954 */ 2955 struct ifnet *ifp = m->m_pkthdr.rcvif; 2956 (*ifp->if_input)(ifp, m); 2957 } 2958 mh->mh_head = NULL; 2959 2960 } 2961 2962 r++; 2963 if (__predict_false(++rspq->cidx == rspq->size)) { 2964 rspq->cidx = 0; 2965 rspq->gen ^= 1; 2966 r = rspq->desc; 2967 } 2968 2969 if (++rspq->credits >= 64) { 2970 refill_rspq(adap, rspq, rspq->credits); 2971 rspq->credits = 0; 2972 } 2973 __refill_fl_lt(adap, &qs->fl[0], 32); 2974 __refill_fl_lt(adap, &qs->fl[1], 32); 2975 --budget_left; 2976 } 2977 2978 #if defined(INET6) || defined(INET) 2979 /* Flush LRO */ 2980 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 2981 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 2982 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 2983 tcp_lro_flush(lro_ctrl, queued); 2984 } 2985 #endif 2986 2987 if (sleeping) 2988 check_ring_db(adap, qs, sleeping); 2989 2990 mb(); /* commit Tx queue processed updates */ 2991 if (__predict_false(qs->txq_stopped > 1)) 2992 restart_tx(qs); 2993 2994 __refill_fl_lt(adap, &qs->fl[0], 512); 2995 __refill_fl_lt(adap, &qs->fl[1], 512); 2996 budget -= budget_left; 2997 return (budget); 2998 } 2999 3000 /* 3001 * A helper function that processes responses and issues GTS. 3002 */ 3003 static __inline int 3004 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3005 { 3006 int work; 3007 static int last_holdoff = 0; 3008 3009 work = process_responses(adap, rspq_to_qset(rq), -1); 3010 3011 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3012 printf("next_holdoff=%d\n", rq->next_holdoff); 3013 last_holdoff = rq->next_holdoff; 3014 } 3015 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3016 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3017 3018 return (work); 3019 } 3020 3021 3022 /* 3023 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 
3024 * Handles data events from SGE response queues as well as error and other 3025 * async events as they all use the same interrupt pin. We use one SGE 3026 * response queue per port in this mode and protect all response queues with 3027 * queue 0's lock. 3028 */ 3029 void 3030 t3b_intr(void *data) 3031 { 3032 uint32_t i, map; 3033 adapter_t *adap = data; 3034 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3035 3036 t3_write_reg(adap, A_PL_CLI, 0); 3037 map = t3_read_reg(adap, A_SG_DATA_INTR); 3038 3039 if (!map) 3040 return; 3041 3042 if (__predict_false(map & F_ERRINTR)) { 3043 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3044 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3045 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3046 } 3047 3048 mtx_lock(&q0->lock); 3049 for_each_port(adap, i) 3050 if (map & (1 << i)) 3051 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3052 mtx_unlock(&q0->lock); 3053 } 3054 3055 /* 3056 * The MSI interrupt handler. This needs to handle data events from SGE 3057 * response queues as well as error and other async events as they all use 3058 * the same MSI vector. We use one SGE response queue per port in this mode 3059 * and protect all response queues with queue 0's lock. 3060 */ 3061 void 3062 t3_intr_msi(void *data) 3063 { 3064 adapter_t *adap = data; 3065 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3066 int i, new_packets = 0; 3067 3068 mtx_lock(&q0->lock); 3069 3070 for_each_port(adap, i) 3071 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3072 new_packets = 1; 3073 mtx_unlock(&q0->lock); 3074 if (new_packets == 0) { 3075 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3076 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3077 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3078 } 3079 } 3080 3081 void 3082 t3_intr_msix(void *data) 3083 { 3084 struct sge_qset *qs = data; 3085 adapter_t *adap = qs->port->adapter; 3086 struct sge_rspq *rspq = &qs->rspq; 3087 3088 if (process_responses_gts(adap, rspq) == 0) 3089 rspq->unhandled_irqs++; 3090 } 3091 3092 #define QDUMP_SBUF_SIZE 32 * 400 3093 static int 3094 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3095 { 3096 struct sge_rspq *rspq; 3097 struct sge_qset *qs; 3098 int i, err, dump_end, idx; 3099 struct sbuf *sb; 3100 struct rsp_desc *rspd; 3101 uint32_t data[4]; 3102 3103 rspq = arg1; 3104 qs = rspq_to_qset(rspq); 3105 if (rspq->rspq_dump_count == 0) 3106 return (0); 3107 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3108 log(LOG_WARNING, 3109 "dump count is too large %d\n", rspq->rspq_dump_count); 3110 rspq->rspq_dump_count = 0; 3111 return (EINVAL); 3112 } 3113 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3114 log(LOG_WARNING, 3115 "dump start of %d is greater than queue size\n", 3116 rspq->rspq_dump_start); 3117 rspq->rspq_dump_start = 0; 3118 return (EINVAL); 3119 } 3120 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3121 if (err) 3122 return (err); 3123 err = sysctl_wire_old_buffer(req, 0); 3124 if (err) 3125 return (err); 3126 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3127 3128 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3129 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3130 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3131 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3132 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3133 3134 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3135 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3136 3137 dump_end = 
rspq->rspq_dump_start + rspq->rspq_dump_count; 3138 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3139 idx = i & (RSPQ_Q_SIZE-1); 3140 3141 rspd = &rspq->desc[idx]; 3142 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3143 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3144 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3145 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3146 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3147 be32toh(rspd->len_cq), rspd->intr_gen); 3148 } 3149 3150 err = sbuf_finish(sb); 3151 /* Output a trailing NUL. */ 3152 if (err == 0) 3153 err = SYSCTL_OUT(req, "", 1); 3154 sbuf_delete(sb); 3155 return (err); 3156 } 3157 3158 static int 3159 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3160 { 3161 struct sge_txq *txq; 3162 struct sge_qset *qs; 3163 int i, j, err, dump_end; 3164 struct sbuf *sb; 3165 struct tx_desc *txd; 3166 uint32_t *WR, wr_hi, wr_lo, gen; 3167 uint32_t data[4]; 3168 3169 txq = arg1; 3170 qs = txq_to_qset(txq, TXQ_ETH); 3171 if (txq->txq_dump_count == 0) { 3172 return (0); 3173 } 3174 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3175 log(LOG_WARNING, 3176 "dump count is too large %d\n", txq->txq_dump_count); 3177 txq->txq_dump_count = 1; 3178 return (EINVAL); 3179 } 3180 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3181 log(LOG_WARNING, 3182 "dump start of %d is greater than queue size\n", 3183 txq->txq_dump_start); 3184 txq->txq_dump_start = 0; 3185 return (EINVAL); 3186 } 3187 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3188 if (err) 3189 return (err); 3190 err = sysctl_wire_old_buffer(req, 0); 3191 if (err) 3192 return (err); 3193 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3194 3195 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3196 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3197 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3198 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3199 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3200 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3201 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3202 txq->txq_dump_start, 3203 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3204 3205 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3206 for (i = txq->txq_dump_start; i < dump_end; i++) { 3207 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3208 WR = (uint32_t *)txd->flit; 3209 wr_hi = ntohl(WR[0]); 3210 wr_lo = ntohl(WR[1]); 3211 gen = G_WR_GEN(wr_lo); 3212 3213 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3214 wr_hi, wr_lo, gen); 3215 for (j = 2; j < 30; j += 4) 3216 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3217 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3218 3219 } 3220 err = sbuf_finish(sb); 3221 /* Output a trailing NUL. 
*/ 3222 if (err == 0) 3223 err = SYSCTL_OUT(req, "", 1); 3224 sbuf_delete(sb); 3225 return (err); 3226 } 3227 3228 static int 3229 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3230 { 3231 struct sge_txq *txq; 3232 struct sge_qset *qs; 3233 int i, j, err, dump_end; 3234 struct sbuf *sb; 3235 struct tx_desc *txd; 3236 uint32_t *WR, wr_hi, wr_lo, gen; 3237 3238 txq = arg1; 3239 qs = txq_to_qset(txq, TXQ_CTRL); 3240 if (txq->txq_dump_count == 0) { 3241 return (0); 3242 } 3243 if (txq->txq_dump_count > 256) { 3244 log(LOG_WARNING, 3245 "dump count is too large %d\n", txq->txq_dump_count); 3246 txq->txq_dump_count = 1; 3247 return (EINVAL); 3248 } 3249 if (txq->txq_dump_start > 255) { 3250 log(LOG_WARNING, 3251 "dump start of %d is greater than queue size\n", 3252 txq->txq_dump_start); 3253 txq->txq_dump_start = 0; 3254 return (EINVAL); 3255 } 3256 3257 err = sysctl_wire_old_buffer(req, 0); 3258 if (err != 0) 3259 return (err); 3260 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3261 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3262 txq->txq_dump_start, 3263 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3264 3265 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3266 for (i = txq->txq_dump_start; i < dump_end; i++) { 3267 txd = &txq->desc[i & (255)]; 3268 WR = (uint32_t *)txd->flit; 3269 wr_hi = ntohl(WR[0]); 3270 wr_lo = ntohl(WR[1]); 3271 gen = G_WR_GEN(wr_lo); 3272 3273 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3274 wr_hi, wr_lo, gen); 3275 for (j = 2; j < 30; j += 4) 3276 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3277 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3278 3279 } 3280 err = sbuf_finish(sb); 3281 /* Output a trailing NUL. */ 3282 if (err == 0) 3283 err = SYSCTL_OUT(req, "", 1); 3284 sbuf_delete(sb); 3285 return (err); 3286 } 3287 3288 static int 3289 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3290 { 3291 adapter_t *sc = arg1; 3292 struct qset_params *qsp = &sc->params.sge.qset[0]; 3293 int coalesce_usecs; 3294 struct sge_qset *qs; 3295 int i, j, err, nqsets = 0; 3296 struct mtx *lock; 3297 3298 if ((sc->flags & FULL_INIT_DONE) == 0) 3299 return (ENXIO); 3300 3301 coalesce_usecs = qsp->coalesce_usecs; 3302 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3303 3304 if (err != 0) { 3305 return (err); 3306 } 3307 if (coalesce_usecs == qsp->coalesce_usecs) 3308 return (0); 3309 3310 for (i = 0; i < sc->params.nports; i++) 3311 for (j = 0; j < sc->port[i].nqsets; j++) 3312 nqsets++; 3313 3314 coalesce_usecs = max(1, coalesce_usecs); 3315 3316 for (i = 0; i < nqsets; i++) { 3317 qs = &sc->sge.qs[i]; 3318 qsp = &sc->params.sge.qset[i]; 3319 qsp->coalesce_usecs = coalesce_usecs; 3320 3321 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3322 &sc->sge.qs[0].rspq.lock; 3323 3324 mtx_lock(lock); 3325 t3_update_qset_coalesce(qs, qsp); 3326 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3327 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3328 mtx_unlock(lock); 3329 } 3330 3331 return (0); 3332 } 3333 3334 static int 3335 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3336 { 3337 adapter_t *sc = arg1; 3338 int rc, timestamp; 3339 3340 if ((sc->flags & FULL_INIT_DONE) == 0) 3341 return (ENXIO); 3342 3343 timestamp = sc->timestamp; 3344 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3345 3346 if (rc != 0) 3347 return (rc); 3348 3349 if (timestamp != sc->timestamp) { 3350 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3351 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3352 sc->timestamp = timestamp; 3353 } 3354 3355 return (0); 3356 } 3357 3358 void 3359 t3_add_attach_sysctls(adapter_t *sc) 3360 { 3361 struct sysctl_ctx_list *ctx; 3362 struct sysctl_oid_list *children; 3363 3364 ctx = device_get_sysctl_ctx(sc->dev); 3365 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3366 3367 /* random information */ 3368 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3369 "firmware_version", 3370 CTLFLAG_RD, sc->fw_version, 3371 0, "firmware version"); 3372 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3373 "hw_revision", 3374 CTLFLAG_RD, &sc->params.rev, 3375 0, "chip model"); 3376 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3377 "port_types", 3378 CTLFLAG_RD, sc->port_types, 3379 0, "type of ports"); 3380 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3381 "enable_debug", 3382 CTLFLAG_RW, &cxgb_debug, 3383 0, "enable verbose debugging output"); 3384 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3385 CTLFLAG_RD, &sc->tunq_coalesce, 3386 "#tunneled packets freed"); 3387 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3388 "txq_overrun", 3389 CTLFLAG_RD, &txq_fills, 3390 0, "#times txq overrun"); 3391 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3392 "core_clock", 3393 CTLFLAG_RD, &sc->params.vpd.cclk, 3394 0, "core clock frequency (in KHz)"); 3395 } 3396 3397 3398 static const char *rspq_name = "rspq"; 3399 static const char *txq_names[] = 3400 { 3401 "txq_eth", 3402 "txq_ofld", 3403 "txq_ctrl" 3404 }; 3405 3406 static int 3407 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3408 { 3409 struct port_info *p = arg1; 3410 uint64_t *parg; 3411 3412 if (!p) 3413 return (EINVAL); 3414 3415 cxgb_refresh_stats(p); 3416 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3417 3418 return (sysctl_handle_64(oidp, parg, 0, req)); 3419 } 3420 3421 void 3422 t3_add_configured_sysctls(adapter_t *sc) 3423 { 3424 struct sysctl_ctx_list *ctx; 3425 struct sysctl_oid_list *children; 3426 int i, j; 3427 3428 ctx = device_get_sysctl_ctx(sc->dev); 3429 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3430 3431 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3432 "intr_coal", 3433 CTLTYPE_INT|CTLFLAG_RW, sc, 3434 0, t3_set_coalesce_usecs, 3435 "I", "interrupt coalescing timer (us)"); 3436 3437 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3438 "pkt_timestamp", 3439 CTLTYPE_INT | CTLFLAG_RW, sc, 3440 0, t3_pkt_timestamp, 3441 "I", "provide packet timestamp instead of connection hash"); 3442 3443 for (i = 0; i < sc->params.nports; i++) { 3444 struct port_info *pi = &sc->port[i]; 3445 struct sysctl_oid *poid; 3446 struct sysctl_oid_list *poidlist; 3447 struct mac_stats *mstats = &pi->mac.stats; 3448 3449 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3450 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3451 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3452 poidlist = SYSCTL_CHILDREN(poid); 3453 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3454 "nqsets", CTLFLAG_RD, &pi->nqsets, 3455 0, "#queue sets"); 3456 3457 for (j = 0; j < pi->nqsets; j++) { 3458 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3459 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3460 *ctrlqpoid, *lropoid; 3461 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3462 *txqpoidlist, *ctrlqpoidlist, 3463 *lropoidlist; 3464 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3465 3466 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3467 3468 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3469 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3470 qspoidlist = SYSCTL_CHILDREN(qspoid); 3471 3472 
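			/*
			 * Per-queue-set nodes added below: free-list
			 * counters, response-queue statistics, Ethernet and
			 * control Tx-queue statistics, and LRO statistics.
			 */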
SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3473 CTLFLAG_RD, &qs->fl[0].empty, 0, 3474 "freelist #0 empty"); 3475 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3476 CTLFLAG_RD, &qs->fl[1].empty, 0, 3477 "freelist #1 empty"); 3478 3479 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3480 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3481 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3482 3483 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3484 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3485 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3486 3487 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3488 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3489 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3490 3491 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3492 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3493 lropoidlist = SYSCTL_CHILDREN(lropoid); 3494 3495 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3496 CTLFLAG_RD, &qs->rspq.size, 3497 0, "#entries in response queue"); 3498 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3499 CTLFLAG_RD, &qs->rspq.cidx, 3500 0, "consumer index"); 3501 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3502 CTLFLAG_RD, &qs->rspq.credits, 3503 0, "#credits"); 3504 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3505 CTLFLAG_RD, &qs->rspq.starved, 3506 0, "#times starved"); 3507 SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3508 CTLFLAG_RD, &qs->rspq.phys_addr, 3509 "physical_address_of the queue"); 3510 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3511 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3512 0, "start rspq dump entry"); 3513 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3514 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3515 0, "#rspq entries to dump"); 3516 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3517 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3518 0, t3_dump_rspq, "A", "dump of the response queue"); 3519 3520 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3521 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3522 "#tunneled packets dropped"); 3523 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3524 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3525 0, "#tunneled packets waiting to be sent"); 3526 #if 0 3527 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3528 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3529 0, "#tunneled packets queue producer index"); 3530 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3531 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3532 0, "#tunneled packets queue consumer index"); 3533 #endif 3534 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3535 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3536 0, "#tunneled packets processed by the card"); 3537 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3538 CTLFLAG_RD, &txq->cleaned, 3539 0, "#tunneled packets cleaned"); 3540 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3541 CTLFLAG_RD, &txq->in_use, 3542 0, "#tunneled packet slots in use"); 3543 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", 3544 CTLFLAG_RD, &txq->txq_frees, 3545 "#tunneled packets freed"); 3546 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3547 CTLFLAG_RD, &txq->txq_skipped, 3548 0, "#tunneled packet descriptors skipped"); 3549 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3550 CTLFLAG_RD, &txq->txq_coalesced, 3551 "#tunneled packets coalesced"); 3552 
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3553 CTLFLAG_RD, &txq->txq_enqueued, 3554 0, "#tunneled packets enqueued to hardware"); 3555 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3556 CTLFLAG_RD, &qs->txq_stopped, 3557 0, "tx queues stopped"); 3558 SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3559 CTLFLAG_RD, &txq->phys_addr, 3560 "physical_address_of the queue"); 3561 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3562 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3563 0, "txq generation"); 3564 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3565 CTLFLAG_RD, &txq->cidx, 3566 0, "hardware queue cidx"); 3567 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3568 CTLFLAG_RD, &txq->pidx, 3569 0, "hardware queue pidx"); 3570 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3571 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3572 0, "txq start idx for dump"); 3573 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3574 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3575 0, "txq #entries to dump"); 3576 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3577 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3578 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3579 3580 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3581 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3582 0, "ctrlq start idx for dump"); 3583 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3584 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3585 0, "ctrl #entries to dump"); 3586 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3587 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3588 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3589 3590 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3591 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3592 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3593 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3594 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3595 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3596 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3597 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3598 } 3599 3600 /* Now add a node for mac stats. */ 3601 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3602 CTLFLAG_RD, NULL, "MAC statistics"); 3603 poidlist = SYSCTL_CHILDREN(poid); 3604 3605 /* 3606 * We (ab)use the length argument (arg2) to pass on the offset 3607 * of the data that we are interested in. This is only required 3608 * for the quad counters that are updated from the hardware (we 3609 * make sure that we return the latest value). 3610 * sysctl_handle_macstat first updates *all* the counters from 3611 * the hardware, and then returns the latest value of the 3612 * requested counter. Best would be to update only the 3613 * requested counter from hardware, but t3_mac_update_stats() 3614 * hides all the register details and we don't want to dive into 3615 * all that here. 
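		 *
		 * Concretely, CXGB_SYSCTL_ADD_QUAD(tx_octets) below
		 * registers its node with
		 * arg2 = offsetof(struct mac_stats, tx_octets), and
		 * sysctl_handle_macstat() then resolves the counter as
		 *
		 *	parg = (uint64_t *)((uint8_t *)&p->mac.stats + arg2);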
3616 */ 3617 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3618 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3619 sysctl_handle_macstat, "QU", 0) 3620 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3621 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3622 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3623 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3624 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3625 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3626 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3627 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3628 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3629 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3630 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3631 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3632 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3633 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3634 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3635 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3636 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3637 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3638 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3639 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3640 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3641 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3642 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3643 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3644 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3645 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3646 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3647 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3648 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3649 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3650 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3651 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3652 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3653 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3654 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3655 CXGB_SYSCTL_ADD_QUAD(rx_short); 3656 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3657 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3658 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3659 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3660 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3661 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3662 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3663 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3664 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3665 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3666 #undef CXGB_SYSCTL_ADD_QUAD 3667 3668 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3669 CTLFLAG_RD, &mstats->a, 0) 3670 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3671 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3672 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3673 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3674 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3675 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3676 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3677 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3678 CXGB_SYSCTL_ADD_ULONG(num_resets); 3679 CXGB_SYSCTL_ADD_ULONG(link_faults); 3680 #undef CXGB_SYSCTL_ADD_ULONG 3681 } 3682 } 3683 3684 /** 3685 * t3_get_desc - dump an SGE descriptor for debugging purposes 3686 * @qs: the queue set 3687 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3688 * @idx: the descriptor index in the queue 3689 * @data: where to dump the descriptor contents 3690 * 3691 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3692 * size of the descriptor. 
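 *
 *	For instance, a hypothetical debug helper reading descriptor 0 of
 *	the response queue (qnum 3) would do:
 *
 *		unsigned char buf[sizeof(struct rsp_desc)];
 *		int len;
 *
 *		len = t3_get_desc(qs, 3, 0, buf);
 *		(len == sizeof(struct rsp_desc) on success)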
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return -EINVAL;
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return sizeof(struct tx_desc);
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return sizeof(struct rsp_desc);
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return sizeof(struct rx_desc);
}