1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/conf.h> 41 #include <machine/bus.h> 42 #include <machine/resource.h> 43 #include <sys/bus_dma.h> 44 #include <sys/rman.h> 45 #include <sys/queue.h> 46 #include <sys/sysctl.h> 47 #include <sys/taskqueue.h> 48 49 #include <sys/proc.h> 50 #include <sys/sbuf.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/systm.h> 54 #include <sys/syslog.h> 55 #include <sys/socket.h> 56 57 #include <net/bpf.h> 58 #include <net/ethernet.h> 59 #include <net/if.h> 60 #include <net/if_vlan_var.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <dev/pci/pcireg.h> 68 #include <dev/pci/pcivar.h> 69 70 #include <vm/vm.h> 71 #include <vm/pmap.h> 72 73 #include <cxgb_include.h> 74 #include <sys/mvec.h> 75 76 int txq_fills = 0; 77 int multiq_tx_enable = 1; 78 79 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 80 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 81 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 82 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 83 "size of per-queue mbuf ring"); 84 85 static int cxgb_tx_coalesce_force = 0; 86 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 87 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 88 &cxgb_tx_coalesce_force, 0, 89 "coalesce small packets into a single work request regardless of ring state"); 90 91 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 92 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 93 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 94 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 95 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 96 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 97 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 98 99 100 static int 
cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 102 &cxgb_tx_coalesce_enable_start); 103 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, 104 &cxgb_tx_coalesce_enable_start, 0, 105 "coalesce enable threshold"); 106 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 107 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); 108 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, 109 &cxgb_tx_coalesce_enable_stop, 0, 110 "coalesce disable threshold"); 111 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 112 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); 113 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, 114 &cxgb_tx_reclaim_threshold, 0, 115 "tx cleaning minimum threshold"); 116 117 /* 118 * XXX don't re-enable this until TOE stops assuming 119 * we have an m_ext 120 */ 121 static int recycle_enable = 0; 122 123 extern int cxgb_use_16k_clusters; 124 extern int nmbjumbop; 125 extern int nmbjumbo9; 126 extern int nmbjumbo16; 127 128 #define USE_GTS 0 129 130 #define SGE_RX_SM_BUF_SIZE 1536 131 #define SGE_RX_DROP_THRES 16 132 #define SGE_RX_COPY_THRES 128 133 134 /* 135 * Period of the Tx buffer reclaim timer. This timer does not need to run 136 * frequently as Tx buffers are usually reclaimed by new Tx packets. 137 */ 138 #define TX_RECLAIM_PERIOD (hz >> 1) 139 140 /* 141 * Values for sge_txq.flags 142 */ 143 enum { 144 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 145 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 146 }; 147 148 struct tx_desc { 149 uint64_t flit[TX_DESC_FLITS]; 150 } __packed; 151 152 struct rx_desc { 153 uint32_t addr_lo; 154 uint32_t len_gen; 155 uint32_t gen2; 156 uint32_t addr_hi; 157 } __packed; 158 159 struct rsp_desc { /* response queue descriptor */ 160 struct rss_header rss_hdr; 161 uint32_t flags; 162 uint32_t len_cq; 163 uint8_t imm_data[47]; 164 uint8_t intr_gen; 165 } __packed; 166 167 #define RX_SW_DESC_MAP_CREATED (1 << 0) 168 #define TX_SW_DESC_MAP_CREATED (1 << 1) 169 #define RX_SW_DESC_INUSE (1 << 3) 170 #define TX_SW_DESC_MAPPED (1 << 4) 171 172 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 173 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 174 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 175 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 176 177 struct tx_sw_desc { /* SW state per Tx descriptor */ 178 struct mbuf *m; 179 bus_dmamap_t map; 180 int flags; 181 }; 182 183 struct rx_sw_desc { /* SW state per Rx descriptor */ 184 caddr_t rxsd_cl; 185 struct mbuf *m; 186 bus_dmamap_t map; 187 int flags; 188 }; 189 190 struct txq_state { 191 unsigned int compl; 192 unsigned int gen; 193 unsigned int pidx; 194 }; 195 196 struct refill_fl_cb_arg { 197 int error; 198 bus_dma_segment_t seg; 199 int nseg; 200 }; 201 202 203 /* 204 * Maps a number of flits to the number of Tx descriptors that can hold them. 205 * The formula is 206 * 207 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 208 * 209 * HW allows up to 4 descriptors to be combined into a WR. 
210 */ 211 static uint8_t flit_desc_map[] = { 212 0, 213 #if SGE_NUM_GENBITS == 1 214 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 216 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 218 #elif SGE_NUM_GENBITS == 2 219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 220 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 221 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 222 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 223 #else 224 # error "SGE_NUM_GENBITS must be 1 or 2" 225 #endif 226 }; 227 228 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) 229 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) 230 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) 231 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) 232 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 233 #define TXQ_RING_NEEDS_ENQUEUE(qs) \ 234 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 235 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 236 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ 237 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) 238 #define TXQ_RING_DEQUEUE(qs) \ 239 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 240 241 int cxgb_debug = 0; 242 243 static void sge_timer_cb(void *arg); 244 static void sge_timer_reclaim(void *arg, int ncount); 245 static void sge_txq_reclaim_handler(void *arg, int ncount); 246 static void cxgb_start_locked(struct sge_qset *qs); 247 248 /* 249 * XXX need to cope with bursty scheduling by looking at a wider 250 * window than we are now for determining the need for coalescing 251 * 252 */ 253 static __inline uint64_t 254 check_pkt_coalesce(struct sge_qset *qs) 255 { 256 struct adapter *sc; 257 struct sge_txq *txq; 258 uint8_t *fill; 259 260 if (__predict_false(cxgb_tx_coalesce_force)) 261 return (1); 262 txq = &qs->txq[TXQ_ETH]; 263 sc = qs->port->adapter; 264 fill = &sc->tunq_fill[qs->idx]; 265 266 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) 267 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; 268 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) 269 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; 270 /* 271 * if the hardware transmit queue is more than 1/8 full 272 * we mark it as coalescing - we drop back from coalescing 273 * when we go below 1/32 full and there are no packets enqueued, 274 * this provides us with some degree of hysteresis 275 */ 276 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 277 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) 278 *fill = 0; 279 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) 280 *fill = 1; 281 282 return (sc->tunq_coalesce); 283 } 284 285 #ifdef __LP64__ 286 static void 287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 288 { 289 uint64_t wr_hilo; 290 #if _BYTE_ORDER == _LITTLE_ENDIAN 291 wr_hilo = wr_hi; 292 wr_hilo |= (((uint64_t)wr_lo)<<32); 293 #else 294 wr_hilo = wr_lo; 295 wr_hilo |= (((uint64_t)wr_hi)<<32); 296 #endif 297 wrp->wrh_hilo = wr_hilo; 298 } 299 #else 300 static void 301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 302 { 303 304 wrp->wrh_hi = wr_hi; 305 wmb(); 306 wrp->wrh_lo = wr_lo; 307 } 308 #endif 309 310 struct coalesce_info { 311 int count; 312 int nbytes; 313 }; 314 315 static int 316 coalesce_check(struct mbuf *m, void *arg) 317 { 318 struct coalesce_info *ci = arg; 319 int *count = &ci->count; 320 int *nbytes = 
&ci->nbytes; 321 322 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 323 (*count < 7) && (m->m_next == NULL))) { 324 *count += 1; 325 *nbytes += m->m_len; 326 return (1); 327 } 328 return (0); 329 } 330 331 static struct mbuf * 332 cxgb_dequeue(struct sge_qset *qs) 333 { 334 struct mbuf *m, *m_head, *m_tail; 335 struct coalesce_info ci; 336 337 338 if (check_pkt_coalesce(qs) == 0) 339 return TXQ_RING_DEQUEUE(qs); 340 341 m_head = m_tail = NULL; 342 ci.count = ci.nbytes = 0; 343 do { 344 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 345 if (m_head == NULL) { 346 m_tail = m_head = m; 347 } else if (m != NULL) { 348 m_tail->m_nextpkt = m; 349 m_tail = m; 350 } 351 } while (m != NULL); 352 if (ci.count > 7) 353 panic("trying to coalesce %d packets in to one WR", ci.count); 354 return (m_head); 355 } 356 357 /** 358 * reclaim_completed_tx - reclaims completed Tx descriptors 359 * @adapter: the adapter 360 * @q: the Tx queue to reclaim completed descriptors from 361 * 362 * Reclaims Tx descriptors that the SGE has indicated it has processed, 363 * and frees the associated buffers if possible. Called with the Tx 364 * queue's lock held. 365 */ 366 static __inline int 367 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 368 { 369 struct sge_txq *q = &qs->txq[queue]; 370 int reclaim = desc_reclaimable(q); 371 372 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 373 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 374 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 375 376 if (reclaim < reclaim_min) 377 return (0); 378 379 mtx_assert(&qs->lock, MA_OWNED); 380 if (reclaim > 0) { 381 t3_free_tx_desc(qs, reclaim, queue); 382 q->cleaned += reclaim; 383 q->in_use -= reclaim; 384 } 385 if (isset(&qs->txq_stopped, TXQ_ETH)) 386 clrbit(&qs->txq_stopped, TXQ_ETH); 387 388 return (reclaim); 389 } 390 391 /** 392 * should_restart_tx - are there enough resources to restart a Tx queue? 393 * @q: the Tx queue 394 * 395 * Checks if there are enough descriptors to restart a suspended Tx queue. 396 */ 397 static __inline int 398 should_restart_tx(const struct sge_txq *q) 399 { 400 unsigned int r = q->processed - q->cleaned; 401 402 return q->in_use - r < (q->size >> 1); 403 } 404 405 /** 406 * t3_sge_init - initialize SGE 407 * @adap: the adapter 408 * @p: the SGE parameters 409 * 410 * Performs SGE initialization needed every time after a chip reset. 411 * We do not initialize any of the queue sets here, instead the driver 412 * top-level must request those individually. We also do not enable DMA 413 * here, that should be done after the queues have been set up. 414 */ 415 void 416 t3_sge_init(adapter_t *adap, struct sge_params *p) 417 { 418 u_int ctrl, ups; 419 420 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 421 422 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 423 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 424 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 425 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 426 #if SGE_NUM_GENBITS == 1 427 ctrl |= F_EGRGENCTRL; 428 #endif 429 if (adap->params.rev > 0) { 430 if (!(adap->flags & (USING_MSIX | USING_MSI))) 431 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 432 } 433 t3_write_reg(adap, A_SG_CONTROL, ctrl); 434 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 435 V_LORCQDRBTHRSH(512)); 436 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 437 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 438 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 439 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 440 adap->params.rev < T3_REV_C ? 1000 : 500); 441 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 442 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 443 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 444 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 445 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 446 } 447 448 449 /** 450 * sgl_len - calculates the size of an SGL of the given capacity 451 * @n: the number of SGL entries 452 * 453 * Calculates the number of flits needed for a scatter/gather list that 454 * can hold the given number of entries. 455 */ 456 static __inline unsigned int 457 sgl_len(unsigned int n) 458 { 459 return ((3 * n) / 2 + (n & 1)); 460 } 461 462 /** 463 * get_imm_packet - return the next ingress packet buffer from a response 464 * @resp: the response descriptor containing the packet data 465 * 466 * Return a packet containing the immediate data of the given response. 467 */ 468 static int 469 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 470 { 471 472 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 473 m->m_ext.ext_buf = NULL; 474 m->m_ext.ext_type = 0; 475 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 476 return (0); 477 } 478 479 static __inline u_int 480 flits_to_desc(u_int n) 481 { 482 return (flit_desc_map[n]); 483 } 484 485 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 486 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 487 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 488 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 489 F_HIRCQPARITYERROR) 490 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 491 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 492 F_RSPQDISABLED) 493 494 /** 495 * t3_sge_err_intr_handler - SGE async event interrupt handler 496 * @adapter: the adapter 497 * 498 * Interrupt handler for SGE asynchronous (non-data) events. 
499 */ 500 void 501 t3_sge_err_intr_handler(adapter_t *adapter) 502 { 503 unsigned int v, status; 504 505 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 506 if (status & SGE_PARERR) 507 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 508 status & SGE_PARERR); 509 if (status & SGE_FRAMINGERR) 510 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 511 status & SGE_FRAMINGERR); 512 if (status & F_RSPQCREDITOVERFOW) 513 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 514 515 if (status & F_RSPQDISABLED) { 516 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 517 518 CH_ALERT(adapter, 519 "packet delivered to disabled response queue (0x%x)\n", 520 (v >> S_RSPQ0DISABLED) & 0xff); 521 } 522 523 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 524 if (status & SGE_FATALERR) 525 t3_fatal_err(adapter); 526 } 527 528 void 529 t3_sge_prep(adapter_t *adap, struct sge_params *p) 530 { 531 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 532 533 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 534 nqsets *= adap->params.nports; 535 536 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 537 538 while (!powerof2(fl_q_size)) 539 fl_q_size--; 540 541 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 542 is_offload(adap); 543 544 #if __FreeBSD_version >= 700111 545 if (use_16k) { 546 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 547 jumbo_buf_size = MJUM16BYTES; 548 } else { 549 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 550 jumbo_buf_size = MJUM9BYTES; 551 } 552 #else 553 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); 554 jumbo_buf_size = MJUMPAGESIZE; 555 #endif 556 while (!powerof2(jumbo_q_size)) 557 jumbo_q_size--; 558 559 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 560 device_printf(adap->dev, 561 "Insufficient clusters and/or jumbo buffers.\n"); 562 563 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 564 565 for (i = 0; i < SGE_QSETS; ++i) { 566 struct qset_params *q = p->qset + i; 567 568 if (adap->params.nports > 2) { 569 q->coalesce_usecs = 50; 570 } else { 571 #ifdef INVARIANTS 572 q->coalesce_usecs = 10; 573 #else 574 q->coalesce_usecs = 5; 575 #endif 576 } 577 q->polling = 0; 578 q->rspq_size = RSPQ_Q_SIZE; 579 q->fl_size = fl_q_size; 580 q->jumbo_size = jumbo_q_size; 581 q->jumbo_buf_size = jumbo_buf_size; 582 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 583 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 584 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 585 q->cong_thres = 0; 586 } 587 } 588 589 int 590 t3_sge_alloc(adapter_t *sc) 591 { 592 593 /* The parent tag. */ 594 if (bus_dma_tag_create( NULL, /* parent */ 595 1, 0, /* algnmnt, boundary */ 596 BUS_SPACE_MAXADDR, /* lowaddr */ 597 BUS_SPACE_MAXADDR, /* highaddr */ 598 NULL, NULL, /* filter, filterarg */ 599 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 600 BUS_SPACE_UNRESTRICTED, /* nsegments */ 601 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 602 0, /* flags */ 603 NULL, NULL, /* lock, lockarg */ 604 &sc->parent_dmat)) { 605 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 606 return (ENOMEM); 607 } 608 609 /* 610 * DMA tag for normal sized RX frames 611 */ 612 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 613 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 614 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 615 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 616 return (ENOMEM); 617 } 618 619 /* 620 * DMA tag for jumbo sized RX frames. 
621 */ 622 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 623 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 624 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 625 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 626 return (ENOMEM); 627 } 628 629 /* 630 * DMA tag for TX frames. 631 */ 632 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 633 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 634 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 635 NULL, NULL, &sc->tx_dmat)) { 636 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 637 return (ENOMEM); 638 } 639 640 return (0); 641 } 642 643 int 644 t3_sge_free(struct adapter * sc) 645 { 646 647 if (sc->tx_dmat != NULL) 648 bus_dma_tag_destroy(sc->tx_dmat); 649 650 if (sc->rx_jumbo_dmat != NULL) 651 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 652 653 if (sc->rx_dmat != NULL) 654 bus_dma_tag_destroy(sc->rx_dmat); 655 656 if (sc->parent_dmat != NULL) 657 bus_dma_tag_destroy(sc->parent_dmat); 658 659 return (0); 660 } 661 662 void 663 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 664 { 665 666 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 667 qs->rspq.polling = 0 /* p->polling */; 668 } 669 670 #if !defined(__i386__) && !defined(__amd64__) 671 static void 672 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 673 { 674 struct refill_fl_cb_arg *cb_arg = arg; 675 676 cb_arg->error = error; 677 cb_arg->seg = segs[0]; 678 cb_arg->nseg = nseg; 679 680 } 681 #endif 682 /** 683 * refill_fl - refill an SGE free-buffer list 684 * @sc: the controller softc 685 * @q: the free-list to refill 686 * @n: the number of new buffers to allocate 687 * 688 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 689 * The caller must assure that @n does not exceed the queue's capacity. 
690 */ 691 static void 692 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 693 { 694 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 695 struct rx_desc *d = &q->desc[q->pidx]; 696 struct refill_fl_cb_arg cb_arg; 697 struct mbuf *m; 698 caddr_t cl; 699 int err, count = 0; 700 701 cb_arg.error = 0; 702 while (n--) { 703 /* 704 * We only allocate a cluster, mbuf allocation happens after rx 705 */ 706 if (q->zone == zone_pack) { 707 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 708 break; 709 cl = m->m_ext.ext_buf; 710 } else { 711 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 712 break; 713 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 714 uma_zfree(q->zone, cl); 715 break; 716 } 717 } 718 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 719 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 720 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 721 uma_zfree(q->zone, cl); 722 goto done; 723 } 724 sd->flags |= RX_SW_DESC_MAP_CREATED; 725 } 726 #if !defined(__i386__) && !defined(__amd64__) 727 err = bus_dmamap_load(q->entry_tag, sd->map, 728 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 729 730 if (err != 0 || cb_arg.error) { 731 if (q->zone == zone_pack) 732 uma_zfree(q->zone, cl); 733 m_free(m); 734 goto done; 735 } 736 #else 737 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 738 #endif 739 sd->flags |= RX_SW_DESC_INUSE; 740 sd->rxsd_cl = cl; 741 sd->m = m; 742 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 743 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 744 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 745 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 746 747 d++; 748 sd++; 749 750 if (++q->pidx == q->size) { 751 q->pidx = 0; 752 q->gen ^= 1; 753 sd = q->sdesc; 754 d = q->desc; 755 } 756 q->credits++; 757 count++; 758 } 759 760 done: 761 if (count) 762 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 763 } 764 765 766 /** 767 * free_rx_bufs - free the Rx buffers on an SGE free list 768 * @sc: the controle softc 769 * @q: the SGE free list to clean up 770 * 771 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 772 * this queue should be stopped before calling this function. 773 */ 774 static void 775 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 776 { 777 u_int cidx = q->cidx; 778 779 while (q->credits--) { 780 struct rx_sw_desc *d = &q->sdesc[cidx]; 781 782 if (d->flags & RX_SW_DESC_INUSE) { 783 bus_dmamap_unload(q->entry_tag, d->map); 784 bus_dmamap_destroy(q->entry_tag, d->map); 785 if (q->zone == zone_pack) { 786 m_init(d->m, zone_pack, MCLBYTES, 787 M_NOWAIT, MT_DATA, M_EXT); 788 uma_zfree(zone_pack, d->m); 789 } else { 790 m_init(d->m, zone_mbuf, MLEN, 791 M_NOWAIT, MT_DATA, 0); 792 uma_zfree(zone_mbuf, d->m); 793 uma_zfree(q->zone, d->rxsd_cl); 794 } 795 } 796 797 d->rxsd_cl = NULL; 798 d->m = NULL; 799 if (++cidx == q->size) 800 cidx = 0; 801 } 802 } 803 804 static __inline void 805 __refill_fl(adapter_t *adap, struct sge_fl *fl) 806 { 807 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 808 } 809 810 static __inline void 811 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 812 { 813 if ((fl->size - fl->credits) < max) 814 refill_fl(adap, fl, min(max, fl->size - fl->credits)); 815 } 816 817 /** 818 * recycle_rx_buf - recycle a receive buffer 819 * @adapter: the adapter 820 * @q: the SGE free list 821 * @idx: index of buffer to recycle 822 * 823 * Recycles the specified buffer on the given free list by adding it at 824 * the next available slot on the list. 
825 */ 826 static void 827 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 828 { 829 struct rx_desc *from = &q->desc[idx]; 830 struct rx_desc *to = &q->desc[q->pidx]; 831 832 q->sdesc[q->pidx] = q->sdesc[idx]; 833 to->addr_lo = from->addr_lo; // already big endian 834 to->addr_hi = from->addr_hi; // likewise 835 wmb(); /* necessary ? */ 836 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 837 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 838 q->credits++; 839 840 if (++q->pidx == q->size) { 841 q->pidx = 0; 842 q->gen ^= 1; 843 } 844 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 845 } 846 847 static void 848 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 849 { 850 uint32_t *addr; 851 852 addr = arg; 853 *addr = segs[0].ds_addr; 854 } 855 856 static int 857 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 858 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 859 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 860 { 861 size_t len = nelem * elem_size; 862 void *s = NULL; 863 void *p = NULL; 864 int err; 865 866 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 867 BUS_SPACE_MAXADDR_32BIT, 868 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 869 len, 0, NULL, NULL, tag)) != 0) { 870 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 871 return (ENOMEM); 872 } 873 874 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 875 map)) != 0) { 876 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 877 return (ENOMEM); 878 } 879 880 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 881 bzero(p, len); 882 *(void **)desc = p; 883 884 if (sw_size) { 885 len = nelem * sw_size; 886 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 887 *(void **)sdesc = s; 888 } 889 if (parent_entry_tag == NULL) 890 return (0); 891 892 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 893 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 894 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 895 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 896 NULL, NULL, entry_tag)) != 0) { 897 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 898 return (ENOMEM); 899 } 900 return (0); 901 } 902 903 static void 904 sge_slow_intr_handler(void *arg, int ncount) 905 { 906 adapter_t *sc = arg; 907 908 t3_slow_intr_handler(sc); 909 } 910 911 /** 912 * sge_timer_cb - perform periodic maintenance of an SGE qset 913 * @data: the SGE queue set to maintain 914 * 915 * Runs periodically from a timer to perform maintenance of an SGE queue 916 * set. It performs two tasks: 917 * 918 * a) Cleans up any completed Tx descriptors that may still be pending. 919 * Normal descriptor cleanup happens when new packets are added to a Tx 920 * queue so this timer is relatively infrequent and does any cleanup only 921 * if the Tx queue has not seen any new packets in a while. We make a 922 * best effort attempt to reclaim descriptors, in that we don't wait 923 * around if we cannot get a queue's lock (which most likely is because 924 * someone else is queueing new packets and so will also handle the clean 925 * up). Since control queues use immediate data exclusively we don't 926 * bother cleaning them up here. 927 * 928 * b) Replenishes Rx queues that have run out due to memory shortage. 929 * Normally new Rx buffers are added when existing ones are consumed but 930 * when out of memory a queue can become empty. 
We try to add only a few 931 * buffers here, the queue will be replenished fully as these new buffers 932 * are used up if memory shortage has subsided. 933 * 934 * c) Return coalesced response queue credits in case a response queue is 935 * starved. 936 * 937 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 938 * fifo overflows and the FW doesn't implement any recovery scheme yet. 939 */ 940 static void 941 sge_timer_cb(void *arg) 942 { 943 adapter_t *sc = arg; 944 if ((sc->flags & USING_MSIX) == 0) { 945 946 struct port_info *pi; 947 struct sge_qset *qs; 948 struct sge_txq *txq; 949 int i, j; 950 int reclaim_ofl, refill_rx; 951 952 if (sc->open_device_map == 0) 953 return; 954 955 for (i = 0; i < sc->params.nports; i++) { 956 pi = &sc->port[i]; 957 for (j = 0; j < pi->nqsets; j++) { 958 qs = &sc->sge.qs[pi->first_qset + j]; 959 txq = &qs->txq[0]; 960 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 961 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 962 (qs->fl[1].credits < qs->fl[1].size)); 963 if (reclaim_ofl || refill_rx) { 964 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 965 break; 966 } 967 } 968 } 969 } 970 971 if (sc->params.nports > 2) { 972 int i; 973 974 for_each_port(sc, i) { 975 struct port_info *pi = &sc->port[i]; 976 977 t3_write_reg(sc, A_SG_KDOORBELL, 978 F_SELEGRCNTX | 979 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 980 } 981 } 982 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 983 sc->open_device_map != 0) 984 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 985 } 986 987 /* 988 * This is meant to be a catch-all function to keep sge state private 989 * to sge.c 990 * 991 */ 992 int 993 t3_sge_init_adapter(adapter_t *sc) 994 { 995 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 996 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 997 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 998 return (0); 999 } 1000 1001 int 1002 t3_sge_reset_adapter(adapter_t *sc) 1003 { 1004 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1005 return (0); 1006 } 1007 1008 int 1009 t3_sge_init_port(struct port_info *pi) 1010 { 1011 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1012 return (0); 1013 } 1014 1015 /** 1016 * refill_rspq - replenish an SGE response queue 1017 * @adapter: the adapter 1018 * @q: the response queue to replenish 1019 * @credits: how many new responses to make available 1020 * 1021 * Replenishes a response queue by making the supplied number of responses 1022 * available to HW. 1023 */ 1024 static __inline void 1025 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1026 { 1027 1028 /* mbufs are allocated on demand when a rspq entry is processed. 
*/ 1029 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1030 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1031 } 1032 1033 static void 1034 sge_txq_reclaim_handler(void *arg, int ncount) 1035 { 1036 struct sge_qset *qs = arg; 1037 int i; 1038 1039 for (i = 0; i < 3; i++) 1040 reclaim_completed_tx(qs, 16, i); 1041 } 1042 1043 static void 1044 sge_timer_reclaim(void *arg, int ncount) 1045 { 1046 struct port_info *pi = arg; 1047 int i, nqsets = pi->nqsets; 1048 adapter_t *sc = pi->adapter; 1049 struct sge_qset *qs; 1050 struct mtx *lock; 1051 1052 KASSERT((sc->flags & USING_MSIX) == 0, 1053 ("can't call timer reclaim for msi-x")); 1054 1055 for (i = 0; i < nqsets; i++) { 1056 qs = &sc->sge.qs[pi->first_qset + i]; 1057 1058 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1059 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1060 &sc->sge.qs[0].rspq.lock; 1061 1062 if (mtx_trylock(lock)) { 1063 /* XXX currently assume that we are *NOT* polling */ 1064 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1065 1066 if (qs->fl[0].credits < qs->fl[0].size - 16) 1067 __refill_fl(sc, &qs->fl[0]); 1068 if (qs->fl[1].credits < qs->fl[1].size - 16) 1069 __refill_fl(sc, &qs->fl[1]); 1070 1071 if (status & (1 << qs->rspq.cntxt_id)) { 1072 if (qs->rspq.credits) { 1073 refill_rspq(sc, &qs->rspq, 1); 1074 qs->rspq.credits--; 1075 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1076 1 << qs->rspq.cntxt_id); 1077 } 1078 } 1079 mtx_unlock(lock); 1080 } 1081 } 1082 } 1083 1084 /** 1085 * init_qset_cntxt - initialize an SGE queue set context info 1086 * @qs: the queue set 1087 * @id: the queue set id 1088 * 1089 * Initializes the TIDs and context ids for the queues of a queue set. 1090 */ 1091 static void 1092 init_qset_cntxt(struct sge_qset *qs, u_int id) 1093 { 1094 1095 qs->rspq.cntxt_id = id; 1096 qs->fl[0].cntxt_id = 2 * id; 1097 qs->fl[1].cntxt_id = 2 * id + 1; 1098 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1099 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1100 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1101 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1102 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1103 1104 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1105 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1106 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1107 } 1108 1109 1110 static void 1111 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1112 { 1113 txq->in_use += ndesc; 1114 /* 1115 * XXX we don't handle stopping of queue 1116 * presumably start handles this when we bump against the end 1117 */ 1118 txqs->gen = txq->gen; 1119 txq->unacked += ndesc; 1120 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1121 txq->unacked &= 31; 1122 txqs->pidx = txq->pidx; 1123 txq->pidx += ndesc; 1124 #ifdef INVARIANTS 1125 if (((txqs->pidx > txq->cidx) && 1126 (txq->pidx < txqs->pidx) && 1127 (txq->pidx >= txq->cidx)) || 1128 ((txqs->pidx < txq->cidx) && 1129 (txq->pidx >= txq-> cidx)) || 1130 ((txqs->pidx < txq->cidx) && 1131 (txq->cidx < txqs->pidx))) 1132 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1133 txqs->pidx, txq->pidx, txq->cidx); 1134 #endif 1135 if (txq->pidx >= txq->size) { 1136 txq->pidx -= txq->size; 1137 txq->gen ^= 1; 1138 } 1139 1140 } 1141 1142 /** 1143 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1144 * @m: the packet mbufs 1145 * @nsegs: the number of segments 1146 * 1147 * Returns the number of Tx descriptors needed for the given Ethernet 1148 * packet. Ethernet packets require addition of WR and CPL headers. 
1149 */ 1150 static __inline unsigned int 1151 calc_tx_descs(const struct mbuf *m, int nsegs) 1152 { 1153 unsigned int flits; 1154 1155 if (m->m_pkthdr.len <= PIO_LEN) 1156 return 1; 1157 1158 flits = sgl_len(nsegs) + 2; 1159 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1160 flits++; 1161 1162 return flits_to_desc(flits); 1163 } 1164 1165 static unsigned int 1166 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1167 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1168 { 1169 struct mbuf *m0; 1170 int err, pktlen, pass = 0; 1171 bus_dma_tag_t tag = txq->entry_tag; 1172 1173 retry: 1174 err = 0; 1175 m0 = *m; 1176 pktlen = m0->m_pkthdr.len; 1177 #if defined(__i386__) || defined(__amd64__) 1178 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1179 goto done; 1180 } else 1181 #endif 1182 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1183 1184 if (err == 0) { 1185 goto done; 1186 } 1187 if (err == EFBIG && pass == 0) { 1188 pass = 1; 1189 /* Too many segments, try to defrag */ 1190 m0 = m_defrag(m0, M_DONTWAIT); 1191 if (m0 == NULL) { 1192 m_freem(*m); 1193 *m = NULL; 1194 return (ENOBUFS); 1195 } 1196 *m = m0; 1197 goto retry; 1198 } else if (err == ENOMEM) { 1199 return (err); 1200 } if (err) { 1201 if (cxgb_debug) 1202 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1203 m_freem(m0); 1204 *m = NULL; 1205 return (err); 1206 } 1207 done: 1208 #if !defined(__i386__) && !defined(__amd64__) 1209 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1210 #endif 1211 txsd->flags |= TX_SW_DESC_MAPPED; 1212 1213 return (0); 1214 } 1215 1216 /** 1217 * make_sgl - populate a scatter/gather list for a packet 1218 * @sgp: the SGL to populate 1219 * @segs: the packet dma segments 1220 * @nsegs: the number of segments 1221 * 1222 * Generates a scatter/gather list for the buffers that make up a packet 1223 * and returns the SGL size in 8-byte words. The caller must size the SGL 1224 * appropriately. 1225 */ 1226 static __inline void 1227 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1228 { 1229 int i, idx; 1230 1231 for (idx = 0, i = 0; i < nsegs; i++) { 1232 /* 1233 * firmware doesn't like empty segments 1234 */ 1235 if (segs[i].ds_len == 0) 1236 continue; 1237 if (i && idx == 0) 1238 ++sgp; 1239 1240 sgp->len[idx] = htobe32(segs[i].ds_len); 1241 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1242 idx ^= 1; 1243 } 1244 1245 if (idx) { 1246 sgp->len[idx] = 0; 1247 sgp->addr[idx] = 0; 1248 } 1249 } 1250 1251 /** 1252 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1253 * @adap: the adapter 1254 * @q: the Tx queue 1255 * 1256 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1257 * where the HW is going to sleep just after we checked, however, 1258 * then the interrupt handler will detect the outstanding TX packet 1259 * and ring the doorbell for us. 1260 * 1261 * When GTS is disabled we unconditionally ring the doorbell. 
1262 */ 1263 static __inline void 1264 check_ring_tx_db(adapter_t *adap, struct sge_txq *q) 1265 { 1266 #if USE_GTS 1267 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1268 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1269 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1270 #ifdef T3_TRACE 1271 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1272 q->cntxt_id); 1273 #endif 1274 t3_write_reg(adap, A_SG_KDOORBELL, 1275 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1276 } 1277 #else 1278 wmb(); /* write descriptors before telling HW */ 1279 t3_write_reg(adap, A_SG_KDOORBELL, 1280 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1281 #endif 1282 } 1283 1284 static __inline void 1285 wr_gen2(struct tx_desc *d, unsigned int gen) 1286 { 1287 #if SGE_NUM_GENBITS == 2 1288 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1289 #endif 1290 } 1291 1292 /** 1293 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1294 * @ndesc: number of Tx descriptors spanned by the SGL 1295 * @txd: first Tx descriptor to be written 1296 * @txqs: txq state (generation and producer index) 1297 * @txq: the SGE Tx queue 1298 * @sgl: the SGL 1299 * @flits: number of flits to the start of the SGL in the first descriptor 1300 * @sgl_flits: the SGL size in flits 1301 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1302 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1303 * 1304 * Write a work request header and an associated SGL. If the SGL is 1305 * small enough to fit into one Tx descriptor it has already been written 1306 * and we just need to write the WR header. Otherwise we distribute the 1307 * SGL across the number of descriptors it spans. 1308 */ 1309 static void 1310 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1311 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1312 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1313 { 1314 1315 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1316 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1317 1318 if (__predict_true(ndesc == 1)) { 1319 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1320 V_WR_SGLSFLT(flits)) | wr_hi, 1321 htonl(V_WR_LEN(flits + sgl_flits) | 1322 V_WR_GEN(txqs->gen)) | wr_lo); 1323 /* XXX gen? 
*/ 1324 wr_gen2(txd, txqs->gen); 1325 1326 } else { 1327 unsigned int ogen = txqs->gen; 1328 const uint64_t *fp = (const uint64_t *)sgl; 1329 struct work_request_hdr *wp = wrp; 1330 1331 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1332 V_WR_SGLSFLT(flits)) | wr_hi; 1333 1334 while (sgl_flits) { 1335 unsigned int avail = WR_FLITS - flits; 1336 1337 if (avail > sgl_flits) 1338 avail = sgl_flits; 1339 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1340 sgl_flits -= avail; 1341 ndesc--; 1342 if (!sgl_flits) 1343 break; 1344 1345 fp += avail; 1346 txd++; 1347 txsd++; 1348 if (++txqs->pidx == txq->size) { 1349 txqs->pidx = 0; 1350 txqs->gen ^= 1; 1351 txd = txq->desc; 1352 txsd = txq->sdesc; 1353 } 1354 1355 /* 1356 * when the head of the mbuf chain 1357 * is freed all clusters will be freed 1358 * with it 1359 */ 1360 wrp = (struct work_request_hdr *)txd; 1361 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1362 V_WR_SGLSFLT(1)) | wr_hi; 1363 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1364 sgl_flits + 1)) | 1365 V_WR_GEN(txqs->gen)) | wr_lo; 1366 wr_gen2(txd, txqs->gen); 1367 flits = 1; 1368 } 1369 wrp->wrh_hi |= htonl(F_WR_EOP); 1370 wmb(); 1371 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1372 wr_gen2((struct tx_desc *)wp, ogen); 1373 } 1374 } 1375 1376 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1377 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1378 1379 #define GET_VTAG(cntrl, m) \ 1380 do { \ 1381 if ((m)->m_flags & M_VLANTAG) \ 1382 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1383 } while (0) 1384 1385 static int 1386 t3_encap(struct sge_qset *qs, struct mbuf **m) 1387 { 1388 adapter_t *sc; 1389 struct mbuf *m0; 1390 struct sge_txq *txq; 1391 struct txq_state txqs; 1392 struct port_info *pi; 1393 unsigned int ndesc, flits, cntrl, mlen; 1394 int err, nsegs, tso_info = 0; 1395 1396 struct work_request_hdr *wrp; 1397 struct tx_sw_desc *txsd; 1398 struct sg_ent *sgp, *sgl; 1399 uint32_t wr_hi, wr_lo, sgl_flits; 1400 bus_dma_segment_t segs[TX_MAX_SEGS]; 1401 1402 struct tx_desc *txd; 1403 1404 pi = qs->port; 1405 sc = pi->adapter; 1406 txq = &qs->txq[TXQ_ETH]; 1407 txd = &txq->desc[txq->pidx]; 1408 txsd = &txq->sdesc[txq->pidx]; 1409 sgl = txq->txq_sgl; 1410 1411 prefetch(txd); 1412 m0 = *m; 1413 1414 mtx_assert(&qs->lock, MA_OWNED); 1415 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1416 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1417 1418 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1419 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1420 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1421 1422 if (m0->m_nextpkt != NULL) { 1423 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1424 ndesc = 1; 1425 mlen = 0; 1426 } else { 1427 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1428 &m0, segs, &nsegs))) { 1429 if (cxgb_debug) 1430 printf("failed ... 
err=%d\n", err); 1431 return (err); 1432 } 1433 mlen = m0->m_pkthdr.len; 1434 ndesc = calc_tx_descs(m0, nsegs); 1435 } 1436 txq_prod(txq, ndesc, &txqs); 1437 1438 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1439 txsd->m = m0; 1440 1441 if (m0->m_nextpkt != NULL) { 1442 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1443 int i, fidx; 1444 1445 if (nsegs > 7) 1446 panic("trying to coalesce %d packets in to one WR", nsegs); 1447 txq->txq_coalesced += nsegs; 1448 wrp = (struct work_request_hdr *)txd; 1449 flits = nsegs*2 + 1; 1450 1451 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1452 struct cpl_tx_pkt_batch_entry *cbe; 1453 uint64_t flit; 1454 uint32_t *hflit = (uint32_t *)&flit; 1455 int cflags = m0->m_pkthdr.csum_flags; 1456 1457 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1458 GET_VTAG(cntrl, m0); 1459 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1460 if (__predict_false(!(cflags & CSUM_IP))) 1461 cntrl |= F_TXPKT_IPCSUM_DIS; 1462 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1463 cntrl |= F_TXPKT_L4CSUM_DIS; 1464 1465 hflit[0] = htonl(cntrl); 1466 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1467 flit |= htobe64(1 << 24); 1468 cbe = &cpl_batch->pkt_entry[i]; 1469 cbe->cntrl = hflit[0]; 1470 cbe->len = hflit[1]; 1471 cbe->addr = htobe64(segs[i].ds_addr); 1472 } 1473 1474 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1475 V_WR_SGLSFLT(flits)) | 1476 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1477 wr_lo = htonl(V_WR_LEN(flits) | 1478 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1479 set_wr_hdr(wrp, wr_hi, wr_lo); 1480 wmb(); 1481 ETHER_BPF_MTAP(pi->ifp, m0); 1482 wr_gen2(txd, txqs.gen); 1483 check_ring_tx_db(sc, txq); 1484 return (0); 1485 } else if (tso_info) { 1486 int eth_type; 1487 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1488 struct ether_header *eh; 1489 struct ip *ip; 1490 struct tcphdr *tcp; 1491 1492 txd->flit[2] = 0; 1493 GET_VTAG(cntrl, m0); 1494 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1495 hdr->cntrl = htonl(cntrl); 1496 hdr->len = htonl(mlen | 0x80000000); 1497 1498 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1499 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1500 m0, mlen, m0->m_pkthdr.tso_segsz, 1501 m0->m_pkthdr.csum_flags, m0->m_flags); 1502 panic("tx tso packet too small"); 1503 } 1504 1505 /* Make sure that ether, ip, tcp headers are all in m0 */ 1506 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1507 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1508 if (__predict_false(m0 == NULL)) { 1509 /* XXX panic probably an overreaction */ 1510 panic("couldn't fit header into mbuf"); 1511 } 1512 } 1513 1514 eh = mtod(m0, struct ether_header *); 1515 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1516 eth_type = CPL_ETH_II_VLAN; 1517 ip = (struct ip *)((struct ether_vlan_header *)eh + 1); 1518 } else { 1519 eth_type = CPL_ETH_II; 1520 ip = (struct ip *)(eh + 1); 1521 } 1522 tcp = (struct tcphdr *)(ip + 1); 1523 1524 tso_info |= V_LSO_ETH_TYPE(eth_type) | 1525 V_LSO_IPHDR_WORDS(ip->ip_hl) | 1526 V_LSO_TCPHDR_WORDS(tcp->th_off); 1527 hdr->lso_info = htonl(tso_info); 1528 1529 if (__predict_false(mlen <= PIO_LEN)) { 1530 /* 1531 * pkt not undersized but fits in PIO_LEN 1532 * Indicates a TSO bug at the higher levels. 
1533 */ 1534 txsd->m = NULL; 1535 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1536 flits = (mlen + 7) / 8 + 3; 1537 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1538 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1539 F_WR_SOP | F_WR_EOP | txqs.compl); 1540 wr_lo = htonl(V_WR_LEN(flits) | 1541 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1542 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1543 wmb(); 1544 ETHER_BPF_MTAP(pi->ifp, m0); 1545 wr_gen2(txd, txqs.gen); 1546 check_ring_tx_db(sc, txq); 1547 m_freem(m0); 1548 return (0); 1549 } 1550 flits = 3; 1551 } else { 1552 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1553 1554 GET_VTAG(cntrl, m0); 1555 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1556 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1557 cntrl |= F_TXPKT_IPCSUM_DIS; 1558 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1559 cntrl |= F_TXPKT_L4CSUM_DIS; 1560 cpl->cntrl = htonl(cntrl); 1561 cpl->len = htonl(mlen | 0x80000000); 1562 1563 if (mlen <= PIO_LEN) { 1564 txsd->m = NULL; 1565 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1566 flits = (mlen + 7) / 8 + 2; 1567 1568 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1569 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1570 F_WR_SOP | F_WR_EOP | txqs.compl); 1571 wr_lo = htonl(V_WR_LEN(flits) | 1572 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1573 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1574 wmb(); 1575 ETHER_BPF_MTAP(pi->ifp, m0); 1576 wr_gen2(txd, txqs.gen); 1577 check_ring_tx_db(sc, txq); 1578 m_freem(m0); 1579 return (0); 1580 } 1581 flits = 2; 1582 } 1583 wrp = (struct work_request_hdr *)txd; 1584 sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; 1585 make_sgl(sgp, segs, nsegs); 1586 1587 sgl_flits = sgl_len(nsegs); 1588 1589 ETHER_BPF_MTAP(pi->ifp, m0); 1590 1591 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1592 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1593 wr_lo = htonl(V_WR_TID(txq->token)); 1594 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1595 sgl_flits, wr_hi, wr_lo); 1596 check_ring_tx_db(sc, txq); 1597 1598 return (0); 1599 } 1600 1601 void 1602 cxgb_tx_watchdog(void *arg) 1603 { 1604 struct sge_qset *qs = arg; 1605 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1606 1607 if (qs->coalescing != 0 && 1608 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1609 TXQ_RING_EMPTY(qs)) 1610 qs->coalescing = 0; 1611 else if (qs->coalescing == 0 && 1612 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1613 qs->coalescing = 1; 1614 if (TXQ_TRYLOCK(qs)) { 1615 qs->qs_flags |= QS_FLUSHING; 1616 cxgb_start_locked(qs); 1617 qs->qs_flags &= ~QS_FLUSHING; 1618 TXQ_UNLOCK(qs); 1619 } 1620 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1621 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1622 qs, txq->txq_watchdog.c_cpu); 1623 } 1624 1625 static void 1626 cxgb_tx_timeout(void *arg) 1627 { 1628 struct sge_qset *qs = arg; 1629 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1630 1631 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1632 qs->coalescing = 1; 1633 if (TXQ_TRYLOCK(qs)) { 1634 qs->qs_flags |= QS_TIMEOUT; 1635 cxgb_start_locked(qs); 1636 qs->qs_flags &= ~QS_TIMEOUT; 1637 TXQ_UNLOCK(qs); 1638 } 1639 } 1640 1641 static void 1642 cxgb_start_locked(struct sge_qset *qs) 1643 { 1644 struct mbuf *m_head = NULL; 1645 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1646 int in_use_init = txq->in_use; 1647 struct port_info *pi = qs->port; 1648 struct ifnet *ifp = pi->ifp; 1649 1650 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1651 reclaim_completed_tx(qs, 0, TXQ_ETH); 1652 1653 if 
(!pi->link_config.link_ok) { 1654 TXQ_RING_FLUSH(qs); 1655 return; 1656 } 1657 TXQ_LOCK_ASSERT(qs); 1658 while ((txq->in_use - in_use_init < TX_START_MAX_DESC) && 1659 !TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1660 pi->link_config.link_ok) { 1661 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1662 1663 if (txq->size - txq->in_use <= TX_MAX_DESC) 1664 break; 1665 1666 if ((m_head = cxgb_dequeue(qs)) == NULL) 1667 break; 1668 /* 1669 * Encapsulation can modify our pointer, and or make it 1670 * NULL on failure. In that event, we can't requeue. 1671 */ 1672 if (t3_encap(qs, &m_head) || m_head == NULL) 1673 break; 1674 1675 m_head = NULL; 1676 } 1677 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1678 pi->link_config.link_ok) 1679 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1680 qs, txq->txq_timer.c_cpu); 1681 if (m_head != NULL) 1682 m_freem(m_head); 1683 } 1684 1685 static int 1686 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1687 { 1688 struct port_info *pi = qs->port; 1689 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1690 struct buf_ring *br = txq->txq_mr; 1691 int error, avail; 1692 1693 avail = txq->size - txq->in_use; 1694 TXQ_LOCK_ASSERT(qs); 1695 1696 /* 1697 * We can only do a direct transmit if the following are true: 1698 * - we aren't coalescing (ring < 3/4 full) 1699 * - the link is up -- checked in caller 1700 * - there are no packets enqueued already 1701 * - there is space in hardware transmit queue 1702 */ 1703 if (check_pkt_coalesce(qs) == 0 && 1704 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1705 if (t3_encap(qs, &m)) { 1706 if (m != NULL && 1707 (error = drbr_enqueue(ifp, br, m)) != 0) 1708 return (error); 1709 } else { 1710 /* 1711 * We've bypassed the buf ring so we need to update 1712 * the stats directly 1713 */ 1714 txq->txq_direct_packets++; 1715 txq->txq_direct_bytes += m->m_pkthdr.len; 1716 } 1717 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1718 return (error); 1719 1720 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1721 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1722 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1723 cxgb_start_locked(qs); 1724 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1725 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1726 qs, txq->txq_timer.c_cpu); 1727 return (0); 1728 } 1729 1730 int 1731 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1732 { 1733 struct sge_qset *qs; 1734 struct port_info *pi = ifp->if_softc; 1735 int error, qidx = pi->first_qset; 1736 1737 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1738 ||(!pi->link_config.link_ok)) { 1739 m_freem(m); 1740 return (0); 1741 } 1742 1743 if (m->m_flags & M_FLOWID) 1744 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1745 1746 qs = &pi->adapter->sge.qs[qidx]; 1747 1748 if (TXQ_TRYLOCK(qs)) { 1749 /* XXX running */ 1750 error = cxgb_transmit_locked(ifp, qs, m); 1751 TXQ_UNLOCK(qs); 1752 } else 1753 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1754 return (error); 1755 } 1756 void 1757 cxgb_start(struct ifnet *ifp) 1758 { 1759 struct port_info *pi = ifp->if_softc; 1760 struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset]; 1761 1762 if (!pi->link_config.link_ok) 1763 return; 1764 1765 TXQ_LOCK(qs); 1766 cxgb_start_locked(qs); 1767 TXQ_UNLOCK(qs); 1768 } 1769 1770 void 1771 cxgb_qflush(struct ifnet *ifp) 1772 { 1773 /* 1774 * flush any enqueued mbufs in the buf_rings 1775 * and in the 
transmit queues 1776 * no-op for now 1777 */ 1778 return; 1779 } 1780 1781 /** 1782 * write_imm - write a packet into a Tx descriptor as immediate data 1783 * @d: the Tx descriptor to write 1784 * @m: the packet 1785 * @len: the length of packet data to write as immediate data 1786 * @gen: the generation bit value to write 1787 * 1788 * Writes a packet as immediate data into a Tx descriptor. The packet 1789 * contains a work request at its beginning. We must write the packet 1790 * carefully so the SGE doesn't read accidentally before it's written in 1791 * its entirety. 1792 */ 1793 static __inline void 1794 write_imm(struct tx_desc *d, struct mbuf *m, 1795 unsigned int len, unsigned int gen) 1796 { 1797 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1798 struct work_request_hdr *to = (struct work_request_hdr *)d; 1799 uint32_t wr_hi, wr_lo; 1800 1801 if (len > WR_LEN) 1802 panic("len too big %d\n", len); 1803 if (len < sizeof(*from)) 1804 panic("len too small %d", len); 1805 1806 memcpy(&to[1], &from[1], len - sizeof(*from)); 1807 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1808 V_WR_BCNTLFLT(len & 7)); 1809 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | 1810 V_WR_LEN((len + 7) / 8)); 1811 set_wr_hdr(to, wr_hi, wr_lo); 1812 wmb(); 1813 wr_gen2(d, gen); 1814 1815 /* 1816 * This check is a hack we should really fix the logic so 1817 * that this can't happen 1818 */ 1819 if (m->m_type != MT_DONTFREE) 1820 m_freem(m); 1821 1822 } 1823 1824 /** 1825 * check_desc_avail - check descriptor availability on a send queue 1826 * @adap: the adapter 1827 * @q: the TX queue 1828 * @m: the packet needing the descriptors 1829 * @ndesc: the number of Tx descriptors needed 1830 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1831 * 1832 * Checks if the requested number of Tx descriptors is available on an 1833 * SGE send queue. If the queue is already suspended or not enough 1834 * descriptors are available the packet is queued for later transmission. 1835 * Must be called with the Tx queue locked. 1836 * 1837 * Returns 0 if enough descriptors are available, 1 if there aren't 1838 * enough descriptors and the packet has been queued, and 2 if the caller 1839 * needs to retry because there weren't enough descriptors at the 1840 * beginning of the call but some freed up in the mean time. 
1841 */ 1842 static __inline int 1843 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1844 struct mbuf *m, unsigned int ndesc, 1845 unsigned int qid) 1846 { 1847 /* 1848 * XXX We currently only use this for checking the control queue 1849 * the control queue is only used for binding qsets which happens 1850 * at init time so we are guaranteed enough descriptors 1851 */ 1852 if (__predict_false(!mbufq_empty(&q->sendq))) { 1853 addq_exit: mbufq_tail(&q->sendq, m); 1854 return 1; 1855 } 1856 if (__predict_false(q->size - q->in_use < ndesc)) { 1857 1858 struct sge_qset *qs = txq_to_qset(q, qid); 1859 1860 setbit(&qs->txq_stopped, qid); 1861 if (should_restart_tx(q) && 1862 test_and_clear_bit(qid, &qs->txq_stopped)) 1863 return 2; 1864 1865 q->stops++; 1866 goto addq_exit; 1867 } 1868 return 0; 1869 } 1870 1871 1872 /** 1873 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1874 * @q: the SGE control Tx queue 1875 * 1876 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1877 * that send only immediate data (presently just the control queues) and 1878 * thus do not have any mbufs 1879 */ 1880 static __inline void 1881 reclaim_completed_tx_imm(struct sge_txq *q) 1882 { 1883 unsigned int reclaim = q->processed - q->cleaned; 1884 1885 q->in_use -= reclaim; 1886 q->cleaned += reclaim; 1887 } 1888 1889 static __inline int 1890 immediate(const struct mbuf *m) 1891 { 1892 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1893 } 1894 1895 /** 1896 * ctrl_xmit - send a packet through an SGE control Tx queue 1897 * @adap: the adapter 1898 * @q: the control queue 1899 * @m: the packet 1900 * 1901 * Send a packet through an SGE control Tx queue. Packets sent through 1902 * a control queue must fit entirely as immediate data in a single Tx 1903 * descriptor and have no page fragments. 1904 */ 1905 static int 1906 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1907 { 1908 int ret; 1909 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1910 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1911 1912 if (__predict_false(!immediate(m))) { 1913 m_freem(m); 1914 return 0; 1915 } 1916 1917 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1918 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1919 1920 TXQ_LOCK(qs); 1921 again: reclaim_completed_tx_imm(q); 1922 1923 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1924 if (__predict_false(ret)) { 1925 if (ret == 1) { 1926 TXQ_UNLOCK(qs); 1927 return (ENOSPC); 1928 } 1929 goto again; 1930 } 1931 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1932 1933 q->in_use++; 1934 if (++q->pidx >= q->size) { 1935 q->pidx = 0; 1936 q->gen ^= 1; 1937 } 1938 TXQ_UNLOCK(qs); 1939 wmb(); 1940 t3_write_reg(adap, A_SG_KDOORBELL, 1941 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1942 return (0); 1943 } 1944 1945 1946 /** 1947 * restart_ctrlq - restart a suspended control queue 1948 * @qs: the queue set cotaining the control queue 1949 * 1950 * Resumes transmission on a suspended Tx control queue. 
1951 */ 1952 static void 1953 restart_ctrlq(void *data, int npending) 1954 { 1955 struct mbuf *m; 1956 struct sge_qset *qs = (struct sge_qset *)data; 1957 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1958 adapter_t *adap = qs->port->adapter; 1959 1960 TXQ_LOCK(qs); 1961 again: reclaim_completed_tx_imm(q); 1962 1963 while (q->in_use < q->size && 1964 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1965 1966 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1967 1968 if (++q->pidx >= q->size) { 1969 q->pidx = 0; 1970 q->gen ^= 1; 1971 } 1972 q->in_use++; 1973 } 1974 if (!mbufq_empty(&q->sendq)) { 1975 setbit(&qs->txq_stopped, TXQ_CTRL); 1976 1977 if (should_restart_tx(q) && 1978 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1979 goto again; 1980 q->stops++; 1981 } 1982 TXQ_UNLOCK(qs); 1983 t3_write_reg(adap, A_SG_KDOORBELL, 1984 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1985 } 1986 1987 1988 /* 1989 * Send a management message through control queue 0 1990 */ 1991 int 1992 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 1993 { 1994 return ctrl_xmit(adap, &adap->sge.qs[0], m); 1995 } 1996 1997 /** 1998 * free_qset - free the resources of an SGE queue set 1999 * @sc: the controller owning the queue set 2000 * @q: the queue set 2001 * 2002 * Release the HW and SW resources associated with an SGE queue set, such 2003 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2004 * queue set must be quiesced prior to calling this. 2005 */ 2006 static void 2007 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2008 { 2009 int i; 2010 2011 reclaim_completed_tx(q, 0, TXQ_ETH); 2012 if (q->txq[TXQ_ETH].txq_mr != NULL) 2013 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); 2014 if (q->txq[TXQ_ETH].txq_ifq != NULL) { 2015 ifq_delete(q->txq[TXQ_ETH].txq_ifq); 2016 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); 2017 } 2018 2019 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2020 if (q->fl[i].desc) { 2021 mtx_lock_spin(&sc->sge.reg_lock); 2022 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2023 mtx_unlock_spin(&sc->sge.reg_lock); 2024 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2025 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2026 q->fl[i].desc_map); 2027 bus_dma_tag_destroy(q->fl[i].desc_tag); 2028 bus_dma_tag_destroy(q->fl[i].entry_tag); 2029 } 2030 if (q->fl[i].sdesc) { 2031 free_rx_bufs(sc, &q->fl[i]); 2032 free(q->fl[i].sdesc, M_DEVBUF); 2033 } 2034 } 2035 2036 mtx_unlock(&q->lock); 2037 MTX_DESTROY(&q->lock); 2038 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2039 if (q->txq[i].desc) { 2040 mtx_lock_spin(&sc->sge.reg_lock); 2041 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2042 mtx_unlock_spin(&sc->sge.reg_lock); 2043 bus_dmamap_unload(q->txq[i].desc_tag, 2044 q->txq[i].desc_map); 2045 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2046 q->txq[i].desc_map); 2047 bus_dma_tag_destroy(q->txq[i].desc_tag); 2048 bus_dma_tag_destroy(q->txq[i].entry_tag); 2049 } 2050 if (q->txq[i].sdesc) { 2051 free(q->txq[i].sdesc, M_DEVBUF); 2052 } 2053 } 2054 2055 if (q->rspq.desc) { 2056 mtx_lock_spin(&sc->sge.reg_lock); 2057 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2058 mtx_unlock_spin(&sc->sge.reg_lock); 2059 2060 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2061 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2062 q->rspq.desc_map); 2063 bus_dma_tag_destroy(q->rspq.desc_tag); 2064 MTX_DESTROY(&q->rspq.lock); 2065 } 2066 2067 #ifdef INET 2068 tcp_lro_free(&q->lro.ctrl); 2069 #endif 2070 2071 bzero(q, sizeof(*q)); 2072 } 2073 2074 /** 2075 * t3_free_sge_resources - free SGE resources 2076 * @sc: 
the adapter softc 2077 *
2078 * Frees resources used by the SGE queue sets.
2079 */
2080 void
2081 t3_free_sge_resources(adapter_t *sc)
2082 {
2083 int i, nqsets;
2084
2085 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2086 nqsets += sc->port[i].nqsets;
2087
2088 for (i = 0; i < nqsets; ++i) {
2089 TXQ_LOCK(&sc->sge.qs[i]);
2090 t3_free_qset(sc, &sc->sge.qs[i]);
2091 }
2092
2093 }
2094
2095 /** 2096 * t3_sge_start - enable SGE
2097 * @sc: the controller softc
2098 *
2099 * Enables the SGE for DMAs. This is the last step in starting packet
2100 * transfers.
2101 */
2102 void
2103 t3_sge_start(adapter_t *sc)
2104 {
2105 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2106 }
2107
2108 /** 2109 * t3_sge_stop - disable SGE operation
2110 * @sc: the adapter
2111 *
2112 * Disables the DMA engine. This can be called in emergencies (e.g.,
2113 * from error interrupts) or from normal process context. In the latter
2114 * case it also disables any pending queue restart tasklets. Note that
2115 * if it is called in interrupt context it cannot disable the restart
2116 * tasklets as it cannot wait; however, the tasklets will have no effect
2117 * since the doorbells are disabled and the driver will call this again
2118 * later from process context, at which time the tasklets will be stopped
2119 * if they are still running.
2120 */
2121 void
2122 t3_sge_stop(adapter_t *sc)
2123 {
2124 int i, nqsets;
2125
2126 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2127
2128 if (sc->tq == NULL)
2129 return;
2130
2131 for (nqsets = i = 0; i < (sc)->params.nports; i++)
2132 nqsets += sc->port[i].nqsets;
2133 #ifdef notyet
2134 /*
2135 *
2136 * XXX
2137 */
2138 for (i = 0; i < nqsets; ++i) {
2139 struct sge_qset *qs = &sc->sge.qs[i];
2140
2141 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2142 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2143 }
2144 #endif
2145 }
2146
2147 /** 2148 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2149 * @qs: the queue set that owns the Tx queue
2150 * @reclaimable: the number of descriptors to reclaim
2151 * @queue: the Tx queue index within the queue set
2152 * (TXQ_ETH, TXQ_OFLD, or TXQ_CTRL)
2153 *
2154 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2155 * Tx buffers. Called with the Tx queue lock held.
2156 *
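 * Only the software state is released here: mapped DMA segments are
 * unloaded and the mbuf chains freed. The hardware descriptors themselves
 * are reused in place once cidx has advanced past them.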
2157 * 2158 * Returns number of buffers of reclaimed 2159 */ 2160 void 2161 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2162 { 2163 struct tx_sw_desc *txsd; 2164 unsigned int cidx, mask; 2165 struct sge_txq *q = &qs->txq[queue]; 2166 2167 #ifdef T3_TRACE 2168 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2169 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2170 #endif 2171 cidx = q->cidx; 2172 mask = q->size - 1; 2173 txsd = &q->sdesc[cidx]; 2174 2175 mtx_assert(&qs->lock, MA_OWNED); 2176 while (reclaimable--) { 2177 prefetch(q->sdesc[(cidx + 1) & mask].m); 2178 prefetch(q->sdesc[(cidx + 2) & mask].m); 2179 2180 if (txsd->m != NULL) { 2181 if (txsd->flags & TX_SW_DESC_MAPPED) { 2182 bus_dmamap_unload(q->entry_tag, txsd->map); 2183 txsd->flags &= ~TX_SW_DESC_MAPPED; 2184 } 2185 m_freem_list(txsd->m); 2186 txsd->m = NULL; 2187 } else 2188 q->txq_skipped++; 2189 2190 ++txsd; 2191 if (++cidx == q->size) { 2192 cidx = 0; 2193 txsd = q->sdesc; 2194 } 2195 } 2196 q->cidx = cidx; 2197 2198 } 2199 2200 /** 2201 * is_new_response - check if a response is newly written 2202 * @r: the response descriptor 2203 * @q: the response queue 2204 * 2205 * Returns true if a response descriptor contains a yet unprocessed 2206 * response. 2207 */ 2208 static __inline int 2209 is_new_response(const struct rsp_desc *r, 2210 const struct sge_rspq *q) 2211 { 2212 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2213 } 2214 2215 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2216 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2217 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2218 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2219 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2220 2221 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2222 #define NOMEM_INTR_DELAY 2500 2223 2224 /** 2225 * write_ofld_wr - write an offload work request 2226 * @adap: the adapter 2227 * @m: the packet to send 2228 * @q: the Tx queue 2229 * @pidx: index of the first Tx descriptor to write 2230 * @gen: the generation value to use 2231 * @ndesc: number of descriptors the packet will occupy 2232 * 2233 * Write an offload work request to send the supplied packet. The packet 2234 * data already carry the work request with most fields populated. 2235 */ 2236 static void 2237 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2238 struct sge_txq *q, unsigned int pidx, 2239 unsigned int gen, unsigned int ndesc, 2240 bus_dma_segment_t *segs, unsigned int nsegs) 2241 { 2242 unsigned int sgl_flits, flits; 2243 struct work_request_hdr *from; 2244 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2245 struct tx_desc *d = &q->desc[pidx]; 2246 struct txq_state txqs; 2247 2248 if (immediate(m) && nsegs == 0) { 2249 write_imm(d, m, m->m_len, gen); 2250 return; 2251 } 2252 2253 /* Only TX_DATA builds SGLs */ 2254 from = mtod(m, struct work_request_hdr *); 2255 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2256 2257 flits = m->m_len / 8; 2258 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2259 2260 make_sgl(sgp, segs, nsegs); 2261 sgl_flits = sgl_len(nsegs); 2262 2263 txqs.gen = gen; 2264 txqs.pidx = pidx; 2265 txqs.compl = 0; 2266 2267 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2268 from->wrh_hi, from->wrh_lo); 2269 } 2270 2271 /** 2272 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2273 * @m: the packet 2274 * 2275 * Returns the number of Tx descriptors needed for the given offload 2276 * packet. These packets are already fully constructed. 
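 *
 * The estimate is the header flits (m_len / 8) plus the flits needed for
 * the scatter-gather list; a struct sg_ent describes two segments, which
 * is why write_ofld_wr() sizes its sgl[] array as TX_MAX_SEGS / 2 + 1.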
2277 */ 2278 static __inline unsigned int 2279 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2280 { 2281 unsigned int flits, cnt = 0; 2282 int ndescs; 2283 2284 if (m->m_len <= WR_LEN && nsegs == 0) 2285 return (1); /* packet fits as immediate data */ 2286 2287 /* 2288 * This needs to be re-visited for TOE 2289 */ 2290 2291 cnt = nsegs; 2292 2293 /* headers */ 2294 flits = m->m_len / 8; 2295 2296 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2297 2298 return (ndescs); 2299 } 2300 2301 /** 2302 * ofld_xmit - send a packet through an offload queue 2303 * @adap: the adapter 2304 * @q: the Tx offload queue 2305 * @m: the packet 2306 * 2307 * Send an offload packet through an SGE offload queue. 2308 */ 2309 static int 2310 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2311 { 2312 int ret, nsegs; 2313 unsigned int ndesc; 2314 unsigned int pidx, gen; 2315 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2316 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2317 struct tx_sw_desc *stx; 2318 2319 nsegs = m_get_sgllen(m); 2320 vsegs = m_get_sgl(m); 2321 ndesc = calc_tx_descs_ofld(m, nsegs); 2322 busdma_map_sgl(vsegs, segs, nsegs); 2323 2324 stx = &q->sdesc[q->pidx]; 2325 2326 TXQ_LOCK(qs); 2327 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2328 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2329 if (__predict_false(ret)) { 2330 if (ret == 1) { 2331 printf("no ofld desc avail\n"); 2332 2333 m_set_priority(m, ndesc); /* save for restart */ 2334 TXQ_UNLOCK(qs); 2335 return (EINTR); 2336 } 2337 goto again; 2338 } 2339 2340 gen = q->gen; 2341 q->in_use += ndesc; 2342 pidx = q->pidx; 2343 q->pidx += ndesc; 2344 if (q->pidx >= q->size) { 2345 q->pidx -= q->size; 2346 q->gen ^= 1; 2347 } 2348 #ifdef T3_TRACE 2349 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2350 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2351 ndesc, pidx, skb->len, skb->len - skb->data_len, 2352 skb_shinfo(skb)->nr_frags); 2353 #endif 2354 TXQ_UNLOCK(qs); 2355 2356 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2357 check_ring_tx_db(adap, q); 2358 return (0); 2359 } 2360 2361 /** 2362 * restart_offloadq - restart a suspended offload queue 2363 * @qs: the queue set cotaining the offload queue 2364 * 2365 * Resumes transmission on a suspended Tx offload queue. 
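 * The descriptor count for each queued packet was stashed in the mbuf
 * priority field by ofld_xmit() via m_set_priority(m, ndesc), so it
 * does not need to be recomputed here.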
2366 */ 2367 static void 2368 restart_offloadq(void *data, int npending) 2369 { 2370 struct mbuf *m; 2371 struct sge_qset *qs = data; 2372 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2373 adapter_t *adap = qs->port->adapter; 2374 bus_dma_segment_t segs[TX_MAX_SEGS]; 2375 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2376 int nsegs, cleaned; 2377 2378 TXQ_LOCK(qs); 2379 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2380 2381 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2382 unsigned int gen, pidx; 2383 unsigned int ndesc = m_get_priority(m); 2384 2385 if (__predict_false(q->size - q->in_use < ndesc)) { 2386 setbit(&qs->txq_stopped, TXQ_OFLD); 2387 if (should_restart_tx(q) && 2388 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2389 goto again; 2390 q->stops++; 2391 break; 2392 } 2393 2394 gen = q->gen; 2395 q->in_use += ndesc; 2396 pidx = q->pidx; 2397 q->pidx += ndesc; 2398 if (q->pidx >= q->size) { 2399 q->pidx -= q->size; 2400 q->gen ^= 1; 2401 } 2402 2403 (void)mbufq_dequeue(&q->sendq); 2404 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2405 TXQ_UNLOCK(qs); 2406 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2407 TXQ_LOCK(qs); 2408 } 2409 #if USE_GTS 2410 set_bit(TXQ_RUNNING, &q->flags); 2411 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2412 #endif 2413 TXQ_UNLOCK(qs); 2414 wmb(); 2415 t3_write_reg(adap, A_SG_KDOORBELL, 2416 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2417 } 2418 2419 /** 2420 * queue_set - return the queue set a packet should use 2421 * @m: the packet 2422 * 2423 * Maps a packet to the SGE queue set it should use. The desired queue 2424 * set is carried in bits 1-3 in the packet's priority. 2425 */ 2426 static __inline int 2427 queue_set(const struct mbuf *m) 2428 { 2429 return m_get_priority(m) >> 1; 2430 } 2431 2432 /** 2433 * is_ctrl_pkt - return whether an offload packet is a control packet 2434 * @m: the packet 2435 * 2436 * Determines whether an offload packet should use an OFLD or a CTRL 2437 * Tx queue. This is indicated by bit 0 in the packet's priority. 2438 */ 2439 static __inline int 2440 is_ctrl_pkt(const struct mbuf *m) 2441 { 2442 return m_get_priority(m) & 1; 2443 } 2444 2445 /** 2446 * t3_offload_tx - send an offload packet 2447 * @tdev: the offload device to send to 2448 * @m: the packet 2449 * 2450 * Sends an offload packet. We use the packet priority to select the 2451 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2452 * should be sent as regular or control, bits 1-3 select the queue set. 2453 */ 2454 int 2455 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2456 { 2457 adapter_t *adap = tdev2adap(tdev); 2458 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2459 2460 if (__predict_false(is_ctrl_pkt(m))) 2461 return ctrl_xmit(adap, qs, m); 2462 2463 return ofld_xmit(adap, qs, m); 2464 } 2465 2466 /** 2467 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2468 * @tdev: the offload device that will be receiving the packets 2469 * @q: the SGE response queue that assembled the bundle 2470 * @m: the partial bundle 2471 * @n: the number of packets in the bundle 2472 * 2473 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
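 * rx_offload() normally hands packets up RX_BUNDLE_SIZE at a time; this
 * flushes whatever is left over at the end of a response-processing
 * pass (see process_responses()).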
2474 */ 2475 static __inline void 2476 deliver_partial_bundle(struct t3cdev *tdev, 2477 struct sge_rspq *q, 2478 struct mbuf *mbufs[], int n) 2479 { 2480 if (n) { 2481 q->offload_bundles++; 2482 cxgb_ofld_recv(tdev, mbufs, n); 2483 } 2484 } 2485 2486 static __inline int 2487 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2488 struct mbuf *m, struct mbuf *rx_gather[], 2489 unsigned int gather_idx) 2490 { 2491 2492 rq->offload_pkts++; 2493 m->m_pkthdr.header = mtod(m, void *); 2494 rx_gather[gather_idx++] = m; 2495 if (gather_idx == RX_BUNDLE_SIZE) { 2496 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2497 gather_idx = 0; 2498 rq->offload_bundles++; 2499 } 2500 return (gather_idx); 2501 } 2502 2503 static void 2504 restart_tx(struct sge_qset *qs) 2505 { 2506 struct adapter *sc = qs->port->adapter; 2507 2508 2509 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2510 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2511 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2512 qs->txq[TXQ_OFLD].restarts++; 2513 DPRINTF("restarting TXQ_OFLD\n"); 2514 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2515 } 2516 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2517 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2518 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2519 qs->txq[TXQ_CTRL].in_use); 2520 2521 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2522 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2523 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2524 qs->txq[TXQ_CTRL].restarts++; 2525 DPRINTF("restarting TXQ_CTRL\n"); 2526 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2527 } 2528 } 2529 2530 /** 2531 * t3_sge_alloc_qset - initialize an SGE queue set 2532 * @sc: the controller softc 2533 * @id: the queue set id 2534 * @nports: how many Ethernet ports will be using this queue set 2535 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2536 * @p: configuration parameters for this queue set 2537 * @ntxq: number of Tx queues for the queue set 2538 * @pi: port info for queue set 2539 * 2540 * Allocate resources and initialize an SGE queue set. A queue set 2541 * comprises a response queue, two Rx free-buffer queues, and up to 3 2542 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2543 * queue, offload queue, and control queue. 
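 * On any failure the partially constructed queue set is torn down again
 * via t3_free_qset(), which expects the queue set lock to be held.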
2544 */ 2545 int 2546 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2547 const struct qset_params *p, int ntxq, struct port_info *pi) 2548 { 2549 struct sge_qset *q = &sc->sge.qs[id]; 2550 int i, ret = 0; 2551 2552 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2553 q->port = pi; 2554 2555 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2556 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2557 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2558 goto err; 2559 } 2560 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2561 M_NOWAIT | M_ZERO)) == NULL) { 2562 device_printf(sc->dev, "failed to allocate ifq\n"); 2563 goto err; 2564 } 2565 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2566 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2567 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2568 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2569 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2570 2571 init_qset_cntxt(q, id); 2572 q->idx = id; 2573 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2574 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2575 &q->fl[0].desc, &q->fl[0].sdesc, 2576 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2577 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2578 printf("error %d from alloc ring fl0\n", ret); 2579 goto err; 2580 } 2581 2582 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2583 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2584 &q->fl[1].desc, &q->fl[1].sdesc, 2585 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2586 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2587 printf("error %d from alloc ring fl1\n", ret); 2588 goto err; 2589 } 2590 2591 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2592 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2593 &q->rspq.desc_tag, &q->rspq.desc_map, 2594 NULL, NULL)) != 0) { 2595 printf("error %d from alloc ring rspq\n", ret); 2596 goto err; 2597 } 2598 2599 for (i = 0; i < ntxq; ++i) { 2600 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2601 2602 if ((ret = alloc_ring(sc, p->txq_size[i], 2603 sizeof(struct tx_desc), sz, 2604 &q->txq[i].phys_addr, &q->txq[i].desc, 2605 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2606 &q->txq[i].desc_map, 2607 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2608 printf("error %d from alloc ring tx %i\n", ret, i); 2609 goto err; 2610 } 2611 mbufq_init(&q->txq[i].sendq); 2612 q->txq[i].gen = 1; 2613 q->txq[i].size = p->txq_size[i]; 2614 } 2615 2616 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2617 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2618 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2619 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2620 2621 q->fl[0].gen = q->fl[1].gen = 1; 2622 q->fl[0].size = p->fl_size; 2623 q->fl[1].size = p->jumbo_size; 2624 2625 q->rspq.gen = 1; 2626 q->rspq.cidx = 0; 2627 q->rspq.size = p->rspq_size; 2628 2629 q->txq[TXQ_ETH].stop_thres = nports * 2630 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2631 2632 q->fl[0].buf_size = MCLBYTES; 2633 q->fl[0].zone = zone_pack; 2634 q->fl[0].type = EXT_PACKET; 2635 2636 if (p->jumbo_buf_size == MJUM16BYTES) { 2637 q->fl[1].zone = zone_jumbo16; 2638 q->fl[1].type = EXT_JUMBO16; 2639 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2640 q->fl[1].zone = zone_jumbo9; 2641 q->fl[1].type = EXT_JUMBO9; 2642 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2643 q->fl[1].zone = zone_jumbop; 2644 q->fl[1].type = EXT_JUMBOP; 2645 } else { 2646 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2647 ret = EDOOFUS; 2648 goto err; 2649 } 2650 q->fl[1].buf_size = p->jumbo_buf_size; 2651 2652 /* Allocate and setup the lro_ctrl structure */ 2653 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2654 #ifdef INET 2655 ret = tcp_lro_init(&q->lro.ctrl); 2656 if (ret) { 2657 printf("error %d from tcp_lro_init\n", ret); 2658 goto err; 2659 } 2660 #endif 2661 q->lro.ctrl.ifp = pi->ifp; 2662 2663 mtx_lock_spin(&sc->sge.reg_lock); 2664 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2665 q->rspq.phys_addr, q->rspq.size, 2666 q->fl[0].buf_size, 1, 0); 2667 if (ret) { 2668 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2669 goto err_unlock; 2670 } 2671 2672 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2673 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2674 q->fl[i].phys_addr, q->fl[i].size, 2675 q->fl[i].buf_size, p->cong_thres, 1, 2676 0); 2677 if (ret) { 2678 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2679 goto err_unlock; 2680 } 2681 } 2682 2683 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2684 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2685 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2686 1, 0); 2687 if (ret) { 2688 printf("error %d from t3_sge_init_ecntxt\n", ret); 2689 goto err_unlock; 2690 } 2691 2692 if (ntxq > 1) { 2693 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2694 USE_GTS, SGE_CNTXT_OFLD, id, 2695 q->txq[TXQ_OFLD].phys_addr, 2696 q->txq[TXQ_OFLD].size, 0, 1, 0); 2697 if (ret) { 2698 printf("error %d from t3_sge_init_ecntxt\n", ret); 2699 goto err_unlock; 2700 } 2701 } 2702 2703 if (ntxq > 2) { 2704 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2705 SGE_CNTXT_CTRL, id, 2706 q->txq[TXQ_CTRL].phys_addr, 2707 q->txq[TXQ_CTRL].size, 2708 q->txq[TXQ_CTRL].token, 1, 0); 2709 if (ret) { 2710 printf("error %d from t3_sge_init_ecntxt\n", ret); 2711 goto err_unlock; 2712 } 2713 } 2714 2715 
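	/*
	 * Name the response queue lock after the device unit and IRQ vector
	 * index before initializing it, so it can be told apart in lock
	 * diagnostics.
	 */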
snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2716 device_get_unit(sc->dev), irq_vec_idx); 2717 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2718 2719 mtx_unlock_spin(&sc->sge.reg_lock); 2720 t3_update_qset_coalesce(q, p); 2721 q->port = pi; 2722 2723 refill_fl(sc, &q->fl[0], q->fl[0].size); 2724 refill_fl(sc, &q->fl[1], q->fl[1].size); 2725 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2726 2727 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2728 V_NEWTIMER(q->rspq.holdoff_tmr)); 2729 2730 return (0); 2731 2732 err_unlock: 2733 mtx_unlock_spin(&sc->sge.reg_lock); 2734 err: 2735 TXQ_LOCK(q); 2736 t3_free_qset(sc, q); 2737 2738 return (ret); 2739 } 2740 2741 /* 2742 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2743 * ethernet data. Hardware assistance with various checksums and any vlan tag 2744 * will also be taken into account here. 2745 */ 2746 void 2747 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2748 { 2749 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2750 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2751 struct ifnet *ifp = pi->ifp; 2752 2753 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2754 2755 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2756 cpl->csum_valid && cpl->csum == 0xffff) { 2757 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2758 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2759 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2760 m->m_pkthdr.csum_data = 0xffff; 2761 } 2762 2763 if (cpl->vlan_valid) { 2764 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2765 m->m_flags |= M_VLANTAG; 2766 } 2767 2768 m->m_pkthdr.rcvif = ifp; 2769 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2770 /* 2771 * adjust after conversion to mbuf chain 2772 */ 2773 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2774 m->m_len -= (sizeof(*cpl) + ethpad); 2775 m->m_data += (sizeof(*cpl) + ethpad); 2776 } 2777 2778 /** 2779 * get_packet - return the next ingress packet buffer from a free list 2780 * @adap: the adapter that received the packet 2781 * @drop_thres: # of remaining buffers before we start dropping packets 2782 * @qs: the qset that the SGE free list holding the packet belongs to 2783 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2784 * @r: response descriptor 2785 * 2786 * Get the next packet from a free list and complete setup of the 2787 * sk_buff. If the packet is small we make a copy and recycle the 2788 * original buffer, otherwise we use the original buffer itself. If a 2789 * positive drop threshold is supplied packets are dropped and their 2790 * buffers recycled if (a) the number of remaining buffers is under the 2791 * threshold and the packet is too big to copy, or (b) the packet should 2792 * be copied but there is no memory for the copy. 2793 */ 2794 static int 2795 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2796 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2797 { 2798 2799 unsigned int len_cq = ntohl(r->len_cq); 2800 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2801 int mask, cidx = fl->cidx; 2802 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2803 uint32_t len = G_RSPD_LEN(len_cq); 2804 uint32_t flags = M_EXT; 2805 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2806 caddr_t cl; 2807 struct mbuf *m; 2808 int ret = 0; 2809 2810 mask = fl->size - 1; 2811 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2812 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2813 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2814 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2815 2816 fl->credits--; 2817 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2818 2819 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2820 sopeop == RSPQ_SOP_EOP) { 2821 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2822 goto skip_recycle; 2823 cl = mtod(m, void *); 2824 memcpy(cl, sd->rxsd_cl, len); 2825 recycle_rx_buf(adap, fl, fl->cidx); 2826 m->m_pkthdr.len = m->m_len = len; 2827 m->m_flags = 0; 2828 mh->mh_head = mh->mh_tail = m; 2829 ret = 1; 2830 goto done; 2831 } else { 2832 skip_recycle: 2833 bus_dmamap_unload(fl->entry_tag, sd->map); 2834 cl = sd->rxsd_cl; 2835 m = sd->m; 2836 2837 if ((sopeop == RSPQ_SOP_EOP) || 2838 (sopeop == RSPQ_SOP)) 2839 flags |= M_PKTHDR; 2840 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2841 if (fl->zone == zone_pack) { 2842 /* 2843 * restore clobbered data pointer 2844 */ 2845 m->m_data = m->m_ext.ext_buf; 2846 } else { 2847 m_cljset(m, cl, fl->type); 2848 } 2849 m->m_len = len; 2850 } 2851 switch(sopeop) { 2852 case RSPQ_SOP_EOP: 2853 ret = 1; 2854 /* FALLTHROUGH */ 2855 case RSPQ_SOP: 2856 mh->mh_head = mh->mh_tail = m; 2857 m->m_pkthdr.len = len; 2858 break; 2859 case RSPQ_EOP: 2860 ret = 1; 2861 /* FALLTHROUGH */ 2862 case RSPQ_NSOP_NEOP: 2863 if (mh->mh_tail == NULL) { 2864 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2865 m_freem(m); 2866 break; 2867 } 2868 mh->mh_tail->m_next = m; 2869 mh->mh_tail = m; 2870 mh->mh_head->m_pkthdr.len += len; 2871 break; 2872 } 2873 if (cxgb_debug) 2874 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2875 done: 2876 if (++fl->cidx == fl->size) 2877 fl->cidx = 0; 2878 2879 return (ret); 2880 } 2881 2882 /** 2883 * handle_rsp_cntrl_info - handles control information in a response 2884 * @qs: the queue set corresponding to the response 2885 * @flags: the response control flags 2886 * 2887 * Handles the control information of an SGE response, such as GTS 2888 * indications and completion credits for the queue set's Tx queues. 2889 * HW coalesces credits, we don't do any extra SW coalescing. 
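 * The completion credit fields map onto the queue set's Tx queues as
 * follows: TXQ0 credits go to TXQ_ETH, TXQ1 to TXQ_OFLD, and TXQ2 to
 * TXQ_CTRL (see the G_RSPD_TXQ*_CR handling below).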
2890 */ 2891 static __inline void 2892 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2893 { 2894 unsigned int credits; 2895 2896 #if USE_GTS 2897 if (flags & F_RSPD_TXQ0_GTS) 2898 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2899 #endif 2900 credits = G_RSPD_TXQ0_CR(flags); 2901 if (credits) 2902 qs->txq[TXQ_ETH].processed += credits; 2903 2904 credits = G_RSPD_TXQ2_CR(flags); 2905 if (credits) 2906 qs->txq[TXQ_CTRL].processed += credits; 2907 2908 # if USE_GTS 2909 if (flags & F_RSPD_TXQ1_GTS) 2910 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2911 # endif 2912 credits = G_RSPD_TXQ1_CR(flags); 2913 if (credits) 2914 qs->txq[TXQ_OFLD].processed += credits; 2915 2916 } 2917 2918 static void 2919 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2920 unsigned int sleeping) 2921 { 2922 ; 2923 } 2924 2925 /** 2926 * process_responses - process responses from an SGE response queue 2927 * @adap: the adapter 2928 * @qs: the queue set to which the response queue belongs 2929 * @budget: how many responses can be processed in this round 2930 * 2931 * Process responses from an SGE response queue up to the supplied budget. 2932 * Responses include received packets as well as credits and other events 2933 * for the queues that belong to the response queue's queue set. 2934 * A negative budget is effectively unlimited. 2935 * 2936 * Additionally choose the interrupt holdoff time for the next interrupt 2937 * on this queue. If the system is under memory shortage use a fairly 2938 * long delay to help recovery. 2939 */ 2940 static int 2941 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2942 { 2943 struct sge_rspq *rspq = &qs->rspq; 2944 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2945 int budget_left = budget; 2946 unsigned int sleeping = 0; 2947 int lro_enabled = qs->lro.enabled; 2948 int skip_lro; 2949 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2950 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2951 int ngathered = 0; 2952 #ifdef DEBUG 2953 static int last_holdoff = 0; 2954 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2955 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2956 last_holdoff = rspq->holdoff_tmr; 2957 } 2958 #endif 2959 rspq->next_holdoff = rspq->holdoff_tmr; 2960 2961 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2962 int eth, eop = 0, ethpad = 0; 2963 uint32_t flags = ntohl(r->flags); 2964 uint32_t rss_csum = *(const uint32_t *)r; 2965 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2966 2967 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2968 2969 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2970 struct mbuf *m; 2971 2972 if (cxgb_debug) 2973 printf("async notification\n"); 2974 2975 if (rspq->rspq_mh.mh_head == NULL) { 2976 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2977 m = rspq->rspq_mh.mh_head; 2978 } else { 2979 m = m_gethdr(M_DONTWAIT, MT_DATA); 2980 } 2981 if (m == NULL) 2982 goto no_mem; 2983 2984 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2985 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2986 *mtod(m, char *) = CPL_ASYNC_NOTIF; 2987 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 2988 eop = 1; 2989 rspq->async_notif++; 2990 goto skip; 2991 } else if (flags & F_RSPD_IMM_DATA_VALID) { 2992 struct mbuf *m = NULL; 2993 2994 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 2995 r->rss_hdr.opcode, rspq->cidx); 2996 if (rspq->rspq_mh.mh_head == NULL) 2997 rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2998 else 2999 m = m_gethdr(M_DONTWAIT, MT_DATA); 3000 3001 if (rspq->rspq_mh.mh_head == NULL 
&& m == NULL) { 3002 no_mem: 3003 rspq->next_holdoff = NOMEM_INTR_DELAY; 3004 budget_left--; 3005 break; 3006 } 3007 get_imm_packet(adap, r, rspq->rspq_mh.mh_head); 3008 eop = 1; 3009 rspq->imm_data++; 3010 } else if (r->len_cq) { 3011 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3012 3013 eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r); 3014 if (eop) { 3015 rspq->rspq_mh.mh_head->m_flags |= M_FLOWID; 3016 rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash; 3017 } 3018 3019 ethpad = 2; 3020 } else { 3021 rspq->pure_rsps++; 3022 } 3023 skip: 3024 if (flags & RSPD_CTRL_MASK) { 3025 sleeping |= flags & RSPD_GTS_MASK; 3026 handle_rsp_cntrl_info(qs, flags); 3027 } 3028 3029 r++; 3030 if (__predict_false(++rspq->cidx == rspq->size)) { 3031 rspq->cidx = 0; 3032 rspq->gen ^= 1; 3033 r = rspq->desc; 3034 } 3035 3036 if (++rspq->credits >= (rspq->size / 4)) { 3037 refill_rspq(adap, rspq, rspq->credits); 3038 rspq->credits = 0; 3039 } 3040 if (!eth && eop) { 3041 rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum; 3042 /* 3043 * XXX size mismatch 3044 */ 3045 m_set_priority(rspq->rspq_mh.mh_head, rss_hash); 3046 3047 3048 ngathered = rx_offload(&adap->tdev, rspq, 3049 rspq->rspq_mh.mh_head, offload_mbufs, ngathered); 3050 rspq->rspq_mh.mh_head = NULL; 3051 DPRINTF("received offload packet\n"); 3052 3053 } else if (eth && eop) { 3054 struct mbuf *m = rspq->rspq_mh.mh_head; 3055 3056 t3_rx_eth(adap, rspq, m, ethpad); 3057 3058 /* 3059 * The T304 sends incoming packets on any qset. If LRO 3060 * is also enabled, we could end up sending packet up 3061 * lro_ctrl->ifp's input. That is incorrect. 3062 * 3063 * The mbuf's rcvif was derived from the cpl header and 3064 * is accurate. Skip LRO and just use that. 3065 */ 3066 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3067 3068 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 3069 #ifdef INET 3070 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 3071 #endif 3072 ) { 3073 /* successfully queue'd for LRO */ 3074 } else { 3075 /* 3076 * LRO not enabled, packet unsuitable for LRO, 3077 * or unable to queue. Pass it up right now in 3078 * either case. 3079 */ 3080 struct ifnet *ifp = m->m_pkthdr.rcvif; 3081 (*ifp->if_input)(ifp, m); 3082 } 3083 rspq->rspq_mh.mh_head = NULL; 3084 3085 } 3086 __refill_fl_lt(adap, &qs->fl[0], 32); 3087 __refill_fl_lt(adap, &qs->fl[1], 32); 3088 --budget_left; 3089 } 3090 3091 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3092 3093 #ifdef INET 3094 /* Flush LRO */ 3095 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3096 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3097 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3098 tcp_lro_flush(lro_ctrl, queued); 3099 } 3100 #endif 3101 3102 if (sleeping) 3103 check_ring_db(adap, qs, sleeping); 3104 3105 mb(); /* commit Tx queue processed updates */ 3106 if (__predict_false(qs->txq_stopped > 1)) 3107 restart_tx(qs); 3108 3109 __refill_fl_lt(adap, &qs->fl[0], 512); 3110 __refill_fl_lt(adap, &qs->fl[1], 512); 3111 budget -= budget_left; 3112 return (budget); 3113 } 3114 3115 /* 3116 * A helper function that processes responses and issues GTS. 
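 * The A_SG_GTS write below hands the updated consumer index back to the
 * hardware and programs the interrupt holdoff timer that
 * process_responses() selected in rspq->next_holdoff.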
3117 */ 3118 static __inline int 3119 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3120 { 3121 int work; 3122 static int last_holdoff = 0; 3123 3124 work = process_responses(adap, rspq_to_qset(rq), -1); 3125 3126 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3127 printf("next_holdoff=%d\n", rq->next_holdoff); 3128 last_holdoff = rq->next_holdoff; 3129 } 3130 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3131 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3132 3133 return (work); 3134 } 3135 3136 3137 /* 3138 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3139 * Handles data events from SGE response queues as well as error and other 3140 * async events as they all use the same interrupt pin. We use one SGE 3141 * response queue per port in this mode and protect all response queues with 3142 * queue 0's lock. 3143 */ 3144 void 3145 t3b_intr(void *data) 3146 { 3147 uint32_t i, map; 3148 adapter_t *adap = data; 3149 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3150 3151 t3_write_reg(adap, A_PL_CLI, 0); 3152 map = t3_read_reg(adap, A_SG_DATA_INTR); 3153 3154 if (!map) 3155 return; 3156 3157 if (__predict_false(map & F_ERRINTR)) 3158 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3159 3160 mtx_lock(&q0->lock); 3161 for_each_port(adap, i) 3162 if (map & (1 << i)) 3163 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3164 mtx_unlock(&q0->lock); 3165 } 3166 3167 /* 3168 * The MSI interrupt handler. This needs to handle data events from SGE 3169 * response queues as well as error and other async events as they all use 3170 * the same MSI vector. We use one SGE response queue per port in this mode 3171 * and protect all response queues with queue 0's lock. 3172 */ 3173 void 3174 t3_intr_msi(void *data) 3175 { 3176 adapter_t *adap = data; 3177 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3178 int i, new_packets = 0; 3179 3180 mtx_lock(&q0->lock); 3181 3182 for_each_port(adap, i) 3183 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3184 new_packets = 1; 3185 mtx_unlock(&q0->lock); 3186 if (new_packets == 0) 3187 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3188 } 3189 3190 void 3191 t3_intr_msix(void *data) 3192 { 3193 struct sge_qset *qs = data; 3194 adapter_t *adap = qs->port->adapter; 3195 struct sge_rspq *rspq = &qs->rspq; 3196 3197 if (process_responses_gts(adap, rspq) == 0) 3198 rspq->unhandled_irqs++; 3199 } 3200 3201 #define QDUMP_SBUF_SIZE 32 * 400 3202 static int 3203 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3204 { 3205 struct sge_rspq *rspq; 3206 struct sge_qset *qs; 3207 int i, err, dump_end, idx; 3208 static int multiplier = 1; 3209 struct sbuf *sb; 3210 struct rsp_desc *rspd; 3211 uint32_t data[4]; 3212 3213 rspq = arg1; 3214 qs = rspq_to_qset(rspq); 3215 if (rspq->rspq_dump_count == 0) 3216 return (0); 3217 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3218 log(LOG_WARNING, 3219 "dump count is too large %d\n", rspq->rspq_dump_count); 3220 rspq->rspq_dump_count = 0; 3221 return (EINVAL); 3222 } 3223 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3224 log(LOG_WARNING, 3225 "dump start of %d is greater than queue size\n", 3226 rspq->rspq_dump_start); 3227 rspq->rspq_dump_start = 0; 3228 return (EINVAL); 3229 } 3230 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3231 if (err) 3232 return (err); 3233 retry_sbufops: 3234 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3235 3236 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3237 (data[0] & 
0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3238 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3239 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3240 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3241 3242 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3243 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3244 3245 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3246 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3247 idx = i & (RSPQ_Q_SIZE-1); 3248 3249 rspd = &rspq->desc[idx]; 3250 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3251 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3252 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3253 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3254 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3255 be32toh(rspd->len_cq), rspd->intr_gen); 3256 } 3257 if (sbuf_overflowed(sb)) { 3258 sbuf_delete(sb); 3259 multiplier++; 3260 goto retry_sbufops; 3261 } 3262 sbuf_finish(sb); 3263 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3264 sbuf_delete(sb); 3265 return (err); 3266 } 3267 3268 static int 3269 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3270 { 3271 struct sge_txq *txq; 3272 struct sge_qset *qs; 3273 int i, j, err, dump_end; 3274 static int multiplier = 1; 3275 struct sbuf *sb; 3276 struct tx_desc *txd; 3277 uint32_t *WR, wr_hi, wr_lo, gen; 3278 uint32_t data[4]; 3279 3280 txq = arg1; 3281 qs = txq_to_qset(txq, TXQ_ETH); 3282 if (txq->txq_dump_count == 0) { 3283 return (0); 3284 } 3285 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3286 log(LOG_WARNING, 3287 "dump count is too large %d\n", txq->txq_dump_count); 3288 txq->txq_dump_count = 1; 3289 return (EINVAL); 3290 } 3291 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3292 log(LOG_WARNING, 3293 "dump start of %d is greater than queue size\n", 3294 txq->txq_dump_start); 3295 txq->txq_dump_start = 0; 3296 return (EINVAL); 3297 } 3298 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3299 if (err) 3300 return (err); 3301 3302 3303 retry_sbufops: 3304 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3305 3306 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3307 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3308 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3309 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3310 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3311 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3312 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3313 txq->txq_dump_start, 3314 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3315 3316 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3317 for (i = txq->txq_dump_start; i < dump_end; i++) { 3318 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3319 WR = (uint32_t *)txd->flit; 3320 wr_hi = ntohl(WR[0]); 3321 wr_lo = ntohl(WR[1]); 3322 gen = G_WR_GEN(wr_lo); 3323 3324 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3325 wr_hi, wr_lo, gen); 3326 for (j = 2; j < 30; j += 4) 3327 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3328 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3329 3330 } 3331 if (sbuf_overflowed(sb)) { 3332 sbuf_delete(sb); 3333 multiplier++; 3334 goto retry_sbufops; 3335 } 3336 sbuf_finish(sb); 3337 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3338 sbuf_delete(sb); 3339 return (err); 3340 } 3341 
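/*
 * The dump handlers above and below are driven through sysctls registered
 * in t3_add_configured_sysctls(): set dump_start and dump_count on the
 * queue's node, then read qdump. A hypothetical session (the exact node
 * prefix depends on the device unit, port, and queue set, so treat it as
 * an example only):
 *
 *	sysctl <qset-node>.txq_eth.dump_start=0
 *	sysctl <qset-node>.txq_eth.dump_count=32
 *	sysctl <qset-node>.txq_eth.qdump
 */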
3342 static int 3343 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3344 { 3345 struct sge_txq *txq; 3346 struct sge_qset *qs; 3347 int i, j, err, dump_end; 3348 static int multiplier = 1; 3349 struct sbuf *sb; 3350 struct tx_desc *txd; 3351 uint32_t *WR, wr_hi, wr_lo, gen; 3352 3353 txq = arg1; 3354 qs = txq_to_qset(txq, TXQ_CTRL); 3355 if (txq->txq_dump_count == 0) { 3356 return (0); 3357 } 3358 if (txq->txq_dump_count > 256) { 3359 log(LOG_WARNING, 3360 "dump count is too large %d\n", txq->txq_dump_count); 3361 txq->txq_dump_count = 1; 3362 return (EINVAL); 3363 } 3364 if (txq->txq_dump_start > 255) { 3365 log(LOG_WARNING, 3366 "dump start of %d is greater than queue size\n", 3367 txq->txq_dump_start); 3368 txq->txq_dump_start = 0; 3369 return (EINVAL); 3370 } 3371 3372 retry_sbufops: 3373 sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN); 3374 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3375 txq->txq_dump_start, 3376 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3377 3378 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3379 for (i = txq->txq_dump_start; i < dump_end; i++) { 3380 txd = &txq->desc[i & (255)]; 3381 WR = (uint32_t *)txd->flit; 3382 wr_hi = ntohl(WR[0]); 3383 wr_lo = ntohl(WR[1]); 3384 gen = G_WR_GEN(wr_lo); 3385 3386 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3387 wr_hi, wr_lo, gen); 3388 for (j = 2; j < 30; j += 4) 3389 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3390 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3391 3392 } 3393 if (sbuf_overflowed(sb)) { 3394 sbuf_delete(sb); 3395 multiplier++; 3396 goto retry_sbufops; 3397 } 3398 sbuf_finish(sb); 3399 err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); 3400 sbuf_delete(sb); 3401 return (err); 3402 } 3403 3404 static int 3405 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3406 { 3407 adapter_t *sc = arg1; 3408 struct qset_params *qsp = &sc->params.sge.qset[0]; 3409 int coalesce_usecs; 3410 struct sge_qset *qs; 3411 int i, j, err, nqsets = 0; 3412 struct mtx *lock; 3413 3414 if ((sc->flags & FULL_INIT_DONE) == 0) 3415 return (ENXIO); 3416 3417 coalesce_usecs = qsp->coalesce_usecs; 3418 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3419 3420 if (err != 0) { 3421 return (err); 3422 } 3423 if (coalesce_usecs == qsp->coalesce_usecs) 3424 return (0); 3425 3426 for (i = 0; i < sc->params.nports; i++) 3427 for (j = 0; j < sc->port[i].nqsets; j++) 3428 nqsets++; 3429 3430 coalesce_usecs = max(1, coalesce_usecs); 3431 3432 for (i = 0; i < nqsets; i++) { 3433 qs = &sc->sge.qs[i]; 3434 qsp = &sc->params.sge.qset[i]; 3435 qsp->coalesce_usecs = coalesce_usecs; 3436 3437 lock = (sc->flags & USING_MSIX) ? 
&qs->rspq.lock : 3438 &sc->sge.qs[0].rspq.lock; 3439 3440 mtx_lock(lock); 3441 t3_update_qset_coalesce(qs, qsp); 3442 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3443 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3444 mtx_unlock(lock); 3445 } 3446 3447 return (0); 3448 } 3449 3450 3451 void 3452 t3_add_attach_sysctls(adapter_t *sc) 3453 { 3454 struct sysctl_ctx_list *ctx; 3455 struct sysctl_oid_list *children; 3456 3457 ctx = device_get_sysctl_ctx(sc->dev); 3458 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3459 3460 /* random information */ 3461 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3462 "firmware_version", 3463 CTLFLAG_RD, &sc->fw_version, 3464 0, "firmware version"); 3465 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3466 "hw_revision", 3467 CTLFLAG_RD, &sc->params.rev, 3468 0, "chip model"); 3469 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3470 "port_types", 3471 CTLFLAG_RD, &sc->port_types, 3472 0, "type of ports"); 3473 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3474 "enable_debug", 3475 CTLFLAG_RW, &cxgb_debug, 3476 0, "enable verbose debugging output"); 3477 SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3478 CTLFLAG_RD, &sc->tunq_coalesce, 3479 "#tunneled packets freed"); 3480 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3481 "txq_overrun", 3482 CTLFLAG_RD, &txq_fills, 3483 0, "#times txq overrun"); 3484 } 3485 3486 3487 static const char *rspq_name = "rspq"; 3488 static const char *txq_names[] = 3489 { 3490 "txq_eth", 3491 "txq_ofld", 3492 "txq_ctrl" 3493 }; 3494 3495 static int 3496 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3497 { 3498 struct port_info *p = arg1; 3499 uint64_t *parg; 3500 3501 if (!p) 3502 return (EINVAL); 3503 3504 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3505 PORT_LOCK(p); 3506 t3_mac_update_stats(&p->mac); 3507 PORT_UNLOCK(p); 3508 3509 return (sysctl_handle_quad(oidp, parg, 0, req)); 3510 } 3511 3512 void 3513 t3_add_configured_sysctls(adapter_t *sc) 3514 { 3515 struct sysctl_ctx_list *ctx; 3516 struct sysctl_oid_list *children; 3517 int i, j; 3518 3519 ctx = device_get_sysctl_ctx(sc->dev); 3520 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3521 3522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3523 "intr_coal", 3524 CTLTYPE_INT|CTLFLAG_RW, sc, 3525 0, t3_set_coalesce_usecs, 3526 "I", "interrupt coalescing timer (us)"); 3527 3528 for (i = 0; i < sc->params.nports; i++) { 3529 struct port_info *pi = &sc->port[i]; 3530 struct sysctl_oid *poid; 3531 struct sysctl_oid_list *poidlist; 3532 struct mac_stats *mstats = &pi->mac.stats; 3533 3534 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3535 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3536 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3537 poidlist = SYSCTL_CHILDREN(poid); 3538 SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO, 3539 "nqsets", CTLFLAG_RD, &pi->nqsets, 3540 0, "#queue sets"); 3541 3542 for (j = 0; j < pi->nqsets; j++) { 3543 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3544 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3545 *ctrlqpoid, *lropoid; 3546 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3547 *txqpoidlist, *ctrlqpoidlist, 3548 *lropoidlist; 3549 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3550 3551 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3552 3553 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3554 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3555 qspoidlist = SYSCTL_CHILDREN(qspoid); 3556 3557 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3558 CTLFLAG_RD, &qs->fl[0].empty, 0, 3559 "freelist #0 empty"); 
3560 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3561 CTLFLAG_RD, &qs->fl[1].empty, 0, 3562 "freelist #1 empty"); 3563 3564 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3565 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3566 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3567 3568 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3569 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3570 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3571 3572 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3573 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3574 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3575 3576 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3577 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3578 lropoidlist = SYSCTL_CHILDREN(lropoid); 3579 3580 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3581 CTLFLAG_RD, &qs->rspq.size, 3582 0, "#entries in response queue"); 3583 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3584 CTLFLAG_RD, &qs->rspq.cidx, 3585 0, "consumer index"); 3586 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3587 CTLFLAG_RD, &qs->rspq.credits, 3588 0, "#credits"); 3589 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3590 CTLFLAG_RD, &qs->rspq.starved, 3591 0, "#times starved"); 3592 SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3593 CTLFLAG_RD, &qs->rspq.phys_addr, 3594 "physical_address_of the queue"); 3595 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3596 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3597 0, "start rspq dump entry"); 3598 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3599 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3600 0, "#rspq entries to dump"); 3601 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3602 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3603 0, t3_dump_rspq, "A", "dump of the response queue"); 3604 3605 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3606 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3607 "#tunneled packets dropped"); 3608 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3609 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3610 0, "#tunneled packets waiting to be sent"); 3611 #if 0 3612 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3613 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3614 0, "#tunneled packets queue producer index"); 3615 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3616 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3617 0, "#tunneled packets queue consumer index"); 3618 #endif 3619 SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed", 3620 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3621 0, "#tunneled packets processed by the card"); 3622 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3623 CTLFLAG_RD, &txq->cleaned, 3624 0, "#tunneled packets cleaned"); 3625 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3626 CTLFLAG_RD, &txq->in_use, 3627 0, "#tunneled packet slots in use"); 3628 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3629 CTLFLAG_RD, &txq->txq_frees, 3630 "#tunneled packets freed"); 3631 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3632 CTLFLAG_RD, &txq->txq_skipped, 3633 0, "#tunneled packet descriptors skipped"); 3634 SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3635 CTLFLAG_RD, &txq->txq_coalesced, 3636 "#tunneled packets coalesced"); 3637 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3638 CTLFLAG_RD, &txq->txq_enqueued, 3639 0, "#tunneled packets enqueued to 
hardware"); 3640 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3641 CTLFLAG_RD, &qs->txq_stopped, 3642 0, "tx queues stopped"); 3643 SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3644 CTLFLAG_RD, &txq->phys_addr, 3645 "physical_address_of the queue"); 3646 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3647 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3648 0, "txq generation"); 3649 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3650 CTLFLAG_RD, &txq->cidx, 3651 0, "hardware queue cidx"); 3652 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3653 CTLFLAG_RD, &txq->pidx, 3654 0, "hardware queue pidx"); 3655 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3656 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3657 0, "txq start idx for dump"); 3658 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3659 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3660 0, "txq #entries to dump"); 3661 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3662 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3663 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3664 3665 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3666 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3667 0, "ctrlq start idx for dump"); 3668 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3669 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3670 0, "ctrl #entries to dump"); 3671 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3672 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3673 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3674 3675 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3676 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3677 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3678 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3679 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3680 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3681 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3682 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3683 } 3684 3685 /* Now add a node for mac stats. */ 3686 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3687 CTLFLAG_RD, NULL, "MAC statistics"); 3688 poidlist = SYSCTL_CHILDREN(poid); 3689 3690 /* 3691 * We (ab)use the length argument (arg2) to pass on the offset 3692 * of the data that we are interested in. This is only required 3693 * for the quad counters that are updated from the hardware (we 3694 * make sure that we return the latest value). 3695 * sysctl_handle_macstat first updates *all* the counters from 3696 * the hardware, and then returns the latest value of the 3697 * requested counter. Best would be to update only the 3698 * requested counter from hardware, but t3_mac_update_stats() 3699 * hides all the register details and we don't want to dive into 3700 * all that here. 
3701 */ 3702 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3703 (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3704 sysctl_handle_macstat, "QU", 0) 3705 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3706 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3707 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3708 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3709 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3710 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3711 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3712 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3713 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3714 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3715 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3716 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3717 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3718 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3719 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3720 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3721 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3722 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3723 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3724 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3725 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3726 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3727 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3728 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3729 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3730 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3731 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3732 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3733 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3734 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3735 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3736 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3737 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3738 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3739 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3740 CXGB_SYSCTL_ADD_QUAD(rx_short); 3741 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3742 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3743 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3744 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3745 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3746 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3747 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3748 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3749 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3750 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3751 #undef CXGB_SYSCTL_ADD_QUAD 3752 3753 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3754 CTLFLAG_RD, &mstats->a, 0) 3755 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3756 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3757 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3758 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3759 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3760 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3761 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3762 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3763 CXGB_SYSCTL_ADD_ULONG(num_resets); 3764 CXGB_SYSCTL_ADD_ULONG(link_faults); 3765 #undef CXGB_SYSCTL_ADD_ULONG 3766 } 3767 } 3768 3769 /** 3770 * t3_get_desc - dump an SGE descriptor for debugging purposes 3771 * @qs: the queue set 3772 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3773 * @idx: the descriptor index in the queue 3774 * @data: where to dump the descriptor contents 3775 * 3776 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3777 * size of the descriptor. 
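 * XXX the Tx branch below returns -EINVAL for a bad index while the
 * other branches return EINVAL.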
3778 */ 3779 int 3780 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, 3781 unsigned char *data) 3782 { 3783 if (qnum >= 6) 3784 return (EINVAL); 3785 3786 if (qnum < 3) { 3787 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) 3788 return -EINVAL; 3789 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); 3790 return sizeof(struct tx_desc); 3791 } 3792 3793 if (qnum == 3) { 3794 if (!qs->rspq.desc || idx >= qs->rspq.size) 3795 return (EINVAL); 3796 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); 3797 return sizeof(struct rsp_desc); 3798 } 3799 3800 qnum -= 4; 3801 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) 3802 return (EINVAL); 3803 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); 3804 return sizeof(struct rx_desc); 3805 } 3806