/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/socket.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <cxgb_include.h>
#include <sys/mvec.h>

int txq_fills = 0;
int multiq_tx_enable = 1;

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

static int cxgb_tx_coalesce_force = 0;
TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");

#define COALESCE_START_DEFAULT	TX_ETH_Q_SIZE>>1
#define COALESCE_START_MAX	(TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
#define COALESCE_STOP_DEFAULT	TX_ETH_Q_SIZE>>2
#define COALESCE_STOP_MIN	TX_ETH_Q_SIZE>>5
#define TX_RECLAIM_DEFAULT	TX_ETH_Q_SIZE>>5
#define TX_RECLAIM_MAX		TX_ETH_Q_SIZE>>2
#define TX_RECLAIM_MIN		TX_ETH_Q_SIZE>>6
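/*
 * The thresholds above are in units of hardware Tx descriptors and are
 * sanity-checked at run time in check_pkt_coalesce() and
 * reclaim_completed_tx().  The knobs below are both loader tunables and
 * read-write sysctls, e.g. (illustrative values only):
 *
 *	sysctl hw.cxgb.tx_coalesce_enable_start=512
 *	sysctl hw.cxgb.tx_reclaim_threshold=128
 */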
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
    &cxgb_tx_coalesce_enable_start);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");

/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;

extern int cxgb_use_16k_clusters;
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(hz >> 1)

/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,	/* fetch engine is running */
	TXQ_LAST_PKT_DB	= 1 << 1,	/* last packet rang the doorbell */
};

struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

struct rx_desc {
	uint32_t	addr_lo;
	uint32_t	len_gen;
	uint32_t	gen2;
	uint32_t	addr_hi;
} __packed;

struct rsp_desc {		/* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];
	uint8_t			intr_gen;
} __packed;

#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE	(1 << 3)
#define TX_SW_DESC_MAPPED	(1 << 4)

#define RSPQ_NSOP_NEOP	G_RSPD_SOP_EOP(0)
#define RSPQ_EOP	G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP	G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP	G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)

struct tx_sw_desc {		/* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct rx_sw_desc {		/* SW state per Rx descriptor */
	caddr_t		rxsd_cl;
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;
};

struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

struct refill_fl_cb_arg {
	int			error;
	bus_dma_segment_t	seg;
	int			nseg;
};


/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 *	desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_NEEDS_ENQUEUE(qs)	\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define TXQ_RING_DEQUEUE_COND(qs, func, arg)	\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define TXQ_RING_DEQUEUE(qs)	\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we do now when determining the need for coalescing.
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue has filled up past
	 * cxgb_tx_coalesce_enable_start descriptors we mark it as
	 * coalescing; we drop back out of coalescing once it falls below
	 * cxgb_tx_coalesce_enable_stop and there are no packets enqueued.
	 * This provides some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif
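/*
 * State carried through TXQ_RING_DEQUEUE_COND() while building a coalesced
 * batch.  coalesce_check() below only admits packets that fit in a single
 * mbuf and caps a batch at 7 packets / roughly 10500 bytes, matching the
 * limit the cpl_tx_pkt_batch path in t3_encap() asserts on.
 */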
struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes = &ci->nbytes;

	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
	    (*count < 7) && (m->m_next == NULL))) {
		*count += 1;
		*nbytes += m->m_len;
		return (1);
	}
	return (0);
}

static struct mbuf *
cxgb_dequeue(struct sge_qset *qs)
{
	struct mbuf *m, *m_head, *m_tail;
	struct coalesce_info ci;

	if (check_pkt_coalesce(qs) == 0)
		return TXQ_RING_DEQUEUE(qs);

	m_head = m_tail = NULL;
	ci.count = ci.nbytes = 0;
	do {
		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
		if (m_head == NULL) {
			m_tail = m_head = m;
		} else if (m != NULL) {
			m_tail->m_nextpkt = m;
			m_tail = m;
		}
	} while (m != NULL);
	if (ci.count > 7)
		panic("trying to coalesce %d packets in to one WR", ci.count);
	return (m_head);
}

/**
 * reclaim_completed_tx - reclaims completed Tx descriptors
 * @qs: the queue set owning the Tx queue
 * @reclaim_min: do nothing unless at least this many descriptors are reclaimable
 * @queue: the Tx queue index (TXQ_ETH, TXQ_OFLD or TXQ_CTRL)
 *
 * Reclaims Tx descriptors that the SGE has indicated it has processed,
 * and frees the associated buffers if possible.  Called with the Tx
 * queue's lock held.
 */
static __inline int
reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
{
	struct sge_txq *q = &qs->txq[queue];
	int reclaim = desc_reclaimable(q);

	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;

	if (reclaim < reclaim_min)
		return (0);

	mtx_assert(&qs->lock, MA_OWNED);
	if (reclaim > 0) {
		t3_free_tx_desc(qs, reclaim, queue);
		q->cleaned += reclaim;
		q->in_use -= reclaim;
	}
	if (isset(&qs->txq_stopped, TXQ_ETH))
		clrbit(&qs->txq_stopped, TXQ_ETH);

	return (reclaim);
}

/**
 * should_restart_tx - are there enough resources to restart a Tx queue?
 * @q: the Tx queue
 *
 * Checks if there are enough descriptors to restart a suspended Tx queue.
 */
static __inline int
should_restart_tx(const struct sge_txq *q)
{
	unsigned int r = q->processed - q->cleaned;

	return q->in_use - r < (q->size >> 1);
}

/**
 * t3_sge_init - initialize SGE
 * @adap: the adapter
 * @p: the SGE parameters
 *
 * Performs SGE initialization needed every time after a chip reset.
 * We do not initialize any of the queue sets here; instead the driver
 * top-level must request those individually.  We also do not enable DMA
 * here, that should be done after the queues have been set up.
 */
void
t3_sge_init(adapter_t *adap, struct sge_params *p)
{
	u_int ctrl, ups;

	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */

	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
	    F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
	    V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
	    V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
#if SGE_NUM_GENBITS == 1
	ctrl |= F_EGRGENCTRL;
#endif
	if (adap->params.rev > 0) {
		if (!(adap->flags & (USING_MSIX | USING_MSI)))
			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
	}
	t3_write_reg(adap, A_SG_CONTROL, ctrl);
	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
	    V_LORCQDRBTHRSH(512));
	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
	    V_TIMEOUT(200 * core_ticks_per_usec(adap)));
	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
	    adap->params.rev < T3_REV_C ? 1000 : 500);
	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
}


/**
 * sgl_len - calculates the size of an SGL of the given capacity
 * @n: the number of SGL entries
 *
 * Calculates the number of flits needed for a scatter/gather list that
 * can hold the given number of entries.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	return ((3 * n) / 2 + (n & 1));
}

/**
 * get_imm_packet - return the next ingress packet buffer from a response
 * @resp: the response descriptor containing the packet data
 *
 * Return a packet containing the immediate data of the given response.
 */
static int
get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
{

	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
	m->m_ext.ext_buf = NULL;
	m->m_ext.ext_type = 0;
	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
	return (0);
}

static __inline u_int
flits_to_desc(u_int n)
{
	return (flit_desc_map[n]);
}

#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)

/**
 * t3_sge_err_intr_handler - SGE async event interrupt handler
 * @adapter: the adapter
 *
 * Interrupt handler for SGE asynchronous (non-data) events.
 */
void
t3_sge_err_intr_handler(adapter_t *adapter)
{
	unsigned int v, status;

	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
	if (status & SGE_PARERR)
		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
		    status & SGE_PARERR);
	if (status & SGE_FRAMINGERR)
		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
		    status & SGE_FRAMINGERR);
	if (status & F_RSPQCREDITOVERFOW)
		CH_ALERT(adapter, "SGE response queue credit overflow\n");

	if (status & F_RSPQDISABLED) {
		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);

		CH_ALERT(adapter,
		    "packet delivered to disabled response queue (0x%x)\n",
		    (v >> S_RSPQ0DISABLED) & 0xff);
	}

	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
	if (status & SGE_FATALERR)
		t3_fatal_err(adapter);
}

void
t3_sge_prep(adapter_t *adap, struct sge_params *p)
{
	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;

	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
	nqsets *= adap->params.nports;

	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);

	while (!powerof2(fl_q_size))
		fl_q_size--;

	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
	    is_offload(adap);

#if __FreeBSD_version >= 700111
	if (use_16k) {
		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM16BYTES;
	} else {
		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
		jumbo_buf_size = MJUM9BYTES;
	}
#else
	jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE);
	jumbo_buf_size = MJUMPAGESIZE;
#endif
	while (!powerof2(jumbo_q_size))
		jumbo_q_size--;

	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
		device_printf(adap->dev,
		    "Insufficient clusters and/or jumbo buffers.\n");

	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);

	for (i = 0; i < SGE_QSETS; ++i) {
		struct qset_params *q = p->qset + i;

		if (adap->params.nports > 2) {
			q->coalesce_usecs = 50;
		} else {
#ifdef INVARIANTS
			q->coalesce_usecs = 10;
#else
			q->coalesce_usecs = 5;
#endif
		}
		q->polling = 0;
		q->rspq_size = RSPQ_Q_SIZE;
		q->fl_size = fl_q_size;
		q->jumbo_size = jumbo_q_size;
		q->jumbo_buf_size = jumbo_buf_size;
		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
		q->cong_thres = 0;
	}
}

int
t3_sge_alloc(adapter_t *sc)
{

	/* The parent tag. */
	if (bus_dma_tag_create( NULL,			/* parent */
				1, 0,			/* algnmnt, boundary */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
				BUS_SPACE_UNRESTRICTED, /* nsegments */
				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
				0,			/* flags */
				NULL, NULL,		/* lock, lockarg */
				&sc->parent_dmat)) {
		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for normal sized RX frames
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for jumbo sized RX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
		return (ENOMEM);
	}

	/*
	 * DMA tag for TX frames.
	 */
	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
		NULL, NULL, &sc->tx_dmat)) {
		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
		return (ENOMEM);
	}

	return (0);
}

int
t3_sge_free(struct adapter * sc)
{

	if (sc->tx_dmat != NULL)
		bus_dma_tag_destroy(sc->tx_dmat);

	if (sc->rx_jumbo_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_jumbo_dmat);

	if (sc->rx_dmat != NULL)
		bus_dma_tag_destroy(sc->rx_dmat);

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return (0);
}

void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{

	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
	qs->rspq.polling = 0 /* p->polling */;
}

#if !defined(__i386__) && !defined(__amd64__)
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->seg = segs[0];
	cb_arg->nseg = nseg;

}
#endif
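/*
 * On i386 and amd64 the cluster's physical address is obtained directly
 * with pmap_kextract() in refill_fl(), so the busdma load callback above
 * is only needed on other platforms.
 */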
/**
 * refill_fl - refill an SGE free-buffer list
 * @sc: the controller softc
 * @q: the free-list to refill
 * @n: the number of new buffers to allocate
 *
 * (Re)populate an SGE free-buffer list with up to @n new packet buffers.
 * The caller must ensure that @n does not exceed the queue's capacity.
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We allocate only the cluster here; mbuf initialization
		 * happens after the packet has been received.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone == zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		q->db_pending++;
	}

done:
	if (q->db_pending >= 32) {
		q->db_pending = 0;
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
	}
}


/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
 */
static void
free_rx_bufs(adapter_t *sc, struct sge_fl *q)
{
	u_int cidx = q->cidx;

	while (q->credits--) {
		struct rx_sw_desc *d = &q->sdesc[cidx];

		if (d->flags & RX_SW_DESC_INUSE) {
			bus_dmamap_unload(q->entry_tag, d->map);
			bus_dmamap_destroy(q->entry_tag, d->map);
			if (q->zone == zone_pack) {
				m_init(d->m, zone_pack, MCLBYTES,
				    M_NOWAIT, MT_DATA, M_EXT);
				uma_zfree(zone_pack, d->m);
			} else {
				m_init(d->m, zone_mbuf, MLEN,
				    M_NOWAIT, MT_DATA, 0);
				uma_zfree(zone_mbuf, d->m);
				uma_zfree(q->zone, d->rxsd_cl);
			}
		}

		d->rxsd_cl = NULL;
		d->m = NULL;
		if (++cidx == q->size)
			cidx = 0;
	}
}

static __inline void
__refill_fl(adapter_t *adap, struct sge_fl *fl)
{
	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
}

static __inline void
__refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
{
	uint32_t reclaimable = fl->size - fl->credits;

	if (reclaimable > 0)
		refill_fl(adap, fl, min(max, reclaimable));
}

/**
 * recycle_rx_buf - recycle a receive buffer
 * @adapter: the adapter
 * @q: the SGE free list
 * @idx: index of buffer to recycle
 *
 * Recycles the specified buffer on the given free list by adding it at
 * the next available slot on the list.
 */
static void
recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
{
	struct rx_desc *from = &q->desc[idx];
	struct rx_desc *to = &q->desc[q->pidx];

	q->sdesc[q->pidx] = q->sdesc[idx];
	to->addr_lo = from->addr_lo;	// already big endian
	to->addr_hi = from->addr_hi;	// likewise
	wmb();	/* necessary ? */
	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
	q->credits++;

	if (++q->pidx == q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
}

static void
alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	uint32_t *addr;

	addr = arg;
	*addr = segs[0].ds_addr;
}

static int
alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
    bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
    bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
{
	size_t len = nelem * elem_size;
	void *s = NULL;
	void *p = NULL;
	int err;

	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
	    BUS_SPACE_MAXADDR_32BIT,
	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
	    len, 0, NULL, NULL, tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
		return (ENOMEM);
	}

	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
	    map)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
		return (ENOMEM);
	}

	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
	bzero(p, len);
	*(void **)desc = p;

	if (sw_size) {
		len = nelem * sw_size;
		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
		*(void **)sdesc = s;
	}
	if (parent_entry_tag == NULL)
		return (0);

	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
	    NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
	    TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
	    NULL, NULL, entry_tag)) != 0) {
		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
		return (ENOMEM);
	}

	return (0);
}
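/*
 * Task handler for slow-path (non-data) interrupts: runs the common slow
 * interrupt code and then rewrites A_PL_INT_ENABLE0 to re-arm the slow
 * interrupt sources.
 */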
static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}

/**
 * sge_timer_cb - perform periodic maintenance of an SGE qset
 * @data: the SGE queue set to maintain
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set.  It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while.  We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up).  Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty.  We try to add only a few
 * buffers here, the queue will be replenished fully as these new buffers
 * are used up if memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
 *
 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 * fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;

	if ((sc->flags & USING_MSIX) == 0) {
		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
			    F_SELEGRCNTX |
			    (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
	return (0);
}

/**
 * refill_rspq - replenish an SGE response queue
 * @adapter: the adapter
 * @q: the response queue to replenish
 * @credits: how many new responses to make available
 *
 * Replenishes a response queue by making the supplied number of responses
 * available to HW.
 */
static __inline void
refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
{

	/* mbufs are allocated on demand when a rspq entry is processed. */
	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
	    V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
}

static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int i;

	for (i = 0; i < 3; i++)
		reclaim_completed_tx(qs, 16, i);
}

static void
sge_timer_reclaim(void *arg, int ncount)
{
	struct port_info *pi = arg;
	int i, nqsets = pi->nqsets;
	adapter_t *sc = pi->adapter;
	struct sge_qset *qs;
	struct mtx *lock;

	KASSERT((sc->flags & USING_MSIX) == 0,
	    ("can't call timer reclaim for msi-x"));

	for (i = 0; i < nqsets; i++) {
		qs = &sc->sge.qs[pi->first_qset + i];

		reclaim_completed_tx(qs, 16, TXQ_OFLD);
		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
		    &sc->sge.qs[0].rspq.lock;

		if (mtx_trylock(lock)) {
			/* XXX currently assume that we are *NOT* polling */
			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);

			if (qs->fl[0].credits < qs->fl[0].size - 16)
				__refill_fl(sc, &qs->fl[0]);
			if (qs->fl[1].credits < qs->fl[1].size - 16)
				__refill_fl(sc, &qs->fl[1]);

			if (status & (1 << qs->rspq.cntxt_id)) {
				if (qs->rspq.credits) {
					refill_rspq(sc, &qs->rspq, 1);
					qs->rspq.credits--;
					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
					    1 << qs->rspq.cntxt_id);
				}
			}
			mtx_unlock(lock);
		}
	}
}

/**
 * init_qset_cntxt - initialize an SGE queue set context info
 * @qs: the queue set
 * @id: the queue set id
 *
 * Initializes the TIDs and context ids for the queues of a queue set.
 */
static void
init_qset_cntxt(struct sge_qset *qs, u_int id)
{

	qs->rspq.cntxt_id = id;
	qs->fl[0].cntxt_id = 2 * id;
	qs->fl[1].cntxt_id = 2 * id + 1;
	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;

	mbufq_init(&qs->txq[TXQ_ETH].sendq);
	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
}


/*
 * Reserve @ndesc descriptors at the producer index of @txq and record the
 * generation, completion and producer state the caller needs in @txqs.
 */
static void
txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
{
	txq->in_use += ndesc;
	/*
	 * XXX we don't handle stopping of queue
	 * presumably start handles this when we bump against the end
	 */
	txqs->gen = txq->gen;
	txq->unacked += ndesc;
	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
	txq->unacked &= 31;
	txqs->pidx = txq->pidx;
	txq->pidx += ndesc;
#ifdef INVARIANTS
	if (((txqs->pidx > txq->cidx) &&
	     (txq->pidx < txqs->pidx) &&
	     (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	     (txq->pidx >= txq->cidx)) ||
	    ((txqs->pidx < txq->cidx) &&
	     (txq->cidx < txqs->pidx)))
		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
		    txqs->pidx, txq->pidx, txq->cidx);
#endif
	if (txq->pidx >= txq->size) {
		txq->pidx -= txq->size;
		txq->gen ^= 1;
	}

}

/**
 * calc_tx_descs - calculate the number of Tx descriptors for a packet
 * @m: the packet mbufs
 * @nsegs: the number of segments
 *
 * Returns the number of Tx descriptors needed for the given Ethernet
 * packet.  Ethernet packets require addition of WR and CPL headers.
 */
static __inline unsigned int
calc_tx_descs(const struct mbuf *m, int nsegs)
{
	unsigned int flits;

	if (m->m_pkthdr.len <= PIO_LEN)
		return 1;

	flits = sgl_len(nsegs) + 2;
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		flits++;

	return flits_to_desc(flits);
}

static unsigned int
busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
    struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m0;
	int err, pktlen, pass = 0;
	bus_dma_tag_t tag = txq->entry_tag;

retry:
	err = 0;
	m0 = *m;
	pktlen = m0->m_pkthdr.len;
#if defined(__i386__) || defined(__amd64__)
	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
		goto done;
	} else
#endif
		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);

	if (err == 0) {
		goto done;
	}
	if (err == EFBIG && pass == 0) {
		pass = 1;
		/* Too many segments, try to defrag */
		m0 = m_defrag(m0, M_DONTWAIT);
		if (m0 == NULL) {
			m_freem(*m);
			*m = NULL;
			return (ENOBUFS);
		}
		*m = m0;
		goto retry;
	} else if (err == ENOMEM) {
		return (err);
	}
	if (err) {
		if (cxgb_debug)
			printf("map failure err=%d pktlen=%d\n", err, pktlen);
		m_freem(m0);
		*m = NULL;
		return (err);
	}
done:
#if !defined(__i386__) && !defined(__amd64__)
	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
#endif
	txsd->flags |= TX_SW_DESC_MAPPED;

	return (0);
}

/**
 * make_sgl - populate a scatter/gather list for a packet
 * @sgp: the SGL to populate
 * @segs: the packet dma segments
 * @nsegs: the number of segments
 *
 * Generates a scatter/gather list for the buffers that make up a packet
 * and returns the SGL size in 8-byte words.  The caller must size the SGL
 * appropriately.
 */
static __inline void
make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
{
	int i, idx;

	for (idx = 0, i = 0; i < nsegs; i++) {
		/*
		 * firmware doesn't like empty segments
		 */
		if (segs[i].ds_len == 0)
			continue;
		if (i && idx == 0)
			++sgp;

		sgp->len[idx] = htobe32(segs[i].ds_len);
		sgp->addr[idx] = htobe64(segs[i].ds_addr);
		idx ^= 1;
	}

	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}
}

/**
 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 * @adap: the adapter
 * @q: the Tx queue
 *
 * Ring the doorbell if a Tx queue is asleep.  There is a natural race
 * where the HW may go to sleep just after we check; in that case the
 * interrupt handler will detect the outstanding TX packet and ring the
 * doorbell for us.
 *
 * When GTS is disabled we unconditionally ring the doorbell.
 */
static __inline void
check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
{
#if USE_GTS
	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
		set_bit(TXQ_LAST_PKT_DB, &q->flags);
#ifdef T3_TRACE
		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
		    q->cntxt_id);
#endif
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	}
#else
	if (mustring || ++q->db_pending >= 32) {
		wmb();	/* write descriptors before telling HW */
		t3_write_reg(adap, A_SG_KDOORBELL,
		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
		q->db_pending = 0;
	}
#endif
}

static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
#endif
}

/**
 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
 * @ndesc: number of Tx descriptors spanned by the SGL
 * @txd: first Tx descriptor to be written
 * @txqs: txq state (generation and producer index)
 * @txq: the SGE Tx queue
 * @sgl: the SGL
 * @flits: number of flits to the start of the SGL in the first descriptor
 * @sgl_flits: the SGL size in flits
 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 * Write a work request header and an associated SGL.  If the SGL is
 * small enough to fit into one Tx descriptor it has already been written
 * and we just need to write the WR header.  Otherwise we distribute the
 * SGL across the number of descriptors it spans.
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];

	if (__predict_true(ndesc == 1)) {
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) |
		    V_WR_GEN(txqs->gen)) | wr_lo);
		/* XXX gen? */
		wr_gen2(txd, txqs->gen);

	} else {
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			txsd++;
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
				txsd = txq->sdesc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
			    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		wrp->wrh_hi |= htonl(F_WR_EOP);
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}

/* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG) \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)

static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		flits = nsegs*2 + 1;

		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq, 0);
		return (0);
	} else if (tso_info) {
		int eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		struct ip *ip;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    m0->m_pkthdr.csum_flags, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		eh = mtod(m0, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			eth_type = CPL_ETH_II_VLAN;
			ip = (struct ip *)((struct ether_vlan_header *)eh + 1);
		} else {
			eth_type = CPL_ETH_II;
			ip = (struct ip *)(eh + 1);
		}
		tcp = (struct tcphdr *)(ip + 1);

		tso_info |= V_LSO_ETH_TYPE(eth_type) |
			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
			    V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * The packet is not undersized but it still fits in
			 * PIO_LEN bytes; that indicates a TSO bug at the
			 * higher levels.
			 */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
			    F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq, 0);

	return (0);
}

void
cxgb_tx_watchdog(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing != 0 &&
	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs))
		qs->coalescing = 0;
	else if (qs->coalescing == 0 &&
	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_FLUSHING;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_FLUSHING;
		TXQ_UNLOCK(qs);
	}
	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
		    qs, txq->txq_watchdog.c_cpu);
}

static void
cxgb_tx_timeout(void *arg)
{
	struct sge_qset *qs = arg;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];

	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
		qs->coalescing = 1;
	if (TXQ_TRYLOCK(qs)) {
		qs->qs_flags |= QS_TIMEOUT;
		cxgb_start_locked(qs);
		qs->qs_flags &= ~QS_TIMEOUT;
		TXQ_UNLOCK(qs);
	}
}
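/*
 * Drain packets from the ethernet Tx buf_ring onto the hardware queue,
 * reclaiming completed descriptors along the way.  Called with the qset
 * lock held; stops when the ring is empty, the link is down, the interface
 * is not running, or the hardware queue runs short of descriptors.
 */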
static void
cxgb_start_locked(struct sge_qset *qs)
{
	struct mbuf *m_head = NULL;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct port_info *pi = qs->port;
	struct ifnet *ifp = pi->ifp;

	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
		reclaim_completed_tx(qs, 0, TXQ_ETH);

	if (!pi->link_config.link_ok) {
		TXQ_RING_FLUSH(qs);
		return;
	}
	TXQ_LOCK_ASSERT(qs);
	while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
	    pi->link_config.link_ok) {
		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);

		if (txq->size - txq->in_use <= TX_MAX_DESC)
			break;

		if ((m_head = cxgb_dequeue(qs)) == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and/or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (t3_encap(qs, &m_head) || m_head == NULL)
			break;

		m_head = NULL;
	}

	if (txq->db_pending)
		check_ring_tx_db(pi->adapter, txq, 1);

	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
	    pi->link_config.link_ok)
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	if (m_head != NULL)
		m_freem(m_head);
}

static int
cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
{
	struct port_info *pi = qs->port;
	struct sge_txq *txq = &qs->txq[TXQ_ETH];
	struct buf_ring *br = txq->txq_mr;
	int error, avail;

	avail = txq->size - txq->in_use;
	TXQ_LOCK_ASSERT(qs);

	/*
	 * We can only do a direct transmit if the following are true:
	 * - we aren't coalescing (ring < 3/4 full)
	 * - the link is up -- checked in caller
	 * - there are no packets enqueued already
	 * - there is space in hardware transmit queue
	 */
	if (check_pkt_coalesce(qs) == 0 &&
	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
		if (t3_encap(qs, &m)) {
			if (m != NULL &&
			    (error = drbr_enqueue(ifp, br, m)) != 0)
				return (error);
		} else {
			if (txq->db_pending)
				check_ring_tx_db(pi->adapter, txq, 1);

			/*
			 * We've bypassed the buf ring so we need to update
			 * the stats directly
			 */
			txq->txq_direct_packets++;
			txq->txq_direct_bytes += m->m_pkthdr.len;
		}
	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
		return (error);

	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
		cxgb_start_locked(qs);
	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
		    qs, txq->txq_timer.c_cpu);
	return (0);
}

int
cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct sge_qset *qs;
	struct port_info *pi = ifp->if_softc;
	int error, qidx = pi->first_qset;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0
	    || (!pi->link_config.link_ok)) {
		m_freem(m);
		return (0);
	}

	if (m->m_flags & M_FLOWID)
		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;

	qs = &pi->adapter->sge.qs[qidx];

	if (TXQ_TRYLOCK(qs)) {
		/* XXX running */
		error = cxgb_transmit_locked(ifp, qs, m);
		TXQ_UNLOCK(qs);
	} else
		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
	return (error);
}

void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];

	if (!pi->link_config.link_ok)
		return;

	TXQ_LOCK(qs);
	cxgb_start_locked(qs);
	TXQ_UNLOCK(qs);
}

void
cxgb_qflush(struct ifnet *ifp)
{
	/*
	 * Flush any enqueued mbufs in the buf_rings and in the transmit
	 * queues.  This is a no-op for now.
	 */
	return;
}

/**
 * write_imm - write a packet into a Tx descriptor as immediate data
 * @d: the Tx descriptor to write
 * @m: the packet
 * @len: the length of packet data to write as immediate data
 * @gen: the generation bit value to write
 *
 * Writes a packet as immediate data into a Tx descriptor.  The packet
 * contains a work request at its beginning.  We must write the packet
 * carefully so the SGE doesn't read it accidentally before it has been
 * written in its entirety.
 */
static __inline void
write_imm(struct tx_desc *d, struct mbuf *m,
    unsigned int len, unsigned int gen)
{
	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
	struct work_request_hdr *to = (struct work_request_hdr *)d;
	uint32_t wr_hi, wr_lo;

	if (len > WR_LEN)
		panic("len too big %d\n", len);
	if (len < sizeof(*from))
		panic("len too small %d", len);

	memcpy(&to[1], &from[1], len - sizeof(*from));
	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
	    V_WR_BCNTLFLT(len & 7));
	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
	    V_WR_LEN((len + 7) / 8));
	set_wr_hdr(to, wr_hi, wr_lo);
	wmb();
	wr_gen2(d, gen);

	/*
	 * This check is a hack; we should really fix the logic so
	 * that this can't happen.
	 */
	if (m->m_type != MT_DONTFREE)
		m_freem(m);

}

/**
 * check_desc_avail - check descriptor availability on a send queue
 * @adap: the adapter
 * @q: the TX queue
 * @m: the packet needing the descriptors
 * @ndesc: the number of Tx descriptors needed
 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
 *
 * Checks if the requested number of Tx descriptors is available on an
 * SGE send queue.  If the queue is already suspended or not enough
 * descriptors are available the packet is queued for later transmission.
 * Must be called with the Tx queue locked.
 *
 * Returns 0 if enough descriptors are available, 1 if there aren't
 * enough descriptors and the packet has been queued, and 2 if the caller
 * needs to retry because there weren't enough descriptors at the
 * beginning of the call but some freed up in the meantime.
 */
static __inline int
check_desc_avail(adapter_t *adap, struct sge_txq *q,
    struct mbuf *m, unsigned int ndesc,
    unsigned int qid)
{
	/*
	 * XXX We currently only use this for checking the control queue;
	 * the control queue is only used for binding qsets, which happens
	 * at init time, so we are guaranteed enough descriptors.
	 */
	if (__predict_false(!mbufq_empty(&q->sendq))) {
addq_exit:	mbufq_tail(&q->sendq, m);
		return 1;
	}
	if (__predict_false(q->size - q->in_use < ndesc)) {

		struct sge_qset *qs = txq_to_qset(q, qid);

		setbit(&qs->txq_stopped, qid);
		if (should_restart_tx(q) &&
		    test_and_clear_bit(qid, &qs->txq_stopped))
			return 2;

		q->stops++;
		goto addq_exit;
	}
	return 0;
}


/**
 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 * @q: the SGE control Tx queue
 *
 * This is a variant of reclaim_completed_tx() that is used for Tx queues
 * that send only immediate data (presently just the control queues) and
 * thus do not have any mbufs to free.
 */
static __inline void
reclaim_completed_tx_imm(struct sge_txq *q)
{
	unsigned int reclaim = q->processed - q->cleaned;

	q->in_use -= reclaim;
	q->cleaned += reclaim;
}

/* Returns whether a packet can be sent entirely as immediate data. */
static __inline int
immediate(const struct mbuf *m)
{
	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
}

/**
 * ctrl_xmit - send a packet through an SGE control Tx queue
 * @adap: the adapter
 * @q: the control queue
 * @m: the packet
 *
 * Send a packet through an SGE control Tx queue.  Packets sent through
 * a control queue must fit entirely as immediate data in a single Tx
 * descriptor and have no page fragments.
 */
static int
ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
{
	int ret;
	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
	struct sge_txq *q = &qs->txq[TXQ_CTRL];

	if (__predict_false(!immediate(m))) {
		m_freem(m);
		return 0;
	}

	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
	wrp->wrh_lo = htonl(V_WR_TID(q->token));

	TXQ_LOCK(qs);
again:	reclaim_completed_tx_imm(q);

	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
	if (__predict_false(ret)) {
		if (ret == 1) {
			TXQ_UNLOCK(qs);
			return (ENOSPC);
		}
		goto again;
	}
	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);

	q->in_use++;
	if (++q->pidx >= q->size) {
		q->pidx = 0;
		q->gen ^= 1;
	}
	TXQ_UNLOCK(qs);
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
	return (0);
}
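/*
 * When the control queue is out of descriptors, ctrl_xmit() parks the
 * packet on q->sendq and check_desc_avail() marks the queue stopped in
 * qs->txq_stopped; restart_ctrlq() below drains the sendq again once
 * enough descriptors have been reclaimed.
 */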
1965 */ 1966 static void 1967 restart_ctrlq(void *data, int npending) 1968 { 1969 struct mbuf *m; 1970 struct sge_qset *qs = (struct sge_qset *)data; 1971 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1972 adapter_t *adap = qs->port->adapter; 1973 1974 TXQ_LOCK(qs); 1975 again: reclaim_completed_tx_imm(q); 1976 1977 while (q->in_use < q->size && 1978 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1979 1980 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1981 1982 if (++q->pidx >= q->size) { 1983 q->pidx = 0; 1984 q->gen ^= 1; 1985 } 1986 q->in_use++; 1987 } 1988 if (!mbufq_empty(&q->sendq)) { 1989 setbit(&qs->txq_stopped, TXQ_CTRL); 1990 1991 if (should_restart_tx(q) && 1992 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 1993 goto again; 1994 q->stops++; 1995 } 1996 TXQ_UNLOCK(qs); 1997 t3_write_reg(adap, A_SG_KDOORBELL, 1998 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1999 } 2000 2001 2002 /* 2003 * Send a management message through control queue 0 2004 */ 2005 int 2006 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2007 { 2008 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2009 } 2010 2011 /** 2012 * free_qset - free the resources of an SGE queue set 2013 * @sc: the controller owning the queue set 2014 * @q: the queue set 2015 * 2016 * Release the HW and SW resources associated with an SGE queue set, such 2017 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2018 * queue set must be quiesced prior to calling this. 2019 */ 2020 static void 2021 t3_free_qset(adapter_t *sc, struct sge_qset *q) 2022 { 2023 int i; 2024 2025 reclaim_completed_tx(q, 0, TXQ_ETH); 2026 if (q->txq[TXQ_ETH].txq_mr != NULL) 2027 buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); 2028 if (q->txq[TXQ_ETH].txq_ifq != NULL) { 2029 ifq_delete(q->txq[TXQ_ETH].txq_ifq); 2030 free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); 2031 } 2032 2033 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2034 if (q->fl[i].desc) { 2035 mtx_lock_spin(&sc->sge.reg_lock); 2036 t3_sge_disable_fl(sc, q->fl[i].cntxt_id); 2037 mtx_unlock_spin(&sc->sge.reg_lock); 2038 bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); 2039 bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, 2040 q->fl[i].desc_map); 2041 bus_dma_tag_destroy(q->fl[i].desc_tag); 2042 bus_dma_tag_destroy(q->fl[i].entry_tag); 2043 } 2044 if (q->fl[i].sdesc) { 2045 free_rx_bufs(sc, &q->fl[i]); 2046 free(q->fl[i].sdesc, M_DEVBUF); 2047 } 2048 } 2049 2050 mtx_unlock(&q->lock); 2051 MTX_DESTROY(&q->lock); 2052 for (i = 0; i < SGE_TXQ_PER_SET; i++) { 2053 if (q->txq[i].desc) { 2054 mtx_lock_spin(&sc->sge.reg_lock); 2055 t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); 2056 mtx_unlock_spin(&sc->sge.reg_lock); 2057 bus_dmamap_unload(q->txq[i].desc_tag, 2058 q->txq[i].desc_map); 2059 bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, 2060 q->txq[i].desc_map); 2061 bus_dma_tag_destroy(q->txq[i].desc_tag); 2062 bus_dma_tag_destroy(q->txq[i].entry_tag); 2063 } 2064 if (q->txq[i].sdesc) { 2065 free(q->txq[i].sdesc, M_DEVBUF); 2066 } 2067 } 2068 2069 if (q->rspq.desc) { 2070 mtx_lock_spin(&sc->sge.reg_lock); 2071 t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); 2072 mtx_unlock_spin(&sc->sge.reg_lock); 2073 2074 bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); 2075 bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, 2076 q->rspq.desc_map); 2077 bus_dma_tag_destroy(q->rspq.desc_tag); 2078 MTX_DESTROY(&q->rspq.lock); 2079 } 2080 2081 #ifdef INET 2082 tcp_lro_free(&q->lro.ctrl); 2083 #endif 2084 2085 bzero(q, sizeof(*q)); 2086 } 2087 2088 /** 2089 * t3_free_sge_resources - free SGE resources 2090 * @sc: 
the adapter softc
2091 *
2092 * Frees resources used by the SGE queue sets.
2093 */
2094 void
2095 t3_free_sge_resources(adapter_t *sc, int nqsets)
2096 {
2097 int i;
2098
2099 for (i = 0; i < nqsets; ++i) {
2100 TXQ_LOCK(&sc->sge.qs[i]);
2101 t3_free_qset(sc, &sc->sge.qs[i]);
2102 }
2103 }
2104
2105 /**
2106 * t3_sge_start - enable SGE
2107 * @sc: the controller softc
2108 *
2109 * Enables the SGE for DMAs. This is the last step in starting packet
2110 * transfers.
2111 */
2112 void
2113 t3_sge_start(adapter_t *sc)
2114 {
2115 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2116 }
2117
2118 /**
2119 * t3_sge_stop - disable SGE operation
2120 * @sc: the adapter
2121 *
2122 * Disables the DMA engine. This can be called in emergencies (e.g.,
2123 * from error interrupts) or from normal process context. In the latter
2124 * case it also disables any pending queue restart tasklets. Note that
2125 * if it is called in interrupt context it cannot disable the restart
2126 * tasklets as it cannot wait; however, the tasklets will have no effect
2127 * since the doorbells are disabled and the driver will call this again
2128 * later from process context, at which time the tasklets will be stopped
2129 * if they are still running.
2130 */
2131 void
2132 t3_sge_stop(adapter_t *sc)
2133 {
2134 int i, nqsets;
2135
2136 t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2137
2138 if (sc->tq == NULL)
2139 return;
2140
2141 for (nqsets = i = 0; i < sc->params.nports; i++)
2142 nqsets += sc->port[i].nqsets;
2143 #ifdef notyet
2144 /*
2145 *
2146 * XXX
2147 */
2148 for (i = 0; i < nqsets; ++i) {
2149 struct sge_qset *qs = &sc->sge.qs[i];
2150
2151 taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2152 taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2153 }
2154 #endif
2155 }
2156
2157 /**
2158 * t3_free_tx_desc - reclaims Tx descriptors and their buffers
2159 * @qs: the queue set that owns the Tx queue being cleaned
2160 * @reclaimable: the number of descriptors to reclaim
2161 * @queue: the index of the Tx queue within the queue set, one of
2162 * TXQ_ETH, TXQ_OFLD, or TXQ_CTRL (the Ethernet, offload, and
2163 * control rings, respectively)
2164 *
2165 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2166 * Tx buffers. Called with the Tx queue lock held.
2167 * 2168 * Returns number of buffers of reclaimed 2169 */ 2170 void 2171 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) 2172 { 2173 struct tx_sw_desc *txsd; 2174 unsigned int cidx, mask; 2175 struct sge_txq *q = &qs->txq[queue]; 2176 2177 #ifdef T3_TRACE 2178 T3_TRACE2(sc->tb[q->cntxt_id & 7], 2179 "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); 2180 #endif 2181 cidx = q->cidx; 2182 mask = q->size - 1; 2183 txsd = &q->sdesc[cidx]; 2184 2185 mtx_assert(&qs->lock, MA_OWNED); 2186 while (reclaimable--) { 2187 prefetch(q->sdesc[(cidx + 1) & mask].m); 2188 prefetch(q->sdesc[(cidx + 2) & mask].m); 2189 2190 if (txsd->m != NULL) { 2191 if (txsd->flags & TX_SW_DESC_MAPPED) { 2192 bus_dmamap_unload(q->entry_tag, txsd->map); 2193 txsd->flags &= ~TX_SW_DESC_MAPPED; 2194 } 2195 m_freem_list(txsd->m); 2196 txsd->m = NULL; 2197 } else 2198 q->txq_skipped++; 2199 2200 ++txsd; 2201 if (++cidx == q->size) { 2202 cidx = 0; 2203 txsd = q->sdesc; 2204 } 2205 } 2206 q->cidx = cidx; 2207 2208 } 2209 2210 /** 2211 * is_new_response - check if a response is newly written 2212 * @r: the response descriptor 2213 * @q: the response queue 2214 * 2215 * Returns true if a response descriptor contains a yet unprocessed 2216 * response. 2217 */ 2218 static __inline int 2219 is_new_response(const struct rsp_desc *r, 2220 const struct sge_rspq *q) 2221 { 2222 return (r->intr_gen & F_RSPD_GEN2) == q->gen; 2223 } 2224 2225 #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) 2226 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ 2227 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ 2228 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ 2229 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) 2230 2231 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ 2232 #define NOMEM_INTR_DELAY 2500 2233 2234 /** 2235 * write_ofld_wr - write an offload work request 2236 * @adap: the adapter 2237 * @m: the packet to send 2238 * @q: the Tx queue 2239 * @pidx: index of the first Tx descriptor to write 2240 * @gen: the generation value to use 2241 * @ndesc: number of descriptors the packet will occupy 2242 * 2243 * Write an offload work request to send the supplied packet. The packet 2244 * data already carry the work request with most fields populated. 2245 */ 2246 static void 2247 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2248 struct sge_txq *q, unsigned int pidx, 2249 unsigned int gen, unsigned int ndesc, 2250 bus_dma_segment_t *segs, unsigned int nsegs) 2251 { 2252 unsigned int sgl_flits, flits; 2253 struct work_request_hdr *from; 2254 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2255 struct tx_desc *d = &q->desc[pidx]; 2256 struct txq_state txqs; 2257 2258 if (immediate(m) && nsegs == 0) { 2259 write_imm(d, m, m->m_len, gen); 2260 return; 2261 } 2262 2263 /* Only TX_DATA builds SGLs */ 2264 from = mtod(m, struct work_request_hdr *); 2265 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2266 2267 flits = m->m_len / 8; 2268 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2269 2270 make_sgl(sgp, segs, nsegs); 2271 sgl_flits = sgl_len(nsegs); 2272 2273 txqs.gen = gen; 2274 txqs.pidx = pidx; 2275 txqs.compl = 0; 2276 2277 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2278 from->wrh_hi, from->wrh_lo); 2279 } 2280 2281 /** 2282 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2283 * @m: the packet 2284 * 2285 * Returns the number of Tx descriptors needed for the given offload 2286 * packet. These packets are already fully constructed. 
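 *
 * A packet that fits within WR_LEN and has no SG segments consumes a
 * single descriptor as immediate data. Otherwise the estimate is the
 * number of 8-byte flits occupied by the pre-built work request in the
 * mbuf (m_len / 8) plus sgl_len(nsegs) flits for the scatter/gather
 * list, converted to a descriptor count by flits_to_desc().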
2287 */ 2288 static __inline unsigned int 2289 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2290 { 2291 unsigned int flits, cnt = 0; 2292 int ndescs; 2293 2294 if (m->m_len <= WR_LEN && nsegs == 0) 2295 return (1); /* packet fits as immediate data */ 2296 2297 /* 2298 * This needs to be re-visited for TOE 2299 */ 2300 2301 cnt = nsegs; 2302 2303 /* headers */ 2304 flits = m->m_len / 8; 2305 2306 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2307 2308 return (ndescs); 2309 } 2310 2311 /** 2312 * ofld_xmit - send a packet through an offload queue 2313 * @adap: the adapter 2314 * @q: the Tx offload queue 2315 * @m: the packet 2316 * 2317 * Send an offload packet through an SGE offload queue. 2318 */ 2319 static int 2320 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2321 { 2322 int ret, nsegs; 2323 unsigned int ndesc; 2324 unsigned int pidx, gen; 2325 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2326 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2327 struct tx_sw_desc *stx; 2328 2329 nsegs = m_get_sgllen(m); 2330 vsegs = m_get_sgl(m); 2331 ndesc = calc_tx_descs_ofld(m, nsegs); 2332 busdma_map_sgl(vsegs, segs, nsegs); 2333 2334 stx = &q->sdesc[q->pidx]; 2335 2336 TXQ_LOCK(qs); 2337 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2338 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2339 if (__predict_false(ret)) { 2340 if (ret == 1) { 2341 printf("no ofld desc avail\n"); 2342 2343 m_set_priority(m, ndesc); /* save for restart */ 2344 TXQ_UNLOCK(qs); 2345 return (EINTR); 2346 } 2347 goto again; 2348 } 2349 2350 gen = q->gen; 2351 q->in_use += ndesc; 2352 pidx = q->pidx; 2353 q->pidx += ndesc; 2354 if (q->pidx >= q->size) { 2355 q->pidx -= q->size; 2356 q->gen ^= 1; 2357 } 2358 #ifdef T3_TRACE 2359 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2360 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2361 ndesc, pidx, skb->len, skb->len - skb->data_len, 2362 skb_shinfo(skb)->nr_frags); 2363 #endif 2364 TXQ_UNLOCK(qs); 2365 2366 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2367 check_ring_tx_db(adap, q, 1); 2368 return (0); 2369 } 2370 2371 /** 2372 * restart_offloadq - restart a suspended offload queue 2373 * @qs: the queue set cotaining the offload queue 2374 * 2375 * Resumes transmission on a suspended Tx offload queue. 
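 *
 * Like restart_ctrlq(), this runs from the adapter taskqueue when
 * restart_tx() notices that the suspended queue has enough free
 * descriptors again. The descriptor count of each deferred packet was
 * stashed in the mbuf priority field by ofld_xmit() (m_set_priority())
 * and is recovered here with m_get_priority() before the work request
 * is written.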
2376 */ 2377 static void 2378 restart_offloadq(void *data, int npending) 2379 { 2380 struct mbuf *m; 2381 struct sge_qset *qs = data; 2382 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2383 adapter_t *adap = qs->port->adapter; 2384 bus_dma_segment_t segs[TX_MAX_SEGS]; 2385 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2386 int nsegs, cleaned; 2387 2388 TXQ_LOCK(qs); 2389 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2390 2391 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2392 unsigned int gen, pidx; 2393 unsigned int ndesc = m_get_priority(m); 2394 2395 if (__predict_false(q->size - q->in_use < ndesc)) { 2396 setbit(&qs->txq_stopped, TXQ_OFLD); 2397 if (should_restart_tx(q) && 2398 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2399 goto again; 2400 q->stops++; 2401 break; 2402 } 2403 2404 gen = q->gen; 2405 q->in_use += ndesc; 2406 pidx = q->pidx; 2407 q->pidx += ndesc; 2408 if (q->pidx >= q->size) { 2409 q->pidx -= q->size; 2410 q->gen ^= 1; 2411 } 2412 2413 (void)mbufq_dequeue(&q->sendq); 2414 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2415 TXQ_UNLOCK(qs); 2416 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2417 TXQ_LOCK(qs); 2418 } 2419 #if USE_GTS 2420 set_bit(TXQ_RUNNING, &q->flags); 2421 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2422 #endif 2423 TXQ_UNLOCK(qs); 2424 wmb(); 2425 t3_write_reg(adap, A_SG_KDOORBELL, 2426 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2427 } 2428 2429 /** 2430 * queue_set - return the queue set a packet should use 2431 * @m: the packet 2432 * 2433 * Maps a packet to the SGE queue set it should use. The desired queue 2434 * set is carried in bits 1-3 in the packet's priority. 2435 */ 2436 static __inline int 2437 queue_set(const struct mbuf *m) 2438 { 2439 return m_get_priority(m) >> 1; 2440 } 2441 2442 /** 2443 * is_ctrl_pkt - return whether an offload packet is a control packet 2444 * @m: the packet 2445 * 2446 * Determines whether an offload packet should use an OFLD or a CTRL 2447 * Tx queue. This is indicated by bit 0 in the packet's priority. 2448 */ 2449 static __inline int 2450 is_ctrl_pkt(const struct mbuf *m) 2451 { 2452 return m_get_priority(m) & 1; 2453 } 2454 2455 /** 2456 * t3_offload_tx - send an offload packet 2457 * @tdev: the offload device to send to 2458 * @m: the packet 2459 * 2460 * Sends an offload packet. We use the packet priority to select the 2461 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2462 * should be sent as regular or control, bits 1-3 select the queue set. 2463 */ 2464 int 2465 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2466 { 2467 adapter_t *adap = tdev2adap(tdev); 2468 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2469 2470 if (__predict_false(is_ctrl_pkt(m))) 2471 return ctrl_xmit(adap, qs, m); 2472 2473 return ofld_xmit(adap, qs, m); 2474 } 2475 2476 /** 2477 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2478 * @tdev: the offload device that will be receiving the packets 2479 * @q: the SGE response queue that assembled the bundle 2480 * @m: the partial bundle 2481 * @n: the number of packets in the bundle 2482 * 2483 * Delivers a (partial) bundle of Rx offload packets to an offload device. 
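 *
 * rx_offload() below gathers incoming offload mbufs into rx_gather[]
 * and hands a full bundle of RX_BUNDLE_SIZE packets to cxgb_ofld_recv()
 * as soon as it fills; process_responses() calls this helper at the end
 * of a polling pass to flush whatever partial bundle remains.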
2484 */ 2485 static __inline void 2486 deliver_partial_bundle(struct t3cdev *tdev, 2487 struct sge_rspq *q, 2488 struct mbuf *mbufs[], int n) 2489 { 2490 if (n) { 2491 q->offload_bundles++; 2492 cxgb_ofld_recv(tdev, mbufs, n); 2493 } 2494 } 2495 2496 static __inline int 2497 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2498 struct mbuf *m, struct mbuf *rx_gather[], 2499 unsigned int gather_idx) 2500 { 2501 2502 rq->offload_pkts++; 2503 m->m_pkthdr.header = mtod(m, void *); 2504 rx_gather[gather_idx++] = m; 2505 if (gather_idx == RX_BUNDLE_SIZE) { 2506 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2507 gather_idx = 0; 2508 rq->offload_bundles++; 2509 } 2510 return (gather_idx); 2511 } 2512 2513 static void 2514 restart_tx(struct sge_qset *qs) 2515 { 2516 struct adapter *sc = qs->port->adapter; 2517 2518 2519 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2520 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2521 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2522 qs->txq[TXQ_OFLD].restarts++; 2523 DPRINTF("restarting TXQ_OFLD\n"); 2524 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2525 } 2526 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2527 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2528 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2529 qs->txq[TXQ_CTRL].in_use); 2530 2531 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2532 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2533 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2534 qs->txq[TXQ_CTRL].restarts++; 2535 DPRINTF("restarting TXQ_CTRL\n"); 2536 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2537 } 2538 } 2539 2540 /** 2541 * t3_sge_alloc_qset - initialize an SGE queue set 2542 * @sc: the controller softc 2543 * @id: the queue set id 2544 * @nports: how many Ethernet ports will be using this queue set 2545 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2546 * @p: configuration parameters for this queue set 2547 * @ntxq: number of Tx queues for the queue set 2548 * @pi: port info for queue set 2549 * 2550 * Allocate resources and initialize an SGE queue set. A queue set 2551 * comprises a response queue, two Rx free-buffer queues, and up to 3 2552 * Tx queues. The Tx queues are assigned roles in the order Ethernet 2553 * queue, offload queue, and control queue. 
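 *
 * On any allocation or context-programming failure the partially
 * constructed queue set is torn down through the err/err_unlock paths
 * by t3_free_qset(), so callers do not need to clean up after an error.
 * Free list 0 is stocked with standard MCLBYTES clusters while free
 * list 1 uses the jumbo cluster size configured in @p (page-size, 9K,
 * or 16K bytes).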
2554 */ 2555 int 2556 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2557 const struct qset_params *p, int ntxq, struct port_info *pi) 2558 { 2559 struct sge_qset *q = &sc->sge.qs[id]; 2560 int i, ret = 0; 2561 2562 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2563 q->port = pi; 2564 2565 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2566 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2567 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2568 goto err; 2569 } 2570 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2571 M_NOWAIT | M_ZERO)) == NULL) { 2572 device_printf(sc->dev, "failed to allocate ifq\n"); 2573 goto err; 2574 } 2575 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2576 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2577 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2578 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2579 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2580 2581 init_qset_cntxt(q, id); 2582 q->idx = id; 2583 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2584 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2585 &q->fl[0].desc, &q->fl[0].sdesc, 2586 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2587 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2588 printf("error %d from alloc ring fl0\n", ret); 2589 goto err; 2590 } 2591 2592 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2593 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2594 &q->fl[1].desc, &q->fl[1].sdesc, 2595 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2596 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2597 printf("error %d from alloc ring fl1\n", ret); 2598 goto err; 2599 } 2600 2601 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2602 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2603 &q->rspq.desc_tag, &q->rspq.desc_map, 2604 NULL, NULL)) != 0) { 2605 printf("error %d from alloc ring rspq\n", ret); 2606 goto err; 2607 } 2608 2609 for (i = 0; i < ntxq; ++i) { 2610 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2611 2612 if ((ret = alloc_ring(sc, p->txq_size[i], 2613 sizeof(struct tx_desc), sz, 2614 &q->txq[i].phys_addr, &q->txq[i].desc, 2615 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2616 &q->txq[i].desc_map, 2617 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2618 printf("error %d from alloc ring tx %i\n", ret, i); 2619 goto err; 2620 } 2621 mbufq_init(&q->txq[i].sendq); 2622 q->txq[i].gen = 1; 2623 q->txq[i].size = p->txq_size[i]; 2624 } 2625 2626 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2627 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2628 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2629 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2630 2631 q->fl[0].gen = q->fl[1].gen = 1; 2632 q->fl[0].size = p->fl_size; 2633 q->fl[1].size = p->jumbo_size; 2634 2635 q->rspq.gen = 1; 2636 q->rspq.cidx = 0; 2637 q->rspq.size = p->rspq_size; 2638 2639 q->txq[TXQ_ETH].stop_thres = nports * 2640 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2641 2642 q->fl[0].buf_size = MCLBYTES; 2643 q->fl[0].zone = zone_pack; 2644 q->fl[0].type = EXT_PACKET; 2645 2646 if (p->jumbo_buf_size == MJUM16BYTES) { 2647 q->fl[1].zone = zone_jumbo16; 2648 q->fl[1].type = EXT_JUMBO16; 2649 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2650 q->fl[1].zone = zone_jumbo9; 2651 q->fl[1].type = EXT_JUMBO9; 2652 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2653 q->fl[1].zone = zone_jumbop; 2654 q->fl[1].type = EXT_JUMBOP; 2655 } else { 2656 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2657 ret = EDOOFUS; 2658 goto err; 2659 } 2660 q->fl[1].buf_size = p->jumbo_buf_size; 2661 2662 /* Allocate and setup the lro_ctrl structure */ 2663 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2664 #ifdef INET 2665 ret = tcp_lro_init(&q->lro.ctrl); 2666 if (ret) { 2667 printf("error %d from tcp_lro_init\n", ret); 2668 goto err; 2669 } 2670 #endif 2671 q->lro.ctrl.ifp = pi->ifp; 2672 2673 mtx_lock_spin(&sc->sge.reg_lock); 2674 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2675 q->rspq.phys_addr, q->rspq.size, 2676 q->fl[0].buf_size, 1, 0); 2677 if (ret) { 2678 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2679 goto err_unlock; 2680 } 2681 2682 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2683 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2684 q->fl[i].phys_addr, q->fl[i].size, 2685 q->fl[i].buf_size, p->cong_thres, 1, 2686 0); 2687 if (ret) { 2688 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2689 goto err_unlock; 2690 } 2691 } 2692 2693 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2694 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2695 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2696 1, 0); 2697 if (ret) { 2698 printf("error %d from t3_sge_init_ecntxt\n", ret); 2699 goto err_unlock; 2700 } 2701 2702 if (ntxq > 1) { 2703 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2704 USE_GTS, SGE_CNTXT_OFLD, id, 2705 q->txq[TXQ_OFLD].phys_addr, 2706 q->txq[TXQ_OFLD].size, 0, 1, 0); 2707 if (ret) { 2708 printf("error %d from t3_sge_init_ecntxt\n", ret); 2709 goto err_unlock; 2710 } 2711 } 2712 2713 if (ntxq > 2) { 2714 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2715 SGE_CNTXT_CTRL, id, 2716 q->txq[TXQ_CTRL].phys_addr, 2717 q->txq[TXQ_CTRL].size, 2718 q->txq[TXQ_CTRL].token, 1, 0); 2719 if (ret) { 2720 printf("error %d from t3_sge_init_ecntxt\n", ret); 2721 goto err_unlock; 2722 } 2723 } 2724 2725 
snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2726 device_get_unit(sc->dev), irq_vec_idx); 2727 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2728 2729 mtx_unlock_spin(&sc->sge.reg_lock); 2730 t3_update_qset_coalesce(q, p); 2731 q->port = pi; 2732 2733 refill_fl(sc, &q->fl[0], q->fl[0].size); 2734 refill_fl(sc, &q->fl[1], q->fl[1].size); 2735 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2736 2737 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2738 V_NEWTIMER(q->rspq.holdoff_tmr)); 2739 2740 return (0); 2741 2742 err_unlock: 2743 mtx_unlock_spin(&sc->sge.reg_lock); 2744 err: 2745 TXQ_LOCK(q); 2746 t3_free_qset(sc, q); 2747 2748 return (ret); 2749 } 2750 2751 /* 2752 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2753 * ethernet data. Hardware assistance with various checksums and any vlan tag 2754 * will also be taken into account here. 2755 */ 2756 void 2757 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2758 { 2759 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2760 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2761 struct ifnet *ifp = pi->ifp; 2762 2763 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2764 2765 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2766 cpl->csum_valid && cpl->csum == 0xffff) { 2767 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2768 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2769 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2770 m->m_pkthdr.csum_data = 0xffff; 2771 } 2772 2773 if (cpl->vlan_valid) { 2774 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2775 m->m_flags |= M_VLANTAG; 2776 } 2777 2778 m->m_pkthdr.rcvif = ifp; 2779 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2780 /* 2781 * adjust after conversion to mbuf chain 2782 */ 2783 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2784 m->m_len -= (sizeof(*cpl) + ethpad); 2785 m->m_data += (sizeof(*cpl) + ethpad); 2786 } 2787 2788 /** 2789 * get_packet - return the next ingress packet buffer from a free list 2790 * @adap: the adapter that received the packet 2791 * @drop_thres: # of remaining buffers before we start dropping packets 2792 * @qs: the qset that the SGE free list holding the packet belongs to 2793 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2794 * @r: response descriptor 2795 * 2796 * Get the next packet from a free list and complete setup of the 2797 * sk_buff. If the packet is small we make a copy and recycle the 2798 * original buffer, otherwise we use the original buffer itself. If a 2799 * positive drop threshold is supplied packets are dropped and their 2800 * buffers recycled if (a) the number of remaining buffers is under the 2801 * threshold and the packet is too big to copy, or (b) the packet should 2802 * be copied but there is no memory for the copy. 2803 */ 2804 static int 2805 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2806 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2807 { 2808 2809 unsigned int len_cq = ntohl(r->len_cq); 2810 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2811 int mask, cidx = fl->cidx; 2812 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2813 uint32_t len = G_RSPD_LEN(len_cq); 2814 uint32_t flags = M_EXT; 2815 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2816 caddr_t cl; 2817 struct mbuf *m; 2818 int ret = 0; 2819 2820 mask = fl->size - 1; 2821 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2822 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2823 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2824 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2825 2826 fl->credits--; 2827 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2828 2829 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2830 sopeop == RSPQ_SOP_EOP) { 2831 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2832 goto skip_recycle; 2833 cl = mtod(m, void *); 2834 memcpy(cl, sd->rxsd_cl, len); 2835 recycle_rx_buf(adap, fl, fl->cidx); 2836 m->m_pkthdr.len = m->m_len = len; 2837 m->m_flags = 0; 2838 mh->mh_head = mh->mh_tail = m; 2839 ret = 1; 2840 goto done; 2841 } else { 2842 skip_recycle: 2843 bus_dmamap_unload(fl->entry_tag, sd->map); 2844 cl = sd->rxsd_cl; 2845 m = sd->m; 2846 2847 if ((sopeop == RSPQ_SOP_EOP) || 2848 (sopeop == RSPQ_SOP)) 2849 flags |= M_PKTHDR; 2850 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2851 if (fl->zone == zone_pack) { 2852 /* 2853 * restore clobbered data pointer 2854 */ 2855 m->m_data = m->m_ext.ext_buf; 2856 } else { 2857 m_cljset(m, cl, fl->type); 2858 } 2859 m->m_len = len; 2860 } 2861 switch(sopeop) { 2862 case RSPQ_SOP_EOP: 2863 ret = 1; 2864 /* FALLTHROUGH */ 2865 case RSPQ_SOP: 2866 mh->mh_head = mh->mh_tail = m; 2867 m->m_pkthdr.len = len; 2868 break; 2869 case RSPQ_EOP: 2870 ret = 1; 2871 /* FALLTHROUGH */ 2872 case RSPQ_NSOP_NEOP: 2873 if (mh->mh_tail == NULL) { 2874 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2875 m_freem(m); 2876 break; 2877 } 2878 mh->mh_tail->m_next = m; 2879 mh->mh_tail = m; 2880 mh->mh_head->m_pkthdr.len += len; 2881 break; 2882 } 2883 if (cxgb_debug) 2884 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2885 done: 2886 if (++fl->cidx == fl->size) 2887 fl->cidx = 0; 2888 2889 return (ret); 2890 } 2891 2892 /** 2893 * handle_rsp_cntrl_info - handles control information in a response 2894 * @qs: the queue set corresponding to the response 2895 * @flags: the response control flags 2896 * 2897 * Handles the control information of an SGE response, such as GTS 2898 * indications and completion credits for the queue set's Tx queues. 2899 * HW coalesces credits, we don't do any extra SW coalescing. 
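 *
 * The TXQ0, TXQ1, and TXQ2 credit fields in @flags correspond to the
 * Ethernet, offload, and control Tx queues of this queue set. Credits
 * are simply accumulated into each queue's 'processed' counter; the
 * difference between 'processed' and 'cleaned' is what the reclaim
 * paths (e.g. reclaim_completed_tx_imm()) later turn into freed
 * descriptors.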
2900 */ 2901 static __inline void 2902 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2903 { 2904 unsigned int credits; 2905 2906 #if USE_GTS 2907 if (flags & F_RSPD_TXQ0_GTS) 2908 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2909 #endif 2910 credits = G_RSPD_TXQ0_CR(flags); 2911 if (credits) 2912 qs->txq[TXQ_ETH].processed += credits; 2913 2914 credits = G_RSPD_TXQ2_CR(flags); 2915 if (credits) 2916 qs->txq[TXQ_CTRL].processed += credits; 2917 2918 # if USE_GTS 2919 if (flags & F_RSPD_TXQ1_GTS) 2920 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2921 # endif 2922 credits = G_RSPD_TXQ1_CR(flags); 2923 if (credits) 2924 qs->txq[TXQ_OFLD].processed += credits; 2925 2926 } 2927 2928 static void 2929 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2930 unsigned int sleeping) 2931 { 2932 ; 2933 } 2934 2935 /** 2936 * process_responses - process responses from an SGE response queue 2937 * @adap: the adapter 2938 * @qs: the queue set to which the response queue belongs 2939 * @budget: how many responses can be processed in this round 2940 * 2941 * Process responses from an SGE response queue up to the supplied budget. 2942 * Responses include received packets as well as credits and other events 2943 * for the queues that belong to the response queue's queue set. 2944 * A negative budget is effectively unlimited. 2945 * 2946 * Additionally choose the interrupt holdoff time for the next interrupt 2947 * on this queue. If the system is under memory shortage use a fairly 2948 * long delay to help recovery. 2949 */ 2950 static int 2951 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2952 { 2953 struct sge_rspq *rspq = &qs->rspq; 2954 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2955 int budget_left = budget; 2956 unsigned int sleeping = 0; 2957 int lro_enabled = qs->lro.enabled; 2958 int skip_lro; 2959 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2960 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2961 int ngathered = 0; 2962 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2963 #ifdef DEBUG 2964 static int last_holdoff = 0; 2965 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2966 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2967 last_holdoff = rspq->holdoff_tmr; 2968 } 2969 #endif 2970 rspq->next_holdoff = rspq->holdoff_tmr; 2971 2972 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2973 int eth, eop = 0, ethpad = 0; 2974 uint32_t flags = ntohl(r->flags); 2975 uint32_t rss_csum = *(const uint32_t *)r; 2976 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2977 2978 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2979 2980 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2981 struct mbuf *m; 2982 2983 if (cxgb_debug) 2984 printf("async notification\n"); 2985 2986 if (mh->mh_head == NULL) { 2987 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2988 m = mh->mh_head; 2989 } else { 2990 m = m_gethdr(M_DONTWAIT, MT_DATA); 2991 } 2992 if (m == NULL) 2993 goto no_mem; 2994 2995 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 2996 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 2997 *mtod(m, char *) = CPL_ASYNC_NOTIF; 2998 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 2999 eop = 1; 3000 rspq->async_notif++; 3001 goto skip; 3002 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3003 struct mbuf *m = NULL; 3004 3005 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3006 r->rss_hdr.opcode, rspq->cidx); 3007 if (mh->mh_head == NULL) 3008 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3009 else 3010 m = m_gethdr(M_DONTWAIT, MT_DATA); 3011 3012 if (mh->mh_head == NULL && m == NULL) { 
3013 no_mem: 3014 rspq->next_holdoff = NOMEM_INTR_DELAY; 3015 budget_left--; 3016 break; 3017 } 3018 get_imm_packet(adap, r, mh->mh_head); 3019 eop = 1; 3020 rspq->imm_data++; 3021 } else if (r->len_cq) { 3022 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3023 3024 eop = get_packet(adap, drop_thresh, qs, mh, r); 3025 if (eop) { 3026 if (r->rss_hdr.hash_type && !adap->timestamp) 3027 mh->mh_head->m_flags |= M_FLOWID; 3028 mh->mh_head->m_pkthdr.flowid = rss_hash; 3029 } 3030 3031 ethpad = 2; 3032 } else { 3033 rspq->pure_rsps++; 3034 } 3035 skip: 3036 if (flags & RSPD_CTRL_MASK) { 3037 sleeping |= flags & RSPD_GTS_MASK; 3038 handle_rsp_cntrl_info(qs, flags); 3039 } 3040 3041 r++; 3042 if (__predict_false(++rspq->cidx == rspq->size)) { 3043 rspq->cidx = 0; 3044 rspq->gen ^= 1; 3045 r = rspq->desc; 3046 } 3047 3048 if (++rspq->credits >= 64) { 3049 refill_rspq(adap, rspq, rspq->credits); 3050 rspq->credits = 0; 3051 } 3052 if (!eth && eop) { 3053 mh->mh_head->m_pkthdr.csum_data = rss_csum; 3054 /* 3055 * XXX size mismatch 3056 */ 3057 m_set_priority(mh->mh_head, rss_hash); 3058 3059 3060 ngathered = rx_offload(&adap->tdev, rspq, 3061 mh->mh_head, offload_mbufs, ngathered); 3062 mh->mh_head = NULL; 3063 DPRINTF("received offload packet\n"); 3064 3065 } else if (eth && eop) { 3066 struct mbuf *m = mh->mh_head; 3067 3068 t3_rx_eth(adap, rspq, m, ethpad); 3069 3070 /* 3071 * The T304 sends incoming packets on any qset. If LRO 3072 * is also enabled, we could end up sending packet up 3073 * lro_ctrl->ifp's input. That is incorrect. 3074 * 3075 * The mbuf's rcvif was derived from the cpl header and 3076 * is accurate. Skip LRO and just use that. 3077 */ 3078 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3079 3080 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 3081 #ifdef INET 3082 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 3083 #endif 3084 ) { 3085 /* successfully queue'd for LRO */ 3086 } else { 3087 /* 3088 * LRO not enabled, packet unsuitable for LRO, 3089 * or unable to queue. Pass it up right now in 3090 * either case. 3091 */ 3092 struct ifnet *ifp = m->m_pkthdr.rcvif; 3093 (*ifp->if_input)(ifp, m); 3094 } 3095 mh->mh_head = NULL; 3096 3097 } 3098 __refill_fl_lt(adap, &qs->fl[0], 32); 3099 __refill_fl_lt(adap, &qs->fl[1], 32); 3100 --budget_left; 3101 } 3102 3103 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3104 3105 #ifdef INET 3106 /* Flush LRO */ 3107 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3108 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3109 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3110 tcp_lro_flush(lro_ctrl, queued); 3111 } 3112 #endif 3113 3114 if (sleeping) 3115 check_ring_db(adap, qs, sleeping); 3116 3117 mb(); /* commit Tx queue processed updates */ 3118 if (__predict_false(qs->txq_stopped > 1)) 3119 restart_tx(qs); 3120 3121 __refill_fl_lt(adap, &qs->fl[0], 512); 3122 __refill_fl_lt(adap, &qs->fl[1], 512); 3123 budget -= budget_left; 3124 return (budget); 3125 } 3126 3127 /* 3128 * A helper function that processes responses and issues GTS. 
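 *
 * The A_SG_GTS write tells the SGE the updated consumer index for the
 * response queue (V_NEWINDEX) and programs the holdoff timer selected
 * by process_responses() (V_NEWTIMER); this is also what re-arms the
 * next interrupt for the queue.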
3129 */ 3130 static __inline int 3131 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3132 { 3133 int work; 3134 static int last_holdoff = 0; 3135 3136 work = process_responses(adap, rspq_to_qset(rq), -1); 3137 3138 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3139 printf("next_holdoff=%d\n", rq->next_holdoff); 3140 last_holdoff = rq->next_holdoff; 3141 } 3142 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3143 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3144 3145 return (work); 3146 } 3147 3148 3149 /* 3150 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3151 * Handles data events from SGE response queues as well as error and other 3152 * async events as they all use the same interrupt pin. We use one SGE 3153 * response queue per port in this mode and protect all response queues with 3154 * queue 0's lock. 3155 */ 3156 void 3157 t3b_intr(void *data) 3158 { 3159 uint32_t i, map; 3160 adapter_t *adap = data; 3161 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3162 3163 t3_write_reg(adap, A_PL_CLI, 0); 3164 map = t3_read_reg(adap, A_SG_DATA_INTR); 3165 3166 if (!map) 3167 return; 3168 3169 if (__predict_false(map & F_ERRINTR)) { 3170 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3171 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3172 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3173 } 3174 3175 mtx_lock(&q0->lock); 3176 for_each_port(adap, i) 3177 if (map & (1 << i)) 3178 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3179 mtx_unlock(&q0->lock); 3180 } 3181 3182 /* 3183 * The MSI interrupt handler. This needs to handle data events from SGE 3184 * response queues as well as error and other async events as they all use 3185 * the same MSI vector. We use one SGE response queue per port in this mode 3186 * and protect all response queues with queue 0's lock. 
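 *
 * If none of the response queues produced any work, the interrupt is
 * assumed to be an error or other async event: PL_INT_ENABLE0 is masked
 * (the read-back flushes the write) and the slow interrupt task is
 * scheduled to service it, mirroring the F_ERRINTR handling in
 * t3b_intr() above.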
3187 */ 3188 void 3189 t3_intr_msi(void *data) 3190 { 3191 adapter_t *adap = data; 3192 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3193 int i, new_packets = 0; 3194 3195 mtx_lock(&q0->lock); 3196 3197 for_each_port(adap, i) 3198 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3199 new_packets = 1; 3200 mtx_unlock(&q0->lock); 3201 if (new_packets == 0) { 3202 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3203 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3204 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3205 } 3206 } 3207 3208 void 3209 t3_intr_msix(void *data) 3210 { 3211 struct sge_qset *qs = data; 3212 adapter_t *adap = qs->port->adapter; 3213 struct sge_rspq *rspq = &qs->rspq; 3214 3215 if (process_responses_gts(adap, rspq) == 0) 3216 rspq->unhandled_irqs++; 3217 } 3218 3219 #define QDUMP_SBUF_SIZE 32 * 400 3220 static int 3221 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3222 { 3223 struct sge_rspq *rspq; 3224 struct sge_qset *qs; 3225 int i, err, dump_end, idx; 3226 struct sbuf *sb; 3227 struct rsp_desc *rspd; 3228 uint32_t data[4]; 3229 3230 rspq = arg1; 3231 qs = rspq_to_qset(rspq); 3232 if (rspq->rspq_dump_count == 0) 3233 return (0); 3234 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3235 log(LOG_WARNING, 3236 "dump count is too large %d\n", rspq->rspq_dump_count); 3237 rspq->rspq_dump_count = 0; 3238 return (EINVAL); 3239 } 3240 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3241 log(LOG_WARNING, 3242 "dump start of %d is greater than queue size\n", 3243 rspq->rspq_dump_start); 3244 rspq->rspq_dump_start = 0; 3245 return (EINVAL); 3246 } 3247 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3248 if (err) 3249 return (err); 3250 err = sysctl_wire_old_buffer(req, 0); 3251 if (err) 3252 return (err); 3253 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3254 3255 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3256 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3257 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3258 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3259 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3260 3261 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3262 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3263 3264 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3265 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3266 idx = i & (RSPQ_Q_SIZE-1); 3267 3268 rspd = &rspq->desc[idx]; 3269 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3270 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3271 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3272 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3273 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3274 be32toh(rspd->len_cq), rspd->intr_gen); 3275 } 3276 3277 err = sbuf_finish(sb); 3278 /* Output a trailing NUL. 
*/ 3279 if (err == 0) 3280 err = SYSCTL_OUT(req, "", 1); 3281 sbuf_delete(sb); 3282 return (err); 3283 } 3284 3285 static int 3286 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3287 { 3288 struct sge_txq *txq; 3289 struct sge_qset *qs; 3290 int i, j, err, dump_end; 3291 struct sbuf *sb; 3292 struct tx_desc *txd; 3293 uint32_t *WR, wr_hi, wr_lo, gen; 3294 uint32_t data[4]; 3295 3296 txq = arg1; 3297 qs = txq_to_qset(txq, TXQ_ETH); 3298 if (txq->txq_dump_count == 0) { 3299 return (0); 3300 } 3301 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3302 log(LOG_WARNING, 3303 "dump count is too large %d\n", txq->txq_dump_count); 3304 txq->txq_dump_count = 1; 3305 return (EINVAL); 3306 } 3307 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3308 log(LOG_WARNING, 3309 "dump start of %d is greater than queue size\n", 3310 txq->txq_dump_start); 3311 txq->txq_dump_start = 0; 3312 return (EINVAL); 3313 } 3314 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3315 if (err) 3316 return (err); 3317 err = sysctl_wire_old_buffer(req, 0); 3318 if (err) 3319 return (err); 3320 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3321 3322 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3323 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3324 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3325 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3326 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3327 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3328 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3329 txq->txq_dump_start, 3330 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3331 3332 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3333 for (i = txq->txq_dump_start; i < dump_end; i++) { 3334 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3335 WR = (uint32_t *)txd->flit; 3336 wr_hi = ntohl(WR[0]); 3337 wr_lo = ntohl(WR[1]); 3338 gen = G_WR_GEN(wr_lo); 3339 3340 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3341 wr_hi, wr_lo, gen); 3342 for (j = 2; j < 30; j += 4) 3343 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3344 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3345 3346 } 3347 err = sbuf_finish(sb); 3348 /* Output a trailing NUL. 
*/ 3349 if (err == 0) 3350 err = SYSCTL_OUT(req, "", 1); 3351 sbuf_delete(sb); 3352 return (err); 3353 } 3354 3355 static int 3356 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3357 { 3358 struct sge_txq *txq; 3359 struct sge_qset *qs; 3360 int i, j, err, dump_end; 3361 struct sbuf *sb; 3362 struct tx_desc *txd; 3363 uint32_t *WR, wr_hi, wr_lo, gen; 3364 3365 txq = arg1; 3366 qs = txq_to_qset(txq, TXQ_CTRL); 3367 if (txq->txq_dump_count == 0) { 3368 return (0); 3369 } 3370 if (txq->txq_dump_count > 256) { 3371 log(LOG_WARNING, 3372 "dump count is too large %d\n", txq->txq_dump_count); 3373 txq->txq_dump_count = 1; 3374 return (EINVAL); 3375 } 3376 if (txq->txq_dump_start > 255) { 3377 log(LOG_WARNING, 3378 "dump start of %d is greater than queue size\n", 3379 txq->txq_dump_start); 3380 txq->txq_dump_start = 0; 3381 return (EINVAL); 3382 } 3383 3384 err = sysctl_wire_old_buffer(req, 0); 3385 if (err != 0) 3386 return (err); 3387 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3388 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3389 txq->txq_dump_start, 3390 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3391 3392 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3393 for (i = txq->txq_dump_start; i < dump_end; i++) { 3394 txd = &txq->desc[i & (255)]; 3395 WR = (uint32_t *)txd->flit; 3396 wr_hi = ntohl(WR[0]); 3397 wr_lo = ntohl(WR[1]); 3398 gen = G_WR_GEN(wr_lo); 3399 3400 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3401 wr_hi, wr_lo, gen); 3402 for (j = 2; j < 30; j += 4) 3403 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3404 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3405 3406 } 3407 err = sbuf_finish(sb); 3408 /* Output a trailing NUL. */ 3409 if (err == 0) 3410 err = SYSCTL_OUT(req, "", 1); 3411 sbuf_delete(sb); 3412 return (err); 3413 } 3414 3415 static int 3416 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3417 { 3418 adapter_t *sc = arg1; 3419 struct qset_params *qsp = &sc->params.sge.qset[0]; 3420 int coalesce_usecs; 3421 struct sge_qset *qs; 3422 int i, j, err, nqsets = 0; 3423 struct mtx *lock; 3424 3425 if ((sc->flags & FULL_INIT_DONE) == 0) 3426 return (ENXIO); 3427 3428 coalesce_usecs = qsp->coalesce_usecs; 3429 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3430 3431 if (err != 0) { 3432 return (err); 3433 } 3434 if (coalesce_usecs == qsp->coalesce_usecs) 3435 return (0); 3436 3437 for (i = 0; i < sc->params.nports; i++) 3438 for (j = 0; j < sc->port[i].nqsets; j++) 3439 nqsets++; 3440 3441 coalesce_usecs = max(1, coalesce_usecs); 3442 3443 for (i = 0; i < nqsets; i++) { 3444 qs = &sc->sge.qs[i]; 3445 qsp = &sc->params.sge.qset[i]; 3446 qsp->coalesce_usecs = coalesce_usecs; 3447 3448 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3449 &sc->sge.qs[0].rspq.lock; 3450 3451 mtx_lock(lock); 3452 t3_update_qset_coalesce(qs, qsp); 3453 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3454 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3455 mtx_unlock(lock); 3456 } 3457 3458 return (0); 3459 } 3460 3461 static int 3462 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3463 { 3464 adapter_t *sc = arg1; 3465 int rc, timestamp; 3466 3467 if ((sc->flags & FULL_INIT_DONE) == 0) 3468 return (ENXIO); 3469 3470 timestamp = sc->timestamp; 3471 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3472 3473 if (rc != 0) 3474 return (rc); 3475 3476 if (timestamp != sc->timestamp) { 3477 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3478 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3479 sc->timestamp = timestamp; 3480 } 3481 3482 return (0); 3483 } 3484 3485 void 3486 t3_add_attach_sysctls(adapter_t *sc) 3487 { 3488 struct sysctl_ctx_list *ctx; 3489 struct sysctl_oid_list *children; 3490 3491 ctx = device_get_sysctl_ctx(sc->dev); 3492 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3493 3494 /* random information */ 3495 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3496 "firmware_version", 3497 CTLFLAG_RD, &sc->fw_version, 3498 0, "firmware version"); 3499 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3500 "hw_revision", 3501 CTLFLAG_RD, &sc->params.rev, 3502 0, "chip model"); 3503 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3504 "port_types", 3505 CTLFLAG_RD, &sc->port_types, 3506 0, "type of ports"); 3507 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3508 "enable_debug", 3509 CTLFLAG_RW, &cxgb_debug, 3510 0, "enable verbose debugging output"); 3511 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3512 CTLFLAG_RD, &sc->tunq_coalesce, 3513 "#tunneled packets freed"); 3514 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3515 "txq_overrun", 3516 CTLFLAG_RD, &txq_fills, 3517 0, "#times txq overrun"); 3518 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3519 "core_clock", 3520 CTLFLAG_RD, &sc->params.vpd.cclk, 3521 0, "core clock frequency (in KHz)"); 3522 } 3523 3524 3525 static const char *rspq_name = "rspq"; 3526 static const char *txq_names[] = 3527 { 3528 "txq_eth", 3529 "txq_ofld", 3530 "txq_ctrl" 3531 }; 3532 3533 static int 3534 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3535 { 3536 struct port_info *p = arg1; 3537 uint64_t *parg; 3538 3539 if (!p) 3540 return (EINVAL); 3541 3542 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3543 PORT_LOCK(p); 3544 t3_mac_update_stats(&p->mac); 3545 PORT_UNLOCK(p); 3546 3547 return (sysctl_handle_64(oidp, parg, 0, req)); 3548 } 3549 3550 void 3551 t3_add_configured_sysctls(adapter_t *sc) 3552 { 3553 struct sysctl_ctx_list *ctx; 3554 struct sysctl_oid_list *children; 3555 int i, j; 3556 3557 ctx = device_get_sysctl_ctx(sc->dev); 3558 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3559 3560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3561 "intr_coal", 3562 CTLTYPE_INT|CTLFLAG_RW, sc, 3563 0, t3_set_coalesce_usecs, 3564 "I", "interrupt coalescing timer (us)"); 3565 3566 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3567 "pkt_timestamp", 3568 CTLTYPE_INT | CTLFLAG_RW, sc, 3569 0, t3_pkt_timestamp, 3570 "I", "provide packet timestamp instead of connection hash"); 3571 3572 for (i = 0; i < sc->params.nports; i++) { 3573 struct port_info *pi = &sc->port[i]; 3574 struct sysctl_oid *poid; 3575 struct sysctl_oid_list *poidlist; 3576 struct mac_stats *mstats = &pi->mac.stats; 3577 3578 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3579 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3580 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3581 poidlist = SYSCTL_CHILDREN(poid); 3582 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3583 "nqsets", CTLFLAG_RD, &pi->nqsets, 3584 0, "#queue sets"); 3585 3586 for (j = 0; j < pi->nqsets; j++) { 3587 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3588 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3589 *ctrlqpoid, *lropoid; 3590 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3591 *txqpoidlist, *ctrlqpoidlist, 3592 *lropoidlist; 3593 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3594 3595 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3596 3597 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3598 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3599 
qspoidlist = SYSCTL_CHILDREN(qspoid); 3600 3601 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3602 CTLFLAG_RD, &qs->fl[0].empty, 0, 3603 "freelist #0 empty"); 3604 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3605 CTLFLAG_RD, &qs->fl[1].empty, 0, 3606 "freelist #1 empty"); 3607 3608 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3609 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3610 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3611 3612 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3613 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3614 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3615 3616 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3617 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3618 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3619 3620 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3621 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3622 lropoidlist = SYSCTL_CHILDREN(lropoid); 3623 3624 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3625 CTLFLAG_RD, &qs->rspq.size, 3626 0, "#entries in response queue"); 3627 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3628 CTLFLAG_RD, &qs->rspq.cidx, 3629 0, "consumer index"); 3630 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3631 CTLFLAG_RD, &qs->rspq.credits, 3632 0, "#credits"); 3633 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3634 CTLFLAG_RD, &qs->rspq.starved, 3635 0, "#times starved"); 3636 SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3637 CTLFLAG_RD, &qs->rspq.phys_addr, 3638 "physical_address_of the queue"); 3639 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3640 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3641 0, "start rspq dump entry"); 3642 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3643 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3644 0, "#rspq entries to dump"); 3645 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3646 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3647 0, t3_dump_rspq, "A", "dump of the response queue"); 3648 3649 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3650 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3651 "#tunneled packets dropped"); 3652 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3653 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3654 0, "#tunneled packets waiting to be sent"); 3655 #if 0 3656 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3657 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3658 0, "#tunneled packets queue producer index"); 3659 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3660 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3661 0, "#tunneled packets queue consumer index"); 3662 #endif 3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3664 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3665 0, "#tunneled packets processed by the card"); 3666 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3667 CTLFLAG_RD, &txq->cleaned, 3668 0, "#tunneled packets cleaned"); 3669 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3670 CTLFLAG_RD, &txq->in_use, 3671 0, "#tunneled packet slots in use"); 3672 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3673 CTLFLAG_RD, &txq->txq_frees, 3674 "#tunneled packets freed"); 3675 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3676 CTLFLAG_RD, &txq->txq_skipped, 3677 0, "#tunneled packet descriptors skipped"); 3678 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3679 CTLFLAG_RD, &txq->txq_coalesced, 3680 
"#tunneled packets coalesced"); 3681 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3682 CTLFLAG_RD, &txq->txq_enqueued, 3683 0, "#tunneled packets enqueued to hardware"); 3684 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3685 CTLFLAG_RD, &qs->txq_stopped, 3686 0, "tx queues stopped"); 3687 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3688 CTLFLAG_RD, &txq->phys_addr, 3689 "physical_address_of the queue"); 3690 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3691 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3692 0, "txq generation"); 3693 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3694 CTLFLAG_RD, &txq->cidx, 3695 0, "hardware queue cidx"); 3696 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3697 CTLFLAG_RD, &txq->pidx, 3698 0, "hardware queue pidx"); 3699 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3700 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3701 0, "txq start idx for dump"); 3702 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3703 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3704 0, "txq #entries to dump"); 3705 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3706 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3707 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3708 3709 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3710 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3711 0, "ctrlq start idx for dump"); 3712 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3713 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3714 0, "ctrl #entries to dump"); 3715 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3716 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3717 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3718 3719 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3720 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3721 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3722 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3723 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3724 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3725 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3726 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3727 } 3728 3729 /* Now add a node for mac stats. */ 3730 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3731 CTLFLAG_RD, NULL, "MAC statistics"); 3732 poidlist = SYSCTL_CHILDREN(poid); 3733 3734 /* 3735 * We (ab)use the length argument (arg2) to pass on the offset 3736 * of the data that we are interested in. This is only required 3737 * for the quad counters that are updated from the hardware (we 3738 * make sure that we return the latest value). 3739 * sysctl_handle_macstat first updates *all* the counters from 3740 * the hardware, and then returns the latest value of the 3741 * requested counter. Best would be to update only the 3742 * requested counter from hardware, but t3_mac_update_stats() 3743 * hides all the register details and we don't want to dive into 3744 * all that here. 
3745 */ 3746 #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ 3747 (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ 3748 sysctl_handle_macstat, "QU", 0) 3749 CXGB_SYSCTL_ADD_QUAD(tx_octets); 3750 CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); 3751 CXGB_SYSCTL_ADD_QUAD(tx_frames); 3752 CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); 3753 CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); 3754 CXGB_SYSCTL_ADD_QUAD(tx_pause); 3755 CXGB_SYSCTL_ADD_QUAD(tx_deferred); 3756 CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); 3757 CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); 3758 CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); 3759 CXGB_SYSCTL_ADD_QUAD(tx_underrun); 3760 CXGB_SYSCTL_ADD_QUAD(tx_len_errs); 3761 CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); 3762 CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); 3763 CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); 3764 CXGB_SYSCTL_ADD_QUAD(tx_frames_64); 3765 CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); 3766 CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); 3767 CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); 3768 CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); 3769 CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); 3770 CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); 3771 CXGB_SYSCTL_ADD_QUAD(rx_octets); 3772 CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); 3773 CXGB_SYSCTL_ADD_QUAD(rx_frames); 3774 CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); 3775 CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); 3776 CXGB_SYSCTL_ADD_QUAD(rx_pause); 3777 CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); 3778 CXGB_SYSCTL_ADD_QUAD(rx_align_errs); 3779 CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); 3780 CXGB_SYSCTL_ADD_QUAD(rx_data_errs); 3781 CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); 3782 CXGB_SYSCTL_ADD_QUAD(rx_runt); 3783 CXGB_SYSCTL_ADD_QUAD(rx_jabber); 3784 CXGB_SYSCTL_ADD_QUAD(rx_short); 3785 CXGB_SYSCTL_ADD_QUAD(rx_too_long); 3786 CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); 3787 CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); 3788 CXGB_SYSCTL_ADD_QUAD(rx_frames_64); 3789 CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); 3790 CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); 3791 CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); 3792 CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); 3793 CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); 3794 CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); 3795 #undef CXGB_SYSCTL_ADD_QUAD 3796 3797 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ 3798 CTLFLAG_RD, &mstats->a, 0) 3799 CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); 3800 CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); 3801 CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); 3802 CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); 3803 CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); 3804 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); 3805 CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); 3806 CXGB_SYSCTL_ADD_ULONG(num_toggled); 3807 CXGB_SYSCTL_ADD_ULONG(num_resets); 3808 CXGB_SYSCTL_ADD_ULONG(link_faults); 3809 #undef CXGB_SYSCTL_ADD_ULONG 3810 } 3811 } 3812 3813 /** 3814 * t3_get_desc - dump an SGE descriptor for debugging purposes 3815 * @qs: the queue set 3816 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) 3817 * @idx: the descriptor index in the queue 3818 * @data: where to dump the descriptor contents 3819 * 3820 * Dumps the contents of a HW descriptor of an SGE queue. Returns the 3821 * size of the descriptor. 
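 *
 * Queue numbers 0-2 select qs->txq[qnum] (struct tx_desc), 3 selects
 * the response queue (struct rsp_desc), and 4-5 select the free lists
 * qs->fl[qnum - 4] (struct rx_desc). Out-of-range indices and rings
 * that were never allocated are rejected with EINVAL.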
3822 */
3823 int
3824 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3825 unsigned char *data)
3826 {
3827 if (qnum >= 6)
3828 return (EINVAL);
3829
3830 if (qnum < 3) {
3831 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3832 return (EINVAL);
3833 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3834 return sizeof(struct tx_desc);
3835 }
3836
3837 if (qnum == 3) {
3838 if (!qs->rspq.desc || idx >= qs->rspq.size)
3839 return (EINVAL);
3840 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3841 return sizeof(struct rsp_desc);
3842 }
3843
3844 qnum -= 4;
3845 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3846 return (EINVAL);
3847 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3848 return sizeof(struct rx_desc);
3849 }
3850
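/*
 * Editorial note: the descriptor rings in this file (ctrl_xmit(),
 * ofld_xmit(), restart_ctrlq(), is_new_response()) all rely on the same
 * generation-bit convention to tell freshly written entries from stale
 * ones once the producer index wraps. The block below is a minimal,
 * self-contained userland model of that convention. It is illustrative
 * only and is not part of the driver; the names (toy_ring, toy_produce,
 * toy_consume) are invented for this sketch, and the real driver also
 * tracks in_use so the producer can never overwrite unconsumed entries.
 */
#if 0
#include <stdio.h>

#define TOY_RING_SIZE	8

struct toy_entry {
	unsigned int data;
	unsigned int gen;	/* generation the entry was written with */
};

struct toy_ring {
	struct toy_entry e[TOY_RING_SIZE];
	unsigned int pidx;	/* producer index */
	unsigned int cidx;	/* consumer index */
	unsigned int pgen;	/* producer generation, starts at 1 */
	unsigned int cgen;	/* generation the consumer expects */
};

static void
toy_produce(struct toy_ring *r, unsigned int data)
{
	/* Payload first, generation last, as write_imm()/wr_gen2() do. */
	r->e[r->pidx].data = data;
	r->e[r->pidx].gen = r->pgen;
	if (++r->pidx == TOY_RING_SIZE) {
		r->pidx = 0;
		r->pgen ^= 1;	/* flip the generation on wrap */
	}
}

static int
toy_consume(struct toy_ring *r, unsigned int *data)
{
	/* An entry is new only if its generation matches ours. */
	if (r->e[r->cidx].gen != r->cgen)
		return (0);
	*data = r->e[r->cidx].data;
	if (++r->cidx == TOY_RING_SIZE) {
		r->cidx = 0;
		r->cgen ^= 1;
	}
	return (1);
}

int
main(void)
{
	struct toy_ring r = { .pgen = 1, .cgen = 1 };
	unsigned int v;
	int i;

	for (i = 0; i < 2 * TOY_RING_SIZE; i++) {
		toy_produce(&r, i);
		if (toy_consume(&r, &v))
			printf("consumed %u\n", v);
	}
	return (0);
}
#endif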