1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/kernel.h> 39 #include <sys/module.h> 40 #include <sys/bus.h> 41 #include <sys/conf.h> 42 #include <machine/bus.h> 43 #include <machine/resource.h> 44 #include <sys/bus_dma.h> 45 #include <sys/rman.h> 46 #include <sys/queue.h> 47 #include <sys/sysctl.h> 48 #include <sys/taskqueue.h> 49 50 #include <sys/proc.h> 51 #include <sys/sbuf.h> 52 #include <sys/sched.h> 53 #include <sys/smp.h> 54 #include <sys/systm.h> 55 #include <sys/syslog.h> 56 #include <sys/socket.h> 57 58 #include <net/bpf.h> 59 #include <net/ethernet.h> 60 #include <net/if.h> 61 #include <net/if_vlan_var.h> 62 63 #include <netinet/in_systm.h> 64 #include <netinet/in.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip6.h> 67 #include <netinet/tcp.h> 68 69 #include <dev/pci/pcireg.h> 70 #include <dev/pci/pcivar.h> 71 72 #include <vm/vm.h> 73 #include <vm/pmap.h> 74 75 #include <cxgb_include.h> 76 #include <sys/mvec.h> 77 78 int txq_fills = 0; 79 int multiq_tx_enable = 1; 80 81 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 82 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 83 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 84 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 85 "size of per-queue mbuf ring"); 86 87 static int cxgb_tx_coalesce_force = 0; 88 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 89 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 90 &cxgb_tx_coalesce_force, 0, 91 "coalesce small packets into a single work request regardless of ring state"); 92 93 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 94 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 95 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 96 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 97 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 98 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 99 #define 
TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 100 101 102 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 103 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 104 &cxgb_tx_coalesce_enable_start); 105 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, 106 &cxgb_tx_coalesce_enable_start, 0, 107 "coalesce enable threshold"); 108 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 109 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); 110 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, 111 &cxgb_tx_coalesce_enable_stop, 0, 112 "coalesce disable threshold"); 113 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 114 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); 115 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, 116 &cxgb_tx_reclaim_threshold, 0, 117 "tx cleaning minimum threshold"); 118 119 /* 120 * XXX don't re-enable this until TOE stops assuming 121 * we have an m_ext 122 */ 123 static int recycle_enable = 0; 124 125 extern int cxgb_use_16k_clusters; 126 extern int nmbjumbop; 127 extern int nmbjumbo9; 128 extern int nmbjumbo16; 129 130 #define USE_GTS 0 131 132 #define SGE_RX_SM_BUF_SIZE 1536 133 #define SGE_RX_DROP_THRES 16 134 #define SGE_RX_COPY_THRES 128 135 136 /* 137 * Period of the Tx buffer reclaim timer. This timer does not need to run 138 * frequently as Tx buffers are usually reclaimed by new Tx packets. 139 */ 140 #define TX_RECLAIM_PERIOD (hz >> 1) 141 142 /* 143 * Values for sge_txq.flags 144 */ 145 enum { 146 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 147 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 148 }; 149 150 struct tx_desc { 151 uint64_t flit[TX_DESC_FLITS]; 152 } __packed; 153 154 struct rx_desc { 155 uint32_t addr_lo; 156 uint32_t len_gen; 157 uint32_t gen2; 158 uint32_t addr_hi; 159 } __packed; 160 161 struct rsp_desc { /* response queue descriptor */ 162 struct rss_header rss_hdr; 163 uint32_t flags; 164 uint32_t len_cq; 165 uint8_t imm_data[47]; 166 uint8_t intr_gen; 167 } __packed; 168 169 #define RX_SW_DESC_MAP_CREATED (1 << 0) 170 #define TX_SW_DESC_MAP_CREATED (1 << 1) 171 #define RX_SW_DESC_INUSE (1 << 3) 172 #define TX_SW_DESC_MAPPED (1 << 4) 173 174 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 175 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 176 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 177 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 178 179 struct tx_sw_desc { /* SW state per Tx descriptor */ 180 struct mbuf *m; 181 bus_dmamap_t map; 182 int flags; 183 }; 184 185 struct rx_sw_desc { /* SW state per Rx descriptor */ 186 caddr_t rxsd_cl; 187 struct mbuf *m; 188 bus_dmamap_t map; 189 int flags; 190 }; 191 192 struct txq_state { 193 unsigned int compl; 194 unsigned int gen; 195 unsigned int pidx; 196 }; 197 198 struct refill_fl_cb_arg { 199 int error; 200 bus_dma_segment_t seg; 201 int nseg; 202 }; 203 204 205 /* 206 * Maps a number of flits to the number of Tx descriptors that can hold them. 207 * The formula is 208 * 209 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 210 * 211 * HW allows up to 4 descriptors to be combined into a WR. 
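 *
 * For example, with SGE_NUM_GENBITS == 2 the table below corresponds to
 * WR_FLITS == 15: a 16-flit request needs 1 + (16 - 2) / 14 = 2
 * descriptors, i.e. flit_desc_map[16] == 2.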
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};

#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)	\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)	\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)

int cxgb_debug = 0;

static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);

/*
 * XXX need to cope with bursty scheduling by looking at a wider
 * window than we do now for determining the need for coalescing
 */
static __inline uint64_t
check_pkt_coalesce(struct sge_qset *qs)
{
	struct adapter *sc;
	struct sge_txq *txq;
	uint8_t *fill;

	if (__predict_false(cxgb_tx_coalesce_force))
		return (1);
	txq = &qs->txq[TXQ_ETH];
	sc = qs->port->adapter;
	fill = &sc->tunq_fill[qs->idx];

	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
	/*
	 * If the hardware transmit queue is more than 1/8 full we mark it
	 * as coalescing; we drop back from coalescing when we go below
	 * 1/32 full and there are no packets enqueued.  This provides us
	 * with some degree of hysteresis.
	 */
	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
		*fill = 0;
	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
		*fill = 1;

	return (sc->tunq_coalesce);
}

#ifdef __LP64__
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{
	uint64_t wr_hilo;
#if _BYTE_ORDER == _LITTLE_ENDIAN
	wr_hilo = wr_hi;
	wr_hilo |= (((uint64_t)wr_lo)<<32);
#else
	wr_hilo = wr_lo;
	wr_hilo |= (((uint64_t)wr_hi)<<32);
#endif
	wrp->wrh_hilo = wr_hilo;
}
#else
static void
set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
{

	wrp->wrh_hi = wr_hi;
	wmb();
	wrp->wrh_lo = wr_lo;
}
#endif

struct coalesce_info {
	int count;
	int nbytes;
};

static int
coalesce_check(struct mbuf *m, void *arg)
{
	struct coalesce_info *ci = arg;
	int *count = &ci->count;
	int *nbytes =
&ci->nbytes; 323 324 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 325 (*count < 7) && (m->m_next == NULL))) { 326 *count += 1; 327 *nbytes += m->m_len; 328 return (1); 329 } 330 return (0); 331 } 332 333 static struct mbuf * 334 cxgb_dequeue(struct sge_qset *qs) 335 { 336 struct mbuf *m, *m_head, *m_tail; 337 struct coalesce_info ci; 338 339 340 if (check_pkt_coalesce(qs) == 0) 341 return TXQ_RING_DEQUEUE(qs); 342 343 m_head = m_tail = NULL; 344 ci.count = ci.nbytes = 0; 345 do { 346 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 347 if (m_head == NULL) { 348 m_tail = m_head = m; 349 } else if (m != NULL) { 350 m_tail->m_nextpkt = m; 351 m_tail = m; 352 } 353 } while (m != NULL); 354 if (ci.count > 7) 355 panic("trying to coalesce %d packets in to one WR", ci.count); 356 return (m_head); 357 } 358 359 /** 360 * reclaim_completed_tx - reclaims completed Tx descriptors 361 * @adapter: the adapter 362 * @q: the Tx queue to reclaim completed descriptors from 363 * 364 * Reclaims Tx descriptors that the SGE has indicated it has processed, 365 * and frees the associated buffers if possible. Called with the Tx 366 * queue's lock held. 367 */ 368 static __inline int 369 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 370 { 371 struct sge_txq *q = &qs->txq[queue]; 372 int reclaim = desc_reclaimable(q); 373 374 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 375 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 376 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 377 378 if (reclaim < reclaim_min) 379 return (0); 380 381 mtx_assert(&qs->lock, MA_OWNED); 382 if (reclaim > 0) { 383 t3_free_tx_desc(qs, reclaim, queue); 384 q->cleaned += reclaim; 385 q->in_use -= reclaim; 386 } 387 if (isset(&qs->txq_stopped, TXQ_ETH)) 388 clrbit(&qs->txq_stopped, TXQ_ETH); 389 390 return (reclaim); 391 } 392 393 /** 394 * should_restart_tx - are there enough resources to restart a Tx queue? 395 * @q: the Tx queue 396 * 397 * Checks if there are enough descriptors to restart a suspended Tx queue. 398 */ 399 static __inline int 400 should_restart_tx(const struct sge_txq *q) 401 { 402 unsigned int r = q->processed - q->cleaned; 403 404 return q->in_use - r < (q->size >> 1); 405 } 406 407 /** 408 * t3_sge_init - initialize SGE 409 * @adap: the adapter 410 * @p: the SGE parameters 411 * 412 * Performs SGE initialization needed every time after a chip reset. 413 * We do not initialize any of the queue sets here, instead the driver 414 * top-level must request those individually. We also do not enable DMA 415 * here, that should be done after the queues have been set up. 416 */ 417 void 418 t3_sge_init(adapter_t *adap, struct sge_params *p) 419 { 420 u_int ctrl, ups; 421 422 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 423 424 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 425 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 426 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 427 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 428 #if SGE_NUM_GENBITS == 1 429 ctrl |= F_EGRGENCTRL; 430 #endif 431 if (adap->params.rev > 0) { 432 if (!(adap->flags & (USING_MSIX | USING_MSI))) 433 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 434 } 435 t3_write_reg(adap, A_SG_CONTROL, ctrl); 436 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 437 V_LORCQDRBTHRSH(512)); 438 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 439 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 440 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 441 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 442 adap->params.rev < T3_REV_C ? 1000 : 500); 443 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 444 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 445 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 446 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 447 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 448 } 449 450 451 /** 452 * sgl_len - calculates the size of an SGL of the given capacity 453 * @n: the number of SGL entries 454 * 455 * Calculates the number of flits needed for a scatter/gather list that 456 * can hold the given number of entries. 457 */ 458 static __inline unsigned int 459 sgl_len(unsigned int n) 460 { 461 return ((3 * n) / 2 + (n & 1)); 462 } 463 464 /** 465 * get_imm_packet - return the next ingress packet buffer from a response 466 * @resp: the response descriptor containing the packet data 467 * 468 * Return a packet containing the immediate data of the given response. 469 */ 470 static int 471 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 472 { 473 474 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 475 m->m_ext.ext_buf = NULL; 476 m->m_ext.ext_type = 0; 477 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 478 return (0); 479 } 480 481 static __inline u_int 482 flits_to_desc(u_int n) 483 { 484 return (flit_desc_map[n]); 485 } 486 487 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 488 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 489 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 490 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 491 F_HIRCQPARITYERROR) 492 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 493 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 494 F_RSPQDISABLED) 495 496 /** 497 * t3_sge_err_intr_handler - SGE async event interrupt handler 498 * @adapter: the adapter 499 * 500 * Interrupt handler for SGE asynchronous (non-data) events. 
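 * Non-fatal causes are only logged; any bit in SGE_FATALERR (parity,
 * framing, response-queue credit overflow or disable) additionally
 * escalates to t3_fatal_err() after the interrupt cause register has
 * been cleared.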
501 */ 502 void 503 t3_sge_err_intr_handler(adapter_t *adapter) 504 { 505 unsigned int v, status; 506 507 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 508 if (status & SGE_PARERR) 509 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 510 status & SGE_PARERR); 511 if (status & SGE_FRAMINGERR) 512 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 513 status & SGE_FRAMINGERR); 514 if (status & F_RSPQCREDITOVERFOW) 515 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 516 517 if (status & F_RSPQDISABLED) { 518 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 519 520 CH_ALERT(adapter, 521 "packet delivered to disabled response queue (0x%x)\n", 522 (v >> S_RSPQ0DISABLED) & 0xff); 523 } 524 525 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 526 if (status & SGE_FATALERR) 527 t3_fatal_err(adapter); 528 } 529 530 void 531 t3_sge_prep(adapter_t *adap, struct sge_params *p) 532 { 533 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 534 535 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 536 nqsets *= adap->params.nports; 537 538 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 539 540 while (!powerof2(fl_q_size)) 541 fl_q_size--; 542 543 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 544 is_offload(adap); 545 546 #if __FreeBSD_version >= 700111 547 if (use_16k) { 548 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 549 jumbo_buf_size = MJUM16BYTES; 550 } else { 551 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 552 jumbo_buf_size = MJUM9BYTES; 553 } 554 #else 555 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); 556 jumbo_buf_size = MJUMPAGESIZE; 557 #endif 558 while (!powerof2(jumbo_q_size)) 559 jumbo_q_size--; 560 561 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 562 device_printf(adap->dev, 563 "Insufficient clusters and/or jumbo buffers.\n"); 564 565 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 566 567 for (i = 0; i < SGE_QSETS; ++i) { 568 struct qset_params *q = p->qset + i; 569 570 if (adap->params.nports > 2) { 571 q->coalesce_usecs = 50; 572 } else { 573 #ifdef INVARIANTS 574 q->coalesce_usecs = 10; 575 #else 576 q->coalesce_usecs = 5; 577 #endif 578 } 579 q->polling = 0; 580 q->rspq_size = RSPQ_Q_SIZE; 581 q->fl_size = fl_q_size; 582 q->jumbo_size = jumbo_q_size; 583 q->jumbo_buf_size = jumbo_buf_size; 584 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 585 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 586 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 587 q->cong_thres = 0; 588 } 589 } 590 591 int 592 t3_sge_alloc(adapter_t *sc) 593 { 594 595 /* The parent tag. */ 596 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 597 1, 0, /* algnmnt, boundary */ 598 BUS_SPACE_MAXADDR, /* lowaddr */ 599 BUS_SPACE_MAXADDR, /* highaddr */ 600 NULL, NULL, /* filter, filterarg */ 601 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 602 BUS_SPACE_UNRESTRICTED, /* nsegments */ 603 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 604 0, /* flags */ 605 NULL, NULL, /* lock, lockarg */ 606 &sc->parent_dmat)) { 607 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 608 return (ENOMEM); 609 } 610 611 /* 612 * DMA tag for normal sized RX frames 613 */ 614 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 615 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 616 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 617 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 618 return (ENOMEM); 619 } 620 621 /* 622 * DMA tag for jumbo sized RX frames. 
623 */ 624 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 625 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 626 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 627 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 628 return (ENOMEM); 629 } 630 631 /* 632 * DMA tag for TX frames. 633 */ 634 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 635 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 636 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 637 NULL, NULL, &sc->tx_dmat)) { 638 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 639 return (ENOMEM); 640 } 641 642 return (0); 643 } 644 645 int 646 t3_sge_free(struct adapter * sc) 647 { 648 649 if (sc->tx_dmat != NULL) 650 bus_dma_tag_destroy(sc->tx_dmat); 651 652 if (sc->rx_jumbo_dmat != NULL) 653 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 654 655 if (sc->rx_dmat != NULL) 656 bus_dma_tag_destroy(sc->rx_dmat); 657 658 if (sc->parent_dmat != NULL) 659 bus_dma_tag_destroy(sc->parent_dmat); 660 661 return (0); 662 } 663 664 void 665 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 666 { 667 668 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 669 qs->rspq.polling = 0 /* p->polling */; 670 } 671 672 #if !defined(__i386__) && !defined(__amd64__) 673 static void 674 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 675 { 676 struct refill_fl_cb_arg *cb_arg = arg; 677 678 cb_arg->error = error; 679 cb_arg->seg = segs[0]; 680 cb_arg->nseg = nseg; 681 682 } 683 #endif 684 /** 685 * refill_fl - refill an SGE free-buffer list 686 * @sc: the controller softc 687 * @q: the free-list to refill 688 * @n: the number of new buffers to allocate 689 * 690 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 691 * The caller must assure that @n does not exceed the queue's capacity. 
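 * In the common case this is driven by __refill_fl(), which passes at
 * most min(16, fl->size - fl->credits) buffers per call so the free
 * list is topped up incrementally without overfilling it.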
 */
static void
refill_fl(adapter_t *sc, struct sge_fl *q, int n)
{
	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
	struct rx_desc *d = &q->desc[q->pidx];
	struct refill_fl_cb_arg cb_arg;
	struct mbuf *m;
	caddr_t cl;
	int err;

	cb_arg.error = 0;
	while (n--) {
		/*
		 * We only allocate a cluster here; mbuf allocation happens
		 * after rx.
		 */
		if (q->zone == zone_pack) {
			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
				break;
			cl = m->m_ext.ext_buf;
		} else {
			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
				break;
			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
				uma_zfree(q->zone, cl);
				break;
			}
		}
		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
				uma_zfree(q->zone, cl);
				goto done;
			}
			sd->flags |= RX_SW_DESC_MAP_CREATED;
		}
#if !defined(__i386__) && !defined(__amd64__)
		err = bus_dmamap_load(q->entry_tag, sd->map,
		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);

		if (err != 0 || cb_arg.error) {
			if (q->zone == zone_pack)
				uma_zfree(q->zone, cl);
			m_free(m);
			goto done;
		}
#else
		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
#endif
		sd->flags |= RX_SW_DESC_INUSE;
		sd->rxsd_cl = cl;
		sd->m = m;
		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
		d->gen2 = htobe32(V_FLD_GEN2(q->gen));

		d++;
		sd++;

		if (++q->pidx == q->size) {
			q->pidx = 0;
			q->gen ^= 1;
			sd = q->sdesc;
			d = q->desc;
		}
		q->credits++;
		q->db_pending++;
	}

done:
	if (q->db_pending >= 32) {
		q->db_pending = 0;
		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
	}
}

/**
 * free_rx_bufs - free the Rx buffers on an SGE free list
 * @sc: the controller softc
 * @q: the SGE free list to clean up
 *
 * Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 * this queue should be stopped before calling this function.
777 */ 778 static void 779 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 780 { 781 u_int cidx = q->cidx; 782 783 while (q->credits--) { 784 struct rx_sw_desc *d = &q->sdesc[cidx]; 785 786 if (d->flags & RX_SW_DESC_INUSE) { 787 bus_dmamap_unload(q->entry_tag, d->map); 788 bus_dmamap_destroy(q->entry_tag, d->map); 789 if (q->zone == zone_pack) { 790 m_init(d->m, zone_pack, MCLBYTES, 791 M_NOWAIT, MT_DATA, M_EXT); 792 uma_zfree(zone_pack, d->m); 793 } else { 794 m_init(d->m, zone_mbuf, MLEN, 795 M_NOWAIT, MT_DATA, 0); 796 uma_zfree(zone_mbuf, d->m); 797 uma_zfree(q->zone, d->rxsd_cl); 798 } 799 } 800 801 d->rxsd_cl = NULL; 802 d->m = NULL; 803 if (++cidx == q->size) 804 cidx = 0; 805 } 806 } 807 808 static __inline void 809 __refill_fl(adapter_t *adap, struct sge_fl *fl) 810 { 811 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 812 } 813 814 static __inline void 815 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 816 { 817 uint32_t reclaimable = fl->size - fl->credits; 818 819 if (reclaimable > 0) 820 refill_fl(adap, fl, min(max, reclaimable)); 821 } 822 823 /** 824 * recycle_rx_buf - recycle a receive buffer 825 * @adapter: the adapter 826 * @q: the SGE free list 827 * @idx: index of buffer to recycle 828 * 829 * Recycles the specified buffer on the given free list by adding it at 830 * the next available slot on the list. 831 */ 832 static void 833 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 834 { 835 struct rx_desc *from = &q->desc[idx]; 836 struct rx_desc *to = &q->desc[q->pidx]; 837 838 q->sdesc[q->pidx] = q->sdesc[idx]; 839 to->addr_lo = from->addr_lo; // already big endian 840 to->addr_hi = from->addr_hi; // likewise 841 wmb(); /* necessary ? */ 842 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 843 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 844 q->credits++; 845 846 if (++q->pidx == q->size) { 847 q->pidx = 0; 848 q->gen ^= 1; 849 } 850 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 851 } 852 853 static void 854 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 855 { 856 uint32_t *addr; 857 858 addr = arg; 859 *addr = segs[0].ds_addr; 860 } 861 862 static int 863 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 864 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 865 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 866 { 867 size_t len = nelem * elem_size; 868 void *s = NULL; 869 void *p = NULL; 870 int err; 871 872 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 873 BUS_SPACE_MAXADDR_32BIT, 874 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 875 len, 0, NULL, NULL, tag)) != 0) { 876 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 877 return (ENOMEM); 878 } 879 880 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 881 map)) != 0) { 882 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 883 return (ENOMEM); 884 } 885 886 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 887 bzero(p, len); 888 *(void **)desc = p; 889 890 if (sw_size) { 891 len = nelem * sw_size; 892 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 893 *(void **)sdesc = s; 894 } 895 if (parent_entry_tag == NULL) 896 return (0); 897 898 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 899 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 900 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 901 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 902 NULL, NULL, entry_tag)) != 0) { 903 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 904 return (ENOMEM); 905 } 906 
	return (0);
}

static void
sge_slow_intr_handler(void *arg, int ncount)
{
	adapter_t *sc = arg;

	t3_slow_intr_handler(sc);
	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
}

/**
 * sge_timer_cb - perform periodic maintenance of an SGE qset
 * @data: the SGE queue set to maintain
 *
 * Runs periodically from a timer to perform maintenance of an SGE queue
 * set.  It performs the following tasks:
 *
 * a) Cleans up any completed Tx descriptors that may still be pending.
 * Normal descriptor cleanup happens when new packets are added to a Tx
 * queue so this timer is relatively infrequent and does any cleanup only
 * if the Tx queue has not seen any new packets in a while.  We make a
 * best effort attempt to reclaim descriptors, in that we don't wait
 * around if we cannot get a queue's lock (which most likely is because
 * someone else is queueing new packets and so will also handle the clean
 * up).  Since control queues use immediate data exclusively we don't
 * bother cleaning them up here.
 *
 * b) Replenishes Rx queues that have run out due to memory shortage.
 * Normally new Rx buffers are added when existing ones are consumed but
 * when out of memory a queue can become empty.  We try to add only a few
 * buffers here; the queue will be replenished fully as these new buffers
 * are used up if the memory shortage has subsided.
 *
 * c) Return coalesced response queue credits in case a response queue is
 * starved.
 *
 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell
 * fifo overflows and the FW doesn't implement any recovery scheme yet.
 */
static void
sge_timer_cb(void *arg)
{
	adapter_t *sc = arg;

	if ((sc->flags & USING_MSIX) == 0) {
		struct port_info *pi;
		struct sge_qset *qs;
		struct sge_txq *txq;
		int i, j;
		int reclaim_ofl, refill_rx;

		if (sc->open_device_map == 0)
			return;

		for (i = 0; i < sc->params.nports; i++) {
			pi = &sc->port[i];
			for (j = 0; j < pi->nqsets; j++) {
				qs = &sc->sge.qs[pi->first_qset + j];
				txq = &qs->txq[0];
				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
				    (qs->fl[1].credits < qs->fl[1].size));
				if (reclaim_ofl || refill_rx) {
					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
					break;
				}
			}
		}
	}

	if (sc->params.nports > 2) {
		int i;

		for_each_port(sc, i) {
			struct port_info *pi = &sc->port[i];

			t3_write_reg(sc, A_SG_KDOORBELL,
			    F_SELEGRCNTX |
			    (FW_TUNNEL_SGEEC_START + pi->first_qset));
		}
	}
	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
	    sc->open_device_map != 0)
		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
}

/*
 * This is meant to be a catch-all function to keep sge state private
 * to sge.c.
 */
int
t3_sge_init_adapter(adapter_t *sc)
{
	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
	return (0);
}

int
t3_sge_reset_adapter(adapter_t *sc)
{
	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
	return (0);
}

int
t3_sge_init_port(struct port_info *pi)
{
1019 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1020 return (0); 1021 } 1022 1023 /** 1024 * refill_rspq - replenish an SGE response queue 1025 * @adapter: the adapter 1026 * @q: the response queue to replenish 1027 * @credits: how many new responses to make available 1028 * 1029 * Replenishes a response queue by making the supplied number of responses 1030 * available to HW. 1031 */ 1032 static __inline void 1033 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1034 { 1035 1036 /* mbufs are allocated on demand when a rspq entry is processed. */ 1037 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1038 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1039 } 1040 1041 static void 1042 sge_txq_reclaim_handler(void *arg, int ncount) 1043 { 1044 struct sge_qset *qs = arg; 1045 int i; 1046 1047 for (i = 0; i < 3; i++) 1048 reclaim_completed_tx(qs, 16, i); 1049 } 1050 1051 static void 1052 sge_timer_reclaim(void *arg, int ncount) 1053 { 1054 struct port_info *pi = arg; 1055 int i, nqsets = pi->nqsets; 1056 adapter_t *sc = pi->adapter; 1057 struct sge_qset *qs; 1058 struct mtx *lock; 1059 1060 KASSERT((sc->flags & USING_MSIX) == 0, 1061 ("can't call timer reclaim for msi-x")); 1062 1063 for (i = 0; i < nqsets; i++) { 1064 qs = &sc->sge.qs[pi->first_qset + i]; 1065 1066 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1067 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1068 &sc->sge.qs[0].rspq.lock; 1069 1070 if (mtx_trylock(lock)) { 1071 /* XXX currently assume that we are *NOT* polling */ 1072 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1073 1074 if (qs->fl[0].credits < qs->fl[0].size - 16) 1075 __refill_fl(sc, &qs->fl[0]); 1076 if (qs->fl[1].credits < qs->fl[1].size - 16) 1077 __refill_fl(sc, &qs->fl[1]); 1078 1079 if (status & (1 << qs->rspq.cntxt_id)) { 1080 if (qs->rspq.credits) { 1081 refill_rspq(sc, &qs->rspq, 1); 1082 qs->rspq.credits--; 1083 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1084 1 << qs->rspq.cntxt_id); 1085 } 1086 } 1087 mtx_unlock(lock); 1088 } 1089 } 1090 } 1091 1092 /** 1093 * init_qset_cntxt - initialize an SGE queue set context info 1094 * @qs: the queue set 1095 * @id: the queue set id 1096 * 1097 * Initializes the TIDs and context ids for the queues of a queue set. 
1098 */ 1099 static void 1100 init_qset_cntxt(struct sge_qset *qs, u_int id) 1101 { 1102 1103 qs->rspq.cntxt_id = id; 1104 qs->fl[0].cntxt_id = 2 * id; 1105 qs->fl[1].cntxt_id = 2 * id + 1; 1106 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1107 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1108 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1109 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1110 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1111 1112 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1113 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1114 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1115 } 1116 1117 1118 static void 1119 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1120 { 1121 txq->in_use += ndesc; 1122 /* 1123 * XXX we don't handle stopping of queue 1124 * presumably start handles this when we bump against the end 1125 */ 1126 txqs->gen = txq->gen; 1127 txq->unacked += ndesc; 1128 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1129 txq->unacked &= 31; 1130 txqs->pidx = txq->pidx; 1131 txq->pidx += ndesc; 1132 #ifdef INVARIANTS 1133 if (((txqs->pidx > txq->cidx) && 1134 (txq->pidx < txqs->pidx) && 1135 (txq->pidx >= txq->cidx)) || 1136 ((txqs->pidx < txq->cidx) && 1137 (txq->pidx >= txq-> cidx)) || 1138 ((txqs->pidx < txq->cidx) && 1139 (txq->cidx < txqs->pidx))) 1140 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1141 txqs->pidx, txq->pidx, txq->cidx); 1142 #endif 1143 if (txq->pidx >= txq->size) { 1144 txq->pidx -= txq->size; 1145 txq->gen ^= 1; 1146 } 1147 1148 } 1149 1150 /** 1151 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1152 * @m: the packet mbufs 1153 * @nsegs: the number of segments 1154 * 1155 * Returns the number of Tx descriptors needed for the given Ethernet 1156 * packet. Ethernet packets require addition of WR and CPL headers. 
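 * For example, a packet larger than PIO_LEN that maps to 3 DMA segments
 * needs sgl_len(3) + 2 = 7 flits (one more if TSO is in use), which
 * flits_to_desc() turns into a single descriptor.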
1157 */ 1158 static __inline unsigned int 1159 calc_tx_descs(const struct mbuf *m, int nsegs) 1160 { 1161 unsigned int flits; 1162 1163 if (m->m_pkthdr.len <= PIO_LEN) 1164 return 1; 1165 1166 flits = sgl_len(nsegs) + 2; 1167 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1168 flits++; 1169 1170 return flits_to_desc(flits); 1171 } 1172 1173 static unsigned int 1174 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1175 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1176 { 1177 struct mbuf *m0; 1178 int err, pktlen, pass = 0; 1179 bus_dma_tag_t tag = txq->entry_tag; 1180 1181 retry: 1182 err = 0; 1183 m0 = *m; 1184 pktlen = m0->m_pkthdr.len; 1185 #if defined(__i386__) || defined(__amd64__) 1186 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1187 goto done; 1188 } else 1189 #endif 1190 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1191 1192 if (err == 0) { 1193 goto done; 1194 } 1195 if (err == EFBIG && pass == 0) { 1196 pass = 1; 1197 /* Too many segments, try to defrag */ 1198 m0 = m_defrag(m0, M_DONTWAIT); 1199 if (m0 == NULL) { 1200 m_freem(*m); 1201 *m = NULL; 1202 return (ENOBUFS); 1203 } 1204 *m = m0; 1205 goto retry; 1206 } else if (err == ENOMEM) { 1207 return (err); 1208 } if (err) { 1209 if (cxgb_debug) 1210 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1211 m_freem(m0); 1212 *m = NULL; 1213 return (err); 1214 } 1215 done: 1216 #if !defined(__i386__) && !defined(__amd64__) 1217 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1218 #endif 1219 txsd->flags |= TX_SW_DESC_MAPPED; 1220 1221 return (0); 1222 } 1223 1224 /** 1225 * make_sgl - populate a scatter/gather list for a packet 1226 * @sgp: the SGL to populate 1227 * @segs: the packet dma segments 1228 * @nsegs: the number of segments 1229 * 1230 * Generates a scatter/gather list for the buffers that make up a packet 1231 * and returns the SGL size in 8-byte words. The caller must size the SGL 1232 * appropriately. 1233 */ 1234 static __inline void 1235 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1236 { 1237 int i, idx; 1238 1239 for (idx = 0, i = 0; i < nsegs; i++) { 1240 /* 1241 * firmware doesn't like empty segments 1242 */ 1243 if (segs[i].ds_len == 0) 1244 continue; 1245 if (i && idx == 0) 1246 ++sgp; 1247 1248 sgp->len[idx] = htobe32(segs[i].ds_len); 1249 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1250 idx ^= 1; 1251 } 1252 1253 if (idx) { 1254 sgp->len[idx] = 0; 1255 sgp->addr[idx] = 0; 1256 } 1257 } 1258 1259 /** 1260 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1261 * @adap: the adapter 1262 * @q: the Tx queue 1263 * 1264 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1265 * where the HW is going to sleep just after we checked, however, 1266 * then the interrupt handler will detect the outstanding TX packet 1267 * and ring the doorbell for us. 1268 * 1269 * When GTS is disabled we unconditionally ring the doorbell. 
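 * (In the non-GTS configuration used here the doorbell write is
 * batched: it is issued once 32 descriptors are pending, or
 * immediately when @mustring is set.)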
1270 */ 1271 static __inline void 1272 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) 1273 { 1274 #if USE_GTS 1275 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1276 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1277 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1278 #ifdef T3_TRACE 1279 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1280 q->cntxt_id); 1281 #endif 1282 t3_write_reg(adap, A_SG_KDOORBELL, 1283 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1284 } 1285 #else 1286 if (mustring || ++q->db_pending >= 32) { 1287 wmb(); /* write descriptors before telling HW */ 1288 t3_write_reg(adap, A_SG_KDOORBELL, 1289 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1290 q->db_pending = 0; 1291 } 1292 #endif 1293 } 1294 1295 static __inline void 1296 wr_gen2(struct tx_desc *d, unsigned int gen) 1297 { 1298 #if SGE_NUM_GENBITS == 2 1299 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1300 #endif 1301 } 1302 1303 /** 1304 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1305 * @ndesc: number of Tx descriptors spanned by the SGL 1306 * @txd: first Tx descriptor to be written 1307 * @txqs: txq state (generation and producer index) 1308 * @txq: the SGE Tx queue 1309 * @sgl: the SGL 1310 * @flits: number of flits to the start of the SGL in the first descriptor 1311 * @sgl_flits: the SGL size in flits 1312 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1313 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1314 * 1315 * Write a work request header and an associated SGL. If the SGL is 1316 * small enough to fit into one Tx descriptor it has already been written 1317 * and we just need to write the WR header. Otherwise we distribute the 1318 * SGL across the number of descriptors it spans. 1319 */ 1320 static void 1321 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1322 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1323 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1324 { 1325 1326 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1327 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1328 1329 if (__predict_true(ndesc == 1)) { 1330 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1331 V_WR_SGLSFLT(flits)) | wr_hi, 1332 htonl(V_WR_LEN(flits + sgl_flits) | 1333 V_WR_GEN(txqs->gen)) | wr_lo); 1334 /* XXX gen? 
*/ 1335 wr_gen2(txd, txqs->gen); 1336 1337 } else { 1338 unsigned int ogen = txqs->gen; 1339 const uint64_t *fp = (const uint64_t *)sgl; 1340 struct work_request_hdr *wp = wrp; 1341 1342 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1343 V_WR_SGLSFLT(flits)) | wr_hi; 1344 1345 while (sgl_flits) { 1346 unsigned int avail = WR_FLITS - flits; 1347 1348 if (avail > sgl_flits) 1349 avail = sgl_flits; 1350 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1351 sgl_flits -= avail; 1352 ndesc--; 1353 if (!sgl_flits) 1354 break; 1355 1356 fp += avail; 1357 txd++; 1358 txsd++; 1359 if (++txqs->pidx == txq->size) { 1360 txqs->pidx = 0; 1361 txqs->gen ^= 1; 1362 txd = txq->desc; 1363 txsd = txq->sdesc; 1364 } 1365 1366 /* 1367 * when the head of the mbuf chain 1368 * is freed all clusters will be freed 1369 * with it 1370 */ 1371 wrp = (struct work_request_hdr *)txd; 1372 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1373 V_WR_SGLSFLT(1)) | wr_hi; 1374 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1375 sgl_flits + 1)) | 1376 V_WR_GEN(txqs->gen)) | wr_lo; 1377 wr_gen2(txd, txqs->gen); 1378 flits = 1; 1379 } 1380 wrp->wrh_hi |= htonl(F_WR_EOP); 1381 wmb(); 1382 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1383 wr_gen2((struct tx_desc *)wp, ogen); 1384 } 1385 } 1386 1387 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1388 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1389 1390 #define GET_VTAG(cntrl, m) \ 1391 do { \ 1392 if ((m)->m_flags & M_VLANTAG) \ 1393 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1394 } while (0) 1395 1396 static int 1397 t3_encap(struct sge_qset *qs, struct mbuf **m) 1398 { 1399 adapter_t *sc; 1400 struct mbuf *m0; 1401 struct sge_txq *txq; 1402 struct txq_state txqs; 1403 struct port_info *pi; 1404 unsigned int ndesc, flits, cntrl, mlen; 1405 int err, nsegs, tso_info = 0; 1406 1407 struct work_request_hdr *wrp; 1408 struct tx_sw_desc *txsd; 1409 struct sg_ent *sgp, *sgl; 1410 uint32_t wr_hi, wr_lo, sgl_flits; 1411 bus_dma_segment_t segs[TX_MAX_SEGS]; 1412 1413 struct tx_desc *txd; 1414 1415 pi = qs->port; 1416 sc = pi->adapter; 1417 txq = &qs->txq[TXQ_ETH]; 1418 txd = &txq->desc[txq->pidx]; 1419 txsd = &txq->sdesc[txq->pidx]; 1420 sgl = txq->txq_sgl; 1421 1422 prefetch(txd); 1423 m0 = *m; 1424 1425 mtx_assert(&qs->lock, MA_OWNED); 1426 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1427 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1428 1429 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1430 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1431 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1432 1433 if (m0->m_nextpkt != NULL) { 1434 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1435 ndesc = 1; 1436 mlen = 0; 1437 } else { 1438 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1439 &m0, segs, &nsegs))) { 1440 if (cxgb_debug) 1441 printf("failed ... 
err=%d\n", err); 1442 return (err); 1443 } 1444 mlen = m0->m_pkthdr.len; 1445 ndesc = calc_tx_descs(m0, nsegs); 1446 } 1447 txq_prod(txq, ndesc, &txqs); 1448 1449 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1450 txsd->m = m0; 1451 1452 if (m0->m_nextpkt != NULL) { 1453 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1454 int i, fidx; 1455 1456 if (nsegs > 7) 1457 panic("trying to coalesce %d packets in to one WR", nsegs); 1458 txq->txq_coalesced += nsegs; 1459 wrp = (struct work_request_hdr *)txd; 1460 flits = nsegs*2 + 1; 1461 1462 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1463 struct cpl_tx_pkt_batch_entry *cbe; 1464 uint64_t flit; 1465 uint32_t *hflit = (uint32_t *)&flit; 1466 int cflags = m0->m_pkthdr.csum_flags; 1467 1468 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1469 GET_VTAG(cntrl, m0); 1470 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1471 if (__predict_false(!(cflags & CSUM_IP))) 1472 cntrl |= F_TXPKT_IPCSUM_DIS; 1473 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1474 cntrl |= F_TXPKT_L4CSUM_DIS; 1475 1476 hflit[0] = htonl(cntrl); 1477 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1478 flit |= htobe64(1 << 24); 1479 cbe = &cpl_batch->pkt_entry[i]; 1480 cbe->cntrl = hflit[0]; 1481 cbe->len = hflit[1]; 1482 cbe->addr = htobe64(segs[i].ds_addr); 1483 } 1484 1485 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1486 V_WR_SGLSFLT(flits)) | 1487 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1488 wr_lo = htonl(V_WR_LEN(flits) | 1489 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1490 set_wr_hdr(wrp, wr_hi, wr_lo); 1491 wmb(); 1492 ETHER_BPF_MTAP(pi->ifp, m0); 1493 wr_gen2(txd, txqs.gen); 1494 check_ring_tx_db(sc, txq, 0); 1495 return (0); 1496 } else if (tso_info) { 1497 uint16_t eth_type; 1498 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1499 struct ether_header *eh; 1500 void *l3hdr; 1501 struct tcphdr *tcp; 1502 1503 txd->flit[2] = 0; 1504 GET_VTAG(cntrl, m0); 1505 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1506 hdr->cntrl = htonl(cntrl); 1507 hdr->len = htonl(mlen | 0x80000000); 1508 1509 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1510 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1511 m0, mlen, m0->m_pkthdr.tso_segsz, 1512 m0->m_pkthdr.csum_flags, m0->m_flags); 1513 panic("tx tso packet too small"); 1514 } 1515 1516 /* Make sure that ether, ip, tcp headers are all in m0 */ 1517 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1518 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1519 if (__predict_false(m0 == NULL)) { 1520 /* XXX panic probably an overreaction */ 1521 panic("couldn't fit header into mbuf"); 1522 } 1523 } 1524 1525 eh = mtod(m0, struct ether_header *); 1526 eth_type = eh->ether_type; 1527 if (eth_type == htons(ETHERTYPE_VLAN)) { 1528 struct ether_vlan_header *evh = (void *)eh; 1529 1530 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); 1531 l3hdr = evh + 1; 1532 eth_type = evh->evl_proto; 1533 } else { 1534 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); 1535 l3hdr = eh + 1; 1536 } 1537 1538 if (eth_type == htons(ETHERTYPE_IP)) { 1539 struct ip *ip = l3hdr; 1540 1541 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); 1542 tcp = (struct tcphdr *)(ip + 1); 1543 } else if (eth_type == htons(ETHERTYPE_IPV6)) { 1544 struct ip6_hdr *ip6 = l3hdr; 1545 1546 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, 1547 ("%s: CSUM_TSO with ip6_nxt %d", 1548 __func__, ip6->ip6_nxt)); 1549 1550 tso_info |= F_LSO_IPV6; 1551 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); 1552 tcp = (struct tcphdr *)(ip6 + 1); 1553 } else 1554 panic("%s: 
CSUM_TSO but neither ip nor ip6", __func__); 1555 1556 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); 1557 hdr->lso_info = htonl(tso_info); 1558 1559 if (__predict_false(mlen <= PIO_LEN)) { 1560 /* 1561 * pkt not undersized but fits in PIO_LEN 1562 * Indicates a TSO bug at the higher levels. 1563 */ 1564 txsd->m = NULL; 1565 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1566 flits = (mlen + 7) / 8 + 3; 1567 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1568 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1569 F_WR_SOP | F_WR_EOP | txqs.compl); 1570 wr_lo = htonl(V_WR_LEN(flits) | 1571 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1572 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1573 wmb(); 1574 ETHER_BPF_MTAP(pi->ifp, m0); 1575 wr_gen2(txd, txqs.gen); 1576 check_ring_tx_db(sc, txq, 0); 1577 m_freem(m0); 1578 return (0); 1579 } 1580 flits = 3; 1581 } else { 1582 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1583 1584 GET_VTAG(cntrl, m0); 1585 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1586 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1587 cntrl |= F_TXPKT_IPCSUM_DIS; 1588 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1589 cntrl |= F_TXPKT_L4CSUM_DIS; 1590 cpl->cntrl = htonl(cntrl); 1591 cpl->len = htonl(mlen | 0x80000000); 1592 1593 if (mlen <= PIO_LEN) { 1594 txsd->m = NULL; 1595 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1596 flits = (mlen + 7) / 8 + 2; 1597 1598 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1599 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1600 F_WR_SOP | F_WR_EOP | txqs.compl); 1601 wr_lo = htonl(V_WR_LEN(flits) | 1602 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1603 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1604 wmb(); 1605 ETHER_BPF_MTAP(pi->ifp, m0); 1606 wr_gen2(txd, txqs.gen); 1607 check_ring_tx_db(sc, txq, 0); 1608 m_freem(m0); 1609 return (0); 1610 } 1611 flits = 2; 1612 } 1613 wrp = (struct work_request_hdr *)txd; 1614 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1615 make_sgl(sgp, segs, nsegs); 1616 1617 sgl_flits = sgl_len(nsegs); 1618 1619 ETHER_BPF_MTAP(pi->ifp, m0); 1620 1621 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1622 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1623 wr_lo = htonl(V_WR_TID(txq->token)); 1624 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1625 sgl_flits, wr_hi, wr_lo); 1626 check_ring_tx_db(sc, txq, 0); 1627 1628 return (0); 1629 } 1630 1631 void 1632 cxgb_tx_watchdog(void *arg) 1633 { 1634 struct sge_qset *qs = arg; 1635 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1636 1637 if (qs->coalescing != 0 && 1638 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1639 TXQ_RING_EMPTY(qs)) 1640 qs->coalescing = 0; 1641 else if (qs->coalescing == 0 && 1642 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1643 qs->coalescing = 1; 1644 if (TXQ_TRYLOCK(qs)) { 1645 qs->qs_flags |= QS_FLUSHING; 1646 cxgb_start_locked(qs); 1647 qs->qs_flags &= ~QS_FLUSHING; 1648 TXQ_UNLOCK(qs); 1649 } 1650 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1651 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1652 qs, txq->txq_watchdog.c_cpu); 1653 } 1654 1655 static void 1656 cxgb_tx_timeout(void *arg) 1657 { 1658 struct sge_qset *qs = arg; 1659 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1660 1661 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1662 qs->coalescing = 1; 1663 if (TXQ_TRYLOCK(qs)) { 1664 qs->qs_flags |= QS_TIMEOUT; 1665 cxgb_start_locked(qs); 1666 qs->qs_flags &= ~QS_TIMEOUT; 1667 TXQ_UNLOCK(qs); 1668 } 1669 } 1670 1671 static void 1672 cxgb_start_locked(struct sge_qset *qs) 1673 { 1674 struct mbuf *m_head = NULL; 1675 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1676 struct port_info *pi = qs->port; 1677 struct ifnet *ifp = pi->ifp; 1678 1679 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1680 reclaim_completed_tx(qs, 0, TXQ_ETH); 1681 1682 if (!pi->link_config.link_ok) { 1683 TXQ_RING_FLUSH(qs); 1684 return; 1685 } 1686 TXQ_LOCK_ASSERT(qs); 1687 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1688 pi->link_config.link_ok) { 1689 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1690 1691 if (txq->size - txq->in_use <= TX_MAX_DESC) 1692 break; 1693 1694 if ((m_head = cxgb_dequeue(qs)) == NULL) 1695 break; 1696 /* 1697 * Encapsulation can modify our pointer, and or make it 1698 * NULL on failure. In that event, we can't requeue. 
1699 */ 1700 if (t3_encap(qs, &m_head) || m_head == NULL) 1701 break; 1702 1703 m_head = NULL; 1704 } 1705 1706 if (txq->db_pending) 1707 check_ring_tx_db(pi->adapter, txq, 1); 1708 1709 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1710 pi->link_config.link_ok) 1711 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1712 qs, txq->txq_timer.c_cpu); 1713 if (m_head != NULL) 1714 m_freem(m_head); 1715 } 1716 1717 static int 1718 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1719 { 1720 struct port_info *pi = qs->port; 1721 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1722 struct buf_ring *br = txq->txq_mr; 1723 int error, avail; 1724 1725 avail = txq->size - txq->in_use; 1726 TXQ_LOCK_ASSERT(qs); 1727 1728 /* 1729 * We can only do a direct transmit if the following are true: 1730 * - we aren't coalescing (ring < 3/4 full) 1731 * - the link is up -- checked in caller 1732 * - there are no packets enqueued already 1733 * - there is space in hardware transmit queue 1734 */ 1735 if (check_pkt_coalesce(qs) == 0 && 1736 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1737 if (t3_encap(qs, &m)) { 1738 if (m != NULL && 1739 (error = drbr_enqueue(ifp, br, m)) != 0) 1740 return (error); 1741 } else { 1742 if (txq->db_pending) 1743 check_ring_tx_db(pi->adapter, txq, 1); 1744 1745 /* 1746 * We've bypassed the buf ring so we need to update 1747 * the stats directly 1748 */ 1749 txq->txq_direct_packets++; 1750 txq->txq_direct_bytes += m->m_pkthdr.len; 1751 } 1752 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1753 return (error); 1754 1755 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1756 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1757 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1758 cxgb_start_locked(qs); 1759 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1760 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1761 qs, txq->txq_timer.c_cpu); 1762 return (0); 1763 } 1764 1765 int 1766 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1767 { 1768 struct sge_qset *qs; 1769 struct port_info *pi = ifp->if_softc; 1770 int error, qidx = pi->first_qset; 1771 1772 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1773 ||(!pi->link_config.link_ok)) { 1774 m_freem(m); 1775 return (0); 1776 } 1777 1778 if (m->m_flags & M_FLOWID) 1779 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1780 1781 qs = &pi->adapter->sge.qs[qidx]; 1782 1783 if (TXQ_TRYLOCK(qs)) { 1784 /* XXX running */ 1785 error = cxgb_transmit_locked(ifp, qs, m); 1786 TXQ_UNLOCK(qs); 1787 } else 1788 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1789 return (error); 1790 } 1791 1792 void 1793 cxgb_qflush(struct ifnet *ifp) 1794 { 1795 /* 1796 * flush any enqueued mbufs in the buf_rings 1797 * and in the transmit queues 1798 * no-op for now 1799 */ 1800 return; 1801 } 1802 1803 /** 1804 * write_imm - write a packet into a Tx descriptor as immediate data 1805 * @d: the Tx descriptor to write 1806 * @m: the packet 1807 * @len: the length of packet data to write as immediate data 1808 * @gen: the generation bit value to write 1809 * 1810 * Writes a packet as immediate data into a Tx descriptor. The packet 1811 * contains a work request at its beginning. We must write the packet 1812 * carefully so the SGE doesn't read accidentally before it's written in 1813 * its entirety. 
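 * The payload flits are copied in first; the WR header and, on
 * 2-genbit parts, the generation flit are written last so the SGE
 * never sees a partially written request.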
1814 */ 1815 static __inline void 1816 write_imm(struct tx_desc *d, struct mbuf *m, 1817 unsigned int len, unsigned int gen) 1818 { 1819 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1820 struct work_request_hdr *to = (struct work_request_hdr *)d; 1821 uint32_t wr_hi, wr_lo; 1822 1823 if (len > WR_LEN) 1824 panic("len too big %d\n", len); 1825 if (len < sizeof(*from)) 1826 panic("len too small %d", len); 1827 1828 memcpy(&to[1], &from[1], len - sizeof(*from)); 1829 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1830 V_WR_BCNTLFLT(len & 7)); 1831 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | 1832 V_WR_LEN((len + 7) / 8)); 1833 set_wr_hdr(to, wr_hi, wr_lo); 1834 wmb(); 1835 wr_gen2(d, gen); 1836 1837 /* 1838 * This check is a hack we should really fix the logic so 1839 * that this can't happen 1840 */ 1841 if (m->m_type != MT_DONTFREE) 1842 m_freem(m); 1843 1844 } 1845 1846 /** 1847 * check_desc_avail - check descriptor availability on a send queue 1848 * @adap: the adapter 1849 * @q: the TX queue 1850 * @m: the packet needing the descriptors 1851 * @ndesc: the number of Tx descriptors needed 1852 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1853 * 1854 * Checks if the requested number of Tx descriptors is available on an 1855 * SGE send queue. If the queue is already suspended or not enough 1856 * descriptors are available the packet is queued for later transmission. 1857 * Must be called with the Tx queue locked. 1858 * 1859 * Returns 0 if enough descriptors are available, 1 if there aren't 1860 * enough descriptors and the packet has been queued, and 2 if the caller 1861 * needs to retry because there weren't enough descriptors at the 1862 * beginning of the call but some freed up in the mean time. 1863 */ 1864 static __inline int 1865 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1866 struct mbuf *m, unsigned int ndesc, 1867 unsigned int qid) 1868 { 1869 /* 1870 * XXX We currently only use this for checking the control queue 1871 * the control queue is only used for binding qsets which happens 1872 * at init time so we are guaranteed enough descriptors 1873 */ 1874 if (__predict_false(!mbufq_empty(&q->sendq))) { 1875 addq_exit: mbufq_tail(&q->sendq, m); 1876 return 1; 1877 } 1878 if (__predict_false(q->size - q->in_use < ndesc)) { 1879 1880 struct sge_qset *qs = txq_to_qset(q, qid); 1881 1882 setbit(&qs->txq_stopped, qid); 1883 if (should_restart_tx(q) && 1884 test_and_clear_bit(qid, &qs->txq_stopped)) 1885 return 2; 1886 1887 q->stops++; 1888 goto addq_exit; 1889 } 1890 return 0; 1891 } 1892 1893 1894 /** 1895 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1896 * @q: the SGE control Tx queue 1897 * 1898 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1899 * that send only immediate data (presently just the control queues) and 1900 * thus do not have any mbufs 1901 */ 1902 static __inline void 1903 reclaim_completed_tx_imm(struct sge_txq *q) 1904 { 1905 unsigned int reclaim = q->processed - q->cleaned; 1906 1907 q->in_use -= reclaim; 1908 q->cleaned += reclaim; 1909 } 1910 1911 static __inline int 1912 immediate(const struct mbuf *m) 1913 { 1914 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1915 } 1916 1917 /** 1918 * ctrl_xmit - send a packet through an SGE control Tx queue 1919 * @adap: the adapter 1920 * @q: the control queue 1921 * @m: the packet 1922 * 1923 * Send a packet through an SGE control Tx queue. 
Packets sent through 1924 * a control queue must fit entirely as immediate data in a single Tx 1925 * descriptor and have no page fragments. 1926 */ 1927 static int 1928 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1929 { 1930 int ret; 1931 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1932 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1933 1934 if (__predict_false(!immediate(m))) { 1935 m_freem(m); 1936 return 0; 1937 } 1938 1939 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1940 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1941 1942 TXQ_LOCK(qs); 1943 again: reclaim_completed_tx_imm(q); 1944 1945 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1946 if (__predict_false(ret)) { 1947 if (ret == 1) { 1948 TXQ_UNLOCK(qs); 1949 return (ENOSPC); 1950 } 1951 goto again; 1952 } 1953 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1954 1955 q->in_use++; 1956 if (++q->pidx >= q->size) { 1957 q->pidx = 0; 1958 q->gen ^= 1; 1959 } 1960 TXQ_UNLOCK(qs); 1961 wmb(); 1962 t3_write_reg(adap, A_SG_KDOORBELL, 1963 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1964 return (0); 1965 } 1966 1967 1968 /** 1969 * restart_ctrlq - restart a suspended control queue 1970 * @qs: the queue set cotaining the control queue 1971 * 1972 * Resumes transmission on a suspended Tx control queue. 1973 */ 1974 static void 1975 restart_ctrlq(void *data, int npending) 1976 { 1977 struct mbuf *m; 1978 struct sge_qset *qs = (struct sge_qset *)data; 1979 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1980 adapter_t *adap = qs->port->adapter; 1981 1982 TXQ_LOCK(qs); 1983 again: reclaim_completed_tx_imm(q); 1984 1985 while (q->in_use < q->size && 1986 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1987 1988 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1989 1990 if (++q->pidx >= q->size) { 1991 q->pidx = 0; 1992 q->gen ^= 1; 1993 } 1994 q->in_use++; 1995 } 1996 if (!mbufq_empty(&q->sendq)) { 1997 setbit(&qs->txq_stopped, TXQ_CTRL); 1998 1999 if (should_restart_tx(q) && 2000 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 2001 goto again; 2002 q->stops++; 2003 } 2004 TXQ_UNLOCK(qs); 2005 t3_write_reg(adap, A_SG_KDOORBELL, 2006 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2007 } 2008 2009 2010 /* 2011 * Send a management message through control queue 0 2012 */ 2013 int 2014 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2015 { 2016 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2017 } 2018 2019 /** 2020 * free_qset - free the resources of an SGE queue set 2021 * @sc: the controller owning the queue set 2022 * @q: the queue set 2023 * 2024 * Release the HW and SW resources associated with an SGE queue set, such 2025 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2026 * queue set must be quiesced prior to calling this. 
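 *
 * Called with the queue set lock held; the lock is released and
 * destroyed here.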
2027 */
2028 static void
2029 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2030 {
2031 	int i;
2032 
2033 	reclaim_completed_tx(q, 0, TXQ_ETH);
2034 	if (q->txq[TXQ_ETH].txq_mr != NULL) 
2035 		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
2036 	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
2037 		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
2038 		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
2039 	}
2040 
2041 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2042 		if (q->fl[i].desc) {
2043 			mtx_lock_spin(&sc->sge.reg_lock);
2044 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2045 			mtx_unlock_spin(&sc->sge.reg_lock);
2046 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2047 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2048 					q->fl[i].desc_map);
2049 			bus_dma_tag_destroy(q->fl[i].desc_tag);
2050 			bus_dma_tag_destroy(q->fl[i].entry_tag);
2051 		}
2052 		if (q->fl[i].sdesc) {
2053 			free_rx_bufs(sc, &q->fl[i]);
2054 			free(q->fl[i].sdesc, M_DEVBUF);
2055 		}
2056 	}
2057 
2058 	mtx_unlock(&q->lock);
2059 	MTX_DESTROY(&q->lock);
2060 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2061 		if (q->txq[i].desc) {
2062 			mtx_lock_spin(&sc->sge.reg_lock);
2063 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2064 			mtx_unlock_spin(&sc->sge.reg_lock);
2065 			bus_dmamap_unload(q->txq[i].desc_tag,
2066 					q->txq[i].desc_map);
2067 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2068 					q->txq[i].desc_map);
2069 			bus_dma_tag_destroy(q->txq[i].desc_tag);
2070 			bus_dma_tag_destroy(q->txq[i].entry_tag);
2071 		}
2072 		if (q->txq[i].sdesc) {
2073 			free(q->txq[i].sdesc, M_DEVBUF);
2074 		}
2075 	}
2076 
2077 	if (q->rspq.desc) {
2078 		mtx_lock_spin(&sc->sge.reg_lock);
2079 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2080 		mtx_unlock_spin(&sc->sge.reg_lock);
2081 
2082 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2083 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2084 			        q->rspq.desc_map);
2085 		bus_dma_tag_destroy(q->rspq.desc_tag);
2086 		MTX_DESTROY(&q->rspq.lock);
2087 	}
2088 
2089 #if defined(INET6) || defined(INET)
2090 	tcp_lro_free(&q->lro.ctrl);
2091 #endif
2092 
2093 	bzero(q, sizeof(*q));
2094 }
2095 
2096 /**
2097  *	t3_free_sge_resources - free SGE resources
2098  *	@sc: the adapter softc
2099  *
2100  *	Frees resources used by the SGE queue sets.
2101  */
2102 void
2103 t3_free_sge_resources(adapter_t *sc, int nqsets)
2104 {
2105 	int i;
2106 
2107 	for (i = 0; i < nqsets; ++i) {
2108 		TXQ_LOCK(&sc->sge.qs[i]);
2109 		t3_free_qset(sc, &sc->sge.qs[i]);
2110 	}
2111 }
2112 
2113 /**
2114  *	t3_sge_start - enable SGE
2115  *	@sc: the controller softc
2116  *
2117  *	Enables the SGE for DMAs.  This is the last step in starting packet
2118  *	transfers.
2119  */
2120 void
2121 t3_sge_start(adapter_t *sc)
2122 {
2123 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2124 }
2125 
2126 /**
2127  *	t3_sge_stop - disable SGE operation
2128  *	@sc: the adapter
2129  *
2130  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
2131  *	from error interrupts) or from normal process context.  In the latter
2132  *	case it also disables any pending queue restart tasklets.  Note that
2133  *	if it is called in interrupt context it cannot disable the restart
2134  *	tasklets because it cannot wait; however, the tasklets will have no
2135  *	effect since the doorbells are disabled, and the driver will call this
2136  *	again later from process context, at which time the tasklets will be
2137  *	stopped if they are still running.
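 *
 *	In this driver the restart handlers are taskqueue tasks rather than
 *	tasklets; draining them from process context is currently disabled
 *	(see the notyet block below).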
2138 */
2139 void
2140 t3_sge_stop(adapter_t *sc)
2141 {
2142 	int i, nqsets;
2143 	
2144 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2145 
2146 	if (sc->tq == NULL)
2147 		return;
2148 	
2149 	for (nqsets = i = 0; i < (sc)->params.nports; i++) 
2150 		nqsets += sc->port[i].nqsets;
2151 #ifdef notyet
2152 	/*
2153 	 * 
2154 	 * XXX
2155 	 */
2156 	for (i = 0; i < nqsets; ++i) {
2157 		struct sge_qset *qs = &sc->sge.qs[i];
2158 		
2159 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2160 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2161 	}
2162 #endif
2163 }
2164 
2165 /**
2166  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
2167  *	@qs: the queue set that owns the Tx queue
2168  *	@reclaimable: the number of descriptors to reclaim
2169  *	@queue: the Tx queue within the queue set to reclaim descriptors from
2170  *
2171  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2172  *	Tx buffers.
2173  *
2174  *	Called with the Tx queue lock held; mbufs for reclaimed descriptors
2175  *	are unmapped and freed, while descriptors without an mbuf are counted
2176  *	in txq_skipped.
2177  */
2178 void
2179 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2180 {
2181 	struct tx_sw_desc *txsd;
2182 	unsigned int cidx, mask;
2183 	struct sge_txq *q = &qs->txq[queue];
2184 
2185 #ifdef T3_TRACE
2186 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
2187 	    "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2188 #endif
2189 	cidx = q->cidx;
2190 	mask = q->size - 1;
2191 	txsd = &q->sdesc[cidx];
2192 
2193 	mtx_assert(&qs->lock, MA_OWNED);
2194 	while (reclaimable--) {
2195 		prefetch(q->sdesc[(cidx + 1) & mask].m);
2196 		prefetch(q->sdesc[(cidx + 2) & mask].m);
2197 
2198 		if (txsd->m != NULL) {
2199 			if (txsd->flags & TX_SW_DESC_MAPPED) {
2200 				bus_dmamap_unload(q->entry_tag, txsd->map);
2201 				txsd->flags &= ~TX_SW_DESC_MAPPED;
2202 			}
2203 			m_freem_list(txsd->m);
2204 			txsd->m = NULL;
2205 		} else
2206 			q->txq_skipped++;
2207 		
2208 		++txsd;
2209 		if (++cidx == q->size) {
2210 			cidx = 0;
2211 			txsd = q->sdesc;
2212 		}
2213 	}
2214 	q->cidx = cidx;
2215 
2216 }
2217 
2218 /**
2219  *	is_new_response - check if a response is newly written
2220  *	@r: the response descriptor
2221  *	@q: the response queue
2222  *
2223  *	Returns true if a response descriptor contains a yet unprocessed
2224  *	response.
2225  */
2226 static __inline int
2227 is_new_response(const struct rsp_desc *r,
2228     const struct sge_rspq *q)
2229 {
2230 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2231 }
2232 
2233 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2234 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2235 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2236 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2237 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2238 
2239 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2240 #define NOMEM_INTR_DELAY    2500
2241 
2242 /**
2243  *	write_ofld_wr - write an offload work request
2244  *	@adap: the adapter
2245  *	@m: the packet to send
2246  *	@q: the Tx queue
2247  *	@pidx: index of the first Tx descriptor to write
2248  *	@gen: the generation value to use
2249  *	@ndesc: number of descriptors the packet will occupy
2250  *
2251  *	Write an offload work request to send the supplied packet.  The packet
2252  *	data already carry the work request with most fields populated.
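 *
 *	Packets that fit as immediate data and carry no DMA segments are
 *	written with write_imm(); otherwise the header flits are copied into
 *	the descriptor and a scatter/gather list is appended via make_sgl()
 *	and write_wr_hdr_sgl().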
2253 */ 2254 static void 2255 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2256 struct sge_txq *q, unsigned int pidx, 2257 unsigned int gen, unsigned int ndesc, 2258 bus_dma_segment_t *segs, unsigned int nsegs) 2259 { 2260 unsigned int sgl_flits, flits; 2261 struct work_request_hdr *from; 2262 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2263 struct tx_desc *d = &q->desc[pidx]; 2264 struct txq_state txqs; 2265 2266 if (immediate(m) && nsegs == 0) { 2267 write_imm(d, m, m->m_len, gen); 2268 return; 2269 } 2270 2271 /* Only TX_DATA builds SGLs */ 2272 from = mtod(m, struct work_request_hdr *); 2273 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2274 2275 flits = m->m_len / 8; 2276 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2277 2278 make_sgl(sgp, segs, nsegs); 2279 sgl_flits = sgl_len(nsegs); 2280 2281 txqs.gen = gen; 2282 txqs.pidx = pidx; 2283 txqs.compl = 0; 2284 2285 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2286 from->wrh_hi, from->wrh_lo); 2287 } 2288 2289 /** 2290 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2291 * @m: the packet 2292 * 2293 * Returns the number of Tx descriptors needed for the given offload 2294 * packet. These packets are already fully constructed. 2295 */ 2296 static __inline unsigned int 2297 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2298 { 2299 unsigned int flits, cnt = 0; 2300 int ndescs; 2301 2302 if (m->m_len <= WR_LEN && nsegs == 0) 2303 return (1); /* packet fits as immediate data */ 2304 2305 /* 2306 * This needs to be re-visited for TOE 2307 */ 2308 2309 cnt = nsegs; 2310 2311 /* headers */ 2312 flits = m->m_len / 8; 2313 2314 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2315 2316 return (ndescs); 2317 } 2318 2319 /** 2320 * ofld_xmit - send a packet through an offload queue 2321 * @adap: the adapter 2322 * @q: the Tx offload queue 2323 * @m: the packet 2324 * 2325 * Send an offload packet through an SGE offload queue. 
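 *
 *	If no descriptors are available the packet is left on the queue's
 *	send queue with its descriptor count saved in its priority field,
 *	to be resubmitted later by restart_offloadq().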
2326 */ 2327 static int 2328 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2329 { 2330 int ret, nsegs; 2331 unsigned int ndesc; 2332 unsigned int pidx, gen; 2333 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2334 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2335 struct tx_sw_desc *stx; 2336 2337 nsegs = m_get_sgllen(m); 2338 vsegs = m_get_sgl(m); 2339 ndesc = calc_tx_descs_ofld(m, nsegs); 2340 busdma_map_sgl(vsegs, segs, nsegs); 2341 2342 stx = &q->sdesc[q->pidx]; 2343 2344 TXQ_LOCK(qs); 2345 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2346 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2347 if (__predict_false(ret)) { 2348 if (ret == 1) { 2349 printf("no ofld desc avail\n"); 2350 2351 m_set_priority(m, ndesc); /* save for restart */ 2352 TXQ_UNLOCK(qs); 2353 return (EINTR); 2354 } 2355 goto again; 2356 } 2357 2358 gen = q->gen; 2359 q->in_use += ndesc; 2360 pidx = q->pidx; 2361 q->pidx += ndesc; 2362 if (q->pidx >= q->size) { 2363 q->pidx -= q->size; 2364 q->gen ^= 1; 2365 } 2366 #ifdef T3_TRACE 2367 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2368 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2369 ndesc, pidx, skb->len, skb->len - skb->data_len, 2370 skb_shinfo(skb)->nr_frags); 2371 #endif 2372 TXQ_UNLOCK(qs); 2373 2374 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2375 check_ring_tx_db(adap, q, 1); 2376 return (0); 2377 } 2378 2379 /** 2380 * restart_offloadq - restart a suspended offload queue 2381 * @qs: the queue set cotaining the offload queue 2382 * 2383 * Resumes transmission on a suspended Tx offload queue. 2384 */ 2385 static void 2386 restart_offloadq(void *data, int npending) 2387 { 2388 struct mbuf *m; 2389 struct sge_qset *qs = data; 2390 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2391 adapter_t *adap = qs->port->adapter; 2392 bus_dma_segment_t segs[TX_MAX_SEGS]; 2393 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2394 int nsegs, cleaned; 2395 2396 TXQ_LOCK(qs); 2397 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2398 2399 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2400 unsigned int gen, pidx; 2401 unsigned int ndesc = m_get_priority(m); 2402 2403 if (__predict_false(q->size - q->in_use < ndesc)) { 2404 setbit(&qs->txq_stopped, TXQ_OFLD); 2405 if (should_restart_tx(q) && 2406 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2407 goto again; 2408 q->stops++; 2409 break; 2410 } 2411 2412 gen = q->gen; 2413 q->in_use += ndesc; 2414 pidx = q->pidx; 2415 q->pidx += ndesc; 2416 if (q->pidx >= q->size) { 2417 q->pidx -= q->size; 2418 q->gen ^= 1; 2419 } 2420 2421 (void)mbufq_dequeue(&q->sendq); 2422 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2423 TXQ_UNLOCK(qs); 2424 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2425 TXQ_LOCK(qs); 2426 } 2427 #if USE_GTS 2428 set_bit(TXQ_RUNNING, &q->flags); 2429 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2430 #endif 2431 TXQ_UNLOCK(qs); 2432 wmb(); 2433 t3_write_reg(adap, A_SG_KDOORBELL, 2434 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2435 } 2436 2437 /** 2438 * queue_set - return the queue set a packet should use 2439 * @m: the packet 2440 * 2441 * Maps a packet to the SGE queue set it should use. The desired queue 2442 * set is carried in bits 1-3 in the packet's priority. 2443 */ 2444 static __inline int 2445 queue_set(const struct mbuf *m) 2446 { 2447 return m_get_priority(m) >> 1; 2448 } 2449 2450 /** 2451 * is_ctrl_pkt - return whether an offload packet is a control packet 2452 * @m: the packet 2453 * 2454 * Determines whether an offload packet should use an OFLD or a CTRL 2455 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2456 */ 2457 static __inline int 2458 is_ctrl_pkt(const struct mbuf *m) 2459 { 2460 return m_get_priority(m) & 1; 2461 } 2462 2463 /** 2464 * t3_offload_tx - send an offload packet 2465 * @tdev: the offload device to send to 2466 * @m: the packet 2467 * 2468 * Sends an offload packet. We use the packet priority to select the 2469 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2470 * should be sent as regular or control, bits 1-3 select the queue set. 2471 */ 2472 int 2473 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2474 { 2475 adapter_t *adap = tdev2adap(tdev); 2476 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2477 2478 if (__predict_false(is_ctrl_pkt(m))) 2479 return ctrl_xmit(adap, qs, m); 2480 2481 return ofld_xmit(adap, qs, m); 2482 } 2483 2484 /** 2485 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2486 * @tdev: the offload device that will be receiving the packets 2487 * @q: the SGE response queue that assembled the bundle 2488 * @m: the partial bundle 2489 * @n: the number of packets in the bundle 2490 * 2491 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2492 */ 2493 static __inline void 2494 deliver_partial_bundle(struct t3cdev *tdev, 2495 struct sge_rspq *q, 2496 struct mbuf *mbufs[], int n) 2497 { 2498 if (n) { 2499 q->offload_bundles++; 2500 cxgb_ofld_recv(tdev, mbufs, n); 2501 } 2502 } 2503 2504 static __inline int 2505 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2506 struct mbuf *m, struct mbuf *rx_gather[], 2507 unsigned int gather_idx) 2508 { 2509 2510 rq->offload_pkts++; 2511 m->m_pkthdr.header = mtod(m, void *); 2512 rx_gather[gather_idx++] = m; 2513 if (gather_idx == RX_BUNDLE_SIZE) { 2514 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2515 gather_idx = 0; 2516 rq->offload_bundles++; 2517 } 2518 return (gather_idx); 2519 } 2520 2521 static void 2522 restart_tx(struct sge_qset *qs) 2523 { 2524 struct adapter *sc = qs->port->adapter; 2525 2526 2527 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2528 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2529 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2530 qs->txq[TXQ_OFLD].restarts++; 2531 DPRINTF("restarting TXQ_OFLD\n"); 2532 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2533 } 2534 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2535 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2536 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2537 qs->txq[TXQ_CTRL].in_use); 2538 2539 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2540 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2541 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2542 qs->txq[TXQ_CTRL].restarts++; 2543 DPRINTF("restarting TXQ_CTRL\n"); 2544 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2545 } 2546 } 2547 2548 /** 2549 * t3_sge_alloc_qset - initialize an SGE queue set 2550 * @sc: the controller softc 2551 * @id: the queue set id 2552 * @nports: how many Ethernet ports will be using this queue set 2553 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2554 * @p: configuration parameters for this queue set 2555 * @ntxq: number of Tx queues for the queue set 2556 * @pi: port info for queue set 2557 * 2558 * Allocate resources and initialize an SGE queue set. A queue set 2559 * comprises a response queue, two Rx free-buffer queues, and up to 3 2560 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2561 * queue, offload queue, and control queue. 2562 */ 2563 int 2564 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2565 const struct qset_params *p, int ntxq, struct port_info *pi) 2566 { 2567 struct sge_qset *q = &sc->sge.qs[id]; 2568 int i, ret = 0; 2569 2570 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2571 q->port = pi; 2572 2573 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2574 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2575 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2576 goto err; 2577 } 2578 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2579 M_NOWAIT | M_ZERO)) == NULL) { 2580 device_printf(sc->dev, "failed to allocate ifq\n"); 2581 goto err; 2582 } 2583 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2584 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2585 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2586 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2587 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2588 2589 init_qset_cntxt(q, id); 2590 q->idx = id; 2591 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2592 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2593 &q->fl[0].desc, &q->fl[0].sdesc, 2594 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2595 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2596 printf("error %d from alloc ring fl0\n", ret); 2597 goto err; 2598 } 2599 2600 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2601 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2602 &q->fl[1].desc, &q->fl[1].sdesc, 2603 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2604 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2605 printf("error %d from alloc ring fl1\n", ret); 2606 goto err; 2607 } 2608 2609 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2610 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2611 &q->rspq.desc_tag, &q->rspq.desc_map, 2612 NULL, NULL)) != 0) { 2613 printf("error %d from alloc ring rspq\n", ret); 2614 goto err; 2615 } 2616 2617 for (i = 0; i < ntxq; ++i) { 2618 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2619 2620 if ((ret = alloc_ring(sc, p->txq_size[i], 2621 sizeof(struct tx_desc), sz, 2622 &q->txq[i].phys_addr, &q->txq[i].desc, 2623 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2624 &q->txq[i].desc_map, 2625 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2626 printf("error %d from alloc ring tx %i\n", ret, i); 2627 goto err; 2628 } 2629 mbufq_init(&q->txq[i].sendq); 2630 q->txq[i].gen = 1; 2631 q->txq[i].size = p->txq_size[i]; 2632 } 2633 2634 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2635 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2636 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2637 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2638 2639 q->fl[0].gen = q->fl[1].gen = 1; 2640 q->fl[0].size = p->fl_size; 2641 q->fl[1].size = p->jumbo_size; 2642 2643 q->rspq.gen = 1; 2644 q->rspq.cidx = 0; 2645 q->rspq.size = p->rspq_size; 2646 2647 q->txq[TXQ_ETH].stop_thres = nports * 2648 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2649 2650 q->fl[0].buf_size = MCLBYTES; 2651 q->fl[0].zone = zone_pack; 2652 q->fl[0].type = EXT_PACKET; 2653 2654 if (p->jumbo_buf_size == MJUM16BYTES) { 2655 q->fl[1].zone = zone_jumbo16; 2656 q->fl[1].type = EXT_JUMBO16; 2657 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2658 q->fl[1].zone = zone_jumbo9; 2659 q->fl[1].type = EXT_JUMBO9; 2660 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2661 q->fl[1].zone = zone_jumbop; 2662 q->fl[1].type = EXT_JUMBOP; 2663 } else { 2664 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2665 ret = EDOOFUS; 2666 goto err; 2667 } 2668 q->fl[1].buf_size = p->jumbo_buf_size; 2669 2670 /* Allocate and setup the lro_ctrl structure */ 2671 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2672 #if defined(INET6) || defined(INET) 2673 ret = tcp_lro_init(&q->lro.ctrl); 2674 if (ret) { 2675 printf("error %d from tcp_lro_init\n", ret); 2676 goto err; 2677 } 2678 #endif 2679 q->lro.ctrl.ifp = pi->ifp; 2680 2681 mtx_lock_spin(&sc->sge.reg_lock); 2682 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2683 q->rspq.phys_addr, q->rspq.size, 2684 q->fl[0].buf_size, 1, 0); 2685 if (ret) { 2686 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2687 goto err_unlock; 2688 } 2689 2690 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2691 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2692 q->fl[i].phys_addr, q->fl[i].size, 2693 q->fl[i].buf_size, p->cong_thres, 1, 2694 0); 2695 if (ret) { 2696 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2697 goto err_unlock; 2698 } 2699 } 2700 2701 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2702 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2703 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2704 1, 0); 2705 if (ret) { 2706 printf("error %d from t3_sge_init_ecntxt\n", ret); 2707 goto err_unlock; 2708 } 2709 2710 if (ntxq > 1) { 2711 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2712 USE_GTS, SGE_CNTXT_OFLD, id, 2713 q->txq[TXQ_OFLD].phys_addr, 2714 q->txq[TXQ_OFLD].size, 0, 1, 0); 2715 if (ret) { 2716 printf("error %d from t3_sge_init_ecntxt\n", ret); 2717 goto err_unlock; 2718 } 2719 } 2720 2721 if (ntxq > 2) { 2722 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2723 SGE_CNTXT_CTRL, id, 2724 q->txq[TXQ_CTRL].phys_addr, 2725 q->txq[TXQ_CTRL].size, 2726 q->txq[TXQ_CTRL].token, 1, 0); 2727 if (ret) { 2728 printf("error %d from t3_sge_init_ecntxt\n", ret); 2729 goto err_unlock; 2730 } 2731 } 
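	/*
	 * All egress contexts are programmed; name the response queue lock,
	 * drop reg_lock, and then apply the coalescing settings and prime
	 * the free lists and response queue.
	 */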
2732 2733 snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2734 device_get_unit(sc->dev), irq_vec_idx); 2735 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2736 2737 mtx_unlock_spin(&sc->sge.reg_lock); 2738 t3_update_qset_coalesce(q, p); 2739 q->port = pi; 2740 2741 refill_fl(sc, &q->fl[0], q->fl[0].size); 2742 refill_fl(sc, &q->fl[1], q->fl[1].size); 2743 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2744 2745 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2746 V_NEWTIMER(q->rspq.holdoff_tmr)); 2747 2748 return (0); 2749 2750 err_unlock: 2751 mtx_unlock_spin(&sc->sge.reg_lock); 2752 err: 2753 TXQ_LOCK(q); 2754 t3_free_qset(sc, q); 2755 2756 return (ret); 2757 } 2758 2759 /* 2760 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2761 * ethernet data. Hardware assistance with various checksums and any vlan tag 2762 * will also be taken into account here. 2763 */ 2764 void 2765 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2766 { 2767 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2768 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2769 struct ifnet *ifp = pi->ifp; 2770 2771 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2772 2773 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2774 cpl->csum_valid && cpl->csum == 0xffff) { 2775 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2776 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2777 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2778 m->m_pkthdr.csum_data = 0xffff; 2779 } 2780 2781 if (cpl->vlan_valid) { 2782 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2783 m->m_flags |= M_VLANTAG; 2784 } 2785 2786 m->m_pkthdr.rcvif = ifp; 2787 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2788 /* 2789 * adjust after conversion to mbuf chain 2790 */ 2791 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2792 m->m_len -= (sizeof(*cpl) + ethpad); 2793 m->m_data += (sizeof(*cpl) + ethpad); 2794 } 2795 2796 /** 2797 * get_packet - return the next ingress packet buffer from a free list 2798 * @adap: the adapter that received the packet 2799 * @drop_thres: # of remaining buffers before we start dropping packets 2800 * @qs: the qset that the SGE free list holding the packet belongs to 2801 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2802 * @r: response descriptor 2803 * 2804 * Get the next packet from a free list and complete setup of the 2805 * sk_buff. If the packet is small we make a copy and recycle the 2806 * original buffer, otherwise we use the original buffer itself. If a 2807 * positive drop threshold is supplied packets are dropped and their 2808 * buffers recycled if (a) the number of remaining buffers is under the 2809 * threshold and the packet is too big to copy, or (b) the packet should 2810 * be copied but there is no memory for the copy. 2811 */ 2812 static int 2813 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2814 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2815 { 2816 2817 unsigned int len_cq = ntohl(r->len_cq); 2818 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2819 int mask, cidx = fl->cidx; 2820 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2821 uint32_t len = G_RSPD_LEN(len_cq); 2822 uint32_t flags = M_EXT; 2823 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2824 caddr_t cl; 2825 struct mbuf *m; 2826 int ret = 0; 2827 2828 mask = fl->size - 1; 2829 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2830 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2831 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2832 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2833 2834 fl->credits--; 2835 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2836 2837 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2838 sopeop == RSPQ_SOP_EOP) { 2839 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2840 goto skip_recycle; 2841 cl = mtod(m, void *); 2842 memcpy(cl, sd->rxsd_cl, len); 2843 recycle_rx_buf(adap, fl, fl->cidx); 2844 m->m_pkthdr.len = m->m_len = len; 2845 m->m_flags = 0; 2846 mh->mh_head = mh->mh_tail = m; 2847 ret = 1; 2848 goto done; 2849 } else { 2850 skip_recycle: 2851 bus_dmamap_unload(fl->entry_tag, sd->map); 2852 cl = sd->rxsd_cl; 2853 m = sd->m; 2854 2855 if ((sopeop == RSPQ_SOP_EOP) || 2856 (sopeop == RSPQ_SOP)) 2857 flags |= M_PKTHDR; 2858 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2859 if (fl->zone == zone_pack) { 2860 /* 2861 * restore clobbered data pointer 2862 */ 2863 m->m_data = m->m_ext.ext_buf; 2864 } else { 2865 m_cljset(m, cl, fl->type); 2866 } 2867 m->m_len = len; 2868 } 2869 switch(sopeop) { 2870 case RSPQ_SOP_EOP: 2871 ret = 1; 2872 /* FALLTHROUGH */ 2873 case RSPQ_SOP: 2874 mh->mh_head = mh->mh_tail = m; 2875 m->m_pkthdr.len = len; 2876 break; 2877 case RSPQ_EOP: 2878 ret = 1; 2879 /* FALLTHROUGH */ 2880 case RSPQ_NSOP_NEOP: 2881 if (mh->mh_tail == NULL) { 2882 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2883 m_freem(m); 2884 break; 2885 } 2886 mh->mh_tail->m_next = m; 2887 mh->mh_tail = m; 2888 mh->mh_head->m_pkthdr.len += len; 2889 break; 2890 } 2891 if (cxgb_debug) 2892 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2893 done: 2894 if (++fl->cidx == fl->size) 2895 fl->cidx = 0; 2896 2897 return (ret); 2898 } 2899 2900 /** 2901 * handle_rsp_cntrl_info - handles control information in a response 2902 * @qs: the queue set corresponding to the response 2903 * @flags: the response control flags 2904 * 2905 * Handles the control information of an SGE response, such as GTS 2906 * indications and completion credits for the queue set's Tx queues. 2907 * HW coalesces credits, we don't do any extra SW coalescing. 
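 *	The TXQ0, TXQ1, and TXQ2 credit fields correspond to the Ethernet,
 *	offload, and control Tx queues, respectively.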
2908 */ 2909 static __inline void 2910 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2911 { 2912 unsigned int credits; 2913 2914 #if USE_GTS 2915 if (flags & F_RSPD_TXQ0_GTS) 2916 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2917 #endif 2918 credits = G_RSPD_TXQ0_CR(flags); 2919 if (credits) 2920 qs->txq[TXQ_ETH].processed += credits; 2921 2922 credits = G_RSPD_TXQ2_CR(flags); 2923 if (credits) 2924 qs->txq[TXQ_CTRL].processed += credits; 2925 2926 # if USE_GTS 2927 if (flags & F_RSPD_TXQ1_GTS) 2928 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2929 # endif 2930 credits = G_RSPD_TXQ1_CR(flags); 2931 if (credits) 2932 qs->txq[TXQ_OFLD].processed += credits; 2933 2934 } 2935 2936 static void 2937 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2938 unsigned int sleeping) 2939 { 2940 ; 2941 } 2942 2943 /** 2944 * process_responses - process responses from an SGE response queue 2945 * @adap: the adapter 2946 * @qs: the queue set to which the response queue belongs 2947 * @budget: how many responses can be processed in this round 2948 * 2949 * Process responses from an SGE response queue up to the supplied budget. 2950 * Responses include received packets as well as credits and other events 2951 * for the queues that belong to the response queue's queue set. 2952 * A negative budget is effectively unlimited. 2953 * 2954 * Additionally choose the interrupt holdoff time for the next interrupt 2955 * on this queue. If the system is under memory shortage use a fairly 2956 * long delay to help recovery. 2957 */ 2958 static int 2959 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2960 { 2961 struct sge_rspq *rspq = &qs->rspq; 2962 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2963 int budget_left = budget; 2964 unsigned int sleeping = 0; 2965 #if defined(INET6) || defined(INET) 2966 int lro_enabled = qs->lro.enabled; 2967 int skip_lro; 2968 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2969 #endif 2970 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2971 int ngathered = 0; 2972 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2973 #ifdef DEBUG 2974 static int last_holdoff = 0; 2975 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2976 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2977 last_holdoff = rspq->holdoff_tmr; 2978 } 2979 #endif 2980 rspq->next_holdoff = rspq->holdoff_tmr; 2981 2982 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2983 int eth, eop = 0, ethpad = 0; 2984 uint32_t flags = ntohl(r->flags); 2985 uint32_t rss_csum = *(const uint32_t *)r; 2986 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2987 2988 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2989 2990 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2991 struct mbuf *m; 2992 2993 if (cxgb_debug) 2994 printf("async notification\n"); 2995 2996 if (mh->mh_head == NULL) { 2997 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2998 m = mh->mh_head; 2999 } else { 3000 m = m_gethdr(M_DONTWAIT, MT_DATA); 3001 } 3002 if (m == NULL) 3003 goto no_mem; 3004 3005 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3006 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3007 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3008 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3009 eop = 1; 3010 rspq->async_notif++; 3011 goto skip; 3012 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3013 struct mbuf *m = NULL; 3014 3015 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3016 r->rss_hdr.opcode, rspq->cidx); 3017 if (mh->mh_head == NULL) 3018 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3019 else 3020 m = m_gethdr(M_DONTWAIT, 
MT_DATA); 3021 3022 if (mh->mh_head == NULL && m == NULL) { 3023 no_mem: 3024 rspq->next_holdoff = NOMEM_INTR_DELAY; 3025 budget_left--; 3026 break; 3027 } 3028 get_imm_packet(adap, r, mh->mh_head); 3029 eop = 1; 3030 rspq->imm_data++; 3031 } else if (r->len_cq) { 3032 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3033 3034 eop = get_packet(adap, drop_thresh, qs, mh, r); 3035 if (eop) { 3036 if (r->rss_hdr.hash_type && !adap->timestamp) 3037 mh->mh_head->m_flags |= M_FLOWID; 3038 mh->mh_head->m_pkthdr.flowid = rss_hash; 3039 } 3040 3041 ethpad = 2; 3042 } else { 3043 rspq->pure_rsps++; 3044 } 3045 skip: 3046 if (flags & RSPD_CTRL_MASK) { 3047 sleeping |= flags & RSPD_GTS_MASK; 3048 handle_rsp_cntrl_info(qs, flags); 3049 } 3050 3051 r++; 3052 if (__predict_false(++rspq->cidx == rspq->size)) { 3053 rspq->cidx = 0; 3054 rspq->gen ^= 1; 3055 r = rspq->desc; 3056 } 3057 3058 if (++rspq->credits >= 64) { 3059 refill_rspq(adap, rspq, rspq->credits); 3060 rspq->credits = 0; 3061 } 3062 if (!eth && eop) { 3063 mh->mh_head->m_pkthdr.csum_data = rss_csum; 3064 /* 3065 * XXX size mismatch 3066 */ 3067 m_set_priority(mh->mh_head, rss_hash); 3068 3069 3070 ngathered = rx_offload(&adap->tdev, rspq, 3071 mh->mh_head, offload_mbufs, ngathered); 3072 mh->mh_head = NULL; 3073 DPRINTF("received offload packet\n"); 3074 3075 } else if (eth && eop) { 3076 struct mbuf *m = mh->mh_head; 3077 3078 t3_rx_eth(adap, rspq, m, ethpad); 3079 3080 /* 3081 * The T304 sends incoming packets on any qset. If LRO 3082 * is also enabled, we could end up sending packet up 3083 * lro_ctrl->ifp's input. That is incorrect. 3084 * 3085 * The mbuf's rcvif was derived from the cpl header and 3086 * is accurate. Skip LRO and just use that. 3087 */ 3088 #if defined(INET6) || defined(INET) 3089 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3090 3091 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 3092 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 3093 ) { 3094 /* successfully queue'd for LRO */ 3095 } else 3096 #endif 3097 { 3098 /* 3099 * LRO not enabled, packet unsuitable for LRO, 3100 * or unable to queue. Pass it up right now in 3101 * either case. 3102 */ 3103 struct ifnet *ifp = m->m_pkthdr.rcvif; 3104 (*ifp->if_input)(ifp, m); 3105 } 3106 mh->mh_head = NULL; 3107 3108 } 3109 __refill_fl_lt(adap, &qs->fl[0], 32); 3110 __refill_fl_lt(adap, &qs->fl[1], 32); 3111 --budget_left; 3112 } 3113 3114 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3115 3116 #if defined(INET6) || defined(INET) 3117 /* Flush LRO */ 3118 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3119 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3120 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3121 tcp_lro_flush(lro_ctrl, queued); 3122 } 3123 #endif 3124 3125 if (sleeping) 3126 check_ring_db(adap, qs, sleeping); 3127 3128 mb(); /* commit Tx queue processed updates */ 3129 if (__predict_false(qs->txq_stopped > 1)) 3130 restart_tx(qs); 3131 3132 __refill_fl_lt(adap, &qs->fl[0], 512); 3133 __refill_fl_lt(adap, &qs->fl[1], 512); 3134 budget -= budget_left; 3135 return (budget); 3136 } 3137 3138 /* 3139 * A helper function that processes responses and issues GTS. 
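 * The return value is the number of responses processed; the GTS write
 * also programs the next interrupt holdoff timer and reports the updated
 * response queue index to the hardware.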
3140 */ 3141 static __inline int 3142 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3143 { 3144 int work; 3145 static int last_holdoff = 0; 3146 3147 work = process_responses(adap, rspq_to_qset(rq), -1); 3148 3149 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3150 printf("next_holdoff=%d\n", rq->next_holdoff); 3151 last_holdoff = rq->next_holdoff; 3152 } 3153 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3154 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3155 3156 return (work); 3157 } 3158 3159 3160 /* 3161 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3162 * Handles data events from SGE response queues as well as error and other 3163 * async events as they all use the same interrupt pin. We use one SGE 3164 * response queue per port in this mode and protect all response queues with 3165 * queue 0's lock. 3166 */ 3167 void 3168 t3b_intr(void *data) 3169 { 3170 uint32_t i, map; 3171 adapter_t *adap = data; 3172 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3173 3174 t3_write_reg(adap, A_PL_CLI, 0); 3175 map = t3_read_reg(adap, A_SG_DATA_INTR); 3176 3177 if (!map) 3178 return; 3179 3180 if (__predict_false(map & F_ERRINTR)) { 3181 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3182 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3183 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3184 } 3185 3186 mtx_lock(&q0->lock); 3187 for_each_port(adap, i) 3188 if (map & (1 << i)) 3189 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3190 mtx_unlock(&q0->lock); 3191 } 3192 3193 /* 3194 * The MSI interrupt handler. This needs to handle data events from SGE 3195 * response queues as well as error and other async events as they all use 3196 * the same MSI vector. We use one SGE response queue per port in this mode 3197 * and protect all response queues with queue 0's lock. 
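 * If none of the response queues had any work, the interrupt is treated
 * as a slow-path event: further PL interrupts are masked and the slow
 * interrupt task is scheduled.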
3198 */ 3199 void 3200 t3_intr_msi(void *data) 3201 { 3202 adapter_t *adap = data; 3203 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3204 int i, new_packets = 0; 3205 3206 mtx_lock(&q0->lock); 3207 3208 for_each_port(adap, i) 3209 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3210 new_packets = 1; 3211 mtx_unlock(&q0->lock); 3212 if (new_packets == 0) { 3213 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3214 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3215 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3216 } 3217 } 3218 3219 void 3220 t3_intr_msix(void *data) 3221 { 3222 struct sge_qset *qs = data; 3223 adapter_t *adap = qs->port->adapter; 3224 struct sge_rspq *rspq = &qs->rspq; 3225 3226 if (process_responses_gts(adap, rspq) == 0) 3227 rspq->unhandled_irqs++; 3228 } 3229 3230 #define QDUMP_SBUF_SIZE 32 * 400 3231 static int 3232 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3233 { 3234 struct sge_rspq *rspq; 3235 struct sge_qset *qs; 3236 int i, err, dump_end, idx; 3237 struct sbuf *sb; 3238 struct rsp_desc *rspd; 3239 uint32_t data[4]; 3240 3241 rspq = arg1; 3242 qs = rspq_to_qset(rspq); 3243 if (rspq->rspq_dump_count == 0) 3244 return (0); 3245 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3246 log(LOG_WARNING, 3247 "dump count is too large %d\n", rspq->rspq_dump_count); 3248 rspq->rspq_dump_count = 0; 3249 return (EINVAL); 3250 } 3251 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3252 log(LOG_WARNING, 3253 "dump start of %d is greater than queue size\n", 3254 rspq->rspq_dump_start); 3255 rspq->rspq_dump_start = 0; 3256 return (EINVAL); 3257 } 3258 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3259 if (err) 3260 return (err); 3261 err = sysctl_wire_old_buffer(req, 0); 3262 if (err) 3263 return (err); 3264 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3265 3266 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3267 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3268 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3269 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3270 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3271 3272 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3273 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3274 3275 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3276 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3277 idx = i & (RSPQ_Q_SIZE-1); 3278 3279 rspd = &rspq->desc[idx]; 3280 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3281 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3282 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3283 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3284 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3285 be32toh(rspd->len_cq), rspd->intr_gen); 3286 } 3287 3288 err = sbuf_finish(sb); 3289 /* Output a trailing NUL. 
*/ 3290 if (err == 0) 3291 err = SYSCTL_OUT(req, "", 1); 3292 sbuf_delete(sb); 3293 return (err); 3294 } 3295 3296 static int 3297 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3298 { 3299 struct sge_txq *txq; 3300 struct sge_qset *qs; 3301 int i, j, err, dump_end; 3302 struct sbuf *sb; 3303 struct tx_desc *txd; 3304 uint32_t *WR, wr_hi, wr_lo, gen; 3305 uint32_t data[4]; 3306 3307 txq = arg1; 3308 qs = txq_to_qset(txq, TXQ_ETH); 3309 if (txq->txq_dump_count == 0) { 3310 return (0); 3311 } 3312 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3313 log(LOG_WARNING, 3314 "dump count is too large %d\n", txq->txq_dump_count); 3315 txq->txq_dump_count = 1; 3316 return (EINVAL); 3317 } 3318 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3319 log(LOG_WARNING, 3320 "dump start of %d is greater than queue size\n", 3321 txq->txq_dump_start); 3322 txq->txq_dump_start = 0; 3323 return (EINVAL); 3324 } 3325 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3326 if (err) 3327 return (err); 3328 err = sysctl_wire_old_buffer(req, 0); 3329 if (err) 3330 return (err); 3331 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3332 3333 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3334 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3335 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3336 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3337 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3338 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3339 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3340 txq->txq_dump_start, 3341 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3342 3343 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3344 for (i = txq->txq_dump_start; i < dump_end; i++) { 3345 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3346 WR = (uint32_t *)txd->flit; 3347 wr_hi = ntohl(WR[0]); 3348 wr_lo = ntohl(WR[1]); 3349 gen = G_WR_GEN(wr_lo); 3350 3351 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3352 wr_hi, wr_lo, gen); 3353 for (j = 2; j < 30; j += 4) 3354 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3355 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3356 3357 } 3358 err = sbuf_finish(sb); 3359 /* Output a trailing NUL. 
*/ 3360 if (err == 0) 3361 err = SYSCTL_OUT(req, "", 1); 3362 sbuf_delete(sb); 3363 return (err); 3364 } 3365 3366 static int 3367 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3368 { 3369 struct sge_txq *txq; 3370 struct sge_qset *qs; 3371 int i, j, err, dump_end; 3372 struct sbuf *sb; 3373 struct tx_desc *txd; 3374 uint32_t *WR, wr_hi, wr_lo, gen; 3375 3376 txq = arg1; 3377 qs = txq_to_qset(txq, TXQ_CTRL); 3378 if (txq->txq_dump_count == 0) { 3379 return (0); 3380 } 3381 if (txq->txq_dump_count > 256) { 3382 log(LOG_WARNING, 3383 "dump count is too large %d\n", txq->txq_dump_count); 3384 txq->txq_dump_count = 1; 3385 return (EINVAL); 3386 } 3387 if (txq->txq_dump_start > 255) { 3388 log(LOG_WARNING, 3389 "dump start of %d is greater than queue size\n", 3390 txq->txq_dump_start); 3391 txq->txq_dump_start = 0; 3392 return (EINVAL); 3393 } 3394 3395 err = sysctl_wire_old_buffer(req, 0); 3396 if (err != 0) 3397 return (err); 3398 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3399 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3400 txq->txq_dump_start, 3401 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3402 3403 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3404 for (i = txq->txq_dump_start; i < dump_end; i++) { 3405 txd = &txq->desc[i & (255)]; 3406 WR = (uint32_t *)txd->flit; 3407 wr_hi = ntohl(WR[0]); 3408 wr_lo = ntohl(WR[1]); 3409 gen = G_WR_GEN(wr_lo); 3410 3411 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3412 wr_hi, wr_lo, gen); 3413 for (j = 2; j < 30; j += 4) 3414 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3415 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3416 3417 } 3418 err = sbuf_finish(sb); 3419 /* Output a trailing NUL. */ 3420 if (err == 0) 3421 err = SYSCTL_OUT(req, "", 1); 3422 sbuf_delete(sb); 3423 return (err); 3424 } 3425 3426 static int 3427 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3428 { 3429 adapter_t *sc = arg1; 3430 struct qset_params *qsp = &sc->params.sge.qset[0]; 3431 int coalesce_usecs; 3432 struct sge_qset *qs; 3433 int i, j, err, nqsets = 0; 3434 struct mtx *lock; 3435 3436 if ((sc->flags & FULL_INIT_DONE) == 0) 3437 return (ENXIO); 3438 3439 coalesce_usecs = qsp->coalesce_usecs; 3440 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3441 3442 if (err != 0) { 3443 return (err); 3444 } 3445 if (coalesce_usecs == qsp->coalesce_usecs) 3446 return (0); 3447 3448 for (i = 0; i < sc->params.nports; i++) 3449 for (j = 0; j < sc->port[i].nqsets; j++) 3450 nqsets++; 3451 3452 coalesce_usecs = max(1, coalesce_usecs); 3453 3454 for (i = 0; i < nqsets; i++) { 3455 qs = &sc->sge.qs[i]; 3456 qsp = &sc->params.sge.qset[i]; 3457 qsp->coalesce_usecs = coalesce_usecs; 3458 3459 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3460 &sc->sge.qs[0].rspq.lock; 3461 3462 mtx_lock(lock); 3463 t3_update_qset_coalesce(qs, qsp); 3464 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3465 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3466 mtx_unlock(lock); 3467 } 3468 3469 return (0); 3470 } 3471 3472 static int 3473 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3474 { 3475 adapter_t *sc = arg1; 3476 int rc, timestamp; 3477 3478 if ((sc->flags & FULL_INIT_DONE) == 0) 3479 return (ENXIO); 3480 3481 timestamp = sc->timestamp; 3482 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3483 3484 if (rc != 0) 3485 return (rc); 3486 3487 if (timestamp != sc->timestamp) { 3488 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3489 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3490 sc->timestamp = timestamp; 3491 } 3492 3493 return (0); 3494 } 3495 3496 void 3497 t3_add_attach_sysctls(adapter_t *sc) 3498 { 3499 struct sysctl_ctx_list *ctx; 3500 struct sysctl_oid_list *children; 3501 3502 ctx = device_get_sysctl_ctx(sc->dev); 3503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3504 3505 /* random information */ 3506 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3507 "firmware_version", 3508 CTLFLAG_RD, &sc->fw_version, 3509 0, "firmware version"); 3510 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3511 "hw_revision", 3512 CTLFLAG_RD, &sc->params.rev, 3513 0, "chip model"); 3514 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3515 "port_types", 3516 CTLFLAG_RD, &sc->port_types, 3517 0, "type of ports"); 3518 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3519 "enable_debug", 3520 CTLFLAG_RW, &cxgb_debug, 3521 0, "enable verbose debugging output"); 3522 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3523 CTLFLAG_RD, &sc->tunq_coalesce, 3524 "#tunneled packets freed"); 3525 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3526 "txq_overrun", 3527 CTLFLAG_RD, &txq_fills, 3528 0, "#times txq overrun"); 3529 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3530 "core_clock", 3531 CTLFLAG_RD, &sc->params.vpd.cclk, 3532 0, "core clock frequency (in KHz)"); 3533 } 3534 3535 3536 static const char *rspq_name = "rspq"; 3537 static const char *txq_names[] = 3538 { 3539 "txq_eth", 3540 "txq_ofld", 3541 "txq_ctrl" 3542 }; 3543 3544 static int 3545 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3546 { 3547 struct port_info *p = arg1; 3548 uint64_t *parg; 3549 3550 if (!p) 3551 return (EINVAL); 3552 3553 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3554 PORT_LOCK(p); 3555 t3_mac_update_stats(&p->mac); 3556 PORT_UNLOCK(p); 3557 3558 return (sysctl_handle_64(oidp, parg, 0, req)); 3559 } 3560 3561 void 3562 t3_add_configured_sysctls(adapter_t *sc) 3563 { 3564 struct sysctl_ctx_list *ctx; 3565 struct sysctl_oid_list *children; 3566 int i, j; 3567 3568 ctx = device_get_sysctl_ctx(sc->dev); 3569 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3570 3571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3572 "intr_coal", 3573 CTLTYPE_INT|CTLFLAG_RW, sc, 3574 0, t3_set_coalesce_usecs, 3575 "I", "interrupt coalescing timer (us)"); 3576 3577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3578 "pkt_timestamp", 3579 CTLTYPE_INT | CTLFLAG_RW, sc, 3580 0, t3_pkt_timestamp, 3581 "I", "provide packet timestamp instead of connection hash"); 3582 3583 for (i = 0; i < sc->params.nports; i++) { 3584 struct port_info *pi = &sc->port[i]; 3585 struct sysctl_oid *poid; 3586 struct sysctl_oid_list *poidlist; 3587 struct mac_stats *mstats = &pi->mac.stats; 3588 3589 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3590 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3591 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3592 poidlist = SYSCTL_CHILDREN(poid); 3593 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3594 "nqsets", CTLFLAG_RD, &pi->nqsets, 3595 0, "#queue sets"); 3596 3597 for (j = 0; j < pi->nqsets; j++) { 3598 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3599 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3600 *ctrlqpoid, *lropoid; 3601 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3602 *txqpoidlist, *ctrlqpoidlist, 3603 *lropoidlist; 3604 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3605 3606 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3607 3608 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3609 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3610 
qspoidlist = SYSCTL_CHILDREN(qspoid); 3611 3612 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3613 CTLFLAG_RD, &qs->fl[0].empty, 0, 3614 "freelist #0 empty"); 3615 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3616 CTLFLAG_RD, &qs->fl[1].empty, 0, 3617 "freelist #1 empty"); 3618 3619 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3620 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3621 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3622 3623 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3624 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3625 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3626 3627 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3628 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3629 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3630 3631 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3632 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3633 lropoidlist = SYSCTL_CHILDREN(lropoid); 3634 3635 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3636 CTLFLAG_RD, &qs->rspq.size, 3637 0, "#entries in response queue"); 3638 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3639 CTLFLAG_RD, &qs->rspq.cidx, 3640 0, "consumer index"); 3641 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3642 CTLFLAG_RD, &qs->rspq.credits, 3643 0, "#credits"); 3644 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3645 CTLFLAG_RD, &qs->rspq.starved, 3646 0, "#times starved"); 3647 SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3648 CTLFLAG_RD, &qs->rspq.phys_addr, 3649 "physical_address_of the queue"); 3650 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3651 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3652 0, "start rspq dump entry"); 3653 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3654 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3655 0, "#rspq entries to dump"); 3656 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3657 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3658 0, t3_dump_rspq, "A", "dump of the response queue"); 3659 3660 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3661 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3662 "#tunneled packets dropped"); 3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3664 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3665 0, "#tunneled packets waiting to be sent"); 3666 #if 0 3667 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3668 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3669 0, "#tunneled packets queue producer index"); 3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3671 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3672 0, "#tunneled packets queue consumer index"); 3673 #endif 3674 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3675 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3676 0, "#tunneled packets processed by the card"); 3677 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3678 CTLFLAG_RD, &txq->cleaned, 3679 0, "#tunneled packets cleaned"); 3680 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3681 CTLFLAG_RD, &txq->in_use, 3682 0, "#tunneled packet slots in use"); 3683 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3684 CTLFLAG_RD, &txq->txq_frees, 3685 "#tunneled packets freed"); 3686 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3687 CTLFLAG_RD, &txq->txq_skipped, 3688 0, "#tunneled packet descriptors skipped"); 3689 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3690 CTLFLAG_RD, &txq->txq_coalesced, 3691 
"#tunneled packets coalesced"); 3692 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3693 CTLFLAG_RD, &txq->txq_enqueued, 3694 0, "#tunneled packets enqueued to hardware"); 3695 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3696 CTLFLAG_RD, &qs->txq_stopped, 3697 0, "tx queues stopped"); 3698 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3699 CTLFLAG_RD, &txq->phys_addr, 3700 "physical_address_of the queue"); 3701 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3702 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3703 0, "txq generation"); 3704 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3705 CTLFLAG_RD, &txq->cidx, 3706 0, "hardware queue cidx"); 3707 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3708 CTLFLAG_RD, &txq->pidx, 3709 0, "hardware queue pidx"); 3710 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3711 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3712 0, "txq start idx for dump"); 3713 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3714 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3715 0, "txq #entries to dump"); 3716 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3717 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3718 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3719 3720 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3721 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3722 0, "ctrlq start idx for dump"); 3723 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3724 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3725 0, "ctrl #entries to dump"); 3726 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3727 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3728 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3729 3730 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3731 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3732 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3733 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3734 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3735 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3736 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3737 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3738 } 3739 3740 /* Now add a node for mac stats. */ 3741 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3742 CTLFLAG_RD, NULL, "MAC statistics"); 3743 poidlist = SYSCTL_CHILDREN(poid); 3744 3745 /* 3746 * We (ab)use the length argument (arg2) to pass on the offset 3747 * of the data that we are interested in. This is only required 3748 * for the quad counters that are updated from the hardware (we 3749 * make sure that we return the latest value). 3750 * sysctl_handle_macstat first updates *all* the counters from 3751 * the hardware, and then returns the latest value of the 3752 * requested counter. Best would be to update only the 3753 * requested counter from hardware, but t3_mac_update_stats() 3754 * hides all the register details and we don't want to dive into 3755 * all that here. 
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a)	SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}
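
#if 0
/*
 * Illustrative sketch only, kept under #if 0 so it is never compiled: a
 * minimal example of the handler pattern described in the comment inside
 * t3_add_configured_sysctls(), where arg2 carries
 * offsetof(struct mac_stats, <counter>).  The driver's real handler is
 * sysctl_handle_macstat(), defined earlier in this file; this sketch uses a
 * different name to avoid clashing with it, and the PORT_LOCK-based locking
 * around t3_mac_update_stats() is an assumption, not a copy of that code.
 */
static int
example_handle_macstat(SYSCTL_HANDLER_ARGS)
{
	struct port_info *p = arg1;
	uint64_t *counter;

	if (p == NULL)
		return (EINVAL);

	/* Refresh every MAC counter from the hardware. */
	PORT_LOCK(p);
	t3_mac_update_stats(&p->mac);
	PORT_UNLOCK(p);

	/* arg2 is the byte offset of the requested counter within the stats. */
	counter = (uint64_t *)((uintptr_t)&p->mac.stats + arg2);

	return (sysctl_handle_64(oidp, counter, 0, req));
}
#endif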

/**
 * t3_get_desc - dump an SGE descriptor for debugging purposes
 * @qs: the queue set
 * @qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 * @idx: the descriptor index in the queue
 * @data: where to dump the descriptor contents
 *
 * Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 * size of the descriptor, or EINVAL if the queue number or index is out
 * of range.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
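
#if 0
/*
 * Illustrative sketch only, kept under #if 0 so it is never compiled: a
 * hypothetical debugging helper showing how t3_get_desc() can be used to
 * copy out and print one Ethernet Tx descriptor.  The helper name and the
 * printf-based output are assumptions made for the example; only
 * t3_get_desc(), struct tx_desc, TX_DESC_FLITS and TXQ_ETH come from the
 * driver itself.
 */
static void
example_print_tx_desc(const struct sge_qset *qs, unsigned int idx)
{
	struct tx_desc d;
	int i, len;

	/* Queue numbers 0..2 select the Tx queues; TXQ_ETH is the Ethernet one. */
	len = t3_get_desc(qs, TXQ_ETH, idx, (unsigned char *)&d);
	if (len != sizeof(d)) {
		printf("tx descriptor %u not available\n", idx);
		return;
	}
	for (i = 0; i < TX_DESC_FLITS; i++)
		printf("flit %d: 0x%016jx\n", i, (uintmax_t)d.flit[i]);
}
#endif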