1 /************************************************************************** 2 3 Copyright (c) 2007-2009, Chelsio Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Chelsio Corporation nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/conf.h> 41 #include <machine/bus.h> 42 #include <machine/resource.h> 43 #include <sys/bus_dma.h> 44 #include <sys/rman.h> 45 #include <sys/queue.h> 46 #include <sys/sysctl.h> 47 #include <sys/taskqueue.h> 48 49 #include <sys/proc.h> 50 #include <sys/sbuf.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/systm.h> 54 #include <sys/syslog.h> 55 #include <sys/socket.h> 56 57 #include <net/bpf.h> 58 #include <net/ethernet.h> 59 #include <net/if.h> 60 #include <net/if_vlan_var.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip6.h> 66 #include <netinet/tcp.h> 67 68 #include <dev/pci/pcireg.h> 69 #include <dev/pci/pcivar.h> 70 71 #include <vm/vm.h> 72 #include <vm/pmap.h> 73 74 #include <cxgb_include.h> 75 #include <sys/mvec.h> 76 77 int txq_fills = 0; 78 int multiq_tx_enable = 1; 79 80 extern struct sysctl_oid_list sysctl__hw_cxgb_children; 81 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; 82 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); 83 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, 84 "size of per-queue mbuf ring"); 85 86 static int cxgb_tx_coalesce_force = 0; 87 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force); 88 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW, 89 &cxgb_tx_coalesce_force, 0, 90 "coalesce small packets into a single work request regardless of ring state"); 91 92 #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 93 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) 94 #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 95 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 96 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 97 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 98 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 99 
100 101 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; 102 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start", 103 &cxgb_tx_coalesce_enable_start); 104 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW, 105 &cxgb_tx_coalesce_enable_start, 0, 106 "coalesce enable threshold"); 107 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; 108 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop); 109 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW, 110 &cxgb_tx_coalesce_enable_stop, 0, 111 "coalesce disable threshold"); 112 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 113 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold); 114 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW, 115 &cxgb_tx_reclaim_threshold, 0, 116 "tx cleaning minimum threshold"); 117 118 /* 119 * XXX don't re-enable this until TOE stops assuming 120 * we have an m_ext 121 */ 122 static int recycle_enable = 0; 123 124 extern int cxgb_use_16k_clusters; 125 extern int nmbjumbop; 126 extern int nmbjumbo9; 127 extern int nmbjumbo16; 128 129 #define USE_GTS 0 130 131 #define SGE_RX_SM_BUF_SIZE 1536 132 #define SGE_RX_DROP_THRES 16 133 #define SGE_RX_COPY_THRES 128 134 135 /* 136 * Period of the Tx buffer reclaim timer. This timer does not need to run 137 * frequently as Tx buffers are usually reclaimed by new Tx packets. 138 */ 139 #define TX_RECLAIM_PERIOD (hz >> 1) 140 141 /* 142 * Values for sge_txq.flags 143 */ 144 enum { 145 TXQ_RUNNING = 1 << 0, /* fetch engine is running */ 146 TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ 147 }; 148 149 struct tx_desc { 150 uint64_t flit[TX_DESC_FLITS]; 151 } __packed; 152 153 struct rx_desc { 154 uint32_t addr_lo; 155 uint32_t len_gen; 156 uint32_t gen2; 157 uint32_t addr_hi; 158 } __packed; 159 160 struct rsp_desc { /* response queue descriptor */ 161 struct rss_header rss_hdr; 162 uint32_t flags; 163 uint32_t len_cq; 164 uint8_t imm_data[47]; 165 uint8_t intr_gen; 166 } __packed; 167 168 #define RX_SW_DESC_MAP_CREATED (1 << 0) 169 #define TX_SW_DESC_MAP_CREATED (1 << 1) 170 #define RX_SW_DESC_INUSE (1 << 3) 171 #define TX_SW_DESC_MAPPED (1 << 4) 172 173 #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) 174 #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) 175 #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) 176 #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) 177 178 struct tx_sw_desc { /* SW state per Tx descriptor */ 179 struct mbuf *m; 180 bus_dmamap_t map; 181 int flags; 182 }; 183 184 struct rx_sw_desc { /* SW state per Rx descriptor */ 185 caddr_t rxsd_cl; 186 struct mbuf *m; 187 bus_dmamap_t map; 188 int flags; 189 }; 190 191 struct txq_state { 192 unsigned int compl; 193 unsigned int gen; 194 unsigned int pidx; 195 }; 196 197 struct refill_fl_cb_arg { 198 int error; 199 bus_dma_segment_t seg; 200 int nseg; 201 }; 202 203 204 /* 205 * Maps a number of flits to the number of Tx descriptors that can hold them. 206 * The formula is 207 * 208 * desc = 1 + (flits - 2) / (WR_FLITS - 1). 209 * 210 * HW allows up to 4 descriptors to be combined into a WR. 
211 */ 212 static uint8_t flit_desc_map[] = { 213 0, 214 #if SGE_NUM_GENBITS == 1 215 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 216 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 217 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 218 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 219 #elif SGE_NUM_GENBITS == 2 220 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 221 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 222 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 223 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 224 #else 225 # error "SGE_NUM_GENBITS must be 1 or 2" 226 #endif 227 }; 228 229 #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) 230 #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) 231 #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) 232 #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) 233 #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 234 #define TXQ_RING_NEEDS_ENQUEUE(qs) \ 235 drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 236 #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 237 #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ 238 drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) 239 #define TXQ_RING_DEQUEUE(qs) \ 240 drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) 241 242 int cxgb_debug = 0; 243 244 static void sge_timer_cb(void *arg); 245 static void sge_timer_reclaim(void *arg, int ncount); 246 static void sge_txq_reclaim_handler(void *arg, int ncount); 247 static void cxgb_start_locked(struct sge_qset *qs); 248 249 /* 250 * XXX need to cope with bursty scheduling by looking at a wider 251 * window than we are now for determining the need for coalescing 252 * 253 */ 254 static __inline uint64_t 255 check_pkt_coalesce(struct sge_qset *qs) 256 { 257 struct adapter *sc; 258 struct sge_txq *txq; 259 uint8_t *fill; 260 261 if (__predict_false(cxgb_tx_coalesce_force)) 262 return (1); 263 txq = &qs->txq[TXQ_ETH]; 264 sc = qs->port->adapter; 265 fill = &sc->tunq_fill[qs->idx]; 266 267 if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) 268 cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; 269 if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) 270 cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; 271 /* 272 * if the hardware transmit queue is more than 1/8 full 273 * we mark it as coalescing - we drop back from coalescing 274 * when we go below 1/32 full and there are no packets enqueued, 275 * this provides us with some degree of hysteresis 276 */ 277 if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 278 TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) 279 *fill = 0; 280 else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) 281 *fill = 1; 282 283 return (sc->tunq_coalesce); 284 } 285 286 #ifdef __LP64__ 287 static void 288 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 289 { 290 uint64_t wr_hilo; 291 #if _BYTE_ORDER == _LITTLE_ENDIAN 292 wr_hilo = wr_hi; 293 wr_hilo |= (((uint64_t)wr_lo)<<32); 294 #else 295 wr_hilo = wr_lo; 296 wr_hilo |= (((uint64_t)wr_hi)<<32); 297 #endif 298 wrp->wrh_hilo = wr_hilo; 299 } 300 #else 301 static void 302 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) 303 { 304 305 wrp->wrh_hi = wr_hi; 306 wmb(); 307 wrp->wrh_lo = wr_lo; 308 } 309 #endif 310 311 struct coalesce_info { 312 int count; 313 int nbytes; 314 }; 315 316 static int 317 coalesce_check(struct mbuf *m, void *arg) 318 { 319 struct coalesce_info *ci = arg; 320 int *count = &ci->count; 321 int *nbytes = 
&ci->nbytes; 322 323 if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && 324 (*count < 7) && (m->m_next == NULL))) { 325 *count += 1; 326 *nbytes += m->m_len; 327 return (1); 328 } 329 return (0); 330 } 331 332 static struct mbuf * 333 cxgb_dequeue(struct sge_qset *qs) 334 { 335 struct mbuf *m, *m_head, *m_tail; 336 struct coalesce_info ci; 337 338 339 if (check_pkt_coalesce(qs) == 0) 340 return TXQ_RING_DEQUEUE(qs); 341 342 m_head = m_tail = NULL; 343 ci.count = ci.nbytes = 0; 344 do { 345 m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); 346 if (m_head == NULL) { 347 m_tail = m_head = m; 348 } else if (m != NULL) { 349 m_tail->m_nextpkt = m; 350 m_tail = m; 351 } 352 } while (m != NULL); 353 if (ci.count > 7) 354 panic("trying to coalesce %d packets in to one WR", ci.count); 355 return (m_head); 356 } 357 358 /** 359 * reclaim_completed_tx - reclaims completed Tx descriptors 360 * @adapter: the adapter 361 * @q: the Tx queue to reclaim completed descriptors from 362 * 363 * Reclaims Tx descriptors that the SGE has indicated it has processed, 364 * and frees the associated buffers if possible. Called with the Tx 365 * queue's lock held. 366 */ 367 static __inline int 368 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) 369 { 370 struct sge_txq *q = &qs->txq[queue]; 371 int reclaim = desc_reclaimable(q); 372 373 if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || 374 (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) 375 cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; 376 377 if (reclaim < reclaim_min) 378 return (0); 379 380 mtx_assert(&qs->lock, MA_OWNED); 381 if (reclaim > 0) { 382 t3_free_tx_desc(qs, reclaim, queue); 383 q->cleaned += reclaim; 384 q->in_use -= reclaim; 385 } 386 if (isset(&qs->txq_stopped, TXQ_ETH)) 387 clrbit(&qs->txq_stopped, TXQ_ETH); 388 389 return (reclaim); 390 } 391 392 /** 393 * should_restart_tx - are there enough resources to restart a Tx queue? 394 * @q: the Tx queue 395 * 396 * Checks if there are enough descriptors to restart a suspended Tx queue. 397 */ 398 static __inline int 399 should_restart_tx(const struct sge_txq *q) 400 { 401 unsigned int r = q->processed - q->cleaned; 402 403 return q->in_use - r < (q->size >> 1); 404 } 405 406 /** 407 * t3_sge_init - initialize SGE 408 * @adap: the adapter 409 * @p: the SGE parameters 410 * 411 * Performs SGE initialization needed every time after a chip reset. 412 * We do not initialize any of the queue sets here, instead the driver 413 * top-level must request those individually. We also do not enable DMA 414 * here, that should be done after the queues have been set up. 415 */ 416 void 417 t3_sge_init(adapter_t *adap, struct sge_params *p) 418 { 419 u_int ctrl, ups; 420 421 ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ 422 423 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | 424 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | 425 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | 426 V_USERSPACESIZE(ups ? 
ups - 1 : 0) | F_ISCSICOALESCING; 427 #if SGE_NUM_GENBITS == 1 428 ctrl |= F_EGRGENCTRL; 429 #endif 430 if (adap->params.rev > 0) { 431 if (!(adap->flags & (USING_MSIX | USING_MSI))) 432 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; 433 } 434 t3_write_reg(adap, A_SG_CONTROL, ctrl); 435 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | 436 V_LORCQDRBTHRSH(512)); 437 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); 438 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | 439 V_TIMEOUT(200 * core_ticks_per_usec(adap))); 440 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 441 adap->params.rev < T3_REV_C ? 1000 : 500); 442 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); 443 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); 444 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); 445 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); 446 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); 447 } 448 449 450 /** 451 * sgl_len - calculates the size of an SGL of the given capacity 452 * @n: the number of SGL entries 453 * 454 * Calculates the number of flits needed for a scatter/gather list that 455 * can hold the given number of entries. 456 */ 457 static __inline unsigned int 458 sgl_len(unsigned int n) 459 { 460 return ((3 * n) / 2 + (n & 1)); 461 } 462 463 /** 464 * get_imm_packet - return the next ingress packet buffer from a response 465 * @resp: the response descriptor containing the packet data 466 * 467 * Return a packet containing the immediate data of the given response. 468 */ 469 static int 470 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) 471 { 472 473 m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE; 474 m->m_ext.ext_buf = NULL; 475 m->m_ext.ext_type = 0; 476 memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE); 477 return (0); 478 } 479 480 static __inline u_int 481 flits_to_desc(u_int n) 482 { 483 return (flit_desc_map[n]); 484 } 485 486 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ 487 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ 488 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ 489 F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ 490 F_HIRCQPARITYERROR) 491 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) 492 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ 493 F_RSPQDISABLED) 494 495 /** 496 * t3_sge_err_intr_handler - SGE async event interrupt handler 497 * @adapter: the adapter 498 * 499 * Interrupt handler for SGE asynchronous (non-data) events. 
500 */ 501 void 502 t3_sge_err_intr_handler(adapter_t *adapter) 503 { 504 unsigned int v, status; 505 506 status = t3_read_reg(adapter, A_SG_INT_CAUSE); 507 if (status & SGE_PARERR) 508 CH_ALERT(adapter, "SGE parity error (0x%x)\n", 509 status & SGE_PARERR); 510 if (status & SGE_FRAMINGERR) 511 CH_ALERT(adapter, "SGE framing error (0x%x)\n", 512 status & SGE_FRAMINGERR); 513 if (status & F_RSPQCREDITOVERFOW) 514 CH_ALERT(adapter, "SGE response queue credit overflow\n"); 515 516 if (status & F_RSPQDISABLED) { 517 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); 518 519 CH_ALERT(adapter, 520 "packet delivered to disabled response queue (0x%x)\n", 521 (v >> S_RSPQ0DISABLED) & 0xff); 522 } 523 524 t3_write_reg(adapter, A_SG_INT_CAUSE, status); 525 if (status & SGE_FATALERR) 526 t3_fatal_err(adapter); 527 } 528 529 void 530 t3_sge_prep(adapter_t *adap, struct sge_params *p) 531 { 532 int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; 533 534 nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); 535 nqsets *= adap->params.nports; 536 537 fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); 538 539 while (!powerof2(fl_q_size)) 540 fl_q_size--; 541 542 use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : 543 is_offload(adap); 544 545 #if __FreeBSD_version >= 700111 546 if (use_16k) { 547 jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); 548 jumbo_buf_size = MJUM16BYTES; 549 } else { 550 jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); 551 jumbo_buf_size = MJUM9BYTES; 552 } 553 #else 554 jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); 555 jumbo_buf_size = MJUMPAGESIZE; 556 #endif 557 while (!powerof2(jumbo_q_size)) 558 jumbo_q_size--; 559 560 if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) 561 device_printf(adap->dev, 562 "Insufficient clusters and/or jumbo buffers.\n"); 563 564 p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); 565 566 for (i = 0; i < SGE_QSETS; ++i) { 567 struct qset_params *q = p->qset + i; 568 569 if (adap->params.nports > 2) { 570 q->coalesce_usecs = 50; 571 } else { 572 #ifdef INVARIANTS 573 q->coalesce_usecs = 10; 574 #else 575 q->coalesce_usecs = 5; 576 #endif 577 } 578 q->polling = 0; 579 q->rspq_size = RSPQ_Q_SIZE; 580 q->fl_size = fl_q_size; 581 q->jumbo_size = jumbo_q_size; 582 q->jumbo_buf_size = jumbo_buf_size; 583 q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; 584 q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; 585 q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; 586 q->cong_thres = 0; 587 } 588 } 589 590 int 591 t3_sge_alloc(adapter_t *sc) 592 { 593 594 /* The parent tag. */ 595 if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 596 1, 0, /* algnmnt, boundary */ 597 BUS_SPACE_MAXADDR, /* lowaddr */ 598 BUS_SPACE_MAXADDR, /* highaddr */ 599 NULL, NULL, /* filter, filterarg */ 600 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 601 BUS_SPACE_UNRESTRICTED, /* nsegments */ 602 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 603 0, /* flags */ 604 NULL, NULL, /* lock, lockarg */ 605 &sc->parent_dmat)) { 606 device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); 607 return (ENOMEM); 608 } 609 610 /* 611 * DMA tag for normal sized RX frames 612 */ 613 if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, 614 BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, 615 MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { 616 device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); 617 return (ENOMEM); 618 } 619 620 /* 621 * DMA tag for jumbo sized RX frames. 
622 */ 623 if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, 624 BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, 625 BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { 626 device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); 627 return (ENOMEM); 628 } 629 630 /* 631 * DMA tag for TX frames. 632 */ 633 if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, 634 BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 635 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 636 NULL, NULL, &sc->tx_dmat)) { 637 device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); 638 return (ENOMEM); 639 } 640 641 return (0); 642 } 643 644 int 645 t3_sge_free(struct adapter * sc) 646 { 647 648 if (sc->tx_dmat != NULL) 649 bus_dma_tag_destroy(sc->tx_dmat); 650 651 if (sc->rx_jumbo_dmat != NULL) 652 bus_dma_tag_destroy(sc->rx_jumbo_dmat); 653 654 if (sc->rx_dmat != NULL) 655 bus_dma_tag_destroy(sc->rx_dmat); 656 657 if (sc->parent_dmat != NULL) 658 bus_dma_tag_destroy(sc->parent_dmat); 659 660 return (0); 661 } 662 663 void 664 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) 665 { 666 667 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); 668 qs->rspq.polling = 0 /* p->polling */; 669 } 670 671 #if !defined(__i386__) && !defined(__amd64__) 672 static void 673 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 674 { 675 struct refill_fl_cb_arg *cb_arg = arg; 676 677 cb_arg->error = error; 678 cb_arg->seg = segs[0]; 679 cb_arg->nseg = nseg; 680 681 } 682 #endif 683 /** 684 * refill_fl - refill an SGE free-buffer list 685 * @sc: the controller softc 686 * @q: the free-list to refill 687 * @n: the number of new buffers to allocate 688 * 689 * (Re)populate an SGE free-buffer list with up to @n new packet buffers. 690 * The caller must assure that @n does not exceed the queue's capacity. 
691 */ 692 static void 693 refill_fl(adapter_t *sc, struct sge_fl *q, int n) 694 { 695 struct rx_sw_desc *sd = &q->sdesc[q->pidx]; 696 struct rx_desc *d = &q->desc[q->pidx]; 697 struct refill_fl_cb_arg cb_arg; 698 struct mbuf *m; 699 caddr_t cl; 700 int err; 701 702 cb_arg.error = 0; 703 while (n--) { 704 /* 705 * We only allocate a cluster, mbuf allocation happens after rx 706 */ 707 if (q->zone == zone_pack) { 708 if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) 709 break; 710 cl = m->m_ext.ext_buf; 711 } else { 712 if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) 713 break; 714 if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { 715 uma_zfree(q->zone, cl); 716 break; 717 } 718 } 719 if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { 720 if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { 721 log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); 722 uma_zfree(q->zone, cl); 723 goto done; 724 } 725 sd->flags |= RX_SW_DESC_MAP_CREATED; 726 } 727 #if !defined(__i386__) && !defined(__amd64__) 728 err = bus_dmamap_load(q->entry_tag, sd->map, 729 cl, q->buf_size, refill_fl_cb, &cb_arg, 0); 730 731 if (err != 0 || cb_arg.error) { 732 if (q->zone == zone_pack) 733 uma_zfree(q->zone, cl); 734 m_free(m); 735 goto done; 736 } 737 #else 738 cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); 739 #endif 740 sd->flags |= RX_SW_DESC_INUSE; 741 sd->rxsd_cl = cl; 742 sd->m = m; 743 d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); 744 d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); 745 d->len_gen = htobe32(V_FLD_GEN1(q->gen)); 746 d->gen2 = htobe32(V_FLD_GEN2(q->gen)); 747 748 d++; 749 sd++; 750 751 if (++q->pidx == q->size) { 752 q->pidx = 0; 753 q->gen ^= 1; 754 sd = q->sdesc; 755 d = q->desc; 756 } 757 q->credits++; 758 q->db_pending++; 759 } 760 761 done: 762 if (q->db_pending >= 32) { 763 q->db_pending = 0; 764 t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 765 } 766 } 767 768 769 /** 770 * free_rx_bufs - free the Rx buffers on an SGE free list 771 * @sc: the controle softc 772 * @q: the SGE free list to clean up 773 * 774 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from 775 * this queue should be stopped before calling this function. 
776 */ 777 static void 778 free_rx_bufs(adapter_t *sc, struct sge_fl *q) 779 { 780 u_int cidx = q->cidx; 781 782 while (q->credits--) { 783 struct rx_sw_desc *d = &q->sdesc[cidx]; 784 785 if (d->flags & RX_SW_DESC_INUSE) { 786 bus_dmamap_unload(q->entry_tag, d->map); 787 bus_dmamap_destroy(q->entry_tag, d->map); 788 if (q->zone == zone_pack) { 789 m_init(d->m, zone_pack, MCLBYTES, 790 M_NOWAIT, MT_DATA, M_EXT); 791 uma_zfree(zone_pack, d->m); 792 } else { 793 m_init(d->m, zone_mbuf, MLEN, 794 M_NOWAIT, MT_DATA, 0); 795 uma_zfree(zone_mbuf, d->m); 796 uma_zfree(q->zone, d->rxsd_cl); 797 } 798 } 799 800 d->rxsd_cl = NULL; 801 d->m = NULL; 802 if (++cidx == q->size) 803 cidx = 0; 804 } 805 } 806 807 static __inline void 808 __refill_fl(adapter_t *adap, struct sge_fl *fl) 809 { 810 refill_fl(adap, fl, min(16U, fl->size - fl->credits)); 811 } 812 813 static __inline void 814 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) 815 { 816 uint32_t reclaimable = fl->size - fl->credits; 817 818 if (reclaimable > 0) 819 refill_fl(adap, fl, min(max, reclaimable)); 820 } 821 822 /** 823 * recycle_rx_buf - recycle a receive buffer 824 * @adapter: the adapter 825 * @q: the SGE free list 826 * @idx: index of buffer to recycle 827 * 828 * Recycles the specified buffer on the given free list by adding it at 829 * the next available slot on the list. 830 */ 831 static void 832 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) 833 { 834 struct rx_desc *from = &q->desc[idx]; 835 struct rx_desc *to = &q->desc[q->pidx]; 836 837 q->sdesc[q->pidx] = q->sdesc[idx]; 838 to->addr_lo = from->addr_lo; // already big endian 839 to->addr_hi = from->addr_hi; // likewise 840 wmb(); /* necessary ? */ 841 to->len_gen = htobe32(V_FLD_GEN1(q->gen)); 842 to->gen2 = htobe32(V_FLD_GEN2(q->gen)); 843 q->credits++; 844 845 if (++q->pidx == q->size) { 846 q->pidx = 0; 847 q->gen ^= 1; 848 } 849 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); 850 } 851 852 static void 853 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 854 { 855 uint32_t *addr; 856 857 addr = arg; 858 *addr = segs[0].ds_addr; 859 } 860 861 static int 862 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, 863 bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, 864 bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) 865 { 866 size_t len = nelem * elem_size; 867 void *s = NULL; 868 void *p = NULL; 869 int err; 870 871 if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, 872 BUS_SPACE_MAXADDR_32BIT, 873 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, 874 len, 0, NULL, NULL, tag)) != 0) { 875 device_printf(sc->dev, "Cannot allocate descriptor tag\n"); 876 return (ENOMEM); 877 } 878 879 if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, 880 map)) != 0) { 881 device_printf(sc->dev, "Cannot allocate descriptor memory\n"); 882 return (ENOMEM); 883 } 884 885 bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); 886 bzero(p, len); 887 *(void **)desc = p; 888 889 if (sw_size) { 890 len = nelem * sw_size; 891 s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); 892 *(void **)sdesc = s; 893 } 894 if (parent_entry_tag == NULL) 895 return (0); 896 897 if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, 898 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 899 NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, 900 TX_MAX_SIZE, BUS_DMA_ALLOCNOW, 901 NULL, NULL, entry_tag)) != 0) { 902 device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); 903 return (ENOMEM); 904 } 905 
return (0); 906 } 907 908 static void 909 sge_slow_intr_handler(void *arg, int ncount) 910 { 911 adapter_t *sc = arg; 912 913 t3_slow_intr_handler(sc); 914 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); 915 (void) t3_read_reg(sc, A_PL_INT_ENABLE0); 916 } 917 918 /** 919 * sge_timer_cb - perform periodic maintenance of an SGE qset 920 * @data: the SGE queue set to maintain 921 * 922 * Runs periodically from a timer to perform maintenance of an SGE queue 923 * set. It performs two tasks: 924 * 925 * a) Cleans up any completed Tx descriptors that may still be pending. 926 * Normal descriptor cleanup happens when new packets are added to a Tx 927 * queue so this timer is relatively infrequent and does any cleanup only 928 * if the Tx queue has not seen any new packets in a while. We make a 929 * best effort attempt to reclaim descriptors, in that we don't wait 930 * around if we cannot get a queue's lock (which most likely is because 931 * someone else is queueing new packets and so will also handle the clean 932 * up). Since control queues use immediate data exclusively we don't 933 * bother cleaning them up here. 934 * 935 * b) Replenishes Rx queues that have run out due to memory shortage. 936 * Normally new Rx buffers are added when existing ones are consumed but 937 * when out of memory a queue can become empty. We try to add only a few 938 * buffers here, the queue will be replenished fully as these new buffers 939 * are used up if memory shortage has subsided. 940 * 941 * c) Return coalesced response queue credits in case a response queue is 942 * starved. 943 * 944 * d) Ring doorbells for T304 tunnel queues since we have seen doorbell 945 * fifo overflows and the FW doesn't implement any recovery scheme yet. 946 */ 947 static void 948 sge_timer_cb(void *arg) 949 { 950 adapter_t *sc = arg; 951 if ((sc->flags & USING_MSIX) == 0) { 952 953 struct port_info *pi; 954 struct sge_qset *qs; 955 struct sge_txq *txq; 956 int i, j; 957 int reclaim_ofl, refill_rx; 958 959 if (sc->open_device_map == 0) 960 return; 961 962 for (i = 0; i < sc->params.nports; i++) { 963 pi = &sc->port[i]; 964 for (j = 0; j < pi->nqsets; j++) { 965 qs = &sc->sge.qs[pi->first_qset + j]; 966 txq = &qs->txq[0]; 967 reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; 968 refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || 969 (qs->fl[1].credits < qs->fl[1].size)); 970 if (reclaim_ofl || refill_rx) { 971 taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); 972 break; 973 } 974 } 975 } 976 } 977 978 if (sc->params.nports > 2) { 979 int i; 980 981 for_each_port(sc, i) { 982 struct port_info *pi = &sc->port[i]; 983 984 t3_write_reg(sc, A_SG_KDOORBELL, 985 F_SELEGRCNTX | 986 (FW_TUNNEL_SGEEC_START + pi->first_qset)); 987 } 988 } 989 if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && 990 sc->open_device_map != 0) 991 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 992 } 993 994 /* 995 * This is meant to be a catch-all function to keep sge state private 996 * to sge.c 997 * 998 */ 999 int 1000 t3_sge_init_adapter(adapter_t *sc) 1001 { 1002 callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE); 1003 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1004 TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); 1005 return (0); 1006 } 1007 1008 int 1009 t3_sge_reset_adapter(adapter_t *sc) 1010 { 1011 callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); 1012 return (0); 1013 } 1014 1015 int 1016 t3_sge_init_port(struct port_info *pi) 1017 { 
1018 TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); 1019 return (0); 1020 } 1021 1022 /** 1023 * refill_rspq - replenish an SGE response queue 1024 * @adapter: the adapter 1025 * @q: the response queue to replenish 1026 * @credits: how many new responses to make available 1027 * 1028 * Replenishes a response queue by making the supplied number of responses 1029 * available to HW. 1030 */ 1031 static __inline void 1032 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) 1033 { 1034 1035 /* mbufs are allocated on demand when a rspq entry is processed. */ 1036 t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, 1037 V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); 1038 } 1039 1040 static void 1041 sge_txq_reclaim_handler(void *arg, int ncount) 1042 { 1043 struct sge_qset *qs = arg; 1044 int i; 1045 1046 for (i = 0; i < 3; i++) 1047 reclaim_completed_tx(qs, 16, i); 1048 } 1049 1050 static void 1051 sge_timer_reclaim(void *arg, int ncount) 1052 { 1053 struct port_info *pi = arg; 1054 int i, nqsets = pi->nqsets; 1055 adapter_t *sc = pi->adapter; 1056 struct sge_qset *qs; 1057 struct mtx *lock; 1058 1059 KASSERT((sc->flags & USING_MSIX) == 0, 1060 ("can't call timer reclaim for msi-x")); 1061 1062 for (i = 0; i < nqsets; i++) { 1063 qs = &sc->sge.qs[pi->first_qset + i]; 1064 1065 reclaim_completed_tx(qs, 16, TXQ_OFLD); 1066 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 1067 &sc->sge.qs[0].rspq.lock; 1068 1069 if (mtx_trylock(lock)) { 1070 /* XXX currently assume that we are *NOT* polling */ 1071 uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); 1072 1073 if (qs->fl[0].credits < qs->fl[0].size - 16) 1074 __refill_fl(sc, &qs->fl[0]); 1075 if (qs->fl[1].credits < qs->fl[1].size - 16) 1076 __refill_fl(sc, &qs->fl[1]); 1077 1078 if (status & (1 << qs->rspq.cntxt_id)) { 1079 if (qs->rspq.credits) { 1080 refill_rspq(sc, &qs->rspq, 1); 1081 qs->rspq.credits--; 1082 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1083 1 << qs->rspq.cntxt_id); 1084 } 1085 } 1086 mtx_unlock(lock); 1087 } 1088 } 1089 } 1090 1091 /** 1092 * init_qset_cntxt - initialize an SGE queue set context info 1093 * @qs: the queue set 1094 * @id: the queue set id 1095 * 1096 * Initializes the TIDs and context ids for the queues of a queue set. 
1097 */ 1098 static void 1099 init_qset_cntxt(struct sge_qset *qs, u_int id) 1100 { 1101 1102 qs->rspq.cntxt_id = id; 1103 qs->fl[0].cntxt_id = 2 * id; 1104 qs->fl[1].cntxt_id = 2 * id + 1; 1105 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; 1106 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; 1107 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; 1108 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; 1109 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; 1110 1111 mbufq_init(&qs->txq[TXQ_ETH].sendq); 1112 mbufq_init(&qs->txq[TXQ_OFLD].sendq); 1113 mbufq_init(&qs->txq[TXQ_CTRL].sendq); 1114 } 1115 1116 1117 static void 1118 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) 1119 { 1120 txq->in_use += ndesc; 1121 /* 1122 * XXX we don't handle stopping of queue 1123 * presumably start handles this when we bump against the end 1124 */ 1125 txqs->gen = txq->gen; 1126 txq->unacked += ndesc; 1127 txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); 1128 txq->unacked &= 31; 1129 txqs->pidx = txq->pidx; 1130 txq->pidx += ndesc; 1131 #ifdef INVARIANTS 1132 if (((txqs->pidx > txq->cidx) && 1133 (txq->pidx < txqs->pidx) && 1134 (txq->pidx >= txq->cidx)) || 1135 ((txqs->pidx < txq->cidx) && 1136 (txq->pidx >= txq-> cidx)) || 1137 ((txqs->pidx < txq->cidx) && 1138 (txq->cidx < txqs->pidx))) 1139 panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", 1140 txqs->pidx, txq->pidx, txq->cidx); 1141 #endif 1142 if (txq->pidx >= txq->size) { 1143 txq->pidx -= txq->size; 1144 txq->gen ^= 1; 1145 } 1146 1147 } 1148 1149 /** 1150 * calc_tx_descs - calculate the number of Tx descriptors for a packet 1151 * @m: the packet mbufs 1152 * @nsegs: the number of segments 1153 * 1154 * Returns the number of Tx descriptors needed for the given Ethernet 1155 * packet. Ethernet packets require addition of WR and CPL headers. 
1156 */ 1157 static __inline unsigned int 1158 calc_tx_descs(const struct mbuf *m, int nsegs) 1159 { 1160 unsigned int flits; 1161 1162 if (m->m_pkthdr.len <= PIO_LEN) 1163 return 1; 1164 1165 flits = sgl_len(nsegs) + 2; 1166 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1167 flits++; 1168 1169 return flits_to_desc(flits); 1170 } 1171 1172 static unsigned int 1173 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq, 1174 struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs) 1175 { 1176 struct mbuf *m0; 1177 int err, pktlen, pass = 0; 1178 bus_dma_tag_t tag = txq->entry_tag; 1179 1180 retry: 1181 err = 0; 1182 m0 = *m; 1183 pktlen = m0->m_pkthdr.len; 1184 #if defined(__i386__) || defined(__amd64__) 1185 if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) { 1186 goto done; 1187 } else 1188 #endif 1189 err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0); 1190 1191 if (err == 0) { 1192 goto done; 1193 } 1194 if (err == EFBIG && pass == 0) { 1195 pass = 1; 1196 /* Too many segments, try to defrag */ 1197 m0 = m_defrag(m0, M_DONTWAIT); 1198 if (m0 == NULL) { 1199 m_freem(*m); 1200 *m = NULL; 1201 return (ENOBUFS); 1202 } 1203 *m = m0; 1204 goto retry; 1205 } else if (err == ENOMEM) { 1206 return (err); 1207 } if (err) { 1208 if (cxgb_debug) 1209 printf("map failure err=%d pktlen=%d\n", err, pktlen); 1210 m_freem(m0); 1211 *m = NULL; 1212 return (err); 1213 } 1214 done: 1215 #if !defined(__i386__) && !defined(__amd64__) 1216 bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE); 1217 #endif 1218 txsd->flags |= TX_SW_DESC_MAPPED; 1219 1220 return (0); 1221 } 1222 1223 /** 1224 * make_sgl - populate a scatter/gather list for a packet 1225 * @sgp: the SGL to populate 1226 * @segs: the packet dma segments 1227 * @nsegs: the number of segments 1228 * 1229 * Generates a scatter/gather list for the buffers that make up a packet 1230 * and returns the SGL size in 8-byte words. The caller must size the SGL 1231 * appropriately. 1232 */ 1233 static __inline void 1234 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) 1235 { 1236 int i, idx; 1237 1238 for (idx = 0, i = 0; i < nsegs; i++) { 1239 /* 1240 * firmware doesn't like empty segments 1241 */ 1242 if (segs[i].ds_len == 0) 1243 continue; 1244 if (i && idx == 0) 1245 ++sgp; 1246 1247 sgp->len[idx] = htobe32(segs[i].ds_len); 1248 sgp->addr[idx] = htobe64(segs[i].ds_addr); 1249 idx ^= 1; 1250 } 1251 1252 if (idx) { 1253 sgp->len[idx] = 0; 1254 sgp->addr[idx] = 0; 1255 } 1256 } 1257 1258 /** 1259 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell 1260 * @adap: the adapter 1261 * @q: the Tx queue 1262 * 1263 * Ring the doorbell if a Tx queue is asleep. There is a natural race, 1264 * where the HW is going to sleep just after we checked, however, 1265 * then the interrupt handler will detect the outstanding TX packet 1266 * and ring the doorbell for us. 1267 * 1268 * When GTS is disabled we unconditionally ring the doorbell. 
1269 */ 1270 static __inline void 1271 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) 1272 { 1273 #if USE_GTS 1274 clear_bit(TXQ_LAST_PKT_DB, &q->flags); 1275 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { 1276 set_bit(TXQ_LAST_PKT_DB, &q->flags); 1277 #ifdef T3_TRACE 1278 T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", 1279 q->cntxt_id); 1280 #endif 1281 t3_write_reg(adap, A_SG_KDOORBELL, 1282 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1283 } 1284 #else 1285 if (mustring || ++q->db_pending >= 32) { 1286 wmb(); /* write descriptors before telling HW */ 1287 t3_write_reg(adap, A_SG_KDOORBELL, 1288 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1289 q->db_pending = 0; 1290 } 1291 #endif 1292 } 1293 1294 static __inline void 1295 wr_gen2(struct tx_desc *d, unsigned int gen) 1296 { 1297 #if SGE_NUM_GENBITS == 2 1298 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); 1299 #endif 1300 } 1301 1302 /** 1303 * write_wr_hdr_sgl - write a WR header and, optionally, SGL 1304 * @ndesc: number of Tx descriptors spanned by the SGL 1305 * @txd: first Tx descriptor to be written 1306 * @txqs: txq state (generation and producer index) 1307 * @txq: the SGE Tx queue 1308 * @sgl: the SGL 1309 * @flits: number of flits to the start of the SGL in the first descriptor 1310 * @sgl_flits: the SGL size in flits 1311 * @wr_hi: top 32 bits of WR header based on WR type (big endian) 1312 * @wr_lo: low 32 bits of WR header based on WR type (big endian) 1313 * 1314 * Write a work request header and an associated SGL. If the SGL is 1315 * small enough to fit into one Tx descriptor it has already been written 1316 * and we just need to write the WR header. Otherwise we distribute the 1317 * SGL across the number of descriptors it spans. 1318 */ 1319 static void 1320 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, 1321 const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, 1322 unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) 1323 { 1324 1325 struct work_request_hdr *wrp = (struct work_request_hdr *)txd; 1326 struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; 1327 1328 if (__predict_true(ndesc == 1)) { 1329 set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1330 V_WR_SGLSFLT(flits)) | wr_hi, 1331 htonl(V_WR_LEN(flits + sgl_flits) | 1332 V_WR_GEN(txqs->gen)) | wr_lo); 1333 /* XXX gen? 
*/ 1334 wr_gen2(txd, txqs->gen); 1335 1336 } else { 1337 unsigned int ogen = txqs->gen; 1338 const uint64_t *fp = (const uint64_t *)sgl; 1339 struct work_request_hdr *wp = wrp; 1340 1341 wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | 1342 V_WR_SGLSFLT(flits)) | wr_hi; 1343 1344 while (sgl_flits) { 1345 unsigned int avail = WR_FLITS - flits; 1346 1347 if (avail > sgl_flits) 1348 avail = sgl_flits; 1349 memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); 1350 sgl_flits -= avail; 1351 ndesc--; 1352 if (!sgl_flits) 1353 break; 1354 1355 fp += avail; 1356 txd++; 1357 txsd++; 1358 if (++txqs->pidx == txq->size) { 1359 txqs->pidx = 0; 1360 txqs->gen ^= 1; 1361 txd = txq->desc; 1362 txsd = txq->sdesc; 1363 } 1364 1365 /* 1366 * when the head of the mbuf chain 1367 * is freed all clusters will be freed 1368 * with it 1369 */ 1370 wrp = (struct work_request_hdr *)txd; 1371 wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | 1372 V_WR_SGLSFLT(1)) | wr_hi; 1373 wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, 1374 sgl_flits + 1)) | 1375 V_WR_GEN(txqs->gen)) | wr_lo; 1376 wr_gen2(txd, txqs->gen); 1377 flits = 1; 1378 } 1379 wrp->wrh_hi |= htonl(F_WR_EOP); 1380 wmb(); 1381 wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; 1382 wr_gen2((struct tx_desc *)wp, ogen); 1383 } 1384 } 1385 1386 /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ 1387 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) 1388 1389 #define GET_VTAG(cntrl, m) \ 1390 do { \ 1391 if ((m)->m_flags & M_VLANTAG) \ 1392 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ 1393 } while (0) 1394 1395 static int 1396 t3_encap(struct sge_qset *qs, struct mbuf **m) 1397 { 1398 adapter_t *sc; 1399 struct mbuf *m0; 1400 struct sge_txq *txq; 1401 struct txq_state txqs; 1402 struct port_info *pi; 1403 unsigned int ndesc, flits, cntrl, mlen; 1404 int err, nsegs, tso_info = 0; 1405 1406 struct work_request_hdr *wrp; 1407 struct tx_sw_desc *txsd; 1408 struct sg_ent *sgp, *sgl; 1409 uint32_t wr_hi, wr_lo, sgl_flits; 1410 bus_dma_segment_t segs[TX_MAX_SEGS]; 1411 1412 struct tx_desc *txd; 1413 1414 pi = qs->port; 1415 sc = pi->adapter; 1416 txq = &qs->txq[TXQ_ETH]; 1417 txd = &txq->desc[txq->pidx]; 1418 txsd = &txq->sdesc[txq->pidx]; 1419 sgl = txq->txq_sgl; 1420 1421 prefetch(txd); 1422 m0 = *m; 1423 1424 mtx_assert(&qs->lock, MA_OWNED); 1425 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1426 KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); 1427 1428 if (m0->m_nextpkt == NULL && m0->m_next != NULL && 1429 m0->m_pkthdr.csum_flags & (CSUM_TSO)) 1430 tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); 1431 1432 if (m0->m_nextpkt != NULL) { 1433 busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); 1434 ndesc = 1; 1435 mlen = 0; 1436 } else { 1437 if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, 1438 &m0, segs, &nsegs))) { 1439 if (cxgb_debug) 1440 printf("failed ... 
err=%d\n", err); 1441 return (err); 1442 } 1443 mlen = m0->m_pkthdr.len; 1444 ndesc = calc_tx_descs(m0, nsegs); 1445 } 1446 txq_prod(txq, ndesc, &txqs); 1447 1448 KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); 1449 txsd->m = m0; 1450 1451 if (m0->m_nextpkt != NULL) { 1452 struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; 1453 int i, fidx; 1454 1455 if (nsegs > 7) 1456 panic("trying to coalesce %d packets in to one WR", nsegs); 1457 txq->txq_coalesced += nsegs; 1458 wrp = (struct work_request_hdr *)txd; 1459 flits = nsegs*2 + 1; 1460 1461 for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { 1462 struct cpl_tx_pkt_batch_entry *cbe; 1463 uint64_t flit; 1464 uint32_t *hflit = (uint32_t *)&flit; 1465 int cflags = m0->m_pkthdr.csum_flags; 1466 1467 cntrl = V_TXPKT_INTF(pi->txpkt_intf); 1468 GET_VTAG(cntrl, m0); 1469 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1470 if (__predict_false(!(cflags & CSUM_IP))) 1471 cntrl |= F_TXPKT_IPCSUM_DIS; 1472 if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP)))) 1473 cntrl |= F_TXPKT_L4CSUM_DIS; 1474 1475 hflit[0] = htonl(cntrl); 1476 hflit[1] = htonl(segs[i].ds_len | 0x80000000); 1477 flit |= htobe64(1 << 24); 1478 cbe = &cpl_batch->pkt_entry[i]; 1479 cbe->cntrl = hflit[0]; 1480 cbe->len = hflit[1]; 1481 cbe->addr = htobe64(segs[i].ds_addr); 1482 } 1483 1484 wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | 1485 V_WR_SGLSFLT(flits)) | 1486 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1487 wr_lo = htonl(V_WR_LEN(flits) | 1488 V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); 1489 set_wr_hdr(wrp, wr_hi, wr_lo); 1490 wmb(); 1491 ETHER_BPF_MTAP(pi->ifp, m0); 1492 wr_gen2(txd, txqs.gen); 1493 check_ring_tx_db(sc, txq, 0); 1494 return (0); 1495 } else if (tso_info) { 1496 uint16_t eth_type; 1497 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; 1498 struct ether_header *eh; 1499 void *l3hdr; 1500 struct tcphdr *tcp; 1501 1502 txd->flit[2] = 0; 1503 GET_VTAG(cntrl, m0); 1504 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); 1505 hdr->cntrl = htonl(cntrl); 1506 hdr->len = htonl(mlen | 0x80000000); 1507 1508 if (__predict_false(mlen < TCPPKTHDRSIZE)) { 1509 printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x", 1510 m0, mlen, m0->m_pkthdr.tso_segsz, 1511 m0->m_pkthdr.csum_flags, m0->m_flags); 1512 panic("tx tso packet too small"); 1513 } 1514 1515 /* Make sure that ether, ip, tcp headers are all in m0 */ 1516 if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { 1517 m0 = m_pullup(m0, TCPPKTHDRSIZE); 1518 if (__predict_false(m0 == NULL)) { 1519 /* XXX panic probably an overreaction */ 1520 panic("couldn't fit header into mbuf"); 1521 } 1522 } 1523 1524 eh = mtod(m0, struct ether_header *); 1525 eth_type = eh->ether_type; 1526 if (eth_type == htons(ETHERTYPE_VLAN)) { 1527 struct ether_vlan_header *evh = (void *)eh; 1528 1529 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); 1530 l3hdr = evh + 1; 1531 eth_type = evh->evl_proto; 1532 } else { 1533 tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); 1534 l3hdr = eh + 1; 1535 } 1536 1537 if (eth_type == htons(ETHERTYPE_IP)) { 1538 struct ip *ip = l3hdr; 1539 1540 tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); 1541 tcp = (struct tcphdr *)(ip + 1); 1542 } else if (eth_type == htons(ETHERTYPE_IPV6)) { 1543 struct ip6_hdr *ip6 = l3hdr; 1544 1545 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, 1546 ("%s: CSUM_TSO with ip6_nxt %d", 1547 __func__, ip6->ip6_nxt)); 1548 1549 tso_info |= F_LSO_IPV6; 1550 tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); 1551 tcp = (struct tcphdr *)(ip6 + 1); 1552 } else 1553 panic("%s: 
CSUM_TSO but neither ip nor ip6", __func__); 1554 1555 tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); 1556 hdr->lso_info = htonl(tso_info); 1557 1558 if (__predict_false(mlen <= PIO_LEN)) { 1559 /* 1560 * pkt not undersized but fits in PIO_LEN 1561 * Indicates a TSO bug at the higher levels. 1562 */ 1563 txsd->m = NULL; 1564 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); 1565 flits = (mlen + 7) / 8 + 3; 1566 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1567 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1568 F_WR_SOP | F_WR_EOP | txqs.compl); 1569 wr_lo = htonl(V_WR_LEN(flits) | 1570 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1571 set_wr_hdr(&hdr->wr, wr_hi, wr_lo); 1572 wmb(); 1573 ETHER_BPF_MTAP(pi->ifp, m0); 1574 wr_gen2(txd, txqs.gen); 1575 check_ring_tx_db(sc, txq, 0); 1576 m_freem(m0); 1577 return (0); 1578 } 1579 flits = 3; 1580 } else { 1581 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; 1582 1583 GET_VTAG(cntrl, m0); 1584 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); 1585 if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) 1586 cntrl |= F_TXPKT_IPCSUM_DIS; 1587 if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))) 1588 cntrl |= F_TXPKT_L4CSUM_DIS; 1589 cpl->cntrl = htonl(cntrl); 1590 cpl->len = htonl(mlen | 0x80000000); 1591 1592 if (mlen <= PIO_LEN) { 1593 txsd->m = NULL; 1594 m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); 1595 flits = (mlen + 7) / 8 + 2; 1596 1597 wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | 1598 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | 1599 F_WR_SOP | F_WR_EOP | txqs.compl); 1600 wr_lo = htonl(V_WR_LEN(flits) | 1601 V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); 1602 set_wr_hdr(&cpl->wr, wr_hi, wr_lo); 1603 wmb(); 1604 ETHER_BPF_MTAP(pi->ifp, m0); 1605 wr_gen2(txd, txqs.gen); 1606 check_ring_tx_db(sc, txq, 0); 1607 m_freem(m0); 1608 return (0); 1609 } 1610 flits = 2; 1611 } 1612 wrp = (struct work_request_hdr *)txd; 1613 sgp = (ndesc == 1) ? 
(struct sg_ent *)&txd->flit[flits] : sgl; 1614 make_sgl(sgp, segs, nsegs); 1615 1616 sgl_flits = sgl_len(nsegs); 1617 1618 ETHER_BPF_MTAP(pi->ifp, m0); 1619 1620 KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); 1621 wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); 1622 wr_lo = htonl(V_WR_TID(txq->token)); 1623 write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, 1624 sgl_flits, wr_hi, wr_lo); 1625 check_ring_tx_db(sc, txq, 0); 1626 1627 return (0); 1628 } 1629 1630 void 1631 cxgb_tx_watchdog(void *arg) 1632 { 1633 struct sge_qset *qs = arg; 1634 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1635 1636 if (qs->coalescing != 0 && 1637 (txq->in_use <= cxgb_tx_coalesce_enable_stop) && 1638 TXQ_RING_EMPTY(qs)) 1639 qs->coalescing = 0; 1640 else if (qs->coalescing == 0 && 1641 (txq->in_use >= cxgb_tx_coalesce_enable_start)) 1642 qs->coalescing = 1; 1643 if (TXQ_TRYLOCK(qs)) { 1644 qs->qs_flags |= QS_FLUSHING; 1645 cxgb_start_locked(qs); 1646 qs->qs_flags &= ~QS_FLUSHING; 1647 TXQ_UNLOCK(qs); 1648 } 1649 if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) 1650 callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, 1651 qs, txq->txq_watchdog.c_cpu); 1652 } 1653 1654 static void 1655 cxgb_tx_timeout(void *arg) 1656 { 1657 struct sge_qset *qs = arg; 1658 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1659 1660 if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) 1661 qs->coalescing = 1; 1662 if (TXQ_TRYLOCK(qs)) { 1663 qs->qs_flags |= QS_TIMEOUT; 1664 cxgb_start_locked(qs); 1665 qs->qs_flags &= ~QS_TIMEOUT; 1666 TXQ_UNLOCK(qs); 1667 } 1668 } 1669 1670 static void 1671 cxgb_start_locked(struct sge_qset *qs) 1672 { 1673 struct mbuf *m_head = NULL; 1674 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1675 struct port_info *pi = qs->port; 1676 struct ifnet *ifp = pi->ifp; 1677 1678 if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) 1679 reclaim_completed_tx(qs, 0, TXQ_ETH); 1680 1681 if (!pi->link_config.link_ok) { 1682 TXQ_RING_FLUSH(qs); 1683 return; 1684 } 1685 TXQ_LOCK_ASSERT(qs); 1686 while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && 1687 pi->link_config.link_ok) { 1688 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1689 1690 if (txq->size - txq->in_use <= TX_MAX_DESC) 1691 break; 1692 1693 if ((m_head = cxgb_dequeue(qs)) == NULL) 1694 break; 1695 /* 1696 * Encapsulation can modify our pointer, and or make it 1697 * NULL on failure. In that event, we can't requeue. 
1698 */ 1699 if (t3_encap(qs, &m_head) || m_head == NULL) 1700 break; 1701 1702 m_head = NULL; 1703 } 1704 1705 if (txq->db_pending) 1706 check_ring_tx_db(pi->adapter, txq, 1); 1707 1708 if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && 1709 pi->link_config.link_ok) 1710 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1711 qs, txq->txq_timer.c_cpu); 1712 if (m_head != NULL) 1713 m_freem(m_head); 1714 } 1715 1716 static int 1717 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) 1718 { 1719 struct port_info *pi = qs->port; 1720 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 1721 struct buf_ring *br = txq->txq_mr; 1722 int error, avail; 1723 1724 avail = txq->size - txq->in_use; 1725 TXQ_LOCK_ASSERT(qs); 1726 1727 /* 1728 * We can only do a direct transmit if the following are true: 1729 * - we aren't coalescing (ring < 3/4 full) 1730 * - the link is up -- checked in caller 1731 * - there are no packets enqueued already 1732 * - there is space in hardware transmit queue 1733 */ 1734 if (check_pkt_coalesce(qs) == 0 && 1735 !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { 1736 if (t3_encap(qs, &m)) { 1737 if (m != NULL && 1738 (error = drbr_enqueue(ifp, br, m)) != 0) 1739 return (error); 1740 } else { 1741 if (txq->db_pending) 1742 check_ring_tx_db(pi->adapter, txq, 1); 1743 1744 /* 1745 * We've bypassed the buf ring so we need to update 1746 * the stats directly 1747 */ 1748 txq->txq_direct_packets++; 1749 txq->txq_direct_bytes += m->m_pkthdr.len; 1750 } 1751 } else if ((error = drbr_enqueue(ifp, br, m)) != 0) 1752 return (error); 1753 1754 reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); 1755 if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && 1756 (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) 1757 cxgb_start_locked(qs); 1758 else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) 1759 callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, 1760 qs, txq->txq_timer.c_cpu); 1761 return (0); 1762 } 1763 1764 int 1765 cxgb_transmit(struct ifnet *ifp, struct mbuf *m) 1766 { 1767 struct sge_qset *qs; 1768 struct port_info *pi = ifp->if_softc; 1769 int error, qidx = pi->first_qset; 1770 1771 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 1772 ||(!pi->link_config.link_ok)) { 1773 m_freem(m); 1774 return (0); 1775 } 1776 1777 if (m->m_flags & M_FLOWID) 1778 qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; 1779 1780 qs = &pi->adapter->sge.qs[qidx]; 1781 1782 if (TXQ_TRYLOCK(qs)) { 1783 /* XXX running */ 1784 error = cxgb_transmit_locked(ifp, qs, m); 1785 TXQ_UNLOCK(qs); 1786 } else 1787 error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); 1788 return (error); 1789 } 1790 1791 void 1792 cxgb_qflush(struct ifnet *ifp) 1793 { 1794 /* 1795 * flush any enqueued mbufs in the buf_rings 1796 * and in the transmit queues 1797 * no-op for now 1798 */ 1799 return; 1800 } 1801 1802 /** 1803 * write_imm - write a packet into a Tx descriptor as immediate data 1804 * @d: the Tx descriptor to write 1805 * @m: the packet 1806 * @len: the length of packet data to write as immediate data 1807 * @gen: the generation bit value to write 1808 * 1809 * Writes a packet as immediate data into a Tx descriptor. The packet 1810 * contains a work request at its beginning. We must write the packet 1811 * carefully so the SGE doesn't read accidentally before it's written in 1812 * its entirety. 
1813 */ 1814 static __inline void 1815 write_imm(struct tx_desc *d, struct mbuf *m, 1816 unsigned int len, unsigned int gen) 1817 { 1818 struct work_request_hdr *from = mtod(m, struct work_request_hdr *); 1819 struct work_request_hdr *to = (struct work_request_hdr *)d; 1820 uint32_t wr_hi, wr_lo; 1821 1822 if (len > WR_LEN) 1823 panic("len too big %d\n", len); 1824 if (len < sizeof(*from)) 1825 panic("len too small %d", len); 1826 1827 memcpy(&to[1], &from[1], len - sizeof(*from)); 1828 wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | 1829 V_WR_BCNTLFLT(len & 7)); 1830 wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | 1831 V_WR_LEN((len + 7) / 8)); 1832 set_wr_hdr(to, wr_hi, wr_lo); 1833 wmb(); 1834 wr_gen2(d, gen); 1835 1836 /* 1837 * This check is a hack we should really fix the logic so 1838 * that this can't happen 1839 */ 1840 if (m->m_type != MT_DONTFREE) 1841 m_freem(m); 1842 1843 } 1844 1845 /** 1846 * check_desc_avail - check descriptor availability on a send queue 1847 * @adap: the adapter 1848 * @q: the TX queue 1849 * @m: the packet needing the descriptors 1850 * @ndesc: the number of Tx descriptors needed 1851 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) 1852 * 1853 * Checks if the requested number of Tx descriptors is available on an 1854 * SGE send queue. If the queue is already suspended or not enough 1855 * descriptors are available the packet is queued for later transmission. 1856 * Must be called with the Tx queue locked. 1857 * 1858 * Returns 0 if enough descriptors are available, 1 if there aren't 1859 * enough descriptors and the packet has been queued, and 2 if the caller 1860 * needs to retry because there weren't enough descriptors at the 1861 * beginning of the call but some freed up in the mean time. 1862 */ 1863 static __inline int 1864 check_desc_avail(adapter_t *adap, struct sge_txq *q, 1865 struct mbuf *m, unsigned int ndesc, 1866 unsigned int qid) 1867 { 1868 /* 1869 * XXX We currently only use this for checking the control queue 1870 * the control queue is only used for binding qsets which happens 1871 * at init time so we are guaranteed enough descriptors 1872 */ 1873 if (__predict_false(!mbufq_empty(&q->sendq))) { 1874 addq_exit: mbufq_tail(&q->sendq, m); 1875 return 1; 1876 } 1877 if (__predict_false(q->size - q->in_use < ndesc)) { 1878 1879 struct sge_qset *qs = txq_to_qset(q, qid); 1880 1881 setbit(&qs->txq_stopped, qid); 1882 if (should_restart_tx(q) && 1883 test_and_clear_bit(qid, &qs->txq_stopped)) 1884 return 2; 1885 1886 q->stops++; 1887 goto addq_exit; 1888 } 1889 return 0; 1890 } 1891 1892 1893 /** 1894 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs 1895 * @q: the SGE control Tx queue 1896 * 1897 * This is a variant of reclaim_completed_tx() that is used for Tx queues 1898 * that send only immediate data (presently just the control queues) and 1899 * thus do not have any mbufs 1900 */ 1901 static __inline void 1902 reclaim_completed_tx_imm(struct sge_txq *q) 1903 { 1904 unsigned int reclaim = q->processed - q->cleaned; 1905 1906 q->in_use -= reclaim; 1907 q->cleaned += reclaim; 1908 } 1909 1910 static __inline int 1911 immediate(const struct mbuf *m) 1912 { 1913 return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN ; 1914 } 1915 1916 /** 1917 * ctrl_xmit - send a packet through an SGE control Tx queue 1918 * @adap: the adapter 1919 * @q: the control queue 1920 * @m: the packet 1921 * 1922 * Send a packet through an SGE control Tx queue. 
Packets sent through 1923 * a control queue must fit entirely as immediate data in a single Tx 1924 * descriptor and have no page fragments. 1925 */ 1926 static int 1927 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 1928 { 1929 int ret; 1930 struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); 1931 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1932 1933 if (__predict_false(!immediate(m))) { 1934 m_freem(m); 1935 return 0; 1936 } 1937 1938 wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); 1939 wrp->wrh_lo = htonl(V_WR_TID(q->token)); 1940 1941 TXQ_LOCK(qs); 1942 again: reclaim_completed_tx_imm(q); 1943 1944 ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); 1945 if (__predict_false(ret)) { 1946 if (ret == 1) { 1947 TXQ_UNLOCK(qs); 1948 return (ENOSPC); 1949 } 1950 goto again; 1951 } 1952 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1953 1954 q->in_use++; 1955 if (++q->pidx >= q->size) { 1956 q->pidx = 0; 1957 q->gen ^= 1; 1958 } 1959 TXQ_UNLOCK(qs); 1960 wmb(); 1961 t3_write_reg(adap, A_SG_KDOORBELL, 1962 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 1963 return (0); 1964 } 1965 1966 1967 /** 1968 * restart_ctrlq - restart a suspended control queue 1969 * @qs: the queue set cotaining the control queue 1970 * 1971 * Resumes transmission on a suspended Tx control queue. 1972 */ 1973 static void 1974 restart_ctrlq(void *data, int npending) 1975 { 1976 struct mbuf *m; 1977 struct sge_qset *qs = (struct sge_qset *)data; 1978 struct sge_txq *q = &qs->txq[TXQ_CTRL]; 1979 adapter_t *adap = qs->port->adapter; 1980 1981 TXQ_LOCK(qs); 1982 again: reclaim_completed_tx_imm(q); 1983 1984 while (q->in_use < q->size && 1985 (m = mbufq_dequeue(&q->sendq)) != NULL) { 1986 1987 write_imm(&q->desc[q->pidx], m, m->m_len, q->gen); 1988 1989 if (++q->pidx >= q->size) { 1990 q->pidx = 0; 1991 q->gen ^= 1; 1992 } 1993 q->in_use++; 1994 } 1995 if (!mbufq_empty(&q->sendq)) { 1996 setbit(&qs->txq_stopped, TXQ_CTRL); 1997 1998 if (should_restart_tx(q) && 1999 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) 2000 goto again; 2001 q->stops++; 2002 } 2003 TXQ_UNLOCK(qs); 2004 t3_write_reg(adap, A_SG_KDOORBELL, 2005 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2006 } 2007 2008 2009 /* 2010 * Send a management message through control queue 0 2011 */ 2012 int 2013 t3_mgmt_tx(struct adapter *adap, struct mbuf *m) 2014 { 2015 return ctrl_xmit(adap, &adap->sge.qs[0], m); 2016 } 2017 2018 /** 2019 * free_qset - free the resources of an SGE queue set 2020 * @sc: the controller owning the queue set 2021 * @q: the queue set 2022 * 2023 * Release the HW and SW resources associated with an SGE queue set, such 2024 * as HW contexts, packet buffers, and descriptor rings. Traffic to the 2025 * queue set must be quiesced prior to calling this. 
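 *
 *	A minimal calling sketch (this mirrors t3_free_sge_resources() below):
 *	the queue set lock must be held on entry and is dropped and destroyed
 *	here, i.e.
 *
 *		TXQ_LOCK(qs);
 *		t3_free_qset(sc, qs);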
 */
static void
t3_free_qset(adapter_t *sc, struct sge_qset *q)
{
	int i;

	reclaim_completed_tx(q, 0, TXQ_ETH);
	if (q->txq[TXQ_ETH].txq_mr != NULL)
		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
	}

	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
		if (q->fl[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
					q->fl[i].desc_map);
			bus_dma_tag_destroy(q->fl[i].desc_tag);
			bus_dma_tag_destroy(q->fl[i].entry_tag);
		}
		if (q->fl[i].sdesc) {
			free_rx_bufs(sc, &q->fl[i]);
			free(q->fl[i].sdesc, M_DEVBUF);
		}
	}

	mtx_unlock(&q->lock);
	MTX_DESTROY(&q->lock);
	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if (q->txq[i].desc) {
			mtx_lock_spin(&sc->sge.reg_lock);
			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
			mtx_unlock_spin(&sc->sge.reg_lock);
			bus_dmamap_unload(q->txq[i].desc_tag,
					q->txq[i].desc_map);
			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
					q->txq[i].desc_map);
			bus_dma_tag_destroy(q->txq[i].desc_tag);
			bus_dma_tag_destroy(q->txq[i].entry_tag);
		}
		if (q->txq[i].sdesc) {
			free(q->txq[i].sdesc, M_DEVBUF);
		}
	}

	if (q->rspq.desc) {
		mtx_lock_spin(&sc->sge.reg_lock);
		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
		mtx_unlock_spin(&sc->sge.reg_lock);

		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
				q->rspq.desc_map);
		bus_dma_tag_destroy(q->rspq.desc_tag);
		MTX_DESTROY(&q->rspq.lock);
	}

#ifdef INET
	tcp_lro_free(&q->lro.ctrl);
#endif

	bzero(q, sizeof(*q));
}

/**
 *	t3_free_sge_resources - free SGE resources
 *	@sc: the adapter softc
 *	@nqsets: the number of queue sets to free
 *
 *	Frees resources used by the SGE queue sets.
 */
void
t3_free_sge_resources(adapter_t *sc, int nqsets)
{
	int i;

	for (i = 0; i < nqsets; ++i) {
		TXQ_LOCK(&sc->sge.qs[i]);
		t3_free_qset(sc, &sc->sge.qs[i]);
	}
}

/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs.  This is the last step in starting packet
 *	transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}

/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine.  This can be called in emergencies (e.g.,
 *	from error interrupts) or from normal process context.  In the latter
 *	case it also disables any pending queue restart tasklets.  Note that
 *	if it is called in interrupt context it cannot disable the restart
 *	tasklets as it cannot wait, however the tasklets will have no effect
 *	since the doorbells are disabled and the driver will call this again
 *	later from process context, at which time the tasklets will be stopped
 *	if they are still running.
 */
void
t3_sge_stop(adapter_t *sc)
{
	int i, nqsets;

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);

	if (sc->tq == NULL)
		return;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;
#ifdef notyet
	/*
	 *
	 * XXX
	 */
	for (i = 0; i < nqsets; ++i) {
		struct sge_qset *qs = &sc->sge.qs[i];

		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
	}
#endif
}

/**
 *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
 *	@qs: the queue set that owns the Tx queue
 *	@reclaimable: the number of descriptors to reclaim
 *	@queue: the index of the Tx queue within the set (TXQ_ETH, TXQ_OFLD,
 *		or TXQ_CTRL)
 *
 *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *	Tx buffers.  Called with the Tx queue lock held.
 */
void
t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
{
	struct tx_sw_desc *txsd;
	unsigned int cidx, mask;
	struct sge_txq *q = &qs->txq[queue];

#ifdef T3_TRACE
	T3_TRACE2(sc->tb[q->cntxt_id & 7],
		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
#endif
	cidx = q->cidx;
	mask = q->size - 1;
	txsd = &q->sdesc[cidx];

	mtx_assert(&qs->lock, MA_OWNED);
	while (reclaimable--) {
		prefetch(q->sdesc[(cidx + 1) & mask].m);
		prefetch(q->sdesc[(cidx + 2) & mask].m);

		if (txsd->m != NULL) {
			if (txsd->flags & TX_SW_DESC_MAPPED) {
				bus_dmamap_unload(q->entry_tag, txsd->map);
				txsd->flags &= ~TX_SW_DESC_MAPPED;
			}
			m_freem_list(txsd->m);
			txsd->m = NULL;
		} else
			q->txq_skipped++;

		++txsd;
		if (++cidx == q->size) {
			cidx = 0;
			txsd = q->sdesc;
		}
	}
	q->cidx = cidx;

}

/**
 *	is_new_response - check if a response is newly written
 *	@r: the response descriptor
 *	@q: the response queue
 *
 *	Returns true if a response descriptor contains a yet unprocessed
 *	response.
 */
static __inline int
is_new_response(const struct rsp_desc *r,
    const struct sge_rspq *q)
{
	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
}

#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
#define NOMEM_INTR_DELAY 2500

/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter
 *	@m: the packet to send
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *	@segs: the DMA segments backing the packet payload
 *	@nsegs: the number of DMA segments
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
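 *
 *	A rough sketch of what the routine below does (no new behaviour): when
 *	the request fits in a single descriptor the scatter-gather list is
 *	placed inline right after the header flits,
 *
 *		sgp = (struct sg_ent *)&d->flit[m->m_len / 8];
 *		make_sgl(sgp, segs, nsegs);
 *
 *	otherwise it is staged in a stack array and spread across descriptors
 *	by write_wr_hdr_sgl().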
2252 */ 2253 static void 2254 write_ofld_wr(adapter_t *adap, struct mbuf *m, 2255 struct sge_txq *q, unsigned int pidx, 2256 unsigned int gen, unsigned int ndesc, 2257 bus_dma_segment_t *segs, unsigned int nsegs) 2258 { 2259 unsigned int sgl_flits, flits; 2260 struct work_request_hdr *from; 2261 struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1]; 2262 struct tx_desc *d = &q->desc[pidx]; 2263 struct txq_state txqs; 2264 2265 if (immediate(m) && nsegs == 0) { 2266 write_imm(d, m, m->m_len, gen); 2267 return; 2268 } 2269 2270 /* Only TX_DATA builds SGLs */ 2271 from = mtod(m, struct work_request_hdr *); 2272 memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from)); 2273 2274 flits = m->m_len / 8; 2275 sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl; 2276 2277 make_sgl(sgp, segs, nsegs); 2278 sgl_flits = sgl_len(nsegs); 2279 2280 txqs.gen = gen; 2281 txqs.pidx = pidx; 2282 txqs.compl = 0; 2283 2284 write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits, 2285 from->wrh_hi, from->wrh_lo); 2286 } 2287 2288 /** 2289 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet 2290 * @m: the packet 2291 * 2292 * Returns the number of Tx descriptors needed for the given offload 2293 * packet. These packets are already fully constructed. 2294 */ 2295 static __inline unsigned int 2296 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs) 2297 { 2298 unsigned int flits, cnt = 0; 2299 int ndescs; 2300 2301 if (m->m_len <= WR_LEN && nsegs == 0) 2302 return (1); /* packet fits as immediate data */ 2303 2304 /* 2305 * This needs to be re-visited for TOE 2306 */ 2307 2308 cnt = nsegs; 2309 2310 /* headers */ 2311 flits = m->m_len / 8; 2312 2313 ndescs = flits_to_desc(flits + sgl_len(cnt)); 2314 2315 return (ndescs); 2316 } 2317 2318 /** 2319 * ofld_xmit - send a packet through an offload queue 2320 * @adap: the adapter 2321 * @q: the Tx offload queue 2322 * @m: the packet 2323 * 2324 * Send an offload packet through an SGE offload queue. 
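 *
 *	Note, summarizing the code below rather than adding to it: when no
 *	descriptors are available the mbuf is left on q->sendq and its
 *	descriptor count is stashed in the packet priority,
 *
 *		m_set_priority(m, ndesc);	(save for restart)
 *
 *	so that restart_offloadq() can resume it later without recomputing
 *	the count.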
2325 */ 2326 static int 2327 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) 2328 { 2329 int ret, nsegs; 2330 unsigned int ndesc; 2331 unsigned int pidx, gen; 2332 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2333 bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs; 2334 struct tx_sw_desc *stx; 2335 2336 nsegs = m_get_sgllen(m); 2337 vsegs = m_get_sgl(m); 2338 ndesc = calc_tx_descs_ofld(m, nsegs); 2339 busdma_map_sgl(vsegs, segs, nsegs); 2340 2341 stx = &q->sdesc[q->pidx]; 2342 2343 TXQ_LOCK(qs); 2344 again: reclaim_completed_tx(qs, 16, TXQ_OFLD); 2345 ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); 2346 if (__predict_false(ret)) { 2347 if (ret == 1) { 2348 printf("no ofld desc avail\n"); 2349 2350 m_set_priority(m, ndesc); /* save for restart */ 2351 TXQ_UNLOCK(qs); 2352 return (EINTR); 2353 } 2354 goto again; 2355 } 2356 2357 gen = q->gen; 2358 q->in_use += ndesc; 2359 pidx = q->pidx; 2360 q->pidx += ndesc; 2361 if (q->pidx >= q->size) { 2362 q->pidx -= q->size; 2363 q->gen ^= 1; 2364 } 2365 #ifdef T3_TRACE 2366 T3_TRACE5(adap->tb[q->cntxt_id & 7], 2367 "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u", 2368 ndesc, pidx, skb->len, skb->len - skb->data_len, 2369 skb_shinfo(skb)->nr_frags); 2370 #endif 2371 TXQ_UNLOCK(qs); 2372 2373 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2374 check_ring_tx_db(adap, q, 1); 2375 return (0); 2376 } 2377 2378 /** 2379 * restart_offloadq - restart a suspended offload queue 2380 * @qs: the queue set cotaining the offload queue 2381 * 2382 * Resumes transmission on a suspended Tx offload queue. 2383 */ 2384 static void 2385 restart_offloadq(void *data, int npending) 2386 { 2387 struct mbuf *m; 2388 struct sge_qset *qs = data; 2389 struct sge_txq *q = &qs->txq[TXQ_OFLD]; 2390 adapter_t *adap = qs->port->adapter; 2391 bus_dma_segment_t segs[TX_MAX_SEGS]; 2392 struct tx_sw_desc *stx = &q->sdesc[q->pidx]; 2393 int nsegs, cleaned; 2394 2395 TXQ_LOCK(qs); 2396 again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); 2397 2398 while ((m = mbufq_peek(&q->sendq)) != NULL) { 2399 unsigned int gen, pidx; 2400 unsigned int ndesc = m_get_priority(m); 2401 2402 if (__predict_false(q->size - q->in_use < ndesc)) { 2403 setbit(&qs->txq_stopped, TXQ_OFLD); 2404 if (should_restart_tx(q) && 2405 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) 2406 goto again; 2407 q->stops++; 2408 break; 2409 } 2410 2411 gen = q->gen; 2412 q->in_use += ndesc; 2413 pidx = q->pidx; 2414 q->pidx += ndesc; 2415 if (q->pidx >= q->size) { 2416 q->pidx -= q->size; 2417 q->gen ^= 1; 2418 } 2419 2420 (void)mbufq_dequeue(&q->sendq); 2421 busdma_map_mbufs(&m, q, stx, segs, &nsegs); 2422 TXQ_UNLOCK(qs); 2423 write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs); 2424 TXQ_LOCK(qs); 2425 } 2426 #if USE_GTS 2427 set_bit(TXQ_RUNNING, &q->flags); 2428 set_bit(TXQ_LAST_PKT_DB, &q->flags); 2429 #endif 2430 TXQ_UNLOCK(qs); 2431 wmb(); 2432 t3_write_reg(adap, A_SG_KDOORBELL, 2433 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); 2434 } 2435 2436 /** 2437 * queue_set - return the queue set a packet should use 2438 * @m: the packet 2439 * 2440 * Maps a packet to the SGE queue set it should use. The desired queue 2441 * set is carried in bits 1-3 in the packet's priority. 2442 */ 2443 static __inline int 2444 queue_set(const struct mbuf *m) 2445 { 2446 return m_get_priority(m) >> 1; 2447 } 2448 2449 /** 2450 * is_ctrl_pkt - return whether an offload packet is a control packet 2451 * @m: the packet 2452 * 2453 * Determines whether an offload packet should use an OFLD or a CTRL 2454 * Tx queue. 
This is indicated by bit 0 in the packet's priority. 2455 */ 2456 static __inline int 2457 is_ctrl_pkt(const struct mbuf *m) 2458 { 2459 return m_get_priority(m) & 1; 2460 } 2461 2462 /** 2463 * t3_offload_tx - send an offload packet 2464 * @tdev: the offload device to send to 2465 * @m: the packet 2466 * 2467 * Sends an offload packet. We use the packet priority to select the 2468 * appropriate Tx queue as follows: bit 0 indicates whether the packet 2469 * should be sent as regular or control, bits 1-3 select the queue set. 2470 */ 2471 int 2472 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m) 2473 { 2474 adapter_t *adap = tdev2adap(tdev); 2475 struct sge_qset *qs = &adap->sge.qs[queue_set(m)]; 2476 2477 if (__predict_false(is_ctrl_pkt(m))) 2478 return ctrl_xmit(adap, qs, m); 2479 2480 return ofld_xmit(adap, qs, m); 2481 } 2482 2483 /** 2484 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts 2485 * @tdev: the offload device that will be receiving the packets 2486 * @q: the SGE response queue that assembled the bundle 2487 * @m: the partial bundle 2488 * @n: the number of packets in the bundle 2489 * 2490 * Delivers a (partial) bundle of Rx offload packets to an offload device. 2491 */ 2492 static __inline void 2493 deliver_partial_bundle(struct t3cdev *tdev, 2494 struct sge_rspq *q, 2495 struct mbuf *mbufs[], int n) 2496 { 2497 if (n) { 2498 q->offload_bundles++; 2499 cxgb_ofld_recv(tdev, mbufs, n); 2500 } 2501 } 2502 2503 static __inline int 2504 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, 2505 struct mbuf *m, struct mbuf *rx_gather[], 2506 unsigned int gather_idx) 2507 { 2508 2509 rq->offload_pkts++; 2510 m->m_pkthdr.header = mtod(m, void *); 2511 rx_gather[gather_idx++] = m; 2512 if (gather_idx == RX_BUNDLE_SIZE) { 2513 cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE); 2514 gather_idx = 0; 2515 rq->offload_bundles++; 2516 } 2517 return (gather_idx); 2518 } 2519 2520 static void 2521 restart_tx(struct sge_qset *qs) 2522 { 2523 struct adapter *sc = qs->port->adapter; 2524 2525 2526 if (isset(&qs->txq_stopped, TXQ_OFLD) && 2527 should_restart_tx(&qs->txq[TXQ_OFLD]) && 2528 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { 2529 qs->txq[TXQ_OFLD].restarts++; 2530 DPRINTF("restarting TXQ_OFLD\n"); 2531 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); 2532 } 2533 DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n", 2534 qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]), 2535 qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned, 2536 qs->txq[TXQ_CTRL].in_use); 2537 2538 if (isset(&qs->txq_stopped, TXQ_CTRL) && 2539 should_restart_tx(&qs->txq[TXQ_CTRL]) && 2540 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { 2541 qs->txq[TXQ_CTRL].restarts++; 2542 DPRINTF("restarting TXQ_CTRL\n"); 2543 taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); 2544 } 2545 } 2546 2547 /** 2548 * t3_sge_alloc_qset - initialize an SGE queue set 2549 * @sc: the controller softc 2550 * @id: the queue set id 2551 * @nports: how many Ethernet ports will be using this queue set 2552 * @irq_vec_idx: the IRQ vector index for response queue interrupts 2553 * @p: configuration parameters for this queue set 2554 * @ntxq: number of Tx queues for the queue set 2555 * @pi: port info for queue set 2556 * 2557 * Allocate resources and initialize an SGE queue set. A queue set 2558 * comprises a response queue, two Rx free-buffer queues, and up to 3 2559 * Tx queues. 
The Tx queues are assigned roles in the order Ethernet 2560 * queue, offload queue, and control queue. 2561 */ 2562 int 2563 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, 2564 const struct qset_params *p, int ntxq, struct port_info *pi) 2565 { 2566 struct sge_qset *q = &sc->sge.qs[id]; 2567 int i, ret = 0; 2568 2569 MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); 2570 q->port = pi; 2571 2572 if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, 2573 M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { 2574 device_printf(sc->dev, "failed to allocate mbuf ring\n"); 2575 goto err; 2576 } 2577 if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, 2578 M_NOWAIT | M_ZERO)) == NULL) { 2579 device_printf(sc->dev, "failed to allocate ifq\n"); 2580 goto err; 2581 } 2582 ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); 2583 callout_init(&q->txq[TXQ_ETH].txq_timer, 1); 2584 callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); 2585 q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; 2586 q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; 2587 2588 init_qset_cntxt(q, id); 2589 q->idx = id; 2590 if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), 2591 sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, 2592 &q->fl[0].desc, &q->fl[0].sdesc, 2593 &q->fl[0].desc_tag, &q->fl[0].desc_map, 2594 sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { 2595 printf("error %d from alloc ring fl0\n", ret); 2596 goto err; 2597 } 2598 2599 if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), 2600 sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, 2601 &q->fl[1].desc, &q->fl[1].sdesc, 2602 &q->fl[1].desc_tag, &q->fl[1].desc_map, 2603 sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { 2604 printf("error %d from alloc ring fl1\n", ret); 2605 goto err; 2606 } 2607 2608 if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, 2609 &q->rspq.phys_addr, &q->rspq.desc, NULL, 2610 &q->rspq.desc_tag, &q->rspq.desc_map, 2611 NULL, NULL)) != 0) { 2612 printf("error %d from alloc ring rspq\n", ret); 2613 goto err; 2614 } 2615 2616 for (i = 0; i < ntxq; ++i) { 2617 size_t sz = i == TXQ_CTRL ? 
0 : sizeof(struct tx_sw_desc); 2618 2619 if ((ret = alloc_ring(sc, p->txq_size[i], 2620 sizeof(struct tx_desc), sz, 2621 &q->txq[i].phys_addr, &q->txq[i].desc, 2622 &q->txq[i].sdesc, &q->txq[i].desc_tag, 2623 &q->txq[i].desc_map, 2624 sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { 2625 printf("error %d from alloc ring tx %i\n", ret, i); 2626 goto err; 2627 } 2628 mbufq_init(&q->txq[i].sendq); 2629 q->txq[i].gen = 1; 2630 q->txq[i].size = p->txq_size[i]; 2631 } 2632 2633 TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); 2634 TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); 2635 TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2636 TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); 2637 2638 q->fl[0].gen = q->fl[1].gen = 1; 2639 q->fl[0].size = p->fl_size; 2640 q->fl[1].size = p->jumbo_size; 2641 2642 q->rspq.gen = 1; 2643 q->rspq.cidx = 0; 2644 q->rspq.size = p->rspq_size; 2645 2646 q->txq[TXQ_ETH].stop_thres = nports * 2647 flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); 2648 2649 q->fl[0].buf_size = MCLBYTES; 2650 q->fl[0].zone = zone_pack; 2651 q->fl[0].type = EXT_PACKET; 2652 2653 if (p->jumbo_buf_size == MJUM16BYTES) { 2654 q->fl[1].zone = zone_jumbo16; 2655 q->fl[1].type = EXT_JUMBO16; 2656 } else if (p->jumbo_buf_size == MJUM9BYTES) { 2657 q->fl[1].zone = zone_jumbo9; 2658 q->fl[1].type = EXT_JUMBO9; 2659 } else if (p->jumbo_buf_size == MJUMPAGESIZE) { 2660 q->fl[1].zone = zone_jumbop; 2661 q->fl[1].type = EXT_JUMBOP; 2662 } else { 2663 KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); 2664 ret = EDOOFUS; 2665 goto err; 2666 } 2667 q->fl[1].buf_size = p->jumbo_buf_size; 2668 2669 /* Allocate and setup the lro_ctrl structure */ 2670 q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); 2671 #ifdef INET 2672 ret = tcp_lro_init(&q->lro.ctrl); 2673 if (ret) { 2674 printf("error %d from tcp_lro_init\n", ret); 2675 goto err; 2676 } 2677 #endif 2678 q->lro.ctrl.ifp = pi->ifp; 2679 2680 mtx_lock_spin(&sc->sge.reg_lock); 2681 ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, 2682 q->rspq.phys_addr, q->rspq.size, 2683 q->fl[0].buf_size, 1, 0); 2684 if (ret) { 2685 printf("error %d from t3_sge_init_rspcntxt\n", ret); 2686 goto err_unlock; 2687 } 2688 2689 for (i = 0; i < SGE_RXQ_PER_SET; ++i) { 2690 ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, 2691 q->fl[i].phys_addr, q->fl[i].size, 2692 q->fl[i].buf_size, p->cong_thres, 1, 2693 0); 2694 if (ret) { 2695 printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); 2696 goto err_unlock; 2697 } 2698 } 2699 2700 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, 2701 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, 2702 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 2703 1, 0); 2704 if (ret) { 2705 printf("error %d from t3_sge_init_ecntxt\n", ret); 2706 goto err_unlock; 2707 } 2708 2709 if (ntxq > 1) { 2710 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, 2711 USE_GTS, SGE_CNTXT_OFLD, id, 2712 q->txq[TXQ_OFLD].phys_addr, 2713 q->txq[TXQ_OFLD].size, 0, 1, 0); 2714 if (ret) { 2715 printf("error %d from t3_sge_init_ecntxt\n", ret); 2716 goto err_unlock; 2717 } 2718 } 2719 2720 if (ntxq > 2) { 2721 ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, 2722 SGE_CNTXT_CTRL, id, 2723 q->txq[TXQ_CTRL].phys_addr, 2724 q->txq[TXQ_CTRL].size, 2725 q->txq[TXQ_CTRL].token, 1, 0); 2726 if (ret) { 2727 printf("error %d from t3_sge_init_ecntxt\n", ret); 2728 goto err_unlock; 2729 } 2730 } 2731 2732 
snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", 2733 device_get_unit(sc->dev), irq_vec_idx); 2734 MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); 2735 2736 mtx_unlock_spin(&sc->sge.reg_lock); 2737 t3_update_qset_coalesce(q, p); 2738 q->port = pi; 2739 2740 refill_fl(sc, &q->fl[0], q->fl[0].size); 2741 refill_fl(sc, &q->fl[1], q->fl[1].size); 2742 refill_rspq(sc, &q->rspq, q->rspq.size - 1); 2743 2744 t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | 2745 V_NEWTIMER(q->rspq.holdoff_tmr)); 2746 2747 return (0); 2748 2749 err_unlock: 2750 mtx_unlock_spin(&sc->sge.reg_lock); 2751 err: 2752 TXQ_LOCK(q); 2753 t3_free_qset(sc, q); 2754 2755 return (ret); 2756 } 2757 2758 /* 2759 * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with 2760 * ethernet data. Hardware assistance with various checksums and any vlan tag 2761 * will also be taken into account here. 2762 */ 2763 void 2764 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad) 2765 { 2766 struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); 2767 struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; 2768 struct ifnet *ifp = pi->ifp; 2769 2770 DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff); 2771 2772 if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment && 2773 cpl->csum_valid && cpl->csum == 0xffff) { 2774 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID); 2775 rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++; 2776 m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 2777 m->m_pkthdr.csum_data = 0xffff; 2778 } 2779 2780 if (cpl->vlan_valid) { 2781 m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); 2782 m->m_flags |= M_VLANTAG; 2783 } 2784 2785 m->m_pkthdr.rcvif = ifp; 2786 m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; 2787 /* 2788 * adjust after conversion to mbuf chain 2789 */ 2790 m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); 2791 m->m_len -= (sizeof(*cpl) + ethpad); 2792 m->m_data += (sizeof(*cpl) + ethpad); 2793 } 2794 2795 /** 2796 * get_packet - return the next ingress packet buffer from a free list 2797 * @adap: the adapter that received the packet 2798 * @drop_thres: # of remaining buffers before we start dropping packets 2799 * @qs: the qset that the SGE free list holding the packet belongs to 2800 * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain 2801 * @r: response descriptor 2802 * 2803 * Get the next packet from a free list and complete setup of the 2804 * sk_buff. If the packet is small we make a copy and recycle the 2805 * original buffer, otherwise we use the original buffer itself. If a 2806 * positive drop threshold is supplied packets are dropped and their 2807 * buffers recycled if (a) the number of remaining buffers is under the 2808 * threshold and the packet is too big to copy, or (b) the packet should 2809 * be copied but there is no memory for the copy. 2810 */ 2811 static int 2812 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, 2813 struct t3_mbuf_hdr *mh, struct rsp_desc *r) 2814 { 2815 2816 unsigned int len_cq = ntohl(r->len_cq); 2817 struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; 2818 int mask, cidx = fl->cidx; 2819 struct rx_sw_desc *sd = &fl->sdesc[cidx]; 2820 uint32_t len = G_RSPD_LEN(len_cq); 2821 uint32_t flags = M_EXT; 2822 uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); 2823 caddr_t cl; 2824 struct mbuf *m; 2825 int ret = 0; 2826 2827 mask = fl->size - 1; 2828 prefetch(fl->sdesc[(cidx + 1) & mask].m); 2829 prefetch(fl->sdesc[(cidx + 2) & mask].m); 2830 prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); 2831 prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); 2832 2833 fl->credits--; 2834 bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); 2835 2836 if (recycle_enable && len <= SGE_RX_COPY_THRES && 2837 sopeop == RSPQ_SOP_EOP) { 2838 if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) 2839 goto skip_recycle; 2840 cl = mtod(m, void *); 2841 memcpy(cl, sd->rxsd_cl, len); 2842 recycle_rx_buf(adap, fl, fl->cidx); 2843 m->m_pkthdr.len = m->m_len = len; 2844 m->m_flags = 0; 2845 mh->mh_head = mh->mh_tail = m; 2846 ret = 1; 2847 goto done; 2848 } else { 2849 skip_recycle: 2850 bus_dmamap_unload(fl->entry_tag, sd->map); 2851 cl = sd->rxsd_cl; 2852 m = sd->m; 2853 2854 if ((sopeop == RSPQ_SOP_EOP) || 2855 (sopeop == RSPQ_SOP)) 2856 flags |= M_PKTHDR; 2857 m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags); 2858 if (fl->zone == zone_pack) { 2859 /* 2860 * restore clobbered data pointer 2861 */ 2862 m->m_data = m->m_ext.ext_buf; 2863 } else { 2864 m_cljset(m, cl, fl->type); 2865 } 2866 m->m_len = len; 2867 } 2868 switch(sopeop) { 2869 case RSPQ_SOP_EOP: 2870 ret = 1; 2871 /* FALLTHROUGH */ 2872 case RSPQ_SOP: 2873 mh->mh_head = mh->mh_tail = m; 2874 m->m_pkthdr.len = len; 2875 break; 2876 case RSPQ_EOP: 2877 ret = 1; 2878 /* FALLTHROUGH */ 2879 case RSPQ_NSOP_NEOP: 2880 if (mh->mh_tail == NULL) { 2881 log(LOG_ERR, "discarding intermediate descriptor entry\n"); 2882 m_freem(m); 2883 break; 2884 } 2885 mh->mh_tail->m_next = m; 2886 mh->mh_tail = m; 2887 mh->mh_head->m_pkthdr.len += len; 2888 break; 2889 } 2890 if (cxgb_debug) 2891 printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); 2892 done: 2893 if (++fl->cidx == fl->size) 2894 fl->cidx = 0; 2895 2896 return (ret); 2897 } 2898 2899 /** 2900 * handle_rsp_cntrl_info - handles control information in a response 2901 * @qs: the queue set corresponding to the response 2902 * @flags: the response control flags 2903 * 2904 * Handles the control information of an SGE response, such as GTS 2905 * indications and completion credits for the queue set's Tx queues. 2906 * HW coalesces credits, we don't do any extra SW coalescing. 
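 *
 *	For reference, the credit fields map onto the queue set exactly as the
 *	code below applies them:
 *
 *		G_RSPD_TXQ0_CR(flags) -> txq[TXQ_ETH].processed
 *		G_RSPD_TXQ1_CR(flags) -> txq[TXQ_OFLD].processed
 *		G_RSPD_TXQ2_CR(flags) -> txq[TXQ_CTRL].processed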
2907 */ 2908 static __inline void 2909 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) 2910 { 2911 unsigned int credits; 2912 2913 #if USE_GTS 2914 if (flags & F_RSPD_TXQ0_GTS) 2915 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); 2916 #endif 2917 credits = G_RSPD_TXQ0_CR(flags); 2918 if (credits) 2919 qs->txq[TXQ_ETH].processed += credits; 2920 2921 credits = G_RSPD_TXQ2_CR(flags); 2922 if (credits) 2923 qs->txq[TXQ_CTRL].processed += credits; 2924 2925 # if USE_GTS 2926 if (flags & F_RSPD_TXQ1_GTS) 2927 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); 2928 # endif 2929 credits = G_RSPD_TXQ1_CR(flags); 2930 if (credits) 2931 qs->txq[TXQ_OFLD].processed += credits; 2932 2933 } 2934 2935 static void 2936 check_ring_db(adapter_t *adap, struct sge_qset *qs, 2937 unsigned int sleeping) 2938 { 2939 ; 2940 } 2941 2942 /** 2943 * process_responses - process responses from an SGE response queue 2944 * @adap: the adapter 2945 * @qs: the queue set to which the response queue belongs 2946 * @budget: how many responses can be processed in this round 2947 * 2948 * Process responses from an SGE response queue up to the supplied budget. 2949 * Responses include received packets as well as credits and other events 2950 * for the queues that belong to the response queue's queue set. 2951 * A negative budget is effectively unlimited. 2952 * 2953 * Additionally choose the interrupt holdoff time for the next interrupt 2954 * on this queue. If the system is under memory shortage use a fairly 2955 * long delay to help recovery. 2956 */ 2957 static int 2958 process_responses(adapter_t *adap, struct sge_qset *qs, int budget) 2959 { 2960 struct sge_rspq *rspq = &qs->rspq; 2961 struct rsp_desc *r = &rspq->desc[rspq->cidx]; 2962 int budget_left = budget; 2963 unsigned int sleeping = 0; 2964 int lro_enabled = qs->lro.enabled; 2965 int skip_lro; 2966 struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; 2967 struct mbuf *offload_mbufs[RX_BUNDLE_SIZE]; 2968 int ngathered = 0; 2969 struct t3_mbuf_hdr *mh = &rspq->rspq_mh; 2970 #ifdef DEBUG 2971 static int last_holdoff = 0; 2972 if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { 2973 printf("next_holdoff=%d\n", rspq->holdoff_tmr); 2974 last_holdoff = rspq->holdoff_tmr; 2975 } 2976 #endif 2977 rspq->next_holdoff = rspq->holdoff_tmr; 2978 2979 while (__predict_true(budget_left && is_new_response(r, rspq))) { 2980 int eth, eop = 0, ethpad = 0; 2981 uint32_t flags = ntohl(r->flags); 2982 uint32_t rss_csum = *(const uint32_t *)r; 2983 uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); 2984 2985 eth = (r->rss_hdr.opcode == CPL_RX_PKT); 2986 2987 if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { 2988 struct mbuf *m; 2989 2990 if (cxgb_debug) 2991 printf("async notification\n"); 2992 2993 if (mh->mh_head == NULL) { 2994 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 2995 m = mh->mh_head; 2996 } else { 2997 m = m_gethdr(M_DONTWAIT, MT_DATA); 2998 } 2999 if (m == NULL) 3000 goto no_mem; 3001 3002 memcpy(mtod(m, char *), r, AN_PKT_SIZE); 3003 m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; 3004 *mtod(m, char *) = CPL_ASYNC_NOTIF; 3005 rss_csum = htonl(CPL_ASYNC_NOTIF << 24); 3006 eop = 1; 3007 rspq->async_notif++; 3008 goto skip; 3009 } else if (flags & F_RSPD_IMM_DATA_VALID) { 3010 struct mbuf *m = NULL; 3011 3012 DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n", 3013 r->rss_hdr.opcode, rspq->cidx); 3014 if (mh->mh_head == NULL) 3015 mh->mh_head = m_gethdr(M_DONTWAIT, MT_DATA); 3016 else 3017 m = m_gethdr(M_DONTWAIT, MT_DATA); 3018 3019 if (mh->mh_head == NULL && m == NULL) { 
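				/*
				 * No mbuf could be allocated for this
				 * response: program a long interrupt holdoff
				 * (NOMEM_INTR_DELAY) and stop processing so
				 * the system has a chance to recover.
				 */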
3020 no_mem: 3021 rspq->next_holdoff = NOMEM_INTR_DELAY; 3022 budget_left--; 3023 break; 3024 } 3025 get_imm_packet(adap, r, mh->mh_head); 3026 eop = 1; 3027 rspq->imm_data++; 3028 } else if (r->len_cq) { 3029 int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; 3030 3031 eop = get_packet(adap, drop_thresh, qs, mh, r); 3032 if (eop) { 3033 if (r->rss_hdr.hash_type && !adap->timestamp) 3034 mh->mh_head->m_flags |= M_FLOWID; 3035 mh->mh_head->m_pkthdr.flowid = rss_hash; 3036 } 3037 3038 ethpad = 2; 3039 } else { 3040 rspq->pure_rsps++; 3041 } 3042 skip: 3043 if (flags & RSPD_CTRL_MASK) { 3044 sleeping |= flags & RSPD_GTS_MASK; 3045 handle_rsp_cntrl_info(qs, flags); 3046 } 3047 3048 r++; 3049 if (__predict_false(++rspq->cidx == rspq->size)) { 3050 rspq->cidx = 0; 3051 rspq->gen ^= 1; 3052 r = rspq->desc; 3053 } 3054 3055 if (++rspq->credits >= 64) { 3056 refill_rspq(adap, rspq, rspq->credits); 3057 rspq->credits = 0; 3058 } 3059 if (!eth && eop) { 3060 mh->mh_head->m_pkthdr.csum_data = rss_csum; 3061 /* 3062 * XXX size mismatch 3063 */ 3064 m_set_priority(mh->mh_head, rss_hash); 3065 3066 3067 ngathered = rx_offload(&adap->tdev, rspq, 3068 mh->mh_head, offload_mbufs, ngathered); 3069 mh->mh_head = NULL; 3070 DPRINTF("received offload packet\n"); 3071 3072 } else if (eth && eop) { 3073 struct mbuf *m = mh->mh_head; 3074 3075 t3_rx_eth(adap, rspq, m, ethpad); 3076 3077 /* 3078 * The T304 sends incoming packets on any qset. If LRO 3079 * is also enabled, we could end up sending packet up 3080 * lro_ctrl->ifp's input. That is incorrect. 3081 * 3082 * The mbuf's rcvif was derived from the cpl header and 3083 * is accurate. Skip LRO and just use that. 3084 */ 3085 skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); 3086 3087 if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro 3088 #ifdef INET 3089 && (tcp_lro_rx(lro_ctrl, m, 0) == 0) 3090 #endif 3091 ) { 3092 /* successfully queue'd for LRO */ 3093 } else { 3094 /* 3095 * LRO not enabled, packet unsuitable for LRO, 3096 * or unable to queue. Pass it up right now in 3097 * either case. 3098 */ 3099 struct ifnet *ifp = m->m_pkthdr.rcvif; 3100 (*ifp->if_input)(ifp, m); 3101 } 3102 mh->mh_head = NULL; 3103 3104 } 3105 __refill_fl_lt(adap, &qs->fl[0], 32); 3106 __refill_fl_lt(adap, &qs->fl[1], 32); 3107 --budget_left; 3108 } 3109 3110 deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered); 3111 3112 #ifdef INET 3113 /* Flush LRO */ 3114 while (!SLIST_EMPTY(&lro_ctrl->lro_active)) { 3115 struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active); 3116 SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next); 3117 tcp_lro_flush(lro_ctrl, queued); 3118 } 3119 #endif 3120 3121 if (sleeping) 3122 check_ring_db(adap, qs, sleeping); 3123 3124 mb(); /* commit Tx queue processed updates */ 3125 if (__predict_false(qs->txq_stopped > 1)) 3126 restart_tx(qs); 3127 3128 __refill_fl_lt(adap, &qs->fl[0], 512); 3129 __refill_fl_lt(adap, &qs->fl[1], 512); 3130 budget -= budget_left; 3131 return (budget); 3132 } 3133 3134 /* 3135 * A helper function that processes responses and issues GTS. 
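 * Roughly, and exactly as the code below does it: once the responses have
 * been consumed, the new consumer index and holdoff timer are pushed back
 * to the hardware in a single GTS doorbell write,
 *
 *	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
 *	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));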
3136 */ 3137 static __inline int 3138 process_responses_gts(adapter_t *adap, struct sge_rspq *rq) 3139 { 3140 int work; 3141 static int last_holdoff = 0; 3142 3143 work = process_responses(adap, rspq_to_qset(rq), -1); 3144 3145 if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { 3146 printf("next_holdoff=%d\n", rq->next_holdoff); 3147 last_holdoff = rq->next_holdoff; 3148 } 3149 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | 3150 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); 3151 3152 return (work); 3153 } 3154 3155 3156 /* 3157 * Interrupt handler for legacy INTx interrupts for T3B-based cards. 3158 * Handles data events from SGE response queues as well as error and other 3159 * async events as they all use the same interrupt pin. We use one SGE 3160 * response queue per port in this mode and protect all response queues with 3161 * queue 0's lock. 3162 */ 3163 void 3164 t3b_intr(void *data) 3165 { 3166 uint32_t i, map; 3167 adapter_t *adap = data; 3168 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3169 3170 t3_write_reg(adap, A_PL_CLI, 0); 3171 map = t3_read_reg(adap, A_SG_DATA_INTR); 3172 3173 if (!map) 3174 return; 3175 3176 if (__predict_false(map & F_ERRINTR)) { 3177 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3178 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3179 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3180 } 3181 3182 mtx_lock(&q0->lock); 3183 for_each_port(adap, i) 3184 if (map & (1 << i)) 3185 process_responses_gts(adap, &adap->sge.qs[i].rspq); 3186 mtx_unlock(&q0->lock); 3187 } 3188 3189 /* 3190 * The MSI interrupt handler. This needs to handle data events from SGE 3191 * response queues as well as error and other async events as they all use 3192 * the same MSI vector. We use one SGE response queue per port in this mode 3193 * and protect all response queues with queue 0's lock. 
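 * If none of the response queues had any work, the interrupt is assumed to
 * be a slow (error/async) event; the handler below then masks
 * A_PL_INT_ENABLE0 and defers the work to the slow_intr_task taskqueue.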
3194 */ 3195 void 3196 t3_intr_msi(void *data) 3197 { 3198 adapter_t *adap = data; 3199 struct sge_rspq *q0 = &adap->sge.qs[0].rspq; 3200 int i, new_packets = 0; 3201 3202 mtx_lock(&q0->lock); 3203 3204 for_each_port(adap, i) 3205 if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) 3206 new_packets = 1; 3207 mtx_unlock(&q0->lock); 3208 if (new_packets == 0) { 3209 t3_write_reg(adap, A_PL_INT_ENABLE0, 0); 3210 (void) t3_read_reg(adap, A_PL_INT_ENABLE0); 3211 taskqueue_enqueue(adap->tq, &adap->slow_intr_task); 3212 } 3213 } 3214 3215 void 3216 t3_intr_msix(void *data) 3217 { 3218 struct sge_qset *qs = data; 3219 adapter_t *adap = qs->port->adapter; 3220 struct sge_rspq *rspq = &qs->rspq; 3221 3222 if (process_responses_gts(adap, rspq) == 0) 3223 rspq->unhandled_irqs++; 3224 } 3225 3226 #define QDUMP_SBUF_SIZE 32 * 400 3227 static int 3228 t3_dump_rspq(SYSCTL_HANDLER_ARGS) 3229 { 3230 struct sge_rspq *rspq; 3231 struct sge_qset *qs; 3232 int i, err, dump_end, idx; 3233 struct sbuf *sb; 3234 struct rsp_desc *rspd; 3235 uint32_t data[4]; 3236 3237 rspq = arg1; 3238 qs = rspq_to_qset(rspq); 3239 if (rspq->rspq_dump_count == 0) 3240 return (0); 3241 if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { 3242 log(LOG_WARNING, 3243 "dump count is too large %d\n", rspq->rspq_dump_count); 3244 rspq->rspq_dump_count = 0; 3245 return (EINVAL); 3246 } 3247 if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { 3248 log(LOG_WARNING, 3249 "dump start of %d is greater than queue size\n", 3250 rspq->rspq_dump_start); 3251 rspq->rspq_dump_start = 0; 3252 return (EINVAL); 3253 } 3254 err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); 3255 if (err) 3256 return (err); 3257 err = sysctl_wire_old_buffer(req, 0); 3258 if (err) 3259 return (err); 3260 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3261 3262 sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", 3263 (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), 3264 ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); 3265 sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", 3266 ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); 3267 3268 sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, 3269 (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); 3270 3271 dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; 3272 for (i = rspq->rspq_dump_start; i < dump_end; i++) { 3273 idx = i & (RSPQ_Q_SIZE-1); 3274 3275 rspd = &rspq->desc[idx]; 3276 sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", 3277 idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, 3278 rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); 3279 sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", 3280 rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), 3281 be32toh(rspd->len_cq), rspd->intr_gen); 3282 } 3283 3284 err = sbuf_finish(sb); 3285 /* Output a trailing NUL. 
*/ 3286 if (err == 0) 3287 err = SYSCTL_OUT(req, "", 1); 3288 sbuf_delete(sb); 3289 return (err); 3290 } 3291 3292 static int 3293 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) 3294 { 3295 struct sge_txq *txq; 3296 struct sge_qset *qs; 3297 int i, j, err, dump_end; 3298 struct sbuf *sb; 3299 struct tx_desc *txd; 3300 uint32_t *WR, wr_hi, wr_lo, gen; 3301 uint32_t data[4]; 3302 3303 txq = arg1; 3304 qs = txq_to_qset(txq, TXQ_ETH); 3305 if (txq->txq_dump_count == 0) { 3306 return (0); 3307 } 3308 if (txq->txq_dump_count > TX_ETH_Q_SIZE) { 3309 log(LOG_WARNING, 3310 "dump count is too large %d\n", txq->txq_dump_count); 3311 txq->txq_dump_count = 1; 3312 return (EINVAL); 3313 } 3314 if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { 3315 log(LOG_WARNING, 3316 "dump start of %d is greater than queue size\n", 3317 txq->txq_dump_start); 3318 txq->txq_dump_start = 0; 3319 return (EINVAL); 3320 } 3321 err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); 3322 if (err) 3323 return (err); 3324 err = sysctl_wire_old_buffer(req, 0); 3325 if (err) 3326 return (err); 3327 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3328 3329 sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", 3330 (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), 3331 (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); 3332 sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", 3333 ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), 3334 ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); 3335 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3336 txq->txq_dump_start, 3337 (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); 3338 3339 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3340 for (i = txq->txq_dump_start; i < dump_end; i++) { 3341 txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; 3342 WR = (uint32_t *)txd->flit; 3343 wr_hi = ntohl(WR[0]); 3344 wr_lo = ntohl(WR[1]); 3345 gen = G_WR_GEN(wr_lo); 3346 3347 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3348 wr_hi, wr_lo, gen); 3349 for (j = 2; j < 30; j += 4) 3350 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3351 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3352 3353 } 3354 err = sbuf_finish(sb); 3355 /* Output a trailing NUL. 
*/ 3356 if (err == 0) 3357 err = SYSCTL_OUT(req, "", 1); 3358 sbuf_delete(sb); 3359 return (err); 3360 } 3361 3362 static int 3363 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) 3364 { 3365 struct sge_txq *txq; 3366 struct sge_qset *qs; 3367 int i, j, err, dump_end; 3368 struct sbuf *sb; 3369 struct tx_desc *txd; 3370 uint32_t *WR, wr_hi, wr_lo, gen; 3371 3372 txq = arg1; 3373 qs = txq_to_qset(txq, TXQ_CTRL); 3374 if (txq->txq_dump_count == 0) { 3375 return (0); 3376 } 3377 if (txq->txq_dump_count > 256) { 3378 log(LOG_WARNING, 3379 "dump count is too large %d\n", txq->txq_dump_count); 3380 txq->txq_dump_count = 1; 3381 return (EINVAL); 3382 } 3383 if (txq->txq_dump_start > 255) { 3384 log(LOG_WARNING, 3385 "dump start of %d is greater than queue size\n", 3386 txq->txq_dump_start); 3387 txq->txq_dump_start = 0; 3388 return (EINVAL); 3389 } 3390 3391 err = sysctl_wire_old_buffer(req, 0); 3392 if (err != 0) 3393 return (err); 3394 sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); 3395 sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, 3396 txq->txq_dump_start, 3397 (txq->txq_dump_start + txq->txq_dump_count) & 255); 3398 3399 dump_end = txq->txq_dump_start + txq->txq_dump_count; 3400 for (i = txq->txq_dump_start; i < dump_end; i++) { 3401 txd = &txq->desc[i & (255)]; 3402 WR = (uint32_t *)txd->flit; 3403 wr_hi = ntohl(WR[0]); 3404 wr_lo = ntohl(WR[1]); 3405 gen = G_WR_GEN(wr_lo); 3406 3407 sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", 3408 wr_hi, wr_lo, gen); 3409 for (j = 2; j < 30; j += 4) 3410 sbuf_printf(sb, "\t%08x %08x %08x %08x \n", 3411 WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); 3412 3413 } 3414 err = sbuf_finish(sb); 3415 /* Output a trailing NUL. */ 3416 if (err == 0) 3417 err = SYSCTL_OUT(req, "", 1); 3418 sbuf_delete(sb); 3419 return (err); 3420 } 3421 3422 static int 3423 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) 3424 { 3425 adapter_t *sc = arg1; 3426 struct qset_params *qsp = &sc->params.sge.qset[0]; 3427 int coalesce_usecs; 3428 struct sge_qset *qs; 3429 int i, j, err, nqsets = 0; 3430 struct mtx *lock; 3431 3432 if ((sc->flags & FULL_INIT_DONE) == 0) 3433 return (ENXIO); 3434 3435 coalesce_usecs = qsp->coalesce_usecs; 3436 err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); 3437 3438 if (err != 0) { 3439 return (err); 3440 } 3441 if (coalesce_usecs == qsp->coalesce_usecs) 3442 return (0); 3443 3444 for (i = 0; i < sc->params.nports; i++) 3445 for (j = 0; j < sc->port[i].nqsets; j++) 3446 nqsets++; 3447 3448 coalesce_usecs = max(1, coalesce_usecs); 3449 3450 for (i = 0; i < nqsets; i++) { 3451 qs = &sc->sge.qs[i]; 3452 qsp = &sc->params.sge.qset[i]; 3453 qsp->coalesce_usecs = coalesce_usecs; 3454 3455 lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : 3456 &sc->sge.qs[0].rspq.lock; 3457 3458 mtx_lock(lock); 3459 t3_update_qset_coalesce(qs, qsp); 3460 t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | 3461 V_NEWTIMER(qs->rspq.holdoff_tmr)); 3462 mtx_unlock(lock); 3463 } 3464 3465 return (0); 3466 } 3467 3468 static int 3469 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) 3470 { 3471 adapter_t *sc = arg1; 3472 int rc, timestamp; 3473 3474 if ((sc->flags & FULL_INIT_DONE) == 0) 3475 return (ENXIO); 3476 3477 timestamp = sc->timestamp; 3478 rc = sysctl_handle_int(oidp, ×tamp, arg2, req); 3479 3480 if (rc != 0) 3481 return (rc); 3482 3483 if (timestamp != sc->timestamp) { 3484 t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, 3485 timestamp ? 
F_ENABLERXPKTTMSTPRSS : 0); 3486 sc->timestamp = timestamp; 3487 } 3488 3489 return (0); 3490 } 3491 3492 void 3493 t3_add_attach_sysctls(adapter_t *sc) 3494 { 3495 struct sysctl_ctx_list *ctx; 3496 struct sysctl_oid_list *children; 3497 3498 ctx = device_get_sysctl_ctx(sc->dev); 3499 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3500 3501 /* random information */ 3502 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3503 "firmware_version", 3504 CTLFLAG_RD, &sc->fw_version, 3505 0, "firmware version"); 3506 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3507 "hw_revision", 3508 CTLFLAG_RD, &sc->params.rev, 3509 0, "chip model"); 3510 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 3511 "port_types", 3512 CTLFLAG_RD, &sc->port_types, 3513 0, "type of ports"); 3514 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3515 "enable_debug", 3516 CTLFLAG_RW, &cxgb_debug, 3517 0, "enable verbose debugging output"); 3518 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", 3519 CTLFLAG_RD, &sc->tunq_coalesce, 3520 "#tunneled packets freed"); 3521 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 3522 "txq_overrun", 3523 CTLFLAG_RD, &txq_fills, 3524 0, "#times txq overrun"); 3525 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, 3526 "core_clock", 3527 CTLFLAG_RD, &sc->params.vpd.cclk, 3528 0, "core clock frequency (in KHz)"); 3529 } 3530 3531 3532 static const char *rspq_name = "rspq"; 3533 static const char *txq_names[] = 3534 { 3535 "txq_eth", 3536 "txq_ofld", 3537 "txq_ctrl" 3538 }; 3539 3540 static int 3541 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) 3542 { 3543 struct port_info *p = arg1; 3544 uint64_t *parg; 3545 3546 if (!p) 3547 return (EINVAL); 3548 3549 parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); 3550 PORT_LOCK(p); 3551 t3_mac_update_stats(&p->mac); 3552 PORT_UNLOCK(p); 3553 3554 return (sysctl_handle_64(oidp, parg, 0, req)); 3555 } 3556 3557 void 3558 t3_add_configured_sysctls(adapter_t *sc) 3559 { 3560 struct sysctl_ctx_list *ctx; 3561 struct sysctl_oid_list *children; 3562 int i, j; 3563 3564 ctx = device_get_sysctl_ctx(sc->dev); 3565 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 3566 3567 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3568 "intr_coal", 3569 CTLTYPE_INT|CTLFLAG_RW, sc, 3570 0, t3_set_coalesce_usecs, 3571 "I", "interrupt coalescing timer (us)"); 3572 3573 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 3574 "pkt_timestamp", 3575 CTLTYPE_INT | CTLFLAG_RW, sc, 3576 0, t3_pkt_timestamp, 3577 "I", "provide packet timestamp instead of connection hash"); 3578 3579 for (i = 0; i < sc->params.nports; i++) { 3580 struct port_info *pi = &sc->port[i]; 3581 struct sysctl_oid *poid; 3582 struct sysctl_oid_list *poidlist; 3583 struct mac_stats *mstats = &pi->mac.stats; 3584 3585 snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); 3586 poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 3587 pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); 3588 poidlist = SYSCTL_CHILDREN(poid); 3589 SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, 3590 "nqsets", CTLFLAG_RD, &pi->nqsets, 3591 0, "#queue sets"); 3592 3593 for (j = 0; j < pi->nqsets; j++) { 3594 struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; 3595 struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, 3596 *ctrlqpoid, *lropoid; 3597 struct sysctl_oid_list *qspoidlist, *rspqpoidlist, 3598 *txqpoidlist, *ctrlqpoidlist, 3599 *lropoidlist; 3600 struct sge_txq *txq = &qs->txq[TXQ_ETH]; 3601 3602 snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); 3603 3604 qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, 3605 qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); 3606 
qspoidlist = SYSCTL_CHILDREN(qspoid); 3607 3608 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", 3609 CTLFLAG_RD, &qs->fl[0].empty, 0, 3610 "freelist #0 empty"); 3611 SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", 3612 CTLFLAG_RD, &qs->fl[1].empty, 0, 3613 "freelist #1 empty"); 3614 3615 rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3616 rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); 3617 rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); 3618 3619 txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3620 txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); 3621 txqpoidlist = SYSCTL_CHILDREN(txqpoid); 3622 3623 ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3624 txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); 3625 ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); 3626 3627 lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, 3628 "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); 3629 lropoidlist = SYSCTL_CHILDREN(lropoid); 3630 3631 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", 3632 CTLFLAG_RD, &qs->rspq.size, 3633 0, "#entries in response queue"); 3634 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", 3635 CTLFLAG_RD, &qs->rspq.cidx, 3636 0, "consumer index"); 3637 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", 3638 CTLFLAG_RD, &qs->rspq.credits, 3639 0, "#credits"); 3640 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", 3641 CTLFLAG_RD, &qs->rspq.starved, 3642 0, "#times starved"); 3643 SYSCTL_ADD_ULONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr", 3644 CTLFLAG_RD, &qs->rspq.phys_addr, 3645 "physical_address_of the queue"); 3646 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", 3647 CTLFLAG_RW, &qs->rspq.rspq_dump_start, 3648 0, "start rspq dump entry"); 3649 SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", 3650 CTLFLAG_RW, &qs->rspq.rspq_dump_count, 3651 0, "#rspq entries to dump"); 3652 SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", 3653 CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 3654 0, t3_dump_rspq, "A", "dump of the response queue"); 3655 3656 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", 3657 CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, 3658 "#tunneled packets dropped"); 3659 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", 3660 CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen, 3661 0, "#tunneled packets waiting to be sent"); 3662 #if 0 3663 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", 3664 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 3665 0, "#tunneled packets queue producer index"); 3666 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", 3667 CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 3668 0, "#tunneled packets queue consumer index"); 3669 #endif 3670 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", 3671 CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 3672 0, "#tunneled packets processed by the card"); 3673 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", 3674 CTLFLAG_RD, &txq->cleaned, 3675 0, "#tunneled packets cleaned"); 3676 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", 3677 CTLFLAG_RD, &txq->in_use, 3678 0, "#tunneled packet slots in use"); 3679 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees", 3680 CTLFLAG_RD, &txq->txq_frees, 3681 "#tunneled packets freed"); 3682 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", 3683 CTLFLAG_RD, &txq->txq_skipped, 3684 0, "#tunneled packet descriptors skipped"); 3685 SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", 3686 CTLFLAG_RD, &txq->txq_coalesced, 3687 
"#tunneled packets coalesced"); 3688 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", 3689 CTLFLAG_RD, &txq->txq_enqueued, 3690 0, "#tunneled packets enqueued to hardware"); 3691 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", 3692 CTLFLAG_RD, &qs->txq_stopped, 3693 0, "tx queues stopped"); 3694 SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "phys_addr", 3695 CTLFLAG_RD, &txq->phys_addr, 3696 "physical_address_of the queue"); 3697 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", 3698 CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 3699 0, "txq generation"); 3700 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", 3701 CTLFLAG_RD, &txq->cidx, 3702 0, "hardware queue cidx"); 3703 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", 3704 CTLFLAG_RD, &txq->pidx, 3705 0, "hardware queue pidx"); 3706 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", 3707 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 3708 0, "txq start idx for dump"); 3709 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", 3710 CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 3711 0, "txq #entries to dump"); 3712 SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", 3713 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 3714 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); 3715 3716 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", 3717 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 3718 0, "ctrlq start idx for dump"); 3719 SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", 3720 CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 3721 0, "ctrl #entries to dump"); 3722 SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", 3723 CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 3724 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); 3725 3726 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued", 3727 CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); 3728 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed", 3729 CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); 3730 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", 3731 CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); 3732 SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", 3733 CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); 3734 } 3735 3736 /* Now add a node for mac stats. */ 3737 poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", 3738 CTLFLAG_RD, NULL, "MAC statistics"); 3739 poidlist = SYSCTL_CHILDREN(poid); 3740 3741 /* 3742 * We (ab)use the length argument (arg2) to pass on the offset 3743 * of the data that we are interested in. This is only required 3744 * for the quad counters that are updated from the hardware (we 3745 * make sure that we return the latest value). 3746 * sysctl_handle_macstat first updates *all* the counters from 3747 * the hardware, and then returns the latest value of the 3748 * requested counter. Best would be to update only the 3749 * requested counter from hardware, but t3_mac_update_stats() 3750 * hides all the register details and we don't want to dive into 3751 * all that here. 
#define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
    (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
    sysctl_handle_macstat, "QU", 0)
		CXGB_SYSCTL_ADD_QUAD(tx_octets);
		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(tx_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(tx_pause);
		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
		CXGB_SYSCTL_ADD_QUAD(rx_octets);
		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
		CXGB_SYSCTL_ADD_QUAD(rx_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
		CXGB_SYSCTL_ADD_QUAD(rx_pause);
		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_runt);
		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
		CXGB_SYSCTL_ADD_QUAD(rx_short);
		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
#undef CXGB_SYSCTL_ADD_QUAD

#define CXGB_SYSCTL_ADD_ULONG(a)	SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
    CTLFLAG_RD, &mstats->a, 0)
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
		CXGB_SYSCTL_ADD_ULONG(num_toggled);
		CXGB_SYSCTL_ADD_ULONG(num_resets);
		CXGB_SYSCTL_ADD_ULONG(link_faults);
#undef CXGB_SYSCTL_ADD_ULONG
	}
}

/**
 *	t3_get_desc - dump an SGE descriptor for debugging purposes
 *	@qs: the queue set
 *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
 *	@idx: the descriptor index in the queue
 *	@data: where to dump the descriptor contents
 *
 *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
 *	size of the descriptor.
 */
int
t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
    unsigned char *data)
{
	if (qnum >= 6)
		return (EINVAL);

	if (qnum < 3) {
		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
		return (sizeof(struct tx_desc));
	}

	if (qnum == 3) {
		if (!qs->rspq.desc || idx >= qs->rspq.size)
			return (EINVAL);
		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
		return (sizeof(struct rsp_desc));
	}

	qnum -= 4;
	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
		return (EINVAL);
	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
	return (sizeof(struct rx_desc));
}
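
/*
 * Example usage (illustrative sketch only, not part of the driver): a
 * debugging path could fetch and log the first Ethernet Tx descriptor of a
 * queue set roughly as follows.
 *
 *	unsigned char buf[sizeof(struct tx_desc)];
 *	int len;
 *
 *	len = t3_get_desc(qs, TXQ_ETH, 0, buf);
 *	if (len == sizeof(struct tx_desc))
 *		log(LOG_DEBUG, "fetched %d-byte Tx descriptor\n", len);
 *	else
 *		log(LOG_DEBUG, "t3_get_desc failed: %d\n", len);
 *
 * Note that failures are reported as a positive errno value (EINVAL) rather
 * than a negative one, so callers should compare against the expected
 * descriptor size instead of testing for len < 0.
 */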