1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * This file is part of the Chelsio T4 support code. 14 * 15 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved. 16 * 17 * This program is distributed in the hope that it will be useful, but WITHOUT 18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 19 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this 20 * release for licensing terms and conditions. 21 */ 22 23 /* 24 * Copyright 2025 Oxide Computer Company 25 */ 26 27 #include <sys/ddi.h> 28 #include <sys/sunddi.h> 29 #include <sys/sunndi.h> 30 #include <sys/atomic.h> 31 #include <sys/dlpi.h> 32 #include <sys/pattr.h> 33 #include <sys/strsubr.h> 34 #include <sys/stream.h> 35 #include <sys/strsun.h> 36 #include <inet/ip.h> 37 #include <inet/tcp.h> 38 39 #include "common/common.h" 40 #include "common/t4_msg.h" 41 #include "common/t4_regs.h" 42 #include "common/t4_regs_values.h" 43 44 /* TODO: Tune. */ 45 int rx_buf_size = 8192; 46 int tx_copy_threshold = 256; 47 uint16_t rx_copy_threshold = 256; 48 49 /* Used to track coalesced tx work request */ 50 struct txpkts { 51 mblk_t *tail; /* head is in the software descriptor */ 52 uint64_t *flitp; /* ptr to flit where next pkt should start */ 53 uint8_t npkt; /* # of packets in this work request */ 54 uint8_t nflits; /* # of flits used by this work request */ 55 uint16_t plen; /* total payload (sum of all packets) */ 56 }; 57 58 /* All information needed to tx a frame */ 59 struct txinfo { 60 uint32_t len; /* Total length of frame */ 61 uint32_t flags; /* Checksum and LSO flags */ 62 uint32_t mss; /* MSS for LSO */ 63 uint8_t nsegs; /* # of segments in the SGL, 0 means imm. 
tx */ 64 uint8_t nflits; /* # of flits needed for the SGL */ 65 uint8_t hdls_used; /* # of DMA handles used */ 66 uint32_t txb_used; /* txb_space used */ 67 mac_ether_offload_info_t meoi; /* pkt hdr info for offloads */ 68 struct ulptx_sgl sgl __attribute__((aligned(8))); 69 struct ulptx_sge_pair reserved[TX_SGL_SEGS / 2]; 70 }; 71 72 struct mblk_pair { 73 mblk_t *head, *tail; 74 }; 75 76 struct rxbuf { 77 kmem_cache_t *cache; /* the kmem_cache this rxb came from */ 78 ddi_dma_handle_t dhdl; 79 ddi_acc_handle_t ahdl; 80 caddr_t va; /* KVA of buffer */ 81 uint64_t ba; /* bus address of buffer */ 82 frtn_t freefunc; 83 uint_t buf_size; 84 volatile uint_t ref_cnt; 85 }; 86 87 static int service_iq(struct sge_iq *iq, int budget); 88 static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, 89 int8_t pktc_idx, int qsize, uint8_t esize); 90 static inline void init_fl(struct sge_fl *fl, uint16_t qsize); 91 static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, 92 struct sge_fl *fl, int intr_idx, int cong); 93 static int free_iq_fl(struct port_info *pi, struct sge_iq *iq, 94 struct sge_fl *fl); 95 static int alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, 96 int i); 97 static int free_rxq(struct port_info *pi, struct sge_rxq *rxq); 98 static int eth_eq_alloc(struct adapter *sc, struct port_info *pi, 99 struct sge_eq *eq); 100 static int alloc_eq(struct adapter *sc, struct port_info *pi, 101 struct sge_eq *eq); 102 static int free_eq(struct adapter *sc, struct sge_eq *eq); 103 static int alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx); 104 static int free_txq(struct port_info *pi, struct sge_txq *txq); 105 static int alloc_dma_memory(struct adapter *sc, size_t len, int flags, 106 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr, 107 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba, 108 caddr_t *pva); 109 static int free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl); 110 static int alloc_desc_ring(struct adapter *sc, size_t len, int rw, 111 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba, 112 caddr_t *pva); 113 static int free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl); 114 static int alloc_tx_copybuffer(struct adapter *sc, size_t len, 115 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba, 116 caddr_t *pva); 117 static inline bool is_new_response(const struct sge_iq *iq, 118 struct rsp_ctrl **ctrl); 119 static inline void iq_next(struct sge_iq *iq); 120 static int refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs); 121 static void refill_sfl(void *arg); 122 static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl); 123 static void free_fl_bufs(struct sge_fl *fl); 124 static mblk_t *get_fl_payload(struct adapter *sc, struct sge_fl *fl, 125 uint32_t len_newbuf, int *fl_bufs_used); 126 static int get_frame_txinfo(struct sge_txq *txq, mblk_t **fp, 127 struct txinfo *txinfo, int sgl_only); 128 static inline int fits_in_txb(struct sge_txq *txq, int len, int *waste); 129 static inline int copy_into_txb(struct sge_txq *txq, mblk_t *m, int len, 130 struct txinfo *txinfo); 131 static inline void add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len); 132 static inline int add_mblk(struct sge_txq *txq, struct txinfo *txinfo, 133 mblk_t *m, int len); 134 static void free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo); 135 static int add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m, 136 struct txinfo 
*txinfo); 137 static void write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts); 138 static int write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m, 139 struct txinfo *txinfo); 140 static void t4_write_flush_wr(struct sge_txq *); 141 static inline void write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 142 struct txpkts *txpkts, struct txinfo *txinfo); 143 static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, 144 int len); 145 static void t4_tx_ring_db(struct sge_txq *); 146 static uint_t t4_tx_reclaim_descs(struct sge_txq *, uint_t, mblk_t **); 147 static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, 148 mblk_t *m); 149 static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl); 150 static kstat_t *setup_port_config_kstats(struct port_info *pi); 151 static kstat_t *setup_port_info_kstats(struct port_info *pi); 152 static kstat_t *setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq, 153 int idx); 154 static int update_rxq_kstats(kstat_t *ksp, int rw); 155 static int update_port_info_kstats(kstat_t *ksp, int rw); 156 static kstat_t *setup_txq_kstats(struct port_info *pi, struct sge_txq *txq, 157 int idx); 158 static int update_txq_kstats(kstat_t *ksp, int rw); 159 static void t4_sge_egr_update(struct sge_iq *, const struct rss_header *); 160 static int t4_handle_cpl_msg(struct sge_iq *, const struct rss_header *, 161 mblk_t *); 162 static int t4_handle_fw_msg(struct sge_iq *, const struct rss_header *); 163 164 static kmem_cache_t *rxbuf_cache_create(struct rxbuf_cache_params *); 165 static struct rxbuf *rxbuf_alloc(kmem_cache_t *, int, uint_t); 166 static void rxbuf_free(struct rxbuf *); 167 static int rxbuf_ctor(void *, void *, int); 168 static void rxbuf_dtor(void *, void *); 169 170 static inline void * 171 t4_rss_payload(const struct rss_header *rss) 172 { 173 return ((void *)(&rss[1])); 174 } 175 176 static inline struct sge_iq ** 177 t4_iqmap_slot(struct adapter *sc, uint_t cntxt_id) 178 { 179 const uint_t idx = cntxt_id - sc->sge.iq_start; 180 VERIFY3U(idx, <, sc->sge.iqmap_sz); 181 return (&sc->sge.iqmap[idx]); 182 } 183 184 static inline struct sge_eq ** 185 t4_eqmap_slot(struct adapter *sc, uint_t cntxt_id) 186 { 187 const uint_t idx = cntxt_id - sc->sge.eq_start; 188 VERIFY3U(idx, <, sc->sge.eqmap_sz); 189 return (&sc->sge.eqmap[idx]); 190 } 191 192 static inline int 193 reclaimable(struct sge_eq *eq) 194 { 195 unsigned int cidx; 196 197 cidx = eq->spg->cidx; /* stable snapshot */ 198 cidx = be16_to_cpu(cidx); 199 200 if (cidx >= eq->cidx) 201 return (cidx - eq->cidx); 202 else 203 return (cidx + eq->cap - eq->cidx); 204 } 205 206 void 207 t4_sge_init(struct adapter *sc) 208 { 209 struct driver_properties *p = &sc->props; 210 ddi_dma_attr_t *dma_attr; 211 ddi_device_acc_attr_t *acc_attr; 212 uint32_t sge_control, sge_conm_ctrl; 213 int egress_threshold; 214 215 /* 216 * Device access and DMA attributes for descriptor rings 217 */ 218 acc_attr = &sc->sge.acc_attr_desc; 219 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0; 220 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; 221 acc_attr->devacc_attr_dataorder = DDI_STRICTORDER_ACC; 222 223 dma_attr = &sc->sge.dma_attr_desc; 224 dma_attr->dma_attr_version = DMA_ATTR_V0; 225 dma_attr->dma_attr_addr_lo = 0; 226 dma_attr->dma_attr_addr_hi = UINT64_MAX; 227 dma_attr->dma_attr_count_max = UINT64_MAX; 228 dma_attr->dma_attr_align = 512; 229 dma_attr->dma_attr_burstsizes = 0xfff; 230 dma_attr->dma_attr_minxfer = 1; 231 
dma_attr->dma_attr_maxxfer = UINT64_MAX; 232 dma_attr->dma_attr_seg = UINT64_MAX; 233 dma_attr->dma_attr_sgllen = 1; 234 dma_attr->dma_attr_granular = 1; 235 dma_attr->dma_attr_flags = 0; 236 237 /* 238 * Device access and DMA attributes for tx buffers 239 */ 240 acc_attr = &sc->sge.acc_attr_tx; 241 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0; 242 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; 243 244 dma_attr = &sc->sge.dma_attr_tx; 245 dma_attr->dma_attr_version = DMA_ATTR_V0; 246 dma_attr->dma_attr_addr_lo = 0; 247 dma_attr->dma_attr_addr_hi = UINT64_MAX; 248 dma_attr->dma_attr_count_max = UINT64_MAX; 249 dma_attr->dma_attr_align = 1; 250 dma_attr->dma_attr_burstsizes = 0xfff; 251 dma_attr->dma_attr_minxfer = 1; 252 dma_attr->dma_attr_maxxfer = UINT64_MAX; 253 dma_attr->dma_attr_seg = UINT64_MAX; 254 dma_attr->dma_attr_sgllen = TX_SGL_SEGS; 255 dma_attr->dma_attr_granular = 1; 256 dma_attr->dma_attr_flags = 0; 257 258 /* 259 * Ingress Padding Boundary and Egress Status Page Size are set up by 260 * t4_fixup_host_params(). 261 */ 262 sge_control = t4_read_reg(sc, A_SGE_CONTROL); 263 sc->sge.pktshift = G_PKTSHIFT(sge_control); 264 sc->sge.stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64; 265 266 /* t4_nex uses FLM packed mode */ 267 sc->sge.fl_align = t4_fl_pkt_align(sc, true); 268 269 /* 270 * Device access and DMA attributes for rx buffers 271 */ 272 sc->sge.rxb_params.dip = sc->dip; 273 sc->sge.rxb_params.buf_size = rx_buf_size; 274 275 acc_attr = &sc->sge.rxb_params.acc_attr_rx; 276 acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0; 277 acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; 278 279 dma_attr = &sc->sge.rxb_params.dma_attr_rx; 280 dma_attr->dma_attr_version = DMA_ATTR_V0; 281 dma_attr->dma_attr_addr_lo = 0; 282 dma_attr->dma_attr_addr_hi = UINT64_MAX; 283 dma_attr->dma_attr_count_max = UINT64_MAX; 284 /* 285 * Low 4 bits of an rx buffer address have a special meaning to the SGE 286 * and an rx buf cannot have an address with any of these bits set. 287 * FL_ALIGN is >= 32 so we're sure things are ok. 288 */ 289 dma_attr->dma_attr_align = sc->sge.fl_align; 290 dma_attr->dma_attr_burstsizes = 0xfff; 291 dma_attr->dma_attr_minxfer = 1; 292 dma_attr->dma_attr_maxxfer = UINT64_MAX; 293 dma_attr->dma_attr_seg = UINT64_MAX; 294 dma_attr->dma_attr_sgllen = 1; 295 dma_attr->dma_attr_granular = 1; 296 dma_attr->dma_attr_flags = 0; 297 298 sc->sge.rxbuf_cache = rxbuf_cache_create(&sc->sge.rxb_params); 299 300 /* 301 * A FL with <= fl_starve_thres buffers is starving and a periodic 302 * timer will attempt to refill it. This needs to be larger than the 303 * SGE's Egress Congestion Threshold. If it isn't, then we can get 304 * stuck waiting for new packets while the SGE is waiting for us to 305 * give it more Free List entries. (Note that the SGE's Egress 306 * Congestion Threshold is in units of 2 Free List pointers.) For T4, 307 * there was only a single field to control this. For T5 there's the 308 * original field which now only applies to Unpacked Mode Free List 309 * buffers and a new field which only applies to Packed Mode Free List 310 * buffers. 
311 */ 312 313 sge_conm_ctrl = t4_read_reg(sc, A_SGE_CONM_CTRL); 314 switch (CHELSIO_CHIP_VERSION(sc->params.chip)) { 315 case CHELSIO_T4: 316 egress_threshold = G_EGRTHRESHOLD(sge_conm_ctrl); 317 break; 318 case CHELSIO_T5: 319 egress_threshold = G_EGRTHRESHOLDPACKING(sge_conm_ctrl); 320 break; 321 case CHELSIO_T6: 322 default: 323 egress_threshold = G_T6_EGRTHRESHOLDPACKING(sge_conm_ctrl); 324 } 325 sc->sge.fl_starve_threshold = 2*egress_threshold + 1; 326 327 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, rx_buf_size); 328 329 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, 330 V_THRESHOLD_0(p->holdoff_pktcnt[0]) | 331 V_THRESHOLD_1(p->holdoff_pktcnt[1]) | 332 V_THRESHOLD_2(p->holdoff_pktcnt[2]) | 333 V_THRESHOLD_3(p->holdoff_pktcnt[3])); 334 335 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, 336 V_TIMERVALUE0(us_to_core_ticks(sc, p->holdoff_timer_us[0])) | 337 V_TIMERVALUE1(us_to_core_ticks(sc, p->holdoff_timer_us[1]))); 338 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, 339 V_TIMERVALUE2(us_to_core_ticks(sc, p->holdoff_timer_us[2])) | 340 V_TIMERVALUE3(us_to_core_ticks(sc, p->holdoff_timer_us[3]))); 341 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, 342 V_TIMERVALUE4(us_to_core_ticks(sc, p->holdoff_timer_us[4])) | 343 V_TIMERVALUE5(us_to_core_ticks(sc, p->holdoff_timer_us[5]))); 344 } 345 346 static inline int 347 first_vector(struct port_info *pi) 348 { 349 struct adapter *sc = pi->adapter; 350 int rc = T4_EXTRA_INTR, i; 351 352 if (sc->intr_count == 1) 353 return (0); 354 355 for_each_port(sc, i) { 356 struct port_info *p = sc->port[i]; 357 358 if (i == pi->port_id) 359 break; 360 361 /* 362 * Not compiled with offload support and intr_count > 1. Only 363 * NIC queues exist and they'd better be taking direct 364 * interrupts. 365 */ 366 ASSERT(!(sc->flags & TAF_INTR_FWD)); 367 rc += p->nrxq; 368 } 369 return (rc); 370 } 371 372 /* 373 * Given an arbitrary "index," come up with an iq that can be used by other 374 * queues (of this port) for interrupt forwarding, SGE egress updates, etc. 375 * The iq returned is guaranteed to be something that takes direct interrupts. 376 */ 377 static struct sge_iq * 378 port_intr_iq(struct port_info *pi, int idx) 379 { 380 struct adapter *sc = pi->adapter; 381 struct sge *s = &sc->sge; 382 struct sge_iq *iq = NULL; 383 384 if (sc->intr_count == 1) 385 return (&sc->sge.fwq); 386 387 /* 388 * Not compiled with offload support and intr_count > 1. Only NIC 389 * queues exist and they'd better be taking direct interrupts. 390 */ 391 ASSERT(!(sc->flags & TAF_INTR_FWD)); 392 393 idx %= pi->nrxq; 394 iq = &s->rxq[pi->first_rxq + idx].iq; 395 396 return (iq); 397 } 398 399 int 400 t4_setup_port_queues(struct port_info *pi) 401 { 402 int rc = 0, i, intr_idx, j; 403 struct sge_rxq *rxq; 404 struct sge_txq *txq; 405 struct adapter *sc = pi->adapter; 406 struct driver_properties *p = &sc->props; 407 408 pi->ksp_config = setup_port_config_kstats(pi); 409 pi->ksp_info = setup_port_info_kstats(pi); 410 411 /* Interrupt vector to start from (when using multiple vectors) */ 412 intr_idx = first_vector(pi); 413 414 /* 415 * First pass over all rx queues (NIC and TOE): 416 * a) initialize iq and fl 417 * b) allocate queue iff it will take direct interrupts. 
418 */ 419 420 for_each_rxq(pi, i, rxq) { 421 422 init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, p->qsize_rxq, 423 RX_IQ_ESIZE); 424 425 init_fl(&rxq->fl, p->qsize_rxq / 8); /* 8 bufs in each entry */ 426 427 if ((!(sc->flags & TAF_INTR_FWD)) || 428 (sc->intr_count > 1 && pi->nrxq)) { 429 rxq->iq.flags |= IQ_INTR; 430 rc = alloc_rxq(pi, rxq, intr_idx, i); 431 if (rc != 0) 432 goto done; 433 intr_idx++; 434 } 435 436 } 437 438 /* 439 * Second pass over all rx queues (NIC and TOE). The queues forwarding 440 * their interrupts are allocated now. 441 */ 442 j = 0; 443 for_each_rxq(pi, i, rxq) { 444 if (rxq->iq.flags & IQ_INTR) 445 continue; 446 447 intr_idx = port_intr_iq(pi, j)->abs_id; 448 449 rc = alloc_rxq(pi, rxq, intr_idx, i); 450 if (rc != 0) 451 goto done; 452 j++; 453 } 454 455 /* 456 * Now the tx queues. Only one pass needed. 457 */ 458 j = 0; 459 for_each_txq(pi, i, txq) { 460 txq->eq.flags = 0; 461 txq->eq.tx_chan = pi->tx_chan; 462 txq->eq.qsize = p->qsize_txq; 463 464 /* For now, direct all TX queue notifications to the FW IQ. */ 465 txq->eq.iqid = sc->sge.fwq.cntxt_id; 466 467 rc = alloc_txq(pi, txq, i); 468 if (rc != 0) 469 goto done; 470 } 471 472 done: 473 if (rc != 0) 474 (void) t4_teardown_port_queues(pi); 475 476 return (rc); 477 } 478 479 /* 480 * Idempotent 481 */ 482 int 483 t4_teardown_port_queues(struct port_info *pi) 484 { 485 int i; 486 struct sge_rxq *rxq; 487 struct sge_txq *txq; 488 489 if (pi->ksp_config != NULL) { 490 kstat_delete(pi->ksp_config); 491 pi->ksp_config = NULL; 492 } 493 if (pi->ksp_info != NULL) { 494 kstat_delete(pi->ksp_info); 495 pi->ksp_info = NULL; 496 } 497 498 for_each_txq(pi, i, txq) { 499 (void) free_txq(pi, txq); 500 } 501 502 for_each_rxq(pi, i, rxq) { 503 if ((rxq->iq.flags & IQ_INTR) == 0) 504 (void) free_rxq(pi, rxq); 505 } 506 507 /* 508 * Then take down the rx queues that take direct interrupts. 509 */ 510 511 for_each_rxq(pi, i, rxq) { 512 if (rxq->iq.flags & IQ_INTR) 513 (void) free_rxq(pi, rxq); 514 } 515 516 return (0); 517 } 518 519 /* Deals with errors and forwarded interrupts */ 520 uint_t 521 t4_intr_all(caddr_t arg1, caddr_t arg2) 522 { 523 524 (void) t4_intr_err(arg1, arg2); 525 (void) t4_intr(arg1, arg2); 526 527 return (DDI_INTR_CLAIMED); 528 } 529 530 /* 531 * We are counting on the values of t4_intr_config_t matching the register 532 * definitions from the shared code. 533 */ 534 CTASSERT(TIC_SE_INTR_ARM == F_QINTR_CNT_EN); 535 CTASSERT(TIC_TIMER0 == V_QINTR_TIMER_IDX(X_TIMERREG_COUNTER0)); 536 CTASSERT(TIC_TIMER5 == V_QINTR_TIMER_IDX(X_TIMERREG_COUNTER5)); 537 CTASSERT(TIC_START_COUNTER == V_QINTR_TIMER_IDX(X_TIMERREG_RESTART_COUNTER)); 538 539 void 540 t4_iq_update_intr_cfg(struct sge_iq *iq, uint8_t tmr_idx, int8_t pktc_idx) 541 { 542 ASSERT((pktc_idx >= 0 && pktc_idx < SGE_NCOUNTERS) || pktc_idx == -1); 543 IQ_LOCK_ASSERT_OWNED(iq); 544 /* 545 * Strictly speaking, the IQ could be programmed with a TimerReg value 546 * of 6 (TICK_START_COUNTER), which is outside the range of SGE_NTIMERS. 547 * 548 * Since we do not currently offer an interface to configure such 549 * behavior, we assert its absence here for now. 550 */ 551 ASSERT3U(tmr_idx, <, SGE_NTIMERS); 552 553 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) | 554 ((pktc_idx != -1) ? 
TIC_SE_INTR_ARM : 0); 555 556 /* Update IQ for new packet count threshold, but only if enabled */ 557 if (pktc_idx != iq->intr_pktc_idx && pktc_idx >= 0) { 558 const uint32_t param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 559 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) | 560 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 561 const uint32_t val = pktc_idx; 562 563 struct adapter *sc = iq->adapter; 564 int rc = 565 -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 566 if (rc != 0) { 567 /* report error but carry on */ 568 cxgb_printf(sc->dip, CE_WARN, 569 "failed to set intr pktcnt index for IQ %d: %d", 570 iq->cntxt_id, rc); 571 } 572 } 573 iq->intr_pktc_idx = pktc_idx; 574 } 575 576 void 577 t4_eq_update_dbq_timer(struct sge_eq *eq, struct port_info *pi) 578 { 579 struct adapter *sc = pi->adapter; 580 581 const uint32_t param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 582 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_TIMERIX) | 583 V_FW_PARAMS_PARAM_YZ(eq->cntxt_id); 584 const uint32_t val = pi->dbq_timer_idx; 585 586 int rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 587 if (rc != 0) { 588 /* report error but carry on */ 589 cxgb_printf(sc->dip, CE_WARN, 590 "failed to set DBQ timer index for EQ %d: %d", 591 eq->cntxt_id, rc); 592 } 593 } 594 595 /* 596 * Update (via GTS) the interrupt/timer config and CIDX value for a specified 597 * ingress queue. 598 */ 599 void 600 t4_iq_gts_update(struct sge_iq *iq, t4_intr_config_t cfg, uint16_t cidx_incr) 601 { 602 const uint32_t value = 603 V_INGRESSQID((uint32_t)iq->cntxt_id) | 604 V_CIDXINC((uint32_t)cidx_incr) | 605 V_SEINTARM((uint32_t)cfg); 606 t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS), value); 607 } 608 609 /* 610 * Update (via GTS) the CIDX value for a specified ingress queue. 611 * 612 * This _only_ increments CIDX and does not alter any other timer related state 613 * associated with the IQ. 614 */ 615 static void 616 t4_iq_gts_incr(struct sge_iq *iq, uint16_t cidx_incr) 617 { 618 if (cidx_incr == 0) { 619 return; 620 } 621 622 const uint32_t value = 623 V_INGRESSQID((uint32_t)iq->cntxt_id) | 624 V_CIDXINC((uint32_t)cidx_incr) | 625 V_SEINTARM((uint32_t)V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)); 626 t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS), value); 627 } 628 629 static void 630 t4_intr_rx_work(struct sge_iq *iq) 631 { 632 mblk_t *mp = NULL; 633 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ 634 RXQ_LOCK(rxq); 635 if (!iq->polling) { 636 mp = t4_ring_rx(rxq, iq->qsize/8); 637 t4_iq_gts_update(iq, iq->intr_params, 0); 638 } 639 RXQ_UNLOCK(rxq); 640 if (mp != NULL) { 641 mac_rx_ring(rxq->port->mh, rxq->ring_handle, mp, 642 rxq->ring_gen_num); 643 } 644 } 645 646 /* Deals with interrupts on the given ingress queue */ 647 /* ARGSUSED */ 648 uint_t 649 t4_intr(caddr_t arg1, caddr_t arg2) 650 { 651 struct sge_iq *iq = (struct sge_iq *)arg2; 652 int state; 653 654 /* 655 * Right now receive polling is only enabled for MSI-X and 656 * when we have enough msi-x vectors i.e no interrupt forwarding. 
657 */ 658 if (iq->adapter->props.multi_rings) { 659 t4_intr_rx_work(iq); 660 } else { 661 state = atomic_cas_uint(&iq->state, IQS_IDLE, IQS_BUSY); 662 if (state == IQS_IDLE) { 663 (void) service_iq(iq, 0); 664 (void) atomic_cas_uint(&iq->state, IQS_BUSY, IQS_IDLE); 665 } 666 } 667 return (DDI_INTR_CLAIMED); 668 } 669 670 /* Deals with error interrupts */ 671 /* ARGSUSED */ 672 uint_t 673 t4_intr_err(caddr_t arg1, caddr_t arg2) 674 { 675 struct adapter *sc = (struct adapter *)arg1; 676 677 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 678 (void) t4_slow_intr_handler(sc); 679 680 return (DDI_INTR_CLAIMED); 681 } 682 683 /* 684 * t4_ring_rx - Process responses from an SGE response queue. 685 * 686 * This function processes responses from an SGE response queue up to the 687 * supplied budget. Responses include received packets as well as control 688 * messages from FW or HW. 689 * 690 * It returns a chain of mblks containing the received data, to be 691 * passed up to mac_rx_ring(). 692 */ 693 mblk_t * 694 t4_ring_rx(struct sge_rxq *rxq, int budget) 695 { 696 struct sge_iq *iq = &rxq->iq; 697 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */ 698 struct adapter *sc = iq->adapter; 699 struct rsp_ctrl *ctrl; 700 int ndescs = 0, fl_bufs_used = 0; 701 mblk_t *mblk_head = NULL, **mblk_tail = &mblk_head; 702 uint32_t received_bytes = 0, pkt_len = 0; 703 uint16_t err_vec; 704 705 while (is_new_response(iq, &ctrl)) { 706 membar_consumer(); 707 708 const uint8_t type_gen = ctrl->u.type_gen; 709 const uint8_t rsp_type = G_RSPD_TYPE(type_gen); 710 const bool overflowed = (type_gen & F_RSPD_QOVFL) != 0; 711 const uint32_t data_len = BE_32(ctrl->pldbuflen_qid); 712 713 iq->stats.sis_processed++; 714 if (overflowed) { 715 iq->stats.sis_overflow++; 716 } 717 718 const struct rss_header *rss = 719 (const struct rss_header *)iq->cdesc; 720 mblk_t *m = NULL; 721 722 switch (rsp_type) { 723 case X_RSPD_TYPE_FLBUF: 724 725 ASSERT(iq->flags & IQ_HAS_FL); 726 727 if (CPL_RX_PKT == rss->opcode) { 728 const struct cpl_rx_pkt *cpl = 729 t4_rss_payload(rss); 730 pkt_len = be16_to_cpu(cpl->len); 731 732 if (iq->polling && 733 ((received_bytes + pkt_len) > budget)) 734 goto done; 735 736 m = get_fl_payload(sc, fl, data_len, 737 &fl_bufs_used); 738 if (m == NULL) 739 goto done; 740 741 m->b_rptr += sc->sge.pktshift; 742 if (sc->params.tp.rx_pkt_encap) { 743 /* Enabled only in T6 config file */ 744 err_vec = G_T6_COMPR_RXERR_VEC( 745 ntohs(cpl->err_vec)); 746 } else { 747 err_vec = ntohs(cpl->err_vec); 748 } 749 750 const bool csum_ok = cpl->csum_calc && !err_vec; 751 752 /* TODO: what about cpl->ip_frag? 
*/ 753 if (csum_ok && !cpl->ip_frag) { 754 mac_hcksum_set(m, 0, 0, 0, 0xffff, 755 HCK_FULLCKSUM_OK | HCK_FULLCKSUM | 756 HCK_IPV4_HDRCKSUM_OK); 757 rxq->rxcsum++; 758 } 759 rxq->rxpkts++; 760 rxq->rxbytes += pkt_len; 761 received_bytes += pkt_len; 762 763 *mblk_tail = m; 764 mblk_tail = &m->b_next; 765 766 break; 767 } 768 769 m = get_fl_payload(sc, fl, data_len, &fl_bufs_used); 770 if (m == NULL) 771 goto done; 772 /* FALLTHROUGH */ 773 774 case X_RSPD_TYPE_CPL: 775 (void) t4_handle_cpl_msg(iq, rss, m); 776 break; 777 778 default: 779 break; 780 } 781 iq_next(iq); 782 ++ndescs; 783 if (!iq->polling && (ndescs == budget)) 784 break; 785 } 786 787 done: 788 789 t4_iq_gts_incr(iq, ndescs); 790 791 if ((fl_bufs_used > 0) || (iq->flags & IQ_HAS_FL)) { 792 int starved; 793 FL_LOCK(fl); 794 fl->needed += fl_bufs_used; 795 starved = refill_fl(sc, fl, fl->cap / 8); 796 FL_UNLOCK(fl); 797 if (starved) 798 add_fl_to_sfl(sc, fl); 799 } 800 return (mblk_head); 801 } 802 803 /* 804 * Deals with anything and everything on the given ingress queue. 805 */ 806 static int 807 service_iq(struct sge_iq *iq, int budget) 808 { 809 struct sge_iq *q; 810 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ 811 struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */ 812 struct adapter *sc = iq->adapter; 813 struct rsp_ctrl *ctrl; 814 int ndescs = 0, fl_bufs_used = 0; 815 int starved; 816 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 817 818 const uint_t limit = (budget != 0) ? budget : iq->qsize / 8; 819 820 /* 821 * We always come back and check the descriptor ring for new indirect 822 * interrupts and other responses after running a single handler. 823 */ 824 for (;;) { 825 while (is_new_response(iq, &ctrl)) { 826 membar_consumer(); 827 828 const uint8_t type_gen = ctrl->u.type_gen; 829 const uint8_t rsp_type = G_RSPD_TYPE(type_gen); 830 const uint32_t dlen_qid = BE_32(ctrl->pldbuflen_qid); 831 832 mblk_t *m = NULL; 833 const struct rss_header *rss = 834 (const struct rss_header *)iq->cdesc; 835 836 switch (rsp_type) { 837 case X_RSPD_TYPE_FLBUF: 838 839 ASSERT(iq->flags & IQ_HAS_FL); 840 841 m = get_fl_payload(sc, fl, dlen_qid, 842 &fl_bufs_used); 843 if (m == NULL) { 844 /* 845 * Rearm the iq with a 846 * longer-than-default timer 847 */ 848 t4_iq_gts_update(iq, TIC_TIMER5, 849 ndescs); 850 if (fl_bufs_used > 0) { 851 ASSERT(iq->flags & IQ_HAS_FL); 852 FL_LOCK(fl); 853 fl->needed += fl_bufs_used; 854 starved = refill_fl(sc, fl, 855 fl->cap / 8); 856 FL_UNLOCK(fl); 857 if (starved) 858 add_fl_to_sfl(sc, fl); 859 } 860 return (0); 861 } 862 863 /* FALLTHRU */ 864 case X_RSPD_TYPE_CPL: 865 (void) t4_handle_cpl_msg(iq, rss, m); 866 break; 867 868 case X_RSPD_TYPE_INTR: 869 870 /* 871 * Interrupts should be forwarded only to queues 872 * that are not forwarding their interrupts. 873 * This means service_iq can recurse but only 1 874 * level deep. 
875 */ 876 ASSERT(budget == 0); 877 878 q = *t4_iqmap_slot(sc, dlen_qid); 879 if (atomic_cas_uint(&q->state, IQS_IDLE, 880 IQS_BUSY) == IQS_IDLE) { 881 if (service_iq(q, q->qsize / 8) == 0) { 882 (void) atomic_cas_uint( 883 &q->state, IQS_BUSY, 884 IQS_IDLE); 885 } else { 886 STAILQ_INSERT_TAIL(&iql, q, 887 link); 888 } 889 } 890 break; 891 892 default: 893 break; 894 } 895 896 iq_next(iq); 897 if (++ndescs == limit) { 898 t4_iq_gts_incr(iq, ndescs); 899 ndescs = 0; 900 901 if (fl_bufs_used > 0) { 902 ASSERT(iq->flags & IQ_HAS_FL); 903 FL_LOCK(fl); 904 fl->needed += fl_bufs_used; 905 (void) refill_fl(sc, fl, fl->cap / 8); 906 FL_UNLOCK(fl); 907 fl_bufs_used = 0; 908 } 909 910 if (budget != 0) 911 return (EINPROGRESS); 912 } 913 } 914 915 if (STAILQ_EMPTY(&iql) != 0) 916 break; 917 918 /* 919 * Process the head only, and send it to the back of the list if 920 * it's still not done. 921 */ 922 q = STAILQ_FIRST(&iql); 923 STAILQ_REMOVE_HEAD(&iql, link); 924 if (service_iq(q, q->qsize / 8) == 0) 925 (void) atomic_cas_uint(&q->state, IQS_BUSY, IQS_IDLE); 926 else 927 STAILQ_INSERT_TAIL(&iql, q, link); 928 } 929 930 t4_iq_gts_update(iq, iq->intr_params, ndescs); 931 932 if (iq->flags & IQ_HAS_FL) { 933 FL_LOCK(fl); 934 fl->needed += fl_bufs_used; 935 starved = refill_fl(sc, fl, fl->cap / 4); 936 FL_UNLOCK(fl); 937 if (starved != 0) 938 add_fl_to_sfl(sc, fl); 939 } 940 941 return (0); 942 } 943 944 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */ 945 #define TXPKTS_PKT_HDR ((\ 946 sizeof (struct ulp_txpkt) + \ 947 sizeof (struct ulptx_idata) + \ 948 sizeof (struct cpl_tx_pkt_core)) / 8) 949 950 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */ 951 #define TXPKTS_WR_HDR (\ 952 sizeof (struct fw_eth_tx_pkts_wr) / 8 + \ 953 TXPKTS_PKT_HDR) 954 955 /* Header of a tx WR, before SGL of first packet (in flits) */ 956 #define TXPKT_WR_HDR ((\ 957 sizeof (struct fw_eth_tx_pkt_wr) + \ 958 sizeof (struct cpl_tx_pkt_core)) / 8) 959 960 /* Header of a tx LSO WR, before SGL of first packet (in flits) */ 961 #define TXPKT_LSO_WR_HDR ((\ 962 sizeof (struct fw_eth_tx_pkt_wr) + \ 963 sizeof (struct cpl_tx_pkt_lso_core) + \ 964 sizeof (struct cpl_tx_pkt_core)) / 8) 965 966 mblk_t * 967 t4_eth_tx(void *arg, mblk_t *frame) 968 { 969 struct sge_txq *txq = (struct sge_txq *)arg; 970 struct port_info *pi = txq->port; 971 struct sge_eq *eq = &txq->eq; 972 mblk_t *next_frame; 973 int rc, coalescing; 974 struct txpkts txpkts; 975 struct txinfo txinfo; 976 977 txpkts.npkt = 0; /* indicates there's nothing in txpkts */ 978 coalescing = 0; 979 980 TXQ_LOCK(txq); 981 if (eq->avail < 8) 982 (void) t4_tx_reclaim_descs(txq, 8, NULL); 983 for (; frame; frame = next_frame) { 984 985 if (eq->avail < 8) 986 break; 987 988 next_frame = frame->b_next; 989 frame->b_next = NULL; 990 991 if (next_frame != NULL) 992 coalescing = 1; 993 994 rc = get_frame_txinfo(txq, &frame, &txinfo, coalescing); 995 if (rc != 0) { 996 if (rc == ENOMEM) { 997 /* Short of resources, suspend tx */ 998 frame->b_next = next_frame; 999 1000 /* 1001 * Since we are out of memory for this packet, 1002 * rather than TX descriptors, enqueue an 1003 * flush work request. This will ensure that a 1004 * completion notification is delivered for this 1005 * EQ which will trigger a call to update the 1006 * state in mac to continue transmissions. 1007 */ 1008 t4_write_flush_wr(txq); 1009 1010 break; 1011 } 1012 1013 /* 1014 * Unrecoverable error for this frame, throw it away and 1015 * move on to the next. 
1016 */ 1017 freemsg(frame); 1018 continue; 1019 } 1020 1021 if (coalescing != 0 && 1022 add_to_txpkts(txq, &txpkts, frame, &txinfo) == 0) { 1023 1024 /* Successfully absorbed into txpkts */ 1025 1026 write_ulp_cpl_sgl(pi, txq, &txpkts, &txinfo); 1027 goto doorbell; 1028 } 1029 1030 /* 1031 * We weren't coalescing to begin with, or current frame could 1032 * not be coalesced (add_to_txpkts flushes txpkts if a frame 1033 * given to it can't be coalesced). Either way there should be 1034 * nothing in txpkts. 1035 */ 1036 ASSERT(txpkts.npkt == 0); 1037 1038 /* We're sending out individual frames now */ 1039 coalescing = 0; 1040 1041 if (eq->avail < 8) 1042 (void) t4_tx_reclaim_descs(txq, 8, NULL); 1043 rc = write_txpkt_wr(pi, txq, frame, &txinfo); 1044 if (rc != 0) { 1045 1046 /* Short of hardware descriptors, suspend tx */ 1047 1048 /* 1049 * This is an unlikely but expensive failure. We've 1050 * done all the hard work (DMA bindings etc.) and now we 1051 * can't send out the frame. What's worse, we have to 1052 * spend even more time freeing up everything in txinfo. 1053 */ 1054 txq->qfull++; 1055 free_txinfo_resources(txq, &txinfo); 1056 1057 frame->b_next = next_frame; 1058 break; 1059 } 1060 1061 doorbell: 1062 /* Fewer and fewer doorbells as the queue fills up */ 1063 if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) { 1064 txq->txbytes += txinfo.len; 1065 txq->txpkts++; 1066 t4_tx_ring_db(txq); 1067 } 1068 (void) t4_tx_reclaim_descs(txq, 32, NULL); 1069 } 1070 1071 if (txpkts.npkt > 0) { 1072 write_txpkts_wr(txq, &txpkts); 1073 } 1074 1075 if (eq->pending != 0) { 1076 t4_tx_ring_db(txq); 1077 } 1078 1079 if (frame != NULL) { 1080 eq->flags |= EQ_CORKED; 1081 } 1082 1083 (void) t4_tx_reclaim_descs(txq, eq->qsize, NULL); 1084 TXQ_UNLOCK(txq); 1085 1086 return (frame); 1087 } 1088 1089 static inline void 1090 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int8_t pktc_idx, 1091 int qsize, uint8_t esize) 1092 { 1093 ASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS); 1094 ASSERT(pktc_idx < SGE_NCOUNTERS); /* -ve is ok, means don't use */ 1095 1096 iq->flags = 0; 1097 iq->adapter = sc; 1098 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 1099 iq->intr_pktc_idx = -1; 1100 if (pktc_idx >= 0) { 1101 iq->intr_params |= TIC_SE_INTR_ARM; 1102 iq->intr_pktc_idx = pktc_idx; 1103 } 1104 iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */ 1105 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ 1106 } 1107 1108 static inline void 1109 init_fl(struct sge_fl *fl, uint16_t qsize) 1110 { 1111 1112 fl->qsize = qsize; 1113 fl->allocb_fail = 0; 1114 } 1115 1116 /* 1117 * Allocates the ring for an ingress queue and an optional freelist. If the 1118 * freelist is specified it will be allocated and then associated with the 1119 * ingress queue. 1120 * 1121 * Returns errno on failure. Resources allocated up to that point may still be 1122 * allocated. Caller is responsible for cleanup in case this function fails. 1123 * 1124 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then 1125 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 1126 * the index of the queue to which its interrupts will be forwarded. 
1127 */ 1128 static int 1129 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, 1130 int intr_idx, int cong) 1131 { 1132 int rc, i; 1133 size_t len; 1134 struct fw_iq_cmd c; 1135 struct adapter *sc = iq->adapter; 1136 uint32_t v = 0; 1137 1138 len = iq->qsize * iq->esize; 1139 rc = alloc_desc_ring(sc, len, DDI_DMA_READ, &iq->dhdl, &iq->ahdl, 1140 &iq->ba, (caddr_t *)&iq->desc); 1141 if (rc != 0) 1142 return (rc); 1143 1144 bzero(&c, sizeof (c)); 1145 c.op_to_vfn = cpu_to_be32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 1146 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 1147 V_FW_IQ_CMD_VFN(0)); 1148 1149 c.alloc_to_len16 = cpu_to_be32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 1150 FW_LEN16(c)); 1151 1152 /* Special handling for firmware event queue */ 1153 if (iq == &sc->sge.fwq) 1154 v |= F_FW_IQ_CMD_IQASYNCH; 1155 1156 if (iq->flags & IQ_INTR) 1157 ASSERT(intr_idx < sc->intr_count); 1158 else 1159 v |= F_FW_IQ_CMD_IQANDST; 1160 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 1161 1162 /* 1163 * If the coalescing counter is not enabled for this IQ, use the 0 1164 * index, rather than populating it with the invalid -1 value. 1165 * 1166 * The selected index does not matter when the counter is not enabled 1167 * through the GTS flags. 1168 */ 1169 const uint_t pktc_idx = (iq->intr_pktc_idx < 0) ? 0 : iq->intr_pktc_idx; 1170 1171 c.type_to_iqandstindex = cpu_to_be32(v | 1172 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 1173 V_FW_IQ_CMD_VIID(pi->viid) | 1174 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 1175 c.iqdroprss_to_iqesize = cpu_to_be16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 1176 F_FW_IQ_CMD_IQGTSMODE | 1177 V_FW_IQ_CMD_IQINTCNTTHRESH(pktc_idx) | 1178 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4)); 1179 c.iqsize = cpu_to_be16(iq->qsize); 1180 c.iqaddr = cpu_to_be64(iq->ba); 1181 if (cong >= 0) { 1182 const uint32_t iq_type = 1183 cong ? FW_IQ_IQTYPE_NIC : FW_IQ_IQTYPE_OFLD; 1184 c.iqns_to_fl0congen = BE_32(F_FW_IQ_CMD_IQFLINTCONGEN | 1185 V_FW_IQ_CMD_IQTYPE(iq_type)); 1186 } 1187 1188 if (fl != NULL) { 1189 mutex_init(&fl->lock, NULL, MUTEX_DRIVER, 1190 DDI_INTR_PRI(sc->intr_pri)); 1191 fl->flags |= FL_MTX; 1192 1193 len = fl->qsize * RX_FL_ESIZE; 1194 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &fl->dhdl, 1195 &fl->ahdl, &fl->ba, (caddr_t *)&fl->desc); 1196 if (rc != 0) 1197 return (rc); 1198 1199 /* Allocate space for one software descriptor per buffer. */ 1200 fl->cap = (fl->qsize - sc->sge.stat_len / RX_FL_ESIZE) * 8; 1201 fl->sdesc = kmem_zalloc(sizeof (struct fl_sdesc) * fl->cap, 1202 KM_SLEEP); 1203 fl->needed = fl->cap; 1204 fl->lowat = roundup(sc->sge.fl_starve_threshold, 8); 1205 1206 c.iqns_to_fl0congen |= 1207 cpu_to_be32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 1208 F_FW_IQ_CMD_FL0PACKEN | F_FW_IQ_CMD_FL0PADEN); 1209 if (cong >= 0) { 1210 c.iqns_to_fl0congen |= 1211 BE_32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 1212 F_FW_IQ_CMD_FL0CONGCIF | 1213 F_FW_IQ_CMD_FL0CONGEN); 1214 } 1215 1216 /* 1217 * In T6, for egress queue type FL there is internal overhead 1218 * of 16B for header going into FLM module. Hence the maximum 1219 * allowed burst size is 448 bytes. For T4/T5, the hardware 1220 * doesn't coalesce fetch requests if more than 64 bytes of 1221 * Free List pointers are provided, so we use a 128-byte Fetch 1222 * Burst Minimum there (T6 implements coalescing so we can use 1223 * the smaller 64-byte value there). 1224 */ 1225 const uint_t fbmin = t4_cver_ge(sc, CHELSIO_T6) ? 
1226 X_FETCHBURSTMIN_64B_T6: X_FETCHBURSTMIN_128B; 1227 const uint_t fbmax = t4_cver_ge(sc, CHELSIO_T6) ? 1228 X_FETCHBURSTMAX_256B : X_FETCHBURSTMAX_512B; 1229 c.fl0dcaen_to_fl0cidxfthresh = cpu_to_be16( 1230 V_FW_IQ_CMD_FL0FBMIN(fbmin) | 1231 V_FW_IQ_CMD_FL0FBMAX(fbmax)); 1232 c.fl0size = cpu_to_be16(fl->qsize); 1233 c.fl0addr = cpu_to_be64(fl->ba); 1234 } 1235 1236 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c); 1237 if (rc != 0) { 1238 cxgb_printf(sc->dip, CE_WARN, 1239 "failed to create ingress queue: %d", rc); 1240 return (rc); 1241 } 1242 1243 iq->cdesc = iq->desc; 1244 iq->cidx = 0; 1245 iq->gen = 1; 1246 iq->adapter = sc; 1247 iq->cntxt_id = be16_to_cpu(c.iqid); 1248 iq->abs_id = be16_to_cpu(c.physiqid); 1249 iq->flags |= IQ_ALLOCATED; 1250 mutex_init(&iq->lock, NULL, MUTEX_DRIVER, 1251 DDI_INTR_PRI(DDI_INTR_PRI(sc->intr_pri))); 1252 iq->polling = 0; 1253 1254 *t4_iqmap_slot(sc, iq->cntxt_id) = iq; 1255 1256 if (fl != NULL) { 1257 fl->cntxt_id = be16_to_cpu(c.fl0id); 1258 fl->pidx = fl->cidx = 0; 1259 fl->copy_threshold = rx_copy_threshold; 1260 1261 *t4_eqmap_slot(sc, fl->cntxt_id) = (struct sge_eq *)fl; 1262 1263 FL_LOCK(fl); 1264 (void) refill_fl(sc, fl, fl->lowat); 1265 FL_UNLOCK(fl); 1266 1267 iq->flags |= IQ_HAS_FL; 1268 } 1269 1270 if (t4_cver_ge(sc, CHELSIO_T5) && cong >= 0) { 1271 uint32_t param, val; 1272 1273 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 1274 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 1275 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 1276 if (cong == 0) 1277 val = 1 << 19; 1278 else { 1279 val = 2 << 19; 1280 for (i = 0; i < 4; i++) { 1281 if (cong & (1 << i)) 1282 val |= 1 << (i << 2); 1283 } 1284 } 1285 1286 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 1287 if (rc != 0) { 1288 /* report error but carry on */ 1289 cxgb_printf(sc->dip, CE_WARN, 1290 "failed to set congestion manager context for " 1291 "ingress queue %d: %d", iq->cntxt_id, rc); 1292 } 1293 } 1294 1295 /* Enable IQ interrupts */ 1296 iq->state = IQS_IDLE; 1297 t4_iq_gts_update(iq, iq->intr_params, 0); 1298 1299 return (0); 1300 } 1301 1302 static int 1303 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) 1304 { 1305 int rc; 1306 1307 if (iq != NULL) { 1308 struct adapter *sc = iq->adapter; 1309 dev_info_t *dip; 1310 1311 dip = pi ? pi->dip : sc->dip; 1312 if (iq->flags & IQ_ALLOCATED) { 1313 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 1314 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 1315 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 1316 if (rc != 0) { 1317 cxgb_printf(dip, CE_WARN, 1318 "failed to free queue %p: %d", iq, rc); 1319 return (rc); 1320 } 1321 mutex_destroy(&iq->lock); 1322 iq->flags &= ~IQ_ALLOCATED; 1323 } 1324 1325 if (iq->desc != NULL) { 1326 (void) free_desc_ring(&iq->dhdl, &iq->ahdl); 1327 iq->desc = NULL; 1328 } 1329 1330 bzero(iq, sizeof (*iq)); 1331 } 1332 1333 if (fl != NULL) { 1334 if (fl->sdesc != NULL) { 1335 FL_LOCK(fl); 1336 free_fl_bufs(fl); 1337 FL_UNLOCK(fl); 1338 1339 kmem_free(fl->sdesc, sizeof (struct fl_sdesc) * 1340 fl->cap); 1341 fl->sdesc = NULL; 1342 } 1343 1344 if (fl->desc != NULL) { 1345 (void) free_desc_ring(&fl->dhdl, &fl->ahdl); 1346 fl->desc = NULL; 1347 } 1348 1349 if (fl->flags & FL_MTX) { 1350 mutex_destroy(&fl->lock); 1351 fl->flags &= ~FL_MTX; 1352 } 1353 1354 bzero(fl, sizeof (struct sge_fl)); 1355 } 1356 1357 return (0); 1358 } 1359 1360 int 1361 t4_alloc_fwq(struct adapter *sc) 1362 { 1363 int rc, intr_idx; 1364 struct sge_iq *fwq = &sc->sge.fwq; 1365 1366 init_iq(fwq, sc, sc->sge.fwq_tmr_idx, sc->sge.fwq_pktc_idx, 1367 FW_IQ_QSIZE, FW_IQ_ESIZE); 1368 fwq->flags |= IQ_INTR; /* always */ 1369 intr_idx = sc->intr_count > 1 ? 1 : 0; 1370 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); 1371 if (rc != 0) { 1372 cxgb_printf(sc->dip, CE_WARN, 1373 "failed to create firmware event queue: %d.", rc); 1374 return (rc); 1375 } 1376 1377 return (0); 1378 } 1379 1380 int 1381 t4_free_fwq(struct adapter *sc) 1382 { 1383 return (free_iq_fl(NULL, &sc->sge.fwq, NULL)); 1384 } 1385 1386 static int 1387 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int i) 1388 { 1389 int rc; 1390 1391 rxq->port = pi; 1392 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1393 t4_get_tp_ch_map(pi->adapter, pi->tx_chan)); 1394 if (rc != 0) 1395 return (rc); 1396 1397 rxq->ksp = setup_rxq_kstats(pi, rxq, i); 1398 1399 return (rc); 1400 } 1401 1402 static int 1403 free_rxq(struct port_info *pi, struct sge_rxq *rxq) 1404 { 1405 int rc; 1406 1407 if (rxq->ksp != NULL) { 1408 kstat_delete(rxq->ksp); 1409 rxq->ksp = NULL; 1410 } 1411 1412 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); 1413 if (rc == 0) 1414 bzero(&rxq->fl, sizeof (*rxq) - offsetof(struct sge_rxq, fl)); 1415 1416 return (rc); 1417 } 1418 1419 static int 1420 eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 1421 { 1422 struct fw_eq_eth_cmd c = { 1423 .op_to_vfn = BE_32( 1424 V_FW_CMD_OP(FW_EQ_ETH_CMD) | 1425 F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | 1426 V_FW_EQ_ETH_CMD_PFN(sc->pf) | 1427 V_FW_EQ_ETH_CMD_VFN(0)), 1428 .alloc_to_len16 = BE_32( 1429 F_FW_EQ_ETH_CMD_ALLOC | 1430 F_FW_EQ_ETH_CMD_EQSTART | 1431 FW_LEN16(struct fw_eq_eth_cmd)), 1432 .autoequiqe_to_viid = BE_32( 1433 F_FW_EQ_ETH_CMD_AUTOEQUIQE | 1434 F_FW_EQ_ETH_CMD_AUTOEQUEQE | 1435 V_FW_EQ_ETH_CMD_VIID(pi->viid)), 1436 .fetchszm_to_iqid = BE_32( 1437 V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_BOTH) | 1438 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | 1439 F_FW_EQ_ETH_CMD_FETCHRO | 1440 V_FW_EQ_ETH_CMD_IQID(eq->iqid)), 1441 .dcaen_to_eqsize = BE_32( 1442 V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 1443 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 1444 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 1445 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)), 1446 .eqaddr = BE_64(eq->ba), 1447 }; 1448 1449 /* 1450 * The EQ is configured to send a notification for every 32 consumed 1451 * entries (X_CIDXFLUSHTHRESH_32). 
In order to ensure timely 1452 * notification of entry consumption during slow periods when that 1453 * threshold may not be reached with regularity, two mechanisms exist: 1454 * 1455 * 1. The DBQ timer can be configured to fire (and send a notification) 1456 * after a period when the EQ has gone idle. This is available on T6 1457 * and later adapters. 1458 * 1459 * 2. The CIDXFlushThresholdOverride flag will send a notification 1460 * whenever a consumed entry causes CDIX==PIDX, even if the 1461 * CIDXFlushThreshold has not been reached. 1462 * 1463 * The DBQ timer is preferred, as it results in no additional 1464 * notifications when the EQ is kept busy with small transmissions. 1465 * Comparatively, flows of many short packets (like frequent ACKs) can 1466 * cause the CIDXFlushThresholdOverride mechanism to induce a 1467 * notification for every transmitted packet. 1468 */ 1469 if (sc->flags & TAF_DBQ_TIMER) { 1470 /* Configure the DBQ timer when it is available */ 1471 c.timeren_timerix = BE_32( 1472 F_FW_EQ_ETH_CMD_TIMEREN | 1473 V_FW_EQ_ETH_CMD_TIMERIX(pi->dbq_timer_idx)); 1474 } else { 1475 /* Otherwise fall back to CIDXFlushThresholdOverride */ 1476 c.dcaen_to_eqsize |= BE_32(F_FW_EQ_ETH_CMD_CIDXFTHRESHO); 1477 } 1478 1479 int rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c); 1480 if (rc != 0) { 1481 cxgb_printf(pi->dip, CE_WARN, 1482 "failed to create Ethernet egress queue: %d", rc); 1483 return (rc); 1484 } 1485 eq->flags |= EQ_ALLOCATED; 1486 1487 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(BE_32(c.eqid_pkd)); 1488 1489 *t4_eqmap_slot(sc, eq->cntxt_id) = eq; 1490 1491 return (rc); 1492 } 1493 1494 static int 1495 alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 1496 { 1497 int rc; 1498 size_t len; 1499 1500 mutex_init(&eq->lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(sc->intr_pri)); 1501 eq->flags |= EQ_MTX; 1502 1503 len = eq->qsize * EQ_ESIZE; 1504 rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &eq->desc_dhdl, 1505 &eq->desc_ahdl, &eq->ba, (caddr_t *)&eq->desc); 1506 if (rc != 0) 1507 return (rc); 1508 1509 eq->cap = eq->qsize - sc->sge.stat_len / EQ_ESIZE; 1510 eq->spg = (void *)&eq->desc[eq->cap]; 1511 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ 1512 eq->pidx = eq->cidx = 0; 1513 eq->doorbells = sc->doorbells; 1514 1515 rc = eth_eq_alloc(sc, pi, eq); 1516 if (rc != 0) { 1517 cxgb_printf(sc->dip, CE_WARN, 1518 "failed to allocate egress queue: %d", rc); 1519 } 1520 1521 if (eq->doorbells & (DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR)) { 1522 uint64_t udb_offset; 1523 uint_t udb_qid; 1524 1525 rc = t4_bar2_sge_qregs(sc, eq->cntxt_id, T4_BAR2_QTYPE_EGRESS, 1526 0, &udb_offset, &udb_qid); 1527 1528 if (rc == 0) { 1529 eq->udb = sc->bar2_ptr + udb_offset; 1530 eq->udb_qid = udb_qid; 1531 } else { 1532 eq->doorbells &= 1533 ~(DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR); 1534 eq->udb = NULL; 1535 eq->udb_qid = 0; 1536 } 1537 } 1538 1539 return (rc); 1540 } 1541 1542 static int 1543 free_eq(struct adapter *sc, struct sge_eq *eq) 1544 { 1545 int rc; 1546 1547 if (eq->flags & EQ_ALLOCATED) { 1548 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 1549 if (rc != 0) { 1550 cxgb_printf(sc->dip, CE_WARN, 1551 "failed to free egress queue: %d", rc); 1552 return (rc); 1553 } 1554 eq->flags &= ~EQ_ALLOCATED; 1555 } 1556 1557 if (eq->desc != NULL) { 1558 (void) free_desc_ring(&eq->desc_dhdl, &eq->desc_ahdl); 1559 eq->desc = NULL; 1560 } 1561 1562 if (eq->flags & EQ_MTX) 1563 mutex_destroy(&eq->lock); 1564 1565 bzero(eq, sizeof (*eq)); 1566 return (0); 
1567 } 1568 1569 static int 1570 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) 1571 { 1572 int rc, i; 1573 struct adapter *sc = pi->adapter; 1574 struct sge_eq *eq = &txq->eq; 1575 1576 rc = alloc_eq(sc, pi, eq); 1577 if (rc != 0) 1578 return (rc); 1579 1580 txq->port = pi; 1581 txq->sdesc = kmem_zalloc(sizeof (struct tx_sdesc) * eq->cap, KM_SLEEP); 1582 txq->copy_threshold = tx_copy_threshold; 1583 txq->txb_size = eq->qsize * txq->copy_threshold; 1584 rc = alloc_tx_copybuffer(sc, txq->txb_size, &txq->txb_dhdl, 1585 &txq->txb_ahdl, &txq->txb_ba, &txq->txb_va); 1586 if (rc == 0) 1587 txq->txb_avail = txq->txb_size; 1588 else 1589 txq->txb_avail = txq->txb_size = 0; 1590 1591 /* 1592 * TODO: is this too low? Worst case would need around 4 times qsize 1593 * (all tx descriptors filled to the brim with SGLs, with each entry in 1594 * the SGL coming from a distinct DMA handle). Increase tx_dhdl_total 1595 * if you see too many dma_hdl_failed. 1596 */ 1597 txq->tx_dhdl_total = eq->qsize * 2; 1598 txq->tx_dhdl = kmem_zalloc(sizeof (ddi_dma_handle_t) * 1599 txq->tx_dhdl_total, KM_SLEEP); 1600 for (i = 0; i < txq->tx_dhdl_total; i++) { 1601 rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx, 1602 DDI_DMA_SLEEP, 0, &txq->tx_dhdl[i]); 1603 if (rc != DDI_SUCCESS) { 1604 cxgb_printf(sc->dip, CE_WARN, 1605 "%s: failed to allocate DMA handle (%d)", 1606 __func__, rc); 1607 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL); 1608 } 1609 txq->tx_dhdl_avail++; 1610 } 1611 1612 txq->ksp = setup_txq_kstats(pi, txq, idx); 1613 1614 return (rc); 1615 } 1616 1617 static int 1618 free_txq(struct port_info *pi, struct sge_txq *txq) 1619 { 1620 int i; 1621 struct adapter *sc = pi->adapter; 1622 struct sge_eq *eq = &txq->eq; 1623 1624 if (txq->ksp != NULL) { 1625 kstat_delete(txq->ksp); 1626 txq->ksp = NULL; 1627 } 1628 1629 if (txq->txb_va != NULL) { 1630 (void) free_desc_ring(&txq->txb_dhdl, &txq->txb_ahdl); 1631 txq->txb_va = NULL; 1632 } 1633 1634 if (txq->sdesc != NULL) { 1635 struct tx_sdesc *sd; 1636 ddi_dma_handle_t hdl; 1637 1638 TXQ_LOCK(txq); 1639 while (eq->cidx != eq->pidx) { 1640 sd = &txq->sdesc[eq->cidx]; 1641 1642 for (i = sd->hdls_used; i; i--) { 1643 hdl = txq->tx_dhdl[txq->tx_dhdl_cidx]; 1644 (void) ddi_dma_unbind_handle(hdl); 1645 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total) 1646 txq->tx_dhdl_cidx = 0; 1647 } 1648 1649 ASSERT(sd->mp_head); 1650 freemsgchain(sd->mp_head); 1651 sd->mp_head = sd->mp_tail = NULL; 1652 1653 eq->cidx += sd->desc_used; 1654 if (eq->cidx >= eq->cap) 1655 eq->cidx -= eq->cap; 1656 1657 txq->txb_avail += txq->txb_used; 1658 } 1659 ASSERT(txq->tx_dhdl_cidx == txq->tx_dhdl_pidx); 1660 ASSERT(txq->txb_avail == txq->txb_size); 1661 TXQ_UNLOCK(txq); 1662 1663 kmem_free(txq->sdesc, sizeof (struct tx_sdesc) * eq->cap); 1664 txq->sdesc = NULL; 1665 } 1666 1667 if (txq->tx_dhdl != NULL) { 1668 for (i = 0; i < txq->tx_dhdl_total; i++) { 1669 if (txq->tx_dhdl[i] != NULL) 1670 ddi_dma_free_handle(&txq->tx_dhdl[i]); 1671 } 1672 kmem_free(txq->tx_dhdl, 1673 sizeof (ddi_dma_handle_t) * txq->tx_dhdl_total); 1674 txq->tx_dhdl = NULL; 1675 } 1676 1677 (void) free_eq(sc, &txq->eq); 1678 1679 bzero(txq, sizeof (*txq)); 1680 return (0); 1681 } 1682 1683 /* 1684 * Allocates a block of contiguous memory for DMA. Can be used to allocate 1685 * memory for descriptor rings or for tx/rx copy buffers. 1686 * 1687 * Caller does not have to clean up anything if this function fails, it cleans 1688 * up after itself. 
1689 * 1690 * Caller provides the following: 1691 * len length of the block of memory to allocate. 1692 * flags DDI_DMA_* flags to use (CONSISTENT/STREAMING, READ/WRITE/RDWR) 1693 * acc_attr device access attributes for the allocation. 1694 * dma_attr DMA attributes for the allocation 1695 * 1696 * If the function is successful it fills up this information: 1697 * dma_hdl DMA handle for the allocated memory 1698 * acc_hdl access handle for the allocated memory 1699 * ba bus address of the allocated memory 1700 * va KVA of the allocated memory. 1701 */ 1702 static int 1703 alloc_dma_memory(struct adapter *sc, size_t len, int flags, 1704 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr, 1705 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, 1706 uint64_t *pba, caddr_t *pva) 1707 { 1708 int rc; 1709 ddi_dma_handle_t dhdl; 1710 ddi_acc_handle_t ahdl; 1711 ddi_dma_cookie_t cookie; 1712 uint_t ccount; 1713 caddr_t va; 1714 size_t real_len; 1715 1716 *pva = NULL; 1717 1718 /* 1719 * DMA handle. 1720 */ 1721 rc = ddi_dma_alloc_handle(sc->dip, dma_attr, DDI_DMA_SLEEP, 0, &dhdl); 1722 if (rc != DDI_SUCCESS) { 1723 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL); 1724 } 1725 1726 /* 1727 * Memory suitable for DMA. 1728 */ 1729 rc = ddi_dma_mem_alloc(dhdl, len, acc_attr, 1730 flags & DDI_DMA_CONSISTENT ? DDI_DMA_CONSISTENT : DDI_DMA_STREAMING, 1731 DDI_DMA_SLEEP, 0, &va, &real_len, &ahdl); 1732 if (rc != DDI_SUCCESS) { 1733 ddi_dma_free_handle(&dhdl); 1734 return (ENOMEM); 1735 } 1736 1737 /* 1738 * DMA bindings. 1739 */ 1740 rc = ddi_dma_addr_bind_handle(dhdl, NULL, va, real_len, flags, NULL, 1741 NULL, &cookie, &ccount); 1742 if (rc != DDI_DMA_MAPPED) { 1743 ddi_dma_mem_free(&ahdl); 1744 ddi_dma_free_handle(&dhdl); 1745 return (ENOMEM); 1746 } 1747 if (ccount != 1) { 1748 /* unusable DMA mapping */ 1749 (void) free_desc_ring(&dhdl, &ahdl); 1750 return (ENOMEM); 1751 } 1752 1753 bzero(va, real_len); 1754 *dma_hdl = dhdl; 1755 *acc_hdl = ahdl; 1756 *pba = cookie.dmac_laddress; 1757 *pva = va; 1758 1759 return (0); 1760 } 1761 1762 static int 1763 free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl) 1764 { 1765 (void) ddi_dma_unbind_handle(*dhdl); 1766 ddi_dma_mem_free(ahdl); 1767 ddi_dma_free_handle(dhdl); 1768 1769 return (0); 1770 } 1771 1772 static int 1773 alloc_desc_ring(struct adapter *sc, size_t len, int rw, 1774 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, 1775 uint64_t *pba, caddr_t *pva) 1776 { 1777 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_desc; 1778 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc; 1779 1780 return (alloc_dma_memory(sc, len, DDI_DMA_CONSISTENT | rw, acc_attr, 1781 dma_attr, dma_hdl, acc_hdl, pba, pva)); 1782 } 1783 1784 static int 1785 free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl) 1786 { 1787 return (free_dma_memory(dhdl, ahdl)); 1788 } 1789 1790 static int 1791 alloc_tx_copybuffer(struct adapter *sc, size_t len, 1792 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, 1793 uint64_t *pba, caddr_t *pva) 1794 { 1795 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_tx; 1796 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc; /* NOT dma_attr_tx */ 1797 1798 return (alloc_dma_memory(sc, len, DDI_DMA_STREAMING | DDI_DMA_WRITE, 1799 acc_attr, dma_attr, dma_hdl, acc_hdl, pba, pva)); 1800 } 1801 1802 static inline bool 1803 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl) 1804 { 1805 (void) ddi_dma_sync(iq->dhdl, (uintptr_t)iq->cdesc - 1806 (uintptr_t)iq->desc, iq->esize, DDI_DMA_SYNC_FORKERNEL); 
1807 1808 *ctrl = (void *)((uintptr_t)iq->cdesc + 1809 (iq->esize - sizeof (struct rsp_ctrl))); 1810 1811 return ((((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen)); 1812 } 1813 1814 static inline void 1815 iq_next(struct sge_iq *iq) 1816 { 1817 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize); 1818 if (++iq->cidx == iq->qsize - 1) { 1819 iq->cidx = 0; 1820 iq->gen ^= 1; 1821 iq->cdesc = iq->desc; 1822 } 1823 } 1824 1825 /* 1826 * Fill up the freelist by upto nbufs and maybe ring its doorbell. 1827 * 1828 * Returns non-zero to indicate that it should be added to the list of starving 1829 * freelists. 1830 */ 1831 static int 1832 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) 1833 { 1834 uint64_t *d = &fl->desc[fl->pidx]; 1835 struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; 1836 1837 FL_LOCK_ASSERT_OWNED(fl); 1838 ASSERT(nbufs >= 0); 1839 1840 if (nbufs > fl->needed) 1841 nbufs = fl->needed; 1842 1843 while (nbufs--) { 1844 if (sd->rxb != NULL) { 1845 if (sd->rxb->ref_cnt == 1) { 1846 /* 1847 * Buffer is available for recycling. Two ways 1848 * this can happen: 1849 * 1850 * a) All the packets DMA'd into it last time 1851 * around were within the rx_copy_threshold 1852 * and no part of the buffer was ever passed 1853 * up (ref_cnt never went over 1). 1854 * 1855 * b) Packets DMA'd into the buffer were passed 1856 * up but have all been freed by the upper 1857 * layers by now (ref_cnt went over 1 but is 1858 * now back to 1). 1859 * 1860 * Either way the bus address in the descriptor 1861 * ring is already valid. 1862 */ 1863 ASSERT(*d == cpu_to_be64(sd->rxb->ba)); 1864 d++; 1865 goto recycled; 1866 } else { 1867 /* 1868 * Buffer still in use and we need a 1869 * replacement. But first release our reference 1870 * on the existing buffer. 1871 */ 1872 rxbuf_free(sd->rxb); 1873 } 1874 } 1875 1876 sd->rxb = rxbuf_alloc(sc->sge.rxbuf_cache, KM_NOSLEEP, 1); 1877 if (sd->rxb == NULL) 1878 break; 1879 *d++ = cpu_to_be64(sd->rxb->ba); 1880 1881 recycled: fl->pending++; 1882 sd++; 1883 fl->needed--; 1884 if (++fl->pidx == fl->cap) { 1885 fl->pidx = 0; 1886 sd = fl->sdesc; 1887 d = fl->desc; 1888 } 1889 } 1890 1891 if (fl->pending >= 8) 1892 ring_fl_db(sc, fl); 1893 1894 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 1895 } 1896 1897 #ifndef TAILQ_FOREACH_SAFE 1898 #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ 1899 for ((var) = TAILQ_FIRST((head)); \ 1900 (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ 1901 (var) = (tvar)) 1902 #endif 1903 1904 /* 1905 * Attempt to refill all starving freelists. 
1906 */ 1907 static void 1908 refill_sfl(void *arg) 1909 { 1910 struct adapter *sc = arg; 1911 struct sge_fl *fl, *fl_temp; 1912 1913 mutex_enter(&sc->sfl_lock); 1914 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 1915 FL_LOCK(fl); 1916 (void) refill_fl(sc, fl, 64); 1917 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 1918 TAILQ_REMOVE(&sc->sfl, fl, link); 1919 fl->flags &= ~FL_STARVING; 1920 } 1921 FL_UNLOCK(fl); 1922 } 1923 1924 if (!TAILQ_EMPTY(&sc->sfl) != 0) 1925 sc->sfl_timer = timeout(refill_sfl, sc, drv_usectohz(100000)); 1926 mutex_exit(&sc->sfl_lock); 1927 } 1928 1929 static void 1930 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 1931 { 1932 mutex_enter(&sc->sfl_lock); 1933 FL_LOCK(fl); 1934 if ((fl->flags & FL_DOOMED) == 0) { 1935 if (TAILQ_EMPTY(&sc->sfl) != 0) { 1936 sc->sfl_timer = timeout(refill_sfl, sc, 1937 drv_usectohz(100000)); 1938 } 1939 fl->flags |= FL_STARVING; 1940 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 1941 } 1942 FL_UNLOCK(fl); 1943 mutex_exit(&sc->sfl_lock); 1944 } 1945 1946 static void 1947 free_fl_bufs(struct sge_fl *fl) 1948 { 1949 struct fl_sdesc *sd; 1950 unsigned int i; 1951 1952 FL_LOCK_ASSERT_OWNED(fl); 1953 1954 for (i = 0; i < fl->cap; i++) { 1955 sd = &fl->sdesc[i]; 1956 1957 if (sd->rxb != NULL) { 1958 rxbuf_free(sd->rxb); 1959 sd->rxb = NULL; 1960 } 1961 } 1962 } 1963 1964 /* 1965 * Note that fl->cidx and fl->offset are left unchanged in case of failure. 1966 */ 1967 static mblk_t * 1968 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, 1969 int *fl_bufs_used) 1970 { 1971 struct mblk_pair frame = {0}; 1972 struct rxbuf *rxb; 1973 mblk_t *m = NULL; 1974 uint_t nbuf = 0, len, copy, n; 1975 uint32_t cidx, offset, rcidx, roffset; 1976 1977 /* 1978 * The SGE won't pack a new frame into the current buffer if the entire 1979 * payload doesn't fit in the remaining space. Move on to the next buf 1980 * in that case. 
1981 */ 1982 rcidx = fl->cidx; 1983 roffset = fl->offset; 1984 if (fl->offset > 0 && len_newbuf & F_RSPD_NEWBUF) { 1985 fl->offset = 0; 1986 if (++fl->cidx == fl->cap) 1987 fl->cidx = 0; 1988 nbuf++; 1989 } 1990 cidx = fl->cidx; 1991 offset = fl->offset; 1992 1993 len = G_RSPD_LEN(len_newbuf); /* pktshift + payload length */ 1994 copy = (len <= fl->copy_threshold); 1995 if (copy != 0) { 1996 frame.head = m = allocb(len, BPRI_HI); 1997 if (m == NULL) { 1998 fl->allocb_fail++; 1999 DTRACE_PROBE1(t4__fl_alloc_fail, struct sge_fl *, fl); 2000 fl->cidx = rcidx; 2001 fl->offset = roffset; 2002 return (NULL); 2003 } 2004 } 2005 2006 while (len) { 2007 rxb = fl->sdesc[cidx].rxb; 2008 n = min(len, rxb->buf_size - offset); 2009 2010 (void) ddi_dma_sync(rxb->dhdl, offset, n, 2011 DDI_DMA_SYNC_FORKERNEL); 2012 2013 if (copy != 0) 2014 bcopy(rxb->va + offset, m->b_wptr, n); 2015 else { 2016 m = desballoc((unsigned char *)rxb->va + offset, n, 2017 BPRI_HI, &rxb->freefunc); 2018 if (m == NULL) { 2019 fl->allocb_fail++; 2020 DTRACE_PROBE1(t4__fl_alloc_fail, 2021 struct sge_fl *, fl); 2022 if (frame.head) 2023 freemsgchain(frame.head); 2024 fl->cidx = rcidx; 2025 fl->offset = roffset; 2026 return (NULL); 2027 } 2028 atomic_inc_uint(&rxb->ref_cnt); 2029 if (frame.head != NULL) 2030 frame.tail->b_cont = m; 2031 else 2032 frame.head = m; 2033 frame.tail = m; 2034 } 2035 m->b_wptr += n; 2036 len -= n; 2037 offset += roundup(n, sc->sge.fl_align); 2038 ASSERT(offset <= rxb->buf_size); 2039 if (offset == rxb->buf_size) { 2040 offset = 0; 2041 if (++cidx == fl->cap) 2042 cidx = 0; 2043 nbuf++; 2044 } 2045 } 2046 2047 fl->cidx = cidx; 2048 fl->offset = offset; 2049 (*fl_bufs_used) += nbuf; 2050 2051 ASSERT(frame.head != NULL); 2052 return (frame.head); 2053 } 2054 2055 /* 2056 * We'll do immediate data tx for non-LSO, but only when not coalescing. We're 2057 * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes 2058 * of immediate data. 2059 */ 2060 #define IMM_LEN ( \ 2061 2 * EQ_ESIZE \ 2062 - sizeof (struct fw_eth_tx_pkt_wr) \ 2063 - sizeof (struct cpl_tx_pkt_core)) 2064 2065 /* 2066 * Returns non-zero on failure, no need to cleanup anything in that case. 2067 * 2068 * Note 1: We always try to pull up the mblk if required and return E2BIG only 2069 * if this fails. 2070 * 2071 * Note 2: We'll also pullup incoming mblk if HW_LSO is set and the first mblk 2072 * does not have the TCP header in it. 2073 */ 2074 static int 2075 get_frame_txinfo(struct sge_txq *txq, mblk_t **fp, struct txinfo *txinfo, 2076 int sgl_only) 2077 { 2078 uint32_t flags = 0, len, n; 2079 mblk_t *m = *fp; 2080 int rc; 2081 2082 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb and dma_hdls */ 2083 2084 mac_hcksum_get(m, NULL, NULL, NULL, NULL, &flags); 2085 txinfo->flags = (flags & HCK_TX_FLAGS); 2086 2087 mac_lso_get(m, &txinfo->mss, &flags); 2088 txinfo->flags |= (flags & HW_LSO_FLAGS); 2089 2090 if (flags & HW_LSO) 2091 sgl_only = 1; /* Do not allow immediate data with LSO */ 2092 2093 /* 2094 * If checksum or segmentation offloads are requested, gather 2095 * information about the sizes and types of headers in the packet. 
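	 * The parsed header lengths and protocols in txinfo->meoi are used
	 * later to build the checksum control flit (csum_to_ctrl()) and, for
	 * LSO, the CPL_TX_PKT_LSO header.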
2096 */ 2097 if (txinfo->flags != 0) { 2098 mac_ether_offload_info(m, &txinfo->meoi); 2099 } else { 2100 bzero(&txinfo->meoi, sizeof (txinfo->meoi)); 2101 } 2102 2103 start: txinfo->nsegs = 0; 2104 txinfo->hdls_used = 0; 2105 txinfo->txb_used = 0; 2106 txinfo->len = 0; 2107 2108 /* total length and a rough estimate of # of segments */ 2109 n = 0; 2110 for (; m; m = m->b_cont) { 2111 len = MBLKL(m); 2112 n += (len / PAGE_SIZE) + 1; 2113 txinfo->len += len; 2114 } 2115 m = *fp; 2116 2117 if (n >= TX_SGL_SEGS || ((flags & HW_LSO) && MBLKL(m) < 50)) { 2118 txq->pullup_early++; 2119 m = msgpullup(*fp, -1); 2120 if (m == NULL) { 2121 txq->pullup_failed++; 2122 return (E2BIG); /* (*fp) left as it was */ 2123 } 2124 freemsg(*fp); 2125 *fp = m; 2126 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags); 2127 } 2128 2129 if (txinfo->len <= IMM_LEN && !sgl_only) 2130 return (0); /* nsegs = 0 tells caller to use imm. tx */ 2131 2132 if (txinfo->len <= txq->copy_threshold && 2133 copy_into_txb(txq, m, txinfo->len, txinfo) == 0) { 2134 goto done; 2135 } 2136 2137 for (; m; m = m->b_cont) { 2138 2139 len = MBLKL(m); 2140 2141 /* 2142 * Use tx copy buffer if this mblk is small enough and there is 2143 * room, otherwise add DMA bindings for this mblk to the SGL. 2144 */ 2145 if (len > txq->copy_threshold || 2146 (rc = copy_into_txb(txq, m, len, txinfo)) != 0) { 2147 rc = add_mblk(txq, txinfo, m, len); 2148 } 2149 2150 if (rc == E2BIG || 2151 (txinfo->nsegs == TX_SGL_SEGS && m->b_cont)) { 2152 2153 txq->pullup_late++; 2154 m = msgpullup(*fp, -1); 2155 if (m != NULL) { 2156 free_txinfo_resources(txq, txinfo); 2157 freemsg(*fp); 2158 *fp = m; 2159 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags); 2160 goto start; 2161 } 2162 2163 txq->pullup_failed++; 2164 rc = E2BIG; 2165 } 2166 2167 if (rc != 0) { 2168 free_txinfo_resources(txq, txinfo); 2169 return (rc); 2170 } 2171 } 2172 2173 ASSERT(txinfo->nsegs > 0 && txinfo->nsegs <= TX_SGL_SEGS); 2174 2175 done: 2176 2177 /* 2178 * Store the # of flits required to hold this frame's SGL in nflits. An 2179 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by 2180 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used 2181 * then len1 must be set to 0. 2182 */ 2183 n = txinfo->nsegs - 1; 2184 txinfo->nflits = (3 * n) / 2 + (n & 1) + 2; 2185 if (n & 1) 2186 txinfo->sgl.sge[n / 2].len[1] = cpu_to_be32(0); 2187 2188 txinfo->sgl.cmd_nsge = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_DSGL) | 2189 V_ULPTX_NSGE(txinfo->nsegs)); 2190 2191 return (0); 2192 } 2193 2194 static inline int 2195 fits_in_txb(struct sge_txq *txq, int len, int *waste) 2196 { 2197 if (txq->txb_avail < len) 2198 return (0); 2199 2200 if (txq->txb_next + len <= txq->txb_size) { 2201 *waste = 0; 2202 return (1); 2203 } 2204 2205 *waste = txq->txb_size - txq->txb_next; 2206 2207 return (txq->txb_avail - *waste < len ? 0 : 1); 2208 } 2209 2210 #define TXB_CHUNK 64 2211 2212 /* 2213 * Copies the specified # of bytes into txq's tx copy buffer and updates txinfo 2214 * and txq to indicate resources used. Caller has to make sure that those many 2215 * bytes are available in the mblk chain (b_cont linked). 
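 * The copy buffer is consumed in TXB_CHUNK-sized units and treated as a ring:
 * if the requested length does not fit before the end of the buffer, the
 * remaining tail is written off as waste and the copy restarts at offset 0.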
2216 */ 2217 static inline int 2218 copy_into_txb(struct sge_txq *txq, mblk_t *m, int len, struct txinfo *txinfo) 2219 { 2220 int waste, n; 2221 2222 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb */ 2223 2224 if (!fits_in_txb(txq, len, &waste)) { 2225 txq->txb_full++; 2226 return (ENOMEM); 2227 } 2228 2229 if (waste != 0) { 2230 ASSERT((waste & (TXB_CHUNK - 1)) == 0); 2231 txinfo->txb_used += waste; 2232 txq->txb_avail -= waste; 2233 txq->txb_next = 0; 2234 } 2235 2236 for (n = 0; n < len; m = m->b_cont) { 2237 bcopy(m->b_rptr, txq->txb_va + txq->txb_next + n, MBLKL(m)); 2238 n += MBLKL(m); 2239 } 2240 2241 add_seg(txinfo, txq->txb_ba + txq->txb_next, len); 2242 2243 n = roundup(len, TXB_CHUNK); 2244 txinfo->txb_used += n; 2245 txq->txb_avail -= n; 2246 txq->txb_next += n; 2247 ASSERT(txq->txb_next <= txq->txb_size); 2248 if (txq->txb_next == txq->txb_size) 2249 txq->txb_next = 0; 2250 2251 return (0); 2252 } 2253 2254 static inline void 2255 add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len) 2256 { 2257 ASSERT(txinfo->nsegs < TX_SGL_SEGS); /* must have room */ 2258 2259 if (txinfo->nsegs != 0) { 2260 int idx = txinfo->nsegs - 1; 2261 txinfo->sgl.sge[idx / 2].len[idx & 1] = cpu_to_be32(len); 2262 txinfo->sgl.sge[idx / 2].addr[idx & 1] = cpu_to_be64(ba); 2263 } else { 2264 txinfo->sgl.len0 = cpu_to_be32(len); 2265 txinfo->sgl.addr0 = cpu_to_be64(ba); 2266 } 2267 txinfo->nsegs++; 2268 } 2269 2270 /* 2271 * This function cleans up any partially allocated resources when it fails so 2272 * there's nothing for the caller to clean up in that case. 2273 * 2274 * EIO indicates permanent failure. Caller should drop the frame containing 2275 * this mblk and continue. 2276 * 2277 * E2BIG indicates that the SGL length for this mblk exceeds the hardware 2278 * limit. Caller should pull up the frame before trying to send it out. 2279 * (This error means our pullup_early heuristic did not work for this frame) 2280 * 2281 * ENOMEM indicates a temporary shortage of resources (DMA handles, other DMA 2282 * resources, etc.). Caller should suspend the tx queue and wait for reclaim to 2283 * free up resources. 2284 */ 2285 static inline int 2286 add_mblk(struct sge_txq *txq, struct txinfo *txinfo, mblk_t *m, int len) 2287 { 2288 ddi_dma_handle_t dhdl; 2289 ddi_dma_cookie_t cookie; 2290 uint_t ccount = 0; 2291 int rc; 2292 2293 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate dhdls */ 2294 2295 if (txq->tx_dhdl_avail == 0) { 2296 txq->dma_hdl_failed++; 2297 return (ENOMEM); 2298 } 2299 2300 dhdl = txq->tx_dhdl[txq->tx_dhdl_pidx]; 2301 rc = ddi_dma_addr_bind_handle(dhdl, NULL, (caddr_t)m->b_rptr, len, 2302 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL, &cookie, 2303 &ccount); 2304 if (rc != DDI_DMA_MAPPED) { 2305 txq->dma_map_failed++; 2306 2307 ASSERT(rc != DDI_DMA_INUSE && rc != DDI_DMA_PARTIAL_MAP); 2308 2309 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EIO); 2310 } 2311 2312 if (ccount + txinfo->nsegs > TX_SGL_SEGS) { 2313 (void) ddi_dma_unbind_handle(dhdl); 2314 return (E2BIG); 2315 } 2316 2317 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size); 2318 while (--ccount) { 2319 ddi_dma_nextcookie(dhdl, &cookie); 2320 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size); 2321 } 2322 2323 if (++txq->tx_dhdl_pidx == txq->tx_dhdl_total) 2324 txq->tx_dhdl_pidx = 0; 2325 txq->tx_dhdl_avail--; 2326 txinfo->hdls_used++; 2327 2328 return (0); 2329 } 2330 2331 /* 2332 * Releases all the txq resources used up in the specified txinfo. 
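 * Copy-buffer space is returned by rewinding txq->txb_next, and any DMA
 * handles bound for this frame are unbound after stepping tx_dhdl_pidx back
 * to them.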
2333 */ 2334 static void 2335 free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo) 2336 { 2337 int n; 2338 2339 TXQ_LOCK_ASSERT_OWNED(txq); /* dhdls, txb */ 2340 2341 n = txinfo->txb_used; 2342 if (n > 0) { 2343 txq->txb_avail += n; 2344 if (n <= txq->txb_next) 2345 txq->txb_next -= n; 2346 else { 2347 n -= txq->txb_next; 2348 txq->txb_next = txq->txb_size - n; 2349 } 2350 } 2351 2352 for (n = txinfo->hdls_used; n > 0; n--) { 2353 if (txq->tx_dhdl_pidx > 0) 2354 txq->tx_dhdl_pidx--; 2355 else 2356 txq->tx_dhdl_pidx = txq->tx_dhdl_total - 1; 2357 txq->tx_dhdl_avail++; 2358 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_pidx]); 2359 } 2360 } 2361 2362 /* 2363 * Returns 0 to indicate that m has been accepted into a coalesced tx work 2364 * request. It has either been folded into txpkts or txpkts was flushed and m 2365 * has started a new coalesced work request (as the first frame in a fresh 2366 * txpkts). 2367 * 2368 * Returns non-zero to indicate a failure - caller is responsible for 2369 * transmitting m, if there was anything in txpkts it has been flushed. 2370 */ 2371 static int 2372 add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m, 2373 struct txinfo *txinfo) 2374 { 2375 struct sge_eq *eq = &txq->eq; 2376 int can_coalesce; 2377 struct tx_sdesc *txsd; 2378 uint8_t flits; 2379 2380 TXQ_LOCK_ASSERT_OWNED(txq); 2381 ASSERT(m->b_next == NULL); 2382 2383 if (txpkts->npkt > 0) { 2384 flits = TXPKTS_PKT_HDR + txinfo->nflits; 2385 can_coalesce = (txinfo->flags & HW_LSO) == 0 && 2386 txpkts->nflits + flits <= TX_WR_FLITS && 2387 txpkts->nflits + flits <= eq->avail * 8 && 2388 txpkts->plen + txinfo->len < 65536; 2389 2390 if (can_coalesce != 0) { 2391 txpkts->tail->b_next = m; 2392 txpkts->tail = m; 2393 txpkts->npkt++; 2394 txpkts->nflits += flits; 2395 txpkts->plen += txinfo->len; 2396 2397 txsd = &txq->sdesc[eq->pidx]; 2398 txsd->txb_used += txinfo->txb_used; 2399 txsd->hdls_used += txinfo->hdls_used; 2400 2401 /* 2402 * The txpkts chaining above has already placed `m` at 2403 * the end with b_next. Keep the txsd notion of this 2404 * new tail up to date. 2405 */ 2406 ASSERT3P(txsd->mp_tail->b_next, ==, m); 2407 txsd->mp_tail = m; 2408 2409 return (0); 2410 } 2411 2412 /* 2413 * Couldn't coalesce m into txpkts. The first order of business 2414 * is to send txpkts on its way. Then we'll revisit m. 2415 */ 2416 write_txpkts_wr(txq, txpkts); 2417 } 2418 2419 /* 2420 * Check if we can start a new coalesced tx work request with m as 2421 * the first packet in it. 2422 */ 2423 2424 ASSERT(txpkts->npkt == 0); 2425 ASSERT(txinfo->len < 65536); 2426 2427 flits = TXPKTS_WR_HDR + txinfo->nflits; 2428 can_coalesce = (txinfo->flags & HW_LSO) == 0 && 2429 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 2430 2431 if (can_coalesce == 0) 2432 return (EINVAL); 2433 2434 /* 2435 * Start a fresh coalesced tx WR with m as the first frame in it. 
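	 * The work request header occupies the start of the descriptor, so
	 * per-frame data begins at flit[2]; the header itself is written by
	 * write_txpkts_wr() when the coalesced WR is flushed.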
2436 */ 2437 txpkts->tail = m; 2438 txpkts->npkt = 1; 2439 txpkts->nflits = flits; 2440 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 2441 txpkts->plen = txinfo->len; 2442 2443 txsd = &txq->sdesc[eq->pidx]; 2444 txsd->mp_head = txsd->mp_tail = m; 2445 txsd->txb_used = txinfo->txb_used; 2446 txsd->hdls_used = txinfo->hdls_used; 2447 2448 return (0); 2449 } 2450 2451 static inline void 2452 t4_tx_incr_pending(struct sge_txq *txq, uint_t ndesc) 2453 { 2454 struct sge_eq *eq = &txq->eq; 2455 2456 TXQ_LOCK_ASSERT_OWNED(txq); 2457 ASSERT3U(ndesc, !=, 0); 2458 ASSERT3U(eq->avail, >=, ndesc); 2459 2460 eq->pending += ndesc; 2461 eq->avail -= ndesc; 2462 eq->pidx += ndesc; 2463 if (eq->pidx >= eq->cap) { 2464 eq->pidx -= eq->cap; 2465 } 2466 } 2467 2468 /* 2469 * Note that write_txpkts_wr can never run out of hardware descriptors (but 2470 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 2471 * coalescing only if sufficient hardware descriptors are available. 2472 */ 2473 static void 2474 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 2475 { 2476 struct sge_eq *eq = &txq->eq; 2477 struct fw_eth_tx_pkts_wr *wr; 2478 struct tx_sdesc *txsd; 2479 uint32_t ctrl; 2480 uint16_t ndesc; 2481 2482 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */ 2483 2484 ndesc = howmany(txpkts->nflits, 8); 2485 2486 wr = (void *)&eq->desc[eq->pidx]; 2487 wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) | 2488 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */ 2489 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 2490 if (eq->avail == ndesc) 2491 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2492 wr->equiq_to_len16 = cpu_to_be32(ctrl); 2493 wr->plen = cpu_to_be16(txpkts->plen); 2494 wr->npkt = txpkts->npkt; 2495 wr->r3 = wr->type = 0; 2496 2497 /* Everything else already written */ 2498 2499 txsd = &txq->sdesc[eq->pidx]; 2500 txsd->desc_used = ndesc; 2501 2502 txq->txb_used += txsd->txb_used / TXB_CHUNK; 2503 txq->hdl_used += txsd->hdls_used; 2504 2505 t4_tx_incr_pending(txq, ndesc); 2506 2507 txq->txpkts_pkts += txpkts->npkt; 2508 txq->txpkts_wrs++; 2509 txpkts->npkt = 0; /* emptied */ 2510 } 2511 2512 typedef enum { 2513 COS_SUCCESS, /* ctrl flit contains proper bits for csum offload */ 2514 COS_IGNORE, /* no csum offload requested */ 2515 COS_FAIL, /* csum offload requested, but pkt data missing */ 2516 } csum_offload_status_t; 2517 /* 2518 * Build a ctrl1 flit for checksum offload in CPL_TX_PKT_XT command 2519 */ 2520 static csum_offload_status_t 2521 csum_to_ctrl(const struct txinfo *txinfo, uint32_t chip_version, 2522 uint64_t *ctrlp) 2523 { 2524 const mac_ether_offload_info_t *meoi = &txinfo->meoi; 2525 const uint32_t tx_flags = txinfo->flags; 2526 const boolean_t needs_l3_csum = (tx_flags & HW_LSO) != 0 || 2527 (tx_flags & HCK_IPV4_HDRCKSUM) != 0; 2528 const boolean_t needs_l4_csum = (tx_flags & HW_LSO) != 0 || 2529 (tx_flags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) != 0; 2530 2531 /* 2532 * Default to disabling any checksumming both for cases where it is not 2533 * requested, but also if we cannot appropriately interrogate the 2534 * required information from the packet. 
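	 * "Disabled" here means both F_TXPKT_IPCSUM_DIS and F_TXPKT_L4CSUM_DIS
	 * are set; requested offloads that can be satisfied are enabled below
	 * by clearing the corresponding bit and, for L4, selecting a
	 * TX_CSUM_* type.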
2535 */ 2536 uint64_t ctrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS; 2537 if (!needs_l3_csum && !needs_l4_csum) { 2538 *ctrlp = ctrl; 2539 return (COS_IGNORE); 2540 } 2541 2542 if (needs_l3_csum) { 2543 /* Only IPv4 checksums are supported (for L3) */ 2544 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0 || 2545 meoi->meoi_l3proto != ETHERTYPE_IP) { 2546 *ctrlp = ctrl; 2547 return (COS_FAIL); 2548 } 2549 ctrl &= ~F_TXPKT_IPCSUM_DIS; 2550 } 2551 2552 if (needs_l4_csum) { 2553 /* 2554 * We need at least all of the L3 header to make decisions about 2555 * the contained L4 protocol. If not all of the L4 information 2556 * is present, we will leave it to the NIC to checksum all it is 2557 * able to. 2558 */ 2559 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0) { 2560 *ctrlp = ctrl; 2561 return (COS_FAIL); 2562 } 2563 2564 /* 2565 * Since we are parsing the packet anyways, make the checksum 2566 * decision based on the L4 protocol, rather than using the 2567 * Generic TCP/UDP checksum using start & end offsets in the 2568 * packet (like requested with PARTIALCKSUM). 2569 */ 2570 int csum_type = -1; 2571 if (meoi->meoi_l3proto == ETHERTYPE_IP && 2572 meoi->meoi_l4proto == IPPROTO_TCP) { 2573 csum_type = TX_CSUM_TCPIP; 2574 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 && 2575 meoi->meoi_l4proto == IPPROTO_TCP) { 2576 csum_type = TX_CSUM_TCPIP6; 2577 } else if (meoi->meoi_l3proto == ETHERTYPE_IP && 2578 meoi->meoi_l4proto == IPPROTO_UDP) { 2579 csum_type = TX_CSUM_UDPIP; 2580 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 && 2581 meoi->meoi_l4proto == IPPROTO_UDP) { 2582 csum_type = TX_CSUM_UDPIP6; 2583 } else { 2584 *ctrlp = ctrl; 2585 return (COS_FAIL); 2586 } 2587 2588 ASSERT(csum_type != -1); 2589 ctrl &= ~F_TXPKT_L4CSUM_DIS; 2590 ctrl |= V_TXPKT_CSUM_TYPE(csum_type); 2591 } 2592 2593 if ((ctrl & F_TXPKT_IPCSUM_DIS) == 0 && 2594 (ctrl & F_TXPKT_L4CSUM_DIS) != 0) { 2595 /* 2596 * If only the IPv4 checksum is requested, we need to set an 2597 * appropriate type in the command for it. 2598 */ 2599 ctrl |= V_TXPKT_CSUM_TYPE(TX_CSUM_IP); 2600 } 2601 2602 ASSERT(ctrl != (F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS)); 2603 2604 /* 2605 * Fill in the requisite L2/L3 header length data. 2606 * 2607 * The Ethernet header length is recorded as 'size - 14 bytes' 2608 */ 2609 const uint8_t eth_len = meoi->meoi_l2hlen - 14; 2610 if (chip_version >= CHELSIO_T6) { 2611 ctrl |= V_T6_TXPKT_ETHHDR_LEN(eth_len); 2612 } else { 2613 ctrl |= V_TXPKT_ETHHDR_LEN(eth_len); 2614 } 2615 ctrl |= V_TXPKT_IPHDR_LEN(meoi->meoi_l3hlen); 2616 2617 *ctrlp = ctrl; 2618 return (COS_SUCCESS); 2619 } 2620 2621 static int 2622 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m, 2623 struct txinfo *txinfo) 2624 { 2625 struct sge_eq *eq = &txq->eq; 2626 struct fw_eth_tx_pkt_wr *wr; 2627 struct cpl_tx_pkt_core *cpl; 2628 uint32_t ctrl; /* used in many unrelated places */ 2629 uint64_t ctrl1; 2630 int nflits, ndesc; 2631 struct tx_sdesc *txsd; 2632 caddr_t dst; 2633 const mac_ether_offload_info_t *meoi = &txinfo->meoi; 2634 2635 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */ 2636 2637 /* 2638 * Do we have enough flits to send this frame out? 
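	 * The WR needs flits for the firmware/CPL headers (plus the LSO CPL
	 * when HW_LSO is set) followed by either the SGL or, for immediate
	 * tx, the frame data itself; ndesc is that flit count rounded up to
	 * whole 8-flit hardware descriptors.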
2639 */ 2640 ctrl = sizeof (struct cpl_tx_pkt_core); 2641 if (txinfo->flags & HW_LSO) { 2642 nflits = TXPKT_LSO_WR_HDR; 2643 ctrl += sizeof (struct cpl_tx_pkt_lso_core); 2644 } else { 2645 nflits = TXPKT_WR_HDR; 2646 } 2647 if (txinfo->nsegs > 0) 2648 nflits += txinfo->nflits; 2649 else { 2650 nflits += howmany(txinfo->len, 8); 2651 ctrl += txinfo->len; 2652 } 2653 ndesc = howmany(nflits, 8); 2654 if (ndesc > eq->avail) 2655 return (ENOMEM); 2656 2657 /* Firmware work request header */ 2658 wr = (void *)&eq->desc[eq->pidx]; 2659 wr->op_immdlen = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 2660 V_FW_WR_IMMDLEN(ctrl)); 2661 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 2662 if (eq->avail == ndesc) 2663 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2664 wr->equiq_to_len16 = cpu_to_be32(ctrl); 2665 wr->r3 = 0; 2666 2667 if (txinfo->flags & HW_LSO && 2668 (meoi->meoi_flags & MEOI_L4INFO_SET) != 0 && 2669 meoi->meoi_l4proto == IPPROTO_TCP) { 2670 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 2671 2672 ctrl = V_LSO_OPCODE((u32)CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 2673 F_LSO_LAST_SLICE; 2674 2675 if (meoi->meoi_l2hlen > sizeof (struct ether_header)) { 2676 /* 2677 * This presently assumes a standard VLAN header, 2678 * without support for Q-in-Q. 2679 */ 2680 ctrl |= V_LSO_ETHHDR_LEN(1); 2681 } 2682 2683 switch (meoi->meoi_l3proto) { 2684 case ETHERTYPE_IPV6: 2685 ctrl |= F_LSO_IPV6; 2686 /* FALLTHROUGH */ 2687 case ETHERTYPE_IP: 2688 ctrl |= V_LSO_IPHDR_LEN(meoi->meoi_l3hlen / 4); 2689 break; 2690 default: 2691 break; 2692 } 2693 2694 ctrl |= V_LSO_TCPHDR_LEN(meoi->meoi_l4hlen / 4); 2695 2696 lso->lso_ctrl = cpu_to_be32(ctrl); 2697 lso->ipid_ofst = cpu_to_be16(0); 2698 lso->mss = cpu_to_be16(txinfo->mss); 2699 lso->seqno_offset = cpu_to_be32(0); 2700 if (t4_cver_eq(pi->adapter, CHELSIO_T4)) 2701 lso->len = cpu_to_be32(txinfo->len); 2702 else 2703 lso->len = cpu_to_be32(V_LSO_T5_XFER_SIZE(txinfo->len)); 2704 2705 cpl = (void *)(lso + 1); 2706 2707 txq->tso_wrs++; 2708 } else { 2709 cpl = (void *)(wr + 1); 2710 } 2711 2712 /* Checksum offload */ 2713 switch (csum_to_ctrl(txinfo, 2714 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl1)) { 2715 case COS_SUCCESS: 2716 txq->txcsum++; 2717 break; 2718 case COS_FAIL: 2719 /* 2720 * Packet will be going out with checksums which are probably 2721 * wrong but there is little we can do now. 
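		 * The csum_failed kstat counts these occurrences so that the
		 * condition is at least visible to an operator.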
2722 */ 2723 txq->csum_failed++; 2724 break; 2725 default: 2726 break; 2727 } 2728 2729 /* CPL header */ 2730 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 2731 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2732 cpl->pack = 0; 2733 cpl->len = cpu_to_be16(txinfo->len); 2734 cpl->ctrl1 = cpu_to_be64(ctrl1); 2735 2736 /* Software descriptor */ 2737 txsd = &txq->sdesc[eq->pidx]; 2738 txsd->mp_head = txsd->mp_tail = m; 2739 txsd->txb_used = txinfo->txb_used; 2740 txsd->hdls_used = txinfo->hdls_used; 2741 txsd->desc_used = ndesc; 2742 2743 txq->txb_used += txinfo->txb_used / TXB_CHUNK; 2744 txq->hdl_used += txinfo->hdls_used; 2745 2746 t4_tx_incr_pending(txq, ndesc); 2747 2748 /* SGL */ 2749 dst = (void *)(cpl + 1); 2750 if (txinfo->nsegs > 0) { 2751 txq->sgl_wrs++; 2752 copy_to_txd(eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8); 2753 2754 /* Need to zero-pad to a 16 byte boundary if not on one */ 2755 if ((uintptr_t)dst & 0xf) 2756 *(uint64_t *)dst = 0; 2757 2758 } else { 2759 txq->imm_wrs++; 2760 #ifdef DEBUG 2761 ctrl = txinfo->len; 2762 #endif 2763 for (; m; m = m->b_cont) { 2764 copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m)); 2765 #ifdef DEBUG 2766 ctrl -= MBLKL(m); 2767 #endif 2768 } 2769 ASSERT(ctrl == 0); 2770 } 2771 2772 txq->txpkt_wrs++; 2773 return (0); 2774 } 2775 2776 static void 2777 t4_write_flush_wr(struct sge_txq *txq) 2778 { 2779 struct sge_eq *eq = &txq->eq; 2780 2781 EQ_LOCK_ASSERT_OWNED(eq); 2782 ASSERT(eq->avail > 0); 2783 2784 const struct fw_eq_flush_wr wr = { 2785 .opcode = FW_EQ_FLUSH_WR, 2786 .equiq_to_len16 = BE_32( 2787 V_FW_WR_LEN16(sizeof (struct fw_eq_flush_wr) / 16) | 2788 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ), 2789 }; 2790 *(struct fw_eq_flush_wr *)&eq->desc[eq->pidx] = wr; 2791 2792 const struct tx_sdesc txsd = { 2793 .mp_head = NULL, 2794 .mp_tail = NULL, 2795 .txb_used = 0, 2796 .hdls_used = 0, 2797 .desc_used = 1, 2798 }; 2799 txq->sdesc[eq->pidx] = txsd; 2800 2801 t4_tx_incr_pending(txq, 1); 2802 } 2803 2804 static inline void 2805 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 2806 struct txpkts *txpkts, struct txinfo *txinfo) 2807 { 2808 struct ulp_txpkt *ulpmc; 2809 struct ulptx_idata *ulpsc; 2810 struct cpl_tx_pkt_core *cpl; 2811 uintptr_t flitp, start, end; 2812 uint64_t ctrl; 2813 caddr_t dst; 2814 2815 ASSERT(txpkts->npkt > 0); 2816 2817 start = (uintptr_t)txq->eq.desc; 2818 end = (uintptr_t)txq->eq.spg; 2819 2820 /* Checksum offload */ 2821 switch (csum_to_ctrl(txinfo, 2822 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl)) { 2823 case COS_SUCCESS: 2824 txq->txcsum++; 2825 break; 2826 case COS_FAIL: 2827 /* 2828 * Packet will be going out with checksums which are probably 2829 * wrong but there is little we can do now. 2830 */ 2831 txq->csum_failed++; 2832 break; 2833 default: 2834 break; 2835 } 2836 2837 /* 2838 * The previous packet's SGL must have ended at a 16 byte boundary (this 2839 * is required by the firmware/hardware). It follows that flitp cannot 2840 * wrap around between the ULPTX master command and ULPTX subcommand (8 2841 * bytes each), and that it can not wrap around in the middle of the 2842 * cpl_tx_pkt_core either. 
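	 * Wrap therefore only needs to be checked at those 16 byte
	 * boundaries, which is what the (flitp == end) tests below do.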
2843 */ 2844 flitp = (uintptr_t)txpkts->flitp; 2845 ASSERT((flitp & 0xf) == 0); 2846 2847 /* ULP master command */ 2848 ulpmc = (void *)flitp; 2849 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); 2850 ulpmc->len = htonl(howmany(sizeof (*ulpmc) + sizeof (*ulpsc) + 2851 sizeof (*cpl) + 8 * txinfo->nflits, 16)); 2852 2853 /* ULP subcommand */ 2854 ulpsc = (void *)(ulpmc + 1); 2855 ulpsc->cmd_more = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 2856 F_ULP_TX_SC_MORE); 2857 ulpsc->len = cpu_to_be32(sizeof (struct cpl_tx_pkt_core)); 2858 2859 flitp += sizeof (*ulpmc) + sizeof (*ulpsc); 2860 if (flitp == end) 2861 flitp = start; 2862 2863 /* CPL_TX_PKT_XT */ 2864 cpl = (void *)flitp; 2865 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 2866 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 2867 cpl->pack = 0; 2868 cpl->len = cpu_to_be16(txinfo->len); 2869 cpl->ctrl1 = cpu_to_be64(ctrl); 2870 2871 flitp += sizeof (*cpl); 2872 if (flitp == end) 2873 flitp = start; 2874 2875 /* SGL for this frame */ 2876 dst = (caddr_t)flitp; 2877 copy_to_txd(&txq->eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8); 2878 flitp = (uintptr_t)dst; 2879 2880 /* Zero pad and advance to a 16 byte boundary if not already at one. */ 2881 if (flitp & 0xf) { 2882 2883 /* no matter what, flitp should be on an 8 byte boundary */ 2884 ASSERT((flitp & 0x7) == 0); 2885 2886 *(uint64_t *)flitp = 0; 2887 flitp += sizeof (uint64_t); 2888 txpkts->nflits++; 2889 } 2890 2891 if (flitp == end) 2892 flitp = start; 2893 2894 txpkts->flitp = (void *)flitp; 2895 } 2896 2897 static inline void 2898 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 2899 { 2900 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) { 2901 bcopy(from, *to, len); 2902 (*to) += len; 2903 } else { 2904 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to); 2905 2906 bcopy(from, *to, portion); 2907 from += portion; 2908 portion = len - portion; /* remaining */ 2909 bcopy(from, (void *)eq->desc, portion); 2910 (*to) = (caddr_t)eq->desc + portion; 2911 } 2912 } 2913 2914 static void 2915 t4_tx_ring_db(struct sge_txq *txq) 2916 { 2917 struct sge_eq *eq = &txq->eq; 2918 struct adapter *sc = txq->port->adapter; 2919 int val, db_mode; 2920 t4_doorbells_t db = eq->doorbells; 2921 2922 EQ_LOCK_ASSERT_OWNED(eq); 2923 2924 if (eq->pending > 1) 2925 db &= ~DOORBELL_WCWR; 2926 2927 if (eq->pending > eq->pidx) { 2928 int offset = eq->cap - (eq->pending - eq->pidx); 2929 2930 /* pidx has wrapped around since last doorbell */ 2931 2932 (void) ddi_dma_sync(eq->desc_dhdl, 2933 offset * sizeof (struct tx_desc), 0, 2934 DDI_DMA_SYNC_FORDEV); 2935 (void) ddi_dma_sync(eq->desc_dhdl, 2936 0, eq->pidx * sizeof (struct tx_desc), 2937 DDI_DMA_SYNC_FORDEV); 2938 } else if (eq->pending > 0) { 2939 (void) ddi_dma_sync(eq->desc_dhdl, 2940 (eq->pidx - eq->pending) * sizeof (struct tx_desc), 2941 eq->pending * sizeof (struct tx_desc), 2942 DDI_DMA_SYNC_FORDEV); 2943 } 2944 2945 membar_producer(); 2946 2947 if (t4_cver_eq(sc, CHELSIO_T4)) 2948 val = V_PIDX(eq->pending); 2949 else 2950 val = V_PIDX_T5(eq->pending); 2951 2952 db_mode = (1 << (ffs(db) - 1)); 2953 switch (db_mode) { 2954 case DOORBELL_WCWR: { 2955 /* 2956 * Queues whose 128B doorbell segment fits in 2957 * the page do not use relative qid 2958 * (udb_qid is always 0). Only queues with 2959 * doorbell segments can do WCWR. 2960 */ 2961 ASSERT(eq->udb_qid == 0 && eq->pending == 1); 2962 2963 const uint_t desc_idx = 2964 eq->pidx != 0 ? 
eq->pidx - 1 : eq->cap - 1; 2965 uint64_t *src = (uint64_t *)&eq->desc[desc_idx]; 2966 volatile uint64_t *dst = 2967 (uint64_t *)(eq->udb + UDBS_WR_OFFSET); 2968 2969 /* Copy the 8 flits of the TX descriptor to the DB */ 2970 const uint_t flit_count = 2971 sizeof (struct tx_desc) / sizeof (uint64_t); 2972 for (uint_t i = 0; i < flit_count; i++) { 2973 /* 2974 * Perform the copy directly through the BAR 2975 * mapping, rather than using ddi_put64(). 2976 * 2977 * The latter was found to impose a significant 2978 * performance burden when called in this loop. 2979 */ 2980 dst[i] = src[i]; 2981 } 2982 2983 membar_producer(); 2984 break; 2985 } 2986 2987 case DOORBELL_UDB: 2988 case DOORBELL_UDBWC: 2989 ddi_put32(sc->bar2_hdl, 2990 (uint32_t *)(eq->udb + UDBS_DB_OFFSET), 2991 LE_32(V_QID(eq->udb_qid) | val)); 2992 membar_producer(); 2993 break; 2994 2995 case DOORBELL_KDB: 2996 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), 2997 V_QID(eq->cntxt_id) | val); 2998 break; 2999 } 3000 3001 eq->pending = 0; 3002 } 3003 3004 /* 3005 * Reclaim consumed descriptors from egress queue. This will be capped at an 3006 * upper bound of `howmany`. The corresponding mblks will be freed inline, 3007 * unless a non-NULL `defer_freemp` is provided, in which case the to-be-freed 3008 * mblk chain will be provided to the caller. 3009 * 3010 * Returns the number of descriptors which underwent reclamation. 3011 */ 3012 static uint_t 3013 t4_tx_reclaim_descs(struct sge_txq *txq, uint_t howmany, mblk_t **defer_freemp) 3014 { 3015 struct sge_eq *eq = &txq->eq; 3016 3017 EQ_LOCK_ASSERT_OWNED(eq); 3018 3019 const uint_t cur_cidx = BE_16(eq->spg->cidx); 3020 const uint_t reclaim_avail = (cur_cidx >= eq->cidx) ? 3021 (cur_cidx - eq->cidx) : (cur_cidx + eq->cap - eq->cidx); 3022 3023 if (reclaim_avail == 0) { 3024 return (0); 3025 } 3026 3027 uint_t txb_freed = 0, hdl_freed = 0, reclaimed = 0; 3028 do { 3029 struct tx_sdesc *txsd = &txq->sdesc[eq->cidx]; 3030 const uint_t ndesc = txsd->desc_used; 3031 3032 /* Firmware doesn't return "partial" credits. */ 3033 ASSERT3U(reclaimed + ndesc, <=, reclaim_avail); 3034 3035 if (txsd->mp_head != NULL) { 3036 /* 3037 * Even when packet content fits entirely in immediate 3038 * buffer, the mblk is kept around until the 3039 * transmission completes. 3040 */ 3041 if (defer_freemp != NULL) { 3042 /* 3043 * Append the mblk chain from this descriptor 3044 * onto the end of the defer list. 3045 * 3046 * In the case that this is the first mblk we 3047 * have processed, the below assignment will 3048 * communicate the head of the chain to the 3049 * caller. 3050 */ 3051 *defer_freemp = txsd->mp_head; 3052 defer_freemp = &txsd->mp_tail->b_next; 3053 } else { 3054 freemsgchain(txsd->mp_head); 3055 } 3056 txsd->mp_head = txsd->mp_tail = NULL; 3057 } else { 3058 /* 3059 * If mblk is NULL, this has to be the software 3060 * descriptor for a credit flush work request. 
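			 * Such descriptors are queued by t4_write_flush_wr()
			 * and consume exactly one hardware descriptor with no
			 * copy-buffer or DMA-handle usage, which is what the
			 * assertions below check.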
3061 */ 3062 ASSERT0(txsd->txb_used); 3063 ASSERT0(txsd->hdls_used); 3064 ASSERT3U(ndesc, ==, 1); 3065 } 3066 3067 txb_freed += txsd->txb_used; 3068 hdl_freed += txsd->hdls_used; 3069 reclaimed += ndesc; 3070 3071 eq->cidx += ndesc; 3072 if (eq->cidx >= eq->cap) { 3073 eq->cidx -= eq->cap; 3074 } 3075 } while (reclaimed < reclaim_avail && reclaimed < howmany); 3076 3077 eq->avail += reclaimed; 3078 txq->txb_avail += txb_freed; 3079 txq->tx_dhdl_avail += hdl_freed; 3080 3081 ASSERT3U(eq->avail, <, eq->cap); 3082 ASSERT3U(txq->tx_dhdl_avail, <=, txq->tx_dhdl_total); 3083 3084 for (; hdl_freed; hdl_freed--) { 3085 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_cidx]); 3086 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total) 3087 txq->tx_dhdl_cidx = 0; 3088 } 3089 3090 return (reclaimed); 3091 } 3092 3093 static int 3094 t4_handle_cpl_msg(struct sge_iq *iq, const struct rss_header *rss, mblk_t *mp) 3095 { 3096 const uint8_t opcode = rss->opcode; 3097 3098 DTRACE_PROBE4(t4__cpl_msg, struct sge_iq *, iq, uint8_t, opcode, 3099 const struct rss_header *, rss, mblk_t *, mp); 3100 3101 switch (opcode) { 3102 case CPL_FW4_MSG: 3103 case CPL_FW6_MSG: 3104 ASSERT3P(mp, ==, NULL); 3105 return (t4_handle_fw_msg(iq, rss)); 3106 case CPL_SGE_EGR_UPDATE: 3107 ASSERT3P(mp, ==, NULL); 3108 t4_sge_egr_update(iq, rss); 3109 return (0); 3110 case CPL_RX_PKT: 3111 return (t4_eth_rx(iq, rss, mp)); 3112 default: 3113 cxgb_printf(iq->adapter->dip, CE_WARN, 3114 "unhandled CPL opcode 0x%02x", opcode); 3115 if (mp != NULL) { 3116 freemsg(mp); 3117 } 3118 return (0); 3119 } 3120 } 3121 3122 static int 3123 t4_handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss) 3124 { 3125 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 3126 const uint8_t msg_type = cpl->type; 3127 const struct rss_header *rss2; 3128 struct adapter *sc = iq->adapter; 3129 3130 DTRACE_PROBE3(t4__fw_msg, struct sge_iq *, iq, uint8_t, msg_type, 3131 const struct rss_header *, rss); 3132 3133 switch (msg_type) { 3134 case FW_TYPE_RSSCPL: /* also synonym for FW6_TYPE_RSSCPL */ 3135 rss2 = (const struct rss_header *)&cpl->data[0]; 3136 return (t4_handle_cpl_msg(iq, rss2, NULL)); 3137 case FW6_TYPE_CMD_RPL: 3138 return (t4_handle_fw_rpl(sc, &cpl->data[0])); 3139 default: 3140 cxgb_printf(sc->dip, CE_WARN, 3141 "unhandled fw_msg type 0x%02x", msg_type); 3142 return (0); 3143 } 3144 } 3145 3146 static int 3147 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m) 3148 { 3149 bool csum_ok; 3150 uint16_t err_vec; 3151 struct sge_rxq *rxq = (void *)iq; 3152 struct mblk_pair chain = {0}; 3153 struct adapter *sc = iq->adapter; 3154 const struct cpl_rx_pkt *cpl = t4_rss_payload(rss); 3155 3156 m->b_rptr += sc->sge.pktshift; 3157 3158 /* Compressed error vector is enabled for T6 only */ 3159 if (sc->params.tp.rx_pkt_encap) 3160 /* It is enabled only in T6 config file */ 3161 err_vec = G_T6_COMPR_RXERR_VEC(ntohs(cpl->err_vec)); 3162 else 3163 err_vec = ntohs(cpl->err_vec); 3164 3165 csum_ok = cpl->csum_calc && !err_vec; 3166 /* TODO: what about cpl->ip_frag? 
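	 * For now a fragmented datagram simply skips mac_hcksum_set(),
	 * leaving checksum verification to the IP stack.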
	 */
	if (csum_ok && !cpl->ip_frag) {
		mac_hcksum_set(m, 0, 0, 0, 0xffff,
		    HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
		    HCK_IPV4_HDRCKSUM_OK);
		rxq->rxcsum++;
	}

	/* Add to the chain that we'll send up */
	if (chain.head != NULL)
		chain.tail->b_next = m;
	else
		chain.head = m;
	chain.tail = m;

	t4_mac_rx(rxq->port, rxq, chain.head);

	rxq->rxpkts++;
	rxq->rxbytes += be16_to_cpu(cpl->len);
	return (0);
}

#define	FL_HW_IDX(idx)	((idx) >> 3)

static inline void
ring_fl_db(struct adapter *sc, struct sge_fl *fl)
{
	int desc_start, desc_last, ndesc;
	uint32_t v = sc->params.arch.sge_fl_db;

	ndesc = FL_HW_IDX(fl->pending);

	/* Hold back one credit if pidx = cidx */
	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
		ndesc--;

	/*
	 * ndesc may have been reduced above (to avoid pidx = cidx). If there
	 * is nothing to post, return.
	 */
	if (ndesc <= 0)
		return;

	desc_last = FL_HW_IDX(fl->pidx);

	if (fl->pidx < fl->pending) {
		/* There was a wrap */
		desc_start = FL_HW_IDX(fl->pidx + fl->cap - fl->pending);

		/* From desc_start to the end of list */
		(void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE, 0,
		    DDI_DMA_SYNC_FORDEV);

		/* From start of list to the desc_last */
		if (desc_last != 0)
			(void) ddi_dma_sync(fl->dhdl, 0, desc_last *
			    RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
	} else {
		/* There was no wrap, sync from start_desc to last_desc */
		desc_start = FL_HW_IDX(fl->pidx - fl->pending);
		(void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE,
		    ndesc * RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV);
	}

	if (t4_cver_eq(sc, CHELSIO_T4))
		v |= V_PIDX(ndesc);
	else
		v |= V_PIDX_T5(ndesc);
	v |= V_QID(fl->cntxt_id);

	membar_producer();

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v);

	/*
	 * Update pending count:
	 * Deduct the number of descriptors posted
	 */
	fl->pending -= ndesc * 8;
}

static void
t4_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_sge_egr_update *cpl = t4_rss_payload(rss);
	const uint_t qid = G_EGR_QID(BE_32(cpl->opcode_qid));
	struct sge_txq *txq = (struct sge_txq *)(*t4_eqmap_slot(sc, qid));
	struct sge_eq *eq = &txq->eq;

	/*
	 * Get a "live" snapshot of the flags and PIDX state from the TXQ.
	 *
	 * This is done without the protection of the TXQ/EQ lock, since the
	 * gathered information is used to avoid contending on that lock for
	 * the reclaim.
	 */
	membar_consumer();
	const uint16_t live_pidx = BE_16(eq->pidx);
	const t4_eq_flags_t live_flags = eq->flags;

	if ((live_flags & EQ_CORKED) == 0 &&
	    (cpl->pidx != cpl->cidx || live_pidx != cpl->cidx)) {
		/*
		 * A reclaim of the ring can be skipped if:
		 *
		 * 1. The EQ is not in the "corked" state, where it was unable
		 * to allocate descriptors (or memory) while attempting to
		 * place a packet in the TXQ.
		 *
		 * 2. There are additional transmit descriptors in the EQ which
		 * will trigger a subsequent SGE_EGR_UPDATE notification.
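		 * This is the case when the PIDX (either the value carried in
		 * the update or the live value read from the EQ) differs from
		 * the CIDX reported in the update.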
3278 * 3279 * When those conditions are met, it is safe to skip performing 3280 * a reclaim here, reducing the chance that we contend with 3281 * other transmission activity against the TXQ. 3282 */ 3283 DTRACE_PROBE2(t4__elide__reclaim, 3284 struct sge_txq *, txq, struct cpl_sge_egr_update *, cpl); 3285 return; 3286 } 3287 3288 mblk_t *freemp = NULL; 3289 bool do_mac_update = false; 3290 3291 TXQ_LOCK(txq); 3292 (void) t4_tx_reclaim_descs(txq, eq->qsize, &freemp); 3293 if (eq->flags & EQ_CORKED && eq->avail != 0) { 3294 do_mac_update = true; 3295 eq->flags &= ~EQ_CORKED; 3296 } 3297 TXQ_UNLOCK(txq); 3298 3299 freemsgchain(freemp); 3300 if (do_mac_update) { 3301 t4_mac_tx_update(txq->port, txq); 3302 } 3303 } 3304 3305 #define KS_UINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_ULONG) 3306 #define KS_CINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_CHAR) 3307 #define KS_U_SET(x, y) kstatp->x.value.ul = (y) 3308 #define KS_U_FROM(x, y) kstatp->x.value.ul = (y)->x 3309 #define KS_C_SET(x, ...) \ 3310 (void) snprintf(kstatp->x.value.c, 16, __VA_ARGS__) 3311 3312 /* 3313 * cxgbe:X:config 3314 */ 3315 struct cxgbe_port_config_kstats { 3316 kstat_named_t idx; 3317 kstat_named_t nrxq; 3318 kstat_named_t ntxq; 3319 kstat_named_t first_rxq; 3320 kstat_named_t first_txq; 3321 kstat_named_t controller; 3322 kstat_named_t factory_mac_address; 3323 }; 3324 3325 /* 3326 * cxgbe:X:info 3327 */ 3328 struct cxgbe_port_info_kstats { 3329 kstat_named_t transceiver; 3330 kstat_named_t rx_ovflow0; 3331 kstat_named_t rx_ovflow1; 3332 kstat_named_t rx_ovflow2; 3333 kstat_named_t rx_ovflow3; 3334 kstat_named_t rx_trunc0; 3335 kstat_named_t rx_trunc1; 3336 kstat_named_t rx_trunc2; 3337 kstat_named_t rx_trunc3; 3338 kstat_named_t tx_pause; 3339 kstat_named_t rx_pause; 3340 }; 3341 3342 static kstat_t * 3343 setup_port_config_kstats(struct port_info *pi) 3344 { 3345 kstat_t *ksp; 3346 struct cxgbe_port_config_kstats *kstatp; 3347 int ndata; 3348 dev_info_t *pdip = ddi_get_parent(pi->dip); 3349 uint8_t *ma = &pi->hw_addr[0]; 3350 3351 ndata = sizeof (struct cxgbe_port_config_kstats) / 3352 sizeof (kstat_named_t); 3353 3354 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "config", 3355 "net", KSTAT_TYPE_NAMED, ndata, 0); 3356 if (ksp == NULL) { 3357 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats."); 3358 return (NULL); 3359 } 3360 3361 kstatp = (struct cxgbe_port_config_kstats *)ksp->ks_data; 3362 3363 KS_UINIT(idx); 3364 KS_UINIT(nrxq); 3365 KS_UINIT(ntxq); 3366 KS_UINIT(first_rxq); 3367 KS_UINIT(first_txq); 3368 KS_CINIT(controller); 3369 KS_CINIT(factory_mac_address); 3370 3371 KS_U_SET(idx, pi->port_id); 3372 KS_U_SET(nrxq, pi->nrxq); 3373 KS_U_SET(ntxq, pi->ntxq); 3374 KS_U_SET(first_rxq, pi->first_rxq); 3375 KS_U_SET(first_txq, pi->first_txq); 3376 KS_C_SET(controller, "%s%d", ddi_driver_name(pdip), 3377 ddi_get_instance(pdip)); 3378 KS_C_SET(factory_mac_address, "%02X%02X%02X%02X%02X%02X", 3379 ma[0], ma[1], ma[2], ma[3], ma[4], ma[5]); 3380 3381 /* Do NOT set ksp->ks_update. These kstats do not change. 
*/ 3382 3383 /* Install the kstat */ 3384 ksp->ks_private = (void *)pi; 3385 kstat_install(ksp); 3386 3387 return (ksp); 3388 } 3389 3390 static kstat_t * 3391 setup_port_info_kstats(struct port_info *pi) 3392 { 3393 kstat_t *ksp; 3394 struct cxgbe_port_info_kstats *kstatp; 3395 int ndata; 3396 3397 ndata = sizeof (struct cxgbe_port_info_kstats) / sizeof (kstat_named_t); 3398 3399 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "info", 3400 "net", KSTAT_TYPE_NAMED, ndata, 0); 3401 if (ksp == NULL) { 3402 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats."); 3403 return (NULL); 3404 } 3405 3406 kstatp = (struct cxgbe_port_info_kstats *)ksp->ks_data; 3407 3408 KS_CINIT(transceiver); 3409 KS_UINIT(rx_ovflow0); 3410 KS_UINIT(rx_ovflow1); 3411 KS_UINIT(rx_ovflow2); 3412 KS_UINIT(rx_ovflow3); 3413 KS_UINIT(rx_trunc0); 3414 KS_UINIT(rx_trunc1); 3415 KS_UINIT(rx_trunc2); 3416 KS_UINIT(rx_trunc3); 3417 KS_UINIT(tx_pause); 3418 KS_UINIT(rx_pause); 3419 3420 /* Install the kstat */ 3421 ksp->ks_update = update_port_info_kstats; 3422 ksp->ks_private = (void *)pi; 3423 kstat_install(ksp); 3424 3425 return (ksp); 3426 } 3427 3428 static int 3429 update_port_info_kstats(kstat_t *ksp, int rw) 3430 { 3431 struct cxgbe_port_info_kstats *kstatp = 3432 (struct cxgbe_port_info_kstats *)ksp->ks_data; 3433 struct port_info *pi = ksp->ks_private; 3434 static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", 3435 "active TWINAX", "LRM" }; 3436 uint32_t bgmap; 3437 3438 if (rw == KSTAT_WRITE) 3439 return (0); 3440 3441 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) 3442 KS_C_SET(transceiver, "unplugged"); 3443 else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) 3444 KS_C_SET(transceiver, "unknown"); 3445 else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) 3446 KS_C_SET(transceiver, "unsupported"); 3447 else if (pi->mod_type > 0 && pi->mod_type < ARRAY_SIZE(mod_str)) 3448 KS_C_SET(transceiver, "%s", mod_str[pi->mod_type]); 3449 else 3450 KS_C_SET(transceiver, "type %d", pi->mod_type); 3451 3452 #define GET_STAT(name) t4_read_reg64(pi->adapter, \ 3453 PORT_REG(pi->port_id, A_MPS_PORT_STAT_##name##_L)) 3454 #define GET_STAT_COM(name) t4_read_reg64(pi->adapter, \ 3455 A_MPS_STAT_##name##_L) 3456 3457 bgmap = G_NUMPORTS(t4_read_reg(pi->adapter, A_MPS_CMN_CTL)); 3458 if (bgmap == 0) 3459 bgmap = (pi->port_id == 0) ? 0xf : 0; 3460 else if (bgmap == 1) 3461 bgmap = (pi->port_id < 2) ? (3 << (2 * pi->port_id)) : 0; 3462 else 3463 bgmap = 1; 3464 3465 KS_U_SET(rx_ovflow0, (bgmap & 1) ? 3466 GET_STAT_COM(RX_BG_0_MAC_DROP_FRAME) : 0); 3467 KS_U_SET(rx_ovflow1, (bgmap & 2) ? 3468 GET_STAT_COM(RX_BG_1_MAC_DROP_FRAME) : 0); 3469 KS_U_SET(rx_ovflow2, (bgmap & 4) ? 3470 GET_STAT_COM(RX_BG_2_MAC_DROP_FRAME) : 0); 3471 KS_U_SET(rx_ovflow3, (bgmap & 8) ? 3472 GET_STAT_COM(RX_BG_3_MAC_DROP_FRAME) : 0); 3473 KS_U_SET(rx_trunc0, (bgmap & 1) ? 3474 GET_STAT_COM(RX_BG_0_MAC_TRUNC_FRAME) : 0); 3475 KS_U_SET(rx_trunc1, (bgmap & 2) ? 3476 GET_STAT_COM(RX_BG_1_MAC_TRUNC_FRAME) : 0); 3477 KS_U_SET(rx_trunc2, (bgmap & 4) ? 3478 GET_STAT_COM(RX_BG_2_MAC_TRUNC_FRAME) : 0); 3479 KS_U_SET(rx_trunc3, (bgmap & 8) ? 
3480 GET_STAT_COM(RX_BG_3_MAC_TRUNC_FRAME) : 0); 3481 3482 KS_U_SET(tx_pause, GET_STAT(TX_PORT_PAUSE)); 3483 KS_U_SET(rx_pause, GET_STAT(RX_PORT_PAUSE)); 3484 3485 return (0); 3486 3487 } 3488 3489 /* 3490 * cxgbe:X:rxqY 3491 */ 3492 struct rxq_kstats { 3493 kstat_named_t rxcsum; 3494 kstat_named_t rxpkts; 3495 kstat_named_t rxbytes; 3496 kstat_named_t nomem; 3497 }; 3498 3499 static kstat_t * 3500 setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq, int idx) 3501 { 3502 struct kstat *ksp; 3503 struct rxq_kstats *kstatp; 3504 int ndata; 3505 char str[16]; 3506 3507 ndata = sizeof (struct rxq_kstats) / sizeof (kstat_named_t); 3508 (void) snprintf(str, sizeof (str), "rxq%u", idx); 3509 3510 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "rxq", 3511 KSTAT_TYPE_NAMED, ndata, 0); 3512 if (ksp == NULL) { 3513 cxgb_printf(pi->dip, CE_WARN, 3514 "%s: failed to initialize rxq kstats for queue %d.", 3515 __func__, idx); 3516 return (NULL); 3517 } 3518 3519 kstatp = (struct rxq_kstats *)ksp->ks_data; 3520 3521 KS_UINIT(rxcsum); 3522 KS_UINIT(rxpkts); 3523 KS_UINIT(rxbytes); 3524 KS_UINIT(nomem); 3525 3526 ksp->ks_update = update_rxq_kstats; 3527 ksp->ks_private = (void *)rxq; 3528 kstat_install(ksp); 3529 3530 return (ksp); 3531 } 3532 3533 static int 3534 update_rxq_kstats(kstat_t *ksp, int rw) 3535 { 3536 struct rxq_kstats *kstatp = (struct rxq_kstats *)ksp->ks_data; 3537 struct sge_rxq *rxq = ksp->ks_private; 3538 3539 if (rw == KSTAT_WRITE) 3540 return (0); 3541 3542 KS_U_FROM(rxcsum, rxq); 3543 KS_U_FROM(rxpkts, rxq); 3544 KS_U_FROM(rxbytes, rxq); 3545 KS_U_FROM(nomem, rxq); 3546 3547 return (0); 3548 } 3549 3550 /* 3551 * cxgbe:X:txqY 3552 */ 3553 struct txq_kstats { 3554 kstat_named_t txcsum; 3555 kstat_named_t tso_wrs; 3556 kstat_named_t imm_wrs; 3557 kstat_named_t sgl_wrs; 3558 kstat_named_t txpkt_wrs; 3559 kstat_named_t txpkts_wrs; 3560 kstat_named_t txpkts_pkts; 3561 kstat_named_t txb_used; 3562 kstat_named_t hdl_used; 3563 kstat_named_t txb_full; 3564 kstat_named_t dma_hdl_failed; 3565 kstat_named_t dma_map_failed; 3566 kstat_named_t qfull; 3567 kstat_named_t pullup_early; 3568 kstat_named_t pullup_late; 3569 kstat_named_t pullup_failed; 3570 kstat_named_t csum_failed; 3571 }; 3572 3573 static kstat_t * 3574 setup_txq_kstats(struct port_info *pi, struct sge_txq *txq, int idx) 3575 { 3576 struct kstat *ksp; 3577 struct txq_kstats *kstatp; 3578 int ndata; 3579 char str[16]; 3580 3581 ndata = sizeof (struct txq_kstats) / sizeof (kstat_named_t); 3582 (void) snprintf(str, sizeof (str), "txq%u", idx); 3583 3584 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "txq", 3585 KSTAT_TYPE_NAMED, ndata, 0); 3586 if (ksp == NULL) { 3587 cxgb_printf(pi->dip, CE_WARN, 3588 "%s: failed to initialize txq kstats for queue %d.", 3589 __func__, idx); 3590 return (NULL); 3591 } 3592 3593 kstatp = (struct txq_kstats *)ksp->ks_data; 3594 3595 KS_UINIT(txcsum); 3596 KS_UINIT(tso_wrs); 3597 KS_UINIT(imm_wrs); 3598 KS_UINIT(sgl_wrs); 3599 KS_UINIT(txpkt_wrs); 3600 KS_UINIT(txpkts_wrs); 3601 KS_UINIT(txpkts_pkts); 3602 KS_UINIT(txb_used); 3603 KS_UINIT(hdl_used); 3604 KS_UINIT(txb_full); 3605 KS_UINIT(dma_hdl_failed); 3606 KS_UINIT(dma_map_failed); 3607 KS_UINIT(qfull); 3608 KS_UINIT(pullup_early); 3609 KS_UINIT(pullup_late); 3610 KS_UINIT(pullup_failed); 3611 KS_UINIT(csum_failed); 3612 3613 ksp->ks_update = update_txq_kstats; 3614 ksp->ks_private = (void *)txq; 3615 kstat_install(ksp); 3616 3617 return (ksp); 3618 } 3619 3620 static int 3621 update_txq_kstats(kstat_t *ksp, 
int rw) 3622 { 3623 struct txq_kstats *kstatp = (struct txq_kstats *)ksp->ks_data; 3624 struct sge_txq *txq = ksp->ks_private; 3625 3626 if (rw == KSTAT_WRITE) 3627 return (0); 3628 3629 KS_U_FROM(txcsum, txq); 3630 KS_U_FROM(tso_wrs, txq); 3631 KS_U_FROM(imm_wrs, txq); 3632 KS_U_FROM(sgl_wrs, txq); 3633 KS_U_FROM(txpkt_wrs, txq); 3634 KS_U_FROM(txpkts_wrs, txq); 3635 KS_U_FROM(txpkts_pkts, txq); 3636 KS_U_FROM(txb_used, txq); 3637 KS_U_FROM(hdl_used, txq); 3638 KS_U_FROM(txb_full, txq); 3639 KS_U_FROM(dma_hdl_failed, txq); 3640 KS_U_FROM(dma_map_failed, txq); 3641 KS_U_FROM(qfull, txq); 3642 KS_U_FROM(pullup_early, txq); 3643 KS_U_FROM(pullup_late, txq); 3644 KS_U_FROM(pullup_failed, txq); 3645 KS_U_FROM(csum_failed, txq); 3646 3647 return (0); 3648 } 3649 3650 static int rxbuf_ctor(void *, void *, int); 3651 static void rxbuf_dtor(void *, void *); 3652 3653 static kmem_cache_t * 3654 rxbuf_cache_create(struct rxbuf_cache_params *p) 3655 { 3656 char name[32]; 3657 3658 (void) snprintf(name, sizeof (name), "%s%d_rxbuf_cache", 3659 ddi_driver_name(p->dip), ddi_get_instance(p->dip)); 3660 3661 return kmem_cache_create(name, sizeof (struct rxbuf), _CACHE_LINE_SIZE, 3662 rxbuf_ctor, rxbuf_dtor, NULL, p, NULL, 0); 3663 } 3664 3665 /* 3666 * If ref_cnt is more than 1 then those many calls to rxbuf_free will 3667 * have to be made before the rxb is released back to the kmem_cache. 3668 */ 3669 static struct rxbuf * 3670 rxbuf_alloc(kmem_cache_t *cache, int kmflags, uint_t ref_cnt) 3671 { 3672 struct rxbuf *rxb; 3673 3674 ASSERT(ref_cnt > 0); 3675 3676 rxb = kmem_cache_alloc(cache, kmflags); 3677 if (rxb != NULL) { 3678 rxb->ref_cnt = ref_cnt; 3679 rxb->cache = cache; 3680 } 3681 3682 return (rxb); 3683 } 3684 3685 /* 3686 * This is normally called via the rxb's freefunc, when an mblk referencing the 3687 * rxb is freed. 3688 */ 3689 static void 3690 rxbuf_free(struct rxbuf *rxb) 3691 { 3692 if (atomic_dec_uint_nv(&rxb->ref_cnt) == 0) 3693 kmem_cache_free(rxb->cache, rxb); 3694 } 3695 3696 static int 3697 rxbuf_ctor(void *arg1, void *arg2, int kmflag) 3698 { 3699 struct rxbuf *rxb = arg1; 3700 struct rxbuf_cache_params *p = arg2; 3701 size_t real_len; 3702 ddi_dma_cookie_t cookie; 3703 uint_t ccount = 0; 3704 int (*callback)(caddr_t); 3705 int rc = ENOMEM; 3706 3707 if ((kmflag & KM_NOSLEEP) != 0) 3708 callback = DDI_DMA_DONTWAIT; 3709 else 3710 callback = DDI_DMA_SLEEP; 3711 3712 rc = ddi_dma_alloc_handle(p->dip, &p->dma_attr_rx, callback, 0, 3713 &rxb->dhdl); 3714 if (rc != DDI_SUCCESS) 3715 return (rc == DDI_DMA_BADATTR ? 
EINVAL : ENOMEM); 3716 3717 rc = ddi_dma_mem_alloc(rxb->dhdl, p->buf_size, &p->acc_attr_rx, 3718 DDI_DMA_STREAMING, callback, 0, &rxb->va, &real_len, &rxb->ahdl); 3719 if (rc != DDI_SUCCESS) { 3720 rc = ENOMEM; 3721 goto fail1; 3722 } 3723 3724 rc = ddi_dma_addr_bind_handle(rxb->dhdl, NULL, rxb->va, p->buf_size, 3725 DDI_DMA_READ | DDI_DMA_STREAMING, NULL, NULL, &cookie, &ccount); 3726 if (rc != DDI_DMA_MAPPED) { 3727 if (rc == DDI_DMA_INUSE) 3728 rc = EBUSY; 3729 else if (rc == DDI_DMA_TOOBIG) 3730 rc = E2BIG; 3731 else 3732 rc = ENOMEM; 3733 goto fail2; 3734 } 3735 3736 if (ccount != 1) { 3737 rc = E2BIG; 3738 goto fail3; 3739 } 3740 3741 rxb->ref_cnt = 0; 3742 rxb->buf_size = p->buf_size; 3743 rxb->freefunc.free_arg = (caddr_t)rxb; 3744 rxb->freefunc.free_func = rxbuf_free; 3745 rxb->ba = cookie.dmac_laddress; 3746 3747 return (0); 3748 3749 fail3: (void) ddi_dma_unbind_handle(rxb->dhdl); 3750 fail2: ddi_dma_mem_free(&rxb->ahdl); 3751 fail1: ddi_dma_free_handle(&rxb->dhdl); 3752 return (rc); 3753 } 3754 3755 static void 3756 rxbuf_dtor(void *arg1, void *arg2) 3757 { 3758 struct rxbuf *rxb = arg1; 3759 3760 (void) ddi_dma_unbind_handle(rxb->dhdl); 3761 ddi_dma_mem_free(&rxb->ahdl); 3762 ddi_dma_free_handle(&rxb->dhdl); 3763 } 3764