/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * This file is part of the Chelsio T4 support code.
 *
 * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this
 * release for licensing terms and conditions.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/atomic.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/strsubr.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <inet/ip.h>
#include <inet/tcp.h>

#include "version.h"
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"

/* TODO: Tune. */
int rx_buf_size = 8192;
int tx_copy_threshold = 256;
uint16_t rx_copy_threshold = 256;

/* Used to track coalesced tx work request */
struct txpkts {
	mblk_t *tail;		/* head is in the software descriptor */
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* All information needed to tx a frame */
struct txinfo {
	uint32_t len;		/* Total length of frame */
	uint32_t flags;		/* Checksum and LSO flags */
	uint32_t mss;		/* MSS for LSO */
	uint8_t nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	uint8_t nflits;		/* # of flits needed for the SGL */
	uint8_t hdls_used;	/* # of DMA handles used */
	uint32_t txb_used;	/* txb_space used */
	mac_ether_offload_info_t meoi;	/* pkt hdr info for offloads */
	struct ulptx_sgl sgl __attribute__((aligned(8)));
	struct ulptx_sge_pair reserved[TX_SGL_SEGS / 2];
};

static int service_iq(struct sge_iq *iq, int budget);
static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx,
    int8_t pktc_idx, int qsize, uint8_t esize);
static inline void init_fl(struct sge_fl *fl, uint16_t qsize);
static inline void init_eq(struct adapter *sc, struct sge_eq *eq,
    uint16_t eqtype, uint16_t qsize, uint8_t tx_chan, uint16_t iqid);
static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq,
    struct sge_fl *fl, int intr_idx, int cong);
static int free_iq_fl(struct port_info *pi, struct sge_iq *iq,
    struct sge_fl *fl);
static int alloc_fwq(struct adapter *sc);
static int free_fwq(struct adapter *sc);
#ifdef TCP_OFFLOAD_ENABLE
static int alloc_mgmtq(struct adapter *sc);
#endif
static int alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx,
    int i);
static int free_rxq(struct port_info *pi, struct sge_rxq *rxq);
#ifdef TCP_OFFLOAD_ENABLE
static int alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
    int intr_idx);
static int free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq);
#endif
static int ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq);
static int eth_eq_alloc(struct adapter *sc, struct port_info *pi,
    struct sge_eq *eq);
#ifdef TCP_OFFLOAD_ENABLE
static int ofld_eq_alloc(struct adapter *sc, struct port_info *pi,
    struct sge_eq *eq);
#endif
static int alloc_eq(struct adapter *sc, struct port_info *pi,
    struct sge_eq *eq);
static int free_eq(struct adapter *sc, struct sge_eq *eq);
#ifdef TCP_OFFLOAD_ENABLE
static int alloc_wrq(struct adapter *sc, struct port_info *pi,
    struct sge_wrq *wrq, int idx);
static int free_wrq(struct adapter *sc, struct sge_wrq *wrq);
#endif
static int alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx);
static int free_txq(struct port_info *pi, struct sge_txq *txq);
static int alloc_dma_memory(struct adapter *sc, size_t len, int flags,
    ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr,
    ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
    caddr_t *pva);
static int free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
static int alloc_desc_ring(struct adapter *sc, size_t len, int rw,
    ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
    caddr_t *pva);
static int free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl);
static int alloc_tx_copybuffer(struct adapter *sc, size_t len,
    ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, uint64_t *pba,
    caddr_t *pva);
static inline bool is_new_response(const struct sge_iq *iq,
    struct rsp_ctrl **ctrl);
static inline void iq_next(struct sge_iq *iq);
static int refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs);
static void refill_sfl(void *arg);
static void add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl);
static void free_fl_bufs(struct sge_fl *fl);
static mblk_t *get_fl_payload(struct adapter *sc, struct sge_fl *fl,
    uint32_t len_newbuf, int *fl_bufs_used);
static int get_frame_txinfo(struct sge_txq *txq, mblk_t **fp,
    struct txinfo *txinfo, int sgl_only);
static inline int fits_in_txb(struct sge_txq *txq, int len, int *waste);
static inline int copy_into_txb(struct sge_txq *txq, mblk_t *m, int len,
    struct txinfo *txinfo);
static inline void add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len);
static inline int add_mblk(struct sge_txq *txq, struct txinfo *txinfo,
    mblk_t *m, int len);
static void free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo);
static int add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m,
    struct txinfo *txinfo);
static void write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts);
static int write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m,
    struct txinfo *txinfo);
static inline void write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
    struct txpkts *txpkts, struct txinfo *txinfo);
static inline void copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to,
    int len);
static inline void ring_tx_db(struct adapter *sc, struct sge_eq *eq);
static int reclaim_tx_descs(struct sge_txq *txq, int howmany);
static void write_txqflush_wr(struct sge_txq *txq);
static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss,
    mblk_t *m);
static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl);
static kstat_t *setup_port_config_kstats(struct port_info *pi);
static kstat_t *setup_port_info_kstats(struct port_info *pi);
static kstat_t *setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq,
    int idx);
static int update_rxq_kstats(kstat_t *ksp, int rw);
static int update_port_info_kstats(kstat_t *ksp, int rw);
static kstat_t *setup_txq_kstats(struct port_info *pi, struct sge_txq *txq,
    int idx);
static int update_txq_kstats(kstat_t *ksp, int rw);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    mblk_t *);
static int handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss,
    mblk_t *m);

static inline int
reclaimable(struct sge_eq *eq)
{
	unsigned int cidx;

	cidx = eq->spg->cidx;	/* stable snapshot */
	cidx = be16_to_cpu(cidx);

	if (cidx >= eq->cidx)
		return (cidx - eq->cidx);
	else
		return (cidx + eq->cap - eq->cidx);
}

void
t4_sge_init(struct adapter *sc)
{
	struct driver_properties *p = &sc->props;
	ddi_dma_attr_t *dma_attr;
	ddi_device_acc_attr_t *acc_attr;
	uint32_t sge_control, sge_conm_ctrl;
	int egress_threshold;

	/*
	 * Device access and DMA attributes for descriptor rings
	 */
	acc_attr = &sc->sge.acc_attr_desc;
	acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
	acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	acc_attr->devacc_attr_dataorder = DDI_STRICTORDER_ACC;

	dma_attr = &sc->sge.dma_attr_desc;
	dma_attr->dma_attr_version = DMA_ATTR_V0;
	dma_attr->dma_attr_addr_lo = 0;
	dma_attr->dma_attr_addr_hi = UINT64_MAX;
	dma_attr->dma_attr_count_max = UINT64_MAX;
	dma_attr->dma_attr_align = 512;
	dma_attr->dma_attr_burstsizes = 0xfff;
	dma_attr->dma_attr_minxfer = 1;
	dma_attr->dma_attr_maxxfer = UINT64_MAX;
	dma_attr->dma_attr_seg = UINT64_MAX;
	dma_attr->dma_attr_sgllen = 1;
	dma_attr->dma_attr_granular = 1;
	dma_attr->dma_attr_flags = 0;

	/*
	 * Device access and DMA attributes for tx buffers
	 */
	acc_attr = &sc->sge.acc_attr_tx;
	acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
	acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;

	dma_attr = &sc->sge.dma_attr_tx;
	dma_attr->dma_attr_version = DMA_ATTR_V0;
	dma_attr->dma_attr_addr_lo = 0;
	dma_attr->dma_attr_addr_hi = UINT64_MAX;
	dma_attr->dma_attr_count_max = UINT64_MAX;
	dma_attr->dma_attr_align = 1;
	dma_attr->dma_attr_burstsizes = 0xfff;
	dma_attr->dma_attr_minxfer = 1;
	dma_attr->dma_attr_maxxfer = UINT64_MAX;
	dma_attr->dma_attr_seg = UINT64_MAX;
	dma_attr->dma_attr_sgllen = TX_SGL_SEGS;
	dma_attr->dma_attr_granular = 1;
	dma_attr->dma_attr_flags = 0;

	/*
	 * Ingress Padding Boundary and Egress Status Page Size are set up by
	 * t4_fixup_host_params().
	 */
	sge_control = t4_read_reg(sc, A_SGE_CONTROL);
	sc->sge.pktshift = G_PKTSHIFT(sge_control);
	sc->sge.stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;

	/* t4_nex uses FLM packed mode */
	sc->sge.fl_align = t4_fl_pkt_align(sc, true);

	/*
	 * Device access and DMA attributes for rx buffers
	 */
	sc->sge.rxb_params.dip = sc->dip;
	sc->sge.rxb_params.buf_size = rx_buf_size;

	acc_attr = &sc->sge.rxb_params.acc_attr_rx;
	acc_attr->devacc_attr_version = DDI_DEVICE_ATTR_V0;
	acc_attr->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;

	dma_attr = &sc->sge.rxb_params.dma_attr_rx;
	dma_attr->dma_attr_version = DMA_ATTR_V0;
	dma_attr->dma_attr_addr_lo = 0;
	dma_attr->dma_attr_addr_hi = UINT64_MAX;
	dma_attr->dma_attr_count_max = UINT64_MAX;
	/*
	 * Low 4 bits of an rx buffer address have a special meaning to the SGE
	 * and an rx buf cannot have an address with any of these bits set.
	 * FL_ALIGN is >= 32 so we're sure things are ok.
	 */
	dma_attr->dma_attr_align = sc->sge.fl_align;
	dma_attr->dma_attr_burstsizes = 0xfff;
	dma_attr->dma_attr_minxfer = 1;
	dma_attr->dma_attr_maxxfer = UINT64_MAX;
	dma_attr->dma_attr_seg = UINT64_MAX;
	dma_attr->dma_attr_sgllen = 1;
	dma_attr->dma_attr_granular = 1;
	dma_attr->dma_attr_flags = 0;

	sc->sge.rxbuf_cache = rxbuf_cache_create(&sc->sge.rxb_params);

	/*
	 * A FL with <= fl_starve_thres buffers is starving and a periodic
	 * timer will attempt to refill it. This needs to be larger than the
	 * SGE's Egress Congestion Threshold. If it isn't, then we can get
	 * stuck waiting for new packets while the SGE is waiting for us to
	 * give it more Free List entries. (Note that the SGE's Egress
	 * Congestion Threshold is in units of 2 Free List pointers.) For T4,
	 * there was only a single field to control this. For T5 there's the
	 * original field which now only applies to Unpacked Mode Free List
	 * buffers and a new field which only applies to Packed Mode Free List
	 * buffers.
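	 *
	 * For example, a threshold field value of 32 would make the
	 * fl_starve_threshold computed below 2 * 32 + 1 = 65 buffers.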
	 */

	sge_conm_ctrl = t4_read_reg(sc, A_SGE_CONM_CTRL);
	switch (CHELSIO_CHIP_VERSION(sc->params.chip)) {
	case CHELSIO_T4:
		egress_threshold = G_EGRTHRESHOLD(sge_conm_ctrl);
		break;
	case CHELSIO_T5:
		egress_threshold = G_EGRTHRESHOLDPACKING(sge_conm_ctrl);
		break;
	case CHELSIO_T6:
	default:
		egress_threshold = G_T6_EGRTHRESHOLDPACKING(sge_conm_ctrl);
	}
	sc->sge.fl_starve_threshold = 2 * egress_threshold + 1;

	t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, rx_buf_size);

	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
	    V_THRESHOLD_0(p->counter_val[0]) |
	    V_THRESHOLD_1(p->counter_val[1]) |
	    V_THRESHOLD_2(p->counter_val[2]) |
	    V_THRESHOLD_3(p->counter_val[3]));

	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
	    V_TIMERVALUE0(us_to_core_ticks(sc, p->timer_val[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, p->timer_val[1])));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
	    V_TIMERVALUE2(us_to_core_ticks(sc, p->timer_val[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, p->timer_val[3])));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
	    V_TIMERVALUE4(us_to_core_ticks(sc, p->timer_val[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, p->timer_val[5])));

	(void) t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
	(void) t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
	(void) t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE,
	    handle_sge_egr_update);
	(void) t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
	(void) t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL,
	    t4_handle_fw_rpl);
}

/*
 * Allocate and initialize the firmware event queue and the forwarded interrupt
 * queues, if any. The adapter owns all these queues as they are not associated
 * with any particular port.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

#ifdef TCP_OFFLOAD_ENABLE
	/*
	 * Management queue. This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);
#endif

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	(void) free_fwq(sc);

	return (0);
}

static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		struct port_info *p = sc->port[i];

		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD_ENABLE
		if (!(sc->flags & INTR_FWD))
			rc += p->nrxq + p->nofldrxq;
		else
			rc += max(p->nrxq, p->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1. Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		ASSERT(!(sc->flags & INTR_FWD));
		rc += p->nrxq;
#endif
	}
	return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
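 *
 * For example, when interrupt forwarding is not in use, idx is simply taken
 * modulo the number of NIC rx queues on the port and selects that rxq's iq.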
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD_ENABLE
	if (!(sc->flags & INTR_FWD)) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1. Only NIC
	 * queues exist and they'd better be taking direct interrupts.
	 */
	ASSERT(!(sc->flags & INTR_FWD));

	idx %= pi->nrxq;
	iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

	return (iq);
}

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, intr_idx, j;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD_ENABLE
	int iqid;
	struct sge_wrq *ctrlq;
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
	struct adapter *sc = pi->adapter;
	struct driver_properties *p = &sc->props;

	pi->ksp_config = setup_port_config_kstats(pi);
	pi->ksp_info = setup_port_info_kstats(pi);

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all rx queues (NIC and TOE):
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, p->qsize_rxq,
		    RX_IQ_ESIZE);

		init_fl(&rxq->fl, p->qsize_rxq / 8); /* 8 bufs in each entry */

		if ((!(sc->flags & INTR_FWD))
#ifdef TCP_OFFLOAD_ENABLE
		    || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#else
		    || (sc->intr_count > 1 && pi->nrxq)
#endif
		    ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i);
			if (rc != 0)
				goto done;
			intr_idx++;
		}

	}

#ifdef TCP_OFFLOAD_ENABLE
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    p->qsize_rxq, RX_IQ_ESIZE);

		init_fl(&ofld_rxq->fl, p->qsize_rxq / 8);

		if (!(sc->flags & INTR_FWD) ||
		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
			ofld_rxq->iq.flags = IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx);
			if (rc != 0)
				goto done;

			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all rx queues (NIC and TOE). The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_rxq(pi, rxq, intr_idx, i);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD_ENABLE
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;
		rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx);
		if (rc != 0)
			goto done;
		j++;
	}
#endif
	/*
	 * Now the tx queues. Only one pass needed.
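	 * Each tx queue is bound, via init_eq() below, to the iq chosen by
	 * port_intr_iq(); that iq is where the queue's SGE egress update
	 * notifications are delivered.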
	 */
	j = 0;
	for_each_txq(pi, i, txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;
		init_eq(sc, &txq->eq, EQ_ETH, p->qsize_txq, pi->tx_chan, iqid);
		rc = alloc_txq(pi, txq, i);
		if (rc != 0)
			goto done;
	}

#ifdef TCP_OFFLOAD_ENABLE
	for_each_ofld_txq(pi, i, ofld_txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;
		init_eq(sc, &ofld_txq->eq, EQ_OFLD, p->qsize_txq, pi->tx_chan,
		    iqid);
		rc = alloc_wrq(sc, pi, ofld_txq, i);
		if (rc != 0)
			goto done;
	}

	/*
	 * Finally, the control queue.
	 */
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid);
	rc = alloc_wrq(sc, pi, ctrlq, 0);
#endif

done:
	if (rc != 0)
		(void) t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD_ENABLE
	struct adapter *sc = pi->adapter;
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	if (pi->ksp_config != NULL) {
		kstat_delete(pi->ksp_config);
		pi->ksp_config = NULL;
	}
	if (pi->ksp_info != NULL) {
		kstat_delete(pi->ksp_info);
		pi->ksp_info = NULL;
	}

#ifdef TCP_OFFLOAD_ENABLE
	(void) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
#endif

	for_each_txq(pi, i, txq) {
		(void) free_txq(pi, txq);
	}

#ifdef TCP_OFFLOAD_ENABLE
	for_each_ofld_txq(pi, i, ofld_txq) {
		(void) free_wrq(sc, ofld_txq);
	}

	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			(void) free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			(void) free_rxq(pi, rxq);
	}

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			(void) free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD_ENABLE
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			(void) free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	return (0);
}

/* Deals with errors and forwarded interrupts */
uint_t
t4_intr_all(caddr_t arg1, caddr_t arg2)
{

	(void) t4_intr_err(arg1, arg2);
	(void) t4_intr(arg1, arg2);

	return (DDI_INTR_CLAIMED);
}

static void
t4_intr_rx_work(struct sge_iq *iq)
{
	mblk_t *mp = NULL;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */

	RXQ_LOCK(rxq);
	if (!iq->polling) {
		mp = t4_ring_rx(rxq, iq->qsize / 8);
		t4_write_reg(iq->adapter, MYPF_REG(A_SGE_PF_GTS),
		    V_INGRESSQID((u32)iq->cntxt_id) |
		    V_SEINTARM(iq->intr_next));
	}
	RXQ_UNLOCK(rxq);
	if (mp != NULL)
		mac_rx_ring(rxq->port->mh, rxq->ring_handle, mp,
		    rxq->ring_gen_num);
}

/* Deals with interrupts on the given ingress queue */
/* ARGSUSED */
uint_t
t4_intr(caddr_t arg1, caddr_t arg2)
{
	struct sge_iq *iq = (struct sge_iq *)arg2;
	int state;

	/*
	 * Right now receive polling is only enabled for MSI-X and when we
	 * have enough MSI-X vectors, i.e., no interrupt forwarding.
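	 * In that mode (props.multi_rings) each rx ring is drained by
	 * t4_ring_rx() and handed to MAC via mac_rx_ring(); otherwise the
	 * legacy service_iq() path below is used.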
	 */
	if (iq->adapter->props.multi_rings) {
		t4_intr_rx_work(iq);
	} else {
		state = atomic_cas_uint(&iq->state, IQS_IDLE, IQS_BUSY);
		if (state == IQS_IDLE) {
			(void) service_iq(iq, 0);
			(void) atomic_cas_uint(&iq->state, IQS_BUSY, IQS_IDLE);
		}
	}
	return (DDI_INTR_CLAIMED);
}

/* Deals with error interrupts */
/* ARGSUSED */
uint_t
t4_intr_err(caddr_t arg1, caddr_t arg2)
{
	/* LINTED: E_BAD_PTR_CAST_ALIGN */
	struct adapter *sc = (struct adapter *)arg1;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	(void) t4_slow_intr_handler(sc);

	return (DDI_INTR_CLAIMED);
}

/*
 * t4_ring_rx - Process responses from an SGE response queue.
 *
 * This function processes responses from an SGE response queue up to the
 * supplied budget. Responses include received packets as well as control
 * messages from FW or HW. It returns a chain of mblks containing the received
 * data, to be passed up to mac_ring_rx().
 */
mblk_t *
t4_ring_rx(struct sge_rxq *rxq, int budget)
{
	struct sge_iq *iq = &rxq->iq;
	struct sge_fl *fl = &rxq->fl;	/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	mblk_t *mblk_head = NULL, **mblk_tail, *m;
	struct cpl_rx_pkt *cpl;
	uint32_t received_bytes = 0, pkt_len = 0;
	bool csum_ok;
	uint16_t err_vec;

	mblk_tail = &mblk_head;

	while (is_new_response(iq, &ctrl)) {

		membar_consumer();

		m = NULL;
		rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
		lq = be32_to_cpu(ctrl->pldbuflen_qid);
		rss = (const void *)iq->cdesc;

		switch (rsp_type) {
		case X_RSPD_TYPE_FLBUF:

			ASSERT(iq->flags & IQ_HAS_FL);

			if (CPL_RX_PKT == rss->opcode) {
				cpl = (void *)(rss + 1);
				pkt_len = be16_to_cpu(cpl->len);

				if (iq->polling &&
				    ((received_bytes + pkt_len) > budget))
					goto done;

				m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
				if (m == NULL)
					goto done;

				iq->intr_next = iq->intr_params;
				m->b_rptr += sc->sge.pktshift;
				if (sc->params.tp.rx_pkt_encap)
					/* It is enabled only in T6 config file */
					err_vec = G_T6_COMPR_RXERR_VEC(
					    ntohs(cpl->err_vec));
				else
					err_vec = ntohs(cpl->err_vec);

				csum_ok = cpl->csum_calc && !err_vec;

				/* TODO: what about cpl->ip_frag? */
				if (csum_ok && !cpl->ip_frag) {
					mac_hcksum_set(m, 0, 0, 0, 0xffff,
					    HCK_FULLCKSUM_OK | HCK_FULLCKSUM |
					    HCK_IPV4_HDRCKSUM_OK);
					rxq->rxcsum++;
				}
				rxq->rxpkts++;
				rxq->rxbytes += pkt_len;
				received_bytes += pkt_len;

				*mblk_tail = m;
				mblk_tail = &m->b_next;

				break;
			}

			m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
			if (m == NULL)
				goto done;
			/* FALLTHROUGH */

		case X_RSPD_TYPE_CPL:
			ASSERT(rss->opcode < NUM_CPL_CMDS);
			sc->cpl_handler[rss->opcode](iq, rss, m);
			break;

		default:
			break;
		}
		iq_next(iq);
		++ndescs;
		if (!iq->polling && (ndescs == budget))
			break;
	}

done:

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
	    V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) |
	    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));

	if ((fl_bufs_used > 0) || (iq->flags & IQ_HAS_FL)) {
		int starved;
		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, fl->cap / 8);
		FL_UNLOCK(fl);
		if (starved)
			add_fl_to_sfl(sc, fl);
	}
	return (mblk_head);
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl = &rxq->fl;		/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, limit, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	int starved;
	mblk_t *m;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

	limit = budget ? budget : iq->qsize / 8;

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while (is_new_response(iq, &ctrl)) {

			membar_consumer();

			m = NULL;
			rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
			lq = be32_to_cpu(ctrl->pldbuflen_qid);
			rss = (const void *)iq->cdesc;

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				ASSERT(iq->flags & IQ_HAS_FL);

				m = get_fl_payload(sc, fl, lq, &fl_bufs_used);
				if (m == NULL) {
					/*
					 * Rearm the iq with a
					 * longer-than-default timer
					 */
					t4_write_reg(sc,
					    MYPF_REG(A_SGE_PF_GTS),
					    V_CIDXINC(ndescs) |
					    V_INGRESSQID((u32)iq->cntxt_id) |
					    V_SEINTARM(V_QINTR_TIMER_IDX(
					    SGE_NTIMERS - 1)));
					if (fl_bufs_used > 0) {
						ASSERT(iq->flags & IQ_HAS_FL);
						FL_LOCK(fl);
						fl->needed += fl_bufs_used;
						starved = refill_fl(sc, fl,
						    fl->cap / 8);
						FL_UNLOCK(fl);
						if (starved)
							add_fl_to_sfl(sc, fl);
					}
					return (0);
				}

				/* FALLTHRU */
			case X_RSPD_TYPE_CPL:

				ASSERT(rss->opcode < NUM_CPL_CMDS);
				sc->cpl_handler[rss->opcode](iq, rss, m);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to
				 * queues that are not forwarding their
				 * interrupts. This means service_iq can
				 * recurse but only 1 level deep.
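				 * (A queue serviced from here is given a
				 * nonzero budget, so it should never see
				 * forwarded-interrupt entries of its own;
				 * hence the ASSERT below.)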
				 */
				ASSERT(budget == 0);

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cas_uint(&q->state, IQS_IDLE,
				    IQS_BUSY) == IQS_IDLE) {
					if (service_iq(q, q->qsize / 8) == 0) {
						(void) atomic_cas_uint(
						    &q->state, IQS_BUSY,
						    IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				break;
			}

			iq_next(iq);
			if (++ndescs == limit) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(
				    X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (fl_bufs_used > 0) {
					ASSERT(iq->flags & IQ_HAS_FL);
					FL_LOCK(fl);
					fl->needed += fl_bufs_used;
					(void) refill_fl(sc, fl, fl->cap / 8);
					FL_UNLOCK(fl);
					fl_bufs_used = 0;
				}

				if (budget != 0)
					return (EINPROGRESS);
			}
		}

		if (STAILQ_EMPTY(&iql) != 0)
			break;

		/*
		 * Process the head only, and send it to the back of the list
		 * if it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			(void) atomic_cas_uint(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));

	if (iq->flags & IQ_HAS_FL) {

		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, fl->cap / 4);
		FL_UNLOCK(fl);
		if (starved != 0)
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

#ifdef TCP_OFFLOAD_ENABLE
int
t4_mgmt_tx(struct adapter *sc, mblk_t *m)
{
	return (t4_wrq_tx(sc, &sc->sge.mgmtq, m));
}

/*
 * Doesn't fail. Holds on to work requests it can't send right away.
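 *
 * The caller must hold the wrq lock. Anything that cannot be written into the
 * descriptor ring right now is left on wrq->wr_list and sent on a later call.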
 */
int
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, mblk_t *m0)
{
	struct sge_eq *eq = &wrq->eq;
	struct mblk_pair *wr_list = &wrq->wr_list;
	int can_reclaim;
	caddr_t dst;
	mblk_t *wr, *next;

	TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD_ENABLE
	ASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL);
#else
	ASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL);
#endif

	if (m0 != NULL) {
		if (wr_list->head != NULL)
			wr_list->tail->b_next = m0;
		else
			wr_list->head = m0;
		while (m0->b_next)
			m0 = m0->b_next;
		wr_list->tail = m0;
	}

	can_reclaim = reclaimable(eq);
	eq->cidx += can_reclaim;
	eq->avail += can_reclaim;
	if (eq->cidx >= eq->cap)
		eq->cidx -= eq->cap;

	for (wr = wr_list->head; wr; wr = next) {
		int ndesc, len = 0;
		mblk_t *m;

		next = wr->b_next;
		wr->b_next = NULL;

		for (m = wr; m; m = m->b_cont)
			len += MBLKL(m);

		ASSERT(len > 0 && (len & 0x7) == 0);
		ASSERT(len <= SGE_MAX_WR_LEN);

		ndesc = howmany(len, EQ_ESIZE);
		if (eq->avail < ndesc) {
			wr->b_next = next;
			wrq->no_desc++;
			break;
		}

		dst = (void *)&eq->desc[eq->pidx];
		for (m = wr; m; m = m->b_cont)
			copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m));

		eq->pidx += ndesc;
		eq->avail -= ndesc;
		if (eq->pidx >= eq->cap)
			eq->pidx -= eq->cap;

		eq->pending += ndesc;
		if (eq->pending > 16)
			ring_tx_db(sc, eq);

		wrq->tx_wrs++;
		freemsg(wr);

		if (eq->avail < 8) {
			can_reclaim = reclaimable(eq);
			eq->cidx += can_reclaim;
			eq->avail += can_reclaim;
			if (eq->cidx >= eq->cap)
				eq->cidx -= eq->cap;
		}
	}

	if (eq->pending != 0)
		ring_tx_db(sc, eq);

	if (wr == NULL)
		wr_list->head = wr_list->tail = NULL;
	else {
		wr_list->head = wr;

		ASSERT(wr_list->tail->b_next == NULL);
	}

	return (0);
}
#endif

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define	TXPKTS_PKT_HDR ((\
	sizeof (struct ulp_txpkt) + \
	sizeof (struct ulptx_idata) + \
	sizeof (struct cpl_tx_pkt_core)) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define	TXPKTS_WR_HDR (\
	sizeof (struct fw_eth_tx_pkts_wr) / 8 + \
	TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define	TXPKT_WR_HDR ((\
	sizeof (struct fw_eth_tx_pkt_wr) + \
	sizeof (struct cpl_tx_pkt_core)) / 8)

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define	TXPKT_LSO_WR_HDR ((\
	sizeof (struct fw_eth_tx_pkt_wr) + \
	sizeof (struct cpl_tx_pkt_lso_core) + \
	sizeof (struct cpl_tx_pkt_core)) / 8)

mblk_t *
t4_eth_tx(void *arg, mblk_t *frame)
{
	struct sge_txq *txq = (struct sge_txq *)arg;
	struct port_info *pi = txq->port;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	mblk_t *next_frame;
	int rc, coalescing;
	struct txpkts txpkts;
	struct txinfo txinfo;

	txpkts.npkt = 0;	/* indicates there's nothing in txpkts */
	coalescing = 0;

	TXQ_LOCK(txq);
	if (eq->avail < 8)
		(void) reclaim_tx_descs(txq, 8);
	for (; frame; frame = next_frame) {

		if (eq->avail < 8)
			break;

		next_frame = frame->b_next;
		frame->b_next = NULL;

		if (next_frame != NULL)
			coalescing = 1;

		rc = get_frame_txinfo(txq, &frame, &txinfo, coalescing);
		if (rc != 0) {
			if (rc == ENOMEM) {

				/* Short of resources, suspend tx */

				frame->b_next = next_frame;
				break;
			}

			/*
			 * Unrecoverable error for this frame, throw it away
			 * and move on to the next.
			 */

			freemsg(frame);
			continue;
		}

		if (coalescing != 0 &&
		    add_to_txpkts(txq, &txpkts, frame, &txinfo) == 0) {

			/* Successfully absorbed into txpkts */

			write_ulp_cpl_sgl(pi, txq, &txpkts, &txinfo);
			goto doorbell;
		}

		/*
		 * We weren't coalescing to begin with, or current frame could
		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
		 * given to it can't be coalesced). Either way there should be
		 * nothing in txpkts.
		 */
		ASSERT(txpkts.npkt == 0);

		/* We're sending out individual frames now */
		coalescing = 0;

		if (eq->avail < 8)
			(void) reclaim_tx_descs(txq, 8);
		rc = write_txpkt_wr(pi, txq, frame, &txinfo);
		if (rc != 0) {

			/* Short of hardware descriptors, suspend tx */

			/*
			 * This is an unlikely but expensive failure. We've
			 * done all the hard work (DMA bindings etc.) and now
			 * we can't send out the frame. What's worse, we have
			 * to spend even more time freeing up everything in
			 * txinfo.
			 */
			txq->qfull++;
			free_txinfo_resources(txq, &txinfo);

			frame->b_next = next_frame;
			break;
		}

doorbell:
		/* Fewer and fewer doorbells as the queue fills up */
		if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) {
			txq->txbytes += txinfo.len;
			txq->txpkts++;
			ring_tx_db(sc, eq);
		}
		(void) reclaim_tx_descs(txq, 32);
	}

	if (txpkts.npkt > 0)
		write_txpkts_wr(txq, &txpkts);

	/*
	 * frame not NULL means there was an error but we haven't thrown it
	 * away. This can happen when we're short of tx descriptors (qfull) or
	 * maybe even DMA handles (dma_hdl_failed). Either way, a credit flush
	 * and reclaim will get things going again.
	 *
	 * If eq->avail is already 0 we know a credit flush was requested in
	 * the WR that reduced it to 0 so we don't need another flush (we
	 * don't have any descriptor for a flush WR anyway, duh).
	 */
	if (frame && eq->avail > 0)
		write_txqflush_wr(txq);

	if (eq->pending != 0)
		ring_tx_db(sc, eq);

	(void) reclaim_tx_descs(txq, eq->qsize);
	TXQ_UNLOCK(txq);

	return (frame);
}

static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int8_t pktc_idx,
    int qsize, uint8_t esize)
{
	ASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS);
	ASSERT(pktc_idx < SGE_NCOUNTERS);	/* -ve is ok, means don't use */

	iq->flags = 0;
	iq->adapter = sc;
	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx);
	iq->intr_pktc_idx = SGE_NCOUNTERS - 1;
	if (pktc_idx >= 0) {
		iq->intr_params |= F_QINTR_CNT_EN;
		iq->intr_pktc_idx = pktc_idx;
	}
	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
}

static inline void
init_fl(struct sge_fl *fl, uint16_t qsize)
{

	fl->qsize = qsize;
	fl->allocb_fail = 0;
}

static inline void
init_eq(struct adapter *sc, struct sge_eq *eq, uint16_t eqtype, uint16_t qsize,
    uint8_t tx_chan, uint16_t iqid)
{
	struct sge *s = &sc->sge;
	uint32_t r;

	ASSERT(tx_chan < NCHAN);
	ASSERT(eqtype <= EQ_TYPEMASK);

	if (is_t5(sc->params.chip)) {
		r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
		r >>= S_QUEUESPERPAGEPF0 +
		    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
		s->s_qpp = r & M_QUEUESPERPAGEPF0;
	}

	eq->flags = eqtype & EQ_TYPEMASK;
	eq->tx_chan = tx_chan;
	eq->iqid = iqid;
	eq->qsize = qsize;
}

/*
 * Allocates the ring for an ingress queue and an optional freelist. If the
 * freelist is specified it will be allocated and then associated with the
 * ingress queue.
 *
 * Returns errno on failure. Resources allocated up to that point may still be
 * allocated. Caller is responsible for cleanup in case this function fails.
 *
 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR)
 * then the intr_idx specifies the vector, starting from 0. Otherwise it
 * specifies the index of the queue to which its interrupts will be forwarded.
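 *
 * A negative cong skips all congestion-related setup for this queue; a
 * non-negative value is used as the congestion channel map for the freelist.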
 */
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
    int intr_idx, int cong)
{
	int rc, i, cntxt_id;
	size_t len;
	struct fw_iq_cmd c;
	struct adapter *sc = iq->adapter;
	uint32_t v = 0;

	len = iq->qsize * iq->esize;
	rc = alloc_desc_ring(sc, len, DDI_DMA_READ, &iq->dhdl, &iq->ahdl,
	    &iq->ba, (caddr_t *)&iq->desc);
	if (rc != 0)
		return (rc);

	bzero(&c, sizeof (c));
	c.op_to_vfn = cpu_to_be32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
	    V_FW_IQ_CMD_VFN(0));

	c.alloc_to_len16 = cpu_to_be32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
	    FW_LEN16(c));

	/* Special handling for firmware event queue */
	if (iq == &sc->sge.fwq)
		v |= F_FW_IQ_CMD_IQASYNCH;

	if (iq->flags & IQ_INTR)
		ASSERT(intr_idx < sc->intr_count);
	else
		v |= F_FW_IQ_CMD_IQANDST;
	v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);

	c.type_to_iqandstindex = cpu_to_be32(v |
	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
	    V_FW_IQ_CMD_VIID(pi->viid) |
	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
	c.iqdroprss_to_iqesize = cpu_to_be16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
	    F_FW_IQ_CMD_IQGTSMODE |
	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
	c.iqsize = cpu_to_be16(iq->qsize);
	c.iqaddr = cpu_to_be64(iq->ba);
	if (cong >= 0)
		c.iqns_to_fl0congen = BE_32(F_FW_IQ_CMD_IQFLINTCONGEN |
		    V_FW_IQ_CMD_IQTYPE(cong ?
		    FW_IQ_IQTYPE_NIC : FW_IQ_IQTYPE_OFLD));

	if (fl != NULL) {
		unsigned int chip_ver = CHELSIO_CHIP_VERSION(sc->params.chip);

		mutex_init(&fl->lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(sc->intr_pri));
		fl->flags |= FL_MTX;

		len = fl->qsize * RX_FL_ESIZE;
		rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &fl->dhdl,
		    &fl->ahdl, &fl->ba, (caddr_t *)&fl->desc);
		if (rc != 0)
			return (rc);

		/* Allocate space for one software descriptor per buffer. */
		fl->cap = (fl->qsize - sc->sge.stat_len / RX_FL_ESIZE) * 8;
		fl->sdesc = kmem_zalloc(sizeof (struct fl_sdesc) * fl->cap,
		    KM_SLEEP);
		fl->needed = fl->cap;
		fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);

		c.iqns_to_fl0congen |=
		    cpu_to_be32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
		    F_FW_IQ_CMD_FL0PACKEN | F_FW_IQ_CMD_FL0PADEN);
		if (cong >= 0) {
			c.iqns_to_fl0congen |=
			    BE_32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
			    F_FW_IQ_CMD_FL0CONGCIF |
			    F_FW_IQ_CMD_FL0CONGEN);
		}

		/*
		 * In T6, for egress queue type FL there is internal overhead
		 * of 16B for header going into FLM module. Hence the maximum
		 * allowed burst size is 448 bytes. For T4/T5, the hardware
		 * doesn't coalesce fetch requests if more than 64 bytes of
		 * Free List pointers are provided, so we use a 128-byte Fetch
		 * Burst Minimum there (T6 implements coalescing so we can use
		 * the smaller 64-byte value there).
		 */

		c.fl0dcaen_to_fl0cidxfthresh =
		    cpu_to_be16(V_FW_IQ_CMD_FL0FBMIN(chip_ver <= CHELSIO_T5 ?
		    X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) |
		    V_FW_IQ_CMD_FL0FBMAX(chip_ver <= CHELSIO_T5 ?
		    X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
		c.fl0size = cpu_to_be16(fl->qsize);
		c.fl0addr = cpu_to_be64(fl->ba);
	}

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
	if (rc != 0) {
		cxgb_printf(sc->dip, CE_WARN,
		    "failed to create ingress queue: %d", rc);
		return (rc);
	}

	iq->cdesc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->intr_next = iq->intr_params;
	iq->adapter = sc;
	iq->cntxt_id = be16_to_cpu(c.iqid);
	iq->abs_id = be16_to_cpu(c.physiqid);
	iq->flags |= IQ_ALLOCATED;
	mutex_init(&iq->lock, NULL,
	    MUTEX_DRIVER, DDI_INTR_PRI(DDI_INTR_PRI(sc->intr_pri)));
	iq->polling = 0;

	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
	if (cntxt_id >= sc->sge.iqmap_sz) {
		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.iqmap_sz - 1);
	}
	sc->sge.iqmap[cntxt_id] = iq;

	if (fl != NULL) {
		fl->cntxt_id = be16_to_cpu(c.fl0id);
		fl->pidx = fl->cidx = 0;
		fl->copy_threshold = rx_copy_threshold;

		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
		if (cntxt_id >= sc->sge.eqmap_sz) {
			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
			    __func__, cntxt_id, sc->sge.eqmap_sz - 1);
		}
		sc->sge.eqmap[cntxt_id] = (void *)fl;

		FL_LOCK(fl);
		(void) refill_fl(sc, fl, fl->lowat);
		FL_UNLOCK(fl);

		iq->flags |= IQ_HAS_FL;
	}

	if (is_t5(sc->params.chip) && cong >= 0) {
		uint32_t param, val;

		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
		if (cong == 0)
			val = 1 << 19;
		else {
			val = 2 << 19;
			for (i = 0; i < 4; i++) {
				if (cong & (1 << i))
					val |= 1 << (i << 2);
			}
		}

		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
		if (rc != 0) {
			/* report error but carry on */
			cxgb_printf(sc->dip, CE_WARN,
			    "failed to set congestion manager context for "
			    "ingress queue %d: %d", iq->cntxt_id, rc);
		}
	}

	/* Enable IQ interrupts */
	iq->state = IQS_IDLE;
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
	    V_INGRESSQID(iq->cntxt_id));

	return (0);
}

static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
	int rc;

	if (iq != NULL) {
		struct adapter *sc = iq->adapter;
		dev_info_t *dip;

		dip = pi ? pi->dip : sc->dip;
		if (iq->flags & IQ_ALLOCATED) {
			rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
			    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
			    fl ? fl->cntxt_id : 0xffff, 0xffff);
			if (rc != 0) {
				cxgb_printf(dip, CE_WARN,
				    "failed to free queue %p: %d", iq, rc);
				return (rc);
			}
			mutex_destroy(&iq->lock);
			iq->flags &= ~IQ_ALLOCATED;
		}

		if (iq->desc != NULL) {
			(void) free_desc_ring(&iq->dhdl, &iq->ahdl);
			iq->desc = NULL;
		}

		bzero(iq, sizeof (*iq));
	}

	if (fl != NULL) {
		if (fl->sdesc != NULL) {
			FL_LOCK(fl);
			free_fl_bufs(fl);
			FL_UNLOCK(fl);

			kmem_free(fl->sdesc, sizeof (struct fl_sdesc) *
			    fl->cap);
			fl->sdesc = NULL;
		}

		if (fl->desc != NULL) {
			(void) free_desc_ring(&fl->dhdl, &fl->ahdl);
			fl->desc = NULL;
		}

		if (fl->flags & FL_MTX) {
			mutex_destroy(&fl->lock);
			fl->flags &= ~FL_MTX;
		}

		bzero(fl, sizeof (struct sge_fl));
	}

	return (0);
}

static int
alloc_fwq(struct adapter *sc)
{
	int rc, intr_idx;
	struct sge_iq *fwq = &sc->sge.fwq;

	init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE);
	fwq->flags |= IQ_INTR;	/* always */
	intr_idx = sc->intr_count > 1 ? 1 : 0;
	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
	if (rc != 0) {
		cxgb_printf(sc->dip, CE_WARN,
		    "failed to create firmware event queue: %d.", rc);
		return (rc);
	}

	return (0);
}

static int
free_fwq(struct adapter *sc)
{

	return (free_iq_fl(NULL, &sc->sge.fwq, NULL));
}

#ifdef TCP_OFFLOAD_ENABLE
static int
alloc_mgmtq(struct adapter *sc)
{
	int rc;
	struct sge_wrq *mgmtq = &sc->sge.mgmtq;

	init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
	    sc->sge.fwq.cntxt_id);
	rc = alloc_wrq(sc, NULL, mgmtq, 0);
	if (rc != 0) {
		cxgb_printf(sc->dip, CE_WARN,
		    "failed to create management queue: %d\n", rc);
		return (rc);
	}

	return (0);
}
#endif

static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int i)
{
	int rc;

	rxq->port = pi;
	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx,
	    t4_get_tp_ch_map(pi->adapter, pi->tx_chan));
	if (rc != 0)
		return (rc);

	rxq->ksp = setup_rxq_kstats(pi, rxq, i);

	return (rc);
}

static int
free_rxq(struct port_info *pi, struct sge_rxq *rxq)
{
	int rc;

	if (rxq->ksp != NULL) {
		kstat_delete(rxq->ksp);
		rxq->ksp = NULL;
	}

	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
	if (rc == 0)
		bzero(&rxq->fl, sizeof (*rxq) - offsetof(struct sge_rxq, fl));

	return (rc);
}

#ifdef TCP_OFFLOAD_ENABLE
static int
alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
    int intr_idx)
{
	int rc;

	rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
	    t4_get_tp_ch_map(pi->adapter, pi->tx_chan));
	if (rc != 0)
		return (rc);

	return (rc);
}

static int
free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
{
	int rc;

	rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
	if (rc == 0)
		bzero(&ofld_rxq->fl, sizeof (*ofld_rxq) -
		    offsetof(struct sge_ofld_rxq, fl));

	return (rc);
}
#endif

static int
ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ctrl_cmd c;

	bzero(&c, sizeof (c));

	c.op_to_vfn = BE_32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
	    V_FW_EQ_CTRL_CMD_VFN(0));
	c.alloc_to_len16 = BE_32(F_FW_EQ_CTRL_CMD_ALLOC |
	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* TODO */
	c.physeqid_pkd = BE_32(0);
	c.fetchszm_to_iqid =
	    BE_32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
	    F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    BE_32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
	c.eqaddr = BE_64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
	if (rc != 0) {
		cxgb_printf(sc->dip, CE_WARN,
		    "failed to create control queue %d: %d", eq->tx_chan, rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(BE_32(c.cmpliqid_eqid));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.eqmap_sz)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.eqmap_sz - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

static int
eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_eth_cmd c;

	bzero(&c, sizeof (c));

	c.op_to_vfn = BE_32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
	    V_FW_EQ_ETH_CMD_VFN(0));
	c.alloc_to_len16 = BE_32(F_FW_EQ_ETH_CMD_ALLOC |
	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
	c.autoequiqe_to_viid = BE_32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
	    F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(pi->viid));
	c.fetchszm_to_iqid =
	    BE_32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
	    V_FW_EQ_ETH_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = BE_32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
	c.eqaddr = BE_64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
	if (rc != 0) {
		cxgb_printf(pi->dip, CE_WARN,
		    "failed to create Ethernet egress queue: %d", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(BE_32(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.eqmap_sz)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.eqmap_sz - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

#ifdef TCP_OFFLOAD_ENABLE
static int
ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ofld_cmd c;

	bzero(&c, sizeof (c));

	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
	    V_FW_EQ_OFLD_CMD_VFN(0));
	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
	c.fetchszm_to_iqid =
	    htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
	    F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize =
	    BE_32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
	c.eqaddr = BE_64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof (c), &c);
	if (rc != 0) {
		cxgb_printf(pi->dip, CE_WARN,
		    "failed to create egress queue for TCP offload: %d", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(BE_32(c.eqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.eqmap_sz)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.eqmap_sz - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}
#endif

static int
alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
	int rc;
	size_t len;

	mutex_init(&eq->lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(sc->intr_pri));
	eq->flags |= EQ_MTX;

	len = eq->qsize * EQ_ESIZE;
	rc = alloc_desc_ring(sc, len, DDI_DMA_WRITE, &eq->desc_dhdl,
	    &eq->desc_ahdl, &eq->ba, (caddr_t *)&eq->desc);
	if (rc != 0)
		return (rc);

	eq->cap = eq->qsize - sc->sge.stat_len / EQ_ESIZE;
	eq->spg = (void *)&eq->desc[eq->cap];
	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
	eq->pidx = eq->cidx = 0;
	eq->doorbells = sc->doorbells;

	switch (eq->flags & EQ_TYPEMASK) {
	case EQ_CTRL:
		rc = ctrl_eq_alloc(sc, eq);
		break;

	case EQ_ETH:
		rc = eth_eq_alloc(sc, pi, eq);
		break;

#ifdef TCP_OFFLOAD_ENABLE
	case EQ_OFLD:
		rc = ofld_eq_alloc(sc, pi, eq);
		break;
#endif

	default:
		panic("%s: invalid eq type %d.", __func__,
		    eq->flags & EQ_TYPEMASK);
	}

	if (eq->doorbells &
	    (DOORBELL_UDB | DOORBELL_UDBWC | DOORBELL_WCWR)) {
		uint32_t s_qpp = sc->sge.s_qpp;
		uint32_t mask = (1 << s_qpp) - 1;
		volatile uint8_t *udb;

		udb = (volatile uint8_t *)sc->reg1p + UDBS_DB_OFFSET;
		udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT;	/* pg offset */
		eq->udb_qid = eq->cntxt_id & mask;	/* id in page */
		if (eq->udb_qid > PAGE_SIZE / UDBS_SEG_SIZE)
			eq->doorbells &= ~DOORBELL_WCWR;
		else {
			udb += eq->udb_qid << UDBS_SEG_SHIFT;	/* seg offset */
			eq->udb_qid = 0;
		}
		eq->udb = (volatile void *)udb;
	}

	if (rc != 0) {
		cxgb_printf(sc->dip, CE_WARN,
		    "failed to allocate egress queue(%d): %d",
		    eq->flags & EQ_TYPEMASK, rc);
	}

	return (rc);
}

static int
free_eq(struct adapter *sc, struct sge_eq *eq)
{
	int rc;

	if (eq->flags & EQ_ALLOCATED) {
		switch (eq->flags & EQ_TYPEMASK) {
		case EQ_CTRL:
			rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;

		case EQ_ETH:
			rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;

#ifdef TCP_OFFLOAD_ENABLE
		case EQ_OFLD:
			rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
			    eq->cntxt_id);
			break;
#endif

		default:
			panic("%s: invalid eq type %d.", __func__,
			    eq->flags & EQ_TYPEMASK);
		}
		if (rc != 0) {
			cxgb_printf(sc->dip, CE_WARN,
			    "failed to free egress queue (%d): %d",
			    eq->flags & EQ_TYPEMASK, rc);
			return (rc);
		}
		eq->flags &= ~EQ_ALLOCATED;
	}

	if (eq->desc != NULL) {
		(void) free_desc_ring(&eq->desc_dhdl, &eq->desc_ahdl);
		eq->desc = NULL;
	}

	if (eq->flags & EQ_MTX)
		mutex_destroy(&eq->lock);

	bzero(eq, sizeof (*eq));
	return (0);
}

#ifdef TCP_OFFLOAD_ENABLE
/* ARGSUSED */
static int
alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
    int idx)
{
	int rc;

	rc = alloc_eq(sc, pi, &wrq->eq);
	if (rc != 0)
		return (rc);

	wrq->adapter = sc;
	wrq->wr_list.head = NULL;
	wrq->wr_list.tail = NULL;

	/*
	 * TODO: use idx to figure out what kind of wrq this is and install
	 * useful kstats for it.
	 */

	return (rc);
}

static int
free_wrq(struct adapter *sc, struct sge_wrq *wrq)
{
	int rc;

	rc = free_eq(sc, &wrq->eq);
	if (rc != 0)
		return (rc);

	bzero(wrq, sizeof (*wrq));
	return (0);
}
#endif

static int
alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
{
	int rc, i;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;

	rc = alloc_eq(sc, pi, eq);
	if (rc != 0)
		return (rc);

	txq->port = pi;
	txq->sdesc = kmem_zalloc(sizeof (struct tx_sdesc) * eq->cap, KM_SLEEP);
	txq->txb_size = eq->qsize * tx_copy_threshold;
	rc = alloc_tx_copybuffer(sc, txq->txb_size, &txq->txb_dhdl,
	    &txq->txb_ahdl, &txq->txb_ba, &txq->txb_va);
	if (rc == 0)
		txq->txb_avail = txq->txb_size;
	else
		txq->txb_avail = txq->txb_size = 0;

	/*
	 * TODO: is this too low? Worst case would need around 4 times qsize
	 * (all tx descriptors filled to the brim with SGLs, with each entry in
	 * the SGL coming from a distinct DMA handle). Increase tx_dhdl_total
	 * if you see too many dma_hdl_failed.
	 */
	txq->tx_dhdl_total = eq->qsize * 2;
	txq->tx_dhdl = kmem_zalloc(sizeof (ddi_dma_handle_t) *
	    txq->tx_dhdl_total, KM_SLEEP);
	for (i = 0; i < txq->tx_dhdl_total; i++) {
		rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx,
		    DDI_DMA_SLEEP, 0, &txq->tx_dhdl[i]);
		if (rc != DDI_SUCCESS) {
			cxgb_printf(sc->dip, CE_WARN,
			    "%s: failed to allocate DMA handle (%d)",
			    __func__, rc);
			return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL);
ENOMEM : EINVAL); 1977 } 1978 txq->tx_dhdl_avail++; 1979 } 1980 1981 txq->ksp = setup_txq_kstats(pi, txq, idx); 1982 1983 return (rc); 1984 } 1985 1986 static int 1987 free_txq(struct port_info *pi, struct sge_txq *txq) 1988 { 1989 int i; 1990 struct adapter *sc = pi->adapter; 1991 struct sge_eq *eq = &txq->eq; 1992 1993 if (txq->ksp != NULL) { 1994 kstat_delete(txq->ksp); 1995 txq->ksp = NULL; 1996 } 1997 1998 if (txq->txb_va != NULL) { 1999 (void) free_desc_ring(&txq->txb_dhdl, &txq->txb_ahdl); 2000 txq->txb_va = NULL; 2001 } 2002 2003 if (txq->sdesc != NULL) { 2004 struct tx_sdesc *sd; 2005 ddi_dma_handle_t hdl; 2006 2007 TXQ_LOCK(txq); 2008 while (eq->cidx != eq->pidx) { 2009 sd = &txq->sdesc[eq->cidx]; 2010 2011 for (i = sd->hdls_used; i; i--) { 2012 hdl = txq->tx_dhdl[txq->tx_dhdl_cidx]; 2013 (void) ddi_dma_unbind_handle(hdl); 2014 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total) 2015 txq->tx_dhdl_cidx = 0; 2016 } 2017 2018 ASSERT(sd->m); 2019 freemsgchain(sd->m); 2020 2021 eq->cidx += sd->desc_used; 2022 if (eq->cidx >= eq->cap) 2023 eq->cidx -= eq->cap; 2024 2025 txq->txb_avail += txq->txb_used; 2026 } 2027 ASSERT(txq->tx_dhdl_cidx == txq->tx_dhdl_pidx); 2028 ASSERT(txq->txb_avail == txq->txb_size); 2029 TXQ_UNLOCK(txq); 2030 2031 kmem_free(txq->sdesc, sizeof (struct tx_sdesc) * eq->cap); 2032 txq->sdesc = NULL; 2033 } 2034 2035 if (txq->tx_dhdl != NULL) { 2036 for (i = 0; i < txq->tx_dhdl_total; i++) { 2037 if (txq->tx_dhdl[i] != NULL) 2038 ddi_dma_free_handle(&txq->tx_dhdl[i]); 2039 } 2040 } 2041 2042 (void) free_eq(sc, &txq->eq); 2043 2044 bzero(txq, sizeof (*txq)); 2045 return (0); 2046 } 2047 2048 /* 2049 * Allocates a block of contiguous memory for DMA. Can be used to allocate 2050 * memory for descriptor rings or for tx/rx copy buffers. 2051 * 2052 * Caller does not have to clean up anything if this function fails, it cleans 2053 * up after itself. 2054 * 2055 * Caller provides the following: 2056 * len length of the block of memory to allocate. 2057 * flags DDI_DMA_* flags to use (CONSISTENT/STREAMING, READ/WRITE/RDWR) 2058 * acc_attr device access attributes for the allocation. 2059 * dma_attr DMA attributes for the allocation 2060 * 2061 * If the function is successful it fills up this information: 2062 * dma_hdl DMA handle for the allocated memory 2063 * acc_hdl access handle for the allocated memory 2064 * ba bus address of the allocated memory 2065 * va KVA of the allocated memory. 2066 */ 2067 static int 2068 alloc_dma_memory(struct adapter *sc, size_t len, int flags, 2069 ddi_device_acc_attr_t *acc_attr, ddi_dma_attr_t *dma_attr, 2070 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl, 2071 uint64_t *pba, caddr_t *pva) 2072 { 2073 int rc; 2074 ddi_dma_handle_t dhdl; 2075 ddi_acc_handle_t ahdl; 2076 ddi_dma_cookie_t cookie; 2077 uint_t ccount; 2078 caddr_t va; 2079 size_t real_len; 2080 2081 *pva = NULL; 2082 2083 /* 2084 * DMA handle. 2085 */ 2086 rc = ddi_dma_alloc_handle(sc->dip, dma_attr, DDI_DMA_SLEEP, 0, &dhdl); 2087 if (rc != DDI_SUCCESS) { 2088 cxgb_printf(sc->dip, CE_WARN, 2089 "failed to allocate DMA handle: %d", rc); 2090 2091 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL); 2092 } 2093 2094 /* 2095 * Memory suitable for DMA. 2096 */ 2097 rc = ddi_dma_mem_alloc(dhdl, len, acc_attr, 2098 flags & DDI_DMA_CONSISTENT ? 
DDI_DMA_CONSISTENT : DDI_DMA_STREAMING,
2099 DDI_DMA_SLEEP, 0, &va, &real_len, &ahdl);
2100 if (rc != DDI_SUCCESS) {
2101 cxgb_printf(sc->dip, CE_WARN,
2102 "failed to allocate DMA memory: %d", rc);
2103
2104 ddi_dma_free_handle(&dhdl);
2105 return (ENOMEM);
2106 }
2107
2108 if (len != real_len) {
2109 cxgb_printf(sc->dip, CE_WARN,
2110 "%s: len (%u) != real_len (%u)", __func__, (uint_t)len, (uint_t)real_len);
2111 }
2112
2113 /*
2114 * DMA bindings.
2115 */
2116 rc = ddi_dma_addr_bind_handle(dhdl, NULL, va, real_len, flags, NULL,
2117 NULL, &cookie, &ccount);
2118 if (rc != DDI_DMA_MAPPED) {
2119 cxgb_printf(sc->dip, CE_WARN,
2120 "failed to map DMA memory: %d", rc);
2121
2122 ddi_dma_mem_free(&ahdl);
2123 ddi_dma_free_handle(&dhdl);
2124 return (ENOMEM);
2125 }
2126 if (ccount != 1) {
2127 cxgb_printf(sc->dip, CE_WARN,
2128 "unusable DMA mapping (%d segments)", ccount);
2129 (void) free_desc_ring(&dhdl, &ahdl);
return (ENOMEM);
2130 }
2131
2132 bzero(va, real_len);
2133 *dma_hdl = dhdl;
2134 *acc_hdl = ahdl;
2135 *pba = cookie.dmac_laddress;
2136 *pva = va;
2137
2138 return (0);
2139 }
2140
2141 static int
2142 free_dma_memory(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
2143 {
2144 (void) ddi_dma_unbind_handle(*dhdl);
2145 ddi_dma_mem_free(ahdl);
2146 ddi_dma_free_handle(dhdl);
2147
2148 return (0);
2149 }
2150
2151 static int
2152 alloc_desc_ring(struct adapter *sc, size_t len, int rw,
2153 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
2154 uint64_t *pba, caddr_t *pva)
2155 {
2156 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_desc;
2157 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc;
2158
2159 return (alloc_dma_memory(sc, len, DDI_DMA_CONSISTENT | rw, acc_attr,
2160 dma_attr, dma_hdl, acc_hdl, pba, pva));
2161 }
2162
2163 static int
2164 free_desc_ring(ddi_dma_handle_t *dhdl, ddi_acc_handle_t *ahdl)
2165 {
2166 return (free_dma_memory(dhdl, ahdl));
2167 }
2168
2169 static int
2170 alloc_tx_copybuffer(struct adapter *sc, size_t len,
2171 ddi_dma_handle_t *dma_hdl, ddi_acc_handle_t *acc_hdl,
2172 uint64_t *pba, caddr_t *pva)
2173 {
2174 ddi_device_acc_attr_t *acc_attr = &sc->sge.acc_attr_tx;
2175 ddi_dma_attr_t *dma_attr = &sc->sge.dma_attr_desc; /* NOT dma_attr_tx */
2176
2177 return (alloc_dma_memory(sc, len, DDI_DMA_STREAMING | DDI_DMA_WRITE,
2178 acc_attr, dma_attr, dma_hdl, acc_hdl, pba, pva));
2179 }
2180
2181 static inline bool
2182 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
2183 {
2184 (void) ddi_dma_sync(iq->dhdl, (uintptr_t)iq->cdesc -
2185 (uintptr_t)iq->desc, iq->esize, DDI_DMA_SYNC_FORKERNEL);
2186
2187 *ctrl = (void *)((uintptr_t)iq->cdesc +
2188 (iq->esize - sizeof (struct rsp_ctrl)));
2189
2190 return ((((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen));
2191 }
2192
2193 static inline void
2194 iq_next(struct sge_iq *iq)
2195 {
2196 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
2197 if (++iq->cidx == iq->qsize - 1) {
2198 iq->cidx = 0;
2199 iq->gen ^= 1;
2200 iq->cdesc = iq->desc;
2201 }
2202 }
2203
2204 /*
2205 * Fill up the freelist by up to nbufs and maybe ring its doorbell.
2206 *
2207 * Returns non-zero to indicate that it should be added to the list of starving
2208 * freelists.
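 *
 * A minimal sketch of the expected caller pattern (the rx interrupt path
 * in this file does the equivalent with extra bookkeeping; the count of
 * 64 is only an example).  add_fl_to_sfl() takes the fl lock itself, so
 * the lock is dropped before the freelist is handed to the starvation
 * timer:
 *
 *	int starved;
 *
 *	FL_LOCK(fl);
 *	starved = refill_fl(sc, fl, 64);
 *	FL_UNLOCK(fl);
 *	if (starved != 0)
 *		add_fl_to_sfl(sc, fl);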
2209 */ 2210 static int 2211 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) 2212 { 2213 uint64_t *d = &fl->desc[fl->pidx]; 2214 struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; 2215 2216 FL_LOCK_ASSERT_OWNED(fl); 2217 ASSERT(nbufs >= 0); 2218 2219 if (nbufs > fl->needed) 2220 nbufs = fl->needed; 2221 2222 while (nbufs--) { 2223 if (sd->rxb != NULL) { 2224 if (sd->rxb->ref_cnt == 1) { 2225 /* 2226 * Buffer is available for recycling. Two ways 2227 * this can happen: 2228 * 2229 * a) All the packets DMA'd into it last time 2230 * around were within the rx_copy_threshold 2231 * and no part of the buffer was ever passed 2232 * up (ref_cnt never went over 1). 2233 * 2234 * b) Packets DMA'd into the buffer were passed 2235 * up but have all been freed by the upper 2236 * layers by now (ref_cnt went over 1 but is 2237 * now back to 1). 2238 * 2239 * Either way the bus address in the descriptor 2240 * ring is already valid. 2241 */ 2242 ASSERT(*d == cpu_to_be64(sd->rxb->ba)); 2243 d++; 2244 goto recycled; 2245 } else { 2246 /* 2247 * Buffer still in use and we need a 2248 * replacement. But first release our reference 2249 * on the existing buffer. 2250 */ 2251 rxbuf_free(sd->rxb); 2252 } 2253 } 2254 2255 sd->rxb = rxbuf_alloc(sc->sge.rxbuf_cache, KM_NOSLEEP, 1); 2256 if (sd->rxb == NULL) 2257 break; 2258 *d++ = cpu_to_be64(sd->rxb->ba); 2259 2260 recycled: fl->pending++; 2261 sd++; 2262 fl->needed--; 2263 if (++fl->pidx == fl->cap) { 2264 fl->pidx = 0; 2265 sd = fl->sdesc; 2266 d = fl->desc; 2267 } 2268 } 2269 2270 if (fl->pending >= 8) 2271 ring_fl_db(sc, fl); 2272 2273 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 2274 } 2275 2276 #ifndef TAILQ_FOREACH_SAFE 2277 #define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ 2278 for ((var) = TAILQ_FIRST((head)); \ 2279 (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ 2280 (var) = (tvar)) 2281 #endif 2282 2283 /* 2284 * Attempt to refill all starving freelists. 2285 */ 2286 static void 2287 refill_sfl(void *arg) 2288 { 2289 struct adapter *sc = arg; 2290 struct sge_fl *fl, *fl_temp; 2291 2292 mutex_enter(&sc->sfl_lock); 2293 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 2294 FL_LOCK(fl); 2295 (void) refill_fl(sc, fl, 64); 2296 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 2297 TAILQ_REMOVE(&sc->sfl, fl, link); 2298 fl->flags &= ~FL_STARVING; 2299 } 2300 FL_UNLOCK(fl); 2301 } 2302 2303 if (!TAILQ_EMPTY(&sc->sfl) != 0) 2304 sc->sfl_timer = timeout(refill_sfl, sc, drv_usectohz(100000)); 2305 mutex_exit(&sc->sfl_lock); 2306 } 2307 2308 static void 2309 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 2310 { 2311 mutex_enter(&sc->sfl_lock); 2312 FL_LOCK(fl); 2313 if ((fl->flags & FL_DOOMED) == 0) { 2314 if (TAILQ_EMPTY(&sc->sfl) != 0) { 2315 sc->sfl_timer = timeout(refill_sfl, sc, 2316 drv_usectohz(100000)); 2317 } 2318 fl->flags |= FL_STARVING; 2319 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 2320 } 2321 FL_UNLOCK(fl); 2322 mutex_exit(&sc->sfl_lock); 2323 } 2324 2325 static void 2326 free_fl_bufs(struct sge_fl *fl) 2327 { 2328 struct fl_sdesc *sd; 2329 unsigned int i; 2330 2331 FL_LOCK_ASSERT_OWNED(fl); 2332 2333 for (i = 0; i < fl->cap; i++) { 2334 sd = &fl->sdesc[i]; 2335 2336 if (sd->rxb != NULL) { 2337 rxbuf_free(sd->rxb); 2338 sd->rxb = NULL; 2339 } 2340 } 2341 } 2342 2343 /* 2344 * Note that fl->cidx and fl->offset are left unchanged in case of failure. 
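 *
 * For example (a sketch with made-up sizes): a 40 byte frame at or below
 * the freelist's copy_threshold is bcopy'd into a freshly allocb'd mblk
 * and the rx buffer's ref_cnt stays at 1, so refill_fl() can recycle the
 * buffer in place; a 9000 byte frame that spans buffers comes back as a
 * b_cont chain of desballoc'd mblks, each holding an extra reference on
 * its rx buffer until the upper layers free the chain.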
2345 */ 2346 static mblk_t * 2347 get_fl_payload(struct adapter *sc, struct sge_fl *fl, 2348 uint32_t len_newbuf, int *fl_bufs_used) 2349 { 2350 struct mblk_pair frame = {0}; 2351 struct rxbuf *rxb; 2352 mblk_t *m = NULL; 2353 uint_t nbuf = 0, len, copy, n; 2354 uint32_t cidx, offset, rcidx, roffset; 2355 2356 /* 2357 * The SGE won't pack a new frame into the current buffer if the entire 2358 * payload doesn't fit in the remaining space. Move on to the next buf 2359 * in that case. 2360 */ 2361 rcidx = fl->cidx; 2362 roffset = fl->offset; 2363 if (fl->offset > 0 && len_newbuf & F_RSPD_NEWBUF) { 2364 fl->offset = 0; 2365 if (++fl->cidx == fl->cap) 2366 fl->cidx = 0; 2367 nbuf++; 2368 } 2369 cidx = fl->cidx; 2370 offset = fl->offset; 2371 2372 len = G_RSPD_LEN(len_newbuf); /* pktshift + payload length */ 2373 copy = (len <= fl->copy_threshold); 2374 if (copy != 0) { 2375 frame.head = m = allocb(len, BPRI_HI); 2376 if (m == NULL) { 2377 fl->allocb_fail++; 2378 cmn_err(CE_WARN,"%s: mbuf allocation failure " 2379 "count = %llu", __func__, 2380 (unsigned long long)fl->allocb_fail); 2381 fl->cidx = rcidx; 2382 fl->offset = roffset; 2383 return (NULL); 2384 } 2385 } 2386 2387 while (len) { 2388 rxb = fl->sdesc[cidx].rxb; 2389 n = min(len, rxb->buf_size - offset); 2390 2391 (void) ddi_dma_sync(rxb->dhdl, offset, n, 2392 DDI_DMA_SYNC_FORKERNEL); 2393 2394 if (copy != 0) 2395 bcopy(rxb->va + offset, m->b_wptr, n); 2396 else { 2397 m = desballoc((unsigned char *)rxb->va + offset, n, 2398 BPRI_HI, &rxb->freefunc); 2399 if (m == NULL) { 2400 fl->allocb_fail++; 2401 cmn_err(CE_WARN, 2402 "%s: mbuf allocation failure " 2403 "count = %llu", __func__, 2404 (unsigned long long)fl->allocb_fail); 2405 if (frame.head) 2406 freemsgchain(frame.head); 2407 fl->cidx = rcidx; 2408 fl->offset = roffset; 2409 return (NULL); 2410 } 2411 atomic_inc_uint(&rxb->ref_cnt); 2412 if (frame.head != NULL) 2413 frame.tail->b_cont = m; 2414 else 2415 frame.head = m; 2416 frame.tail = m; 2417 } 2418 m->b_wptr += n; 2419 len -= n; 2420 offset += roundup(n, sc->sge.fl_align); 2421 ASSERT(offset <= rxb->buf_size); 2422 if (offset == rxb->buf_size) { 2423 offset = 0; 2424 if (++cidx == fl->cap) 2425 cidx = 0; 2426 nbuf++; 2427 } 2428 } 2429 2430 fl->cidx = cidx; 2431 fl->offset = offset; 2432 (*fl_bufs_used) += nbuf; 2433 2434 ASSERT(frame.head != NULL); 2435 return (frame.head); 2436 } 2437 2438 /* 2439 * We'll do immediate data tx for non-LSO, but only when not coalescing. We're 2440 * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes 2441 * of immediate data. 2442 */ 2443 #define IMM_LEN ( \ 2444 2 * EQ_ESIZE \ 2445 - sizeof (struct fw_eth_tx_pkt_wr) \ 2446 - sizeof (struct cpl_tx_pkt_core)) 2447 2448 /* 2449 * Returns non-zero on failure, no need to cleanup anything in that case. 2450 * 2451 * Note 1: We always try to pull up the mblk if required and return E2BIG only 2452 * if this fails. 2453 * 2454 * Note 2: We'll also pullup incoming mblk if HW_LSO is set and the first mblk 2455 * does not have the TCP header in it. 
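 *
 * A rough sketch of how a caller reacts to the result (the real tx path
 * layers work request coalescing and queue-stall handling on top of
 * this; the local names are illustrative only):
 *
 *	rc = get_frame_txinfo(txq, &frame, &txinfo, sgl_only);
 *	if (rc == ENOMEM)
 *		frame is still intact; stop and retry after
 *		reclaim_tx_descs() frees up resources
 *	else if (rc != 0)
 *		unrecoverable for this frame (pullup has already been
 *		attempted if it was going to help); drop it
 *	else if (txinfo.nsegs == 0)
 *		write it out as immediate data
 *	else
 *		write a work request that uses txinfo.sgl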
2456 */ 2457 static int 2458 get_frame_txinfo(struct sge_txq *txq, mblk_t **fp, struct txinfo *txinfo, 2459 int sgl_only) 2460 { 2461 uint32_t flags = 0, len, n; 2462 mblk_t *m = *fp; 2463 int rc; 2464 2465 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb and dma_hdls */ 2466 2467 mac_hcksum_get(m, NULL, NULL, NULL, NULL, &flags); 2468 txinfo->flags = (flags & HCK_TX_FLAGS); 2469 2470 mac_lso_get(m, &txinfo->mss, &flags); 2471 txinfo->flags |= (flags & HW_LSO_FLAGS); 2472 2473 if (flags & HW_LSO) 2474 sgl_only = 1; /* Do not allow immediate data with LSO */ 2475 2476 /* 2477 * If checksum or segmentation offloads are requested, gather 2478 * information about the sizes and types of headers in the packet. 2479 */ 2480 if (txinfo->flags != 0) { 2481 /* 2482 * Even if this fails, the meoi_flags field will be capable of 2483 * communicating the lack of useful packet information. 2484 */ 2485 (void) mac_ether_offload_info(m, &txinfo->meoi); 2486 } else { 2487 bzero(&txinfo->meoi, sizeof (txinfo->meoi)); 2488 } 2489 2490 start: txinfo->nsegs = 0; 2491 txinfo->hdls_used = 0; 2492 txinfo->txb_used = 0; 2493 txinfo->len = 0; 2494 2495 /* total length and a rough estimate of # of segments */ 2496 n = 0; 2497 for (; m; m = m->b_cont) { 2498 len = MBLKL(m); 2499 n += (len / PAGE_SIZE) + 1; 2500 txinfo->len += len; 2501 } 2502 m = *fp; 2503 2504 if (n >= TX_SGL_SEGS || (flags & HW_LSO && MBLKL(m) < 50)) { 2505 txq->pullup_early++; 2506 m = msgpullup(*fp, -1); 2507 if (m == NULL) { 2508 txq->pullup_failed++; 2509 return (E2BIG); /* (*fp) left as it was */ 2510 } 2511 freemsg(*fp); 2512 *fp = m; 2513 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags); 2514 } 2515 2516 if (txinfo->len <= IMM_LEN && !sgl_only) 2517 return (0); /* nsegs = 0 tells caller to use imm. tx */ 2518 2519 if (txinfo->len <= txq->copy_threshold && 2520 copy_into_txb(txq, m, txinfo->len, txinfo) == 0) 2521 goto done; 2522 2523 for (; m; m = m->b_cont) { 2524 2525 len = MBLKL(m); 2526 2527 /* Use tx copy buffer if this mblk is small enough */ 2528 if (len <= txq->copy_threshold && 2529 copy_into_txb(txq, m, len, txinfo) == 0) 2530 continue; 2531 2532 /* Add DMA bindings for this mblk to the SGL */ 2533 rc = add_mblk(txq, txinfo, m, len); 2534 2535 if (rc == E2BIG || 2536 (txinfo->nsegs == TX_SGL_SEGS && m->b_cont)) { 2537 2538 txq->pullup_late++; 2539 m = msgpullup(*fp, -1); 2540 if (m != NULL) { 2541 free_txinfo_resources(txq, txinfo); 2542 freemsg(*fp); 2543 *fp = m; 2544 mac_hcksum_set(m, 0, 0, 0, 0, txinfo->flags); 2545 goto start; 2546 } 2547 2548 txq->pullup_failed++; 2549 rc = E2BIG; 2550 } 2551 2552 if (rc != 0) { 2553 free_txinfo_resources(txq, txinfo); 2554 return (rc); 2555 } 2556 } 2557 2558 ASSERT(txinfo->nsegs > 0 && txinfo->nsegs <= TX_SGL_SEGS); 2559 2560 done: 2561 2562 /* 2563 * Store the # of flits required to hold this frame's SGL in nflits. An 2564 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by 2565 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used 2566 * then len1 must be set to 0. 
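 *
 * A worked example of the arithmetic below: with nsegs = 1, n = 0 and
 * nflits = 2 (just the header + len0 + addr0 flits).  With nsegs = 4,
 * n = 3 and nflits = (3 * 3) / 2 + 1 + 2 = 7: two header flits, three
 * flits for the one full (len pair, addr, addr) tuple, and two flits for
 * the trailing half tuple whose unused len1 is zeroed just below.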
2567 */ 2568 n = txinfo->nsegs - 1; 2569 txinfo->nflits = (3 * n) / 2 + (n & 1) + 2; 2570 if (n & 1) 2571 txinfo->sgl.sge[n / 2].len[1] = cpu_to_be32(0); 2572 2573 txinfo->sgl.cmd_nsge = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_DSGL) | 2574 V_ULPTX_NSGE(txinfo->nsegs)); 2575 2576 return (0); 2577 } 2578 2579 static inline int 2580 fits_in_txb(struct sge_txq *txq, int len, int *waste) 2581 { 2582 if (txq->txb_avail < len) 2583 return (0); 2584 2585 if (txq->txb_next + len <= txq->txb_size) { 2586 *waste = 0; 2587 return (1); 2588 } 2589 2590 *waste = txq->txb_size - txq->txb_next; 2591 2592 return (txq->txb_avail - *waste < len ? 0 : 1); 2593 } 2594 2595 #define TXB_CHUNK 64 2596 2597 /* 2598 * Copies the specified # of bytes into txq's tx copy buffer and updates txinfo 2599 * and txq to indicate resources used. Caller has to make sure that those many 2600 * bytes are available in the mblk chain (b_cont linked). 2601 */ 2602 static inline int 2603 copy_into_txb(struct sge_txq *txq, mblk_t *m, int len, struct txinfo *txinfo) 2604 { 2605 int waste, n; 2606 2607 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate txb */ 2608 2609 if (!fits_in_txb(txq, len, &waste)) { 2610 txq->txb_full++; 2611 return (ENOMEM); 2612 } 2613 2614 if (waste != 0) { 2615 ASSERT((waste & (TXB_CHUNK - 1)) == 0); 2616 txinfo->txb_used += waste; 2617 txq->txb_avail -= waste; 2618 txq->txb_next = 0; 2619 } 2620 2621 for (n = 0; n < len; m = m->b_cont) { 2622 bcopy(m->b_rptr, txq->txb_va + txq->txb_next + n, MBLKL(m)); 2623 n += MBLKL(m); 2624 } 2625 2626 add_seg(txinfo, txq->txb_ba + txq->txb_next, len); 2627 2628 n = roundup(len, TXB_CHUNK); 2629 txinfo->txb_used += n; 2630 txq->txb_avail -= n; 2631 txq->txb_next += n; 2632 ASSERT(txq->txb_next <= txq->txb_size); 2633 if (txq->txb_next == txq->txb_size) 2634 txq->txb_next = 0; 2635 2636 return (0); 2637 } 2638 2639 static inline void 2640 add_seg(struct txinfo *txinfo, uint64_t ba, uint32_t len) 2641 { 2642 ASSERT(txinfo->nsegs < TX_SGL_SEGS); /* must have room */ 2643 2644 if (txinfo->nsegs != 0) { 2645 int idx = txinfo->nsegs - 1; 2646 txinfo->sgl.sge[idx / 2].len[idx & 1] = cpu_to_be32(len); 2647 txinfo->sgl.sge[idx / 2].addr[idx & 1] = cpu_to_be64(ba); 2648 } else { 2649 txinfo->sgl.len0 = cpu_to_be32(len); 2650 txinfo->sgl.addr0 = cpu_to_be64(ba); 2651 } 2652 txinfo->nsegs++; 2653 } 2654 2655 /* 2656 * This function cleans up any partially allocated resources when it fails so 2657 * there's nothing for the caller to clean up in that case. 2658 * 2659 * EIO indicates permanent failure. Caller should drop the frame containing 2660 * this mblk and continue. 2661 * 2662 * E2BIG indicates that the SGL length for this mblk exceeds the hardware 2663 * limit. Caller should pull up the frame before trying to send it out. 2664 * (This error means our pullup_early heuristic did not work for this frame) 2665 * 2666 * ENOMEM indicates a temporary shortage of resources (DMA handles, other DMA 2667 * resources, etc.). Caller should suspend the tx queue and wait for reclaim to 2668 * free up resources. 
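 *
 * As an illustration (sizes are made up): an mblk whose data the DMA
 * attributes split into three cookies consumes a single DMA handle
 * (hdls_used goes up by one) but three SGL segments (add_seg() runs once
 * per cookie), so a frame can hit the TX_SGL_SEGS limit well before the
 * queue runs out of DMA handles.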
2669 */ 2670 static inline int 2671 add_mblk(struct sge_txq *txq, struct txinfo *txinfo, mblk_t *m, int len) 2672 { 2673 ddi_dma_handle_t dhdl; 2674 ddi_dma_cookie_t cookie; 2675 uint_t ccount = 0; 2676 int rc; 2677 2678 TXQ_LOCK_ASSERT_OWNED(txq); /* will manipulate dhdls */ 2679 2680 if (txq->tx_dhdl_avail == 0) { 2681 txq->dma_hdl_failed++; 2682 return (ENOMEM); 2683 } 2684 2685 dhdl = txq->tx_dhdl[txq->tx_dhdl_pidx]; 2686 rc = ddi_dma_addr_bind_handle(dhdl, NULL, (caddr_t)m->b_rptr, len, 2687 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL, &cookie, 2688 &ccount); 2689 if (rc != DDI_DMA_MAPPED) { 2690 txq->dma_map_failed++; 2691 2692 ASSERT(rc != DDI_DMA_INUSE && rc != DDI_DMA_PARTIAL_MAP); 2693 2694 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EIO); 2695 } 2696 2697 if (ccount + txinfo->nsegs > TX_SGL_SEGS) { 2698 (void) ddi_dma_unbind_handle(dhdl); 2699 return (E2BIG); 2700 } 2701 2702 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size); 2703 while (--ccount) { 2704 ddi_dma_nextcookie(dhdl, &cookie); 2705 add_seg(txinfo, cookie.dmac_laddress, cookie.dmac_size); 2706 } 2707 2708 if (++txq->tx_dhdl_pidx == txq->tx_dhdl_total) 2709 txq->tx_dhdl_pidx = 0; 2710 txq->tx_dhdl_avail--; 2711 txinfo->hdls_used++; 2712 2713 return (0); 2714 } 2715 2716 /* 2717 * Releases all the txq resources used up in the specified txinfo. 2718 */ 2719 static void 2720 free_txinfo_resources(struct sge_txq *txq, struct txinfo *txinfo) 2721 { 2722 int n; 2723 2724 TXQ_LOCK_ASSERT_OWNED(txq); /* dhdls, txb */ 2725 2726 n = txinfo->txb_used; 2727 if (n > 0) { 2728 txq->txb_avail += n; 2729 if (n <= txq->txb_next) 2730 txq->txb_next -= n; 2731 else { 2732 n -= txq->txb_next; 2733 txq->txb_next = txq->txb_size - n; 2734 } 2735 } 2736 2737 for (n = txinfo->hdls_used; n > 0; n--) { 2738 if (txq->tx_dhdl_pidx > 0) 2739 txq->tx_dhdl_pidx--; 2740 else 2741 txq->tx_dhdl_pidx = txq->tx_dhdl_total - 1; 2742 txq->tx_dhdl_avail++; 2743 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_pidx]); 2744 } 2745 } 2746 2747 /* 2748 * Returns 0 to indicate that m has been accepted into a coalesced tx work 2749 * request. It has either been folded into txpkts or txpkts was flushed and m 2750 * has started a new coalesced work request (as the first frame in a fresh 2751 * txpkts). 2752 * 2753 * Returns non-zero to indicate a failure - caller is responsible for 2754 * transmitting m, if there was anything in txpkts it has been flushed. 2755 */ 2756 static int 2757 add_to_txpkts(struct sge_txq *txq, struct txpkts *txpkts, mblk_t *m, 2758 struct txinfo *txinfo) 2759 { 2760 struct sge_eq *eq = &txq->eq; 2761 int can_coalesce; 2762 struct tx_sdesc *txsd; 2763 uint8_t flits; 2764 2765 TXQ_LOCK_ASSERT_OWNED(txq); 2766 2767 if (txpkts->npkt > 0) { 2768 flits = TXPKTS_PKT_HDR + txinfo->nflits; 2769 can_coalesce = (txinfo->flags & HW_LSO) == 0 && 2770 txpkts->nflits + flits <= TX_WR_FLITS && 2771 txpkts->nflits + flits <= eq->avail * 8 && 2772 txpkts->plen + txinfo->len < 65536; 2773 2774 if (can_coalesce != 0) { 2775 txpkts->tail->b_next = m; 2776 txpkts->tail = m; 2777 txpkts->npkt++; 2778 txpkts->nflits += flits; 2779 txpkts->plen += txinfo->len; 2780 2781 txsd = &txq->sdesc[eq->pidx]; 2782 txsd->txb_used += txinfo->txb_used; 2783 txsd->hdls_used += txinfo->hdls_used; 2784 2785 return (0); 2786 } 2787 2788 /* 2789 * Couldn't coalesce m into txpkts. The first order of business 2790 * is to send txpkts on its way. Then we'll revisit m. 
2791 */ 2792 write_txpkts_wr(txq, txpkts); 2793 } 2794 2795 /* 2796 * Check if we can start a new coalesced tx work request with m as 2797 * the first packet in it. 2798 */ 2799 2800 ASSERT(txpkts->npkt == 0); 2801 ASSERT(txinfo->len < 65536); 2802 2803 flits = TXPKTS_WR_HDR + txinfo->nflits; 2804 can_coalesce = (txinfo->flags & HW_LSO) == 0 && 2805 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 2806 2807 if (can_coalesce == 0) 2808 return (EINVAL); 2809 2810 /* 2811 * Start a fresh coalesced tx WR with m as the first frame in it. 2812 */ 2813 txpkts->tail = m; 2814 txpkts->npkt = 1; 2815 txpkts->nflits = flits; 2816 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 2817 txpkts->plen = txinfo->len; 2818 2819 txsd = &txq->sdesc[eq->pidx]; 2820 txsd->m = m; 2821 txsd->txb_used = txinfo->txb_used; 2822 txsd->hdls_used = txinfo->hdls_used; 2823 2824 return (0); 2825 } 2826 2827 /* 2828 * Note that write_txpkts_wr can never run out of hardware descriptors (but 2829 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 2830 * coalescing only if sufficient hardware descriptors are available. 2831 */ 2832 static void 2833 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 2834 { 2835 struct sge_eq *eq = &txq->eq; 2836 struct fw_eth_tx_pkts_wr *wr; 2837 struct tx_sdesc *txsd; 2838 uint32_t ctrl; 2839 uint16_t ndesc; 2840 2841 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */ 2842 2843 ndesc = howmany(txpkts->nflits, 8); 2844 2845 wr = (void *)&eq->desc[eq->pidx]; 2846 wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) | 2847 V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */ 2848 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 2849 if (eq->avail == ndesc) 2850 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 2851 wr->equiq_to_len16 = cpu_to_be32(ctrl); 2852 wr->plen = cpu_to_be16(txpkts->plen); 2853 wr->npkt = txpkts->npkt; 2854 wr->r3 = wr->type = 0; 2855 2856 /* Everything else already written */ 2857 2858 txsd = &txq->sdesc[eq->pidx]; 2859 txsd->desc_used = ndesc; 2860 2861 txq->txb_used += txsd->txb_used / TXB_CHUNK; 2862 txq->hdl_used += txsd->hdls_used; 2863 2864 ASSERT(eq->avail >= ndesc); 2865 2866 eq->pending += ndesc; 2867 eq->avail -= ndesc; 2868 eq->pidx += ndesc; 2869 if (eq->pidx >= eq->cap) 2870 eq->pidx -= eq->cap; 2871 2872 txq->txpkts_pkts += txpkts->npkt; 2873 txq->txpkts_wrs++; 2874 txpkts->npkt = 0; /* emptied */ 2875 } 2876 2877 typedef enum { 2878 COS_SUCCESS, /* ctrl flit contains proper bits for csum offload */ 2879 COS_IGNORE, /* no csum offload requested */ 2880 COS_FAIL, /* csum offload requested, but pkt data missing */ 2881 } csum_offload_status_t; 2882 /* 2883 * Build a ctrl1 flit for checksum offload in CPL_TX_PKT_XT command 2884 */ 2885 static csum_offload_status_t 2886 csum_to_ctrl(const struct txinfo *txinfo, uint32_t chip_version, 2887 uint64_t *ctrlp) 2888 { 2889 const mac_ether_offload_info_t *meoi = &txinfo->meoi; 2890 const uint32_t tx_flags = txinfo->flags; 2891 const boolean_t needs_l3_csum = (tx_flags & HW_LSO) != 0 || 2892 (tx_flags & HCK_IPV4_HDRCKSUM) != 0; 2893 const boolean_t needs_l4_csum = (tx_flags & HW_LSO) != 0 || 2894 (tx_flags & (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) != 0; 2895 2896 /* 2897 * Default to disabling any checksumming both for cases where it is not 2898 * requested, but also if we cannot appropriately interrogate the 2899 * required information from the packet. 
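 *
 * For example (a sketch of the cases handled below): a TCP over IPv4
 * frame with HCK_IPV4_HDRCKSUM and HCK_FULLCKSUM set ends up with both
 * F_TXPKT_IPCSUM_DIS and F_TXPKT_L4CSUM_DIS cleared and a checksum type
 * of TX_CSUM_TCPIP, while a frame that asks only for HCK_IPV4_HDRCKSUM
 * keeps F_TXPKT_L4CSUM_DIS set and falls back to TX_CSUM_IP.  In both
 * cases the Ethernet and IP header lengths from meoi are folded into the
 * returned ctrl value.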
2900 */ 2901 uint64_t ctrl = F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS; 2902 if (!needs_l3_csum && !needs_l4_csum) { 2903 *ctrlp = ctrl; 2904 return (COS_IGNORE); 2905 } 2906 2907 if (needs_l3_csum) { 2908 /* Only IPv4 checksums are supported (for L3) */ 2909 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0 || 2910 meoi->meoi_l3proto != ETHERTYPE_IP) { 2911 *ctrlp = ctrl; 2912 return (COS_FAIL); 2913 } 2914 ctrl &= ~F_TXPKT_IPCSUM_DIS; 2915 } 2916 2917 if (needs_l4_csum) { 2918 /* 2919 * We need at least all of the L3 header to make decisions about 2920 * the contained L4 protocol. If not all of the L4 information 2921 * is present, we will leave it to the NIC to checksum all it is 2922 * able to. 2923 */ 2924 if ((meoi->meoi_flags & MEOI_L3INFO_SET) == 0) { 2925 *ctrlp = ctrl; 2926 return (COS_FAIL); 2927 } 2928 2929 /* 2930 * Since we are parsing the packet anyways, make the checksum 2931 * decision based on the L4 protocol, rather than using the 2932 * Generic TCP/UDP checksum using start & end offsets in the 2933 * packet (like requested with PARTIALCKSUM). 2934 */ 2935 int csum_type = -1; 2936 if (meoi->meoi_l3proto == ETHERTYPE_IP && 2937 meoi->meoi_l4proto == IPPROTO_TCP) { 2938 csum_type = TX_CSUM_TCPIP; 2939 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 && 2940 meoi->meoi_l4proto == IPPROTO_TCP) { 2941 csum_type = TX_CSUM_TCPIP6; 2942 } else if (meoi->meoi_l3proto == ETHERTYPE_IP && 2943 meoi->meoi_l4proto == IPPROTO_UDP) { 2944 csum_type = TX_CSUM_UDPIP; 2945 } else if (meoi->meoi_l3proto == ETHERTYPE_IPV6 && 2946 meoi->meoi_l4proto == IPPROTO_UDP) { 2947 csum_type = TX_CSUM_UDPIP6; 2948 } else { 2949 *ctrlp = ctrl; 2950 return (COS_FAIL); 2951 } 2952 2953 ASSERT(csum_type != -1); 2954 ctrl &= ~F_TXPKT_L4CSUM_DIS; 2955 ctrl |= V_TXPKT_CSUM_TYPE(csum_type); 2956 } 2957 2958 if ((ctrl & F_TXPKT_IPCSUM_DIS) == 0 && 2959 (ctrl & F_TXPKT_L4CSUM_DIS) != 0) { 2960 /* 2961 * If only the IPv4 checksum is requested, we need to set an 2962 * appropriate type in the command for it. 2963 */ 2964 ctrl |= V_TXPKT_CSUM_TYPE(TX_CSUM_IP); 2965 } 2966 2967 ASSERT(ctrl != (F_TXPKT_L4CSUM_DIS | F_TXPKT_IPCSUM_DIS)); 2968 2969 /* 2970 * Fill in the requisite L2/L3 header length data. 2971 * 2972 * The Ethernet header length is recorded as 'size - 14 bytes' 2973 */ 2974 const uint8_t eth_len = meoi->meoi_l2hlen - 14; 2975 if (chip_version >= CHELSIO_T6) { 2976 ctrl |= V_T6_TXPKT_ETHHDR_LEN(eth_len); 2977 } else { 2978 ctrl |= V_TXPKT_ETHHDR_LEN(eth_len); 2979 } 2980 ctrl |= V_TXPKT_IPHDR_LEN(meoi->meoi_l3hlen); 2981 2982 *ctrlp = ctrl; 2983 return (COS_SUCCESS); 2984 } 2985 2986 static int 2987 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, mblk_t *m, 2988 struct txinfo *txinfo) 2989 { 2990 struct sge_eq *eq = &txq->eq; 2991 struct fw_eth_tx_pkt_wr *wr; 2992 struct cpl_tx_pkt_core *cpl; 2993 uint32_t ctrl; /* used in many unrelated places */ 2994 uint64_t ctrl1; 2995 int nflits, ndesc; 2996 struct tx_sdesc *txsd; 2997 caddr_t dst; 2998 const mac_ether_offload_info_t *meoi = &txinfo->meoi; 2999 3000 TXQ_LOCK_ASSERT_OWNED(txq); /* pidx, avail */ 3001 3002 /* 3003 * Do we have enough flits to send this frame out? 
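 *
 * A worked example, assuming the usual 4-flit TXPKT_WR_HDR (the
 * fw_eth_tx_pkt_wr plus cpl_tx_pkt_core headers) and no LSO: an SGL
 * frame with txinfo->nflits = 7 needs 4 + 7 = 11 flits, i.e.
 * howmany(11, 8) = 2 hardware descriptors, while a 60 byte immediate
 * frame needs 4 + howmany(60, 8) = 12 flits (also 2 descriptors) with an
 * immediate length of 16 + 60 = 76 bytes.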
3004 */ 3005 ctrl = sizeof (struct cpl_tx_pkt_core); 3006 if (txinfo->flags & HW_LSO) { 3007 nflits = TXPKT_LSO_WR_HDR; 3008 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 3009 } else 3010 nflits = TXPKT_WR_HDR; 3011 if (txinfo->nsegs > 0) 3012 nflits += txinfo->nflits; 3013 else { 3014 nflits += howmany(txinfo->len, 8); 3015 ctrl += txinfo->len; 3016 } 3017 ndesc = howmany(nflits, 8); 3018 if (ndesc > eq->avail) 3019 return (ENOMEM); 3020 3021 /* Firmware work request header */ 3022 wr = (void *)&eq->desc[eq->pidx]; 3023 wr->op_immdlen = cpu_to_be32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 3024 V_FW_WR_IMMDLEN(ctrl)); 3025 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 3026 if (eq->avail == ndesc) 3027 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 3028 wr->equiq_to_len16 = cpu_to_be32(ctrl); 3029 wr->r3 = 0; 3030 3031 if (txinfo->flags & HW_LSO && 3032 (meoi->meoi_flags & MEOI_L4INFO_SET) != 0 && 3033 meoi->meoi_l4proto == IPPROTO_TCP) { 3034 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 3035 3036 ctrl = V_LSO_OPCODE((u32)CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 3037 F_LSO_LAST_SLICE; 3038 3039 if (meoi->meoi_l2hlen > sizeof (struct ether_header)) { 3040 /* 3041 * This presently assumes a standard VLAN header, 3042 * without support for Q-in-Q. 3043 */ 3044 ctrl |= V_LSO_ETHHDR_LEN(1); 3045 } 3046 3047 switch (meoi->meoi_l3proto) { 3048 case ETHERTYPE_IPV6: 3049 ctrl |= F_LSO_IPV6; 3050 /* FALLTHROUGH */ 3051 case ETHERTYPE_IP: 3052 ctrl |= V_LSO_IPHDR_LEN(meoi->meoi_l3hlen / 4); 3053 break; 3054 default: 3055 break; 3056 } 3057 3058 ctrl |= V_LSO_TCPHDR_LEN(meoi->meoi_l4hlen / 4); 3059 3060 lso->lso_ctrl = cpu_to_be32(ctrl); 3061 lso->ipid_ofst = cpu_to_be16(0); 3062 lso->mss = cpu_to_be16(txinfo->mss); 3063 lso->seqno_offset = cpu_to_be32(0); 3064 if (is_t4(pi->adapter->params.chip)) 3065 lso->len = cpu_to_be32(txinfo->len); 3066 else 3067 lso->len = cpu_to_be32(V_LSO_T5_XFER_SIZE(txinfo->len)); 3068 3069 cpl = (void *)(lso + 1); 3070 3071 txq->tso_wrs++; 3072 } else { 3073 cpl = (void *)(wr + 1); 3074 } 3075 3076 /* Checksum offload */ 3077 switch (csum_to_ctrl(txinfo, 3078 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl1)) { 3079 case COS_SUCCESS: 3080 txq->txcsum++; 3081 break; 3082 case COS_FAIL: 3083 /* 3084 * Packet will be going out with checksums which are probably 3085 * wrong but there is little we can do now. 
3086 */ 3087 txq->csum_failed++; 3088 break; 3089 default: 3090 break; 3091 } 3092 3093 /* CPL header */ 3094 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 3095 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 3096 cpl->pack = 0; 3097 cpl->len = cpu_to_be16(txinfo->len); 3098 cpl->ctrl1 = cpu_to_be64(ctrl1); 3099 3100 /* Software descriptor */ 3101 txsd = &txq->sdesc[eq->pidx]; 3102 txsd->m = m; 3103 txsd->txb_used = txinfo->txb_used; 3104 txsd->hdls_used = txinfo->hdls_used; 3105 /* LINTED: E_ASSIGN_NARROW_CONV */ 3106 txsd->desc_used = ndesc; 3107 3108 txq->txb_used += txinfo->txb_used / TXB_CHUNK; 3109 txq->hdl_used += txinfo->hdls_used; 3110 3111 eq->pending += ndesc; 3112 eq->avail -= ndesc; 3113 eq->pidx += ndesc; 3114 if (eq->pidx >= eq->cap) 3115 eq->pidx -= eq->cap; 3116 3117 /* SGL */ 3118 dst = (void *)(cpl + 1); 3119 if (txinfo->nsegs > 0) { 3120 txq->sgl_wrs++; 3121 copy_to_txd(eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8); 3122 3123 /* Need to zero-pad to a 16 byte boundary if not on one */ 3124 if ((uintptr_t)dst & 0xf) 3125 /* LINTED: E_BAD_PTR_CAST_ALIGN */ 3126 *(uint64_t *)dst = 0; 3127 3128 } else { 3129 txq->imm_wrs++; 3130 #ifdef DEBUG 3131 ctrl = txinfo->len; 3132 #endif 3133 for (; m; m = m->b_cont) { 3134 copy_to_txd(eq, (void *)m->b_rptr, &dst, MBLKL(m)); 3135 #ifdef DEBUG 3136 ctrl -= MBLKL(m); 3137 #endif 3138 } 3139 ASSERT(ctrl == 0); 3140 } 3141 3142 txq->txpkt_wrs++; 3143 return (0); 3144 } 3145 3146 static inline void 3147 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 3148 struct txpkts *txpkts, struct txinfo *txinfo) 3149 { 3150 struct ulp_txpkt *ulpmc; 3151 struct ulptx_idata *ulpsc; 3152 struct cpl_tx_pkt_core *cpl; 3153 uintptr_t flitp, start, end; 3154 uint64_t ctrl; 3155 caddr_t dst; 3156 3157 ASSERT(txpkts->npkt > 0); 3158 3159 start = (uintptr_t)txq->eq.desc; 3160 end = (uintptr_t)txq->eq.spg; 3161 3162 /* Checksum offload */ 3163 switch (csum_to_ctrl(txinfo, 3164 CHELSIO_CHIP_VERSION(pi->adapter->params.chip), &ctrl)) { 3165 case COS_SUCCESS: 3166 txq->txcsum++; 3167 break; 3168 case COS_FAIL: 3169 /* 3170 * Packet will be going out with checksums which are probably 3171 * wrong but there is little we can do now. 3172 */ 3173 txq->csum_failed++; 3174 break; 3175 default: 3176 break; 3177 } 3178 3179 /* 3180 * The previous packet's SGL must have ended at a 16 byte boundary (this 3181 * is required by the firmware/hardware). It follows that flitp cannot 3182 * wrap around between the ULPTX master command and ULPTX subcommand (8 3183 * bytes each), and that it can not wrap around in the middle of the 3184 * cpl_tx_pkt_core either. 
3185 */ 3186 flitp = (uintptr_t)txpkts->flitp; 3187 ASSERT((flitp & 0xf) == 0); 3188 3189 /* ULP master command */ 3190 ulpmc = (void *)flitp; 3191 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); 3192 ulpmc->len = htonl(howmany(sizeof (*ulpmc) + sizeof (*ulpsc) + 3193 sizeof (*cpl) + 8 * txinfo->nflits, 16)); 3194 3195 /* ULP subcommand */ 3196 ulpsc = (void *)(ulpmc + 1); 3197 ulpsc->cmd_more = cpu_to_be32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 3198 F_ULP_TX_SC_MORE); 3199 ulpsc->len = cpu_to_be32(sizeof (struct cpl_tx_pkt_core)); 3200 3201 flitp += sizeof (*ulpmc) + sizeof (*ulpsc); 3202 if (flitp == end) 3203 flitp = start; 3204 3205 /* CPL_TX_PKT_XT */ 3206 cpl = (void *)flitp; 3207 cpl->ctrl0 = cpu_to_be32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 3208 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 3209 cpl->pack = 0; 3210 cpl->len = cpu_to_be16(txinfo->len); 3211 cpl->ctrl1 = cpu_to_be64(ctrl); 3212 3213 flitp += sizeof (*cpl); 3214 if (flitp == end) 3215 flitp = start; 3216 3217 /* SGL for this frame */ 3218 dst = (caddr_t)flitp; 3219 copy_to_txd(&txq->eq, (void *)&txinfo->sgl, &dst, txinfo->nflits * 8); 3220 flitp = (uintptr_t)dst; 3221 3222 /* Zero pad and advance to a 16 byte boundary if not already at one. */ 3223 if (flitp & 0xf) { 3224 3225 /* no matter what, flitp should be on an 8 byte boundary */ 3226 ASSERT((flitp & 0x7) == 0); 3227 3228 *(uint64_t *)flitp = 0; 3229 flitp += sizeof (uint64_t); 3230 txpkts->nflits++; 3231 } 3232 3233 if (flitp == end) 3234 flitp = start; 3235 3236 txpkts->flitp = (void *)flitp; 3237 } 3238 3239 static inline void 3240 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 3241 { 3242 if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) { 3243 bcopy(from, *to, len); 3244 (*to) += len; 3245 } else { 3246 int portion = (uintptr_t)eq->spg - (uintptr_t)(*to); 3247 3248 bcopy(from, *to, portion); 3249 from += portion; 3250 portion = len - portion; /* remaining */ 3251 bcopy(from, (void *)eq->desc, portion); 3252 (*to) = (caddr_t)eq->desc + portion; 3253 } 3254 } 3255 3256 static inline void 3257 ring_tx_db(struct adapter *sc, struct sge_eq *eq) 3258 { 3259 int val, db_mode; 3260 u_int db = eq->doorbells; 3261 3262 if (eq->pending > 1) 3263 db &= ~DOORBELL_WCWR; 3264 3265 if (eq->pending > eq->pidx) { 3266 int offset = eq->cap - (eq->pending - eq->pidx); 3267 3268 /* pidx has wrapped around since last doorbell */ 3269 3270 (void) ddi_dma_sync(eq->desc_dhdl, 3271 offset * sizeof (struct tx_desc), 0, 3272 DDI_DMA_SYNC_FORDEV); 3273 (void) ddi_dma_sync(eq->desc_dhdl, 3274 0, eq->pidx * sizeof (struct tx_desc), 3275 DDI_DMA_SYNC_FORDEV); 3276 } else if (eq->pending > 0) { 3277 (void) ddi_dma_sync(eq->desc_dhdl, 3278 (eq->pidx - eq->pending) * sizeof (struct tx_desc), 3279 eq->pending * sizeof (struct tx_desc), 3280 DDI_DMA_SYNC_FORDEV); 3281 } 3282 3283 membar_producer(); 3284 3285 if (is_t4(sc->params.chip)) 3286 val = V_PIDX(eq->pending); 3287 else 3288 val = V_PIDX_T5(eq->pending); 3289 3290 db_mode = (1 << (ffs(db) - 1)); 3291 switch (db_mode) { 3292 case DOORBELL_UDB: 3293 *eq->udb = LE_32(V_QID(eq->udb_qid) | val); 3294 break; 3295 3296 case DOORBELL_WCWR: 3297 { 3298 volatile uint64_t *dst, *src; 3299 int i; 3300 /* 3301 * Queues whose 128B doorbell segment fits in 3302 * the page do not use relative qid 3303 * (udb_qid is always 0). Only queues with 3304 * doorbell segments can do WCWR. 
3305 */ 3306 ASSERT(eq->udb_qid == 0 && eq->pending == 1); 3307 3308 dst = (volatile void *)((uintptr_t)eq->udb + 3309 UDBS_WR_OFFSET - UDBS_DB_OFFSET); 3310 i = eq->pidx ? eq->pidx - 1 : eq->cap - 1; 3311 src = (void *)&eq->desc[i]; 3312 while (src != (void *)&eq->desc[i + 1]) 3313 *dst++ = *src++; 3314 membar_producer(); 3315 break; 3316 } 3317 3318 case DOORBELL_UDBWC: 3319 *eq->udb = LE_32(V_QID(eq->udb_qid) | val); 3320 membar_producer(); 3321 break; 3322 3323 case DOORBELL_KDB: 3324 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), 3325 V_QID(eq->cntxt_id) | val); 3326 break; 3327 } 3328 3329 eq->pending = 0; 3330 } 3331 3332 static int 3333 reclaim_tx_descs(struct sge_txq *txq, int howmany) 3334 { 3335 struct tx_sdesc *txsd; 3336 uint_t cidx, can_reclaim, reclaimed, txb_freed, hdls_freed; 3337 struct sge_eq *eq = &txq->eq; 3338 3339 EQ_LOCK_ASSERT_OWNED(eq); 3340 3341 cidx = eq->spg->cidx; /* stable snapshot */ 3342 cidx = be16_to_cpu(cidx); 3343 3344 if (cidx >= eq->cidx) 3345 can_reclaim = cidx - eq->cidx; 3346 else 3347 can_reclaim = cidx + eq->cap - eq->cidx; 3348 3349 if (can_reclaim == 0) 3350 return (0); 3351 3352 txb_freed = hdls_freed = reclaimed = 0; 3353 do { 3354 int ndesc; 3355 3356 txsd = &txq->sdesc[eq->cidx]; 3357 ndesc = txsd->desc_used; 3358 3359 /* Firmware doesn't return "partial" credits. */ 3360 ASSERT(can_reclaim >= ndesc); 3361 3362 /* 3363 * We always keep mblk around, even for immediate data. If mblk 3364 * is NULL, this has to be the software descriptor for a credit 3365 * flush work request. 3366 */ 3367 if (txsd->m != NULL) 3368 freemsgchain(txsd->m); 3369 #ifdef DEBUG 3370 else { 3371 ASSERT(txsd->txb_used == 0); 3372 ASSERT(txsd->hdls_used == 0); 3373 ASSERT(ndesc == 1); 3374 } 3375 #endif 3376 3377 txb_freed += txsd->txb_used; 3378 hdls_freed += txsd->hdls_used; 3379 reclaimed += ndesc; 3380 3381 eq->cidx += ndesc; 3382 if (eq->cidx >= eq->cap) 3383 eq->cidx -= eq->cap; 3384 3385 can_reclaim -= ndesc; 3386 3387 } while (can_reclaim && reclaimed < howmany); 3388 3389 eq->avail += reclaimed; 3390 ASSERT(eq->avail < eq->cap); /* avail tops out at (cap - 1) */ 3391 3392 txq->txb_avail += txb_freed; 3393 3394 txq->tx_dhdl_avail += hdls_freed; 3395 ASSERT(txq->tx_dhdl_avail <= txq->tx_dhdl_total); 3396 for (; hdls_freed; hdls_freed--) { 3397 (void) ddi_dma_unbind_handle(txq->tx_dhdl[txq->tx_dhdl_cidx]); 3398 if (++txq->tx_dhdl_cidx == txq->tx_dhdl_total) 3399 txq->tx_dhdl_cidx = 0; 3400 } 3401 3402 return (reclaimed); 3403 } 3404 3405 static void 3406 write_txqflush_wr(struct sge_txq *txq) 3407 { 3408 struct sge_eq *eq = &txq->eq; 3409 struct fw_eq_flush_wr *wr; 3410 struct tx_sdesc *txsd; 3411 3412 EQ_LOCK_ASSERT_OWNED(eq); 3413 ASSERT(eq->avail > 0); 3414 3415 wr = (void *)&eq->desc[eq->pidx]; 3416 bzero(wr, sizeof (*wr)); 3417 wr->opcode = FW_EQ_FLUSH_WR; 3418 wr->equiq_to_len16 = cpu_to_be32(V_FW_WR_LEN16(sizeof (*wr) / 16) | 3419 F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3420 3421 txsd = &txq->sdesc[eq->pidx]; 3422 txsd->m = NULL; 3423 txsd->txb_used = 0; 3424 txsd->hdls_used = 0; 3425 txsd->desc_used = 1; 3426 3427 eq->pending++; 3428 eq->avail--; 3429 if (++eq->pidx == eq->cap) 3430 eq->pidx = 0; 3431 } 3432 3433 static int 3434 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m) 3435 { 3436 bool csum_ok; 3437 uint16_t err_vec; 3438 struct sge_rxq *rxq = (void *)iq; 3439 struct mblk_pair chain = {0}; 3440 struct adapter *sc = iq->adapter; 3441 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); 3442 3443 iq->intr_next = iq->intr_params; 3444 
3445 m->b_rptr += sc->sge.pktshift; 3446 3447 /* Compressed error vector is enabled for T6 only */ 3448 if (sc->params.tp.rx_pkt_encap) 3449 /* It is enabled only in T6 config file */ 3450 err_vec = G_T6_COMPR_RXERR_VEC(ntohs(cpl->err_vec)); 3451 else 3452 err_vec = ntohs(cpl->err_vec); 3453 3454 csum_ok = cpl->csum_calc && !err_vec; 3455 /* TODO: what about cpl->ip_frag? */ 3456 if (csum_ok && !cpl->ip_frag) { 3457 mac_hcksum_set(m, 0, 0, 0, 0xffff, 3458 HCK_FULLCKSUM_OK | HCK_FULLCKSUM | 3459 HCK_IPV4_HDRCKSUM_OK); 3460 rxq->rxcsum++; 3461 } 3462 3463 /* Add to the chain that we'll send up */ 3464 if (chain.head != NULL) 3465 chain.tail->b_next = m; 3466 else 3467 chain.head = m; 3468 chain.tail = m; 3469 3470 t4_mac_rx(rxq->port, rxq, chain.head); 3471 3472 rxq->rxpkts++; 3473 rxq->rxbytes += be16_to_cpu(cpl->len); 3474 return (0); 3475 } 3476 3477 #define FL_HW_IDX(idx) ((idx) >> 3) 3478 3479 static inline void 3480 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 3481 { 3482 int desc_start, desc_last, ndesc; 3483 uint32_t v = sc->params.arch.sge_fl_db ; 3484 3485 ndesc = FL_HW_IDX(fl->pending); 3486 3487 /* Hold back one credit if pidx = cidx */ 3488 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx)) 3489 ndesc--; 3490 3491 /* 3492 * There are chances of ndesc modified above (to avoid pidx = cidx). 3493 * If there is nothing to post, return. 3494 */ 3495 if (ndesc <= 0) 3496 return; 3497 3498 desc_last = FL_HW_IDX(fl->pidx); 3499 3500 if (fl->pidx < fl->pending) { 3501 /* There was a wrap */ 3502 desc_start = FL_HW_IDX(fl->pidx + fl->cap - fl->pending); 3503 3504 /* From desc_start to the end of list */ 3505 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE, 0, 3506 DDI_DMA_SYNC_FORDEV); 3507 3508 /* From start of list to the desc_last */ 3509 if (desc_last != 0) 3510 (void) ddi_dma_sync(fl->dhdl, 0, desc_last * 3511 RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV); 3512 } else { 3513 /* There was no wrap, sync from start_desc to last_desc */ 3514 desc_start = FL_HW_IDX(fl->pidx - fl->pending); 3515 (void) ddi_dma_sync(fl->dhdl, desc_start * RX_FL_ESIZE, 3516 ndesc * RX_FL_ESIZE, DDI_DMA_SYNC_FORDEV); 3517 } 3518 3519 if (is_t4(sc->params.chip)) 3520 v |= V_PIDX(ndesc); 3521 else 3522 v |= V_PIDX_T5(ndesc); 3523 v |= V_QID(fl->cntxt_id) | V_PIDX(ndesc); 3524 3525 membar_producer(); 3526 3527 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v); 3528 3529 /* 3530 * Update pending count: 3531 * Deduct the number of descriptors posted 3532 */ 3533 fl->pending -= ndesc * 8; 3534 } 3535 3536 static void 3537 tx_reclaim_task(void *arg) 3538 { 3539 struct sge_txq *txq = arg; 3540 3541 TXQ_LOCK(txq); 3542 reclaim_tx_descs(txq, txq->eq.qsize); 3543 TXQ_UNLOCK(txq); 3544 } 3545 3546 /* ARGSUSED */ 3547 static int 3548 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 3549 mblk_t *m) 3550 { 3551 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 3552 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 3553 struct adapter *sc = iq->adapter; 3554 struct sge *s = &sc->sge; 3555 struct sge_eq *eq; 3556 struct sge_txq *txq; 3557 3558 txq = (void *)s->eqmap[qid - s->eq_start]; 3559 eq = &txq->eq; 3560 txq->qflush++; 3561 t4_mac_tx_update(txq->port, txq); 3562 3563 ddi_taskq_dispatch(sc->tq[eq->tx_chan], tx_reclaim_task, 3564 (void *)txq, DDI_NOSLEEP); 3565 3566 return (0); 3567 } 3568 3569 static int 3570 handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m) 3571 { 3572 struct adapter *sc = iq->adapter; 3573 const struct cpl_fw6_msg *cpl = (const void *)(rss + 
1); 3574 3575 ASSERT(m == NULL); 3576 3577 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 3578 const struct rss_header *rss2; 3579 3580 rss2 = (const struct rss_header *)&cpl->data[0]; 3581 return (sc->cpl_handler[rss2->opcode](iq, rss2, m)); 3582 } 3583 return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0])); 3584 } 3585 3586 int 3587 t4_alloc_tx_maps(struct adapter *sc, struct tx_maps *txmaps, int count, 3588 int flags) 3589 { 3590 int i, rc; 3591 3592 txmaps->map_total = count; 3593 txmaps->map_avail = txmaps->map_cidx = txmaps->map_pidx = 0; 3594 3595 txmaps->map = kmem_zalloc(sizeof (ddi_dma_handle_t) * 3596 txmaps->map_total, flags); 3597 3598 for (i = 0; i < count; i++) { 3599 rc = ddi_dma_alloc_handle(sc->dip, &sc->sge.dma_attr_tx, 3600 DDI_DMA_SLEEP, 0, &txmaps->map[i]); 3601 if (rc != DDI_SUCCESS) { 3602 cxgb_printf(sc->dip, CE_WARN, 3603 "%s: failed to allocate DMA handle (%d)", 3604 __func__, rc); 3605 return (rc == DDI_DMA_NORESOURCES ? ENOMEM : EINVAL); 3606 } 3607 txmaps->map_avail++; 3608 } 3609 3610 return (0); 3611 } 3612 3613 #define KS_UINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_ULONG) 3614 #define KS_CINIT(x) kstat_named_init(&kstatp->x, #x, KSTAT_DATA_CHAR) 3615 #define KS_U_SET(x, y) kstatp->x.value.ul = (y) 3616 #define KS_U_FROM(x, y) kstatp->x.value.ul = (y)->x 3617 #define KS_C_SET(x, ...) \ 3618 (void) snprintf(kstatp->x.value.c, 16, __VA_ARGS__) 3619 3620 /* 3621 * cxgbe:X:config 3622 */ 3623 struct cxgbe_port_config_kstats { 3624 kstat_named_t idx; 3625 kstat_named_t nrxq; 3626 kstat_named_t ntxq; 3627 kstat_named_t first_rxq; 3628 kstat_named_t first_txq; 3629 kstat_named_t controller; 3630 kstat_named_t factory_mac_address; 3631 }; 3632 3633 /* 3634 * cxgbe:X:info 3635 */ 3636 struct cxgbe_port_info_kstats { 3637 kstat_named_t transceiver; 3638 kstat_named_t rx_ovflow0; 3639 kstat_named_t rx_ovflow1; 3640 kstat_named_t rx_ovflow2; 3641 kstat_named_t rx_ovflow3; 3642 kstat_named_t rx_trunc0; 3643 kstat_named_t rx_trunc1; 3644 kstat_named_t rx_trunc2; 3645 kstat_named_t rx_trunc3; 3646 kstat_named_t tx_pause; 3647 kstat_named_t rx_pause; 3648 }; 3649 3650 static kstat_t * 3651 setup_port_config_kstats(struct port_info *pi) 3652 { 3653 kstat_t *ksp; 3654 struct cxgbe_port_config_kstats *kstatp; 3655 int ndata; 3656 dev_info_t *pdip = ddi_get_parent(pi->dip); 3657 uint8_t *ma = &pi->hw_addr[0]; 3658 3659 ndata = sizeof (struct cxgbe_port_config_kstats) / 3660 sizeof (kstat_named_t); 3661 3662 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "config", 3663 "net", KSTAT_TYPE_NAMED, ndata, 0); 3664 if (ksp == NULL) { 3665 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats."); 3666 return (NULL); 3667 } 3668 3669 kstatp = (struct cxgbe_port_config_kstats *)ksp->ks_data; 3670 3671 KS_UINIT(idx); 3672 KS_UINIT(nrxq); 3673 KS_UINIT(ntxq); 3674 KS_UINIT(first_rxq); 3675 KS_UINIT(first_txq); 3676 KS_CINIT(controller); 3677 KS_CINIT(factory_mac_address); 3678 3679 KS_U_SET(idx, pi->port_id); 3680 KS_U_SET(nrxq, pi->nrxq); 3681 KS_U_SET(ntxq, pi->ntxq); 3682 KS_U_SET(first_rxq, pi->first_rxq); 3683 KS_U_SET(first_txq, pi->first_txq); 3684 KS_C_SET(controller, "%s%d", ddi_driver_name(pdip), 3685 ddi_get_instance(pdip)); 3686 KS_C_SET(factory_mac_address, "%02X%02X%02X%02X%02X%02X", 3687 ma[0], ma[1], ma[2], ma[3], ma[4], ma[5]); 3688 3689 /* Do NOT set ksp->ks_update. These kstats do not change. 
*/ 3690 3691 /* Install the kstat */ 3692 ksp->ks_private = (void *)pi; 3693 kstat_install(ksp); 3694 3695 return (ksp); 3696 } 3697 3698 static kstat_t * 3699 setup_port_info_kstats(struct port_info *pi) 3700 { 3701 kstat_t *ksp; 3702 struct cxgbe_port_info_kstats *kstatp; 3703 int ndata; 3704 3705 ndata = sizeof (struct cxgbe_port_info_kstats) / sizeof (kstat_named_t); 3706 3707 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), "info", 3708 "net", KSTAT_TYPE_NAMED, ndata, 0); 3709 if (ksp == NULL) { 3710 cxgb_printf(pi->dip, CE_WARN, "failed to initialize kstats."); 3711 return (NULL); 3712 } 3713 3714 kstatp = (struct cxgbe_port_info_kstats *)ksp->ks_data; 3715 3716 KS_CINIT(transceiver); 3717 KS_UINIT(rx_ovflow0); 3718 KS_UINIT(rx_ovflow1); 3719 KS_UINIT(rx_ovflow2); 3720 KS_UINIT(rx_ovflow3); 3721 KS_UINIT(rx_trunc0); 3722 KS_UINIT(rx_trunc1); 3723 KS_UINIT(rx_trunc2); 3724 KS_UINIT(rx_trunc3); 3725 KS_UINIT(tx_pause); 3726 KS_UINIT(rx_pause); 3727 3728 /* Install the kstat */ 3729 ksp->ks_update = update_port_info_kstats; 3730 ksp->ks_private = (void *)pi; 3731 kstat_install(ksp); 3732 3733 return (ksp); 3734 } 3735 3736 static int 3737 update_port_info_kstats(kstat_t *ksp, int rw) 3738 { 3739 struct cxgbe_port_info_kstats *kstatp = 3740 (struct cxgbe_port_info_kstats *)ksp->ks_data; 3741 struct port_info *pi = ksp->ks_private; 3742 static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", 3743 "active TWINAX", "LRM" }; 3744 uint32_t bgmap; 3745 3746 if (rw == KSTAT_WRITE) 3747 return (0); 3748 3749 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) 3750 KS_C_SET(transceiver, "unplugged"); 3751 else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) 3752 KS_C_SET(transceiver, "unknown"); 3753 else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) 3754 KS_C_SET(transceiver, "unsupported"); 3755 else if (pi->mod_type > 0 && pi->mod_type < ARRAY_SIZE(mod_str)) 3756 KS_C_SET(transceiver, "%s", mod_str[pi->mod_type]); 3757 else 3758 KS_C_SET(transceiver, "type %d", pi->mod_type); 3759 3760 #define GET_STAT(name) t4_read_reg64(pi->adapter, \ 3761 PORT_REG(pi->port_id, A_MPS_PORT_STAT_##name##_L)) 3762 #define GET_STAT_COM(name) t4_read_reg64(pi->adapter, \ 3763 A_MPS_STAT_##name##_L) 3764 3765 bgmap = G_NUMPORTS(t4_read_reg(pi->adapter, A_MPS_CMN_CTL)); 3766 if (bgmap == 0) 3767 bgmap = (pi->port_id == 0) ? 0xf : 0; 3768 else if (bgmap == 1) 3769 bgmap = (pi->port_id < 2) ? (3 << (2 * pi->port_id)) : 0; 3770 else 3771 bgmap = 1; 3772 3773 KS_U_SET(rx_ovflow0, (bgmap & 1) ? 3774 GET_STAT_COM(RX_BG_0_MAC_DROP_FRAME) : 0); 3775 KS_U_SET(rx_ovflow1, (bgmap & 2) ? 3776 GET_STAT_COM(RX_BG_1_MAC_DROP_FRAME) : 0); 3777 KS_U_SET(rx_ovflow2, (bgmap & 4) ? 3778 GET_STAT_COM(RX_BG_2_MAC_DROP_FRAME) : 0); 3779 KS_U_SET(rx_ovflow3, (bgmap & 8) ? 3780 GET_STAT_COM(RX_BG_3_MAC_DROP_FRAME) : 0); 3781 KS_U_SET(rx_trunc0, (bgmap & 1) ? 3782 GET_STAT_COM(RX_BG_0_MAC_TRUNC_FRAME) : 0); 3783 KS_U_SET(rx_trunc1, (bgmap & 2) ? 3784 GET_STAT_COM(RX_BG_1_MAC_TRUNC_FRAME) : 0); 3785 KS_U_SET(rx_trunc2, (bgmap & 4) ? 3786 GET_STAT_COM(RX_BG_2_MAC_TRUNC_FRAME) : 0); 3787 KS_U_SET(rx_trunc3, (bgmap & 8) ? 
3788 GET_STAT_COM(RX_BG_3_MAC_TRUNC_FRAME) : 0); 3789 3790 KS_U_SET(tx_pause, GET_STAT(TX_PORT_PAUSE)); 3791 KS_U_SET(rx_pause, GET_STAT(RX_PORT_PAUSE)); 3792 3793 return (0); 3794 3795 } 3796 3797 /* 3798 * cxgbe:X:rxqY 3799 */ 3800 struct rxq_kstats { 3801 kstat_named_t rxcsum; 3802 kstat_named_t rxpkts; 3803 kstat_named_t rxbytes; 3804 kstat_named_t nomem; 3805 }; 3806 3807 static kstat_t * 3808 setup_rxq_kstats(struct port_info *pi, struct sge_rxq *rxq, int idx) 3809 { 3810 struct kstat *ksp; 3811 struct rxq_kstats *kstatp; 3812 int ndata; 3813 char str[16]; 3814 3815 ndata = sizeof (struct rxq_kstats) / sizeof (kstat_named_t); 3816 (void) snprintf(str, sizeof (str), "rxq%u", idx); 3817 3818 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "rxq", 3819 KSTAT_TYPE_NAMED, ndata, 0); 3820 if (ksp == NULL) { 3821 cxgb_printf(pi->dip, CE_WARN, 3822 "%s: failed to initialize rxq kstats for queue %d.", 3823 __func__, idx); 3824 return (NULL); 3825 } 3826 3827 kstatp = (struct rxq_kstats *)ksp->ks_data; 3828 3829 KS_UINIT(rxcsum); 3830 KS_UINIT(rxpkts); 3831 KS_UINIT(rxbytes); 3832 KS_UINIT(nomem); 3833 3834 ksp->ks_update = update_rxq_kstats; 3835 ksp->ks_private = (void *)rxq; 3836 kstat_install(ksp); 3837 3838 return (ksp); 3839 } 3840 3841 static int 3842 update_rxq_kstats(kstat_t *ksp, int rw) 3843 { 3844 struct rxq_kstats *kstatp = (struct rxq_kstats *)ksp->ks_data; 3845 struct sge_rxq *rxq = ksp->ks_private; 3846 3847 if (rw == KSTAT_WRITE) 3848 return (0); 3849 3850 KS_U_FROM(rxcsum, rxq); 3851 KS_U_FROM(rxpkts, rxq); 3852 KS_U_FROM(rxbytes, rxq); 3853 KS_U_FROM(nomem, rxq); 3854 3855 return (0); 3856 } 3857 3858 /* 3859 * cxgbe:X:txqY 3860 */ 3861 struct txq_kstats { 3862 kstat_named_t txcsum; 3863 kstat_named_t tso_wrs; 3864 kstat_named_t imm_wrs; 3865 kstat_named_t sgl_wrs; 3866 kstat_named_t txpkt_wrs; 3867 kstat_named_t txpkts_wrs; 3868 kstat_named_t txpkts_pkts; 3869 kstat_named_t txb_used; 3870 kstat_named_t hdl_used; 3871 kstat_named_t txb_full; 3872 kstat_named_t dma_hdl_failed; 3873 kstat_named_t dma_map_failed; 3874 kstat_named_t qfull; 3875 kstat_named_t qflush; 3876 kstat_named_t pullup_early; 3877 kstat_named_t pullup_late; 3878 kstat_named_t pullup_failed; 3879 kstat_named_t csum_failed; 3880 }; 3881 3882 static kstat_t * 3883 setup_txq_kstats(struct port_info *pi, struct sge_txq *txq, int idx) 3884 { 3885 struct kstat *ksp; 3886 struct txq_kstats *kstatp; 3887 int ndata; 3888 char str[16]; 3889 3890 ndata = sizeof (struct txq_kstats) / sizeof (kstat_named_t); 3891 (void) snprintf(str, sizeof (str), "txq%u", idx); 3892 3893 ksp = kstat_create(T4_PORT_NAME, ddi_get_instance(pi->dip), str, "txq", 3894 KSTAT_TYPE_NAMED, ndata, 0); 3895 if (ksp == NULL) { 3896 cxgb_printf(pi->dip, CE_WARN, 3897 "%s: failed to initialize txq kstats for queue %d.", 3898 __func__, idx); 3899 return (NULL); 3900 } 3901 3902 kstatp = (struct txq_kstats *)ksp->ks_data; 3903 3904 KS_UINIT(txcsum); 3905 KS_UINIT(tso_wrs); 3906 KS_UINIT(imm_wrs); 3907 KS_UINIT(sgl_wrs); 3908 KS_UINIT(txpkt_wrs); 3909 KS_UINIT(txpkts_wrs); 3910 KS_UINIT(txpkts_pkts); 3911 KS_UINIT(txb_used); 3912 KS_UINIT(hdl_used); 3913 KS_UINIT(txb_full); 3914 KS_UINIT(dma_hdl_failed); 3915 KS_UINIT(dma_map_failed); 3916 KS_UINIT(qfull); 3917 KS_UINIT(qflush); 3918 KS_UINIT(pullup_early); 3919 KS_UINIT(pullup_late); 3920 KS_UINIT(pullup_failed); 3921 KS_UINIT(csum_failed); 3922 3923 ksp->ks_update = update_txq_kstats; 3924 ksp->ks_private = (void *)txq; 3925 kstat_install(ksp); 3926 3927 return (ksp); 3928 } 3929 3930 
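/*
 * The per-queue counters initialized above are exported as kstats and can
 * be inspected from userland with the kstat utility, for instance (the
 * instance and queue numbers are examples; the module name comes from
 * T4_PORT_NAME):
 *
 *	# kstat -m cxgbe -i 0 -n txq0
 *	# kstat -m cxgbe -i 0 -n rxq0
 *
 * Reads go through the ks_update callbacks (update_rxq_kstats above,
 * update_txq_kstats below), which simply snapshot the counters out of the
 * corresponding sge_rxq/sge_txq structures.
 */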
static int 3931 update_txq_kstats(kstat_t *ksp, int rw) 3932 { 3933 struct txq_kstats *kstatp = (struct txq_kstats *)ksp->ks_data; 3934 struct sge_txq *txq = ksp->ks_private; 3935 3936 if (rw == KSTAT_WRITE) 3937 return (0); 3938 3939 KS_U_FROM(txcsum, txq); 3940 KS_U_FROM(tso_wrs, txq); 3941 KS_U_FROM(imm_wrs, txq); 3942 KS_U_FROM(sgl_wrs, txq); 3943 KS_U_FROM(txpkt_wrs, txq); 3944 KS_U_FROM(txpkts_wrs, txq); 3945 KS_U_FROM(txpkts_pkts, txq); 3946 KS_U_FROM(txb_used, txq); 3947 KS_U_FROM(hdl_used, txq); 3948 KS_U_FROM(txb_full, txq); 3949 KS_U_FROM(dma_hdl_failed, txq); 3950 KS_U_FROM(dma_map_failed, txq); 3951 KS_U_FROM(qfull, txq); 3952 KS_U_FROM(qflush, txq); 3953 KS_U_FROM(pullup_early, txq); 3954 KS_U_FROM(pullup_late, txq); 3955 KS_U_FROM(pullup_failed, txq); 3956 KS_U_FROM(csum_failed, txq); 3957 3958 return (0); 3959 } 3960