/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 * 0: disable padding.
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
static int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4:
 * ---
 * if fl_pad != 0
 *	value specified here will be overridden by fl_pad.
 * else
 *	power of 2 from 32 to 4096 (both inclusive) is a valid value here.
 * T5:
 * ---
 * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
static int t4_fl_pack;
static int t5_fl_pack;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);

/* Used to track coalesced tx work request */
struct txpkts {
	uint64_t *flitp;	/* ptr to flit where next pkt should start */
	uint8_t npkt;		/* # of packets in this work request */
	uint8_t nflits;		/* # of flits used by this work request */
	uint16_t plen;		/* total payload (sum of all packets) */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
	int nflits;		/* # of flits needed for the SGL */
	bus_dma_segment_t seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload1(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static struct mbuf *get_fl_payload2(struct adapter *, struct sge_fl *, uint32_t,
    int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
    char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
    char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct port_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
static void set_fl_tag_idx(struct adapter *, struct sge_fl *, int);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
    struct sgl *);
static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
    struct mbuf *, struct sgl *);
static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
    struct txpkts *, struct mbuf *, struct sgl *);
static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *);
static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);

/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{
	int pad;

	/* set pad to a reasonable powerof2 between 16 and 4096 (inclusive) */
#if defined(__i386__) || defined(__amd64__)
	pad = max(cpu_clflush_line_size, 16);
#else
	pad = max(CACHE_LINE_SIZE, 16);
#endif
	pad = min(pad, 4096);

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (fl_pad != 0 &&
	    (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {

		if (fl_pad != -1) {
			printf("Invalid hw.cxgbe.fl_pad value (%d),"
			    " using %d instead.\n", fl_pad, max(pad, 32));
		}
		fl_pad = max(pad, 32);
	}

	/*
	 * T4 has the same pad and pack boundary.  If a pad boundary is set,
	 * pack boundary must be set to the same value.  Otherwise take the
	 * specified value or auto-calculate something reasonable.
	 */
	if (fl_pad)
		t4_fl_pack = fl_pad;
	else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
		t4_fl_pack = max(pad, 32);
	else
		t4_fl_pack = fl_pack;

	/* T5's pack boundary is independent of the pad boundary. */
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack))
		t5_fl_pack = max(pad, 64);
	else
		t5_fl_pack = fl_pack;

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}
}

void
t4_init_sge_cpl_handlers(struct adapter *sc)
{

	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
}

/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32};	/* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	int sw_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES,
		MJUMPAGESIZE - MSIZE
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		/* t4_fl_pack has the correct value even when fl_pad = 0 */
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
	}

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	for (i = 0; i < min(nitems(sw_flbuf_sizes), 16); i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    sw_flbuf_sizes[i]);
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >=
		    intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		t4_set_reg_field(sc, A_TP_PARA_REG3, m, 0);
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	m = v = F_TDDPTAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * XXX: driver really should be able to deal with unexpected settings.
 */
int
t4_read_chip_settings(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	int i, j, n, rc = 0;
	uint32_t m, v, r;
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	uint32_t sge_flbuf_sizes[16], sw_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	if (is_t4(sc) && (fl_pad || buffer_packing)) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
	} else if (is_t5(sc) && fl_pad) {
		m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
		v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
	}
	r = t4_read_reg(sc, A_SGE_CONTROL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}

	if (is_t5(sc) && buffer_packing) {
		m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
		if (t5_fl_pack == 16)
			v = V_INGPACKBOUNDARY(0);
		else
			v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
		r = t4_read_reg(sc, A_SGE_CONTROL2);
		if ((r & m) != v) {
			device_printf(sc->dev,
			    "invalid SGE_CONTROL2(0x%x)\n", r);
			rc = EINVAL;
		}
	}

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	r = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
	if (r != v) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	/*
	 * Make a list of SGE FL buffer sizes programmed in the chip and tally
	 * it with the FL buffer sizes that we'd like to use.
	 */
	n = 0;
	for (i = 0; i < nitems(sge_flbuf_sizes); i++) {
		r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
		sge_flbuf_sizes[i] = r;
		if (r == MJUMPAGESIZE - MSIZE &&
		    (sc->flags & BUF_PACKING_OK) == 0) {
			sc->flags |= BUF_PACKING_OK;
			FL_BUF_HWTAG(sc, n) = i;
			FL_BUF_SIZE(sc, n) = MJUMPAGESIZE - MSIZE;
			FL_BUF_TYPE(sc, n) = m_gettype(MJUMPAGESIZE);
			FL_BUF_ZONE(sc, n) = m_getzone(MJUMPAGESIZE);
			n++;
		}
	}
	for (i = 0; i < nitems(sw_flbuf_sizes); i++) {
		for (j = 0; j < nitems(sge_flbuf_sizes); j++) {
			if (sw_flbuf_sizes[i] != sge_flbuf_sizes[j])
				continue;
			FL_BUF_HWTAG(sc, n) = j;
			FL_BUF_SIZE(sc, n) = sw_flbuf_sizes[i];
			FL_BUF_TYPE(sc, n) = m_gettype(sw_flbuf_sizes[i]);
			FL_BUF_ZONE(sc, n) = m_getzone(sw_flbuf_sizes[i]);
			n++;
			break;
		}
	}
	if (n == 0) {
		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
		rc = EINVAL;
	} else if (n == 1 && (sc->flags & BUF_PACKING_OK)) {
		device_printf(sc->dev,
		    "no usable SGE FL buffer size when not packing buffers.\n");
		rc = EINVAL;
	}
	FL_BUF_SIZES(sc) = n;

	r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
	s->counter_val[0] = G_THRESHOLD_0(r);
	s->counter_val[1] = G_THRESHOLD_1(r);
	s->counter_val[2] = G_THRESHOLD_2(r);
	s->counter_val[3] = G_THRESHOLD_3(r);

	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1);
	s->timer_val[0] = G_TIMERVALUE0(r) / core_ticks_per_usec(sc);
	s->timer_val[1] = G_TIMERVALUE1(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
	s->timer_val[2] = G_TIMERVALUE2(r) / core_ticks_per_usec(sc);
	s->timer_val[3] = G_TIMERVALUE3(r) / core_ticks_per_usec(sc);
	r = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
	s->timer_val[4] = G_TIMERVALUE4(r) / core_ticks_per_usec(sc);
	s->timer_val[5] = G_TIMERVALUE5(r) / core_ticks_per_usec(sc);

	if (cong_drop == 0) {
		m = F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
		    F_TUNNELCNGDROP3;
		r = t4_read_reg(sc, A_TP_PARA_REG3);
		if (r & m) {
			device_printf(sc->dev,
			    "invalid TP_PARA_REG3(0x%x)\n", r);
			rc = EINVAL;
		}
	}

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		rc = EINVAL;
	}

	r = t4_read_reg(sc, A_SGE_CONM_CTRL);
	s->fl_starve_threshold = G_EGRTHRESHOLD(r) * 2 + 1;
	if (is_t4(sc))
		s->fl_starve_threshold2 = s->fl_starve_threshold;
	else
		s->fl_starve_threshold2 = G_EGRTHRESHOLDPACKING(r) * 2 + 1;

	/* egress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_EGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->eq_s_qpp = r & M_QUEUESPERPAGEPF0;

	/* ingress queues: log2 of # of doorbells per BAR2 page */
	r = t4_read_reg(sc, A_SGE_INGRESS_QUEUES_PER_PAGE_PF);
	r >>= S_QUEUESPERPAGEPF0 +
	    (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * sc->pf;
	s->iq_s_qpp = r & M_QUEUESPERPAGEPF0;

	t4_init_tp_params(sc);

	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

static inline int
enable_buffer_packing(struct adapter *sc)
{

	if (sc->flags & BUF_PACKING_OK &&
	    ((is_t5(sc) && buffer_packing) ||	/* 1 or -1 both ok for T5 */
	    (is_t4(sc) && buffer_packing == 1)))
		return (1);
	return (0);
}

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, fl_pad, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
	    NULL, enable_buffer_packing(sc),
	    "pack multiple frames in one fl buffer");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, is_t5(sc) ? t5_fl_pack : t4_fl_pack,
	    "payload pack boundary (bytes)");
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

static inline int
first_vector(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	int rc = T4_EXTRA_INTR, i;

	if (sc->intr_count == 1)
		return (0);

	for_each_port(sc, i) {
		struct port_info *p = sc->port[i];

		if (i == pi->port_id)
			break;

#ifdef TCP_OFFLOAD
		if (sc->flags & INTR_DIRECT)
			rc += p->nrxq + p->nofldrxq;
		else
			rc += max(p->nrxq, p->nofldrxq);
#else
		/*
		 * Not compiled with offload support and intr_count > 1.  Only
		 * NIC queues exist and they'd better be taking direct
		 * interrupts.
		 */
		KASSERT(sc->flags & INTR_DIRECT,
		    ("%s: intr_count %d, !INTR_DIRECT", __func__,
		    sc->intr_count));

		rc += p->nrxq;
#endif
	}

	return (rc);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
port_intr_iq(struct port_info *pi, int idx)
{
	struct adapter *sc = pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

#ifdef TCP_OFFLOAD
	if (sc->flags & INTR_DIRECT) {
		idx %= pi->nrxq + pi->nofldrxq;

		if (idx >= pi->nrxq) {
			idx -= pi->nrxq;
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
		} else
			iq = &s->rxq[pi->first_rxq + idx].iq;

	} else {
		idx %= max(pi->nrxq, pi->nofldrxq);

		if (pi->nrxq >= pi->nofldrxq)
			iq = &s->rxq[pi->first_rxq + idx].iq;
		else
			iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
	}
#else
	/*
	 * Not compiled with offload support and intr_count > 1.  Only NIC
	 * queues exist and they'd better be taking direct interrupts.
	 */
	KASSERT(sc->flags & INTR_DIRECT,
	    ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));

	idx %= pi->nrxq;
	iq = &s->rxq[pi->first_rxq + idx].iq;
#endif

	KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__));
	return (iq);
}

static inline int
mtu_to_bufsize(int mtu)
{
	int bufsize;

	/* large enough for a frame even when VLAN extraction is disabled */
	bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
	bufsize = roundup2(bufsize + fl_pktshift, fl_pad);

	return (bufsize);
}

#ifdef TCP_OFFLOAD
static inline int
mtu_to_bufsize_toe(struct adapter *sc, int mtu)
{

	if (sc->tt.rx_coalesce)
		return (G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)));

	return (mtu);
}
#endif

int
t4_setup_port_queues(struct port_info *pi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
	struct sysctl_oid *oid2 = NULL;
#endif
	char name[16];
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = pi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int bufsize, pack;

	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
	    NULL, "rx queues");

#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
#endif

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(pi);

	/*
	 * First pass over all rx queues (NIC and TOE):
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
	bufsize = mtu_to_bufsize(ifp->if_mtu);
	pack = enable_buffer_packing(sc);
	for_each_rxq(pi, i, rxq) {

		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
		    RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, bufsize, pack, name);

		if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
		    || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
#endif
		   ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}

#ifdef TCP_OFFLOAD
	bufsize = mtu_to_bufsize_toe(sc, ifp->if_mtu);
	pack = 0;	/* XXX: think about this some more */
	for_each_ofld_rxq(pi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
		    pi->qsize_rxq, RX_IQ_ESIZE);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(pi->dev), i);
		init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, pack,
		    name);

		if (sc->flags & INTR_DIRECT ||
		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all rx queues (NIC and TOE).  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			continue;

		intr_idx = port_intr_iq(pi, j)->abs_id;

		rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(pi, i, txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(pi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}

#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(pi, i, ofld_txq) {
		uint16_t iqid;

		iqid = port_intr_iq(pi, j)->cntxt_id;

		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(pi->dev), i);
		init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, pi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = port_intr_iq(pi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
	init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
	rc = alloc_wrq(sc, pi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_port_queues(pi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_port_queues(struct port_info *pi)
{
	int i;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	/* Do this before freeing the queues */
	if (pi->flags & PORT_SYSCTL_CTX) {
		sysctl_ctx_free(&pi->ctx);
		pi->flags &= ~PORT_SYSCTL_CTX;
	}

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(pi, i, txq) {
		free_txq(pi, txq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_txq(pi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(pi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(pi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(pi, rxq);
	}

#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(pi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(pi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

/* Deals with interrupts on an event queue (e.g. the firmware event queue). */
void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with interrupts on a data ingress queue. */
void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl = &rxq->fl;		/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct rsp_ctrl *ctrl;
	const struct rss_header *rss;
	int ndescs = 0, limit, fl_bufs_used = 0;
	int rsp_type;
	uint32_t lq;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
#endif

	limit = budget ? budget : iq->qsize / 8;

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while (is_new_response(iq, &ctrl)) {

			rmb();

			m0 = NULL;
			rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
			lq = be32toh(ctrl->pldbuflen_qid);
			rss = (const void *)iq->cdesc;

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = fl->flags & FL_BUF_PACKING ?
				    get_fl_payload1(sc, fl, lq, &fl_bufs_used) :
				    get_fl_payload2(sc, fl, lq, &fl_bufs_used);

				if (__predict_false(m0 == NULL))
					goto process_iql;
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.
				 * Note that it is in the leading free-space in
				 * the mbuf.  The kernel can clobber it during a
				 * pullup, m_copymdata, etc.  You need to make
				 * sure that the mbuf reaches you unmolested if
				 * you care about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(ctrl->u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(rss->opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    rss->opcode));
				sc->cpl_handler[rss->opcode](iq, rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (lq >= 1024) {
					sc->an_handler(iq, ctrl);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 8) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			iq_next(iq);
			if (++ndescs == limit) {
				t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

#if defined(INET) || defined(INET6)
				if (iq->flags & IQ_LRO_ENABLED &&
				    sc->lro_timeout != 0) {
					tcp_lro_flush_inactive(&rxq->lro,
					    &lro_timeout);
				}
#endif

				if (fl_bufs_used > 0) {
					FL_LOCK(fl);
					fl->needed += fl_bufs_used;
					refill_fl(sc, fl, fl->cap / 8);
					FL_UNLOCK(fl);
					fl_bufs_used = 0;
				}

				if (budget)
					return (EINPROGRESS);
			}
		}

process_iql:
		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list
		 * if it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;
		struct lro_entry *l;

		while (!SLIST_EMPTY(&lro->lro_active)) {
			l = SLIST_FIRST(&lro->lro_active);
			SLIST_REMOVE_HEAD(&lro->lro_active, next);
			tcp_lro_flush(lro, l);
		}
	}
#endif

	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
		fl->needed += fl_bufs_used;
		starved = refill_fl(sc, fl, fl->cap / 4);
		FL_UNLOCK(fl);
		if (__predict_false(starved != 0))
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

static int
fill_mbuf_stash(struct sge_fl *fl)
{
	int i;

	for (i = 0; i < nitems(fl->mstash); i++) {
		if (fl->mstash[i] == NULL) {
			struct mbuf *m;
			if ((m = m_get(M_NOWAIT, MT_NOINIT)) == NULL)
				return (ENOBUFS);
			fl->mstash[i] = m;
		}
	}
	return (0);
}

static struct mbuf *
get_mbuf_from_stash(struct sge_fl *fl)
{
	int i;

	for (i = 0; i < nitems(fl->mstash); i++) {
		if (fl->mstash[i] != NULL) {
			struct mbuf *m;

			m = fl->mstash[i];
			fl->mstash[i] = NULL;
			return (m);
		} else
			fl->mstash[i] = m_get(M_NOWAIT, MT_NOINIT);
	}

	return (m_get(M_NOWAIT, MT_NOINIT));
}

static void
return_mbuf_to_stash(struct sge_fl *fl, struct mbuf *m)
{
	int i;

	if (m == NULL)
		return;

	for (i = 0; i < nitems(fl->mstash); i++) {
		if (fl->mstash[i] == NULL) {
			fl->mstash[i] = m;
			return;
		}
	}
	m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
	m_free(m);
}

/* buf can be any address within the buffer */
static inline u_int *
find_buf_refcnt(caddr_t buf)
{
	uintptr_t ptr = (uintptr_t)buf;

	return ((u_int *)((ptr & ~(MJUMPAGESIZE - 1)) + MSIZE - sizeof(u_int)));
}

static inline struct mbuf *
find_buf_mbuf(caddr_t buf)
{
	uintptr_t ptr = (uintptr_t)buf;

	return ((struct mbuf *)(ptr & ~(MJUMPAGESIZE - 1)));
}

static int
rxb_free(struct mbuf *m, void *arg1, void *arg2)
{
	uma_zone_t zone = arg1;
	caddr_t cl = arg2;
#ifdef notyet
	u_int refcount;

	refcount = *find_buf_refcnt(cl);
	KASSERT(refcount == 0, ("%s: cl %p refcount is %u", __func__,
	    cl - MSIZE, refcount));
#endif
	cl -= MSIZE;
	uma_zfree(zone, cl);

	return (EXT_FREE_OK);
}

static struct mbuf *
get_fl_payload1(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
    int *fl_bufs_used)
{
	struct mbuf *m0, *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	unsigned int nbuf, len;
	int pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;

	/*
	 * No assertion for the fl lock because we don't need it.  This routine
	 * is called only from the rx interrupt handler and it only updates
	 * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
	 * updated in the rx interrupt handler or the starvation helper routine.
	 * That's why code that manipulates fl->pidx/fl->needed needs the fl
	 * lock but this routine does not).
	 */

	KASSERT(fl->flags & FL_BUF_PACKING,
	    ("%s: buffer packing disabled for fl %p", __func__, fl));

	len = G_RSPD_LEN(len_newbuf);

	if ((len_newbuf & F_RSPD_NEWBUF) == 0) {
		KASSERT(fl->rx_offset > 0,
		    ("%s: packed frame but driver at offset=0", __func__));

		/* A packed frame is guaranteed to fit entirely in this buf. */
		KASSERT(FL_BUF_SIZE(sc, sd->tag_idx) - fl->rx_offset >= len,
		    ("%s: packing error. bufsz=%u, offset=%u, len=%u",
		    __func__, FL_BUF_SIZE(sc, sd->tag_idx), fl->rx_offset,
		    len));

		m0 = get_mbuf_from_stash(fl);
		if (m0 == NULL ||
		    m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
			return_mbuf_to_stash(fl, m0);
			return (NULL);
		}

		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
		    BUS_DMASYNC_POSTREAD);
		if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) {
#ifdef T4_PKT_TIMESTAMP
			/* Leave room for a timestamp */
			m0->m_data += 8;
#endif
			bcopy(sd->cl + fl->rx_offset, mtod(m0, caddr_t), len);
			m0->m_pkthdr.len = len;
			m0->m_len = len;
		} else {
			m0->m_pkthdr.len = len;
			m0->m_len = len;
			m_extaddref(m0, sd->cl + fl->rx_offset,
			    roundup2(m0->m_len, fl_pad),
			    find_buf_refcnt(sd->cl), rxb_free,
			    FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
		}
		fl->rx_offset += len;
		fl->rx_offset = roundup2(fl->rx_offset, fl_pad);
		fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
		if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
			fl->rx_offset = 0;
			(*fl_bufs_used) += 1;
			if (__predict_false(++fl->cidx == fl->cap))
				fl->cidx = 0;
		}

		return (m0);
	}

	KASSERT(len_newbuf & F_RSPD_NEWBUF,
	    ("%s: only new buffer handled here", __func__));

	nbuf = 0;

	/*
	 * Move to the start of the next buffer if we are still in the middle
	 * of some buffer.  This is the case where there was some room left in
	 * the previous buffer but not enough to fit this frame in its
	 * entirety.
	 */
	if (fl->rx_offset > 0) {
		KASSERT(roundup2(len, fl_pad) > FL_BUF_SIZE(sc, sd->tag_idx) -
		    fl->rx_offset, ("%s: frame (%u bytes) should have fit at "
		    "cidx %u offset %u bufsize %u", __func__, len, fl->cidx,
		    fl->rx_offset, FL_BUF_SIZE(sc, sd->tag_idx)));
		nbuf++;
		fl->rx_offset = 0;
		sd++;
		if (__predict_false(++fl->cidx == fl->cap)) {
			sd = fl->sdesc;
			fl->cidx = 0;
		}
	}

	m0 = find_buf_mbuf(sd->cl);
	if (m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE))
		goto done;
	bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
	m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
	m_extaddref(m0, sd->cl, roundup2(m0->m_len, fl_pad),
	    find_buf_refcnt(sd->cl), rxb_free, FL_BUF_ZONE(sc, sd->tag_idx),
	    sd->cl);
	m0->m_pkthdr.len = len;

	fl->rx_offset = roundup2(m0->m_len, fl_pad);
	fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
	if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
		fl->rx_offset = 0;
		nbuf++;
		sd++;
		if (__predict_false(++fl->cidx == fl->cap)) {
			sd = fl->sdesc;
			fl->cidx = 0;
		}
	}

	m = m0;
	len -= m->m_len;

	while (len > 0) {
		m->m_next = find_buf_mbuf(sd->cl);
		m = m->m_next;

		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
		    BUS_DMASYNC_POSTREAD);

		/* m_init for !M_PKTHDR can't fail so don't bother */
		m_init(m, NULL, 0, M_NOWAIT, MT_DATA, M_NOFREE);
		m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
		m_extaddref(m, sd->cl, roundup2(m->m_len, fl_pad),
		    find_buf_refcnt(sd->cl), rxb_free,
		    FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);

		fl->rx_offset = roundup2(m->m_len, fl_pad);
		fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
		if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
			fl->rx_offset = 0;
			nbuf++;
			sd++;
			if (__predict_false(++fl->cidx == fl->cap)) {
				sd = fl->sdesc;
				fl->cidx = 0;
			}
		}

		len -= m->m_len;
	}
done:
	(*fl_bufs_used) += nbuf;
	return (m0);
}

static struct mbuf *
get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
    int *fl_bufs_used)
{
	struct mbuf *m0, *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	unsigned int nbuf, len;

	/*
	 * No assertion for the fl lock because we don't need it.  This routine
	 * is called only from the rx interrupt handler and it only updates
	 * fl->cidx.  (Contrast that with fl->pidx/fl->needed which could be
	 * updated in the rx interrupt handler or the starvation helper routine.
	 * That's why code that manipulates fl->pidx/fl->needed needs the fl
	 * lock but this routine does not).
	 */

	KASSERT((fl->flags & FL_BUF_PACKING) == 0,
	    ("%s: buffer packing enabled for fl %p", __func__, fl));
	if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
		panic("%s: cannot handle packed frames", __func__);
	len = G_RSPD_LEN(len_newbuf);

	/*
	 * We never want to run out of mbufs in the middle of a frame that
	 * spans multiple fl buffers.  If the fl's mbuf stash isn't full and
	 * can't be filled up to the brim then fail early.
	 */
	if (len > FL_BUF_SIZE(sc, sd->tag_idx) && fill_mbuf_stash(fl) != 0)
		return (NULL);

	m0 = get_mbuf_from_stash(fl);
	if (m0 == NULL ||
	    m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
		return_mbuf_to_stash(fl, m0);
		return (NULL);
	}

	bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);

	if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) {
#ifdef T4_PKT_TIMESTAMP
		/* Leave room for a timestamp */
		m0->m_data += 8;
#endif
		/* copy data to mbuf, buffer will be recycled */
		bcopy(sd->cl, mtod(m0, caddr_t), len);
		m0->m_len = len;
	} else {
		bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
		m_cljset(m0, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
		sd->cl = NULL;	/* consumed */
		m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
	}
	m0->m_pkthdr.len = len;

	sd++;
	if (__predict_false(++fl->cidx == fl->cap)) {
		sd = fl->sdesc;
		fl->cidx = 0;
	}

	m = m0;
	len -= m->m_len;
	nbuf = 1;	/* # of fl buffers used */

	while (len > 0) {
		/* Can't fail, we checked earlier that the stash was full. */
		m->m_next = get_mbuf_from_stash(fl);
		m = m->m_next;

		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
		    BUS_DMASYNC_POSTREAD);

		/* m_init for !M_PKTHDR can't fail so don't bother */
		m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
		if (len <= MLEN) {
			bcopy(sd->cl, mtod(m, caddr_t), len);
			m->m_len = len;
		} else {
			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
			m_cljset(m, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
			sd->cl = NULL;	/* consumed */
			m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
		}

		sd++;
		if (__predict_false(++fl->cidx == fl->cap)) {
			sd = fl->sdesc;
			fl->cidx = 0;
		}

		len -= m->m_len;
		nbuf++;
	}

	(*fl_bufs_used) += nbuf;

	return (m0);
}

static int
t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct ifnet *ifp = rxq->ifp;
	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxq->lro;
#endif

	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
	    rss->opcode));

	m0->m_pkthdr.len -= fl_pktshift;
	m0->m_len -= fl_pktshift;
	m0->m_data += fl_pktshift;

	m0->m_pkthdr.rcvif = ifp;
	m0->m_flags |= M_FLOWID;
	m0->m_pkthdr.flowid = be32toh(rss->hash_val);

	if (cpl->csum_calc && !cpl->err_vec) {
		if (ifp->if_capenable & IFCAP_RXCSUM &&
		    cpl->l2info & htobe32(F_RXF_IP)) {
			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
		    cpl->l2info & htobe32(F_RXF_IP6)) {
			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
			    CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		}

		if (__predict_false(cpl->ip_frag))
			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
		else
			m0->m_pkthdr.csum_data = 0xffff;
	}

	if (cpl->vlan_ex) {
		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
		m0->m_flags |= M_VLANTAG;
		rxq->vlan_extraction++;
	}

#if defined(INET) || defined(INET6)
	if (cpl->l2info & htobe32(F_RXF_LRO) &&
	    iq->flags & IQ_LRO_ENABLED &&
	    tcp_lro_rx(lro, m0, 0) == 0) {
		/* queued for LRO */
	} else
#endif
	ifp->if_input(ifp, m0);

	return (0);
}

/*
 * Doesn't fail.  Holds on to work requests it can't send right away.
 */
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
	struct sge_eq *eq = &wrq->eq;
	int can_reclaim;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(wrq);
#ifdef TCP_OFFLOAD
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
	    (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#else
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
#endif

	if (__predict_true(wr != NULL))
		STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq))
			return;
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}
	eq->cidx += can_reclaim;
	eq->avail += can_reclaim;
	if (__predict_false(eq->cidx >= eq->cap))
		eq->cidx -= eq->cap;

	while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
		int ndesc;

		if (__predict_false(wr->wr_len < 0 ||
		    wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {

#ifdef INVARIANTS
			panic("%s: work request with length %d", __func__,
			    wr->wr_len);
#endif
#ifdef KDB
			kdb_backtrace();
#endif
			log(LOG_ERR, "%s: %s work request with length %d",
			    device_get_nameunit(sc->dev), __func__, wr->wr_len);
			STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
			free_wrqe(wr);
			continue;
		}

		ndesc = howmany(wr->wr_len, EQ_ESIZE);
		if (eq->avail < ndesc) {
			wrq->no_desc++;
			break;
		}

		dst = (void *)&eq->desc[eq->pidx];
		copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);

		eq->pidx += ndesc;
		eq->avail -= ndesc;
		if (__predict_false(eq->pidx >= eq->cap))
			eq->pidx -= eq->cap;

		eq->pending += ndesc;
		if (eq->pending >= 8)
			ring_eq_db(sc, eq);

		wrq->tx_wrs++;
		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);

		if (eq->avail < 8) {
			can_reclaim = reclaimable(eq);
			eq->cidx += can_reclaim;
			eq->avail += can_reclaim;
			if (__predict_false(eq->cidx >= eq->cap))
				eq->cidx -= eq->cap;
		}
	}

	if (eq->pending)
		ring_eq_db(sc, eq);

	if (wr != NULL) {
		eq->flags |= EQ_STALLED;
		if (callout_pending(&eq->tx_callout) == 0)
			callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
	}
}

/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
#define TXPKTS_PKT_HDR ((\
    sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8)

/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
#define TXPKTS_WR_HDR (\
    sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
    TXPKTS_PKT_HDR)

/* Header of a tx WR, before SGL of first packet (in flits) */
#define TXPKT_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )

/* Header of a tx LSO WR, before SGL of first packet (in flits) */
#define TXPKT_LSO_WR_HDR ((\
    sizeof(struct fw_eth_tx_pkt_wr) + \
    sizeof(struct cpl_tx_pkt_lso_core) + \
    sizeof(struct cpl_tx_pkt_core) \
    ) / 8 )

int
t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
{
	struct port_info *pi = (void *)ifp->if_softc;
	struct adapter *sc = pi->adapter;
	struct sge_eq *eq = &txq->eq;
	struct buf_ring *br = txq->br;
	struct mbuf *next;
	int rc, coalescing, can_reclaim;
	struct txpkts txpkts;
	struct sgl sgl;

	TXQ_LOCK_ASSERT_OWNED(txq);
	KASSERT(m, ("%s: called with nothing to do.", __func__));
	KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
	    ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));

	prefetch(&eq->desc[eq->pidx]);
	prefetch(&txq->sdesc[eq->pidx]);

	txpkts.npkt = 0;	/* indicates there's nothing in txpkts */
	coalescing = 0;

	can_reclaim = reclaimable(eq);
	if (__predict_false(eq->flags & EQ_STALLED)) {
		if (can_reclaim < tx_resume_threshold(eq)) {
			txq->m = m;
			return (0);
		}
		eq->flags &= ~EQ_STALLED;
		eq->unstalled++;
	}

	if (__predict_false(eq->flags & EQ_DOOMED)) {
		m_freem(m);
		while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
			m_freem(m);
		return (ENETDOWN);
	}

	if (eq->avail < 8 && can_reclaim)
		reclaim_tx_descs(txq, can_reclaim, 32);

	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {

		if (eq->avail < 8)
			break;

		next = m->m_nextpkt;
		m->m_nextpkt = NULL;

		if (next || buf_ring_peek(br))
			coalescing = 1;

		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
		if (rc != 0) {
			if (rc == ENOMEM) {

				/* Short of resources, suspend tx */

				m->m_nextpkt = next;
				break;
			}

			/*
			 * Unrecoverable error for this packet, throw it away
			 * and move on to the next.  get_pkt_sgl may already
			 * have freed m (it will be NULL in that case and the
			 * m_freem here is still safe).
			 */

			m_freem(m);
			continue;
		}

		if (coalescing &&
		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {

			/* Successfully absorbed into txpkts */

			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
			goto doorbell;
		}

		/*
		 * We weren't coalescing to begin with, or the current frame
		 * could not be coalesced (add_to_txpkts flushes txpkts if a
		 * frame given to it can't be coalesced).  Either way there
		 * should be nothing in txpkts.
		 */
		KASSERT(txpkts.npkt == 0,
		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));

		/* We're sending out individual packets now */
		coalescing = 0;

		if (eq->avail < 8)
			reclaim_tx_descs(txq, 0, 8);
		rc = write_txpkt_wr(pi, txq, m, &sgl);
		if (rc != 0) {

			/* Short of hardware descriptors, suspend tx */

			/*
			 * This is an unlikely but expensive failure.  We've
			 * done all the hard work (DMA mappings etc.) and now we
			 * can't send out the packet.  What's worse, we have to
			 * spend even more time freeing up everything in sgl.
1953 */ 1954 txq->no_desc++; 1955 free_pkt_sgl(txq, &sgl); 1956 1957 m->m_nextpkt = next; 1958 break; 1959 } 1960 1961 ETHER_BPF_MTAP(ifp, m); 1962 if (sgl.nsegs == 0) 1963 m_freem(m); 1964 doorbell: 1965 if (eq->pending >= 8) 1966 ring_eq_db(sc, eq); 1967 1968 can_reclaim = reclaimable(eq); 1969 if (can_reclaim >= 32) 1970 reclaim_tx_descs(txq, can_reclaim, 64); 1971 } 1972 1973 if (txpkts.npkt > 0) 1974 write_txpkts_wr(txq, &txpkts); 1975 1976 /* 1977 * m not NULL means there was an error but we haven't thrown it away. 1978 * This can happen when we're short of tx descriptors (no_desc) or maybe 1979 * even DMA maps (no_dmamap). Either way, a credit flush and reclaim 1980 * will get things going again. 1981 */ 1982 if (m && !(eq->flags & EQ_CRFLUSHED)) { 1983 struct tx_sdesc *txsd = &txq->sdesc[eq->pidx]; 1984 1985 /* 1986 * If EQ_CRFLUSHED is not set then we know we have at least one 1987 * available descriptor because any WR that reduces eq->avail to 1988 * 0 also sets EQ_CRFLUSHED. 1989 */ 1990 KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__)); 1991 1992 txsd->desc_used = 1; 1993 txsd->credits = 0; 1994 write_eqflush_wr(eq); 1995 } 1996 txq->m = m; 1997 1998 if (eq->pending) 1999 ring_eq_db(sc, eq); 2000 2001 reclaim_tx_descs(txq, 0, 128); 2002 2003 if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0) 2004 callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); 2005 2006 return (0); 2007 } 2008 2009 void 2010 t4_update_fl_bufsize(struct ifnet *ifp) 2011 { 2012 struct port_info *pi = ifp->if_softc; 2013 struct adapter *sc = pi->adapter; 2014 struct sge_rxq *rxq; 2015 #ifdef TCP_OFFLOAD 2016 struct sge_ofld_rxq *ofld_rxq; 2017 #endif 2018 struct sge_fl *fl; 2019 int i, bufsize; 2020 2021 bufsize = mtu_to_bufsize(ifp->if_mtu); 2022 for_each_rxq(pi, i, rxq) { 2023 fl = &rxq->fl; 2024 2025 FL_LOCK(fl); 2026 set_fl_tag_idx(sc, fl, bufsize); 2027 FL_UNLOCK(fl); 2028 } 2029 #ifdef TCP_OFFLOAD 2030 bufsize = mtu_to_bufsize_toe(pi->adapter, ifp->if_mtu); 2031 for_each_ofld_rxq(pi, i, ofld_rxq) { 2032 fl = &ofld_rxq->fl; 2033 2034 FL_LOCK(fl); 2035 set_fl_tag_idx(sc, fl, bufsize); 2036 FL_UNLOCK(fl); 2037 } 2038 #endif 2039 } 2040 2041 int 2042 can_resume_tx(struct sge_eq *eq) 2043 { 2044 return (reclaimable(eq) >= tx_resume_threshold(eq)); 2045 } 2046 2047 static inline void 2048 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 2049 int qsize, int esize) 2050 { 2051 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 2052 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 2053 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 2054 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 2055 2056 iq->flags = 0; 2057 iq->adapter = sc; 2058 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 2059 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 2060 if (pktc_idx >= 0) { 2061 iq->intr_params |= F_QINTR_CNT_EN; 2062 iq->intr_pktc_idx = pktc_idx; 2063 } 2064 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 2065 iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ 2066 } 2067 2068 static inline void 2069 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack, 2070 char *name) 2071 { 2072 2073 fl->qsize = qsize; 2074 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 2075 if (pack) 2076 fl->flags |= FL_BUF_PACKING; 2077 set_fl_tag_idx(sc, fl, bufsize); 2078 } 2079 2080 static inline void 2081 init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, 2082 uint16_t iqid, char *name) 2083 { 2084 KASSERT(tx_chan < NCHAN, ("%s: 
bad tx channel %d", __func__, tx_chan)); 2085 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 2086 2087 eq->flags = eqtype & EQ_TYPEMASK; 2088 eq->tx_chan = tx_chan; 2089 eq->iqid = iqid; 2090 eq->qsize = qsize; 2091 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 2092 2093 TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq); 2094 callout_init(&eq->tx_callout, CALLOUT_MPSAFE); 2095 } 2096 2097 static int 2098 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 2099 bus_dmamap_t *map, bus_addr_t *pa, void **va) 2100 { 2101 int rc; 2102 2103 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 2104 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 2105 if (rc != 0) { 2106 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 2107 goto done; 2108 } 2109 2110 rc = bus_dmamem_alloc(*tag, va, 2111 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 2112 if (rc != 0) { 2113 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 2114 goto done; 2115 } 2116 2117 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 2118 if (rc != 0) { 2119 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 2120 goto done; 2121 } 2122 done: 2123 if (rc) 2124 free_ring(sc, *tag, *map, *pa, *va); 2125 2126 return (rc); 2127 } 2128 2129 static int 2130 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 2131 bus_addr_t pa, void *va) 2132 { 2133 if (pa) 2134 bus_dmamap_unload(tag, map); 2135 if (va) 2136 bus_dmamem_free(tag, va, map); 2137 if (tag) 2138 bus_dma_tag_destroy(tag); 2139 2140 return (0); 2141 } 2142 2143 /* 2144 * Allocates the ring for an ingress queue and an optional freelist. If the 2145 * freelist is specified it will be allocated and then associated with the 2146 * ingress queue. 2147 * 2148 * Returns errno on failure. Resources allocated up to that point may still be 2149 * allocated. Caller is responsible for cleanup in case this function fails. 2150 * 2151 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then 2152 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 2153 * the abs_id of the ingress queue to which its interrupts should be forwarded. 
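 * In either case intr_idx ends up in the IQANDSTINDEX field of the FW_IQ_CMD;
 * F_FW_IQ_CMD_IQANDST is set only when interrupts are forwarded.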
2154 */ 2155 static int 2156 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, 2157 int intr_idx, int cong) 2158 { 2159 int rc, i, cntxt_id; 2160 size_t len; 2161 struct fw_iq_cmd c; 2162 struct adapter *sc = iq->adapter; 2163 __be32 v = 0; 2164 2165 len = iq->qsize * iq->esize; 2166 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 2167 (void **)&iq->desc); 2168 if (rc != 0) 2169 return (rc); 2170 2171 bzero(&c, sizeof(c)); 2172 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 2173 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 2174 V_FW_IQ_CMD_VFN(0)); 2175 2176 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 2177 FW_LEN16(c)); 2178 2179 /* Special handling for firmware event queue */ 2180 if (iq == &sc->sge.fwq) 2181 v |= F_FW_IQ_CMD_IQASYNCH; 2182 2183 if (iq->flags & IQ_INTR) { 2184 KASSERT(intr_idx < sc->intr_count, 2185 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 2186 } else 2187 v |= F_FW_IQ_CMD_IQANDST; 2188 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 2189 2190 c.type_to_iqandstindex = htobe32(v | 2191 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 2192 V_FW_IQ_CMD_VIID(pi->viid) | 2193 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 2194 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 2195 F_FW_IQ_CMD_IQGTSMODE | 2196 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 2197 V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4)); 2198 c.iqsize = htobe16(iq->qsize); 2199 c.iqaddr = htobe64(iq->ba); 2200 if (cong >= 0) 2201 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 2202 2203 if (fl) { 2204 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 2205 2206 for (i = 0; i < FL_BUF_SIZES(sc); i++) { 2207 2208 /* 2209 * A freelist buffer must be 16 byte aligned as the SGE 2210 * uses the low 4 bits of the bus addr to figure out the 2211 * buffer size. 2212 */ 2213 rc = bus_dma_tag_create(sc->dmat, 16, 0, 2214 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 2215 FL_BUF_SIZE(sc, i), 1, FL_BUF_SIZE(sc, i), 2216 BUS_DMA_ALLOCNOW, NULL, NULL, &fl->tag[i]); 2217 if (rc != 0) { 2218 device_printf(sc->dev, 2219 "failed to create fl DMA tag[%d]: %d\n", 2220 i, rc); 2221 return (rc); 2222 } 2223 } 2224 len = fl->qsize * RX_FL_ESIZE; 2225 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 2226 &fl->ba, (void **)&fl->desc); 2227 if (rc) 2228 return (rc); 2229 2230 /* Allocate space for one software descriptor per buffer. */ 2231 fl->cap = (fl->qsize - spg_len / RX_FL_ESIZE) * 8; 2232 rc = alloc_fl_sdesc(fl); 2233 if (rc != 0) { 2234 device_printf(sc->dev, 2235 "failed to setup fl software descriptors: %d\n", 2236 rc); 2237 return (rc); 2238 } 2239 fl->needed = fl->cap; 2240 fl->lowat = fl->flags & FL_BUF_PACKING ? 2241 roundup2(sc->sge.fl_starve_threshold2, 8) : 2242 roundup2(sc->sge.fl_starve_threshold, 8); 2243 2244 c.iqns_to_fl0congen |= 2245 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 2246 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 2247 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 2248 (fl->flags & FL_BUF_PACKING ? 
		    F_FW_IQ_CMD_FL0PACKEN : 0));
		if (cong >= 0) {
			c.iqns_to_fl0congen |=
			    htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
				F_FW_IQ_CMD_FL0CONGCIF |
				F_FW_IQ_CMD_FL0CONGEN);
		}
		c.fl0dcaen_to_fl0cidxfthresh =
		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
		c.fl0size = htobe16(fl->qsize);
		c.fl0addr = htobe64(fl->ba);
	}

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create ingress queue: %d\n", rc);
		return (rc);
	}

	iq->cdesc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->intr_next = iq->intr_params;
	iq->cntxt_id = be16toh(c.iqid);
	iq->abs_id = be16toh(c.physiqid);
	iq->flags |= IQ_ALLOCATED;

	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
	if (cntxt_id >= sc->sge.niq) {
		panic("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.niq - 1);
	}
	sc->sge.iqmap[cntxt_id] = iq;

	if (fl) {
		fl->cntxt_id = be16toh(c.fl0id);
		fl->pidx = fl->cidx = 0;

		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
		if (cntxt_id >= sc->sge.neq) {
			panic("%s: fl->cntxt_id (%d) more than the max (%d)",
			    __func__, cntxt_id, sc->sge.neq - 1);
		}
		sc->sge.eqmap[cntxt_id] = (void *)fl;

		FL_LOCK(fl);
		/* Enough to make sure the SGE doesn't think it's starved */
		refill_fl(sc, fl, fl->lowat);
		FL_UNLOCK(fl);

		iq->flags |= IQ_HAS_FL;
	}

	if (is_t5(sc) && cong >= 0) {
		uint32_t param, val;

		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
		    V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
		if (cong == 0)
			val = 1 << 19;
		else {
			val = 2 << 19;
			for (i = 0; i < 4; i++) {
				if (cong & (1 << i))
					val |= 1 << (i << 2);
			}
		}

		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
		if (rc != 0) {
			/* report error but carry on */
			device_printf(sc->dev,
			    "failed to set congestion manager context for "
			    "ingress queue %d: %d\n", iq->cntxt_id, rc);
		}
	}

	/* Enable IQ interrupts */
	atomic_store_rel_int(&iq->state, IQS_IDLE);
	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
	    V_INGRESSQID(iq->cntxt_id));

	return (0);
}

static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
	int i, rc;
	struct adapter *sc = iq->adapter;
	device_t dev;

	if (sc == NULL)
		return (0);	/* nothing to do */

	dev = pi ? pi->dev : sc->dev;

	if (iq->flags & IQ_ALLOCATED) {
		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
		    fl ?
fl->cntxt_id : 0xffff, 0xffff); 2353 if (rc != 0) { 2354 device_printf(dev, 2355 "failed to free queue %p: %d\n", iq, rc); 2356 return (rc); 2357 } 2358 iq->flags &= ~IQ_ALLOCATED; 2359 } 2360 2361 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 2362 2363 bzero(iq, sizeof(*iq)); 2364 2365 if (fl) { 2366 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 2367 fl->desc); 2368 2369 if (fl->sdesc) 2370 free_fl_sdesc(sc, fl); 2371 2372 for (i = 0; i < nitems(fl->mstash); i++) { 2373 struct mbuf *m = fl->mstash[i]; 2374 2375 if (m != NULL) { 2376 m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); 2377 m_free(m); 2378 } 2379 } 2380 2381 if (mtx_initialized(&fl->fl_lock)) 2382 mtx_destroy(&fl->fl_lock); 2383 2384 for (i = 0; i < FL_BUF_SIZES(sc); i++) { 2385 if (fl->tag[i]) 2386 bus_dma_tag_destroy(fl->tag[i]); 2387 } 2388 2389 bzero(fl, sizeof(*fl)); 2390 } 2391 2392 return (0); 2393 } 2394 2395 static int 2396 alloc_fwq(struct adapter *sc) 2397 { 2398 int rc, intr_idx; 2399 struct sge_iq *fwq = &sc->sge.fwq; 2400 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2401 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2402 2403 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE); 2404 fwq->flags |= IQ_INTR; /* always */ 2405 intr_idx = sc->intr_count > 1 ? 1 : 0; 2406 rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); 2407 if (rc != 0) { 2408 device_printf(sc->dev, 2409 "failed to create firmware event queue: %d\n", rc); 2410 return (rc); 2411 } 2412 2413 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 2414 NULL, "firmware event queue"); 2415 children = SYSCTL_CHILDREN(oid); 2416 2417 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", 2418 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", 2419 "absolute id of the queue"); 2420 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", 2421 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", 2422 "SGE context id of the queue"); 2423 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", 2424 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", 2425 "consumer index"); 2426 2427 return (0); 2428 } 2429 2430 static int 2431 free_fwq(struct adapter *sc) 2432 { 2433 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 2434 } 2435 2436 static int 2437 alloc_mgmtq(struct adapter *sc) 2438 { 2439 int rc; 2440 struct sge_wrq *mgmtq = &sc->sge.mgmtq; 2441 char name[16]; 2442 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2443 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2444 2445 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, 2446 NULL, "management queue"); 2447 2448 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); 2449 init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, 2450 sc->sge.fwq.cntxt_id, name); 2451 rc = alloc_wrq(sc, NULL, mgmtq, oid); 2452 if (rc != 0) { 2453 device_printf(sc->dev, 2454 "failed to create management queue: %d\n", rc); 2455 return (rc); 2456 } 2457 2458 return (0); 2459 } 2460 2461 static int 2462 free_mgmtq(struct adapter *sc) 2463 { 2464 2465 return free_wrq(sc, &sc->sge.mgmtq); 2466 } 2467 2468 static inline int 2469 tnl_cong(struct port_info *pi) 2470 { 2471 2472 if (cong_drop == -1) 2473 return (-1); 2474 else if (cong_drop == 1) 2475 return (0); 2476 else 2477 return (pi->rx_chan_map); 2478 } 2479 2480 static int 2481 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, 2482 struct sysctl_oid *oid) 2483 { 2484 int rc; 2485 struct sysctl_oid_list 
*children; 2486 char name[16]; 2487 2488 rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi)); 2489 if (rc != 0) 2490 return (rc); 2491 2492 FL_LOCK(&rxq->fl); 2493 refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8); 2494 FL_UNLOCK(&rxq->fl); 2495 2496 #if defined(INET) || defined(INET6) 2497 rc = tcp_lro_init(&rxq->lro); 2498 if (rc != 0) 2499 return (rc); 2500 rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ 2501 2502 if (pi->ifp->if_capenable & IFCAP_LRO) 2503 rxq->iq.flags |= IQ_LRO_ENABLED; 2504 #endif 2505 rxq->ifp = pi->ifp; 2506 2507 children = SYSCTL_CHILDREN(oid); 2508 2509 snprintf(name, sizeof(name), "%d", idx); 2510 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 2511 NULL, "rx queue"); 2512 children = SYSCTL_CHILDREN(oid); 2513 2514 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", 2515 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", 2516 "absolute id of the queue"); 2517 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 2518 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", 2519 "SGE context id of the queue"); 2520 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", 2521 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", 2522 "consumer index"); 2523 #if defined(INET) || defined(INET6) 2524 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 2525 &rxq->lro.lro_queued, 0, NULL); 2526 SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 2527 &rxq->lro.lro_flushed, 0, NULL); 2528 #endif 2529 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 2530 &rxq->rxcsum, "# of times hardware assisted with checksum"); 2531 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction", 2532 CTLFLAG_RD, &rxq->vlan_extraction, 2533 "# of times hardware extracted 802.1Q tag"); 2534 2535 children = SYSCTL_CHILDREN(oid); 2536 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, 2537 NULL, "freelist"); 2538 children = SYSCTL_CHILDREN(oid); 2539 2540 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 2541 CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I", 2542 "SGE context id of the queue"); 2543 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 2544 &rxq->fl.cidx, 0, "consumer index"); 2545 if (rxq->fl.flags & FL_BUF_PACKING) { 2546 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "rx_offset", 2547 CTLFLAG_RD, &rxq->fl.rx_offset, 0, "packing rx offset"); 2548 } 2549 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 2550 &rxq->fl.pidx, 0, "producer index"); 2551 2552 return (rc); 2553 } 2554 2555 static int 2556 free_rxq(struct port_info *pi, struct sge_rxq *rxq) 2557 { 2558 int rc; 2559 2560 #if defined(INET) || defined(INET6) 2561 if (rxq->lro.ifp) { 2562 tcp_lro_free(&rxq->lro); 2563 rxq->lro.ifp = NULL; 2564 } 2565 #endif 2566 2567 rc = free_iq_fl(pi, &rxq->iq, &rxq->fl); 2568 if (rc == 0) 2569 bzero(rxq, sizeof(*rxq)); 2570 2571 return (rc); 2572 } 2573 2574 #ifdef TCP_OFFLOAD 2575 static int 2576 alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq, 2577 int intr_idx, int idx, struct sysctl_oid *oid) 2578 { 2579 int rc; 2580 struct sysctl_oid_list *children; 2581 char name[16]; 2582 2583 rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 2584 pi->rx_chan_map); 2585 if (rc != 0) 2586 return (rc); 2587 2588 children = SYSCTL_CHILDREN(oid); 2589 2590 snprintf(name, sizeof(name), "%d", idx); 2591 oid = SYSCTL_ADD_NODE(&pi->ctx, children, 
OID_AUTO, name, CTLFLAG_RD, 2592 NULL, "rx queue"); 2593 children = SYSCTL_CHILDREN(oid); 2594 2595 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", 2596 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, 2597 "I", "absolute id of the queue"); 2598 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 2599 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, 2600 "I", "SGE context id of the queue"); 2601 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", 2602 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", 2603 "consumer index"); 2604 2605 children = SYSCTL_CHILDREN(oid); 2606 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, 2607 NULL, "freelist"); 2608 children = SYSCTL_CHILDREN(oid); 2609 2610 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", 2611 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16, 2612 "I", "SGE context id of the queue"); 2613 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 2614 &ofld_rxq->fl.cidx, 0, "consumer index"); 2615 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 2616 &ofld_rxq->fl.pidx, 0, "producer index"); 2617 2618 return (rc); 2619 } 2620 2621 static int 2622 free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq) 2623 { 2624 int rc; 2625 2626 rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl); 2627 if (rc == 0) 2628 bzero(ofld_rxq, sizeof(*ofld_rxq)); 2629 2630 return (rc); 2631 } 2632 #endif 2633 2634 static int 2635 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 2636 { 2637 int rc, cntxt_id; 2638 struct fw_eq_ctrl_cmd c; 2639 2640 bzero(&c, sizeof(c)); 2641 2642 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 2643 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 2644 V_FW_EQ_CTRL_CMD_VFN(0)); 2645 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 2646 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 2647 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */ 2648 c.physeqid_pkd = htobe32(0); 2649 c.fetchszm_to_iqid = 2650 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 2651 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 2652 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 2653 c.dcaen_to_eqsize = 2654 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 2655 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 2656 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 2657 V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize)); 2658 c.eqaddr = htobe64(eq->ba); 2659 2660 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2661 if (rc != 0) { 2662 device_printf(sc->dev, 2663 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 2664 return (rc); 2665 } 2666 eq->flags |= EQ_ALLOCATED; 2667 2668 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 2669 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 2670 if (cntxt_id >= sc->sge.neq) 2671 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 2672 cntxt_id, sc->sge.neq - 1); 2673 sc->sge.eqmap[cntxt_id] = eq; 2674 2675 return (rc); 2676 } 2677 2678 static int 2679 eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 2680 { 2681 int rc, cntxt_id; 2682 struct fw_eq_eth_cmd c; 2683 2684 bzero(&c, sizeof(c)); 2685 2686 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 2687 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 2688 V_FW_EQ_ETH_CMD_VFN(0)); 2689 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 2690 
F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 2691 c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); 2692 c.fetchszm_to_iqid = 2693 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 2694 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 2695 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 2696 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 2697 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 2698 V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 2699 V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); 2700 c.eqaddr = htobe64(eq->ba); 2701 2702 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2703 if (rc != 0) { 2704 device_printf(pi->dev, 2705 "failed to create Ethernet egress queue: %d\n", rc); 2706 return (rc); 2707 } 2708 eq->flags |= EQ_ALLOCATED; 2709 2710 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 2711 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 2712 if (cntxt_id >= sc->sge.neq) 2713 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 2714 cntxt_id, sc->sge.neq - 1); 2715 sc->sge.eqmap[cntxt_id] = eq; 2716 2717 return (rc); 2718 } 2719 2720 #ifdef TCP_OFFLOAD 2721 static int 2722 ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 2723 { 2724 int rc, cntxt_id; 2725 struct fw_eq_ofld_cmd c; 2726 2727 bzero(&c, sizeof(c)); 2728 2729 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 2730 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 2731 V_FW_EQ_OFLD_CMD_VFN(0)); 2732 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 2733 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 2734 c.fetchszm_to_iqid = 2735 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 2736 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 2737 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 2738 c.dcaen_to_eqsize = 2739 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 2740 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 2741 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 2742 V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize)); 2743 c.eqaddr = htobe64(eq->ba); 2744 2745 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2746 if (rc != 0) { 2747 device_printf(pi->dev, 2748 "failed to create egress queue for TCP offload: %d\n", rc); 2749 return (rc); 2750 } 2751 eq->flags |= EQ_ALLOCATED; 2752 2753 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 2754 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 2755 if (cntxt_id >= sc->sge.neq) 2756 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 2757 cntxt_id, sc->sge.neq - 1); 2758 sc->sge.eqmap[cntxt_id] = eq; 2759 2760 return (rc); 2761 } 2762 #endif 2763 2764 static int 2765 alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) 2766 { 2767 int rc; 2768 size_t len; 2769 2770 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 2771 2772 len = eq->qsize * EQ_ESIZE; 2773 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 2774 &eq->ba, (void **)&eq->desc); 2775 if (rc) 2776 return (rc); 2777 2778 eq->cap = eq->qsize - spg_len / EQ_ESIZE; 2779 eq->spg = (void *)&eq->desc[eq->cap]; 2780 eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ 2781 eq->pidx = eq->cidx = 0; 2782 eq->doorbells = sc->doorbells; 2783 2784 switch (eq->flags & EQ_TYPEMASK) { 2785 case EQ_CTRL: 2786 rc = ctrl_eq_alloc(sc, eq); 2787 break; 2788 2789 case EQ_ETH: 2790 rc = eth_eq_alloc(sc, pi, eq); 2791 break; 2792 2793 #ifdef TCP_OFFLOAD 2794 case EQ_OFLD: 2795 rc = ofld_eq_alloc(sc, pi, eq); 2796 break; 2797 #endif 2798 2799 default: 2800 panic("%s: invalid 
eq type %d.", __func__, 2801 eq->flags & EQ_TYPEMASK); 2802 } 2803 if (rc != 0) { 2804 device_printf(sc->dev, 2805 "failed to allocate egress queue(%d): %d", 2806 eq->flags & EQ_TYPEMASK, rc); 2807 } 2808 2809 eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus; 2810 2811 if (isset(&eq->doorbells, DOORBELL_UDB) || 2812 isset(&eq->doorbells, DOORBELL_UDBWC) || 2813 isset(&eq->doorbells, DOORBELL_WCWR)) { 2814 uint32_t s_qpp = sc->sge.eq_s_qpp; 2815 uint32_t mask = (1 << s_qpp) - 1; 2816 volatile uint8_t *udb; 2817 2818 udb = sc->udbs_base + UDBS_DB_OFFSET; 2819 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 2820 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 2821 if (eq->udb_qid > PAGE_SIZE / UDBS_SEG_SIZE) 2822 clrbit(&eq->doorbells, DOORBELL_WCWR); 2823 else { 2824 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 2825 eq->udb_qid = 0; 2826 } 2827 eq->udb = (volatile void *)udb; 2828 } 2829 2830 return (rc); 2831 } 2832 2833 static int 2834 free_eq(struct adapter *sc, struct sge_eq *eq) 2835 { 2836 int rc; 2837 2838 if (eq->flags & EQ_ALLOCATED) { 2839 switch (eq->flags & EQ_TYPEMASK) { 2840 case EQ_CTRL: 2841 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 2842 eq->cntxt_id); 2843 break; 2844 2845 case EQ_ETH: 2846 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 2847 eq->cntxt_id); 2848 break; 2849 2850 #ifdef TCP_OFFLOAD 2851 case EQ_OFLD: 2852 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 2853 eq->cntxt_id); 2854 break; 2855 #endif 2856 2857 default: 2858 panic("%s: invalid eq type %d.", __func__, 2859 eq->flags & EQ_TYPEMASK); 2860 } 2861 if (rc != 0) { 2862 device_printf(sc->dev, 2863 "failed to free egress queue (%d): %d\n", 2864 eq->flags & EQ_TYPEMASK, rc); 2865 return (rc); 2866 } 2867 eq->flags &= ~EQ_ALLOCATED; 2868 } 2869 2870 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 2871 2872 if (mtx_initialized(&eq->eq_lock)) 2873 mtx_destroy(&eq->eq_lock); 2874 2875 bzero(eq, sizeof(*eq)); 2876 return (0); 2877 } 2878 2879 static int 2880 alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq, 2881 struct sysctl_oid *oid) 2882 { 2883 int rc; 2884 struct sysctl_ctx_list *ctx = pi ? 
&pi->ctx : &sc->ctx; 2885 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2886 2887 rc = alloc_eq(sc, pi, &wrq->eq); 2888 if (rc) 2889 return (rc); 2890 2891 wrq->adapter = sc; 2892 STAILQ_INIT(&wrq->wr_list); 2893 2894 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 2895 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 2896 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 2897 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 2898 "consumer index"); 2899 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 2900 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 2901 "producer index"); 2902 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD, 2903 &wrq->tx_wrs, "# of work requests"); 2904 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, 2905 &wrq->no_desc, 0, 2906 "# of times queue ran out of hardware descriptors"); 2907 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, 2908 &wrq->eq.unstalled, 0, "# of times queue recovered after stall"); 2909 2910 return (rc); 2911 } 2912 2913 static int 2914 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 2915 { 2916 int rc; 2917 2918 rc = free_eq(sc, &wrq->eq); 2919 if (rc) 2920 return (rc); 2921 2922 bzero(wrq, sizeof(*wrq)); 2923 return (0); 2924 } 2925 2926 static int 2927 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, 2928 struct sysctl_oid *oid) 2929 { 2930 int rc; 2931 struct adapter *sc = pi->adapter; 2932 struct sge_eq *eq = &txq->eq; 2933 char name[16]; 2934 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2935 2936 rc = alloc_eq(sc, pi, eq); 2937 if (rc) 2938 return (rc); 2939 2940 txq->ifp = pi->ifp; 2941 2942 txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, 2943 M_ZERO | M_WAITOK); 2944 txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); 2945 2946 rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, 2947 BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, 2948 BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag); 2949 if (rc != 0) { 2950 device_printf(sc->dev, 2951 "failed to create tx DMA tag: %d\n", rc); 2952 return (rc); 2953 } 2954 2955 /* 2956 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE 2957 * limit for any WR). txq->no_dmamap events shouldn't occur if maps is 2958 * sized for the worst case. 
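	 * That works out to the eq->qsize * 10 / 8 maps requested from
	 * t4_alloc_tx_maps below (10 maps for every 8 descriptors).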
2959 */ 2960 rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8, 2961 M_WAITOK); 2962 if (rc != 0) { 2963 device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc); 2964 return (rc); 2965 } 2966 2967 snprintf(name, sizeof(name), "%d", idx); 2968 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 2969 NULL, "tx queue"); 2970 children = SYSCTL_CHILDREN(oid); 2971 2972 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 2973 &eq->cntxt_id, 0, "SGE context id of the queue"); 2974 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", 2975 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 2976 "consumer index"); 2977 SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx", 2978 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 2979 "producer index"); 2980 2981 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 2982 &txq->txcsum, "# of times hardware assisted with checksum"); 2983 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion", 2984 CTLFLAG_RD, &txq->vlan_insertion, 2985 "# of times hardware inserted 802.1Q tag"); 2986 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 2987 &txq->tso_wrs, "# of TSO work requests"); 2988 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 2989 &txq->imm_wrs, "# of work requests with immediate data"); 2990 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 2991 &txq->sgl_wrs, "# of work requests with direct SGL"); 2992 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 2993 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 2994 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD, 2995 &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)"); 2996 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD, 2997 &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests"); 2998 2999 SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD, 3000 &txq->br->br_drops, "# of drops in the buf_ring for this queue"); 3001 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD, 3002 &txq->no_dmamap, 0, "# of times txq ran out of DMA maps"); 3003 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, 3004 &txq->no_desc, 0, "# of times txq ran out of hardware descriptors"); 3005 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD, 3006 &eq->egr_update, 0, "egress update notifications from the SGE"); 3007 SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, 3008 &eq->unstalled, 0, "# of times txq recovered after stall"); 3009 3010 return (rc); 3011 } 3012 3013 static int 3014 free_txq(struct port_info *pi, struct sge_txq *txq) 3015 { 3016 int rc; 3017 struct adapter *sc = pi->adapter; 3018 struct sge_eq *eq = &txq->eq; 3019 3020 rc = free_eq(sc, eq); 3021 if (rc) 3022 return (rc); 3023 3024 free(txq->sdesc, M_CXGBE); 3025 3026 if (txq->txmaps.maps) 3027 t4_free_tx_maps(&txq->txmaps, txq->tx_tag); 3028 3029 buf_ring_free(txq->br, M_CXGBE); 3030 3031 if (txq->tx_tag) 3032 bus_dma_tag_destroy(txq->tx_tag); 3033 3034 bzero(txq, sizeof(*txq)); 3035 return (0); 3036 } 3037 3038 static void 3039 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3040 { 3041 bus_addr_t *ba = arg; 3042 3043 KASSERT(nseg == 1, 3044 ("%s meant for single segment mappings only.", __func__)); 3045 3046 *ba = error ? 
0 : segs->ds_addr; 3047 } 3048 3049 static inline bool 3050 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl) 3051 { 3052 *ctrl = (void *)((uintptr_t)iq->cdesc + 3053 (iq->esize - sizeof(struct rsp_ctrl))); 3054 3055 return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen); 3056 } 3057 3058 static inline void 3059 iq_next(struct sge_iq *iq) 3060 { 3061 iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize); 3062 if (__predict_false(++iq->cidx == iq->qsize - 1)) { 3063 iq->cidx = 0; 3064 iq->gen ^= 1; 3065 iq->cdesc = iq->desc; 3066 } 3067 } 3068 3069 #define FL_HW_IDX(x) ((x) >> 3) 3070 static inline void 3071 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 3072 { 3073 int ndesc = fl->pending / 8; 3074 uint32_t v; 3075 3076 if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx)) 3077 ndesc--; /* hold back one credit */ 3078 3079 if (ndesc <= 0) 3080 return; /* nothing to do */ 3081 3082 v = F_DBPRIO | V_QID(fl->cntxt_id) | V_PIDX(ndesc); 3083 if (is_t5(sc)) 3084 v |= F_DBTYPE; 3085 3086 wmb(); 3087 3088 t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), v); 3089 fl->pending -= ndesc * 8; 3090 } 3091 3092 /* 3093 * Fill up the freelist by upto nbufs and maybe ring its doorbell. 3094 * 3095 * Returns non-zero to indicate that it should be added to the list of starving 3096 * freelists. 3097 */ 3098 static int 3099 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) 3100 { 3101 __be64 *d = &fl->desc[fl->pidx]; 3102 struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; 3103 bus_dma_tag_t tag; 3104 bus_addr_t pa; 3105 caddr_t cl; 3106 int rc; 3107 3108 FL_LOCK_ASSERT_OWNED(fl); 3109 #ifdef INVARIANTS 3110 if (fl->flags & FL_BUF_PACKING) 3111 KASSERT(sd->tag_idx == 0, 3112 ("%s: expected tag 0 but found tag %d at pidx %u instead", 3113 __func__, sd->tag_idx, fl->pidx)); 3114 #endif 3115 3116 if (nbufs > fl->needed) 3117 nbufs = fl->needed; 3118 3119 while (nbufs--) { 3120 3121 if (sd->cl != NULL) { 3122 3123 KASSERT(*d == sd->ba_hwtag, 3124 ("%s: recyling problem at pidx %d", 3125 __func__, fl->pidx)); 3126 3127 if (fl->flags & FL_BUF_PACKING) { 3128 u_int *refcount = find_buf_refcnt(sd->cl); 3129 3130 if (atomic_fetchadd_int(refcount, -1) == 1) { 3131 *refcount = 1; /* reinstate */ 3132 d++; 3133 goto recycled; 3134 } 3135 sd->cl = NULL; /* gave up my reference */ 3136 } else { 3137 /* 3138 * This happens when a frame small enough to fit 3139 * entirely in an mbuf was received in cl last 3140 * time. We'd held on to cl and can reuse it 3141 * now. Note that we reuse a cluster of the old 3142 * size if fl->tag_idx is no longer the same as 3143 * sd->tag_idx. 3144 */ 3145 d++; 3146 goto recycled; 3147 } 3148 } 3149 3150 if (__predict_false(fl->tag_idx != sd->tag_idx)) { 3151 bus_dmamap_t map; 3152 bus_dma_tag_t newtag = fl->tag[fl->tag_idx]; 3153 bus_dma_tag_t oldtag = fl->tag[sd->tag_idx]; 3154 3155 /* 3156 * An MTU change can get us here. Discard the old map 3157 * which was created with the old tag, but only if 3158 * we're able to get a new one. 
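			 * If a new map can't be created the old map and
			 * tag_idx are kept, and the buffer allocated below
			 * comes from the old size's zone.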
3159 */ 3160 rc = bus_dmamap_create(newtag, 0, &map); 3161 if (rc == 0) { 3162 bus_dmamap_destroy(oldtag, sd->map); 3163 sd->map = map; 3164 sd->tag_idx = fl->tag_idx; 3165 } 3166 } 3167 3168 tag = fl->tag[sd->tag_idx]; 3169 3170 cl = uma_zalloc(FL_BUF_ZONE(sc, sd->tag_idx), M_NOWAIT); 3171 if (cl == NULL) 3172 break; 3173 if (fl->flags & FL_BUF_PACKING) { 3174 *find_buf_refcnt(cl) = 1; 3175 cl += MSIZE; 3176 } 3177 3178 rc = bus_dmamap_load(tag, sd->map, cl, 3179 FL_BUF_SIZE(sc, sd->tag_idx), oneseg_dma_callback, &pa, 0); 3180 if (rc != 0 || pa == 0) { 3181 fl->dmamap_failed++; 3182 if (fl->flags & FL_BUF_PACKING) 3183 cl -= MSIZE; 3184 uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), cl); 3185 break; 3186 } 3187 3188 sd->cl = cl; 3189 *d++ = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx)); 3190 3191 #ifdef INVARIANTS 3192 sd->ba_hwtag = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx)); 3193 #endif 3194 3195 recycled: 3196 fl->pending++; 3197 fl->needed--; 3198 sd++; 3199 if (++fl->pidx == fl->cap) { 3200 fl->pidx = 0; 3201 sd = fl->sdesc; 3202 d = fl->desc; 3203 } 3204 } 3205 3206 if (fl->pending >= 8) 3207 ring_fl_db(sc, fl); 3208 3209 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 3210 } 3211 3212 /* 3213 * Attempt to refill all starving freelists. 3214 */ 3215 static void 3216 refill_sfl(void *arg) 3217 { 3218 struct adapter *sc = arg; 3219 struct sge_fl *fl, *fl_temp; 3220 3221 mtx_lock(&sc->sfl_lock); 3222 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 3223 FL_LOCK(fl); 3224 refill_fl(sc, fl, 64); 3225 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 3226 TAILQ_REMOVE(&sc->sfl, fl, link); 3227 fl->flags &= ~FL_STARVING; 3228 } 3229 FL_UNLOCK(fl); 3230 } 3231 3232 if (!TAILQ_EMPTY(&sc->sfl)) 3233 callout_schedule(&sc->sfl_callout, hz / 5); 3234 mtx_unlock(&sc->sfl_lock); 3235 } 3236 3237 static int 3238 alloc_fl_sdesc(struct sge_fl *fl) 3239 { 3240 struct fl_sdesc *sd; 3241 bus_dma_tag_t tag; 3242 int i, rc; 3243 3244 fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE, 3245 M_ZERO | M_WAITOK); 3246 3247 tag = fl->tag[fl->tag_idx]; 3248 sd = fl->sdesc; 3249 for (i = 0; i < fl->cap; i++, sd++) { 3250 3251 sd->tag_idx = fl->tag_idx; 3252 rc = bus_dmamap_create(tag, 0, &sd->map); 3253 if (rc != 0) 3254 goto failed; 3255 } 3256 3257 return (0); 3258 failed: 3259 while (--i >= 0) { 3260 sd--; 3261 bus_dmamap_destroy(tag, sd->map); 3262 } 3263 KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__)); 3264 3265 free(fl->sdesc, M_CXGBE); 3266 fl->sdesc = NULL; 3267 3268 return (rc); 3269 } 3270 3271 static void 3272 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 3273 { 3274 struct fl_sdesc *sd; 3275 int i; 3276 3277 sd = fl->sdesc; 3278 for (i = 0; i < fl->cap; i++, sd++) { 3279 3280 if (sd->cl) { 3281 bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); 3282 uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), sd->cl); 3283 sd->cl = NULL; 3284 } 3285 3286 bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map); 3287 } 3288 3289 free(fl->sdesc, M_CXGBE); 3290 fl->sdesc = NULL; 3291 } 3292 3293 int 3294 t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count, 3295 int flags) 3296 { 3297 struct tx_map *txm; 3298 int i, rc; 3299 3300 txmaps->map_total = txmaps->map_avail = count; 3301 txmaps->map_cidx = txmaps->map_pidx = 0; 3302 3303 txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, 3304 M_ZERO | flags); 3305 3306 txm = txmaps->maps; 3307 for (i = 0; i < count; i++, txm++) { 3308 rc = bus_dmamap_create(tx_tag, 0, &txm->map); 3309 if (rc != 0) 3310 goto failed; 3311 } 3312 
3313 return (0); 3314 failed: 3315 while (--i >= 0) { 3316 txm--; 3317 bus_dmamap_destroy(tx_tag, txm->map); 3318 } 3319 KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__)); 3320 3321 free(txmaps->maps, M_CXGBE); 3322 txmaps->maps = NULL; 3323 3324 return (rc); 3325 } 3326 3327 void 3328 t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag) 3329 { 3330 struct tx_map *txm; 3331 int i; 3332 3333 txm = txmaps->maps; 3334 for (i = 0; i < txmaps->map_total; i++, txm++) { 3335 3336 if (txm->m) { 3337 bus_dmamap_unload(tx_tag, txm->map); 3338 m_freem(txm->m); 3339 txm->m = NULL; 3340 } 3341 3342 bus_dmamap_destroy(tx_tag, txm->map); 3343 } 3344 3345 free(txmaps->maps, M_CXGBE); 3346 txmaps->maps = NULL; 3347 } 3348 3349 /* 3350 * We'll do immediate data tx for non-TSO, but only when not coalescing. We're 3351 * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes 3352 * of immediate data. 3353 */ 3354 #define IMM_LEN ( \ 3355 2 * EQ_ESIZE \ 3356 - sizeof(struct fw_eth_tx_pkt_wr) \ 3357 - sizeof(struct cpl_tx_pkt_core)) 3358 3359 /* 3360 * Returns non-zero on failure, no need to cleanup anything in that case. 3361 * 3362 * Note 1: We always try to defrag the mbuf if required and return EFBIG only 3363 * if the resulting chain still won't fit in a tx descriptor. 3364 * 3365 * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf 3366 * does not have the TCP header in it. 3367 */ 3368 static int 3369 get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl, 3370 int sgl_only) 3371 { 3372 struct mbuf *m = *fp; 3373 struct tx_maps *txmaps; 3374 struct tx_map *txm; 3375 int rc, defragged = 0, n; 3376 3377 TXQ_LOCK_ASSERT_OWNED(txq); 3378 3379 if (m->m_pkthdr.tso_segsz) 3380 sgl_only = 1; /* Do not allow immediate data with LSO */ 3381 3382 start: sgl->nsegs = 0; 3383 3384 if (m->m_pkthdr.len <= IMM_LEN && !sgl_only) 3385 return (0); /* nsegs = 0 tells caller to use imm. tx */ 3386 3387 txmaps = &txq->txmaps; 3388 if (txmaps->map_avail == 0) { 3389 txq->no_dmamap++; 3390 return (ENOMEM); 3391 } 3392 txm = &txmaps->maps[txmaps->map_pidx]; 3393 3394 if (m->m_pkthdr.tso_segsz && m->m_len < 50) { 3395 *fp = m_pullup(m, 50); 3396 m = *fp; 3397 if (m == NULL) 3398 return (ENOBUFS); 3399 } 3400 3401 rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg, 3402 &sgl->nsegs, BUS_DMA_NOWAIT); 3403 if (rc == EFBIG && defragged == 0) { 3404 m = m_defrag(m, M_NOWAIT); 3405 if (m == NULL) 3406 return (EFBIG); 3407 3408 defragged = 1; 3409 *fp = m; 3410 goto start; 3411 } 3412 if (rc != 0) 3413 return (rc); 3414 3415 txm->m = m; 3416 txmaps->map_avail--; 3417 if (++txmaps->map_pidx == txmaps->map_total) 3418 txmaps->map_pidx = 0; 3419 3420 KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS, 3421 ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs)); 3422 3423 /* 3424 * Store the # of flits required to hold this frame's SGL in nflits. An 3425 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by 3426 * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used 3427 * then len1 must be set to 0. 3428 */ 3429 n = sgl->nsegs - 1; 3430 sgl->nflits = (3 * n) / 2 + (n & 1) + 2; 3431 3432 return (0); 3433 } 3434 3435 3436 /* 3437 * Releases all the txq resources used up in the specified sgl. 
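 * A frame uses at most one DMA map, so this backs map_pidx out by one and
 * unloads that map.  It is a no-op for immediate-data frames (nsegs == 0).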
3438 */ 3439 static int 3440 free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl) 3441 { 3442 struct tx_maps *txmaps; 3443 struct tx_map *txm; 3444 3445 TXQ_LOCK_ASSERT_OWNED(txq); 3446 3447 if (sgl->nsegs == 0) 3448 return (0); /* didn't use any map */ 3449 3450 txmaps = &txq->txmaps; 3451 3452 /* 1 pkt uses exactly 1 map, back it out */ 3453 3454 txmaps->map_avail++; 3455 if (txmaps->map_pidx > 0) 3456 txmaps->map_pidx--; 3457 else 3458 txmaps->map_pidx = txmaps->map_total - 1; 3459 3460 txm = &txmaps->maps[txmaps->map_pidx]; 3461 bus_dmamap_unload(txq->tx_tag, txm->map); 3462 txm->m = NULL; 3463 3464 return (0); 3465 } 3466 3467 static int 3468 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m, 3469 struct sgl *sgl) 3470 { 3471 struct sge_eq *eq = &txq->eq; 3472 struct fw_eth_tx_pkt_wr *wr; 3473 struct cpl_tx_pkt_core *cpl; 3474 uint32_t ctrl; /* used in many unrelated places */ 3475 uint64_t ctrl1; 3476 int nflits, ndesc, pktlen; 3477 struct tx_sdesc *txsd; 3478 caddr_t dst; 3479 3480 TXQ_LOCK_ASSERT_OWNED(txq); 3481 3482 pktlen = m->m_pkthdr.len; 3483 3484 /* 3485 * Do we have enough flits to send this frame out? 3486 */ 3487 ctrl = sizeof(struct cpl_tx_pkt_core); 3488 if (m->m_pkthdr.tso_segsz) { 3489 nflits = TXPKT_LSO_WR_HDR; 3490 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 3491 } else 3492 nflits = TXPKT_WR_HDR; 3493 if (sgl->nsegs > 0) 3494 nflits += sgl->nflits; 3495 else { 3496 nflits += howmany(pktlen, 8); 3497 ctrl += pktlen; 3498 } 3499 ndesc = howmany(nflits, 8); 3500 if (ndesc > eq->avail) 3501 return (ENOMEM); 3502 3503 /* Firmware work request header */ 3504 wr = (void *)&eq->desc[eq->pidx]; 3505 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 3506 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 3507 ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); 3508 if (eq->avail == ndesc) { 3509 if (!(eq->flags & EQ_CRFLUSHED)) { 3510 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 3511 eq->flags |= EQ_CRFLUSHED; 3512 } 3513 eq->flags |= EQ_STALLED; 3514 } 3515 3516 wr->equiq_to_len16 = htobe32(ctrl); 3517 wr->r3 = 0; 3518 3519 if (m->m_pkthdr.tso_segsz) { 3520 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 3521 struct ether_header *eh; 3522 void *l3hdr; 3523 #if defined(INET) || defined(INET6) 3524 struct tcphdr *tcp; 3525 #endif 3526 uint16_t eh_type; 3527 3528 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 3529 F_LSO_LAST_SLICE; 3530 3531 eh = mtod(m, struct ether_header *); 3532 eh_type = ntohs(eh->ether_type); 3533 if (eh_type == ETHERTYPE_VLAN) { 3534 struct ether_vlan_header *evh = (void *)eh; 3535 3536 ctrl |= V_LSO_ETHHDR_LEN(1); 3537 l3hdr = evh + 1; 3538 eh_type = ntohs(evh->evl_proto); 3539 } else 3540 l3hdr = eh + 1; 3541 3542 switch (eh_type) { 3543 #ifdef INET6 3544 case ETHERTYPE_IPV6: 3545 { 3546 struct ip6_hdr *ip6 = l3hdr; 3547 3548 /* 3549 * XXX-BZ For now we do not pretend to support 3550 * IPv6 extension headers. 
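			 * The TCP header is assumed to follow the fixed
			 * 40-byte IPv6 header directly (ip6 + 1 below).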
3551 */ 3552 KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO " 3553 "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt)); 3554 tcp = (struct tcphdr *)(ip6 + 1); 3555 ctrl |= F_LSO_IPV6; 3556 ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) | 3557 V_LSO_TCPHDR_LEN(tcp->th_off); 3558 break; 3559 } 3560 #endif 3561 #ifdef INET 3562 case ETHERTYPE_IP: 3563 { 3564 struct ip *ip = l3hdr; 3565 3566 tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4); 3567 ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) | 3568 V_LSO_TCPHDR_LEN(tcp->th_off); 3569 break; 3570 } 3571 #endif 3572 default: 3573 panic("%s: CSUM_TSO but no supported IP version " 3574 "(0x%04x)", __func__, eh_type); 3575 } 3576 3577 lso->lso_ctrl = htobe32(ctrl); 3578 lso->ipid_ofst = htobe16(0); 3579 lso->mss = htobe16(m->m_pkthdr.tso_segsz); 3580 lso->seqno_offset = htobe32(0); 3581 lso->len = htobe32(pktlen); 3582 3583 cpl = (void *)(lso + 1); 3584 3585 txq->tso_wrs++; 3586 } else 3587 cpl = (void *)(wr + 1); 3588 3589 /* Checksum offload */ 3590 ctrl1 = 0; 3591 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO))) 3592 ctrl1 |= F_TXPKT_IPCSUM_DIS; 3593 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 3594 CSUM_TCP_IPV6 | CSUM_TSO))) 3595 ctrl1 |= F_TXPKT_L4CSUM_DIS; 3596 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 3597 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 3598 txq->txcsum++; /* some hardware assistance provided */ 3599 3600 /* VLAN tag insertion */ 3601 if (m->m_flags & M_VLANTAG) { 3602 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 3603 txq->vlan_insertion++; 3604 } 3605 3606 /* CPL header */ 3607 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3608 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 3609 cpl->pack = 0; 3610 cpl->len = htobe16(pktlen); 3611 cpl->ctrl1 = htobe64(ctrl1); 3612 3613 /* Software descriptor */ 3614 txsd = &txq->sdesc[eq->pidx]; 3615 txsd->desc_used = ndesc; 3616 3617 eq->pending += ndesc; 3618 eq->avail -= ndesc; 3619 eq->pidx += ndesc; 3620 if (eq->pidx >= eq->cap) 3621 eq->pidx -= eq->cap; 3622 3623 /* SGL */ 3624 dst = (void *)(cpl + 1); 3625 if (sgl->nsegs > 0) { 3626 txsd->credits = 1; 3627 txq->sgl_wrs++; 3628 write_sgl_to_txd(eq, sgl, &dst); 3629 } else { 3630 txsd->credits = 0; 3631 txq->imm_wrs++; 3632 for (; m; m = m->m_next) { 3633 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 3634 #ifdef INVARIANTS 3635 pktlen -= m->m_len; 3636 #endif 3637 } 3638 #ifdef INVARIANTS 3639 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 3640 #endif 3641 3642 } 3643 3644 txq->txpkt_wrs++; 3645 return (0); 3646 } 3647 3648 /* 3649 * Returns 0 to indicate that m has been accepted into a coalesced tx work 3650 * request. It has either been folded into txpkts or txpkts was flushed and m 3651 * has started a new coalesced work request (as the first frame in a fresh 3652 * txpkts). 3653 * 3654 * Returns non-zero to indicate a failure - caller is responsible for 3655 * transmitting m, if there was anything in txpkts it has been flushed. 
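 * A frame is coalesced only if it is not a TSO frame, the combined WR fits in
 * TX_WR_FLITS and in the available descriptors (eq->avail * 8 flits), and the
 * total payload stays under 64K.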
3656 */ 3657 static int 3658 add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts, 3659 struct mbuf *m, struct sgl *sgl) 3660 { 3661 struct sge_eq *eq = &txq->eq; 3662 int can_coalesce; 3663 struct tx_sdesc *txsd; 3664 int flits; 3665 3666 TXQ_LOCK_ASSERT_OWNED(txq); 3667 3668 KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__)); 3669 3670 if (txpkts->npkt > 0) { 3671 flits = TXPKTS_PKT_HDR + sgl->nflits; 3672 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 3673 txpkts->nflits + flits <= TX_WR_FLITS && 3674 txpkts->nflits + flits <= eq->avail * 8 && 3675 txpkts->plen + m->m_pkthdr.len < 65536; 3676 3677 if (can_coalesce) { 3678 txpkts->npkt++; 3679 txpkts->nflits += flits; 3680 txpkts->plen += m->m_pkthdr.len; 3681 3682 txsd = &txq->sdesc[eq->pidx]; 3683 txsd->credits++; 3684 3685 return (0); 3686 } 3687 3688 /* 3689 * Couldn't coalesce m into txpkts. The first order of business 3690 * is to send txpkts on its way. Then we'll revisit m. 3691 */ 3692 write_txpkts_wr(txq, txpkts); 3693 } 3694 3695 /* 3696 * Check if we can start a new coalesced tx work request with m as 3697 * the first packet in it. 3698 */ 3699 3700 KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__)); 3701 3702 flits = TXPKTS_WR_HDR + sgl->nflits; 3703 can_coalesce = m->m_pkthdr.tso_segsz == 0 && 3704 flits <= eq->avail * 8 && flits <= TX_WR_FLITS; 3705 3706 if (can_coalesce == 0) 3707 return (EINVAL); 3708 3709 /* 3710 * Start a fresh coalesced tx WR with m as the first frame in it. 3711 */ 3712 txpkts->npkt = 1; 3713 txpkts->nflits = flits; 3714 txpkts->flitp = &eq->desc[eq->pidx].flit[2]; 3715 txpkts->plen = m->m_pkthdr.len; 3716 3717 txsd = &txq->sdesc[eq->pidx]; 3718 txsd->credits = 1; 3719 3720 return (0); 3721 } 3722 3723 /* 3724 * Note that write_txpkts_wr can never run out of hardware descriptors (but 3725 * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for 3726 * coalescing only if sufficient hardware descriptors are available. 
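 * The KASSERT on eq->avail in this function relies on that guarantee.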
3727 */ 3728 static void 3729 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) 3730 { 3731 struct sge_eq *eq = &txq->eq; 3732 struct fw_eth_tx_pkts_wr *wr; 3733 struct tx_sdesc *txsd; 3734 uint32_t ctrl; 3735 int ndesc; 3736 3737 TXQ_LOCK_ASSERT_OWNED(txq); 3738 3739 ndesc = howmany(txpkts->nflits, 8); 3740 3741 wr = (void *)&eq->desc[eq->pidx]; 3742 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 3743 ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); 3744 if (eq->avail == ndesc) { 3745 if (!(eq->flags & EQ_CRFLUSHED)) { 3746 ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; 3747 eq->flags |= EQ_CRFLUSHED; 3748 } 3749 eq->flags |= EQ_STALLED; 3750 } 3751 wr->equiq_to_len16 = htobe32(ctrl); 3752 wr->plen = htobe16(txpkts->plen); 3753 wr->npkt = txpkts->npkt; 3754 wr->r3 = wr->type = 0; 3755 3756 /* Everything else already written */ 3757 3758 txsd = &txq->sdesc[eq->pidx]; 3759 txsd->desc_used = ndesc; 3760 3761 KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__)); 3762 3763 eq->pending += ndesc; 3764 eq->avail -= ndesc; 3765 eq->pidx += ndesc; 3766 if (eq->pidx >= eq->cap) 3767 eq->pidx -= eq->cap; 3768 3769 txq->txpkts_pkts += txpkts->npkt; 3770 txq->txpkts_wrs++; 3771 txpkts->npkt = 0; /* emptied */ 3772 } 3773 3774 static inline void 3775 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq, 3776 struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl) 3777 { 3778 struct ulp_txpkt *ulpmc; 3779 struct ulptx_idata *ulpsc; 3780 struct cpl_tx_pkt_core *cpl; 3781 struct sge_eq *eq = &txq->eq; 3782 uintptr_t flitp, start, end; 3783 uint64_t ctrl; 3784 caddr_t dst; 3785 3786 KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__)); 3787 3788 start = (uintptr_t)eq->desc; 3789 end = (uintptr_t)eq->spg; 3790 3791 /* Checksum offload */ 3792 ctrl = 0; 3793 if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO))) 3794 ctrl |= F_TXPKT_IPCSUM_DIS; 3795 if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 3796 CSUM_TCP_IPV6 | CSUM_TSO))) 3797 ctrl |= F_TXPKT_L4CSUM_DIS; 3798 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 3799 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 3800 txq->txcsum++; /* some hardware assistance provided */ 3801 3802 /* VLAN tag insertion */ 3803 if (m->m_flags & M_VLANTAG) { 3804 ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 3805 txq->vlan_insertion++; 3806 } 3807 3808 /* 3809 * The previous packet's SGL must have ended at a 16 byte boundary (this 3810 * is required by the firmware/hardware). It follows that flitp cannot 3811 * wrap around between the ULPTX master command and ULPTX subcommand (8 3812 * bytes each), and that it can not wrap around in the middle of the 3813 * cpl_tx_pkt_core either. 
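	 * A wrap is possible only at component boundaries, which is what the
	 * flitp == end checks below handle.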
3814 */ 3815 flitp = (uintptr_t)txpkts->flitp; 3816 KASSERT((flitp & 0xf) == 0, 3817 ("%s: last SGL did not end at 16 byte boundary: %p", 3818 __func__, txpkts->flitp)); 3819 3820 /* ULP master command */ 3821 ulpmc = (void *)flitp; 3822 ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) | 3823 V_ULP_TXPKT_FID(eq->iqid)); 3824 ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) + 3825 sizeof(*cpl) + 8 * sgl->nflits, 16)); 3826 3827 /* ULP subcommand */ 3828 ulpsc = (void *)(ulpmc + 1); 3829 ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) | 3830 F_ULP_TX_SC_MORE); 3831 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 3832 3833 flitp += sizeof(*ulpmc) + sizeof(*ulpsc); 3834 if (flitp == end) 3835 flitp = start; 3836 3837 /* CPL_TX_PKT */ 3838 cpl = (void *)flitp; 3839 cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3840 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf)); 3841 cpl->pack = 0; 3842 cpl->len = htobe16(m->m_pkthdr.len); 3843 cpl->ctrl1 = htobe64(ctrl); 3844 3845 flitp += sizeof(*cpl); 3846 if (flitp == end) 3847 flitp = start; 3848 3849 /* SGL for this frame */ 3850 dst = (caddr_t)flitp; 3851 txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst); 3852 txpkts->flitp = (void *)dst; 3853 3854 KASSERT(((uintptr_t)dst & 0xf) == 0, 3855 ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst)); 3856 } 3857 3858 /* 3859 * If the SGL ends on an address that is not 16 byte aligned, this function will 3860 * add a 0 filled flit at the end. It returns 1 in that case. 3861 */ 3862 static int 3863 write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to) 3864 { 3865 __be64 *flitp, *end; 3866 struct ulptx_sgl *usgl; 3867 bus_dma_segment_t *seg; 3868 int i, padded; 3869 3870 KASSERT(sgl->nsegs > 0 && sgl->nflits > 0, 3871 ("%s: bad SGL - nsegs=%d, nflits=%d", 3872 __func__, sgl->nsegs, sgl->nflits)); 3873 3874 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 3875 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 3876 3877 flitp = (__be64 *)(*to); 3878 end = flitp + sgl->nflits; 3879 seg = &sgl->seg[0]; 3880 usgl = (void *)flitp; 3881 3882 /* 3883 * We start at a 16 byte boundary somewhere inside the tx descriptor 3884 * ring, so we're at least 16 bytes away from the status page. There is 3885 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
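	 * Only the remaining (len, addr) pairs may run past the status page;
	 * they are written flit by flit with an explicit wrap check.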
/*
 * If the SGL ends on an address that is not 16 byte aligned, this function
 * will add a 0 filled flit at the end.  It returns 1 in that case.
 */
static int
write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
{
        __be64 *flitp, *end;
        struct ulptx_sgl *usgl;
        bus_dma_segment_t *seg;
        int i, padded;

        KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
            ("%s: bad SGL - nsegs=%d, nflits=%d",
            __func__, sgl->nsegs, sgl->nflits));

        KASSERT(((uintptr_t)(*to) & 0xf) == 0,
            ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));

        flitp = (__be64 *)(*to);
        end = flitp + sgl->nflits;
        seg = &sgl->seg[0];
        usgl = (void *)flitp;

        /*
         * We start at a 16 byte boundary somewhere inside the tx descriptor
         * ring, so we're at least 16 bytes away from the status page.  There
         * is no chance of a wrap around in the middle of usgl (which is 16
         * bytes).
         */

        usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
            V_ULPTX_NSGE(sgl->nsegs));
        usgl->len0 = htobe32(seg->ds_len);
        usgl->addr0 = htobe64(seg->ds_addr);
        seg++;

        if ((uintptr_t)end <= (uintptr_t)eq->spg) {

                /* Won't wrap around at all */

                for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
                        usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
                        usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
                }
                if (i & 1)
                        usgl->sge[i / 2].len[1] = htobe32(0);
        } else {

                /* Will wrap somewhere in the rest of the SGL */

                /* 2 flits already written, write the rest flit by flit */
                flitp = (void *)(usgl + 1);
                for (i = 0; i < sgl->nflits - 2; i++) {
                        if ((uintptr_t)flitp == (uintptr_t)eq->spg)
                                flitp = (void *)eq->desc;
                        *flitp++ = get_flit(seg, sgl->nsegs - 1, i);
                }
                end = flitp;
        }

        if ((uintptr_t)end & 0xf) {
                *(uint64_t *)end = 0;
                end++;
                padded = 1;
        } else
                padded = 0;

        if ((uintptr_t)end == (uintptr_t)eq->spg)
                *to = (void *)eq->desc;
        else
                *to = (void *)end;

        return (padded);
}

static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
        if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
                bcopy(from, *to, len);
                (*to) += len;
        } else {
                int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);

                bcopy(from, *to, portion);
                from += portion;
                portion = len - portion;        /* remaining */
                bcopy(from, (void *)eq->desc, portion);
                (*to) = (caddr_t)eq->desc + portion;
        }
}

static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq)
{
        u_int db, pending;

        db = eq->doorbells;
        pending = eq->pending;
        if (pending > 1)
                clrbit(&db, DOORBELL_WCWR);
        eq->pending = 0;
        wmb();

        switch (ffs(db) - 1) {
        case DOORBELL_UDB:
                *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
                return;

        case DOORBELL_WCWR: {
                volatile uint64_t *dst, *src;
                int i;

                /*
                 * Queues whose 128B doorbell segment fits in the page do not
                 * use relative qid (udb_qid is always 0).  Only queues with
                 * doorbell segments can do WCWR.
                 */
                KASSERT(eq->udb_qid == 0 && pending == 1,
                    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
                    __func__, eq->doorbells, pending, eq->pidx, eq));

                dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
                    UDBS_DB_OFFSET);
                i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
                src = (void *)&eq->desc[i];
                while (src != (void *)&eq->desc[i + 1])
                        *dst++ = *src++;
                wmb();
                return;
        }

        case DOORBELL_UDBWC:
                *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
                wmb();
                return;

        case DOORBELL_KDB:
                t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
                    V_QID(eq->cntxt_id) | V_PIDX(pending));
                return;
        }
}

static inline int
reclaimable(struct sge_eq *eq)
{
        unsigned int cidx;

        cidx = eq->spg->cidx;   /* stable snapshot */
        cidx = be16toh(cidx);

        if (cidx >= eq->cidx)
                return (cidx - eq->cidx);
        else
                return (cidx + eq->cap - eq->cidx);
}

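/*
 * Example (editorial): with cap = 1024, a status page cidx of 10, and a
 * driver cidx of 1000, reclaimable() returns 10 + 1024 - 1000 = 34, i.e.
 * the hardware has consumed 34 descriptors that the driver has not yet
 * reclaimed.  The status page copy is snapshotted into a local variable
 * because the hardware keeps updating it while the queue is live.
 */
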
/*
 * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
 * many as possible but stop when there are around "n" mbufs to free.
 *
 * The actual number reclaimed is provided as the return value.
 */
static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
        struct tx_sdesc *txsd;
        struct tx_maps *txmaps;
        struct tx_map *txm;
        unsigned int reclaimed, maps;
        struct sge_eq *eq = &txq->eq;

        TXQ_LOCK_ASSERT_OWNED(txq);

        if (can_reclaim == 0)
                can_reclaim = reclaimable(eq);

        maps = reclaimed = 0;
        while (can_reclaim && maps < n) {
                int ndesc;

                txsd = &txq->sdesc[eq->cidx];
                ndesc = txsd->desc_used;

                /* Firmware doesn't return "partial" credits. */
                KASSERT(can_reclaim >= ndesc,
                    ("%s: unexpected number of credits: %d, %d",
                    __func__, can_reclaim, ndesc));

                maps += txsd->credits;

                reclaimed += ndesc;
                can_reclaim -= ndesc;

                eq->cidx += ndesc;
                if (__predict_false(eq->cidx >= eq->cap))
                        eq->cidx -= eq->cap;
        }

        txmaps = &txq->txmaps;
        txm = &txmaps->maps[txmaps->map_cidx];
        if (maps)
                prefetch(txm->m);

        eq->avail += reclaimed;
        KASSERT(eq->avail < eq->cap,    /* avail tops out at (cap - 1) */
            ("%s: too many descriptors available", __func__));

        txmaps->map_avail += maps;
        KASSERT(txmaps->map_avail <= txmaps->map_total,
            ("%s: too many maps available", __func__));

        while (maps--) {
                struct tx_map *next;

                next = txm + 1;
                if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
                        next = txmaps->maps;
                prefetch(next->m);

                bus_dmamap_unload(txq->tx_tag, txm->map);
                m_freem(txm->m);
                txm->m = NULL;

                txm = next;
                if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
                        txmaps->map_cidx = 0;
        }

        return (reclaimed);
}

static void
write_eqflush_wr(struct sge_eq *eq)
{
        struct fw_eq_flush_wr *wr;

        EQ_LOCK_ASSERT_OWNED(eq);
        KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
        KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));

        wr = (void *)&eq->desc[eq->pidx];
        bzero(wr, sizeof(*wr));
        wr->opcode = FW_EQ_FLUSH_WR;
        wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
            F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);

        eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
        eq->pending++;
        eq->avail--;
        if (++eq->pidx == eq->cap)
                eq->pidx = 0;
}

static __be64
get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
{
        int i = (idx / 3) * 2;

        switch (idx % 3) {
        case 0: {
                __be64 rc;

                rc = htobe32(sgl[i].ds_len);
                if (i + 1 < nsegs)
                        rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;

                return (rc);
        }
        case 1:
                return htobe64(sgl[i].ds_addr);
        case 2:
                return htobe64(sgl[i + 1].ds_addr);
        }

        return (0);
}

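/*
 * Editorial illustration of the flit stream produced by successive
 * get_flit() calls in the wrap-around path of write_sgl_to_txd().  With
 * the first segment already written out as len0/addr0, the remaining
 * segments are emitted three flits per pair, matching the ulptx_sge_pair
 * layout of the non-wrapping path:
 *
 *      idx 0: lengths of remaining segments 0 and 1 (two 32-bit words)
 *      idx 1: address of remaining segment 0
 *      idx 2: address of remaining segment 1
 *      idx 3: lengths of remaining segments 2 and 3, and so on.
 */
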
/*
 * Find an SGE FL buffer size to use for the given bufsize.  Look for the
 * smallest size that is large enough to hold bufsize or pick the largest size
 * if all sizes are less than bufsize.
 */
static void
set_fl_tag_idx(struct adapter *sc, struct sge_fl *fl, int bufsize)
{
        int i, largest, best, delta, start;

        if (fl->flags & FL_BUF_PACKING) {
                fl->tag_idx = 0;        /* first tag is the one for packing */
                return;
        }

        start = sc->flags & BUF_PACKING_OK ? 1 : 0;
        delta = FL_BUF_SIZE(sc, start) - bufsize;
        if (delta == 0) {
                fl->tag_idx = start;    /* ideal fit, look no further */
                return;
        }
        best = start;
        largest = start;

        for (i = start + 1; i < FL_BUF_SIZES(sc); i++) {
                int d, fl_buf_size;

                fl_buf_size = FL_BUF_SIZE(sc, i);
                d = fl_buf_size - bufsize;

                if (d == 0) {
                        fl->tag_idx = i;        /* ideal fit, look no further */
                        return;
                }
                if (fl_buf_size > FL_BUF_SIZE(sc, largest))
                        largest = i;
                if (d > 0 && (delta < 0 || delta > d)) {
                        delta = d;
                        best = i;
                }
        }

        if (delta > 0)
                fl->tag_idx = best;     /* Found a buf bigger than bufsize */
        else
                fl->tag_idx = largest;  /* No buf large enough for bufsize */
}

static void
add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
{
        mtx_lock(&sc->sfl_lock);
        FL_LOCK(fl);
        if ((fl->flags & FL_DOOMED) == 0) {
                fl->flags |= FL_STARVING;
                TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
                callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
        }
        FL_UNLOCK(fl);
        mtx_unlock(&sc->sfl_lock);
}

static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
        const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
        unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
        struct adapter *sc = iq->adapter;
        struct sge *s = &sc->sge;
        struct sge_eq *eq;

        KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
            rss->opcode));

        eq = s->eqmap[qid - s->eq_start];
        EQ_LOCK(eq);
        KASSERT(eq->flags & EQ_CRFLUSHED,
            ("%s: unsolicited egress update", __func__));
        eq->flags &= ~EQ_CRFLUSHED;
        eq->egr_update++;

        if (__predict_false(eq->flags & EQ_DOOMED))
                wakeup_one(eq);
        else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
                taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
        EQ_UNLOCK(eq);

        return (0);
}

/* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
    offsetof(struct cpl_fw6_msg, data));

static int
handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
        struct adapter *sc = iq->adapter;
        const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

        KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
            rss->opcode));

        if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
                const struct rss_header *rss2;

                rss2 = (const struct rss_header *)&cpl->data[0];
                return (sc->cpl_handler[rss2->opcode](iq, rss2, m));
        }

        return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
        uint16_t *id = arg1;
        int i = *id;

        return sysctl_handle_int(oidp, &i, 0, req);
}
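
/*
 * Editorial note: sysctl_uint16 copies the uint16_t into a plain int because
 * sysctl_handle_int() operates on ints, and it never writes the value back,
 * so it is only suitable for read-only OIDs.  Illustrative registration
 * (hypothetical context/names, in the style of the queue index sysctls set
 * up elsewhere in this file):
 *
 *      SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
 *          CTLTYPE_INT | CTLFLAG_RD, &fl->cidx, 0, sysctl_uint16, "I",
 *          "consumer index");
 */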