/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/counter.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef DEV_NETMAP
#include <machine/bus.h>
#include <sys/selinfo.h>
#include <net/if_var.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#endif

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 *  0: disable padding.
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 *  0: backpressure the channel instead of dropping packets right away.
 *  1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 *  0: disable buffer packing.
 *  1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);

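/*
 * Illustrative only: all of the knobs above are loader tunables, so they are
 * typically set from /boot/loader.conf before the driver attaches, e.g.
 *
 *	hw.cxgbe.fl_pktshift=2
 *	hw.cxgbe.buffer_packing=1
 *	hw.cxgbe.largest_rx_cluster=4096
 *
 * The values shown are hypothetical examples, not recommendations.  Invalid
 * values for several of these are detected and corrected (with a console
 * message) in t4_sge_modload() and setup_pad_and_pack_boundaries() below.
 */
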
struct txpkts {
	u_int wr_type;		/* type 0 or type 1 */
	u_int npkt;		/* # of packets in this work request */
	u_int plen;		/* total payload (sum of all packets) */
	u_int len16;		/* # of 16B pieces used by this work request */
};

/* A packet's SGL.  This + m_pkthdr has all info needed for tx */
struct sgl {
	struct sglist sg;
	struct sglist_seg seg[TX_SGL_SEGS];
};

static int service_iq(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
    uint16_t, char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
    bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
    void *);
static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *,
    int, int);
static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *);
static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
static int free_mgmtq(struct adapter *);
static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int,
    struct sysctl_oid *);
static int free_rxq(struct vi_info *, struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int,
    struct sysctl_oid *);
static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
#endif
#ifdef DEV_NETMAP
static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int,
    struct sysctl_oid *);
static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *);
static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int,
    struct sysctl_oid *);
static int free_nm_txq(struct vi_info *, struct sge_nm_txq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
#ifdef TCP_OFFLOAD
static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *);
static int free_eq(struct adapter *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
    struct sysctl_oid *);
static int free_wrq(struct adapter *, struct sge_wrq *);
static int alloc_txq(struct vi_info *, struct sge_txq *, int,
    struct sysctl_oid *);
static int free_txq(struct vi_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static inline void get_pkt_gl(struct mbuf *, struct sglist *);
static inline u_int txpkt_len16(u_int, u_int);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *,
    struct mbuf *, u_int);
static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int);
static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int);
static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *,
    struct mbuf *, const struct txpkts *, u_int);
static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
static inline uint16_t read_hw_cidx(struct sge_eq *);
static inline u_int reclaimable_tx_desc(struct sge_eq *);
static inline u_int total_available_tx_desc(struct sge_eq *);
static u_int reclaim_tx_descs(struct sge_txq *, u_int);
static void tx_reclaim(void *, int);
static __be64 get_flit(struct sglist_seg *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *);
static void wrq_tx_drain(void *, int);
static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *);

static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
static int sysctl_tc(SYSCTL_HANDLER_ARGS);

static counter_u64_t extfree_refs;
static counter_u64_t extfree_rels;

an_handler_t t4_an_handler;
fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES];
cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS];


static int
an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
{

#ifdef INVARIANTS
	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
#else
	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
	    __func__, iq, ctrl);
#endif
	return (EDOOFUS);
}

int
t4_register_an_handler(an_handler_t h)
{
	uintptr_t *loc, new;

	new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
	loc = (uintptr_t *) &t4_an_handler;
	atomic_store_rel_ptr(loc, new);

	return (0);
}

static int
fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
{
	const struct cpl_fw6_msg *cpl =
	    __containerof(rpl, struct cpl_fw6_msg, data[0]);

#ifdef INVARIANTS
	panic("%s: fw_msg type %d", __func__, cpl->type);
#else
	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
#endif
	return (EDOOFUS);
}

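/*
 * The three dispatch tables above (t4_an_handler, t4_fw_msg_handler and
 * t4_cpl_handler) are updated with atomic_store_rel_ptr() so a handler can be
 * swapped while the rx path is dispatching through them.  Registering a NULL
 * handler simply restores the corresponding *_not_handled stub.
 */
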
int
t4_register_fw_msg_handler(int type, fw_msg_handler_t h)
{
	uintptr_t *loc, new;

	if (type >= nitems(t4_fw_msg_handler))
		return (EINVAL);

	/*
	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
	 * handler dispatch table.  Reject any attempt to install a handler for
	 * this subtype.
	 */
	if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL)
		return (EINVAL);

	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
	loc = (uintptr_t *) &t4_fw_msg_handler[type];
	atomic_store_rel_ptr(loc, new);

	return (0);
}

static int
cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{

#ifdef INVARIANTS
	panic("%s: opcode 0x%02x on iq %p with payload %p",
	    __func__, rss->opcode, iq, m);
#else
	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
	    __func__, rss->opcode, iq, m);
	m_freem(m);
#endif
	return (EDOOFUS);
}

int
t4_register_cpl_handler(int opcode, cpl_handler_t h)
{
	uintptr_t *loc, new;

	if (opcode >= nitems(t4_cpl_handler))
		return (EINVAL);

	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
	loc = (uintptr_t *) &t4_cpl_handler[opcode];
	atomic_store_rel_ptr(loc, new);

	return (0);
}

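/*
 * Hypothetical usage (for illustration only; the handler name below is made
 * up): an upper-layer driver such as the TOE module installs its CPL handlers
 * at load time and removes them on unload by registering NULL again:
 *
 *	t4_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
 *	...
 *	t4_register_cpl_handler(CPL_ACT_ESTABLISH, NULL);
 *
 * Any function with the cpl_handler_t signature will do.
 */
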
/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{
	int i;

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 2 instead.\n", fl_pktshift);
		fl_pktshift = 2;
	}

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}

	extfree_refs = counter_u64_alloc(M_WAITOK);
	extfree_rels = counter_u64_alloc(M_WAITOK);
	counter_u64_zero(extfree_refs);
	counter_u64_zero(extfree_rels);

	t4_an_handler = an_not_handled;
	for (i = 0; i < nitems(t4_fw_msg_handler); i++)
		t4_fw_msg_handler[i] = fw_msg_not_handled;
	for (i = 0; i < nitems(t4_cpl_handler); i++)
		t4_cpl_handler[i] = cpl_not_handled;

	t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
	t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx);
	t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
	t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl);
}

void
t4_sge_modunload(void)
{

	counter_u64_free(extfree_refs);
	counter_u64_free(extfree_rels);
}

uint64_t
t4_sge_extfree_refs(void)
{
	uint64_t refs, rels;

	rels = counter_u64_fetch(extfree_rels);
	refs = counter_u64_fetch(extfree_refs);

	return (refs - rels);
}

static inline void
setup_pad_and_pack_boundaries(struct adapter *sc)
{
	uint32_t v, m;
	int pad, pack;

	pad = fl_pad;
	if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
		/*
		 * If there is any chance that we might use buffer packing and
		 * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
		 * it to 32 in all other cases.
		 */
		pad = is_t4(sc) && buffer_packing ? 64 : 32;

		/*
		 * For fl_pad = 0 we'll still write a reasonable value to the
		 * register but all the freelists will opt out of padding.
		 * We'll complain here only if the user tried to set it to a
		 * value greater than 0 that was invalid.
		 */
		if (fl_pad > 0) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
			    " (%d), using %d instead.\n", fl_pad, pad);
		}
	}
	m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
	v = V_INGPADBOUNDARY(ilog2(pad) - 5);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t4(sc)) {
		if (fl_pack != -1 && fl_pack != pad) {
			/* Complain but carry on. */
			device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
			    " using %d instead.\n", fl_pack, pad);
		}
		return;
	}

	pack = fl_pack;
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack)) {
		pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
		MPASS(powerof2(pack));
		if (pack < 16)
			pack = 16;
		if (pack == 32)
			pack = 64;
		if (pack > 4096)
			pack = 4096;
		if (fl_pack != -1) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
			    " (%d), using %d instead.\n", fl_pack, pack);
		}
	}
	m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
	if (pack == 16)
		v = V_INGPACKBOUNDARY(0);
	else
		v = V_INGPACKBOUNDARY(ilog2(pack) - 5);

	MPASS(!is_t4(sc));	/* T4 doesn't have SGE_CONTROL2 */
	t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
}

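/*
 * Worked example for the register encoding above: INGPADBOUNDARY holds
 * log2(boundary) - 5, so a 32B pad boundary is written as 0, 64B as 1, and
 * 4096B as 7.  On T5 the pack boundary in SGE_CONTROL2 uses the same encoding
 * except that 16B, which has no log2(x) - 5 representation, is written as the
 * special value 0.
 */
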
/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sge_flbuf_sizes[] = {
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
		MJUMPAGESIZE - CL_METADATA_SIZE,
		MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES,
		MCLBYTES - MSIZE - CL_METADATA_SIZE,
		MJUM9BYTES - CL_METADATA_SIZE,
		MJUM16BYTES - CL_METADATA_SIZE,
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	setup_pad_and_pack_boundaries(sc);

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
	    ("%s: hw buffer size table too big", __func__));
	for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
		    sge_flbuf_sizes[i]);
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	/* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	/*
	 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP.  These have been
	 * chosen with MAXPHYS = 128K in mind.  The largest DDP buffer that we
	 * may have to deal with is MAXPHYS + 1 page.
	 */
	v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4);
	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v);

	/* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */
	m = v = F_TDDPTAGTCB | F_ISCSITAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
 * padding is in use, the buffer's start and end need to be aligned to the pad
 * boundary as well.  We'll just make sure that the size is a multiple of the
 * boundary here, it is up to the buffer allocation code to make sure the start
 * of the buffer is aligned as well.
 */
static inline int
hwsz_ok(struct adapter *sc, int hwsz)
{
	int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}

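/*
 * Illustrative numbers: with padding enabled and a 32B pad boundary, a hw
 * buffer size must be at least 64B and a multiple of 32, so 96, 2048, or 4096
 * all qualify; with a 64B boundary 96 no longer does (96 & 63 != 0).  With
 * padding disabled the only requirement is a multiple of 16 that is >= 64B.
 */
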
/*
 * XXX: driver really should be able to deal with unexpected settings.
 */
int
t4_read_chip_settings(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	struct sge_params *sp = &sc->params.sge;
	int i, j, n, rc = 0;
	uint32_t m, v, r;
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sw_buf_sizes[] = {	/* Sorted by size */
		MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
		MJUMPAGESIZE,
#endif
		MJUM9BYTES,
		MJUM16BYTES
	};
	struct sw_zone_info *swz, *safe_swz;
	struct hw_buf_info *hwb;

	m = F_RXPKTCPLMODE;
	v = F_RXPKTCPLMODE;
	r = sc->params.sge.sge_control;
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}

	/*
	 * If this changes then every single use of PAGE_SHIFT in the driver
	 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift.
	 */
	if (sp->page_shift != PAGE_SHIFT) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	/* Filter out unusable hw buffer sizes entirely (mark with -2). */
	hwb = &s->hw_buf_info[0];
	for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
		r = sc->params.sge.sge_fl_buffer_size[i];
		hwb->size = r;
		hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
		hwb->next = -1;
	}

	/*
	 * Create a sorted list in decreasing order of hw buffer sizes (and so
	 * increasing order of spare area) for each software zone.
	 *
	 * If padding is enabled then the start and end of the buffer must align
	 * to the pad boundary; if packing is enabled then they must align with
	 * the pack boundary as well.  Allocations from the cluster zones are
	 * aligned to min(size, 4K), so the buffer starts at that alignment and
	 * ends at hwb->size alignment.  If mbuf inlining is allowed the
	 * starting alignment will be reduced to MSIZE and the driver will
	 * exercise appropriate caution when deciding on the best buffer layout
	 * to use.
	 */
	n = 0;	/* no usable buffer size to begin with */
	swz = &s->sw_zone_info[0];
	safe_swz = NULL;
	for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
		int8_t head = -1, tail = -1;

		swz->size = sw_buf_sizes[i];
		swz->zone = m_getzone(swz->size);
		swz->type = m_gettype(swz->size);

		if (swz->size < PAGE_SIZE) {
			MPASS(powerof2(swz->size));
			if (fl_pad && (swz->size % sp->pad_boundary != 0))
				continue;
		}

		if (swz->size == safest_rx_cluster)
			safe_swz = swz;

		hwb = &s->hw_buf_info[0];
		for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
			if (hwb->zidx != -1 || hwb->size > swz->size)
				continue;
#ifdef INVARIANTS
			if (fl_pad)
				MPASS(hwb->size % sp->pad_boundary == 0);
#endif
			hwb->zidx = i;
			if (head == -1)
				head = tail = j;
			else if (hwb->size < s->hw_buf_info[tail].size) {
				s->hw_buf_info[tail].next = j;
				tail = j;
			} else {
				int8_t *cur;
				struct hw_buf_info *t;

				for (cur = &head; *cur != -1; cur = &t->next) {
					t = &s->hw_buf_info[*cur];
					if (hwb->size == t->size) {
						hwb->zidx = -2;
						break;
					}
					if (hwb->size > t->size) {
						hwb->next = *cur;
						*cur = j;
						break;
					}
				}
			}
		}
		swz->head_hwidx = head;
		swz->tail_hwidx = tail;

		if (tail != -1) {
			n++;
			if (swz->size - s->hw_buf_info[tail].size >=
			    CL_METADATA_SIZE)
				sc->flags |= BUF_PACKING_OK;
		}
	}
	if (n == 0) {
		device_printf(sc->dev, "no usable SGE FL buffer size.\n");
		rc = EINVAL;
	}

	s->safe_hwidx1 = -1;
	s->safe_hwidx2 = -1;
	if (safe_swz != NULL) {
		s->safe_hwidx1 = safe_swz->head_hwidx;
		for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
			int spare;

			hwb = &s->hw_buf_info[i];
#ifdef INVARIANTS
			if (fl_pad)
				MPASS(hwb->size % sp->pad_boundary == 0);
#endif
			spare = safe_swz->size - hwb->size;
			if (spare >= CL_METADATA_SIZE) {
				s->safe_hwidx2 = i;
				break;
			}
		}
	}

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		rc = EINVAL;
	}

	t4_init_tp_params(sc);

	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);

	return (rc);
}

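/*
 * Safe-buffer selection above, in brief: safe_hwidx1 is the largest hw buffer
 * size that fits in the "safest" software cluster (PAGE_SIZE by default) and
 * safe_hwidx2 is the largest one that also leaves at least CL_METADATA_SIZE
 * of spare room; these are what the freelists fall back to when larger
 * cluster allocations fail.
 */
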
int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{
	struct sge_params *sp = &sc->params.sge;

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
	    CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
	    "freelist buffer sizes");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, sp->pad_boundary, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, sp->spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, sp->pack_boundary, "payload pack boundary (bytes)");
}

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue and the management queue.
 *
 * Returns errno on failure.  Resources allocated up to that point may still be
 * allocated.  Caller is responsible for cleanup in case this function fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	sysctl_ctx_init(&sc->ctx);
	sc->flags |= ADAP_SYSCTL_CTX;

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * Management queue.  This is just a control queue that uses the fwq as
	 * its associated iq.
	 */
	rc = alloc_mgmtq(sc);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/* Do this before freeing the queue */
	if (sc->flags & ADAP_SYSCTL_CTX) {
		sysctl_ctx_free(&sc->ctx);
		sc->flags &= ~ADAP_SYSCTL_CTX;
	}

	free_mgmtq(sc);
	free_fwq(sc);

	return (0);
}

static inline int
first_vector(struct vi_info *vi)
{
	struct adapter *sc = vi->pi->adapter;

	if (sc->intr_count == 1)
		return (0);

	return (vi->first_intr);
}

/*
 * Given an arbitrary "index," come up with an iq that can be used by other
 * queues (of this VI) for interrupt forwarding, SGE egress updates, etc.
 * The iq returned is guaranteed to be something that takes direct interrupts.
 */
static struct sge_iq *
vi_intr_iq(struct vi_info *vi, int idx)
{
	struct adapter *sc = vi->pi->adapter;
	struct sge *s = &sc->sge;
	struct sge_iq *iq = NULL;
	int nintr, i;

	if (sc->intr_count == 1)
		return (&sc->sge.fwq);

	nintr = vi->nintr;
	KASSERT(nintr != 0,
	    ("%s: vi %p has no exclusive interrupts, total interrupts = %d",
	    __func__, vi, sc->intr_count));
	i = idx % nintr;

	if (vi->flags & INTR_RXQ) {
		if (i < vi->nrxq) {
			iq = &s->rxq[vi->first_rxq + i].iq;
			goto done;
		}
		i -= vi->nrxq;
	}
#ifdef TCP_OFFLOAD
	if (vi->flags & INTR_OFLD_RXQ) {
		if (i < vi->nofldrxq) {
			iq = &s->ofld_rxq[vi->first_ofld_rxq + i].iq;
			goto done;
		}
		i -= vi->nofldrxq;
	}
#endif
	panic("%s: vi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__,
	    vi, vi->flags & INTR_ALL, idx, nintr);
done:
	MPASS(iq != NULL);
	KASSERT(iq->flags & IQ_INTR,
	    ("%s: iq %p (vi %p, intr_flags 0x%lx, idx %d)", __func__, iq, vi,
	    vi->flags & INTR_ALL, idx));
	return (iq);
}

/* Maximum payload that can be delivered with a single iq descriptor */
static inline int
mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
{
	int payload;

#ifdef TCP_OFFLOAD
	if (toe) {
		payload = sc->tt.rx_coalesce ?
		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
	} else {
#endif
		/* large enough even when hw VLAN extraction is disabled */
		payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
		    ETHER_VLAN_ENCAP_LEN + mtu;
#ifdef TCP_OFFLOAD
	}
#endif

	return (payload);
}

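/*
 * Rough example of the NIC-side calculation above: with the default
 * fl_pktshift of 2, a 1500 byte MTU works out to 2 + 14 (ethernet header) +
 * 4 (VLAN tag) + 1500 = 1520 bytes of worst-case payload per frame, and that
 * is the value handed to the freelist setup code below.
 */
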
int
t4_setup_vi_queues(struct vi_info *vi)
{
	int rc = 0, i, j, intr_idx, iqid;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
	struct sge_wrq *ctrlq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	int saved_idx;
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif
	char name[16];
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = vi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp, mtu = ifp->if_mtu;

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = first_vector(vi);

#ifdef DEV_NETMAP
	saved_idx = intr_idx;
	if (ifp->if_capabilities & IFCAP_NETMAP) {

		/* netmap is supported with direct interrupts only. */
		MPASS(vi->flags & INTR_RXQ);

		/*
		 * We don't have buffers to back the netmap rx queues
		 * right now so we create the queues in a way that
		 * doesn't set off any congestion signal in the chip.
		 */
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq",
		    CTLFLAG_RD, NULL, "rx queues");
		for_each_nm_rxq(vi, i, nm_rxq) {
			rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}

		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq",
		    CTLFLAG_RD, NULL, "tx queues");
		for_each_nm_txq(vi, i, nm_txq) {
			iqid = vi->first_nm_rxq + (i % vi->nnmrxq);
			rc = alloc_nm_txq(vi, nm_txq, iqid, i, oid);
			if (rc != 0)
				goto done;
		}
	}

	/* Normal rx queues and netmap rx queues share the same interrupts. */
	intr_idx = saved_idx;
#endif

	/*
	 * First pass over all NIC and TOE rx queues:
	 * a) initialize iq and fl
	 * b) allocate queue iff it will take direct interrupts.
	 */
	maxp = mtu_to_max_payload(sc, mtu, 0);
	if (vi->flags & INTR_RXQ) {
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
		    CTLFLAG_RD, NULL, "rx queues");
	}
	for_each_rxq(vi, i, rxq) {

		init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq);

		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name);

		if (vi->flags & INTR_RXQ) {
			rxq->iq.flags |= IQ_INTR;
			rc = alloc_rxq(vi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
#endif
#ifdef TCP_OFFLOAD
	maxp = mtu_to_max_payload(sc, mtu, 1);
	if (vi->flags & INTR_OFLD_RXQ) {
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
	}
	for_each_ofld_rxq(vi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, vi->tmr_idx, vi->pktc_idx,
		    vi->qsize_rxq);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name);

		if (vi->flags & INTR_OFLD_RXQ) {
			ofld_rxq->iq.flags |= IQ_INTR;
			rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}
	}
#endif

	/*
	 * Second pass over all NIC and TOE rx queues.  The queues forwarding
	 * their interrupts are allocated now.
	 */
	j = 0;
	if (!(vi->flags & INTR_RXQ)) {
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
		    CTLFLAG_RD, NULL, "rx queues");
		for_each_rxq(vi, i, rxq) {
			MPASS(!(rxq->iq.flags & IQ_INTR));

			intr_idx = vi_intr_iq(vi, j)->abs_id;

			rc = alloc_rxq(vi, rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			j++;
		}
	}
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0 && !(vi->flags & INTR_OFLD_RXQ)) {
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
		    CTLFLAG_RD, NULL,
		    "rx queues for offloaded TCP connections");
		for_each_ofld_rxq(vi, i, ofld_rxq) {
			MPASS(!(ofld_rxq->iq.flags & IQ_INTR));

			intr_idx = vi_intr_iq(vi, j)->abs_id;

			rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			j++;
		}
	}
#endif

	/*
	 * Now the tx queues.  Only one pass needed.
	 */
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
	    NULL, "tx queues");
	j = 0;
	for_each_txq(vi, i, txq) {
		iqid = vi_intr_iq(vi, j)->cntxt_id;
		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(vi->dev), i);
		init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, iqid,
		    name);

		rc = alloc_txq(vi, txq, i, oid);
		if (rc != 0)
			goto done;
		j++;
	}
#ifdef TCP_OFFLOAD
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
	for_each_ofld_txq(vi, i, ofld_txq) {
		struct sysctl_oid *oid2;

		iqid = vi_intr_iq(vi, j)->cntxt_id;
		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(vi->dev), i);
		init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan,
		    iqid, name);

		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "offload tx queue");

		rc = alloc_wrq(sc, vi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
		j++;
	}
#endif

	/*
	 * Finally, the control queue.
	 */
	if (!IS_MAIN_VI(vi))
		goto done;
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
	    NULL, "ctrl queue");
	ctrlq = &sc->sge.ctrlq[pi->port_id];
	iqid = vi_intr_iq(vi, 0)->cntxt_id;
	snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev));
	init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid,
	    name);
	rc = alloc_wrq(sc, vi, ctrlq, oid);

done:
	if (rc)
		t4_teardown_vi_queues(vi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_vi_queues(struct vi_info *vi)
{
	int i;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif

	/* Do this before freeing the queues */
	if (vi->flags & VI_SYSCTL_CTX) {
		sysctl_ctx_free(&vi->ctx);
		vi->flags &= ~VI_SYSCTL_CTX;
	}

#ifdef DEV_NETMAP
	if (vi->ifp->if_capabilities & IFCAP_NETMAP) {
		for_each_nm_txq(vi, i, nm_txq) {
			free_nm_txq(vi, nm_txq);
		}

		for_each_nm_rxq(vi, i, nm_rxq) {
			free_nm_rxq(vi, nm_rxq);
		}
	}
#endif

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	if (IS_MAIN_VI(vi))
		free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

	for_each_txq(vi, i, txq) {
		free_txq(vi, txq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_txq(vi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues that forward their interrupts, as they
	 * reference other rx queues.
	 */

	for_each_rxq(vi, i, rxq) {
		if ((rxq->iq.flags & IQ_INTR) == 0)
			free_rxq(vi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
			free_ofld_rxq(vi, ofld_rxq);
	}
#endif

	/*
	 * Then take down the rx queues that take direct interrupts.
	 */

	for_each_rxq(vi, i, rxq) {
		if (rxq->iq.flags & IQ_INTR)
			free_rxq(vi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		if (ofld_rxq->iq.flags & IQ_INTR)
			free_ofld_rxq(vi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Deals with errors and the firmware event queue.  All data rx queues forward
 * their interrupt to the firmware event queue.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	t4_intr_err(arg);
	if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(fwq, 0);
		atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
	}
}

/* Deals with error interrupts */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;

	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
	t4_slow_intr_handler(sc);
}

void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

void
t4_vi_intr(void *arg)
{
	struct irq *irq = arg;

#ifdef DEV_NETMAP
	if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) {
		t4_nm_intr(irq->nm_rxq);
		atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON);
	}
#endif
	if (irq->rxq != NULL)
		t4_intr(irq->rxq);
}

/*
 * Deals with anything and everything on the given ingress queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct sge_rxq *rxq = iq_to_rxq(iq);	/* Use iff iq is part of rxq */
	struct sge_fl *fl;			/* Use iff IQ_HAS_FL */
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs = 0, limit;
	int rsp_type, refill;
	uint32_t lq;
	uint16_t fl_hw_cidx;
	struct mbuf *m0;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
#endif

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));

	limit = budget ? budget : iq->qsize / 16;

	if (iq->flags & IQ_HAS_FL) {
		fl = &rxq->fl;
		fl_hw_cidx = fl->hw_cidx;	/* stable snapshot */
	} else {
		fl = NULL;
		fl_hw_cidx = 0;			/* to silence gcc warning */
	}

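	/*
	 * A budget of 0 means "run until the ring is empty", which is how the
	 * interrupt handlers above call this.  A nonzero budget makes the loop
	 * below return EINPROGRESS once that many descriptors have been
	 * processed so the caller can reschedule the remainder.
	 */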
	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

			rmb();

			refill = 0;
			m0 = NULL;
			rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
			lq = be32toh(d->rsp.pldbuflen_qid);

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:

				KASSERT(iq->flags & IQ_HAS_FL,
				    ("%s: data for an iq (%p) with no freelist",
				    __func__, iq));

				m0 = get_fl_payload(sc, fl, lq);
				if (__predict_false(m0 == NULL))
					goto process_iql;
				refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2;
#ifdef T4_PKT_TIMESTAMP
				/*
				 * 60 bit timestamp for the payload is
				 * *(uint64_t *)m0->m_pktdat.  Note that it is
				 * in the leading free-space in the mbuf.  The
				 * kernel can clobber it during a pullup,
				 * m_copymdata, etc.  You need to make sure that
				 * the mbuf reaches you unmolested if you care
				 * about the timestamp.
				 */
				*(uint64_t *)m0->m_pktdat =
				    be64toh(ctrl->u.last_flit) &
				    0xfffffffffffffff;
#endif

				/* fall through */

			case X_RSPD_TYPE_CPL:
				KASSERT(d->rss.opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    d->rss.opcode));
				t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0);
				break;

			case X_RSPD_TYPE_INTR:

				/*
				 * Interrupts should be forwarded only to queues
				 * that are not forwarding their interrupts.
				 * This means service_iq can recurse but only 1
				 * level deep.
				 */
				KASSERT(budget == 0,
				    ("%s: budget %u, rsp_type %u", __func__,
				    budget, rsp_type));

				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (lq >= 1024) {
					t4_an_handler(iq, &d->rsp);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start -
				    sc->sge.iq_base];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq(q, q->qsize / 16) == 0) {
						atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				t4_write_reg(sc, sc->sge_gts_reg,
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

#if defined(INET) || defined(INET6)
				if (iq->flags & IQ_LRO_ENABLED &&
				    sc->lro_timeout != 0) {
					tcp_lro_flush_inactive(&rxq->lro,
					    &lro_timeout);
				}
#endif

				if (budget) {
					if (iq->flags & IQ_HAS_FL) {
						FL_LOCK(fl);
						refill_fl(sc, fl, 32);
						FL_UNLOCK(fl);
					}
					return (EINPROGRESS);
				}
			}
			if (refill) {
				FL_LOCK(fl);
				refill_fl(sc, fl, 32);
				FL_UNLOCK(fl);
				fl_hw_cidx = fl->hw_cidx;
			}
		}

process_iql:
		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq(q, q->qsize / 8) == 0)
			atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		struct lro_ctrl *lro = &rxq->lro;

		tcp_lro_flush_all(lro);
	}
#endif

	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	if (iq->flags & IQ_HAS_FL) {
		int starved;

		FL_LOCK(fl);
		starved = refill_fl(sc, fl, 64);
		FL_UNLOCK(fl);
		if (__predict_false(starved != 0))
			add_fl_to_sfl(sc, fl);
	}

	return (0);
}

static inline int
cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
{
	int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;

	if (rc)
		MPASS(cll->region3 >= CL_METADATA_SIZE);

	return (rc);
}

static inline struct cluster_metadata *
cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
    caddr_t cl)
{

	if (cl_has_metadata(fl, cll)) {
		struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];

		return ((struct cluster_metadata *)(cl + swz->size) - 1);
	}
	return (NULL);
}

static void
rxb_free(struct mbuf *m, void *arg1, void *arg2)
{
	uma_zone_t zone = arg1;
	caddr_t cl = arg2;

	uma_zfree(zone, cl);
	counter_u64_add(extfree_rels, 1);
}

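/*
 * Bookkeeping note: a cluster handed up the stack via m_extaddref() bumps
 * extfree_refs once (on its first reference), and rxb_free() above bumps
 * extfree_rels when the last reference goes away and the cluster returns to
 * its zone.  t4_sge_extfree_refs() reports the difference, i.e. the clusters
 * still out on loan to the rest of the kernel.
 */
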
/*
 * The mbuf returned by this function could be allocated from zone_mbuf or
 * constructed in spare room in the cluster.
 *
 * The mbuf carries the payload in one of these ways
 * a) frame inside the mbuf (mbuf from zone_mbuf)
 * b) m_cljset (for clusters without metadata) zone_mbuf
 * c) m_extaddref (cluster with metadata) inline mbuf
 * d) m_extaddref (cluster with metadata) zone_mbuf
 */
static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct cluster_layout *cll = &sd->cll;
	struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
	struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
	struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
	int len, blen;
	caddr_t payload;

	blen = hwb->size - fl->rx_offset;	/* max possible in this buf */
	len = min(remaining, blen);
	payload = sd->cl + cll->region1 + fl->rx_offset;
	if (fl->flags & FL_BUF_PACKING) {
		const u_int l = fr_offset + len;
		const u_int pad = roundup2(l, fl->buf_boundary) - l;

		if (fl->rx_offset + len + pad < hwb->size)
			blen = len + pad;
		MPASS(fl->rx_offset + blen <= hwb->size);
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
	}


	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {

		/*
		 * Copy payload into a freshly allocated mbuf.
		 */

		m = fr_offset == 0 ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
#ifdef T4_PKT_TIMESTAMP
		/* Leave room for a timestamp */
		m->m_data += 8;
#endif
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);

	} else if (sd->nmbuf * MSIZE < cll->region1) {

		/*
		 * There's spare room in the cluster for an mbuf.  Create one
		 * and associate it with the payload that's in the cluster.
		 */

		MPASS(clm != NULL);
		m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
		/* No bzero required */
		if (m_init(m, M_NOWAIT, MT_DATA,
		    fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE))
			return (NULL);
		fl->mbuf_inlined++;
		m_extaddref(m, payload, blen, &clm->refcount, rxb_free,
		    swz->zone, sd->cl);
		if (sd->nmbuf++ == 0)
			counter_u64_add(extfree_refs, 1);

	} else {

		/*
		 * Grab an mbuf from zone_mbuf and associate it with the
		 * payload in the cluster.
		 */

		m = fr_offset == 0 ?
		    m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			return (NULL);
		fl->mbuf_allocated++;
		if (clm != NULL) {
			m_extaddref(m, payload, blen, &clm->refcount,
			    rxb_free, swz->zone, sd->cl);
			if (sd->nmbuf++ == 0)
				counter_u64_add(extfree_refs, 1);
		} else {
			m_cljset(m, sd->cl, swz->type);
			sd->cl = NULL;	/* consumed, not a recycle candidate */
		}
	}
	if (fr_offset == 0)
		m->m_pkthdr.len = remaining;
	m->m_len = len;

	if (fl->flags & FL_BUF_PACKING) {
		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= hwb->size);
		if (fl->rx_offset < hwb->size)
			return (m);	/* without advancing the cidx */
	}

	if (__predict_false(++fl->cidx % 8 == 0)) {
		uint16_t cidx = fl->cidx / 8;

		if (__predict_false(cidx == fl->sidx))
			fl->cidx = cidx = 0;
		fl->hw_cidx = cidx;
	}
	fl->rx_offset = 0;

	return (m);
}

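/*
 * get_fl_payload() below strings these segments together into a single frame.
 * If an mbuf allocation fails partway through a multi-buffer frame, the
 * partial chain and the running counts are parked in the freelist and
 * FL_BUF_RESUME is set so that reassembly picks up where it left off on the
 * next call.
 */
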
static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
{
	struct mbuf *m0, *m, **pnext;
	u_int remaining;
	const u_int total = G_RSPD_LEN(len_newbuf);

	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
		M_ASSERTPKTHDR(fl->m0);
		MPASS(fl->m0->m_pkthdr.len == total);
		MPASS(fl->remaining < total);

		m0 = fl->m0;
		pnext = fl->pnext;
		remaining = fl->remaining;
		fl->flags &= ~FL_BUF_RESUME;
		goto get_segment;
	}

	if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
		fl->rx_offset = 0;
		if (__predict_false(++fl->cidx % 8 == 0)) {
			uint16_t cidx = fl->cidx / 8;

			if (__predict_false(cidx == fl->sidx))
				fl->cidx = cidx = 0;
			fl->hw_cidx = cidx;
		}
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'len' and it may span multiple hw buffers.
	 */

	m0 = get_scatter_segment(sc, fl, 0, total);
	if (m0 == NULL)
		return (NULL);
	remaining = total - m0->m_len;
	pnext = &m0->m_next;
	while (remaining > 0) {
get_segment:
		MPASS(fl->rx_offset == 0);
		m = get_scatter_segment(sc, fl, total - remaining, remaining);
		if (__predict_false(m == NULL)) {
			fl->m0 = m0;
			fl->pnext = pnext;
			fl->remaining = remaining;
			fl->flags |= FL_BUF_RESUME;
			return (NULL);
		}
		*pnext = m;
		pnext = &m->m_next;
		remaining -= m->m_len;
	}
	*pnext = NULL;

	M_ASSERTPKTHDR(m0);
	return (m0);
}

static int
t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct ifnet *ifp = rxq->ifp;
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxq->lro;
#endif
	static const int sw_hashtype[4][2] = {
		{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
		{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
		{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
		{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
	};

	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
	    rss->opcode));

	m0->m_pkthdr.len -= sc->params.sge.fl_pktshift;
	m0->m_len -= sc->params.sge.fl_pktshift;
	m0->m_data += sc->params.sge.fl_pktshift;

	m0->m_pkthdr.rcvif = ifp;
	M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]);
	m0->m_pkthdr.flowid = be32toh(rss->hash_val);

	if (cpl->csum_calc && !cpl->err_vec) {
		if (ifp->if_capenable & IFCAP_RXCSUM &&
		    cpl->l2info & htobe32(F_RXF_IP)) {
			m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		} else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
		    cpl->l2info & htobe32(F_RXF_IP6)) {
			m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
			    CSUM_PSEUDO_HDR);
			rxq->rxcsum++;
		}

		if (__predict_false(cpl->ip_frag))
			m0->m_pkthdr.csum_data = be16toh(cpl->csum);
		else
			m0->m_pkthdr.csum_data = 0xffff;
	}

	if (cpl->vlan_ex) {
		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
		m0->m_flags |= M_VLANTAG;
		rxq->vlan_extraction++;
	}

#if defined(INET) || defined(INET6)
	if (cpl->l2info & htobe32(F_RXF_LRO) &&
	    iq->flags & IQ_LRO_ENABLED &&
	    tcp_lro_rx(lro, m0, 0) == 0) {
		/* queued for LRO */
	} else
#endif
		ifp->if_input(ifp, m0);

	return (0);
}

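/*
 * Checksum reporting above, in short: when the chip has verified the
 * checksums (csum_calc set, err_vec clear) the driver marks the data checksum
 * as already valid and stores 0xffff in csum_data, except for IP fragments,
 * where the raw partial checksum is passed up so the stack can finish the job
 * after reassembly.
 */
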
/*
 * Must drain the wrq or make sure that someone else will.
 */
static void
wrq_tx_drain(void *arg, int n)
{
	struct sge_wrq *wrq = arg;
	struct sge_eq *eq = &wrq->eq;

	EQ_LOCK(eq);
	if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
		drain_wrq_wr_list(wrq->adapter, wrq);
	EQ_UNLOCK(eq);
}

static void
drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
{
	struct sge_eq *eq = &wrq->eq;
	u_int available, dbdiff;	/* # of hardware descriptors */
	u_int n;
	struct wrqe *wr;
	struct fw_eth_tx_pkt_wr *dst;	/* any fw WR struct will do */

	EQ_LOCK_ASSERT_OWNED(eq);
	MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
	wr = STAILQ_FIRST(&wrq->wr_list);
	MPASS(wr != NULL);	/* Must be called with something useful to do */
	MPASS(eq->pidx == eq->dbidx);
	dbdiff = 0;

	do {
		eq->cidx = read_hw_cidx(eq);
		if (eq->pidx == eq->cidx)
			available = eq->sidx - 1;
		else
			available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;

		MPASS(wr->wrq == wrq);
		n = howmany(wr->wr_len, EQ_ESIZE);
		if (available < n)
			break;

		dst = (void *)&eq->desc[eq->pidx];
		if (__predict_true(eq->sidx - eq->pidx > n)) {
			/* Won't wrap, won't end exactly at the status page. */
			bcopy(&wr->wr[0], dst, wr->wr_len);
			eq->pidx += n;
		} else {
			int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;

			bcopy(&wr->wr[0], dst, first_portion);
			if (wr->wr_len > first_portion) {
				bcopy(&wr->wr[first_portion], &eq->desc[0],
				    wr->wr_len - first_portion);
			}
			eq->pidx = n - (eq->sidx - eq->pidx);
		}

		if (available < eq->sidx / 4 &&
		    atomic_cmpset_int(&eq->equiq, 0, 1)) {
			dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
			    F_FW_WR_EQUEQ);
			eq->equeqidx = eq->pidx;
		} else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
			dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
			eq->equeqidx = eq->pidx;
		}

		dbdiff += n;
		if (dbdiff >= 16) {
			ring_eq_db(sc, eq, dbdiff);
			dbdiff = 0;
		}

		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);
		MPASS(wrq->nwr_pending > 0);
		wrq->nwr_pending--;
		MPASS(wrq->ndesc_needed >= n);
		wrq->ndesc_needed -= n;
	} while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);

	if (dbdiff)
		ring_eq_db(sc, eq, dbdiff);
}

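/*
 * Doorbell batching in drain_wrq_wr_list(): descriptors are accumulated in
 * dbdiff and the doorbell is rung once every 16 descriptors (plus once at the
 * end) instead of once per work request, which keeps the number of MMIO
 * writes down.  The EQUIQ/EQUEQ bits request an egress update (and an
 * interrupt) from the hardware when the queue is running low on space.
 */
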
*/ 1948 MPASS(eq->pidx == eq->dbidx); 1949 } 1950 1951 void 1952 t4_update_fl_bufsize(struct ifnet *ifp) 1953 { 1954 struct vi_info *vi = ifp->if_softc; 1955 struct adapter *sc = vi->pi->adapter; 1956 struct sge_rxq *rxq; 1957 #ifdef TCP_OFFLOAD 1958 struct sge_ofld_rxq *ofld_rxq; 1959 #endif 1960 struct sge_fl *fl; 1961 int i, maxp, mtu = ifp->if_mtu; 1962 1963 maxp = mtu_to_max_payload(sc, mtu, 0); 1964 for_each_rxq(vi, i, rxq) { 1965 fl = &rxq->fl; 1966 1967 FL_LOCK(fl); 1968 find_best_refill_source(sc, fl, maxp); 1969 FL_UNLOCK(fl); 1970 } 1971 #ifdef TCP_OFFLOAD 1972 maxp = mtu_to_max_payload(sc, mtu, 1); 1973 for_each_ofld_rxq(vi, i, ofld_rxq) { 1974 fl = &ofld_rxq->fl; 1975 1976 FL_LOCK(fl); 1977 find_best_refill_source(sc, fl, maxp); 1978 FL_UNLOCK(fl); 1979 } 1980 #endif 1981 } 1982 1983 static inline int 1984 mbuf_nsegs(struct mbuf *m) 1985 { 1986 1987 M_ASSERTPKTHDR(m); 1988 KASSERT(m->m_pkthdr.l5hlen > 0, 1989 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 1990 1991 return (m->m_pkthdr.l5hlen); 1992 } 1993 1994 static inline void 1995 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 1996 { 1997 1998 M_ASSERTPKTHDR(m); 1999 m->m_pkthdr.l5hlen = nsegs; 2000 } 2001 2002 static inline int 2003 mbuf_len16(struct mbuf *m) 2004 { 2005 int n; 2006 2007 M_ASSERTPKTHDR(m); 2008 n = m->m_pkthdr.PH_loc.eight[0]; 2009 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2010 2011 return (n); 2012 } 2013 2014 static inline void 2015 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2016 { 2017 2018 M_ASSERTPKTHDR(m); 2019 m->m_pkthdr.PH_loc.eight[0] = len16; 2020 } 2021 2022 static inline int 2023 needs_tso(struct mbuf *m) 2024 { 2025 2026 M_ASSERTPKTHDR(m); 2027 2028 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 2029 KASSERT(m->m_pkthdr.tso_segsz > 0, 2030 ("%s: TSO requested in mbuf %p but MSS not provided", 2031 __func__, m)); 2032 return (1); 2033 } 2034 2035 return (0); 2036 } 2037 2038 static inline int 2039 needs_l3_csum(struct mbuf *m) 2040 { 2041 2042 M_ASSERTPKTHDR(m); 2043 2044 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) 2045 return (1); 2046 return (0); 2047 } 2048 2049 static inline int 2050 needs_l4_csum(struct mbuf *m) 2051 { 2052 2053 M_ASSERTPKTHDR(m); 2054 2055 if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 2056 CSUM_TCP_IPV6 | CSUM_TSO)) 2057 return (1); 2058 return (0); 2059 } 2060 2061 static inline int 2062 needs_vlan_insertion(struct mbuf *m) 2063 { 2064 2065 M_ASSERTPKTHDR(m); 2066 2067 if (m->m_flags & M_VLANTAG) { 2068 KASSERT(m->m_pkthdr.ether_vtag != 0, 2069 ("%s: HWVLAN requested in mbuf %p but tag not provided", 2070 __func__, m)); 2071 return (1); 2072 } 2073 return (0); 2074 } 2075 2076 static void * 2077 m_advance(struct mbuf **pm, int *poffset, int len) 2078 { 2079 struct mbuf *m = *pm; 2080 int offset = *poffset; 2081 uintptr_t p = 0; 2082 2083 MPASS(len > 0); 2084 2085 while (len) { 2086 if (offset + len < m->m_len) { 2087 offset += len; 2088 p = mtod(m, uintptr_t) + offset; 2089 break; 2090 } 2091 len -= m->m_len - offset; 2092 m = m->m_next; 2093 offset = 0; 2094 MPASS(m != NULL); 2095 } 2096 *poffset = offset; 2097 *pm = m; 2098 return ((void *)p); 2099 } 2100 2101 static inline int 2102 same_paddr(char *a, char *b) 2103 { 2104 2105 if (a == b) 2106 return (1); 2107 else if (a != NULL && b != NULL) { 2108 vm_offset_t x = (vm_offset_t)a; 2109 vm_offset_t y = (vm_offset_t)b; 2110 2111 if ((x & PAGE_MASK) == (y & PAGE_MASK) && 2112 pmap_kextract(x) == pmap_kextract(y)) 2113 return (1); 2114 } 2115 2116 return (0); 2117 } 2118 
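/*
 * same_paddr above is used by count_mbuf_nsegs below to detect an mbuf whose
 * data begins at the physical address where the previous mbuf's data ended;
 * such mbufs coalesce into a single DMA segment.
 */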
2119 /* 2120 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2121 * must have at least one mbuf that's not empty. 2122 */ 2123 static inline int 2124 count_mbuf_nsegs(struct mbuf *m) 2125 { 2126 char *prev_end, *start; 2127 int len, nsegs; 2128 2129 MPASS(m != NULL); 2130 2131 nsegs = 0; 2132 prev_end = NULL; 2133 for (; m; m = m->m_next) { 2134 2135 len = m->m_len; 2136 if (__predict_false(len == 0)) 2137 continue; 2138 start = mtod(m, char *); 2139 2140 nsegs += sglist_count(start, len); 2141 if (same_paddr(prev_end, start)) 2142 nsegs--; 2143 prev_end = start + len; 2144 } 2145 2146 MPASS(nsegs > 0); 2147 return (nsegs); 2148 } 2149 2150 /* 2151 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2152 * a) caller can assume it's been freed if this function returns with an error. 2153 * b) it may get defragged up if the gather list is too long for the hardware. 2154 */ 2155 int 2156 parse_pkt(struct mbuf **mp) 2157 { 2158 struct mbuf *m0 = *mp, *m; 2159 int rc, nsegs, defragged = 0, offset; 2160 struct ether_header *eh; 2161 void *l3hdr; 2162 #if defined(INET) || defined(INET6) 2163 struct tcphdr *tcp; 2164 #endif 2165 uint16_t eh_type; 2166 2167 M_ASSERTPKTHDR(m0); 2168 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2169 rc = EINVAL; 2170 fail: 2171 m_freem(m0); 2172 *mp = NULL; 2173 return (rc); 2174 } 2175 restart: 2176 /* 2177 * First count the number of gather list segments in the payload. 2178 * Defrag the mbuf if nsegs exceeds the hardware limit. 2179 */ 2180 M_ASSERTPKTHDR(m0); 2181 MPASS(m0->m_pkthdr.len > 0); 2182 nsegs = count_mbuf_nsegs(m0); 2183 if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { 2184 if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { 2185 rc = EFBIG; 2186 goto fail; 2187 } 2188 *mp = m0 = m; /* update caller's copy after defrag */ 2189 goto restart; 2190 } 2191 2192 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { 2193 m0 = m_pullup(m0, m0->m_pkthdr.len); 2194 if (m0 == NULL) { 2195 /* Should have left well enough alone. */ 2196 rc = EFBIG; 2197 goto fail; 2198 } 2199 *mp = m0; /* update caller's copy after pullup */ 2200 goto restart; 2201 } 2202 set_mbuf_nsegs(m0, nsegs); 2203 set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); 2204 2205 if (!needs_tso(m0)) 2206 return (0); 2207 2208 m = m0; 2209 eh = mtod(m, struct ether_header *); 2210 eh_type = ntohs(eh->ether_type); 2211 if (eh_type == ETHERTYPE_VLAN) { 2212 struct ether_vlan_header *evh = (void *)eh; 2213 2214 eh_type = ntohs(evh->evl_proto); 2215 m0->m_pkthdr.l2hlen = sizeof(*evh); 2216 } else 2217 m0->m_pkthdr.l2hlen = sizeof(*eh); 2218 2219 offset = 0; 2220 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2221 2222 switch (eh_type) { 2223 #ifdef INET6 2224 case ETHERTYPE_IPV6: 2225 { 2226 struct ip6_hdr *ip6 = l3hdr; 2227 2228 MPASS(ip6->ip6_nxt == IPPROTO_TCP); 2229 2230 m0->m_pkthdr.l3hlen = sizeof(*ip6); 2231 break; 2232 } 2233 #endif 2234 #ifdef INET 2235 case ETHERTYPE_IP: 2236 { 2237 struct ip *ip = l3hdr; 2238 2239 m0->m_pkthdr.l3hlen = ip->ip_hl * 4; 2240 break; 2241 } 2242 #endif 2243 default: 2244 panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" 2245 " with the same INET/INET6 options as the kernel.", 2246 __func__, eh_type); 2247 } 2248 2249 #if defined(INET) || defined(INET6) 2250 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2251 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2252 #endif 2253 MPASS(m0 == *mp); 2254 return (0); 2255 } 2256 2257 void * 2258 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2259 { 2260 struct sge_eq *eq = &wrq->eq; 2261 struct adapter *sc = wrq->adapter; 2262 int ndesc, available; 2263 struct wrqe *wr; 2264 void *w; 2265 2266 MPASS(len16 > 0); 2267 ndesc = howmany(len16, EQ_ESIZE / 16); 2268 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2269 2270 EQ_LOCK(eq); 2271 2272 if (!STAILQ_EMPTY(&wrq->wr_list)) 2273 drain_wrq_wr_list(sc, wrq); 2274 2275 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2276 slowpath: 2277 EQ_UNLOCK(eq); 2278 wr = alloc_wrqe(len16 * 16, wrq); 2279 if (__predict_false(wr == NULL)) 2280 return (NULL); 2281 cookie->pidx = -1; 2282 cookie->ndesc = ndesc; 2283 return (&wr->wr); 2284 } 2285 2286 eq->cidx = read_hw_cidx(eq); 2287 if (eq->pidx == eq->cidx) 2288 available = eq->sidx - 1; 2289 else 2290 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2291 if (available < ndesc) 2292 goto slowpath; 2293 2294 cookie->pidx = eq->pidx; 2295 cookie->ndesc = ndesc; 2296 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2297 2298 w = &eq->desc[eq->pidx]; 2299 IDXINCR(eq->pidx, ndesc, eq->sidx); 2300 if (__predict_false(eq->pidx < ndesc - 1)) { 2301 w = &wrq->ss[0]; 2302 wrq->ss_pidx = cookie->pidx; 2303 wrq->ss_len = len16 * 16; 2304 } 2305 2306 EQ_UNLOCK(eq); 2307 2308 return (w); 2309 } 2310 2311 void 2312 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2313 { 2314 struct sge_eq *eq = &wrq->eq; 2315 struct adapter *sc = wrq->adapter; 2316 int ndesc, pidx; 2317 struct wrq_cookie *prev, *next; 2318 2319 if (cookie->pidx == -1) { 2320 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2321 2322 t4_wrq_tx(sc, wr); 2323 return; 2324 } 2325 2326 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 2327 pidx = cookie->pidx; 2328 MPASS(pidx >= 0 && pidx < eq->sidx); 2329 if (__predict_false(w == &wrq->ss[0])) { 2330 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2331 2332 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2333 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2334 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2335 wrq->tx_wrs_ss++; 2336 } else 2337 wrq->tx_wrs_direct++; 2338 2339 EQ_LOCK(eq); 2340 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2341 next = TAILQ_NEXT(cookie, link); 2342 if (prev == NULL) { 2343 MPASS(pidx == eq->dbidx); 2344 if (next == NULL || ndesc >= 16) 2345 ring_eq_db(wrq->adapter, eq, ndesc); 2346 else { 2347 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 2348 next->pidx = pidx; 2349 next->ndesc += ndesc; 2350 } 2351 } else { 2352 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 2353 prev->ndesc += ndesc; 2354 } 2355 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 2356 2357 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2358 drain_wrq_wr_list(sc, wrq); 2359 2360 #ifdef INVARIANTS 2361 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 2362 /* Doorbell must have caught up to the pidx. 
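 * The cookie just removed was the only incomplete WR, so ring_eq_db was
 * called for it above (next == NULL), and drain_wrq_wr_list rang the
 * doorbell for anything else it wrote.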
*/ 2363 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 2364 } 2365 #endif 2366 EQ_UNLOCK(eq); 2367 } 2368 2369 static u_int 2370 can_resume_eth_tx(struct mp_ring *r) 2371 { 2372 struct sge_eq *eq = r->cookie; 2373 2374 return (total_available_tx_desc(eq) > eq->sidx / 8); 2375 } 2376 2377 static inline int 2378 cannot_use_txpkts(struct mbuf *m) 2379 { 2380 /* maybe put a GL limit too, to avoid silliness? */ 2381 2382 return (needs_tso(m)); 2383 } 2384 2385 /* 2386 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 2387 * be consumed. Return the actual number consumed. 0 indicates a stall. 2388 */ 2389 static u_int 2390 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) 2391 { 2392 struct sge_txq *txq = r->cookie; 2393 struct sge_eq *eq = &txq->eq; 2394 struct ifnet *ifp = txq->ifp; 2395 struct vi_info *vi = ifp->if_softc; 2396 struct port_info *pi = vi->pi; 2397 struct adapter *sc = pi->adapter; 2398 u_int total, remaining; /* # of packets */ 2399 u_int available, dbdiff; /* # of hardware descriptors */ 2400 u_int n, next_cidx; 2401 struct mbuf *m0, *tail; 2402 struct txpkts txp; 2403 struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ 2404 2405 remaining = IDXDIFF(pidx, cidx, r->size); 2406 MPASS(remaining > 0); /* Must not be called without work to do. */ 2407 total = 0; 2408 2409 TXQ_LOCK(txq); 2410 if (__predict_false((eq->flags & EQ_ENABLED) == 0)) { 2411 while (cidx != pidx) { 2412 m0 = r->items[cidx]; 2413 m_freem(m0); 2414 if (++cidx == r->size) 2415 cidx = 0; 2416 } 2417 reclaim_tx_descs(txq, 2048); 2418 total = remaining; 2419 goto done; 2420 } 2421 2422 /* How many hardware descriptors do we have readily available. */ 2423 if (eq->pidx == eq->cidx) 2424 available = eq->sidx - 1; 2425 else 2426 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2427 dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); 2428 2429 while (remaining > 0) { 2430 2431 m0 = r->items[cidx]; 2432 M_ASSERTPKTHDR(m0); 2433 MPASS(m0->m_nextpkt == NULL); 2434 2435 if (available < SGE_MAX_WR_NDESC) { 2436 available += reclaim_tx_descs(txq, 64); 2437 if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) 2438 break; /* out of descriptors */ 2439 } 2440 2441 next_cidx = cidx + 1; 2442 if (__predict_false(next_cidx == r->size)) 2443 next_cidx = 0; 2444 2445 wr = (void *)&eq->desc[eq->pidx]; 2446 if (remaining > 1 && 2447 try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { 2448 2449 /* pkts at cidx, next_cidx should both be in txp. 
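 * Chain them with m_nextpkt, keep appending frames until add_to_txpkts
 * refuses one or the ring runs dry, then write a single txpkts work
 * request that covers the whole chain.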
*/ 2450 MPASS(txp.npkt == 2); 2451 tail = r->items[next_cidx]; 2452 MPASS(tail->m_nextpkt == NULL); 2453 ETHER_BPF_MTAP(ifp, m0); 2454 ETHER_BPF_MTAP(ifp, tail); 2455 m0->m_nextpkt = tail; 2456 2457 if (__predict_false(++next_cidx == r->size)) 2458 next_cidx = 0; 2459 2460 while (next_cidx != pidx) { 2461 if (add_to_txpkts(r->items[next_cidx], &txp, 2462 available) != 0) 2463 break; 2464 tail->m_nextpkt = r->items[next_cidx]; 2465 tail = tail->m_nextpkt; 2466 ETHER_BPF_MTAP(ifp, tail); 2467 if (__predict_false(++next_cidx == r->size)) 2468 next_cidx = 0; 2469 } 2470 2471 n = write_txpkts_wr(txq, wr, m0, &txp, available); 2472 total += txp.npkt; 2473 remaining -= txp.npkt; 2474 } else { 2475 total++; 2476 remaining--; 2477 ETHER_BPF_MTAP(ifp, m0); 2478 n = write_txpkt_wr(txq, (void *)wr, m0, available); 2479 } 2480 MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); 2481 2482 available -= n; 2483 dbdiff += n; 2484 IDXINCR(eq->pidx, n, eq->sidx); 2485 2486 if (total_available_tx_desc(eq) < eq->sidx / 4 && 2487 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2488 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2489 F_FW_WR_EQUEQ); 2490 eq->equeqidx = eq->pidx; 2491 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 2492 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 2493 eq->equeqidx = eq->pidx; 2494 } 2495 2496 if (dbdiff >= 16 && remaining >= 4) { 2497 ring_eq_db(sc, eq, dbdiff); 2498 available += reclaim_tx_descs(txq, 4 * dbdiff); 2499 dbdiff = 0; 2500 } 2501 2502 cidx = next_cidx; 2503 } 2504 if (dbdiff != 0) { 2505 ring_eq_db(sc, eq, dbdiff); 2506 reclaim_tx_descs(txq, 32); 2507 } 2508 done: 2509 TXQ_UNLOCK(txq); 2510 2511 return (total); 2512 } 2513 2514 static inline void 2515 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 2516 int qsize) 2517 { 2518 2519 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 2520 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 2521 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 2522 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 2523 2524 iq->flags = 0; 2525 iq->adapter = sc; 2526 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 2527 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 2528 if (pktc_idx >= 0) { 2529 iq->intr_params |= F_QINTR_CNT_EN; 2530 iq->intr_pktc_idx = pktc_idx; 2531 } 2532 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 2533 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 2534 } 2535 2536 static inline void 2537 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 2538 { 2539 2540 fl->qsize = qsize; 2541 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2542 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 2543 if (sc->flags & BUF_PACKING_OK && 2544 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 2545 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 2546 fl->flags |= FL_BUF_PACKING; 2547 find_best_refill_source(sc, fl, maxp); 2548 find_safe_refill_source(sc, fl); 2549 } 2550 2551 static inline void 2552 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 2553 uint8_t tx_chan, uint16_t iqid, char *name) 2554 { 2555 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 2556 2557 eq->flags = eqtype & EQ_TYPEMASK; 2558 eq->tx_chan = tx_chan; 2559 eq->iqid = iqid; 2560 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2561 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 2562 } 2563 2564 static int 2565 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 2566 bus_dmamap_t 
*map, bus_addr_t *pa, void **va) 2567 { 2568 int rc; 2569 2570 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 2571 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 2572 if (rc != 0) { 2573 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 2574 goto done; 2575 } 2576 2577 rc = bus_dmamem_alloc(*tag, va, 2578 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 2579 if (rc != 0) { 2580 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 2581 goto done; 2582 } 2583 2584 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 2585 if (rc != 0) { 2586 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 2587 goto done; 2588 } 2589 done: 2590 if (rc) 2591 free_ring(sc, *tag, *map, *pa, *va); 2592 2593 return (rc); 2594 } 2595 2596 static int 2597 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 2598 bus_addr_t pa, void *va) 2599 { 2600 if (pa) 2601 bus_dmamap_unload(tag, map); 2602 if (va) 2603 bus_dmamem_free(tag, va, map); 2604 if (tag) 2605 bus_dma_tag_destroy(tag); 2606 2607 return (0); 2608 } 2609 2610 /* 2611 * Allocates the ring for an ingress queue and an optional freelist. If the 2612 * freelist is specified it will be allocated and then associated with the 2613 * ingress queue. 2614 * 2615 * Returns errno on failure. Resources allocated up to that point may still be 2616 * allocated. Caller is responsible for cleanup in case this function fails. 2617 * 2618 * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then 2619 * the intr_idx specifies the vector, starting from 0. Otherwise it specifies 2620 * the abs_id of the ingress queue to which its interrupts should be forwarded. 2621 */ 2622 static int 2623 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 2624 int intr_idx, int cong) 2625 { 2626 int rc, i, cntxt_id; 2627 size_t len; 2628 struct fw_iq_cmd c; 2629 struct port_info *pi = vi->pi; 2630 struct adapter *sc = iq->adapter; 2631 struct sge_params *sp = &sc->params.sge; 2632 __be32 v = 0; 2633 2634 len = iq->qsize * IQ_ESIZE; 2635 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 2636 (void **)&iq->desc); 2637 if (rc != 0) 2638 return (rc); 2639 2640 bzero(&c, sizeof(c)); 2641 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 2642 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 2643 V_FW_IQ_CMD_VFN(0)); 2644 2645 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 2646 FW_LEN16(c)); 2647 2648 /* Special handling for firmware event queue */ 2649 if (iq == &sc->sge.fwq) 2650 v |= F_FW_IQ_CMD_IQASYNCH; 2651 2652 if (iq->flags & IQ_INTR) { 2653 KASSERT(intr_idx < sc->intr_count, 2654 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 2655 } else 2656 v |= F_FW_IQ_CMD_IQANDST; 2657 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 2658 2659 c.type_to_iqandstindex = htobe32(v | 2660 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 2661 V_FW_IQ_CMD_VIID(vi->viid) | 2662 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 2663 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 2664 F_FW_IQ_CMD_IQGTSMODE | 2665 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 2666 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 2667 c.iqsize = htobe16(iq->qsize); 2668 c.iqaddr = htobe64(iq->ba); 2669 if (cong >= 0) 2670 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 2671 2672 if (fl) { 2673 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 2674 2675 len = fl->qsize * EQ_ESIZE; 2676 rc = alloc_ring(sc, 
len, &fl->desc_tag, &fl->desc_map, 2677 &fl->ba, (void **)&fl->desc); 2678 if (rc) 2679 return (rc); 2680 2681 /* Allocate space for one software descriptor per buffer. */ 2682 rc = alloc_fl_sdesc(fl); 2683 if (rc != 0) { 2684 device_printf(sc->dev, 2685 "failed to setup fl software descriptors: %d\n", 2686 rc); 2687 return (rc); 2688 } 2689 2690 if (fl->flags & FL_BUF_PACKING) { 2691 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 2692 fl->buf_boundary = sp->pack_boundary; 2693 } else { 2694 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 2695 fl->buf_boundary = 16; 2696 } 2697 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 2698 fl->buf_boundary = sp->pad_boundary; 2699 2700 c.iqns_to_fl0congen |= 2701 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 2702 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 2703 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 2704 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 2705 0)); 2706 if (cong >= 0) { 2707 c.iqns_to_fl0congen |= 2708 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 2709 F_FW_IQ_CMD_FL0CONGCIF | 2710 F_FW_IQ_CMD_FL0CONGEN); 2711 } 2712 c.fl0dcaen_to_fl0cidxfthresh = 2713 htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) | 2714 V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); 2715 c.fl0size = htobe16(fl->qsize); 2716 c.fl0addr = htobe64(fl->ba); 2717 } 2718 2719 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2720 if (rc != 0) { 2721 device_printf(sc->dev, 2722 "failed to create ingress queue: %d\n", rc); 2723 return (rc); 2724 } 2725 2726 iq->cidx = 0; 2727 iq->gen = F_RSPD_GEN; 2728 iq->intr_next = iq->intr_params; 2729 iq->cntxt_id = be16toh(c.iqid); 2730 iq->abs_id = be16toh(c.physiqid); 2731 iq->flags |= IQ_ALLOCATED; 2732 2733 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 2734 if (cntxt_id >= sc->sge.niq) { 2735 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 2736 cntxt_id, sc->sge.niq - 1); 2737 } 2738 sc->sge.iqmap[cntxt_id] = iq; 2739 2740 if (fl) { 2741 u_int qid; 2742 2743 iq->flags |= IQ_HAS_FL; 2744 fl->cntxt_id = be16toh(c.fl0id); 2745 fl->pidx = fl->cidx = 0; 2746 2747 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 2748 if (cntxt_id >= sc->sge.neq) { 2749 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 2750 __func__, cntxt_id, sc->sge.neq - 1); 2751 } 2752 sc->sge.eqmap[cntxt_id] = (void *)fl; 2753 2754 qid = fl->cntxt_id; 2755 if (isset(&sc->doorbells, DOORBELL_UDB)) { 2756 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 2757 uint32_t mask = (1 << s_qpp) - 1; 2758 volatile uint8_t *udb; 2759 2760 udb = sc->udbs_base + UDBS_DB_OFFSET; 2761 udb += (qid >> s_qpp) << PAGE_SHIFT; 2762 qid &= mask; 2763 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 2764 udb += qid << UDBS_SEG_SHIFT; 2765 qid = 0; 2766 } 2767 fl->udb = (volatile void *)udb; 2768 } 2769 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 2770 2771 FL_LOCK(fl); 2772 /* Enough to make sure the SGE doesn't think it's starved */ 2773 refill_fl(sc, fl, fl->lowat); 2774 FL_UNLOCK(fl); 2775 } 2776 2777 if (is_t5(sc) && cong >= 0) { 2778 uint32_t param, val; 2779 2780 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 2781 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 2782 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 2783 if (cong == 0) 2784 val = 1 << 19; 2785 else { 2786 val = 2 << 19; 2787 for (i = 0; i < 4; i++) { 2788 if (cong & (1 << i)) 2789 val |= 1 << (i << 2); 2790 } 2791 } 2792 2793 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); 2794 if (rc != 0) { 2795 /* report error but carry on */ 2796 device_printf(sc->dev, 2797 "failed to set congestion 
manager context for " 2798 "ingress queue %d: %d\n", iq->cntxt_id, rc); 2799 } 2800 } 2801 2802 /* Enable IQ interrupts */ 2803 atomic_store_rel_int(&iq->state, IQS_IDLE); 2804 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 2805 V_INGRESSQID(iq->cntxt_id)); 2806 2807 return (0); 2808 } 2809 2810 static int 2811 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 2812 { 2813 int rc; 2814 struct adapter *sc = iq->adapter; 2815 device_t dev; 2816 2817 if (sc == NULL) 2818 return (0); /* nothing to do */ 2819 2820 dev = vi ? vi->dev : sc->dev; 2821 2822 if (iq->flags & IQ_ALLOCATED) { 2823 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 2824 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 2825 fl ? fl->cntxt_id : 0xffff, 0xffff); 2826 if (rc != 0) { 2827 device_printf(dev, 2828 "failed to free queue %p: %d\n", iq, rc); 2829 return (rc); 2830 } 2831 iq->flags &= ~IQ_ALLOCATED; 2832 } 2833 2834 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 2835 2836 bzero(iq, sizeof(*iq)); 2837 2838 if (fl) { 2839 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 2840 fl->desc); 2841 2842 if (fl->sdesc) 2843 free_fl_sdesc(sc, fl); 2844 2845 if (mtx_initialized(&fl->fl_lock)) 2846 mtx_destroy(&fl->fl_lock); 2847 2848 bzero(fl, sizeof(*fl)); 2849 } 2850 2851 return (0); 2852 } 2853 2854 static void 2855 add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 2856 struct sge_fl *fl) 2857 { 2858 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2859 2860 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 2861 "freelist"); 2862 children = SYSCTL_CHILDREN(oid); 2863 2864 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 2865 CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", 2866 "SGE context id of the freelist"); 2867 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 2868 fl_pad ? 1 : 0, "padding enabled"); 2869 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 2870 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 2871 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 2872 0, "consumer index"); 2873 if (fl->flags & FL_BUF_PACKING) { 2874 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 2875 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 2876 } 2877 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 2878 0, "producer index"); 2879 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", 2880 CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); 2881 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", 2882 CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); 2883 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 2884 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 2885 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 2886 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 2887 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 2888 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 2889 } 2890 2891 static int 2892 alloc_fwq(struct adapter *sc) 2893 { 2894 int rc, intr_idx; 2895 struct sge_iq *fwq = &sc->sge.fwq; 2896 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2897 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2898 2899 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 2900 fwq->flags |= IQ_INTR; /* always */ 2901 intr_idx = sc->intr_count > 1 ? 
1 : 0; 2902 fwq->set_tcb_rpl = t4_filter_rpl; 2903 fwq->l2t_write_rpl = do_l2t_write_rpl; 2904 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 2905 if (rc != 0) { 2906 device_printf(sc->dev, 2907 "failed to create firmware event queue: %d\n", rc); 2908 return (rc); 2909 } 2910 2911 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 2912 NULL, "firmware event queue"); 2913 children = SYSCTL_CHILDREN(oid); 2914 2915 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", 2916 CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", 2917 "absolute id of the queue"); 2918 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", 2919 CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", 2920 "SGE context id of the queue"); 2921 SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", 2922 CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", 2923 "consumer index"); 2924 2925 return (0); 2926 } 2927 2928 static int 2929 free_fwq(struct adapter *sc) 2930 { 2931 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 2932 } 2933 2934 static int 2935 alloc_mgmtq(struct adapter *sc) 2936 { 2937 int rc; 2938 struct sge_wrq *mgmtq = &sc->sge.mgmtq; 2939 char name[16]; 2940 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 2941 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 2942 2943 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, 2944 NULL, "management queue"); 2945 2946 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); 2947 init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, 2948 sc->sge.fwq.cntxt_id, name); 2949 rc = alloc_wrq(sc, NULL, mgmtq, oid); 2950 if (rc != 0) { 2951 device_printf(sc->dev, 2952 "failed to create management queue: %d\n", rc); 2953 return (rc); 2954 } 2955 2956 return (0); 2957 } 2958 2959 static int 2960 free_mgmtq(struct adapter *sc) 2961 { 2962 2963 return free_wrq(sc, &sc->sge.mgmtq); 2964 } 2965 2966 int 2967 tnl_cong(struct port_info *pi, int drop) 2968 { 2969 2970 if (drop == -1) 2971 return (-1); 2972 else if (drop == 1) 2973 return (0); 2974 else 2975 return (pi->rx_chan_map); 2976 } 2977 2978 static int 2979 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 2980 struct sysctl_oid *oid) 2981 { 2982 int rc; 2983 struct adapter *sc = vi->pi->adapter; 2984 struct sysctl_oid_list *children; 2985 char name[16]; 2986 2987 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 2988 tnl_cong(vi->pi, cong_drop)); 2989 if (rc != 0) 2990 return (rc); 2991 2992 if (idx == 0) 2993 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 2994 else 2995 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 2996 ("iq_base mismatch")); 2997 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 2998 ("PF with non-zero iq_base")); 2999 3000 /* 3001 * The freelist is just barely above the starvation threshold right now, 3002 * fill it up a bit more. 
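 * (alloc_iq_fl asked refill_fl for only fl->lowat buffers, just enough to
 * keep the hardware from considering the freelist starved.)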
3003 */ 3004 FL_LOCK(&rxq->fl); 3005 refill_fl(sc, &rxq->fl, 128); 3006 FL_UNLOCK(&rxq->fl); 3007 3008 #if defined(INET) || defined(INET6) 3009 rc = tcp_lro_init(&rxq->lro); 3010 if (rc != 0) 3011 return (rc); 3012 rxq->lro.ifp = vi->ifp; /* also indicates LRO init'ed */ 3013 3014 if (vi->ifp->if_capenable & IFCAP_LRO) 3015 rxq->iq.flags |= IQ_LRO_ENABLED; 3016 #endif 3017 rxq->ifp = vi->ifp; 3018 3019 children = SYSCTL_CHILDREN(oid); 3020 3021 snprintf(name, sizeof(name), "%d", idx); 3022 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3023 NULL, "rx queue"); 3024 children = SYSCTL_CHILDREN(oid); 3025 3026 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", 3027 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I", 3028 "absolute id of the queue"); 3029 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", 3030 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I", 3031 "SGE context id of the queue"); 3032 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3033 CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I", 3034 "consumer index"); 3035 #if defined(INET) || defined(INET6) 3036 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3037 &rxq->lro.lro_queued, 0, NULL); 3038 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3039 &rxq->lro.lro_flushed, 0, NULL); 3040 #endif 3041 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3042 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3043 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3044 CTLFLAG_RD, &rxq->vlan_extraction, 3045 "# of times hardware extracted 802.1Q tag"); 3046 3047 add_fl_sysctls(&vi->ctx, oid, &rxq->fl); 3048 3049 return (rc); 3050 } 3051 3052 static int 3053 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3054 { 3055 int rc; 3056 3057 #if defined(INET) || defined(INET6) 3058 if (rxq->lro.ifp) { 3059 tcp_lro_free(&rxq->lro); 3060 rxq->lro.ifp = NULL; 3061 } 3062 #endif 3063 3064 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3065 if (rc == 0) 3066 bzero(rxq, sizeof(*rxq)); 3067 3068 return (rc); 3069 } 3070 3071 #ifdef TCP_OFFLOAD 3072 static int 3073 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3074 int intr_idx, int idx, struct sysctl_oid *oid) 3075 { 3076 int rc; 3077 struct sysctl_oid_list *children; 3078 char name[16]; 3079 3080 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 3081 vi->pi->rx_chan_map); 3082 if (rc != 0) 3083 return (rc); 3084 3085 children = SYSCTL_CHILDREN(oid); 3086 3087 snprintf(name, sizeof(name), "%d", idx); 3088 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3089 NULL, "rx queue"); 3090 children = SYSCTL_CHILDREN(oid); 3091 3092 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "abs_id", 3093 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, 3094 "I", "absolute id of the queue"); 3095 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cntxt_id", 3096 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, 3097 "I", "SGE context id of the queue"); 3098 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3099 CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", 3100 "consumer index"); 3101 3102 add_fl_sysctls(&vi->ctx, oid, &ofld_rxq->fl); 3103 3104 return (rc); 3105 } 3106 3107 static int 3108 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3109 { 3110 int rc; 3111 3112 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3113 if (rc == 
0) 3114 bzero(ofld_rxq, sizeof(*ofld_rxq)); 3115 3116 return (rc); 3117 } 3118 #endif 3119 3120 #ifdef DEV_NETMAP 3121 static int 3122 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3123 int idx, struct sysctl_oid *oid) 3124 { 3125 int rc; 3126 struct sysctl_oid_list *children; 3127 struct sysctl_ctx_list *ctx; 3128 char name[16]; 3129 size_t len; 3130 struct adapter *sc = vi->pi->adapter; 3131 struct netmap_adapter *na = NA(vi->ifp); 3132 3133 MPASS(na != NULL); 3134 3135 len = vi->qsize_rxq * IQ_ESIZE; 3136 rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3137 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3138 if (rc != 0) 3139 return (rc); 3140 3141 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3142 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 3143 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3144 if (rc != 0) 3145 return (rc); 3146 3147 nm_rxq->vi = vi; 3148 nm_rxq->nid = idx; 3149 nm_rxq->iq_cidx = 0; 3150 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3151 nm_rxq->iq_gen = F_RSPD_GEN; 3152 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3153 nm_rxq->fl_sidx = na->num_rx_desc; 3154 nm_rxq->intr_idx = intr_idx; 3155 3156 ctx = &vi->ctx; 3157 children = SYSCTL_CHILDREN(oid); 3158 3159 snprintf(name, sizeof(name), "%d", idx); 3160 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, 3161 "rx queue"); 3162 children = SYSCTL_CHILDREN(oid); 3163 3164 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3165 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, 3166 "I", "absolute id of the queue"); 3167 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3168 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, 3169 "I", "SGE context id of the queue"); 3170 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3171 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", 3172 "consumer index"); 3173 3174 children = SYSCTL_CHILDREN(oid); 3175 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3176 "freelist"); 3177 children = SYSCTL_CHILDREN(oid); 3178 3179 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3180 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, 3181 "I", "SGE context id of the freelist"); 3182 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 3183 &nm_rxq->fl_cidx, 0, "consumer index"); 3184 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 3185 &nm_rxq->fl_pidx, 0, "producer index"); 3186 3187 return (rc); 3188 } 3189 3190 3191 static int 3192 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 3193 { 3194 struct adapter *sc = vi->pi->adapter; 3195 3196 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 3197 nm_rxq->iq_desc); 3198 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 3199 nm_rxq->fl_desc); 3200 3201 return (0); 3202 } 3203 3204 static int 3205 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 3206 struct sysctl_oid *oid) 3207 { 3208 int rc; 3209 size_t len; 3210 struct port_info *pi = vi->pi; 3211 struct adapter *sc = pi->adapter; 3212 struct netmap_adapter *na = NA(vi->ifp); 3213 char name[16]; 3214 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3215 3216 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3217 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 3218 &nm_txq->ba, (void **)&nm_txq->desc); 3219 if (rc) 3220 return (rc); 3221 3222 nm_txq->pidx = 
nm_txq->cidx = 0; 3223 nm_txq->sidx = na->num_tx_desc; 3224 nm_txq->nid = idx; 3225 nm_txq->iqidx = iqidx; 3226 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3227 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | 3228 V_TXPKT_VF(vi->viid)); 3229 3230 snprintf(name, sizeof(name), "%d", idx); 3231 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3232 NULL, "netmap tx queue"); 3233 children = SYSCTL_CHILDREN(oid); 3234 3235 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3236 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 3237 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3238 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", 3239 "consumer index"); 3240 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3241 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", 3242 "producer index"); 3243 3244 return (rc); 3245 } 3246 3247 static int 3248 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 3249 { 3250 struct adapter *sc = vi->pi->adapter; 3251 3252 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 3253 nm_txq->desc); 3254 3255 return (0); 3256 } 3257 #endif 3258 3259 static int 3260 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 3261 { 3262 int rc, cntxt_id; 3263 struct fw_eq_ctrl_cmd c; 3264 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3265 3266 bzero(&c, sizeof(c)); 3267 3268 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 3269 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 3270 V_FW_EQ_CTRL_CMD_VFN(0)); 3271 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 3272 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 3273 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 3274 c.physeqid_pkd = htobe32(0); 3275 c.fetchszm_to_iqid = 3276 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3277 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 3278 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 3279 c.dcaen_to_eqsize = 3280 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3281 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3282 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 3283 c.eqaddr = htobe64(eq->ba); 3284 3285 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3286 if (rc != 0) { 3287 device_printf(sc->dev, 3288 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 3289 return (rc); 3290 } 3291 eq->flags |= EQ_ALLOCATED; 3292 3293 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 3294 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3295 if (cntxt_id >= sc->sge.neq) 3296 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3297 cntxt_id, sc->sge.neq - 1); 3298 sc->sge.eqmap[cntxt_id] = eq; 3299 3300 return (rc); 3301 } 3302 3303 static int 3304 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3305 { 3306 int rc, cntxt_id; 3307 struct fw_eq_eth_cmd c; 3308 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3309 3310 bzero(&c, sizeof(c)); 3311 3312 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 3313 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 3314 V_FW_EQ_ETH_CMD_VFN(0)); 3315 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 3316 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 3317 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 3318 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 3319 c.fetchszm_to_iqid = 3320 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3321 
V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 3322 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 3323 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3324 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3325 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 3326 c.eqaddr = htobe64(eq->ba); 3327 3328 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3329 if (rc != 0) { 3330 device_printf(vi->dev, 3331 "failed to create Ethernet egress queue: %d\n", rc); 3332 return (rc); 3333 } 3334 eq->flags |= EQ_ALLOCATED; 3335 3336 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 3337 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 3338 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3339 if (cntxt_id >= sc->sge.neq) 3340 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3341 cntxt_id, sc->sge.neq - 1); 3342 sc->sge.eqmap[cntxt_id] = eq; 3343 3344 return (rc); 3345 } 3346 3347 #ifdef TCP_OFFLOAD 3348 static int 3349 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3350 { 3351 int rc, cntxt_id; 3352 struct fw_eq_ofld_cmd c; 3353 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3354 3355 bzero(&c, sizeof(c)); 3356 3357 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 3358 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 3359 V_FW_EQ_OFLD_CMD_VFN(0)); 3360 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 3361 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 3362 c.fetchszm_to_iqid = 3363 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3364 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 3365 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 3366 c.dcaen_to_eqsize = 3367 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3368 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3369 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 3370 c.eqaddr = htobe64(eq->ba); 3371 3372 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3373 if (rc != 0) { 3374 device_printf(vi->dev, 3375 "failed to create egress queue for TCP offload: %d\n", rc); 3376 return (rc); 3377 } 3378 eq->flags |= EQ_ALLOCATED; 3379 3380 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 3381 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3382 if (cntxt_id >= sc->sge.neq) 3383 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3384 cntxt_id, sc->sge.neq - 1); 3385 sc->sge.eqmap[cntxt_id] = eq; 3386 3387 return (rc); 3388 } 3389 #endif 3390 3391 static int 3392 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3393 { 3394 int rc, qsize; 3395 size_t len; 3396 3397 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3398 3399 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3400 len = qsize * EQ_ESIZE; 3401 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 3402 &eq->ba, (void **)&eq->desc); 3403 if (rc) 3404 return (rc); 3405 3406 eq->pidx = eq->cidx = 0; 3407 eq->equeqidx = eq->dbidx = 0; 3408 eq->doorbells = sc->doorbells; 3409 3410 switch (eq->flags & EQ_TYPEMASK) { 3411 case EQ_CTRL: 3412 rc = ctrl_eq_alloc(sc, eq); 3413 break; 3414 3415 case EQ_ETH: 3416 rc = eth_eq_alloc(sc, vi, eq); 3417 break; 3418 3419 #ifdef TCP_OFFLOAD 3420 case EQ_OFLD: 3421 rc = ofld_eq_alloc(sc, vi, eq); 3422 break; 3423 #endif 3424 3425 default: 3426 panic("%s: invalid eq type %d.", __func__, 3427 eq->flags & EQ_TYPEMASK); 3428 } 3429 if (rc != 0) { 3430 device_printf(sc->dev, 3431 "failed to allocate egress queue(%d): %d\n", 3432 eq->flags & EQ_TYPEMASK, rc); 3433 } 3434 3435 if (isset(&eq->doorbells, DOORBELL_UDB) || 3436 
isset(&eq->doorbells, DOORBELL_UDBWC) || 3437 isset(&eq->doorbells, DOORBELL_WCWR)) { 3438 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3439 uint32_t mask = (1 << s_qpp) - 1; 3440 volatile uint8_t *udb; 3441 3442 udb = sc->udbs_base + UDBS_DB_OFFSET; 3443 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 3444 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 3445 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 3446 clrbit(&eq->doorbells, DOORBELL_WCWR); 3447 else { 3448 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 3449 eq->udb_qid = 0; 3450 } 3451 eq->udb = (volatile void *)udb; 3452 } 3453 3454 return (rc); 3455 } 3456 3457 static int 3458 free_eq(struct adapter *sc, struct sge_eq *eq) 3459 { 3460 int rc; 3461 3462 if (eq->flags & EQ_ALLOCATED) { 3463 switch (eq->flags & EQ_TYPEMASK) { 3464 case EQ_CTRL: 3465 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 3466 eq->cntxt_id); 3467 break; 3468 3469 case EQ_ETH: 3470 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 3471 eq->cntxt_id); 3472 break; 3473 3474 #ifdef TCP_OFFLOAD 3475 case EQ_OFLD: 3476 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 3477 eq->cntxt_id); 3478 break; 3479 #endif 3480 3481 default: 3482 panic("%s: invalid eq type %d.", __func__, 3483 eq->flags & EQ_TYPEMASK); 3484 } 3485 if (rc != 0) { 3486 device_printf(sc->dev, 3487 "failed to free egress queue (%d): %d\n", 3488 eq->flags & EQ_TYPEMASK, rc); 3489 return (rc); 3490 } 3491 eq->flags &= ~EQ_ALLOCATED; 3492 } 3493 3494 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 3495 3496 if (mtx_initialized(&eq->eq_lock)) 3497 mtx_destroy(&eq->eq_lock); 3498 3499 bzero(eq, sizeof(*eq)); 3500 return (0); 3501 } 3502 3503 static int 3504 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 3505 struct sysctl_oid *oid) 3506 { 3507 int rc; 3508 struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; 3509 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3510 3511 rc = alloc_eq(sc, vi, &wrq->eq); 3512 if (rc) 3513 return (rc); 3514 3515 wrq->adapter = sc; 3516 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 3517 TAILQ_INIT(&wrq->incomplete_wrs); 3518 STAILQ_INIT(&wrq->wr_list); 3519 wrq->nwr_pending = 0; 3520 wrq->ndesc_needed = 0; 3521 3522 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3523 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 3524 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3525 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 3526 "consumer index"); 3527 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 3528 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 3529 "producer index"); 3530 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 3531 &wrq->tx_wrs_direct, "# of work requests (direct)"); 3532 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 3533 &wrq->tx_wrs_copied, "# of work requests (copied)"); 3534 3535 return (rc); 3536 } 3537 3538 static int 3539 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 3540 { 3541 int rc; 3542 3543 rc = free_eq(sc, &wrq->eq); 3544 if (rc) 3545 return (rc); 3546 3547 bzero(wrq, sizeof(*wrq)); 3548 return (0); 3549 } 3550 3551 static int 3552 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 3553 struct sysctl_oid *oid) 3554 { 3555 int rc; 3556 struct port_info *pi = vi->pi; 3557 struct adapter *sc = pi->adapter; 3558 struct sge_eq *eq = &txq->eq; 3559 char name[16]; 3560 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3561 3562 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 3563 M_CXGBE, M_WAITOK); 3564 if (rc != 0) { 3565 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 3566 return (rc); 3567 } 3568 3569 rc = alloc_eq(sc, vi, eq); 3570 if (rc != 0) { 3571 mp_ring_free(txq->r); 3572 txq->r = NULL; 3573 return (rc); 3574 } 3575 3576 /* Can't fail after this point. 
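 * The remaining allocations use M_WAITOK and cannot fail; everything else
 * below is plain initialization and sysctl bookkeeping.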
*/ 3577 3578 if (idx == 0) 3579 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 3580 else 3581 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 3582 ("eq_base mismatch")); 3583 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 3584 ("PF with non-zero eq_base")); 3585 3586 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 3587 txq->ifp = vi->ifp; 3588 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 3589 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3590 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | 3591 V_TXPKT_VF(vi->viid)); 3592 txq->tc_idx = -1; 3593 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 3594 M_ZERO | M_WAITOK); 3595 3596 snprintf(name, sizeof(name), "%d", idx); 3597 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3598 NULL, "tx queue"); 3599 children = SYSCTL_CHILDREN(oid); 3600 3601 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3602 &eq->abs_id, 0, "absolute id of the queue"); 3603 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3604 &eq->cntxt_id, 0, "SGE context id of the queue"); 3605 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3606 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 3607 "consumer index"); 3608 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3609 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 3610 "producer index"); 3611 3612 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 3613 CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", 3614 "traffic class (-1 means none)"); 3615 3616 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 3617 &txq->txcsum, "# of times hardware assisted with checksum"); 3618 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 3619 CTLFLAG_RD, &txq->vlan_insertion, 3620 "# of times hardware inserted 802.1Q tag"); 3621 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 3622 &txq->tso_wrs, "# of TSO work requests"); 3623 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 3624 &txq->imm_wrs, "# of work requests with immediate data"); 3625 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 3626 &txq->sgl_wrs, "# of work requests with direct SGL"); 3627 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 3628 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 3629 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 3630 CTLFLAG_RD, &txq->txpkts0_wrs, 3631 "# of txpkts (type 0) work requests"); 3632 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 3633 CTLFLAG_RD, &txq->txpkts1_wrs, 3634 "# of txpkts (type 1) work requests"); 3635 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 3636 CTLFLAG_RD, &txq->txpkts0_pkts, 3637 "# of frames tx'd using type0 txpkts work requests"); 3638 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 3639 CTLFLAG_RD, &txq->txpkts1_pkts, 3640 "# of frames tx'd using type1 txpkts work requests"); 3641 3642 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", 3643 CTLFLAG_RD, &txq->r->enqueues, 3644 "# of enqueues to the mp_ring for this queue"); 3645 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", 3646 CTLFLAG_RD, &txq->r->drops, 3647 "# of drops in the mp_ring for this queue"); 3648 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", 3649 CTLFLAG_RD, &txq->r->starts, 3650 "# of normal consumer starts in the mp_ring for this queue"); 3651 SYSCTL_ADD_COUNTER_U64(&vi->ctx, 
children, OID_AUTO, "r_stalls", 3652 CTLFLAG_RD, &txq->r->stalls, 3653 "# of consumer stalls in the mp_ring for this queue"); 3654 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", 3655 CTLFLAG_RD, &txq->r->restarts, 3656 "# of consumer restarts in the mp_ring for this queue"); 3657 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", 3658 CTLFLAG_RD, &txq->r->abdications, 3659 "# of consumer abdications in the mp_ring for this queue"); 3660 3661 return (0); 3662 } 3663 3664 static int 3665 free_txq(struct vi_info *vi, struct sge_txq *txq) 3666 { 3667 int rc; 3668 struct adapter *sc = vi->pi->adapter; 3669 struct sge_eq *eq = &txq->eq; 3670 3671 rc = free_eq(sc, eq); 3672 if (rc) 3673 return (rc); 3674 3675 sglist_free(txq->gl); 3676 free(txq->sdesc, M_CXGBE); 3677 mp_ring_free(txq->r); 3678 3679 bzero(txq, sizeof(*txq)); 3680 return (0); 3681 } 3682 3683 static void 3684 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3685 { 3686 bus_addr_t *ba = arg; 3687 3688 KASSERT(nseg == 1, 3689 ("%s meant for single segment mappings only.", __func__)); 3690 3691 *ba = error ? 0 : segs->ds_addr; 3692 } 3693 3694 static inline void 3695 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 3696 { 3697 uint32_t n, v; 3698 3699 n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); 3700 MPASS(n > 0); 3701 3702 wmb(); 3703 v = fl->dbval | V_PIDX(n); 3704 if (fl->udb) 3705 *fl->udb = htole32(v); 3706 else 3707 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 3708 IDXINCR(fl->dbidx, n, fl->sidx); 3709 } 3710 3711 /* 3712 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 3713 * recycled do not count towards this allocation budget. 3714 * 3715 * Returns non-zero to indicate that this freelist should be added to the list 3716 * of starving freelists. 3717 */ 3718 static int 3719 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 3720 { 3721 __be64 *d; 3722 struct fl_sdesc *sd; 3723 uintptr_t pa; 3724 caddr_t cl; 3725 struct cluster_layout *cll; 3726 struct sw_zone_info *swz; 3727 struct cluster_metadata *clm; 3728 uint16_t max_pidx; 3729 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 3730 3731 FL_LOCK_ASSERT_OWNED(fl); 3732 3733 /* 3734 * We always stop at the beginning of the hardware descriptor that's just 3735 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 3736 * which would mean an empty freelist to the chip. 3737 */ 3738 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 3739 if (fl->pidx == max_pidx * 8) 3740 return (0); 3741 3742 d = &fl->desc[fl->pidx]; 3743 sd = &fl->sdesc[fl->pidx]; 3744 cll = &fl->cll_def; /* default layout */ 3745 swz = &sc->sge.sw_zone_info[cll->zidx]; 3746 3747 while (n > 0) { 3748 3749 if (sd->cl != NULL) { 3750 3751 if (sd->nmbuf == 0) { 3752 /* 3753 * Fast recycle without involving any atomics on 3754 * the cluster's metadata (if the cluster has 3755 * metadata). This happens when all frames 3756 * received in the cluster were small enough to 3757 * fit within a single mbuf each. 3758 */ 3759 fl->cl_fast_recycled++; 3760 #ifdef INVARIANTS 3761 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 3762 if (clm != NULL) 3763 MPASS(clm->refcount == 1); 3764 #endif 3765 goto recycled_fast; 3766 } 3767 3768 /* 3769 * Cluster is guaranteed to have metadata. Clusters 3770 * without metadata always take the fast recycle path 3771 * when they're recycled. 
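 * Drop this freelist's reference on the cluster: if it was the last one
 * the cluster is recycled in place, otherwise give it up (sd->cl is
 * cleared) and allocate a fresh cluster.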
3772 */ 3773 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 3774 MPASS(clm != NULL); 3775 3776 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 3777 fl->cl_recycled++; 3778 counter_u64_add(extfree_rels, 1); 3779 goto recycled; 3780 } 3781 sd->cl = NULL; /* gave up my reference */ 3782 } 3783 MPASS(sd->cl == NULL); 3784 alloc: 3785 cl = uma_zalloc(swz->zone, M_NOWAIT); 3786 if (__predict_false(cl == NULL)) { 3787 if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || 3788 fl->cll_def.zidx == fl->cll_alt.zidx) 3789 break; 3790 3791 /* fall back to the safe zone */ 3792 cll = &fl->cll_alt; 3793 swz = &sc->sge.sw_zone_info[cll->zidx]; 3794 goto alloc; 3795 } 3796 fl->cl_allocated++; 3797 n--; 3798 3799 pa = pmap_kextract((vm_offset_t)cl); 3800 pa += cll->region1; 3801 sd->cl = cl; 3802 sd->cll = *cll; 3803 *d = htobe64(pa | cll->hwidx); 3804 clm = cl_metadata(sc, fl, cll, cl); 3805 if (clm != NULL) { 3806 recycled: 3807 #ifdef INVARIANTS 3808 clm->sd = sd; 3809 #endif 3810 clm->refcount = 1; 3811 } 3812 sd->nmbuf = 0; 3813 recycled_fast: 3814 d++; 3815 sd++; 3816 if (__predict_false(++fl->pidx % 8 == 0)) { 3817 uint16_t pidx = fl->pidx / 8; 3818 3819 if (__predict_false(pidx == fl->sidx)) { 3820 fl->pidx = 0; 3821 pidx = 0; 3822 sd = fl->sdesc; 3823 d = fl->desc; 3824 } 3825 if (pidx == max_pidx) 3826 break; 3827 3828 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 3829 ring_fl_db(sc, fl); 3830 } 3831 } 3832 3833 if (fl->pidx / 8 != fl->dbidx) 3834 ring_fl_db(sc, fl); 3835 3836 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 3837 } 3838 3839 /* 3840 * Attempt to refill all starving freelists. 3841 */ 3842 static void 3843 refill_sfl(void *arg) 3844 { 3845 struct adapter *sc = arg; 3846 struct sge_fl *fl, *fl_temp; 3847 3848 mtx_assert(&sc->sfl_lock, MA_OWNED); 3849 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 3850 FL_LOCK(fl); 3851 refill_fl(sc, fl, 64); 3852 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 3853 TAILQ_REMOVE(&sc->sfl, fl, link); 3854 fl->flags &= ~FL_STARVING; 3855 } 3856 FL_UNLOCK(fl); 3857 } 3858 3859 if (!TAILQ_EMPTY(&sc->sfl)) 3860 callout_schedule(&sc->sfl_callout, hz / 5); 3861 } 3862 3863 static int 3864 alloc_fl_sdesc(struct sge_fl *fl) 3865 { 3866 3867 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 3868 M_ZERO | M_WAITOK); 3869 3870 return (0); 3871 } 3872 3873 static void 3874 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 3875 { 3876 struct fl_sdesc *sd; 3877 struct cluster_metadata *clm; 3878 struct cluster_layout *cll; 3879 int i; 3880 3881 sd = fl->sdesc; 3882 for (i = 0; i < fl->sidx * 8; i++, sd++) { 3883 if (sd->cl == NULL) 3884 continue; 3885 3886 cll = &sd->cll; 3887 clm = cl_metadata(sc, fl, cll, sd->cl); 3888 if (sd->nmbuf == 0) 3889 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 3890 else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { 3891 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 3892 counter_u64_add(extfree_rels, 1); 3893 } 3894 sd->cl = NULL; 3895 } 3896 3897 free(fl->sdesc, M_CXGBE); 3898 fl->sdesc = NULL; 3899 } 3900 3901 static inline void 3902 get_pkt_gl(struct mbuf *m, struct sglist *gl) 3903 { 3904 int rc; 3905 3906 M_ASSERTPKTHDR(m); 3907 3908 sglist_reset(gl); 3909 rc = sglist_append_mbuf(gl, m); 3910 if (__predict_false(rc != 0)) { 3911 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 3912 "with %d.", __func__, m, mbuf_nsegs(m), rc); 3913 } 3914 3915 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 3916 ("%s: nsegs changed for mbuf %p from %d to %d", 
__func__, m, 3917 mbuf_nsegs(m), gl->sg_nseg)); 3918 KASSERT(gl->sg_nseg > 0 && 3919 gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), 3920 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 3921 gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); 3922 } 3923 3924 /* 3925 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 3926 */ 3927 static inline u_int 3928 txpkt_len16(u_int nsegs, u_int tso) 3929 { 3930 u_int n; 3931 3932 MPASS(nsegs > 0); 3933 3934 nsegs--; /* first segment is part of ulptx_sgl */ 3935 n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + 3936 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 3937 if (tso) 3938 n += sizeof(struct cpl_tx_pkt_lso_core); 3939 3940 return (howmany(n, 16)); 3941 } 3942 3943 /* 3944 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 3945 * request header. 3946 */ 3947 static inline u_int 3948 txpkts0_len16(u_int nsegs) 3949 { 3950 u_int n; 3951 3952 MPASS(nsegs > 0); 3953 3954 nsegs--; /* first segment is part of ulptx_sgl */ 3955 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 3956 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 3957 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 3958 3959 return (howmany(n, 16)); 3960 } 3961 3962 /* 3963 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 3964 * request header. 3965 */ 3966 static inline u_int 3967 txpkts1_len16(void) 3968 { 3969 u_int n; 3970 3971 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 3972 3973 return (howmany(n, 16)); 3974 } 3975 3976 static inline u_int 3977 imm_payload(u_int ndesc) 3978 { 3979 u_int n; 3980 3981 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 3982 sizeof(struct cpl_tx_pkt_core); 3983 3984 return (n); 3985 } 3986 3987 /* 3988 * Write a txpkt WR for this packet to the hardware descriptors, update the 3989 * software descriptor, and advance the pidx. It is guaranteed that enough 3990 * descriptors are available. 3991 * 3992 * The return value is the # of hardware descriptors used. 3993 */ 3994 static u_int 3995 write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, 3996 struct mbuf *m0, u_int available) 3997 { 3998 struct sge_eq *eq = &txq->eq; 3999 struct tx_sdesc *txsd; 4000 struct cpl_tx_pkt_core *cpl; 4001 uint32_t ctrl; /* used in many unrelated places */ 4002 uint64_t ctrl1; 4003 int len16, ndesc, pktlen, nsegs; 4004 caddr_t dst; 4005 4006 TXQ_LOCK_ASSERT_OWNED(txq); 4007 M_ASSERTPKTHDR(m0); 4008 MPASS(available > 0 && available < eq->sidx); 4009 4010 len16 = mbuf_len16(m0); 4011 nsegs = mbuf_nsegs(m0); 4012 pktlen = m0->m_pkthdr.len; 4013 ctrl = sizeof(struct cpl_tx_pkt_core); 4014 if (needs_tso(m0)) 4015 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4016 else if (pktlen <= imm_payload(2) && available >= 2) { 4017 /* Immediate data. Recalculate len16 and set nsegs to 0. 
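 * With nsegs set to 0 no SGL is written; the frame is copied into
 * the descriptor ring right behind the CPL (the copy_to_txd() loop
 * below), which is why the payload must fit in the room that two
 * descriptors leave after the WR and CPL headers (imm_payload(2))
 * and at least two descriptors must be available.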
*/ 4018 ctrl += pktlen; 4019 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 4020 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 4021 nsegs = 0; 4022 } 4023 ndesc = howmany(len16, EQ_ESIZE / 16); 4024 MPASS(ndesc <= available); 4025 4026 /* Firmware work request header */ 4027 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4028 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 4029 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4030 4031 ctrl = V_FW_WR_LEN16(len16); 4032 wr->equiq_to_len16 = htobe32(ctrl); 4033 wr->r3 = 0; 4034 4035 if (needs_tso(m0)) { 4036 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4037 4038 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4039 m0->m_pkthdr.l4hlen > 0, 4040 ("%s: mbuf %p needs TSO but missing header lengths", 4041 __func__, m0)); 4042 4043 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4044 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4045 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4046 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4047 ctrl |= V_LSO_ETHHDR_LEN(1); 4048 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4049 ctrl |= F_LSO_IPV6; 4050 4051 lso->lso_ctrl = htobe32(ctrl); 4052 lso->ipid_ofst = htobe16(0); 4053 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4054 lso->seqno_offset = htobe32(0); 4055 lso->len = htobe32(pktlen); 4056 4057 cpl = (void *)(lso + 1); 4058 4059 txq->tso_wrs++; 4060 } else 4061 cpl = (void *)(wr + 1); 4062 4063 /* Checksum offload */ 4064 ctrl1 = 0; 4065 if (needs_l3_csum(m0) == 0) 4066 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4067 if (needs_l4_csum(m0) == 0) 4068 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4069 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4070 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4071 txq->txcsum++; /* some hardware assistance provided */ 4072 4073 /* VLAN tag insertion */ 4074 if (needs_vlan_insertion(m0)) { 4075 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4076 txq->vlan_insertion++; 4077 } 4078 4079 /* CPL header */ 4080 cpl->ctrl0 = txq->cpl_ctrl0; 4081 cpl->pack = 0; 4082 cpl->len = htobe16(pktlen); 4083 cpl->ctrl1 = htobe64(ctrl1); 4084 4085 /* SGL */ 4086 dst = (void *)(cpl + 1); 4087 if (nsegs > 0) { 4088 4089 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4090 txq->sgl_wrs++; 4091 } else { 4092 struct mbuf *m; 4093 4094 for (m = m0; m != NULL; m = m->m_next) { 4095 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 4096 #ifdef INVARIANTS 4097 pktlen -= m->m_len; 4098 #endif 4099 } 4100 #ifdef INVARIANTS 4101 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 4102 #endif 4103 txq->imm_wrs++; 4104 } 4105 4106 txq->txpkt_wrs++; 4107 4108 txsd = &txq->sdesc[eq->pidx]; 4109 txsd->m = m0; 4110 txsd->desc_used = ndesc; 4111 4112 return (ndesc); 4113 } 4114 4115 static int 4116 try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) 4117 { 4118 u_int needed, nsegs1, nsegs2, l1, l2; 4119 4120 if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) 4121 return (1); 4122 4123 nsegs1 = mbuf_nsegs(m); 4124 nsegs2 = mbuf_nsegs(n); 4125 if (nsegs1 + nsegs2 == 2) { 4126 txp->wr_type = 1; 4127 l1 = l2 = txpkts1_len16(); 4128 } else { 4129 txp->wr_type = 0; 4130 l1 = txpkts0_len16(nsegs1); 4131 l2 = txpkts0_len16(nsegs2); 4132 } 4133 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; 4134 needed = howmany(txp->len16, EQ_ESIZE / 16); 4135 if (needed > SGE_MAX_WR_NDESC || needed > available) 4136 return (1); 4137 4138 txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; 4139 if 
(txp->plen > 65535) 4140 return (1); 4141 4142 txp->npkt = 2; 4143 set_mbuf_len16(m, l1); 4144 set_mbuf_len16(n, l2); 4145 4146 return (0); 4147 } 4148 4149 static int 4150 add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) 4151 { 4152 u_int plen, len16, needed, nsegs; 4153 4154 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 4155 4156 nsegs = mbuf_nsegs(m); 4157 if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1)) 4158 return (1); 4159 4160 plen = txp->plen + m->m_pkthdr.len; 4161 if (plen > 65535) 4162 return (1); 4163 4164 if (txp->wr_type == 0) 4165 len16 = txpkts0_len16(nsegs); 4166 else 4167 len16 = txpkts1_len16(); 4168 needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); 4169 if (needed > SGE_MAX_WR_NDESC || needed > available) 4170 return (1); 4171 4172 txp->npkt++; 4173 txp->plen = plen; 4174 txp->len16 += len16; 4175 set_mbuf_len16(m, len16); 4176 4177 return (0); 4178 } 4179 4180 /* 4181 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 4182 * the software descriptor, and advance the pidx. It is guaranteed that enough 4183 * descriptors are available. 4184 * 4185 * The return value is the # of hardware descriptors used. 4186 */ 4187 static u_int 4188 write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, 4189 struct mbuf *m0, const struct txpkts *txp, u_int available) 4190 { 4191 struct sge_eq *eq = &txq->eq; 4192 struct tx_sdesc *txsd; 4193 struct cpl_tx_pkt_core *cpl; 4194 uint32_t ctrl; 4195 uint64_t ctrl1; 4196 int ndesc, checkwrap; 4197 struct mbuf *m; 4198 void *flitp; 4199 4200 TXQ_LOCK_ASSERT_OWNED(txq); 4201 MPASS(txp->npkt > 0); 4202 MPASS(txp->plen < 65536); 4203 MPASS(m0 != NULL); 4204 MPASS(m0->m_nextpkt != NULL); 4205 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 4206 MPASS(available > 0 && available < eq->sidx); 4207 4208 ndesc = howmany(txp->len16, EQ_ESIZE / 16); 4209 MPASS(ndesc <= available); 4210 4211 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4212 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4213 ctrl = V_FW_WR_LEN16(txp->len16); 4214 wr->equiq_to_len16 = htobe32(ctrl); 4215 wr->plen = htobe16(txp->plen); 4216 wr->npkt = txp->npkt; 4217 wr->r3 = 0; 4218 wr->type = txp->wr_type; 4219 flitp = wr + 1; 4220 4221 /* 4222 * At this point we are 16B into a hardware descriptor. If checkwrap is 4223 * set then we know the WR is going to wrap around somewhere. We'll 4224 * check for that at appropriate points. 
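 * checkwrap is true when pidx + ndesc runs past the last hardware
 * descriptor, i.e. the WR's flits straddle the end of the ring and
 * continue at eq->desc[0].  For example, with sidx = 1024,
 * pidx = 1022 and ndesc = 4 the WR occupies descriptors 1022, 1023,
 * 0 and 1.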
4225 */ 4226 checkwrap = eq->sidx - ndesc < eq->pidx; 4227 for (m = m0; m != NULL; m = m->m_nextpkt) { 4228 if (txp->wr_type == 0) { 4229 struct ulp_txpkt *ulpmc; 4230 struct ulptx_idata *ulpsc; 4231 4232 /* ULP master command */ 4233 ulpmc = flitp; 4234 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 4235 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 4236 ulpmc->len = htobe32(mbuf_len16(m)); 4237 4238 /* ULP subcommand */ 4239 ulpsc = (void *)(ulpmc + 1); 4240 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 4241 F_ULP_TX_SC_MORE); 4242 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 4243 4244 cpl = (void *)(ulpsc + 1); 4245 if (checkwrap && 4246 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 4247 cpl = (void *)&eq->desc[0]; 4248 txq->txpkts0_pkts += txp->npkt; 4249 txq->txpkts0_wrs++; 4250 } else { 4251 cpl = flitp; 4252 txq->txpkts1_pkts += txp->npkt; 4253 txq->txpkts1_wrs++; 4254 } 4255 4256 /* Checksum offload */ 4257 ctrl1 = 0; 4258 if (needs_l3_csum(m) == 0) 4259 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4260 if (needs_l4_csum(m) == 0) 4261 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4262 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4263 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4264 txq->txcsum++; /* some hardware assistance provided */ 4265 4266 /* VLAN tag insertion */ 4267 if (needs_vlan_insertion(m)) { 4268 ctrl1 |= F_TXPKT_VLAN_VLD | 4269 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 4270 txq->vlan_insertion++; 4271 } 4272 4273 /* CPL header */ 4274 cpl->ctrl0 = txq->cpl_ctrl0; 4275 cpl->pack = 0; 4276 cpl->len = htobe16(m->m_pkthdr.len); 4277 cpl->ctrl1 = htobe64(ctrl1); 4278 4279 flitp = cpl + 1; 4280 if (checkwrap && 4281 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 4282 flitp = (void *)&eq->desc[0]; 4283 4284 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 4285 4286 } 4287 4288 txsd = &txq->sdesc[eq->pidx]; 4289 txsd->m = m0; 4290 txsd->desc_used = ndesc; 4291 4292 return (ndesc); 4293 } 4294 4295 /* 4296 * If the SGL ends on an address that is not 16 byte aligned, this function will 4297 * add a 0 filled flit at the end. 4298 */ 4299 static void 4300 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 4301 { 4302 struct sge_eq *eq = &txq->eq; 4303 struct sglist *gl = txq->gl; 4304 struct sglist_seg *seg; 4305 __be64 *flitp, *wrap; 4306 struct ulptx_sgl *usgl; 4307 int i, nflits, nsegs; 4308 4309 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 4310 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 4311 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4312 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4313 4314 get_pkt_gl(m, gl); 4315 nsegs = gl->sg_nseg; 4316 MPASS(nsegs > 0); 4317 4318 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 4319 flitp = (__be64 *)(*to); 4320 wrap = (__be64 *)(&eq->desc[eq->sidx]); 4321 seg = &gl->sg_segs[0]; 4322 usgl = (void *)flitp; 4323 4324 /* 4325 * We start at a 16 byte boundary somewhere inside the tx descriptor 4326 * ring, so we're at least 16 bytes away from the status page. There is 4327 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
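 * Only the sge[] entries that follow the first (len0, addr0) pair
 * can hit the end of the ring: the non-wrapping branch below writes
 * them in place, while the wrapping branch copies the remaining
 * flits one at a time and resets the pointer to eq->desc when it
 * reaches the end.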
4328 */ 4329 4330 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 4331 V_ULPTX_NSGE(nsegs)); 4332 usgl->len0 = htobe32(seg->ss_len); 4333 usgl->addr0 = htobe64(seg->ss_paddr); 4334 seg++; 4335 4336 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 4337 4338 /* Won't wrap around at all */ 4339 4340 for (i = 0; i < nsegs - 1; i++, seg++) { 4341 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 4342 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 4343 } 4344 if (i & 1) 4345 usgl->sge[i / 2].len[1] = htobe32(0); 4346 flitp += nflits; 4347 } else { 4348 4349 /* Will wrap somewhere in the rest of the SGL */ 4350 4351 /* 2 flits already written, write the rest flit by flit */ 4352 flitp = (void *)(usgl + 1); 4353 for (i = 0; i < nflits - 2; i++) { 4354 if (flitp == wrap) 4355 flitp = (void *)eq->desc; 4356 *flitp++ = get_flit(seg, nsegs - 1, i); 4357 } 4358 } 4359 4360 if (nflits & 1) { 4361 MPASS(((uintptr_t)flitp) & 0xf); 4362 *flitp++ = 0; 4363 } 4364 4365 MPASS((((uintptr_t)flitp) & 0xf) == 0); 4366 if (__predict_false(flitp == wrap)) 4367 *to = (void *)eq->desc; 4368 else 4369 *to = (void *)flitp; 4370 } 4371 4372 static inline void 4373 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 4374 { 4375 4376 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4377 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4378 4379 if (__predict_true((uintptr_t)(*to) + len <= 4380 (uintptr_t)&eq->desc[eq->sidx])) { 4381 bcopy(from, *to, len); 4382 (*to) += len; 4383 } else { 4384 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 4385 4386 bcopy(from, *to, portion); 4387 from += portion; 4388 portion = len - portion; /* remaining */ 4389 bcopy(from, (void *)eq->desc, portion); 4390 (*to) = (caddr_t)eq->desc + portion; 4391 } 4392 } 4393 4394 static inline void 4395 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 4396 { 4397 u_int db; 4398 4399 MPASS(n > 0); 4400 4401 db = eq->doorbells; 4402 if (n > 1) 4403 clrbit(&db, DOORBELL_WCWR); 4404 wmb(); 4405 4406 switch (ffs(db) - 1) { 4407 case DOORBELL_UDB: 4408 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4409 break; 4410 4411 case DOORBELL_WCWR: { 4412 volatile uint64_t *dst, *src; 4413 int i; 4414 4415 /* 4416 * Queues whose 128B doorbell segment fits in the page do not 4417 * use relative qid (udb_qid is always 0). Only queues with 4418 * doorbell segments can do WCWR. 
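 * A WCWR doorbell copies the whole hardware descriptor into the
 * doorbell's WR window (UDBS_WR_OFFSET) instead of just writing a
 * pidx increment, so it is only usable when exactly one descriptor
 * is being posted; n > 1 already cleared DOORBELL_WCWR from db
 * above, and the assertion below enforces n == 1 here.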
4419 */ 4420 KASSERT(eq->udb_qid == 0 && n == 1, 4421 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 4422 __func__, eq->doorbells, n, eq->dbidx, eq)); 4423 4424 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 4425 UDBS_DB_OFFSET); 4426 i = eq->dbidx; 4427 src = (void *)&eq->desc[i]; 4428 while (src != (void *)&eq->desc[i + 1]) 4429 *dst++ = *src++; 4430 wmb(); 4431 break; 4432 } 4433 4434 case DOORBELL_UDBWC: 4435 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4436 wmb(); 4437 break; 4438 4439 case DOORBELL_KDB: 4440 t4_write_reg(sc, sc->sge_kdoorbell_reg, 4441 V_QID(eq->cntxt_id) | V_PIDX(n)); 4442 break; 4443 } 4444 4445 IDXINCR(eq->dbidx, n, eq->sidx); 4446 } 4447 4448 static inline u_int 4449 reclaimable_tx_desc(struct sge_eq *eq) 4450 { 4451 uint16_t hw_cidx; 4452 4453 hw_cidx = read_hw_cidx(eq); 4454 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 4455 } 4456 4457 static inline u_int 4458 total_available_tx_desc(struct sge_eq *eq) 4459 { 4460 uint16_t hw_cidx, pidx; 4461 4462 hw_cidx = read_hw_cidx(eq); 4463 pidx = eq->pidx; 4464 4465 if (pidx == hw_cidx) 4466 return (eq->sidx - 1); 4467 else 4468 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 4469 } 4470 4471 static inline uint16_t 4472 read_hw_cidx(struct sge_eq *eq) 4473 { 4474 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 4475 uint16_t cidx = spg->cidx; /* stable snapshot */ 4476 4477 return (be16toh(cidx)); 4478 } 4479 4480 /* 4481 * Reclaim 'n' descriptors approximately. 4482 */ 4483 static u_int 4484 reclaim_tx_descs(struct sge_txq *txq, u_int n) 4485 { 4486 struct tx_sdesc *txsd; 4487 struct sge_eq *eq = &txq->eq; 4488 u_int can_reclaim, reclaimed; 4489 4490 TXQ_LOCK_ASSERT_OWNED(txq); 4491 MPASS(n > 0); 4492 4493 reclaimed = 0; 4494 can_reclaim = reclaimable_tx_desc(eq); 4495 while (can_reclaim && reclaimed < n) { 4496 int ndesc; 4497 struct mbuf *m, *nextpkt; 4498 4499 txsd = &txq->sdesc[eq->cidx]; 4500 ndesc = txsd->desc_used; 4501 4502 /* Firmware doesn't return "partial" credits. 
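 * Once the hardware cidx has moved past a work request, all of the
 * descriptors that WR consumed are reclaimable at once, so
 * can_reclaim is never smaller than the desc_used recorded for the
 * WR at the software cidx.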
*/ 4503 KASSERT(can_reclaim >= ndesc, 4504 ("%s: unexpected number of credits: %d, %d", 4505 __func__, can_reclaim, ndesc)); 4506 4507 for (m = txsd->m; m != NULL; m = nextpkt) { 4508 nextpkt = m->m_nextpkt; 4509 m->m_nextpkt = NULL; 4510 m_freem(m); 4511 } 4512 reclaimed += ndesc; 4513 can_reclaim -= ndesc; 4514 IDXINCR(eq->cidx, ndesc, eq->sidx); 4515 } 4516 4517 return (reclaimed); 4518 } 4519 4520 static void 4521 tx_reclaim(void *arg, int n) 4522 { 4523 struct sge_txq *txq = arg; 4524 struct sge_eq *eq = &txq->eq; 4525 4526 do { 4527 if (TXQ_TRYLOCK(txq) == 0) 4528 break; 4529 n = reclaim_tx_descs(txq, 32); 4530 if (eq->cidx == eq->pidx) 4531 eq->equeqidx = eq->pidx; 4532 TXQ_UNLOCK(txq); 4533 } while (n > 0); 4534 } 4535 4536 static __be64 4537 get_flit(struct sglist_seg *segs, int nsegs, int idx) 4538 { 4539 int i = (idx / 3) * 2; 4540 4541 switch (idx % 3) { 4542 case 0: { 4543 __be64 rc; 4544 4545 rc = htobe32(segs[i].ss_len); 4546 if (i + 1 < nsegs) 4547 rc |= (uint64_t)htobe32(segs[i + 1].ss_len) << 32; 4548 4549 return (rc); 4550 } 4551 case 1: 4552 return (htobe64(segs[i].ss_paddr)); 4553 case 2: 4554 return (htobe64(segs[i + 1].ss_paddr)); 4555 } 4556 4557 return (0); 4558 } 4559 4560 static void 4561 find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) 4562 { 4563 int8_t zidx, hwidx, idx; 4564 uint16_t region1, region3; 4565 int spare, spare_needed, n; 4566 struct sw_zone_info *swz; 4567 struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; 4568 4569 /* 4570 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize 4571 * large enough for the max payload and cluster metadata. Otherwise 4572 * settle for the largest bufsize that leaves enough room in the cluster 4573 * for metadata. 4574 * 4575 * Without buffer packing: Look for the smallest zone which has a 4576 * bufsize large enough for the max payload. Settle for the largest 4577 * bufsize available if there's nothing big enough for max payload. 4578 */ 4579 spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; 4580 swz = &sc->sge.sw_zone_info[0]; 4581 hwidx = -1; 4582 for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { 4583 if (swz->size > largest_rx_cluster) { 4584 if (__predict_true(hwidx != -1)) 4585 break; 4586 4587 /* 4588 * This is a misconfiguration. largest_rx_cluster is 4589 * preventing us from finding a refill source. See 4590 * dev.t5nex.<n>.buffer_sizes to figure out why. 4591 */ 4592 device_printf(sc->dev, "largest_rx_cluster=%u leaves no" 4593 " refill source for fl %p (dma %u). Ignored.\n", 4594 largest_rx_cluster, fl, maxp); 4595 } 4596 for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { 4597 hwb = &hwb_list[idx]; 4598 spare = swz->size - hwb->size; 4599 if (spare < spare_needed) 4600 continue; 4601 4602 hwidx = idx; /* best option so far */ 4603 if (hwb->size >= maxp) { 4604 4605 if ((fl->flags & FL_BUF_PACKING) == 0) 4606 goto done; /* stop looking (not packing) */ 4607 4608 if (swz->size >= safest_rx_cluster) 4609 goto done; /* stop looking (packing) */ 4610 } 4611 break; /* keep looking, next zone */ 4612 } 4613 } 4614 done: 4615 /* A usable hwidx has been located. */ 4616 MPASS(hwidx != -1); 4617 hwb = &hwb_list[hwidx]; 4618 zidx = hwb->zidx; 4619 swz = &sc->sge.sw_zone_info[zidx]; 4620 region1 = 0; 4621 region3 = swz->size - hwb->size; 4622 4623 /* 4624 * Stay within this zone and see if there is a better match when mbuf 4625 * inlining is allowed. 
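 * ("Inlining" here means reserving region1 at the front of the
 * cluster for mbufs, in multiples of MSIZE, with the cluster
 * metadata kept in region3 at the end; the payload area sits in
 * between.)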
Remember that the hwidx's are sorted in 4626 * decreasing order of size (so in increasing order of spare area). 4627 */ 4628 for (idx = hwidx; idx != -1; idx = hwb->next) { 4629 hwb = &hwb_list[idx]; 4630 spare = swz->size - hwb->size; 4631 4632 if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) 4633 break; 4634 4635 /* 4636 * Do not inline mbufs if doing so would violate the pad/pack 4637 * boundary alignment requirement. 4638 */ 4639 if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) 4640 continue; 4641 if (fl->flags & FL_BUF_PACKING && 4642 (MSIZE % sc->params.sge.pack_boundary) != 0) 4643 continue; 4644 4645 if (spare < CL_METADATA_SIZE + MSIZE) 4646 continue; 4647 n = (spare - CL_METADATA_SIZE) / MSIZE; 4648 if (n > howmany(hwb->size, maxp)) 4649 break; 4650 4651 hwidx = idx; 4652 if (fl->flags & FL_BUF_PACKING) { 4653 region1 = n * MSIZE; 4654 region3 = spare - region1; 4655 } else { 4656 region1 = MSIZE; 4657 region3 = spare - region1; 4658 break; 4659 } 4660 } 4661 4662 KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, 4663 ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); 4664 KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, 4665 ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); 4666 KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == 4667 sc->sge.sw_zone_info[zidx].size, 4668 ("%s: bad buffer layout for fl %p, maxp %d. " 4669 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4670 sc->sge.sw_zone_info[zidx].size, region1, 4671 sc->sge.hw_buf_info[hwidx].size, region3)); 4672 if (fl->flags & FL_BUF_PACKING || region1 > 0) { 4673 KASSERT(region3 >= CL_METADATA_SIZE, 4674 ("%s: no room for metadata. fl %p, maxp %d; " 4675 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4676 sc->sge.sw_zone_info[zidx].size, region1, 4677 sc->sge.hw_buf_info[hwidx].size, region3)); 4678 KASSERT(region1 % MSIZE == 0, 4679 ("%s: bad mbuf region for fl %p, maxp %d. 
" 4680 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 4681 sc->sge.sw_zone_info[zidx].size, region1, 4682 sc->sge.hw_buf_info[hwidx].size, region3)); 4683 } 4684 4685 fl->cll_def.zidx = zidx; 4686 fl->cll_def.hwidx = hwidx; 4687 fl->cll_def.region1 = region1; 4688 fl->cll_def.region3 = region3; 4689 } 4690 4691 static void 4692 find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) 4693 { 4694 struct sge *s = &sc->sge; 4695 struct hw_buf_info *hwb; 4696 struct sw_zone_info *swz; 4697 int spare; 4698 int8_t hwidx; 4699 4700 if (fl->flags & FL_BUF_PACKING) 4701 hwidx = s->safe_hwidx2; /* with room for metadata */ 4702 else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { 4703 hwidx = s->safe_hwidx2; 4704 hwb = &s->hw_buf_info[hwidx]; 4705 swz = &s->sw_zone_info[hwb->zidx]; 4706 spare = swz->size - hwb->size; 4707 4708 /* no good if there isn't room for an mbuf as well */ 4709 if (spare < CL_METADATA_SIZE + MSIZE) 4710 hwidx = s->safe_hwidx1; 4711 } else 4712 hwidx = s->safe_hwidx1; 4713 4714 if (hwidx == -1) { 4715 /* No fallback source */ 4716 fl->cll_alt.hwidx = -1; 4717 fl->cll_alt.zidx = -1; 4718 4719 return; 4720 } 4721 4722 hwb = &s->hw_buf_info[hwidx]; 4723 swz = &s->sw_zone_info[hwb->zidx]; 4724 spare = swz->size - hwb->size; 4725 fl->cll_alt.hwidx = hwidx; 4726 fl->cll_alt.zidx = hwb->zidx; 4727 if (allow_mbufs_in_cluster && 4728 (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) 4729 fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; 4730 else 4731 fl->cll_alt.region1 = 0; 4732 fl->cll_alt.region3 = spare - fl->cll_alt.region1; 4733 } 4734 4735 static void 4736 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 4737 { 4738 mtx_lock(&sc->sfl_lock); 4739 FL_LOCK(fl); 4740 if ((fl->flags & FL_DOOMED) == 0) { 4741 fl->flags |= FL_STARVING; 4742 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 4743 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 4744 } 4745 FL_UNLOCK(fl); 4746 mtx_unlock(&sc->sfl_lock); 4747 } 4748 4749 static void 4750 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 4751 { 4752 struct sge_wrq *wrq = (void *)eq; 4753 4754 atomic_readandclear_int(&eq->equiq); 4755 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 4756 } 4757 4758 static void 4759 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 4760 { 4761 struct sge_txq *txq = (void *)eq; 4762 4763 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 4764 4765 atomic_readandclear_int(&eq->equiq); 4766 mp_ring_check_drainage(txq->r, 0); 4767 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 4768 } 4769 4770 static int 4771 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 4772 struct mbuf *m) 4773 { 4774 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 4775 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 4776 struct adapter *sc = iq->adapter; 4777 struct sge *s = &sc->sge; 4778 struct sge_eq *eq; 4779 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 4780 &handle_wrq_egr_update, &handle_eth_egr_update, 4781 &handle_wrq_egr_update}; 4782 4783 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 4784 rss->opcode)); 4785 4786 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 4787 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 4788 4789 return (0); 4790 } 4791 4792 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 4793 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 4794 offsetof(struct cpl_fw6_msg, data)); 4795 4796 static int 4797 handle_fw_msg(struct sge_iq 
*iq, const struct rss_header *rss, struct mbuf *m) 4798 { 4799 struct adapter *sc = iq->adapter; 4800 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 4801 4802 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 4803 rss->opcode)); 4804 4805 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 4806 const struct rss_header *rss2; 4807 4808 rss2 = (const struct rss_header *)&cpl->data[0]; 4809 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 4810 } 4811 4812 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 4813 } 4814 4815 /** 4816 * t4_handle_wrerr_rpl - process a FW work request error message 4817 * @adap: the adapter 4818 * @rpl: start of the FW message 4819 */ 4820 static int 4821 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 4822 { 4823 u8 opcode = *(const u8 *)rpl; 4824 const struct fw_error_cmd *e = (const void *)rpl; 4825 unsigned int i; 4826 4827 if (opcode != FW_ERROR_CMD) { 4828 log(LOG_ERR, 4829 "%s: Received WRERR_RPL message with opcode %#x\n", 4830 device_get_nameunit(adap->dev), opcode); 4831 return (EINVAL); 4832 } 4833 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 4834 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 4835 "non-fatal"); 4836 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 4837 case FW_ERROR_TYPE_EXCEPTION: 4838 log(LOG_ERR, "exception info:\n"); 4839 for (i = 0; i < nitems(e->u.exception.info); i++) 4840 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 4841 be32toh(e->u.exception.info[i])); 4842 log(LOG_ERR, "\n"); 4843 break; 4844 case FW_ERROR_TYPE_HWMODULE: 4845 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 4846 be32toh(e->u.hwmodule.regaddr), 4847 be32toh(e->u.hwmodule.regval)); 4848 break; 4849 case FW_ERROR_TYPE_WR: 4850 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 4851 be16toh(e->u.wr.cidx), 4852 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 4853 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 4854 be32toh(e->u.wr.eqid)); 4855 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 4856 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 4857 e->u.wr.wrhdr[i]); 4858 log(LOG_ERR, "\n"); 4859 break; 4860 case FW_ERROR_TYPE_ACL: 4861 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 4862 be16toh(e->u.acl.cidx), 4863 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 4864 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 4865 be32toh(e->u.acl.eqid), 4866 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 4867 "MAC"); 4868 for (i = 0; i < nitems(e->u.acl.val); i++) 4869 log(LOG_ERR, " %02x", e->u.acl.val[i]); 4870 log(LOG_ERR, "\n"); 4871 break; 4872 default: 4873 log(LOG_ERR, "type %#x\n", 4874 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 4875 return (EINVAL); 4876 } 4877 return (0); 4878 } 4879 4880 static int 4881 sysctl_uint16(SYSCTL_HANDLER_ARGS) 4882 { 4883 uint16_t *id = arg1; 4884 int i = *id; 4885 4886 return sysctl_handle_int(oidp, &i, 0, req); 4887 } 4888 4889 static int 4890 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 4891 { 4892 struct sge *s = arg1; 4893 struct hw_buf_info *hwb = &s->hw_buf_info[0]; 4894 struct sw_zone_info *swz = &s->sw_zone_info[0]; 4895 int i, rc; 4896 struct sbuf sb; 4897 char c; 4898 4899 sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); 4900 for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { 4901 if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) 4902 c = '*'; 4903 else 4904 c = '\0'; 4905 4906 sbuf_printf(&sb, "%u%c ", hwb->size, c); 4907 } 4908 sbuf_trim(&sb); 4909 sbuf_finish(&sb); 4910 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 4911 sbuf_delete(&sb); 4912 return (rc); 4913 } 4914 4915 static int 4916 sysctl_tc(SYSCTL_HANDLER_ARGS) 4917 { 4918 struct vi_info *vi = arg1; 4919 struct port_info *pi; 4920 struct adapter *sc; 4921 struct sge_txq *txq; 4922 struct tx_sched_class *tc; 4923 int qidx = arg2, rc, tc_idx; 4924 uint32_t fw_queue, fw_class; 4925 4926 MPASS(qidx >= 0 && qidx < vi->ntxq); 4927 pi = vi->pi; 4928 sc = pi->adapter; 4929 txq = &sc->sge.txq[vi->first_txq + qidx]; 4930 4931 tc_idx = txq->tc_idx; 4932 rc = sysctl_handle_int(oidp, &tc_idx, 0, req); 4933 if (rc != 0 || req->newptr == NULL) 4934 return (rc); 4935 4936 /* Note that -1 is legitimate input (it means unbind). */ 4937 if (tc_idx < -1 || tc_idx >= sc->chip_params->nsched_cls) 4938 return (EINVAL); 4939 4940 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4stc"); 4941 if (rc) 4942 return (rc); 4943 4944 if (tc_idx == txq->tc_idx) { 4945 rc = 0; /* No change, nothing to do. */ 4946 goto done; 4947 } 4948 4949 fw_queue = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 4950 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | 4951 V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id); 4952 4953 if (tc_idx == -1) 4954 fw_class = 0xffffffff; /* Unbind. */ 4955 else { 4956 /* 4957 * Bind to a different class. Ethernet txq's are only allowed 4958 * to bind to cl-rl mode-class for now. XXX: too restrictive. 4959 */ 4960 tc = &pi->tc[tc_idx]; 4961 if (tc->flags & TX_SC_OK && 4962 tc->params.level == SCHED_CLASS_LEVEL_CL_RL && 4963 tc->params.mode == SCHED_CLASS_MODE_CLASS) { 4964 /* Ok to proceed. */ 4965 fw_class = tc_idx; 4966 } else { 4967 rc = tc->flags & TX_SC_OK ? EBUSY : ENXIO; 4968 goto done; 4969 } 4970 } 4971 4972 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); 4973 if (rc == 0) { 4974 if (txq->tc_idx != -1) { 4975 tc = &pi->tc[txq->tc_idx]; 4976 MPASS(tc->refcount > 0); 4977 tc->refcount--; 4978 } 4979 if (tc_idx != -1) { 4980 tc = &pi->tc[tc_idx]; 4981 tc->refcount++; 4982 } 4983 txq->tc_idx = tc_idx; 4984 } 4985 done: 4986 end_synchronized_op(sc, 0); 4987 return (rc); 4988 } 4989