/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/counter.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <machine/in_cksum.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef DEV_NETMAP
#include <machine/bus.h>
#include <sys/selinfo.h>
#include <net/if_var.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#endif

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD	(MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD	MINCLSIZE
#endif

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 2;
TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 * 0: disable padding.
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);

/*
 * Size of cluster allocation that's most likely to succeed. The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);

#ifdef RATELIMIT
/*
 * Knob to control TCP timestamp rewriting, and the granularity of the tick used
 * for rewriting. -1 and 0-3 are all valid values.
 * -1: hardware should leave the TCP timestamps alone.
 * 0: 1ms
 * 1: 100us
 * 2: 10us
 * 3: 1us
 */
static int tsclk = -1;
TUNABLE_INT("hw.cxgbe.tsclk", &tsclk);

static int eo_max_backlog = 1024 * 1024;
TUNABLE_INT("hw.cxgbe.eo_max_backlog", &eo_max_backlog);
#endif

/*
 * The interrupt holdoff timers are multiplied by this value on T6+.
 * 1 and 3-17 (both inclusive) are legal values.
 */
static int tscale = 1;
TUNABLE_INT("hw.cxgbe.tscale", &tscale);

/*
 * Number of LRO entries in the lro_ctrl structure per rx queue.
 */
static int lro_entries = TCP_LRO_ENTRIES;
TUNABLE_INT("hw.cxgbe.lro_entries", &lro_entries);

/*
 * This enables presorting of frames before they're fed into tcp_lro_rx.
 */
static int lro_mbufs = 0;
TUNABLE_INT("hw.cxgbe.lro_mbufs", &lro_mbufs);

struct txpkts {
	u_int wr_type;		/* type 0 or type 1 */
	u_int npkt;		/* # of packets in this work request */
	u_int plen;		/* total payload (sum of all packets) */
	u_int len16;		/* # of 16B pieces used by this work request */
};

/* A packet's SGL.
This + m_pkthdr has all info needed for tx */ 201 struct sgl { 202 struct sglist sg; 203 struct sglist_seg seg[TX_SGL_SEGS]; 204 }; 205 206 static int service_iq(struct sge_iq *, int); 207 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 208 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); 209 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); 210 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 211 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 212 uint16_t, char *); 213 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 214 bus_addr_t *, void **); 215 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 216 void *); 217 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 218 int, int); 219 static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); 220 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 221 struct sge_iq *); 222 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 223 struct sysctl_oid *, struct sge_fl *); 224 static int alloc_fwq(struct adapter *); 225 static int free_fwq(struct adapter *); 226 static int alloc_mgmtq(struct adapter *); 227 static int free_mgmtq(struct adapter *); 228 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, 229 struct sysctl_oid *); 230 static int free_rxq(struct vi_info *, struct sge_rxq *); 231 #ifdef TCP_OFFLOAD 232 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 233 struct sysctl_oid *); 234 static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 235 #endif 236 #ifdef DEV_NETMAP 237 static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, 238 struct sysctl_oid *); 239 static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); 240 static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, 241 struct sysctl_oid *); 242 static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); 243 #endif 244 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 245 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 246 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 247 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 248 #endif 249 static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); 250 static int free_eq(struct adapter *, struct sge_eq *); 251 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 252 struct sysctl_oid *); 253 static int free_wrq(struct adapter *, struct sge_wrq *); 254 static int alloc_txq(struct vi_info *, struct sge_txq *, int, 255 struct sysctl_oid *); 256 static int free_txq(struct vi_info *, struct sge_txq *); 257 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 258 static inline void ring_fl_db(struct adapter *, struct sge_fl *); 259 static int refill_fl(struct adapter *, struct sge_fl *, int); 260 static void refill_sfl(void *); 261 static int alloc_fl_sdesc(struct sge_fl *); 262 static void free_fl_sdesc(struct adapter *, struct sge_fl *); 263 static void find_best_refill_source(struct adapter *, struct sge_fl *, int); 264 static void find_safe_refill_source(struct adapter *, struct sge_fl *); 265 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 266 267 static inline void get_pkt_gl(struct mbuf *, struct sglist 
*); 268 static inline u_int txpkt_len16(u_int, u_int); 269 static inline u_int txpkt_vm_len16(u_int, u_int); 270 static inline u_int txpkts0_len16(u_int); 271 static inline u_int txpkts1_len16(void); 272 static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, 273 struct mbuf *, u_int); 274 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 275 struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); 276 static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); 277 static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); 278 static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, 279 struct mbuf *, const struct txpkts *, u_int); 280 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 281 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 282 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 283 static inline uint16_t read_hw_cidx(struct sge_eq *); 284 static inline u_int reclaimable_tx_desc(struct sge_eq *); 285 static inline u_int total_available_tx_desc(struct sge_eq *); 286 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 287 static void tx_reclaim(void *, int); 288 static __be64 get_flit(struct sglist_seg *, int, int); 289 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 290 struct mbuf *); 291 static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 292 struct mbuf *); 293 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 294 static void wrq_tx_drain(void *, int); 295 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 296 297 static int sysctl_uint16(SYSCTL_HANDLER_ARGS); 298 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 299 #ifdef RATELIMIT 300 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 301 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 302 struct mbuf *); 303 #endif 304 305 static counter_u64_t extfree_refs; 306 static counter_u64_t extfree_rels; 307 308 an_handler_t t4_an_handler; 309 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 310 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 311 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 312 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 313 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 314 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 315 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 316 317 void 318 t4_register_an_handler(an_handler_t h) 319 { 320 uintptr_t *loc; 321 322 MPASS(h == NULL || t4_an_handler == NULL); 323 324 loc = (uintptr_t *)&t4_an_handler; 325 atomic_store_rel_ptr(loc, (uintptr_t)h); 326 } 327 328 void 329 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 330 { 331 uintptr_t *loc; 332 333 MPASS(type < nitems(t4_fw_msg_handler)); 334 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 335 /* 336 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 337 * handler dispatch table. Reject any attempt to install a handler for 338 * this subtype. 
339 */ 340 MPASS(type != FW_TYPE_RSSCPL); 341 MPASS(type != FW6_TYPE_RSSCPL); 342 343 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 344 atomic_store_rel_ptr(loc, (uintptr_t)h); 345 } 346 347 void 348 t4_register_cpl_handler(int opcode, cpl_handler_t h) 349 { 350 uintptr_t *loc; 351 352 MPASS(opcode < nitems(t4_cpl_handler)); 353 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 354 355 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 356 atomic_store_rel_ptr(loc, (uintptr_t)h); 357 } 358 359 static int 360 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 361 struct mbuf *m) 362 { 363 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 364 u_int tid; 365 int cookie; 366 367 MPASS(m == NULL); 368 369 tid = GET_TID(cpl); 370 if (is_ftid(iq->adapter, tid)) { 371 /* 372 * The return code for filter-write is put in the CPL cookie so 373 * we have to rely on the hardware tid (is_ftid) to determine 374 * that this is a response to a filter. 375 */ 376 cookie = CPL_COOKIE_FILTER; 377 } else { 378 cookie = G_COOKIE(cpl->cookie); 379 } 380 MPASS(cookie > CPL_COOKIE_RESERVED); 381 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 382 383 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 384 } 385 386 static int 387 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 388 struct mbuf *m) 389 { 390 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 391 unsigned int cookie; 392 393 MPASS(m == NULL); 394 395 cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 396 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 397 } 398 399 static int 400 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 401 struct mbuf *m) 402 { 403 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 404 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 405 406 MPASS(m == NULL); 407 MPASS(cookie != CPL_COOKIE_RESERVED); 408 409 return (act_open_rpl_handlers[cookie](iq, rss, m)); 410 } 411 412 static int 413 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 414 struct mbuf *m) 415 { 416 struct adapter *sc = iq->adapter; 417 u_int cookie; 418 419 MPASS(m == NULL); 420 if (is_hashfilter(sc)) 421 cookie = CPL_COOKIE_HASHFILTER; 422 else 423 cookie = CPL_COOKIE_TOM; 424 425 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 426 } 427 428 static int 429 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 430 { 431 struct adapter *sc = iq->adapter; 432 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 433 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 434 u_int cookie; 435 436 MPASS(m == NULL); 437 if (is_etid(sc, tid)) 438 cookie = CPL_COOKIE_ETHOFLD; 439 else 440 cookie = CPL_COOKIE_TOM; 441 442 return (fw4_ack_handlers[cookie](iq, rss, m)); 443 } 444 445 static void 446 t4_init_shared_cpl_handlers(void) 447 { 448 449 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 450 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 451 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 452 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 453 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 454 } 455 456 void 457 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 458 { 459 uintptr_t *loc; 460 461 MPASS(opcode < nitems(t4_cpl_handler)); 462 MPASS(cookie > CPL_COOKIE_RESERVED); 463 MPASS(cookie < NUM_CPL_COOKIES); 464 MPASS(t4_cpl_handler[opcode] != NULL); 465 466 
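	/*
	 * Shared opcodes are demultiplexed by cookie: the top-level handlers
	 * installed by t4_init_shared_cpl_handlers look up the cookie in one
	 * of these per-opcode tables. Select the table for this opcode here.
	 */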
switch (opcode) { 467 case CPL_SET_TCB_RPL: 468 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 469 break; 470 case CPL_L2T_WRITE_RPL: 471 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 472 break; 473 case CPL_ACT_OPEN_RPL: 474 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 475 break; 476 case CPL_ABORT_RPL_RSS: 477 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 478 break; 479 case CPL_FW4_ACK: 480 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 481 break; 482 default: 483 MPASS(0); 484 return; 485 } 486 MPASS(h == NULL || *loc == (uintptr_t)NULL); 487 atomic_store_rel_ptr(loc, (uintptr_t)h); 488 } 489 490 /* 491 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 492 */ 493 void 494 t4_sge_modload(void) 495 { 496 497 if (fl_pktshift < 0 || fl_pktshift > 7) { 498 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 499 " using 2 instead.\n", fl_pktshift); 500 fl_pktshift = 2; 501 } 502 503 if (spg_len != 64 && spg_len != 128) { 504 int len; 505 506 #if defined(__i386__) || defined(__amd64__) 507 len = cpu_clflush_line_size > 64 ? 128 : 64; 508 #else 509 len = 64; 510 #endif 511 if (spg_len != -1) { 512 printf("Invalid hw.cxgbe.spg_len value (%d)," 513 " using %d instead.\n", spg_len, len); 514 } 515 spg_len = len; 516 } 517 518 if (cong_drop < -1 || cong_drop > 1) { 519 printf("Invalid hw.cxgbe.cong_drop value (%d)," 520 " using 0 instead.\n", cong_drop); 521 cong_drop = 0; 522 } 523 524 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 525 printf("Invalid hw.cxgbe.tscale value (%d)," 526 " using 1 instead.\n", tscale); 527 tscale = 1; 528 } 529 530 extfree_refs = counter_u64_alloc(M_WAITOK); 531 extfree_rels = counter_u64_alloc(M_WAITOK); 532 counter_u64_zero(extfree_refs); 533 counter_u64_zero(extfree_rels); 534 535 t4_init_shared_cpl_handlers(); 536 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 537 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 538 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 539 t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); 540 #ifdef RATELIMIT 541 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 542 CPL_COOKIE_ETHOFLD); 543 #endif 544 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 545 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 546 } 547 548 void 549 t4_sge_modunload(void) 550 { 551 552 counter_u64_free(extfree_refs); 553 counter_u64_free(extfree_rels); 554 } 555 556 uint64_t 557 t4_sge_extfree_refs(void) 558 { 559 uint64_t refs, rels; 560 561 rels = counter_u64_fetch(extfree_rels); 562 refs = counter_u64_fetch(extfree_refs); 563 564 return (refs - rels); 565 } 566 567 static inline void 568 setup_pad_and_pack_boundaries(struct adapter *sc) 569 { 570 uint32_t v, m; 571 int pad, pack, pad_shift; 572 573 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 574 X_INGPADBOUNDARY_SHIFT; 575 pad = fl_pad; 576 if (fl_pad < (1 << pad_shift) || 577 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 578 !powerof2(fl_pad)) { 579 /* 580 * If there is any chance that we might use buffer packing and 581 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 582 * it to the minimum allowed in all other cases. 583 */ 584 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 585 586 /* 587 * For fl_pad = 0 we'll still write a reasonable value to the 588 * register but all the freelists will opt out of padding. 589 * We'll complain here only if the user tried to set it to a 590 * value greater than 0 that was invalid. 
591 */ 592 if (fl_pad > 0) { 593 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 594 " (%d), using %d instead.\n", fl_pad, pad); 595 } 596 } 597 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 598 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 599 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 600 601 if (is_t4(sc)) { 602 if (fl_pack != -1 && fl_pack != pad) { 603 /* Complain but carry on. */ 604 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 605 " using %d instead.\n", fl_pack, pad); 606 } 607 return; 608 } 609 610 pack = fl_pack; 611 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 612 !powerof2(fl_pack)) { 613 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 614 MPASS(powerof2(pack)); 615 if (pack < 16) 616 pack = 16; 617 if (pack == 32) 618 pack = 64; 619 if (pack > 4096) 620 pack = 4096; 621 if (fl_pack != -1) { 622 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 623 " (%d), using %d instead.\n", fl_pack, pack); 624 } 625 } 626 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 627 if (pack == 16) 628 v = V_INGPACKBOUNDARY(0); 629 else 630 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 631 632 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 633 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 634 } 635 636 /* 637 * adap->params.vpd.cclk must be set up before this is called. 638 */ 639 void 640 t4_tweak_chip_settings(struct adapter *sc) 641 { 642 int i; 643 uint32_t v, m; 644 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 645 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 646 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 647 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 648 static int sge_flbuf_sizes[] = { 649 MCLBYTES, 650 #if MJUMPAGESIZE != MCLBYTES 651 MJUMPAGESIZE, 652 MJUMPAGESIZE - CL_METADATA_SIZE, 653 MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, 654 #endif 655 MJUM9BYTES, 656 MJUM16BYTES, 657 MCLBYTES - MSIZE - CL_METADATA_SIZE, 658 MJUM9BYTES - CL_METADATA_SIZE, 659 MJUM16BYTES - CL_METADATA_SIZE, 660 }; 661 662 KASSERT(sc->flags & MASTER_PF, 663 ("%s: trying to change chip settings when not master.", __func__)); 664 665 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 666 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 667 V_EGRSTATUSPAGESIZE(spg_len == 128); 668 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 669 670 setup_pad_and_pack_boundaries(sc); 671 672 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 673 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 674 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 675 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 676 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 677 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 678 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 679 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 680 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 681 682 KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, 683 ("%s: hw buffer size table too big", __func__)); 684 for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { 685 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), 686 sge_flbuf_sizes[i]); 687 } 688 689 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 690 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 691 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 692 693 KASSERT(intr_timer[0] <= timer_max, 694 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 695 timer_max)); 696 for (i = 1; i < nitems(intr_timer); i++) { 697 KASSERT(intr_timer[i] >= intr_timer[i - 1], 698 ("%s: timers not listed in increasing order (%d)", 699 __func__, i)); 700 701 while 
(intr_timer[i] > timer_max) { 702 if (i == nitems(intr_timer) - 1) { 703 intr_timer[i] = timer_max; 704 break; 705 } 706 intr_timer[i] += intr_timer[i - 1]; 707 intr_timer[i] /= 2; 708 } 709 } 710 711 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | 712 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); 713 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); 714 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | 715 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); 716 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); 717 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | 718 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); 719 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); 720 721 if (chip_id(sc) >= CHELSIO_T6) { 722 m = V_TSCALE(M_TSCALE); 723 if (tscale == 1) 724 v = 0; 725 else 726 v = V_TSCALE(tscale - 2); 727 t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); 728 729 if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { 730 m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | 731 V_WRTHRTHRESH(M_WRTHRTHRESH); 732 t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); 733 v &= ~m; 734 v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | 735 V_WRTHRTHRESH(16); 736 t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); 737 } 738 } 739 740 /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ 741 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 742 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); 743 744 /* 745 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been 746 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we 747 * may have to deal with is MAXPHYS + 1 page. 748 */ 749 v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); 750 t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); 751 752 /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */ 753 m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; 754 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); 755 756 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 757 F_RESETDDPOFFSET; 758 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 759 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); 760 } 761 762 /* 763 * SGE wants the buffer to be at least 64B and then a multiple of 16. If 764 * padding is in use, the buffer's start and end need to be aligned to the pad 765 * boundary as well. We'll just make sure that the size is a multiple of the 766 * boundary here, it is up to the buffer allocation code to make sure the start 767 * of the buffer is aligned as well. 768 */ 769 static inline int 770 hwsz_ok(struct adapter *sc, int hwsz) 771 { 772 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; 773 774 return (hwsz >= 64 && (hwsz & mask) == 0); 775 } 776 777 /* 778 * XXX: driver really should be able to deal with unexpected settings. 
779 */ 780 int 781 t4_read_chip_settings(struct adapter *sc) 782 { 783 struct sge *s = &sc->sge; 784 struct sge_params *sp = &sc->params.sge; 785 int i, j, n, rc = 0; 786 uint32_t m, v, r; 787 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 788 static int sw_buf_sizes[] = { /* Sorted by size */ 789 MCLBYTES, 790 #if MJUMPAGESIZE != MCLBYTES 791 MJUMPAGESIZE, 792 #endif 793 MJUM9BYTES, 794 MJUM16BYTES 795 }; 796 struct sw_zone_info *swz, *safe_swz; 797 struct hw_buf_info *hwb; 798 799 m = F_RXPKTCPLMODE; 800 v = F_RXPKTCPLMODE; 801 r = sc->params.sge.sge_control; 802 if ((r & m) != v) { 803 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 804 rc = EINVAL; 805 } 806 807 /* 808 * If this changes then every single use of PAGE_SHIFT in the driver 809 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 810 */ 811 if (sp->page_shift != PAGE_SHIFT) { 812 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 813 rc = EINVAL; 814 } 815 816 /* Filter out unusable hw buffer sizes entirely (mark with -2). */ 817 hwb = &s->hw_buf_info[0]; 818 for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { 819 r = sc->params.sge.sge_fl_buffer_size[i]; 820 hwb->size = r; 821 hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; 822 hwb->next = -1; 823 } 824 825 /* 826 * Create a sorted list in decreasing order of hw buffer sizes (and so 827 * increasing order of spare area) for each software zone. 828 * 829 * If padding is enabled then the start and end of the buffer must align 830 * to the pad boundary; if packing is enabled then they must align with 831 * the pack boundary as well. Allocations from the cluster zones are 832 * aligned to min(size, 4K), so the buffer starts at that alignment and 833 * ends at hwb->size alignment. If mbuf inlining is allowed the 834 * starting alignment will be reduced to MSIZE and the driver will 835 * exercise appropriate caution when deciding on the best buffer layout 836 * to use. 
837 */ 838 n = 0; /* no usable buffer size to begin with */ 839 swz = &s->sw_zone_info[0]; 840 safe_swz = NULL; 841 for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { 842 int8_t head = -1, tail = -1; 843 844 swz->size = sw_buf_sizes[i]; 845 swz->zone = m_getzone(swz->size); 846 swz->type = m_gettype(swz->size); 847 848 if (swz->size < PAGE_SIZE) { 849 MPASS(powerof2(swz->size)); 850 if (fl_pad && (swz->size % sp->pad_boundary != 0)) 851 continue; 852 } 853 854 if (swz->size == safest_rx_cluster) 855 safe_swz = swz; 856 857 hwb = &s->hw_buf_info[0]; 858 for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { 859 if (hwb->zidx != -1 || hwb->size > swz->size) 860 continue; 861 #ifdef INVARIANTS 862 if (fl_pad) 863 MPASS(hwb->size % sp->pad_boundary == 0); 864 #endif 865 hwb->zidx = i; 866 if (head == -1) 867 head = tail = j; 868 else if (hwb->size < s->hw_buf_info[tail].size) { 869 s->hw_buf_info[tail].next = j; 870 tail = j; 871 } else { 872 int8_t *cur; 873 struct hw_buf_info *t; 874 875 for (cur = &head; *cur != -1; cur = &t->next) { 876 t = &s->hw_buf_info[*cur]; 877 if (hwb->size == t->size) { 878 hwb->zidx = -2; 879 break; 880 } 881 if (hwb->size > t->size) { 882 hwb->next = *cur; 883 *cur = j; 884 break; 885 } 886 } 887 } 888 } 889 swz->head_hwidx = head; 890 swz->tail_hwidx = tail; 891 892 if (tail != -1) { 893 n++; 894 if (swz->size - s->hw_buf_info[tail].size >= 895 CL_METADATA_SIZE) 896 sc->flags |= BUF_PACKING_OK; 897 } 898 } 899 if (n == 0) { 900 device_printf(sc->dev, "no usable SGE FL buffer size.\n"); 901 rc = EINVAL; 902 } 903 904 s->safe_hwidx1 = -1; 905 s->safe_hwidx2 = -1; 906 if (safe_swz != NULL) { 907 s->safe_hwidx1 = safe_swz->head_hwidx; 908 for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { 909 int spare; 910 911 hwb = &s->hw_buf_info[i]; 912 #ifdef INVARIANTS 913 if (fl_pad) 914 MPASS(hwb->size % sp->pad_boundary == 0); 915 #endif 916 spare = safe_swz->size - hwb->size; 917 if (spare >= CL_METADATA_SIZE) { 918 s->safe_hwidx2 = i; 919 break; 920 } 921 } 922 } 923 924 if (sc->flags & IS_VF) 925 return (0); 926 927 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 928 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 929 if (r != v) { 930 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 931 rc = EINVAL; 932 } 933 934 m = v = F_TDDPTAGTCB; 935 r = t4_read_reg(sc, A_ULP_RX_CTL); 936 if ((r & m) != v) { 937 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 938 rc = EINVAL; 939 } 940 941 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 942 F_RESETDDPOFFSET; 943 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 944 r = t4_read_reg(sc, A_TP_PARA_REG5); 945 if ((r & m) != v) { 946 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 947 rc = EINVAL; 948 } 949 950 t4_init_tp_params(sc, 1); 951 952 t4_read_mtu_tbl(sc, sc->params.mtus, NULL); 953 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); 954 955 return (rc); 956 } 957 958 int 959 t4_create_dma_tag(struct adapter *sc) 960 { 961 int rc; 962 963 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 964 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 965 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 966 NULL, &sc->dmat); 967 if (rc != 0) { 968 device_printf(sc->dev, 969 "failed to create main DMA tag: %d\n", rc); 970 } 971 972 return (rc); 973 } 974 975 void 976 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 977 struct sysctl_oid_list *children) 978 { 979 struct sge_params *sp = &sc->params.sge; 980 981 
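	/*
	 * Read-only sysctls. These report the SGE settings the driver is
	 * actually using, which may differ from the hw.cxgbe.* tunables above
	 * after validation and autoconfiguration.
	 */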
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 982 CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", 983 "freelist buffer sizes"); 984 985 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 986 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 987 988 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 989 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 990 991 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 992 NULL, sp->spg_len, "status page size (bytes)"); 993 994 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 995 NULL, cong_drop, "congestion drop setting"); 996 997 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 998 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 999 } 1000 1001 int 1002 t4_destroy_dma_tag(struct adapter *sc) 1003 { 1004 if (sc->dmat) 1005 bus_dma_tag_destroy(sc->dmat); 1006 1007 return (0); 1008 } 1009 1010 /* 1011 * Allocate and initialize the firmware event queue and the management queue. 1012 * 1013 * Returns errno on failure. Resources allocated up to that point may still be 1014 * allocated. Caller is responsible for cleanup in case this function fails. 1015 */ 1016 int 1017 t4_setup_adapter_queues(struct adapter *sc) 1018 { 1019 int rc; 1020 1021 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1022 1023 sysctl_ctx_init(&sc->ctx); 1024 sc->flags |= ADAP_SYSCTL_CTX; 1025 1026 /* 1027 * Firmware event queue 1028 */ 1029 rc = alloc_fwq(sc); 1030 if (rc != 0) 1031 return (rc); 1032 1033 /* 1034 * Management queue. This is just a control queue that uses the fwq as 1035 * its associated iq. 1036 */ 1037 if (!(sc->flags & IS_VF)) 1038 rc = alloc_mgmtq(sc); 1039 1040 return (rc); 1041 } 1042 1043 /* 1044 * Idempotent 1045 */ 1046 int 1047 t4_teardown_adapter_queues(struct adapter *sc) 1048 { 1049 1050 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1051 1052 /* Do this before freeing the queue */ 1053 if (sc->flags & ADAP_SYSCTL_CTX) { 1054 sysctl_ctx_free(&sc->ctx); 1055 sc->flags &= ~ADAP_SYSCTL_CTX; 1056 } 1057 1058 free_mgmtq(sc); 1059 free_fwq(sc); 1060 1061 return (0); 1062 } 1063 1064 /* Maximum payload that can be delivered with a single iq descriptor */ 1065 static inline int 1066 mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) 1067 { 1068 int payload; 1069 1070 #ifdef TCP_OFFLOAD 1071 if (toe) { 1072 int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); 1073 1074 /* Note that COP can set rx_coalesce on/off per connection. 
*/ 1075 payload = max(mtu, rxcs); 1076 } else { 1077 #endif 1078 /* large enough even when hw VLAN extraction is disabled */ 1079 payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 1080 ETHER_VLAN_ENCAP_LEN + mtu; 1081 #ifdef TCP_OFFLOAD 1082 } 1083 #endif 1084 1085 return (payload); 1086 } 1087 1088 int 1089 t4_setup_vi_queues(struct vi_info *vi) 1090 { 1091 int rc = 0, i, intr_idx, iqidx; 1092 struct sge_rxq *rxq; 1093 struct sge_txq *txq; 1094 struct sge_wrq *ctrlq; 1095 #ifdef TCP_OFFLOAD 1096 struct sge_ofld_rxq *ofld_rxq; 1097 #endif 1098 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1099 struct sge_wrq *ofld_txq; 1100 #endif 1101 #ifdef DEV_NETMAP 1102 int saved_idx; 1103 struct sge_nm_rxq *nm_rxq; 1104 struct sge_nm_txq *nm_txq; 1105 #endif 1106 char name[16]; 1107 struct port_info *pi = vi->pi; 1108 struct adapter *sc = pi->adapter; 1109 struct ifnet *ifp = vi->ifp; 1110 struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); 1111 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1112 int maxp, mtu = ifp->if_mtu; 1113 1114 /* Interrupt vector to start from (when using multiple vectors) */ 1115 intr_idx = vi->first_intr; 1116 1117 #ifdef DEV_NETMAP 1118 saved_idx = intr_idx; 1119 if (ifp->if_capabilities & IFCAP_NETMAP) { 1120 1121 /* netmap is supported with direct interrupts only. */ 1122 MPASS(!forwarding_intr_to_fwq(sc)); 1123 1124 /* 1125 * We don't have buffers to back the netmap rx queues 1126 * right now so we create the queues in a way that 1127 * doesn't set off any congestion signal in the chip. 1128 */ 1129 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", 1130 CTLFLAG_RD, NULL, "rx queues"); 1131 for_each_nm_rxq(vi, i, nm_rxq) { 1132 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); 1133 if (rc != 0) 1134 goto done; 1135 intr_idx++; 1136 } 1137 1138 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", 1139 CTLFLAG_RD, NULL, "tx queues"); 1140 for_each_nm_txq(vi, i, nm_txq) { 1141 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); 1142 rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); 1143 if (rc != 0) 1144 goto done; 1145 } 1146 } 1147 1148 /* Normal rx queues and netmap rx queues share the same interrupts. */ 1149 intr_idx = saved_idx; 1150 #endif 1151 1152 /* 1153 * Allocate rx queues first because a default iqid is required when 1154 * creating a tx queue. 1155 */ 1156 maxp = mtu_to_max_payload(sc, mtu, 0); 1157 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", 1158 CTLFLAG_RD, NULL, "rx queues"); 1159 for_each_rxq(vi, i, rxq) { 1160 1161 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); 1162 1163 snprintf(name, sizeof(name), "%s rxq%d-fl", 1164 device_get_nameunit(vi->dev), i); 1165 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 1166 1167 rc = alloc_rxq(vi, rxq, 1168 forwarding_intr_to_fwq(sc) ? 
-1 : intr_idx, i, oid); 1169 if (rc != 0) 1170 goto done; 1171 intr_idx++; 1172 } 1173 #ifdef DEV_NETMAP 1174 if (ifp->if_capabilities & IFCAP_NETMAP) 1175 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1176 #endif 1177 #ifdef TCP_OFFLOAD 1178 maxp = mtu_to_max_payload(sc, mtu, 1); 1179 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", 1180 CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); 1181 for_each_ofld_rxq(vi, i, ofld_rxq) { 1182 1183 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 1184 vi->qsize_rxq); 1185 1186 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 1187 device_get_nameunit(vi->dev), i); 1188 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 1189 1190 rc = alloc_ofld_rxq(vi, ofld_rxq, 1191 forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); 1192 if (rc != 0) 1193 goto done; 1194 intr_idx++; 1195 } 1196 #endif 1197 1198 /* 1199 * Now the tx queues. 1200 */ 1201 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, 1202 NULL, "tx queues"); 1203 for_each_txq(vi, i, txq) { 1204 iqidx = vi->first_rxq + (i % vi->nrxq); 1205 snprintf(name, sizeof(name), "%s txq%d", 1206 device_get_nameunit(vi->dev), i); 1207 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 1208 sc->sge.rxq[iqidx].iq.cntxt_id, name); 1209 1210 rc = alloc_txq(vi, txq, i, oid); 1211 if (rc != 0) 1212 goto done; 1213 } 1214 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1215 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", 1216 CTLFLAG_RD, NULL, "tx queues for TOE/ETHOFLD"); 1217 for_each_ofld_txq(vi, i, ofld_txq) { 1218 struct sysctl_oid *oid2; 1219 1220 snprintf(name, sizeof(name), "%s ofld_txq%d", 1221 device_get_nameunit(vi->dev), i); 1222 #ifdef TCP_OFFLOAD 1223 iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); 1224 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 1225 sc->sge.ofld_rxq[iqidx].iq.cntxt_id, name); 1226 #else 1227 iqidx = vi->first_rxq + (i % vi->nrxq); 1228 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 1229 sc->sge.rxq[iqidx].iq.cntxt_id, name); 1230 #endif 1231 1232 snprintf(name, sizeof(name), "%d", i); 1233 oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 1234 name, CTLFLAG_RD, NULL, "offload tx queue"); 1235 1236 rc = alloc_wrq(sc, vi, ofld_txq, oid2); 1237 if (rc != 0) 1238 goto done; 1239 } 1240 #endif 1241 1242 /* 1243 * Finally, the control queue. 
1244 */ 1245 if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) 1246 goto done; 1247 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, 1248 NULL, "ctrl queue"); 1249 ctrlq = &sc->sge.ctrlq[pi->port_id]; 1250 snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); 1251 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, 1252 sc->sge.rxq[vi->first_rxq].iq.cntxt_id, name); 1253 rc = alloc_wrq(sc, vi, ctrlq, oid); 1254 1255 done: 1256 if (rc) 1257 t4_teardown_vi_queues(vi); 1258 1259 return (rc); 1260 } 1261 1262 /* 1263 * Idempotent 1264 */ 1265 int 1266 t4_teardown_vi_queues(struct vi_info *vi) 1267 { 1268 int i; 1269 struct port_info *pi = vi->pi; 1270 struct adapter *sc = pi->adapter; 1271 struct sge_rxq *rxq; 1272 struct sge_txq *txq; 1273 #ifdef TCP_OFFLOAD 1274 struct sge_ofld_rxq *ofld_rxq; 1275 #endif 1276 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1277 struct sge_wrq *ofld_txq; 1278 #endif 1279 #ifdef DEV_NETMAP 1280 struct sge_nm_rxq *nm_rxq; 1281 struct sge_nm_txq *nm_txq; 1282 #endif 1283 1284 /* Do this before freeing the queues */ 1285 if (vi->flags & VI_SYSCTL_CTX) { 1286 sysctl_ctx_free(&vi->ctx); 1287 vi->flags &= ~VI_SYSCTL_CTX; 1288 } 1289 1290 #ifdef DEV_NETMAP 1291 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1292 for_each_nm_txq(vi, i, nm_txq) { 1293 free_nm_txq(vi, nm_txq); 1294 } 1295 1296 for_each_nm_rxq(vi, i, nm_rxq) { 1297 free_nm_rxq(vi, nm_rxq); 1298 } 1299 } 1300 #endif 1301 1302 /* 1303 * Take down all the tx queues first, as they reference the rx queues 1304 * (for egress updates, etc.). 1305 */ 1306 1307 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) 1308 free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); 1309 1310 for_each_txq(vi, i, txq) { 1311 free_txq(vi, txq); 1312 } 1313 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1314 for_each_ofld_txq(vi, i, ofld_txq) { 1315 free_wrq(sc, ofld_txq); 1316 } 1317 #endif 1318 1319 /* 1320 * Then take down the rx queues. 1321 */ 1322 1323 for_each_rxq(vi, i, rxq) { 1324 free_rxq(vi, rxq); 1325 } 1326 #ifdef TCP_OFFLOAD 1327 for_each_ofld_rxq(vi, i, ofld_rxq) { 1328 free_ofld_rxq(vi, ofld_rxq); 1329 } 1330 #endif 1331 1332 return (0); 1333 } 1334 1335 /* 1336 * Deals with errors and the firmware event queue. All data rx queues forward 1337 * their interrupt to the firmware event queue. 
1338 */ 1339 void 1340 t4_intr_all(void *arg) 1341 { 1342 struct adapter *sc = arg; 1343 struct sge_iq *fwq = &sc->sge.fwq; 1344 1345 t4_intr_err(arg); 1346 if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { 1347 service_iq(fwq, 0); 1348 atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); 1349 } 1350 } 1351 1352 /* Deals with error interrupts */ 1353 void 1354 t4_intr_err(void *arg) 1355 { 1356 struct adapter *sc = arg; 1357 1358 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1359 t4_slow_intr_handler(sc); 1360 } 1361 1362 void 1363 t4_intr_evt(void *arg) 1364 { 1365 struct sge_iq *iq = arg; 1366 1367 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1368 service_iq(iq, 0); 1369 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1370 } 1371 } 1372 1373 void 1374 t4_intr(void *arg) 1375 { 1376 struct sge_iq *iq = arg; 1377 1378 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1379 service_iq(iq, 0); 1380 atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1381 } 1382 } 1383 1384 void 1385 t4_vi_intr(void *arg) 1386 { 1387 struct irq *irq = arg; 1388 1389 #ifdef DEV_NETMAP 1390 if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { 1391 t4_nm_intr(irq->nm_rxq); 1392 atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); 1393 } 1394 #endif 1395 if (irq->rxq != NULL) 1396 t4_intr(irq->rxq); 1397 } 1398 1399 static inline int 1400 sort_before_lro(struct lro_ctrl *lro) 1401 { 1402 1403 return (lro->lro_mbuf_max != 0); 1404 } 1405 1406 /* 1407 * Deals with anything and everything on the given ingress queue. 1408 */ 1409 static int 1410 service_iq(struct sge_iq *iq, int budget) 1411 { 1412 struct sge_iq *q; 1413 struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ 1414 struct sge_fl *fl; /* Use iff IQ_HAS_FL */ 1415 struct adapter *sc = iq->adapter; 1416 struct iq_desc *d = &iq->desc[iq->cidx]; 1417 int ndescs = 0, limit; 1418 int rsp_type, refill; 1419 uint32_t lq; 1420 uint16_t fl_hw_cidx; 1421 struct mbuf *m0; 1422 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1423 #if defined(INET) || defined(INET6) 1424 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1425 struct lro_ctrl *lro = &rxq->lro; 1426 #endif 1427 1428 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1429 1430 limit = budget ? budget : iq->qsize / 16; 1431 1432 if (iq->flags & IQ_HAS_FL) { 1433 fl = &rxq->fl; 1434 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1435 } else { 1436 fl = NULL; 1437 fl_hw_cidx = 0; /* to silence gcc warning */ 1438 } 1439 1440 #if defined(INET) || defined(INET6) 1441 if (iq->flags & IQ_ADJ_CREDIT) { 1442 MPASS(sort_before_lro(lro)); 1443 iq->flags &= ~IQ_ADJ_CREDIT; 1444 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { 1445 tcp_lro_flush_all(lro); 1446 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | 1447 V_INGRESSQID((u32)iq->cntxt_id) | 1448 V_SEINTARM(iq->intr_params)); 1449 return (0); 1450 } 1451 ndescs = 1; 1452 } 1453 #else 1454 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1455 #endif 1456 1457 /* 1458 * We always come back and check the descriptor ring for new indirect 1459 * interrupts and other responses after running a single handler. 
1460 */ 1461 for (;;) { 1462 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1463 1464 rmb(); 1465 1466 refill = 0; 1467 m0 = NULL; 1468 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1469 lq = be32toh(d->rsp.pldbuflen_qid); 1470 1471 switch (rsp_type) { 1472 case X_RSPD_TYPE_FLBUF: 1473 1474 KASSERT(iq->flags & IQ_HAS_FL, 1475 ("%s: data for an iq (%p) with no freelist", 1476 __func__, iq)); 1477 1478 m0 = get_fl_payload(sc, fl, lq); 1479 if (__predict_false(m0 == NULL)) 1480 goto process_iql; 1481 refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; 1482 #ifdef T4_PKT_TIMESTAMP 1483 /* 1484 * 60 bit timestamp for the payload is 1485 * *(uint64_t *)m0->m_pktdat. Note that it is 1486 * in the leading free-space in the mbuf. The 1487 * kernel can clobber it during a pullup, 1488 * m_copymdata, etc. You need to make sure that 1489 * the mbuf reaches you unmolested if you care 1490 * about the timestamp. 1491 */ 1492 *(uint64_t *)m0->m_pktdat = 1493 be64toh(ctrl->u.last_flit) & 1494 0xfffffffffffffff; 1495 #endif 1496 1497 /* fall through */ 1498 1499 case X_RSPD_TYPE_CPL: 1500 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1501 ("%s: bad opcode %02x.", __func__, 1502 d->rss.opcode)); 1503 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1504 break; 1505 1506 case X_RSPD_TYPE_INTR: 1507 1508 /* 1509 * Interrupts should be forwarded only to queues 1510 * that are not forwarding their interrupts. 1511 * This means service_iq can recurse but only 1 1512 * level deep. 1513 */ 1514 KASSERT(budget == 0, 1515 ("%s: budget %u, rsp_type %u", __func__, 1516 budget, rsp_type)); 1517 1518 /* 1519 * There are 1K interrupt-capable queues (qids 0 1520 * through 1023). A response type indicating a 1521 * forwarded interrupt with a qid >= 1K is an 1522 * iWARP async notification. 1523 */ 1524 if (lq >= 1024) { 1525 t4_an_handler(iq, &d->rsp); 1526 break; 1527 } 1528 1529 q = sc->sge.iqmap[lq - sc->sge.iq_start - 1530 sc->sge.iq_base]; 1531 if (atomic_cmpset_int(&q->state, IQS_IDLE, 1532 IQS_BUSY)) { 1533 if (service_iq(q, q->qsize / 16) == 0) { 1534 atomic_cmpset_int(&q->state, 1535 IQS_BUSY, IQS_IDLE); 1536 } else { 1537 STAILQ_INSERT_TAIL(&iql, q, 1538 link); 1539 } 1540 } 1541 break; 1542 1543 default: 1544 KASSERT(0, 1545 ("%s: illegal response type %d on iq %p", 1546 __func__, rsp_type, iq)); 1547 log(LOG_ERR, 1548 "%s: illegal response type %d on iq %p", 1549 device_get_nameunit(sc->dev), rsp_type, iq); 1550 break; 1551 } 1552 1553 d++; 1554 if (__predict_false(++iq->cidx == iq->sidx)) { 1555 iq->cidx = 0; 1556 iq->gen ^= F_RSPD_GEN; 1557 d = &iq->desc[0]; 1558 } 1559 if (__predict_false(++ndescs == limit)) { 1560 t4_write_reg(sc, sc->sge_gts_reg, 1561 V_CIDXINC(ndescs) | 1562 V_INGRESSQID(iq->cntxt_id) | 1563 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1564 ndescs = 0; 1565 1566 #if defined(INET) || defined(INET6) 1567 if (iq->flags & IQ_LRO_ENABLED && 1568 !sort_before_lro(lro) && 1569 sc->lro_timeout != 0) { 1570 tcp_lro_flush_inactive(lro, 1571 &lro_timeout); 1572 } 1573 #endif 1574 1575 if (budget) { 1576 if (iq->flags & IQ_HAS_FL) { 1577 FL_LOCK(fl); 1578 refill_fl(sc, fl, 32); 1579 FL_UNLOCK(fl); 1580 } 1581 return (EINPROGRESS); 1582 } 1583 } 1584 if (refill) { 1585 FL_LOCK(fl); 1586 refill_fl(sc, fl, 32); 1587 FL_UNLOCK(fl); 1588 fl_hw_cidx = fl->hw_cidx; 1589 } 1590 } 1591 1592 process_iql: 1593 if (STAILQ_EMPTY(&iql)) 1594 break; 1595 1596 /* 1597 * Process the head only, and send it to the back of the list if 1598 * it's still not done. 
1599 */ 1600 q = STAILQ_FIRST(&iql); 1601 STAILQ_REMOVE_HEAD(&iql, link); 1602 if (service_iq(q, q->qsize / 8) == 0) 1603 atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 1604 else 1605 STAILQ_INSERT_TAIL(&iql, q, link); 1606 } 1607 1608 #if defined(INET) || defined(INET6) 1609 if (iq->flags & IQ_LRO_ENABLED) { 1610 if (ndescs > 0 && lro->lro_mbuf_count > 8) { 1611 MPASS(sort_before_lro(lro)); 1612 /* hold back one credit and don't flush LRO state */ 1613 iq->flags |= IQ_ADJ_CREDIT; 1614 ndescs--; 1615 } else { 1616 tcp_lro_flush_all(lro); 1617 } 1618 } 1619 #endif 1620 1621 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1622 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1623 1624 if (iq->flags & IQ_HAS_FL) { 1625 int starved; 1626 1627 FL_LOCK(fl); 1628 starved = refill_fl(sc, fl, 64); 1629 FL_UNLOCK(fl); 1630 if (__predict_false(starved != 0)) 1631 add_fl_to_sfl(sc, fl); 1632 } 1633 1634 return (0); 1635 } 1636 1637 static inline int 1638 cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) 1639 { 1640 int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; 1641 1642 if (rc) 1643 MPASS(cll->region3 >= CL_METADATA_SIZE); 1644 1645 return (rc); 1646 } 1647 1648 static inline struct cluster_metadata * 1649 cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, 1650 caddr_t cl) 1651 { 1652 1653 if (cl_has_metadata(fl, cll)) { 1654 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1655 1656 return ((struct cluster_metadata *)(cl + swz->size) - 1); 1657 } 1658 return (NULL); 1659 } 1660 1661 static void 1662 rxb_free(struct mbuf *m) 1663 { 1664 uma_zone_t zone = m->m_ext.ext_arg1; 1665 void *cl = m->m_ext.ext_arg2; 1666 1667 uma_zfree(zone, cl); 1668 counter_u64_add(extfree_rels, 1); 1669 } 1670 1671 /* 1672 * The mbuf returned by this function could be allocated from zone_mbuf or 1673 * constructed in spare room in the cluster. 1674 * 1675 * The mbuf carries the payload in one of these ways 1676 * a) frame inside the mbuf (mbuf from zone_mbuf) 1677 * b) m_cljset (for clusters without metadata) zone_mbuf 1678 * c) m_extaddref (cluster with metadata) inline mbuf 1679 * d) m_extaddref (cluster with metadata) zone_mbuf 1680 */ 1681 static struct mbuf * 1682 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1683 int remaining) 1684 { 1685 struct mbuf *m; 1686 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1687 struct cluster_layout *cll = &sd->cll; 1688 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1689 struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; 1690 struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); 1691 int len, blen; 1692 caddr_t payload; 1693 1694 blen = hwb->size - fl->rx_offset; /* max possible in this buf */ 1695 len = min(remaining, blen); 1696 payload = sd->cl + cll->region1 + fl->rx_offset; 1697 if (fl->flags & FL_BUF_PACKING) { 1698 const u_int l = fr_offset + len; 1699 const u_int pad = roundup2(l, fl->buf_boundary) - l; 1700 1701 if (fl->rx_offset + len + pad < hwb->size) 1702 blen = len + pad; 1703 MPASS(fl->rx_offset + blen <= hwb->size); 1704 } else { 1705 MPASS(fl->rx_offset == 0); /* not packing */ 1706 } 1707 1708 1709 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { 1710 1711 /* 1712 * Copy payload into a freshly allocated mbuf. 1713 */ 1714 1715 m = fr_offset == 0 ? 
1716 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1717 if (m == NULL) 1718 return (NULL); 1719 fl->mbuf_allocated++; 1720 #ifdef T4_PKT_TIMESTAMP 1721 /* Leave room for a timestamp */ 1722 m->m_data += 8; 1723 #endif 1724 /* copy data to mbuf */ 1725 bcopy(payload, mtod(m, caddr_t), len); 1726 1727 } else if (sd->nmbuf * MSIZE < cll->region1) { 1728 1729 /* 1730 * There's spare room in the cluster for an mbuf. Create one 1731 * and associate it with the payload that's in the cluster. 1732 */ 1733 1734 MPASS(clm != NULL); 1735 m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); 1736 /* No bzero required */ 1737 if (m_init(m, M_NOWAIT, MT_DATA, 1738 fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) 1739 return (NULL); 1740 fl->mbuf_inlined++; 1741 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, 1742 swz->zone, sd->cl); 1743 if (sd->nmbuf++ == 0) 1744 counter_u64_add(extfree_refs, 1); 1745 1746 } else { 1747 1748 /* 1749 * Grab an mbuf from zone_mbuf and associate it with the 1750 * payload in the cluster. 1751 */ 1752 1753 m = fr_offset == 0 ? 1754 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1755 if (m == NULL) 1756 return (NULL); 1757 fl->mbuf_allocated++; 1758 if (clm != NULL) { 1759 m_extaddref(m, payload, blen, &clm->refcount, 1760 rxb_free, swz->zone, sd->cl); 1761 if (sd->nmbuf++ == 0) 1762 counter_u64_add(extfree_refs, 1); 1763 } else { 1764 m_cljset(m, sd->cl, swz->type); 1765 sd->cl = NULL; /* consumed, not a recycle candidate */ 1766 } 1767 } 1768 if (fr_offset == 0) 1769 m->m_pkthdr.len = remaining; 1770 m->m_len = len; 1771 1772 if (fl->flags & FL_BUF_PACKING) { 1773 fl->rx_offset += blen; 1774 MPASS(fl->rx_offset <= hwb->size); 1775 if (fl->rx_offset < hwb->size) 1776 return (m); /* without advancing the cidx */ 1777 } 1778 1779 if (__predict_false(++fl->cidx % 8 == 0)) { 1780 uint16_t cidx = fl->cidx / 8; 1781 1782 if (__predict_false(cidx == fl->sidx)) 1783 fl->cidx = cidx = 0; 1784 fl->hw_cidx = cidx; 1785 } 1786 fl->rx_offset = 0; 1787 1788 return (m); 1789 } 1790 1791 static struct mbuf * 1792 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) 1793 { 1794 struct mbuf *m0, *m, **pnext; 1795 u_int remaining; 1796 const u_int total = G_RSPD_LEN(len_newbuf); 1797 1798 if (__predict_false(fl->flags & FL_BUF_RESUME)) { 1799 M_ASSERTPKTHDR(fl->m0); 1800 MPASS(fl->m0->m_pkthdr.len == total); 1801 MPASS(fl->remaining < total); 1802 1803 m0 = fl->m0; 1804 pnext = fl->pnext; 1805 remaining = fl->remaining; 1806 fl->flags &= ~FL_BUF_RESUME; 1807 goto get_segment; 1808 } 1809 1810 if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { 1811 fl->rx_offset = 0; 1812 if (__predict_false(++fl->cidx % 8 == 0)) { 1813 uint16_t cidx = fl->cidx / 8; 1814 1815 if (__predict_false(cidx == fl->sidx)) 1816 fl->cidx = cidx = 0; 1817 fl->hw_cidx = cidx; 1818 } 1819 } 1820 1821 /* 1822 * Payload starts at rx_offset in the current hw buffer. Its length is 1823 * 'len' and it may span multiple hw buffers. 
1824 */ 1825 1826 m0 = get_scatter_segment(sc, fl, 0, total); 1827 if (m0 == NULL) 1828 return (NULL); 1829 remaining = total - m0->m_len; 1830 pnext = &m0->m_next; 1831 while (remaining > 0) { 1832 get_segment: 1833 MPASS(fl->rx_offset == 0); 1834 m = get_scatter_segment(sc, fl, total - remaining, remaining); 1835 if (__predict_false(m == NULL)) { 1836 fl->m0 = m0; 1837 fl->pnext = pnext; 1838 fl->remaining = remaining; 1839 fl->flags |= FL_BUF_RESUME; 1840 return (NULL); 1841 } 1842 *pnext = m; 1843 pnext = &m->m_next; 1844 remaining -= m->m_len; 1845 } 1846 *pnext = NULL; 1847 1848 M_ASSERTPKTHDR(m0); 1849 return (m0); 1850 } 1851 1852 static int 1853 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 1854 { 1855 struct sge_rxq *rxq = iq_to_rxq(iq); 1856 struct ifnet *ifp = rxq->ifp; 1857 struct adapter *sc = iq->adapter; 1858 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); 1859 #if defined(INET) || defined(INET6) 1860 struct lro_ctrl *lro = &rxq->lro; 1861 #endif 1862 static const int sw_hashtype[4][2] = { 1863 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1864 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1865 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1866 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1867 }; 1868 1869 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, 1870 rss->opcode)); 1871 1872 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 1873 m0->m_len -= sc->params.sge.fl_pktshift; 1874 m0->m_data += sc->params.sge.fl_pktshift; 1875 1876 m0->m_pkthdr.rcvif = ifp; 1877 M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); 1878 m0->m_pkthdr.flowid = be32toh(rss->hash_val); 1879 1880 if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { 1881 if (ifp->if_capenable & IFCAP_RXCSUM && 1882 cpl->l2info & htobe32(F_RXF_IP)) { 1883 m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 1884 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1885 rxq->rxcsum++; 1886 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 1887 cpl->l2info & htobe32(F_RXF_IP6)) { 1888 m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 1889 CSUM_PSEUDO_HDR); 1890 rxq->rxcsum++; 1891 } 1892 1893 if (__predict_false(cpl->ip_frag)) 1894 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 1895 else 1896 m0->m_pkthdr.csum_data = 0xffff; 1897 } 1898 1899 if (cpl->vlan_ex) { 1900 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 1901 m0->m_flags |= M_VLANTAG; 1902 rxq->vlan_extraction++; 1903 } 1904 1905 #if defined(INET) || defined(INET6) 1906 if (iq->flags & IQ_LRO_ENABLED) { 1907 if (sort_before_lro(lro)) { 1908 tcp_lro_queue_mbuf(lro, m0); 1909 return (0); /* queued for sort, then LRO */ 1910 } 1911 if (tcp_lro_rx(lro, m0, 0) == 0) 1912 return (0); /* queued for LRO */ 1913 } 1914 #endif 1915 ifp->if_input(ifp, m0); 1916 1917 return (0); 1918 } 1919 1920 /* 1921 * Must drain the wrq or make sure that someone else will. 
1922 */ 1923 static void 1924 wrq_tx_drain(void *arg, int n) 1925 { 1926 struct sge_wrq *wrq = arg; 1927 struct sge_eq *eq = &wrq->eq; 1928 1929 EQ_LOCK(eq); 1930 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 1931 drain_wrq_wr_list(wrq->adapter, wrq); 1932 EQ_UNLOCK(eq); 1933 } 1934 1935 static void 1936 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 1937 { 1938 struct sge_eq *eq = &wrq->eq; 1939 u_int available, dbdiff; /* # of hardware descriptors */ 1940 u_int n; 1941 struct wrqe *wr; 1942 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 1943 1944 EQ_LOCK_ASSERT_OWNED(eq); 1945 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 1946 wr = STAILQ_FIRST(&wrq->wr_list); 1947 MPASS(wr != NULL); /* Must be called with something useful to do */ 1948 MPASS(eq->pidx == eq->dbidx); 1949 dbdiff = 0; 1950 1951 do { 1952 eq->cidx = read_hw_cidx(eq); 1953 if (eq->pidx == eq->cidx) 1954 available = eq->sidx - 1; 1955 else 1956 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 1957 1958 MPASS(wr->wrq == wrq); 1959 n = howmany(wr->wr_len, EQ_ESIZE); 1960 if (available < n) 1961 break; 1962 1963 dst = (void *)&eq->desc[eq->pidx]; 1964 if (__predict_true(eq->sidx - eq->pidx > n)) { 1965 /* Won't wrap, won't end exactly at the status page. */ 1966 bcopy(&wr->wr[0], dst, wr->wr_len); 1967 eq->pidx += n; 1968 } else { 1969 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 1970 1971 bcopy(&wr->wr[0], dst, first_portion); 1972 if (wr->wr_len > first_portion) { 1973 bcopy(&wr->wr[first_portion], &eq->desc[0], 1974 wr->wr_len - first_portion); 1975 } 1976 eq->pidx = n - (eq->sidx - eq->pidx); 1977 } 1978 wrq->tx_wrs_copied++; 1979 1980 if (available < eq->sidx / 4 && 1981 atomic_cmpset_int(&eq->equiq, 0, 1)) { 1982 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 1983 F_FW_WR_EQUEQ); 1984 eq->equeqidx = eq->pidx; 1985 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 1986 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 1987 eq->equeqidx = eq->pidx; 1988 } 1989 1990 dbdiff += n; 1991 if (dbdiff >= 16) { 1992 ring_eq_db(sc, eq, dbdiff); 1993 dbdiff = 0; 1994 } 1995 1996 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 1997 free_wrqe(wr); 1998 MPASS(wrq->nwr_pending > 0); 1999 wrq->nwr_pending--; 2000 MPASS(wrq->ndesc_needed >= n); 2001 wrq->ndesc_needed -= n; 2002 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 2003 2004 if (dbdiff) 2005 ring_eq_db(sc, eq, dbdiff); 2006 } 2007 2008 /* 2009 * Doesn't fail. Holds on to work requests it can't send right away. 2010 */ 2011 void 2012 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 2013 { 2014 #ifdef INVARIANTS 2015 struct sge_eq *eq = &wrq->eq; 2016 #endif 2017 2018 EQ_LOCK_ASSERT_OWNED(eq); 2019 MPASS(wr != NULL); 2020 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 2021 MPASS((wr->wr_len & 0x7) == 0); 2022 2023 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 2024 wrq->nwr_pending++; 2025 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 2026 2027 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 2028 return; /* commit_wrq_wr will drain wr_list as well. */ 2029 2030 drain_wrq_wr_list(sc, wrq); 2031 2032 /* Doorbell must have caught up to the pidx. 
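 * drain_wrq_wr_list rings the doorbell for every descriptor it writes
 * before returning, so dbidx is up to date with pidx here.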
*/ 2033 MPASS(eq->pidx == eq->dbidx); 2034 } 2035 2036 void 2037 t4_update_fl_bufsize(struct ifnet *ifp) 2038 { 2039 struct vi_info *vi = ifp->if_softc; 2040 struct adapter *sc = vi->pi->adapter; 2041 struct sge_rxq *rxq; 2042 #ifdef TCP_OFFLOAD 2043 struct sge_ofld_rxq *ofld_rxq; 2044 #endif 2045 struct sge_fl *fl; 2046 int i, maxp, mtu = ifp->if_mtu; 2047 2048 maxp = mtu_to_max_payload(sc, mtu, 0); 2049 for_each_rxq(vi, i, rxq) { 2050 fl = &rxq->fl; 2051 2052 FL_LOCK(fl); 2053 find_best_refill_source(sc, fl, maxp); 2054 FL_UNLOCK(fl); 2055 } 2056 #ifdef TCP_OFFLOAD 2057 maxp = mtu_to_max_payload(sc, mtu, 1); 2058 for_each_ofld_rxq(vi, i, ofld_rxq) { 2059 fl = &ofld_rxq->fl; 2060 2061 FL_LOCK(fl); 2062 find_best_refill_source(sc, fl, maxp); 2063 FL_UNLOCK(fl); 2064 } 2065 #endif 2066 } 2067 2068 static inline int 2069 mbuf_nsegs(struct mbuf *m) 2070 { 2071 2072 M_ASSERTPKTHDR(m); 2073 KASSERT(m->m_pkthdr.l5hlen > 0, 2074 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 2075 2076 return (m->m_pkthdr.l5hlen); 2077 } 2078 2079 static inline void 2080 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 2081 { 2082 2083 M_ASSERTPKTHDR(m); 2084 m->m_pkthdr.l5hlen = nsegs; 2085 } 2086 2087 static inline int 2088 mbuf_len16(struct mbuf *m) 2089 { 2090 int n; 2091 2092 M_ASSERTPKTHDR(m); 2093 n = m->m_pkthdr.PH_loc.eight[0]; 2094 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2095 2096 return (n); 2097 } 2098 2099 static inline void 2100 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2101 { 2102 2103 M_ASSERTPKTHDR(m); 2104 m->m_pkthdr.PH_loc.eight[0] = len16; 2105 } 2106 2107 #ifdef RATELIMIT 2108 static inline int 2109 mbuf_eo_nsegs(struct mbuf *m) 2110 { 2111 2112 M_ASSERTPKTHDR(m); 2113 return (m->m_pkthdr.PH_loc.eight[1]); 2114 } 2115 2116 static inline void 2117 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2118 { 2119 2120 M_ASSERTPKTHDR(m); 2121 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2122 } 2123 2124 static inline int 2125 mbuf_eo_len16(struct mbuf *m) 2126 { 2127 int n; 2128 2129 M_ASSERTPKTHDR(m); 2130 n = m->m_pkthdr.PH_loc.eight[2]; 2131 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2132 2133 return (n); 2134 } 2135 2136 static inline void 2137 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2138 { 2139 2140 M_ASSERTPKTHDR(m); 2141 m->m_pkthdr.PH_loc.eight[2] = len16; 2142 } 2143 2144 static inline int 2145 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2146 { 2147 2148 M_ASSERTPKTHDR(m); 2149 return (m->m_pkthdr.PH_loc.eight[3]); 2150 } 2151 2152 static inline void 2153 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2154 { 2155 2156 M_ASSERTPKTHDR(m); 2157 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2158 } 2159 2160 static inline int 2161 needs_eo(struct mbuf *m) 2162 { 2163 2164 return (m->m_pkthdr.snd_tag != NULL); 2165 } 2166 #endif 2167 2168 static inline int 2169 needs_tso(struct mbuf *m) 2170 { 2171 2172 M_ASSERTPKTHDR(m); 2173 2174 return (m->m_pkthdr.csum_flags & CSUM_TSO); 2175 } 2176 2177 static inline int 2178 needs_l3_csum(struct mbuf *m) 2179 { 2180 2181 M_ASSERTPKTHDR(m); 2182 2183 return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)); 2184 } 2185 2186 static inline int 2187 needs_l4_csum(struct mbuf *m) 2188 { 2189 2190 M_ASSERTPKTHDR(m); 2191 2192 return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 2193 CSUM_TCP_IPV6 | CSUM_TSO)); 2194 } 2195 2196 static inline int 2197 needs_tcp_csum(struct mbuf *m) 2198 { 2199 2200 M_ASSERTPKTHDR(m); 2201 return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO)); 2202 } 2203 2204 #ifdef 
RATELIMIT 2205 static inline int 2206 needs_udp_csum(struct mbuf *m) 2207 { 2208 2209 M_ASSERTPKTHDR(m); 2210 return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)); 2211 } 2212 #endif 2213 2214 static inline int 2215 needs_vlan_insertion(struct mbuf *m) 2216 { 2217 2218 M_ASSERTPKTHDR(m); 2219 2220 return (m->m_flags & M_VLANTAG); 2221 } 2222 2223 static void * 2224 m_advance(struct mbuf **pm, int *poffset, int len) 2225 { 2226 struct mbuf *m = *pm; 2227 int offset = *poffset; 2228 uintptr_t p = 0; 2229 2230 MPASS(len > 0); 2231 2232 for (;;) { 2233 if (offset + len < m->m_len) { 2234 offset += len; 2235 p = mtod(m, uintptr_t) + offset; 2236 break; 2237 } 2238 len -= m->m_len - offset; 2239 m = m->m_next; 2240 offset = 0; 2241 MPASS(m != NULL); 2242 } 2243 *poffset = offset; 2244 *pm = m; 2245 return ((void *)p); 2246 } 2247 2248 /* 2249 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2250 * must have at least one mbuf that's not empty. It is possible for this 2251 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 2252 */ 2253 static inline int 2254 count_mbuf_nsegs(struct mbuf *m, int skip) 2255 { 2256 vm_paddr_t lastb, next; 2257 vm_offset_t va; 2258 int len, nsegs; 2259 2260 M_ASSERTPKTHDR(m); 2261 MPASS(m->m_pkthdr.len > 0); 2262 MPASS(m->m_pkthdr.len >= skip); 2263 2264 nsegs = 0; 2265 lastb = 0; 2266 for (; m; m = m->m_next) { 2267 2268 len = m->m_len; 2269 if (__predict_false(len == 0)) 2270 continue; 2271 if (skip >= len) { 2272 skip -= len; 2273 continue; 2274 } 2275 va = mtod(m, vm_offset_t) + skip; 2276 len -= skip; 2277 skip = 0; 2278 next = pmap_kextract(va); 2279 nsegs += sglist_count((void *)(uintptr_t)va, len); 2280 if (lastb + 1 == next) 2281 nsegs--; 2282 lastb = pmap_kextract(va + len - 1); 2283 } 2284 2285 return (nsegs); 2286 } 2287 2288 /* 2289 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2290 * a) caller can assume it's been freed if this function returns with an error. 2291 * b) it may get defragged up if the gather list is too long for the hardware. 2292 */ 2293 int 2294 parse_pkt(struct adapter *sc, struct mbuf **mp) 2295 { 2296 struct mbuf *m0 = *mp, *m; 2297 int rc, nsegs, defragged = 0, offset; 2298 struct ether_header *eh; 2299 void *l3hdr; 2300 #if defined(INET) || defined(INET6) 2301 struct tcphdr *tcp; 2302 #endif 2303 uint16_t eh_type; 2304 2305 M_ASSERTPKTHDR(m0); 2306 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2307 rc = EINVAL; 2308 fail: 2309 m_freem(m0); 2310 *mp = NULL; 2311 return (rc); 2312 } 2313 restart: 2314 /* 2315 * First count the number of gather list segments in the payload. 2316 * Defrag the mbuf if nsegs exceeds the hardware limit. 2317 */ 2318 M_ASSERTPKTHDR(m0); 2319 MPASS(m0->m_pkthdr.len > 0); 2320 nsegs = count_mbuf_nsegs(m0, 0); 2321 if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { 2322 if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { 2323 rc = EFBIG; 2324 goto fail; 2325 } 2326 *mp = m0 = m; /* update caller's copy after defrag */ 2327 goto restart; 2328 } 2329 2330 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { 2331 m0 = m_pullup(m0, m0->m_pkthdr.len); 2332 if (m0 == NULL) { 2333 /* Should have left well enough alone. 
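 * m_pullup has already freed the chain on failure, so the fail path's
 * m_freem(NULL) is a no-op and the caller just sees EFBIG.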
*/ 2334 rc = EFBIG; 2335 goto fail; 2336 } 2337 *mp = m0; /* update caller's copy after pullup */ 2338 goto restart; 2339 } 2340 set_mbuf_nsegs(m0, nsegs); 2341 if (sc->flags & IS_VF) 2342 set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); 2343 else 2344 set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); 2345 2346 #ifdef RATELIMIT 2347 /* 2348 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2349 * checksumming is enabled. needs_l4_csum happens to check for all the 2350 * right things. 2351 */ 2352 if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) 2353 m0->m_pkthdr.snd_tag = NULL; 2354 #endif 2355 2356 if (!needs_tso(m0) && 2357 #ifdef RATELIMIT 2358 !needs_eo(m0) && 2359 #endif 2360 !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) 2361 return (0); 2362 2363 m = m0; 2364 eh = mtod(m, struct ether_header *); 2365 eh_type = ntohs(eh->ether_type); 2366 if (eh_type == ETHERTYPE_VLAN) { 2367 struct ether_vlan_header *evh = (void *)eh; 2368 2369 eh_type = ntohs(evh->evl_proto); 2370 m0->m_pkthdr.l2hlen = sizeof(*evh); 2371 } else 2372 m0->m_pkthdr.l2hlen = sizeof(*eh); 2373 2374 offset = 0; 2375 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2376 2377 switch (eh_type) { 2378 #ifdef INET6 2379 case ETHERTYPE_IPV6: 2380 { 2381 struct ip6_hdr *ip6 = l3hdr; 2382 2383 MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); 2384 2385 m0->m_pkthdr.l3hlen = sizeof(*ip6); 2386 break; 2387 } 2388 #endif 2389 #ifdef INET 2390 case ETHERTYPE_IP: 2391 { 2392 struct ip *ip = l3hdr; 2393 2394 m0->m_pkthdr.l3hlen = ip->ip_hl * 4; 2395 break; 2396 } 2397 #endif 2398 default: 2399 panic("%s: ethertype 0x%04x unknown. if_cxgbe must be compiled" 2400 " with the same INET/INET6 options as the kernel.", 2401 __func__, eh_type); 2402 } 2403 2404 #if defined(INET) || defined(INET6) 2405 if (needs_tcp_csum(m0)) { 2406 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2407 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2408 #ifdef RATELIMIT 2409 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2410 set_mbuf_eo_tsclk_tsoff(m0, 2411 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2412 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2413 } else 2414 set_mbuf_eo_tsclk_tsoff(m0, 0); 2415 } else if (needs_udp_csum(m)) { 2416 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2417 #endif 2418 } 2419 #ifdef RATELIMIT 2420 if (needs_eo(m0)) { 2421 u_int immhdrs; 2422 2423 /* EO WRs have the headers in the WR and not the GL. 
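 * Count SGL segments starting past the L2/L3/L4 headers; those bytes
 * travel as immediate data inside the work request.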
*/ 2424 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2425 m0->m_pkthdr.l4hlen; 2426 nsegs = count_mbuf_nsegs(m0, immhdrs); 2427 set_mbuf_eo_nsegs(m0, nsegs); 2428 set_mbuf_eo_len16(m0, 2429 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2430 } 2431 #endif 2432 #endif 2433 MPASS(m0 == *mp); 2434 return (0); 2435 } 2436 2437 void * 2438 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2439 { 2440 struct sge_eq *eq = &wrq->eq; 2441 struct adapter *sc = wrq->adapter; 2442 int ndesc, available; 2443 struct wrqe *wr; 2444 void *w; 2445 2446 MPASS(len16 > 0); 2447 ndesc = howmany(len16, EQ_ESIZE / 16); 2448 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2449 2450 EQ_LOCK(eq); 2451 2452 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2453 drain_wrq_wr_list(sc, wrq); 2454 2455 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2456 slowpath: 2457 EQ_UNLOCK(eq); 2458 wr = alloc_wrqe(len16 * 16, wrq); 2459 if (__predict_false(wr == NULL)) 2460 return (NULL); 2461 cookie->pidx = -1; 2462 cookie->ndesc = ndesc; 2463 return (&wr->wr); 2464 } 2465 2466 eq->cidx = read_hw_cidx(eq); 2467 if (eq->pidx == eq->cidx) 2468 available = eq->sidx - 1; 2469 else 2470 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2471 if (available < ndesc) 2472 goto slowpath; 2473 2474 cookie->pidx = eq->pidx; 2475 cookie->ndesc = ndesc; 2476 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2477 2478 w = &eq->desc[eq->pidx]; 2479 IDXINCR(eq->pidx, ndesc, eq->sidx); 2480 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2481 w = &wrq->ss[0]; 2482 wrq->ss_pidx = cookie->pidx; 2483 wrq->ss_len = len16 * 16; 2484 } 2485 2486 EQ_UNLOCK(eq); 2487 2488 return (w); 2489 } 2490 2491 void 2492 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2493 { 2494 struct sge_eq *eq = &wrq->eq; 2495 struct adapter *sc = wrq->adapter; 2496 int ndesc, pidx; 2497 struct wrq_cookie *prev, *next; 2498 2499 if (cookie->pidx == -1) { 2500 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2501 2502 t4_wrq_tx(sc, wr); 2503 return; 2504 } 2505 2506 if (__predict_false(w == &wrq->ss[0])) { 2507 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2508 2509 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2510 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2511 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2512 wrq->tx_wrs_ss++; 2513 } else 2514 wrq->tx_wrs_direct++; 2515 2516 EQ_LOCK(eq); 2517 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 2518 pidx = cookie->pidx; 2519 MPASS(pidx >= 0 && pidx < eq->sidx); 2520 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2521 next = TAILQ_NEXT(cookie, link); 2522 if (prev == NULL) { 2523 MPASS(pidx == eq->dbidx); 2524 if (next == NULL || ndesc >= 16) { 2525 int available; 2526 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2527 2528 /* 2529 * Note that the WR via which we'll request tx updates 2530 * is at pidx and not eq->pidx, which has moved on 2531 * already. 
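 * This cookie was at the head of the incomplete list
 * (pidx == dbidx), so later callers may already have
 * written WRs beyond it.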
2532 */ 2533 dst = (void *)&eq->desc[pidx]; 2534 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2535 if (available < eq->sidx / 4 && 2536 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2537 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2538 F_FW_WR_EQUEQ); 2539 eq->equeqidx = pidx; 2540 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 2541 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 2542 eq->equeqidx = pidx; 2543 } 2544 2545 ring_eq_db(wrq->adapter, eq, ndesc); 2546 } else { 2547 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 2548 next->pidx = pidx; 2549 next->ndesc += ndesc; 2550 } 2551 } else { 2552 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 2553 prev->ndesc += ndesc; 2554 } 2555 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 2556 2557 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2558 drain_wrq_wr_list(sc, wrq); 2559 2560 #ifdef INVARIANTS 2561 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 2562 /* Doorbell must have caught up to the pidx. */ 2563 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 2564 } 2565 #endif 2566 EQ_UNLOCK(eq); 2567 } 2568 2569 static u_int 2570 can_resume_eth_tx(struct mp_ring *r) 2571 { 2572 struct sge_eq *eq = r->cookie; 2573 2574 return (total_available_tx_desc(eq) > eq->sidx / 8); 2575 } 2576 2577 static inline int 2578 cannot_use_txpkts(struct mbuf *m) 2579 { 2580 /* maybe put a GL limit too, to avoid silliness? */ 2581 2582 return (needs_tso(m)); 2583 } 2584 2585 static inline int 2586 discard_tx(struct sge_eq *eq) 2587 { 2588 2589 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 2590 } 2591 2592 /* 2593 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 2594 * be consumed. Return the actual number consumed. 0 indicates a stall. 2595 */ 2596 static u_int 2597 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) 2598 { 2599 struct sge_txq *txq = r->cookie; 2600 struct sge_eq *eq = &txq->eq; 2601 struct ifnet *ifp = txq->ifp; 2602 struct vi_info *vi = ifp->if_softc; 2603 struct port_info *pi = vi->pi; 2604 struct adapter *sc = pi->adapter; 2605 u_int total, remaining; /* # of packets */ 2606 u_int available, dbdiff; /* # of hardware descriptors */ 2607 u_int n, next_cidx; 2608 struct mbuf *m0, *tail; 2609 struct txpkts txp; 2610 struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ 2611 2612 remaining = IDXDIFF(pidx, cidx, r->size); 2613 MPASS(remaining > 0); /* Must not be called without work to do. */ 2614 total = 0; 2615 2616 TXQ_LOCK(txq); 2617 if (__predict_false(discard_tx(eq))) { 2618 while (cidx != pidx) { 2619 m0 = r->items[cidx]; 2620 m_freem(m0); 2621 if (++cidx == r->size) 2622 cidx = 0; 2623 } 2624 reclaim_tx_descs(txq, 2048); 2625 total = remaining; 2626 goto done; 2627 } 2628 2629 /* How many hardware descriptors do we have readily available. 
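 * The loop below tops this up with reclaim_tx_descs whenever fewer
 * than SGE_MAX_WR_NDESC descriptors remain.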
*/ 2630 if (eq->pidx == eq->cidx) 2631 available = eq->sidx - 1; 2632 else 2633 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2634 dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); 2635 2636 while (remaining > 0) { 2637 2638 m0 = r->items[cidx]; 2639 M_ASSERTPKTHDR(m0); 2640 MPASS(m0->m_nextpkt == NULL); 2641 2642 if (available < SGE_MAX_WR_NDESC) { 2643 available += reclaim_tx_descs(txq, 64); 2644 if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) 2645 break; /* out of descriptors */ 2646 } 2647 2648 next_cidx = cidx + 1; 2649 if (__predict_false(next_cidx == r->size)) 2650 next_cidx = 0; 2651 2652 wr = (void *)&eq->desc[eq->pidx]; 2653 if (sc->flags & IS_VF) { 2654 total++; 2655 remaining--; 2656 ETHER_BPF_MTAP(ifp, m0); 2657 n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0, 2658 available); 2659 } else if (remaining > 1 && 2660 try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { 2661 2662 /* pkts at cidx, next_cidx should both be in txp. */ 2663 MPASS(txp.npkt == 2); 2664 tail = r->items[next_cidx]; 2665 MPASS(tail->m_nextpkt == NULL); 2666 ETHER_BPF_MTAP(ifp, m0); 2667 ETHER_BPF_MTAP(ifp, tail); 2668 m0->m_nextpkt = tail; 2669 2670 if (__predict_false(++next_cidx == r->size)) 2671 next_cidx = 0; 2672 2673 while (next_cidx != pidx) { 2674 if (add_to_txpkts(r->items[next_cidx], &txp, 2675 available) != 0) 2676 break; 2677 tail->m_nextpkt = r->items[next_cidx]; 2678 tail = tail->m_nextpkt; 2679 ETHER_BPF_MTAP(ifp, tail); 2680 if (__predict_false(++next_cidx == r->size)) 2681 next_cidx = 0; 2682 } 2683 2684 n = write_txpkts_wr(txq, wr, m0, &txp, available); 2685 total += txp.npkt; 2686 remaining -= txp.npkt; 2687 } else { 2688 total++; 2689 remaining--; 2690 ETHER_BPF_MTAP(ifp, m0); 2691 n = write_txpkt_wr(txq, (void *)wr, m0, available); 2692 } 2693 MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); 2694 2695 available -= n; 2696 dbdiff += n; 2697 IDXINCR(eq->pidx, n, eq->sidx); 2698 2699 if (total_available_tx_desc(eq) < eq->sidx / 4 && 2700 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2701 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2702 F_FW_WR_EQUEQ); 2703 eq->equeqidx = eq->pidx; 2704 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 2705 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 2706 eq->equeqidx = eq->pidx; 2707 } 2708 2709 if (dbdiff >= 16 && remaining >= 4) { 2710 ring_eq_db(sc, eq, dbdiff); 2711 available += reclaim_tx_descs(txq, 4 * dbdiff); 2712 dbdiff = 0; 2713 } 2714 2715 cidx = next_cidx; 2716 } 2717 if (dbdiff != 0) { 2718 ring_eq_db(sc, eq, dbdiff); 2719 reclaim_tx_descs(txq, 32); 2720 } 2721 done: 2722 TXQ_UNLOCK(txq); 2723 2724 return (total); 2725 } 2726 2727 static inline void 2728 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 2729 int qsize) 2730 { 2731 2732 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 2733 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 2734 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 2735 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 2736 2737 iq->flags = 0; 2738 iq->adapter = sc; 2739 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 2740 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 2741 if (pktc_idx >= 0) { 2742 iq->intr_params |= F_QINTR_CNT_EN; 2743 iq->intr_pktc_idx = pktc_idx; 2744 } 2745 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 2746 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 2747 } 2748 2749 static inline void 2750 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 2751 { 2752 2753 fl->qsize = qsize; 
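	/* sidx excludes the descriptors consumed by the status page. */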
2754 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2755 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 2756 if (sc->flags & BUF_PACKING_OK && 2757 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 2758 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 2759 fl->flags |= FL_BUF_PACKING; 2760 find_best_refill_source(sc, fl, maxp); 2761 find_safe_refill_source(sc, fl); 2762 } 2763 2764 static inline void 2765 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 2766 uint8_t tx_chan, uint16_t iqid, char *name) 2767 { 2768 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 2769 2770 eq->flags = eqtype & EQ_TYPEMASK; 2771 eq->tx_chan = tx_chan; 2772 eq->iqid = iqid; 2773 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2774 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 2775 } 2776 2777 static int 2778 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 2779 bus_dmamap_t *map, bus_addr_t *pa, void **va) 2780 { 2781 int rc; 2782 2783 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 2784 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 2785 if (rc != 0) { 2786 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 2787 goto done; 2788 } 2789 2790 rc = bus_dmamem_alloc(*tag, va, 2791 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 2792 if (rc != 0) { 2793 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 2794 goto done; 2795 } 2796 2797 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 2798 if (rc != 0) { 2799 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 2800 goto done; 2801 } 2802 done: 2803 if (rc) 2804 free_ring(sc, *tag, *map, *pa, *va); 2805 2806 return (rc); 2807 } 2808 2809 static int 2810 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 2811 bus_addr_t pa, void *va) 2812 { 2813 if (pa) 2814 bus_dmamap_unload(tag, map); 2815 if (va) 2816 bus_dmamem_free(tag, va, map); 2817 if (tag) 2818 bus_dma_tag_destroy(tag); 2819 2820 return (0); 2821 } 2822 2823 /* 2824 * Allocates the ring for an ingress queue and an optional freelist. If the 2825 * freelist is specified it will be allocated and then associated with the 2826 * ingress queue. 2827 * 2828 * Returns errno on failure. Resources allocated up to that point may still be 2829 * allocated. Caller is responsible for cleanup in case this function fails. 2830 * 2831 * If the ingress queue will take interrupts directly then the intr_idx 2832 * specifies the vector, starting from 0. -1 means the interrupts for this 2833 * queue should be forwarded to the fwq. 
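 *
 * A negative 'cong' disables congestion feedback for the queue;
 * otherwise congestion feedback is enabled and 'cong' is used as a
 * channel map (see tnl_cong and the cong_drop tunable).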
2834 */ 2835 static int 2836 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 2837 int intr_idx, int cong) 2838 { 2839 int rc, i, cntxt_id; 2840 size_t len; 2841 struct fw_iq_cmd c; 2842 struct port_info *pi = vi->pi; 2843 struct adapter *sc = iq->adapter; 2844 struct sge_params *sp = &sc->params.sge; 2845 __be32 v = 0; 2846 2847 len = iq->qsize * IQ_ESIZE; 2848 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 2849 (void **)&iq->desc); 2850 if (rc != 0) 2851 return (rc); 2852 2853 bzero(&c, sizeof(c)); 2854 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 2855 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 2856 V_FW_IQ_CMD_VFN(0)); 2857 2858 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 2859 FW_LEN16(c)); 2860 2861 /* Special handling for firmware event queue */ 2862 if (iq == &sc->sge.fwq) 2863 v |= F_FW_IQ_CMD_IQASYNCH; 2864 2865 if (intr_idx < 0) { 2866 /* Forwarded interrupts, all headed to fwq */ 2867 v |= F_FW_IQ_CMD_IQANDST; 2868 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 2869 } else { 2870 KASSERT(intr_idx < sc->intr_count, 2871 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 2872 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 2873 } 2874 2875 c.type_to_iqandstindex = htobe32(v | 2876 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 2877 V_FW_IQ_CMD_VIID(vi->viid) | 2878 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 2879 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 2880 F_FW_IQ_CMD_IQGTSMODE | 2881 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 2882 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 2883 c.iqsize = htobe16(iq->qsize); 2884 c.iqaddr = htobe64(iq->ba); 2885 if (cong >= 0) 2886 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 2887 2888 if (fl) { 2889 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 2890 2891 len = fl->qsize * EQ_ESIZE; 2892 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 2893 &fl->ba, (void **)&fl->desc); 2894 if (rc) 2895 return (rc); 2896 2897 /* Allocate space for one software descriptor per buffer. */ 2898 rc = alloc_fl_sdesc(fl); 2899 if (rc != 0) { 2900 device_printf(sc->dev, 2901 "failed to setup fl software descriptors: %d\n", 2902 rc); 2903 return (rc); 2904 } 2905 2906 if (fl->flags & FL_BUF_PACKING) { 2907 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 2908 fl->buf_boundary = sp->pack_boundary; 2909 } else { 2910 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 2911 fl->buf_boundary = 16; 2912 } 2913 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 2914 fl->buf_boundary = sp->pad_boundary; 2915 2916 c.iqns_to_fl0congen |= 2917 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 2918 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 2919 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 2920 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 2921 0)); 2922 if (cong >= 0) { 2923 c.iqns_to_fl0congen |= 2924 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 2925 F_FW_IQ_CMD_FL0CONGCIF | 2926 F_FW_IQ_CMD_FL0CONGEN); 2927 } 2928 c.fl0dcaen_to_fl0cidxfthresh = 2929 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 2930 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | 2931 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
2932 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 2933 c.fl0size = htobe16(fl->qsize); 2934 c.fl0addr = htobe64(fl->ba); 2935 } 2936 2937 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 2938 if (rc != 0) { 2939 device_printf(sc->dev, 2940 "failed to create ingress queue: %d\n", rc); 2941 return (rc); 2942 } 2943 2944 iq->cidx = 0; 2945 iq->gen = F_RSPD_GEN; 2946 iq->intr_next = iq->intr_params; 2947 iq->cntxt_id = be16toh(c.iqid); 2948 iq->abs_id = be16toh(c.physiqid); 2949 iq->flags |= IQ_ALLOCATED; 2950 2951 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 2952 if (cntxt_id >= sc->sge.niq) { 2953 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 2954 cntxt_id, sc->sge.niq - 1); 2955 } 2956 sc->sge.iqmap[cntxt_id] = iq; 2957 2958 if (fl) { 2959 u_int qid; 2960 2961 iq->flags |= IQ_HAS_FL; 2962 fl->cntxt_id = be16toh(c.fl0id); 2963 fl->pidx = fl->cidx = 0; 2964 2965 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 2966 if (cntxt_id >= sc->sge.neq) { 2967 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 2968 __func__, cntxt_id, sc->sge.neq - 1); 2969 } 2970 sc->sge.eqmap[cntxt_id] = (void *)fl; 2971 2972 qid = fl->cntxt_id; 2973 if (isset(&sc->doorbells, DOORBELL_UDB)) { 2974 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 2975 uint32_t mask = (1 << s_qpp) - 1; 2976 volatile uint8_t *udb; 2977 2978 udb = sc->udbs_base + UDBS_DB_OFFSET; 2979 udb += (qid >> s_qpp) << PAGE_SHIFT; 2980 qid &= mask; 2981 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 2982 udb += qid << UDBS_SEG_SHIFT; 2983 qid = 0; 2984 } 2985 fl->udb = (volatile void *)udb; 2986 } 2987 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 2988 2989 FL_LOCK(fl); 2990 /* Enough to make sure the SGE doesn't think it's starved */ 2991 refill_fl(sc, fl, fl->lowat); 2992 FL_UNLOCK(fl); 2993 } 2994 2995 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { 2996 uint32_t param, val; 2997 2998 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 2999 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3000 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 3001 if (cong == 0) 3002 val = 1 << 19; 3003 else { 3004 val = 2 << 19; 3005 for (i = 0; i < 4; i++) { 3006 if (cong & (1 << i)) 3007 val |= 1 << (i << 2); 3008 } 3009 } 3010 3011 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3012 if (rc != 0) { 3013 /* report error but carry on */ 3014 device_printf(sc->dev, 3015 "failed to set congestion manager context for " 3016 "ingress queue %d: %d\n", iq->cntxt_id, rc); 3017 } 3018 } 3019 3020 /* Enable IQ interrupts */ 3021 atomic_store_rel_int(&iq->state, IQS_IDLE); 3022 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3023 V_INGRESSQID(iq->cntxt_id)); 3024 3025 return (0); 3026 } 3027 3028 static int 3029 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3030 { 3031 int rc; 3032 struct adapter *sc = iq->adapter; 3033 device_t dev; 3034 3035 if (sc == NULL) 3036 return (0); /* nothing to do */ 3037 3038 dev = vi ? vi->dev : sc->dev; 3039 3040 if (iq->flags & IQ_ALLOCATED) { 3041 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 3042 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 3043 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3044 if (rc != 0) { 3045 device_printf(dev, 3046 "failed to free queue %p: %d\n", iq, rc); 3047 return (rc); 3048 } 3049 iq->flags &= ~IQ_ALLOCATED; 3050 } 3051 3052 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3053 3054 bzero(iq, sizeof(*iq)); 3055 3056 if (fl) { 3057 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 3058 fl->desc); 3059 3060 if (fl->sdesc) 3061 free_fl_sdesc(sc, fl); 3062 3063 if (mtx_initialized(&fl->fl_lock)) 3064 mtx_destroy(&fl->fl_lock); 3065 3066 bzero(fl, sizeof(*fl)); 3067 } 3068 3069 return (0); 3070 } 3071 3072 static void 3073 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3074 struct sge_iq *iq) 3075 { 3076 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3077 3078 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3079 "bus address of descriptor ring"); 3080 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3081 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3082 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3083 CTLTYPE_INT | CTLFLAG_RD, &iq->abs_id, 0, sysctl_uint16, "I", 3084 "absolute id of the queue"); 3085 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3086 CTLTYPE_INT | CTLFLAG_RD, &iq->cntxt_id, 0, sysctl_uint16, "I", 3087 "SGE context id of the queue"); 3088 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3089 CTLTYPE_INT | CTLFLAG_RD, &iq->cidx, 0, sysctl_uint16, "I", 3090 "consumer index"); 3091 } 3092 3093 static void 3094 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3095 struct sysctl_oid *oid, struct sge_fl *fl) 3096 { 3097 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3098 3099 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3100 "freelist"); 3101 children = SYSCTL_CHILDREN(oid); 3102 3103 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3104 &fl->ba, "bus address of descriptor ring"); 3105 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3106 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3107 "desc ring size in bytes"); 3108 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3109 CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", 3110 "SGE context id of the freelist"); 3111 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3112 fl_pad ? 1 : 0, "padding enabled"); 3113 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3114 fl->flags & FL_BUF_PACKING ? 
1 : 0, "packing enabled"); 3115 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3116 0, "consumer index"); 3117 if (fl->flags & FL_BUF_PACKING) { 3118 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3119 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3120 } 3121 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3122 0, "producer index"); 3123 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", 3124 CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); 3125 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", 3126 CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); 3127 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3128 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3129 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3130 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3131 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3132 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3133 } 3134 3135 static int 3136 alloc_fwq(struct adapter *sc) 3137 { 3138 int rc, intr_idx; 3139 struct sge_iq *fwq = &sc->sge.fwq; 3140 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 3141 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3142 3143 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 3144 if (sc->flags & IS_VF) 3145 intr_idx = 0; 3146 else 3147 intr_idx = sc->intr_count > 1 ? 1 : 0; 3148 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 3149 if (rc != 0) { 3150 device_printf(sc->dev, 3151 "failed to create firmware event queue: %d\n", rc); 3152 return (rc); 3153 } 3154 3155 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 3156 NULL, "firmware event queue"); 3157 add_iq_sysctls(&sc->ctx, oid, fwq); 3158 3159 return (0); 3160 } 3161 3162 static int 3163 free_fwq(struct adapter *sc) 3164 { 3165 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 3166 } 3167 3168 static int 3169 alloc_mgmtq(struct adapter *sc) 3170 { 3171 int rc; 3172 struct sge_wrq *mgmtq = &sc->sge.mgmtq; 3173 char name[16]; 3174 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 3175 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3176 3177 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, 3178 NULL, "management queue"); 3179 3180 snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); 3181 init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, 3182 sc->sge.fwq.cntxt_id, name); 3183 rc = alloc_wrq(sc, NULL, mgmtq, oid); 3184 if (rc != 0) { 3185 device_printf(sc->dev, 3186 "failed to create management queue: %d\n", rc); 3187 return (rc); 3188 } 3189 3190 return (0); 3191 } 3192 3193 static int 3194 free_mgmtq(struct adapter *sc) 3195 { 3196 3197 return free_wrq(sc, &sc->sge.mgmtq); 3198 } 3199 3200 int 3201 tnl_cong(struct port_info *pi, int drop) 3202 { 3203 3204 if (drop == -1) 3205 return (-1); 3206 else if (drop == 1) 3207 return (0); 3208 else 3209 return (pi->rx_e_chan_map); 3210 } 3211 3212 static int 3213 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 3214 struct sysctl_oid *oid) 3215 { 3216 int rc; 3217 struct adapter *sc = vi->pi->adapter; 3218 struct sysctl_oid_list *children; 3219 char name[16]; 3220 3221 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 3222 tnl_cong(vi->pi, cong_drop)); 3223 if (rc != 0) 3224 return (rc); 3225 3226 if (idx == 0) 3227 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3228 else 3229 
KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3230 ("iq_base mismatch")); 3231 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3232 ("PF with non-zero iq_base")); 3233 3234 /* 3235 * The freelist is just barely above the starvation threshold right now, 3236 * fill it up a bit more. 3237 */ 3238 FL_LOCK(&rxq->fl); 3239 refill_fl(sc, &rxq->fl, 128); 3240 FL_UNLOCK(&rxq->fl); 3241 3242 #if defined(INET) || defined(INET6) 3243 rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); 3244 if (rc != 0) 3245 return (rc); 3246 MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ 3247 3248 if (vi->ifp->if_capenable & IFCAP_LRO) 3249 rxq->iq.flags |= IQ_LRO_ENABLED; 3250 #endif 3251 rxq->ifp = vi->ifp; 3252 3253 children = SYSCTL_CHILDREN(oid); 3254 3255 snprintf(name, sizeof(name), "%d", idx); 3256 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3257 NULL, "rx queue"); 3258 children = SYSCTL_CHILDREN(oid); 3259 3260 add_iq_sysctls(&vi->ctx, oid, &rxq->iq); 3261 #if defined(INET) || defined(INET6) 3262 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3263 &rxq->lro.lro_queued, 0, NULL); 3264 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3265 &rxq->lro.lro_flushed, 0, NULL); 3266 #endif 3267 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3268 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3269 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3270 CTLFLAG_RD, &rxq->vlan_extraction, 3271 "# of times hardware extracted 802.1Q tag"); 3272 3273 add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); 3274 3275 return (rc); 3276 } 3277 3278 static int 3279 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3280 { 3281 int rc; 3282 3283 #if defined(INET) || defined(INET6) 3284 if (rxq->lro.ifp) { 3285 tcp_lro_free(&rxq->lro); 3286 rxq->lro.ifp = NULL; 3287 } 3288 #endif 3289 3290 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3291 if (rc == 0) 3292 bzero(rxq, sizeof(*rxq)); 3293 3294 return (rc); 3295 } 3296 3297 #ifdef TCP_OFFLOAD 3298 static int 3299 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3300 int intr_idx, int idx, struct sysctl_oid *oid) 3301 { 3302 struct port_info *pi = vi->pi; 3303 int rc; 3304 struct sysctl_oid_list *children; 3305 char name[16]; 3306 3307 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); 3308 if (rc != 0) 3309 return (rc); 3310 3311 children = SYSCTL_CHILDREN(oid); 3312 3313 snprintf(name, sizeof(name), "%d", idx); 3314 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3315 NULL, "rx queue"); 3316 add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); 3317 add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); 3318 3319 return (rc); 3320 } 3321 3322 static int 3323 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3324 { 3325 int rc; 3326 3327 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3328 if (rc == 0) 3329 bzero(ofld_rxq, sizeof(*ofld_rxq)); 3330 3331 return (rc); 3332 } 3333 #endif 3334 3335 #ifdef DEV_NETMAP 3336 static int 3337 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3338 int idx, struct sysctl_oid *oid) 3339 { 3340 int rc; 3341 struct sysctl_oid_list *children; 3342 struct sysctl_ctx_list *ctx; 3343 char name[16]; 3344 size_t len; 3345 struct adapter *sc = vi->pi->adapter; 3346 struct netmap_adapter *na = NA(vi->ifp); 3347 3348 MPASS(na != NULL); 3349 3350 len = vi->qsize_rxq * IQ_ESIZE; 3351 rc = alloc_ring(sc, len, 
&nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3352 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3353 if (rc != 0) 3354 return (rc); 3355 3356 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3357 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 3358 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3359 if (rc != 0) 3360 return (rc); 3361 3362 nm_rxq->vi = vi; 3363 nm_rxq->nid = idx; 3364 nm_rxq->iq_cidx = 0; 3365 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3366 nm_rxq->iq_gen = F_RSPD_GEN; 3367 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3368 nm_rxq->fl_sidx = na->num_rx_desc; 3369 nm_rxq->intr_idx = intr_idx; 3370 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 3371 3372 ctx = &vi->ctx; 3373 children = SYSCTL_CHILDREN(oid); 3374 3375 snprintf(name, sizeof(name), "%d", idx); 3376 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, 3377 "rx queue"); 3378 children = SYSCTL_CHILDREN(oid); 3379 3380 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3381 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, 3382 "I", "absolute id of the queue"); 3383 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3384 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, 3385 "I", "SGE context id of the queue"); 3386 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3387 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", 3388 "consumer index"); 3389 3390 children = SYSCTL_CHILDREN(oid); 3391 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3392 "freelist"); 3393 children = SYSCTL_CHILDREN(oid); 3394 3395 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3396 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, 3397 "I", "SGE context id of the freelist"); 3398 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 3399 &nm_rxq->fl_cidx, 0, "consumer index"); 3400 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 3401 &nm_rxq->fl_pidx, 0, "producer index"); 3402 3403 return (rc); 3404 } 3405 3406 3407 static int 3408 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 3409 { 3410 struct adapter *sc = vi->pi->adapter; 3411 3412 if (vi->flags & VI_INIT_DONE) 3413 MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); 3414 else 3415 MPASS(nm_rxq->iq_cntxt_id == 0); 3416 3417 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 3418 nm_rxq->iq_desc); 3419 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 3420 nm_rxq->fl_desc); 3421 3422 return (0); 3423 } 3424 3425 static int 3426 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 3427 struct sysctl_oid *oid) 3428 { 3429 int rc; 3430 size_t len; 3431 struct port_info *pi = vi->pi; 3432 struct adapter *sc = pi->adapter; 3433 struct netmap_adapter *na = NA(vi->ifp); 3434 char name[16]; 3435 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3436 3437 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3438 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 3439 &nm_txq->ba, (void **)&nm_txq->desc); 3440 if (rc) 3441 return (rc); 3442 3443 nm_txq->pidx = nm_txq->cidx = 0; 3444 nm_txq->sidx = na->num_tx_desc; 3445 nm_txq->nid = idx; 3446 nm_txq->iqidx = iqidx; 3447 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3448 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | 3449 V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | 3450 V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); 3451 nm_txq->cntxt_id = 
INVALID_NM_TXQ_CNTXT_ID; 3452 3453 snprintf(name, sizeof(name), "%d", idx); 3454 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3455 NULL, "netmap tx queue"); 3456 children = SYSCTL_CHILDREN(oid); 3457 3458 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3459 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 3460 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3461 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", 3462 "consumer index"); 3463 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3464 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", 3465 "producer index"); 3466 3467 return (rc); 3468 } 3469 3470 static int 3471 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 3472 { 3473 struct adapter *sc = vi->pi->adapter; 3474 3475 if (vi->flags & VI_INIT_DONE) 3476 MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); 3477 else 3478 MPASS(nm_txq->cntxt_id == 0); 3479 3480 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 3481 nm_txq->desc); 3482 3483 return (0); 3484 } 3485 #endif 3486 3487 static int 3488 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 3489 { 3490 int rc, cntxt_id; 3491 struct fw_eq_ctrl_cmd c; 3492 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3493 3494 bzero(&c, sizeof(c)); 3495 3496 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 3497 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 3498 V_FW_EQ_CTRL_CMD_VFN(0)); 3499 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 3500 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 3501 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 3502 c.physeqid_pkd = htobe32(0); 3503 c.fetchszm_to_iqid = 3504 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 3505 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 3506 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 3507 c.dcaen_to_eqsize = 3508 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3509 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3510 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | 3511 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 3512 c.eqaddr = htobe64(eq->ba); 3513 3514 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3515 if (rc != 0) { 3516 device_printf(sc->dev, 3517 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 3518 return (rc); 3519 } 3520 eq->flags |= EQ_ALLOCATED; 3521 3522 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 3523 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3524 if (cntxt_id >= sc->sge.neq) 3525 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3526 cntxt_id, sc->sge.neq - 1); 3527 sc->sge.eqmap[cntxt_id] = eq; 3528 3529 return (rc); 3530 } 3531 3532 static int 3533 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3534 { 3535 int rc, cntxt_id; 3536 struct fw_eq_eth_cmd c; 3537 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3538 3539 bzero(&c, sizeof(c)); 3540 3541 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 3542 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 3543 V_FW_EQ_ETH_CMD_VFN(0)); 3544 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 3545 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 3546 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 3547 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 3548 c.fetchszm_to_iqid = 3549 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3550 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | 
F_FW_EQ_ETH_CMD_FETCHRO | 3551 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 3552 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3553 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3554 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 3555 c.eqaddr = htobe64(eq->ba); 3556 3557 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3558 if (rc != 0) { 3559 device_printf(vi->dev, 3560 "failed to create Ethernet egress queue: %d\n", rc); 3561 return (rc); 3562 } 3563 eq->flags |= EQ_ALLOCATED; 3564 3565 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 3566 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 3567 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3568 if (cntxt_id >= sc->sge.neq) 3569 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3570 cntxt_id, sc->sge.neq - 1); 3571 sc->sge.eqmap[cntxt_id] = eq; 3572 3573 return (rc); 3574 } 3575 3576 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3577 static int 3578 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3579 { 3580 int rc, cntxt_id; 3581 struct fw_eq_ofld_cmd c; 3582 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3583 3584 bzero(&c, sizeof(c)); 3585 3586 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 3587 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 3588 V_FW_EQ_OFLD_CMD_VFN(0)); 3589 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 3590 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 3591 c.fetchszm_to_iqid = 3592 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3593 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 3594 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 3595 c.dcaen_to_eqsize = 3596 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3597 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3598 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 3599 c.eqaddr = htobe64(eq->ba); 3600 3601 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3602 if (rc != 0) { 3603 device_printf(vi->dev, 3604 "failed to create egress queue for TCP offload: %d\n", rc); 3605 return (rc); 3606 } 3607 eq->flags |= EQ_ALLOCATED; 3608 3609 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 3610 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3611 if (cntxt_id >= sc->sge.neq) 3612 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3613 cntxt_id, sc->sge.neq - 1); 3614 sc->sge.eqmap[cntxt_id] = eq; 3615 3616 return (rc); 3617 } 3618 #endif 3619 3620 static int 3621 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3622 { 3623 int rc, qsize; 3624 size_t len; 3625 3626 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3627 3628 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3629 len = qsize * EQ_ESIZE; 3630 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 3631 &eq->ba, (void **)&eq->desc); 3632 if (rc) 3633 return (rc); 3634 3635 eq->pidx = eq->cidx = 0; 3636 eq->equeqidx = eq->dbidx = 0; 3637 eq->doorbells = sc->doorbells; 3638 3639 switch (eq->flags & EQ_TYPEMASK) { 3640 case EQ_CTRL: 3641 rc = ctrl_eq_alloc(sc, eq); 3642 break; 3643 3644 case EQ_ETH: 3645 rc = eth_eq_alloc(sc, vi, eq); 3646 break; 3647 3648 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3649 case EQ_OFLD: 3650 rc = ofld_eq_alloc(sc, vi, eq); 3651 break; 3652 #endif 3653 3654 default: 3655 panic("%s: invalid eq type %d.", __func__, 3656 eq->flags & EQ_TYPEMASK); 3657 } 3658 if (rc != 0) { 3659 device_printf(sc->dev, 3660 "failed to allocate egress queue(%d): %d\n", 3661 eq->flags & EQ_TYPEMASK, rc); 3662 } 3663 3664 if (isset(&eq->doorbells, 
DOORBELL_UDB) || 3665 isset(&eq->doorbells, DOORBELL_UDBWC) || 3666 isset(&eq->doorbells, DOORBELL_WCWR)) { 3667 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3668 uint32_t mask = (1 << s_qpp) - 1; 3669 volatile uint8_t *udb; 3670 3671 udb = sc->udbs_base + UDBS_DB_OFFSET; 3672 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 3673 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 3674 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 3675 clrbit(&eq->doorbells, DOORBELL_WCWR); 3676 else { 3677 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 3678 eq->udb_qid = 0; 3679 } 3680 eq->udb = (volatile void *)udb; 3681 } 3682 3683 return (rc); 3684 } 3685 3686 static int 3687 free_eq(struct adapter *sc, struct sge_eq *eq) 3688 { 3689 int rc; 3690 3691 if (eq->flags & EQ_ALLOCATED) { 3692 switch (eq->flags & EQ_TYPEMASK) { 3693 case EQ_CTRL: 3694 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 3695 eq->cntxt_id); 3696 break; 3697 3698 case EQ_ETH: 3699 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 3700 eq->cntxt_id); 3701 break; 3702 3703 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3704 case EQ_OFLD: 3705 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 3706 eq->cntxt_id); 3707 break; 3708 #endif 3709 3710 default: 3711 panic("%s: invalid eq type %d.", __func__, 3712 eq->flags & EQ_TYPEMASK); 3713 } 3714 if (rc != 0) { 3715 device_printf(sc->dev, 3716 "failed to free egress queue (%d): %d\n", 3717 eq->flags & EQ_TYPEMASK, rc); 3718 return (rc); 3719 } 3720 eq->flags &= ~EQ_ALLOCATED; 3721 } 3722 3723 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 3724 3725 if (mtx_initialized(&eq->eq_lock)) 3726 mtx_destroy(&eq->eq_lock); 3727 3728 bzero(eq, sizeof(*eq)); 3729 return (0); 3730 } 3731 3732 static int 3733 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 3734 struct sysctl_oid *oid) 3735 { 3736 int rc; 3737 struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; 3738 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3739 3740 rc = alloc_eq(sc, vi, &wrq->eq); 3741 if (rc) 3742 return (rc); 3743 3744 wrq->adapter = sc; 3745 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 3746 TAILQ_INIT(&wrq->incomplete_wrs); 3747 STAILQ_INIT(&wrq->wr_list); 3748 wrq->nwr_pending = 0; 3749 wrq->ndesc_needed = 0; 3750 3751 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3752 &wrq->eq.ba, "bus address of descriptor ring"); 3753 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3754 wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, 3755 "desc ring size in bytes"); 3756 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3757 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 3758 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3759 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 3760 "consumer index"); 3761 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 3762 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 3763 "producer index"); 3764 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 3765 wrq->eq.sidx, "status page index"); 3766 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 3767 &wrq->tx_wrs_direct, "# of work requests (direct)"); 3768 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 3769 &wrq->tx_wrs_copied, "# of work requests (copied)"); 3770 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 3771 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 3772 3773 return (rc); 3774 } 3775 3776 static int 3777 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 3778 { 3779 int rc; 3780 3781 rc = free_eq(sc, &wrq->eq); 3782 if (rc) 3783 return (rc); 3784 3785 bzero(wrq, sizeof(*wrq)); 3786 return (0); 3787 } 3788 3789 static int 3790 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 3791 struct sysctl_oid *oid) 3792 { 3793 int rc; 3794 struct port_info *pi = vi->pi; 3795 struct adapter *sc = pi->adapter; 3796 struct sge_eq *eq = &txq->eq; 3797 char name[16]; 3798 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3799 3800 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 3801 M_CXGBE, M_WAITOK); 3802 if (rc != 0) { 3803 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 3804 return (rc); 3805 } 3806 3807 rc = alloc_eq(sc, vi, eq); 3808 if (rc != 0) { 3809 mp_ring_free(txq->r); 3810 txq->r = NULL; 3811 return (rc); 3812 } 3813 3814 /* Can't fail after this point. 
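 * (The remaining allocations use M_WAITOK and errors from the sysctl
 * additions are not checked, so the function returns 0 from here on.)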
*/ 3815 3816 if (idx == 0) 3817 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 3818 else 3819 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 3820 ("eq_base mismatch")); 3821 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 3822 ("PF with non-zero eq_base")); 3823 3824 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 3825 txq->ifp = vi->ifp; 3826 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 3827 if (sc->flags & IS_VF) 3828 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 3829 V_TXPKT_INTF(pi->tx_chan)); 3830 else 3831 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3832 V_TXPKT_INTF(pi->tx_chan) | 3833 V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | 3834 V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | 3835 V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); 3836 txq->tc_idx = -1; 3837 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 3838 M_ZERO | M_WAITOK); 3839 3840 snprintf(name, sizeof(name), "%d", idx); 3841 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3842 NULL, "tx queue"); 3843 children = SYSCTL_CHILDREN(oid); 3844 3845 SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3846 &eq->ba, "bus address of descriptor ring"); 3847 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3848 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3849 "desc ring size in bytes"); 3850 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3851 &eq->abs_id, 0, "absolute id of the queue"); 3852 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3853 &eq->cntxt_id, 0, "SGE context id of the queue"); 3854 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3855 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 3856 "consumer index"); 3857 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3858 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 3859 "producer index"); 3860 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 3861 eq->sidx, "status page index"); 3862 3863 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 3864 CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", 3865 "traffic class (-1 means none)"); 3866 3867 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 3868 &txq->txcsum, "# of times hardware assisted with checksum"); 3869 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 3870 CTLFLAG_RD, &txq->vlan_insertion, 3871 "# of times hardware inserted 802.1Q tag"); 3872 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 3873 &txq->tso_wrs, "# of TSO work requests"); 3874 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 3875 &txq->imm_wrs, "# of work requests with immediate data"); 3876 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 3877 &txq->sgl_wrs, "# of work requests with direct SGL"); 3878 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 3879 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 3880 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 3881 CTLFLAG_RD, &txq->txpkts0_wrs, 3882 "# of txpkts (type 0) work requests"); 3883 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 3884 CTLFLAG_RD, &txq->txpkts1_wrs, 3885 "# of txpkts (type 1) work requests"); 3886 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 3887 CTLFLAG_RD, &txq->txpkts0_pkts, 3888 "# of frames tx'd using type0 txpkts work requests"); 3889 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 3890 
CTLFLAG_RD, &txq->txpkts1_pkts, 3891 "# of frames tx'd using type1 txpkts work requests"); 3892 3893 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", 3894 CTLFLAG_RD, &txq->r->enqueues, 3895 "# of enqueues to the mp_ring for this queue"); 3896 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", 3897 CTLFLAG_RD, &txq->r->drops, 3898 "# of drops in the mp_ring for this queue"); 3899 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", 3900 CTLFLAG_RD, &txq->r->starts, 3901 "# of normal consumer starts in the mp_ring for this queue"); 3902 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", 3903 CTLFLAG_RD, &txq->r->stalls, 3904 "# of consumer stalls in the mp_ring for this queue"); 3905 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", 3906 CTLFLAG_RD, &txq->r->restarts, 3907 "# of consumer restarts in the mp_ring for this queue"); 3908 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", 3909 CTLFLAG_RD, &txq->r->abdications, 3910 "# of consumer abdications in the mp_ring for this queue"); 3911 3912 return (0); 3913 } 3914 3915 static int 3916 free_txq(struct vi_info *vi, struct sge_txq *txq) 3917 { 3918 int rc; 3919 struct adapter *sc = vi->pi->adapter; 3920 struct sge_eq *eq = &txq->eq; 3921 3922 rc = free_eq(sc, eq); 3923 if (rc) 3924 return (rc); 3925 3926 sglist_free(txq->gl); 3927 free(txq->sdesc, M_CXGBE); 3928 mp_ring_free(txq->r); 3929 3930 bzero(txq, sizeof(*txq)); 3931 return (0); 3932 } 3933 3934 static void 3935 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3936 { 3937 bus_addr_t *ba = arg; 3938 3939 KASSERT(nseg == 1, 3940 ("%s meant for single segment mappings only.", __func__)); 3941 3942 *ba = error ? 0 : segs->ds_addr; 3943 } 3944 3945 static inline void 3946 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 3947 { 3948 uint32_t n, v; 3949 3950 n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); 3951 MPASS(n > 0); 3952 3953 wmb(); 3954 v = fl->dbval | V_PIDX(n); 3955 if (fl->udb) 3956 *fl->udb = htole32(v); 3957 else 3958 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 3959 IDXINCR(fl->dbidx, n, fl->sidx); 3960 } 3961 3962 /* 3963 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 3964 * recycled do not count towards this allocation budget. 3965 * 3966 * Returns non-zero to indicate that this freelist should be added to the list 3967 * of starving freelists. 3968 */ 3969 static int 3970 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 3971 { 3972 __be64 *d; 3973 struct fl_sdesc *sd; 3974 uintptr_t pa; 3975 caddr_t cl; 3976 struct cluster_layout *cll; 3977 struct sw_zone_info *swz; 3978 struct cluster_metadata *clm; 3979 uint16_t max_pidx; 3980 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 3981 3982 FL_LOCK_ASSERT_OWNED(fl); 3983 3984 /* 3985 * We always stop at the beginning of the hardware descriptor that's just 3986 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 3987 * which would mean an empty freelist to the chip. 3988 */ 3989 max_pidx = __predict_false(hw_cidx == 0) ? 
fl->sidx - 1 : hw_cidx - 1; 3990 if (fl->pidx == max_pidx * 8) 3991 return (0); 3992 3993 d = &fl->desc[fl->pidx]; 3994 sd = &fl->sdesc[fl->pidx]; 3995 cll = &fl->cll_def; /* default layout */ 3996 swz = &sc->sge.sw_zone_info[cll->zidx]; 3997 3998 while (n > 0) { 3999 4000 if (sd->cl != NULL) { 4001 4002 if (sd->nmbuf == 0) { 4003 /* 4004 * Fast recycle without involving any atomics on 4005 * the cluster's metadata (if the cluster has 4006 * metadata). This happens when all frames 4007 * received in the cluster were small enough to 4008 * fit within a single mbuf each. 4009 */ 4010 fl->cl_fast_recycled++; 4011 #ifdef INVARIANTS 4012 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 4013 if (clm != NULL) 4014 MPASS(clm->refcount == 1); 4015 #endif 4016 goto recycled_fast; 4017 } 4018 4019 /* 4020 * Cluster is guaranteed to have metadata. Clusters 4021 * without metadata always take the fast recycle path 4022 * when they're recycled. 4023 */ 4024 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 4025 MPASS(clm != NULL); 4026 4027 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4028 fl->cl_recycled++; 4029 counter_u64_add(extfree_rels, 1); 4030 goto recycled; 4031 } 4032 sd->cl = NULL; /* gave up my reference */ 4033 } 4034 MPASS(sd->cl == NULL); 4035 alloc: 4036 cl = uma_zalloc(swz->zone, M_NOWAIT); 4037 if (__predict_false(cl == NULL)) { 4038 if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || 4039 fl->cll_def.zidx == fl->cll_alt.zidx) 4040 break; 4041 4042 /* fall back to the safe zone */ 4043 cll = &fl->cll_alt; 4044 swz = &sc->sge.sw_zone_info[cll->zidx]; 4045 goto alloc; 4046 } 4047 fl->cl_allocated++; 4048 n--; 4049 4050 pa = pmap_kextract((vm_offset_t)cl); 4051 pa += cll->region1; 4052 sd->cl = cl; 4053 sd->cll = *cll; 4054 *d = htobe64(pa | cll->hwidx); 4055 clm = cl_metadata(sc, fl, cll, cl); 4056 if (clm != NULL) { 4057 recycled: 4058 #ifdef INVARIANTS 4059 clm->sd = sd; 4060 #endif 4061 clm->refcount = 1; 4062 } 4063 sd->nmbuf = 0; 4064 recycled_fast: 4065 d++; 4066 sd++; 4067 if (__predict_false(++fl->pidx % 8 == 0)) { 4068 uint16_t pidx = fl->pidx / 8; 4069 4070 if (__predict_false(pidx == fl->sidx)) { 4071 fl->pidx = 0; 4072 pidx = 0; 4073 sd = fl->sdesc; 4074 d = fl->desc; 4075 } 4076 if (pidx == max_pidx) 4077 break; 4078 4079 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 4080 ring_fl_db(sc, fl); 4081 } 4082 } 4083 4084 if (fl->pidx / 8 != fl->dbidx) 4085 ring_fl_db(sc, fl); 4086 4087 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 4088 } 4089 4090 /* 4091 * Attempt to refill all starving freelists. 
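 * Runs from the adapter's sfl_callout with sfl_lock held.  Freelists that are
 * no longer running low (or are being torn down) are taken off the starving
 * list, and the callout is rescheduled at hz / 5 for as long as any remain.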
4092 */ 4093 static void 4094 refill_sfl(void *arg) 4095 { 4096 struct adapter *sc = arg; 4097 struct sge_fl *fl, *fl_temp; 4098 4099 mtx_assert(&sc->sfl_lock, MA_OWNED); 4100 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 4101 FL_LOCK(fl); 4102 refill_fl(sc, fl, 64); 4103 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 4104 TAILQ_REMOVE(&sc->sfl, fl, link); 4105 fl->flags &= ~FL_STARVING; 4106 } 4107 FL_UNLOCK(fl); 4108 } 4109 4110 if (!TAILQ_EMPTY(&sc->sfl)) 4111 callout_schedule(&sc->sfl_callout, hz / 5); 4112 } 4113 4114 static int 4115 alloc_fl_sdesc(struct sge_fl *fl) 4116 { 4117 4118 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 4119 M_ZERO | M_WAITOK); 4120 4121 return (0); 4122 } 4123 4124 static void 4125 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 4126 { 4127 struct fl_sdesc *sd; 4128 struct cluster_metadata *clm; 4129 struct cluster_layout *cll; 4130 int i; 4131 4132 sd = fl->sdesc; 4133 for (i = 0; i < fl->sidx * 8; i++, sd++) { 4134 if (sd->cl == NULL) 4135 continue; 4136 4137 cll = &sd->cll; 4138 clm = cl_metadata(sc, fl, cll, sd->cl); 4139 if (sd->nmbuf == 0) 4140 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 4141 else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4142 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 4143 counter_u64_add(extfree_rels, 1); 4144 } 4145 sd->cl = NULL; 4146 } 4147 4148 free(fl->sdesc, M_CXGBE); 4149 fl->sdesc = NULL; 4150 } 4151 4152 static inline void 4153 get_pkt_gl(struct mbuf *m, struct sglist *gl) 4154 { 4155 int rc; 4156 4157 M_ASSERTPKTHDR(m); 4158 4159 sglist_reset(gl); 4160 rc = sglist_append_mbuf(gl, m); 4161 if (__predict_false(rc != 0)) { 4162 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 4163 "with %d.", __func__, m, mbuf_nsegs(m), rc); 4164 } 4165 4166 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 4167 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 4168 mbuf_nsegs(m), gl->sg_nseg)); 4169 KASSERT(gl->sg_nseg > 0 && 4170 gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), 4171 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 4172 gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); 4173 } 4174 4175 /* 4176 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 4177 */ 4178 static inline u_int 4179 txpkt_len16(u_int nsegs, u_int tso) 4180 { 4181 u_int n; 4182 4183 MPASS(nsegs > 0); 4184 4185 nsegs--; /* first segment is part of ulptx_sgl */ 4186 n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + 4187 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4188 if (tso) 4189 n += sizeof(struct cpl_tx_pkt_lso_core); 4190 4191 return (howmany(n, 16)); 4192 } 4193 4194 /* 4195 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 4196 * request header. 4197 */ 4198 static inline u_int 4199 txpkt_vm_len16(u_int nsegs, u_int tso) 4200 { 4201 u_int n; 4202 4203 MPASS(nsegs > 0); 4204 4205 nsegs--; /* first segment is part of ulptx_sgl */ 4206 n = sizeof(struct fw_eth_tx_pkt_vm_wr) + 4207 sizeof(struct cpl_tx_pkt_core) + 4208 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4209 if (tso) 4210 n += sizeof(struct cpl_tx_pkt_lso_core); 4211 4212 return (howmany(n, 16)); 4213 } 4214 4215 /* 4216 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 4217 * request header. 
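 * The SGL arithmetic mirrors the ulptx_sgl layout: the first segment lives in
 * the ulptx_sgl itself and every subsequent pair of segments takes three
 * 8-byte flits (a shared length pair plus two addresses), with a lone
 * leftover segment taking two flits.  For example, nsegs = 3 adds
 * 8 * ((3 * 2) / 2 + 0) = 24 bytes on top of the fixed-size structures.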
4218 */ 4219 static inline u_int 4220 txpkts0_len16(u_int nsegs) 4221 { 4222 u_int n; 4223 4224 MPASS(nsegs > 0); 4225 4226 nsegs--; /* first segment is part of ulptx_sgl */ 4227 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 4228 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 4229 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4230 4231 return (howmany(n, 16)); 4232 } 4233 4234 /* 4235 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 4236 * request header. 4237 */ 4238 static inline u_int 4239 txpkts1_len16(void) 4240 { 4241 u_int n; 4242 4243 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 4244 4245 return (howmany(n, 16)); 4246 } 4247 4248 static inline u_int 4249 imm_payload(u_int ndesc) 4250 { 4251 u_int n; 4252 4253 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 4254 sizeof(struct cpl_tx_pkt_core); 4255 4256 return (n); 4257 } 4258 4259 /* 4260 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 4261 * software descriptor, and advance the pidx. It is guaranteed that enough 4262 * descriptors are available. 4263 * 4264 * The return value is the # of hardware descriptors used. 4265 */ 4266 static u_int 4267 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, 4268 struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available) 4269 { 4270 struct sge_eq *eq = &txq->eq; 4271 struct tx_sdesc *txsd; 4272 struct cpl_tx_pkt_core *cpl; 4273 uint32_t ctrl; /* used in many unrelated places */ 4274 uint64_t ctrl1; 4275 int csum_type, len16, ndesc, pktlen, nsegs; 4276 caddr_t dst; 4277 4278 TXQ_LOCK_ASSERT_OWNED(txq); 4279 M_ASSERTPKTHDR(m0); 4280 MPASS(available > 0 && available < eq->sidx); 4281 4282 len16 = mbuf_len16(m0); 4283 nsegs = mbuf_nsegs(m0); 4284 pktlen = m0->m_pkthdr.len; 4285 ctrl = sizeof(struct cpl_tx_pkt_core); 4286 if (needs_tso(m0)) 4287 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4288 ndesc = howmany(len16, EQ_ESIZE / 16); 4289 MPASS(ndesc <= available); 4290 4291 /* Firmware work request header */ 4292 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4293 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 4294 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4295 4296 ctrl = V_FW_WR_LEN16(len16); 4297 wr->equiq_to_len16 = htobe32(ctrl); 4298 wr->r3[0] = 0; 4299 wr->r3[1] = 0; 4300 4301 /* 4302 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 4303 * vlantci is ignored unless the ethtype is 0x8100, so it's 4304 * simpler to always copy it rather than making it 4305 * conditional. Also, it seems that we do not have to set 4306 * vlantci or fake the ethtype when doing VLAN tag insertion. 
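	 * The copy length below (sizeof(struct ether_header) + 2) matches the
	 * four fields laid out back to back in the VM work request:
	 * ethmacdst, ethmacsrc, ethtype, and vlantci.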
4307 */ 4308 m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); 4309 4310 csum_type = -1; 4311 if (needs_tso(m0)) { 4312 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4313 4314 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4315 m0->m_pkthdr.l4hlen > 0, 4316 ("%s: mbuf %p needs TSO but missing header lengths", 4317 __func__, m0)); 4318 4319 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4320 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4321 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4322 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4323 ctrl |= V_LSO_ETHHDR_LEN(1); 4324 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4325 ctrl |= F_LSO_IPV6; 4326 4327 lso->lso_ctrl = htobe32(ctrl); 4328 lso->ipid_ofst = htobe16(0); 4329 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4330 lso->seqno_offset = htobe32(0); 4331 lso->len = htobe32(pktlen); 4332 4333 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4334 csum_type = TX_CSUM_TCPIP6; 4335 else 4336 csum_type = TX_CSUM_TCPIP; 4337 4338 cpl = (void *)(lso + 1); 4339 4340 txq->tso_wrs++; 4341 } else { 4342 if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) 4343 csum_type = TX_CSUM_TCPIP; 4344 else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) 4345 csum_type = TX_CSUM_UDPIP; 4346 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) 4347 csum_type = TX_CSUM_TCPIP6; 4348 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) 4349 csum_type = TX_CSUM_UDPIP6; 4350 #if defined(INET) 4351 else if (m0->m_pkthdr.csum_flags & CSUM_IP) { 4352 /* 4353 * XXX: The firmware appears to stomp on the 4354 * fragment/flags field of the IP header when 4355 * using TX_CSUM_IP. Fall back to doing 4356 * software checksums. 4357 */ 4358 u_short *sump; 4359 struct mbuf *m; 4360 int offset; 4361 4362 m = m0; 4363 offset = 0; 4364 sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + 4365 offsetof(struct ip, ip_sum)); 4366 *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + 4367 m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); 4368 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 4369 } 4370 #endif 4371 4372 cpl = (void *)(wr + 1); 4373 } 4374 4375 /* Checksum offload */ 4376 ctrl1 = 0; 4377 if (needs_l3_csum(m0) == 0) 4378 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4379 if (csum_type >= 0) { 4380 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, 4381 ("%s: mbuf %p needs checksum offload but missing header lengths", 4382 __func__, m0)); 4383 4384 if (chip_id(sc) <= CHELSIO_T5) { 4385 ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - 4386 ETHER_HDR_LEN); 4387 } else { 4388 ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - 4389 ETHER_HDR_LEN); 4390 } 4391 ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); 4392 ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); 4393 } else 4394 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4395 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4396 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4397 txq->txcsum++; /* some hardware assistance provided */ 4398 4399 /* VLAN tag insertion */ 4400 if (needs_vlan_insertion(m0)) { 4401 ctrl1 |= F_TXPKT_VLAN_VLD | 4402 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4403 txq->vlan_insertion++; 4404 } 4405 4406 /* CPL header */ 4407 cpl->ctrl0 = txq->cpl_ctrl0; 4408 cpl->pack = 0; 4409 cpl->len = htobe16(pktlen); 4410 cpl->ctrl1 = htobe64(ctrl1); 4411 4412 /* SGL */ 4413 dst = (void *)(cpl + 1); 4414 4415 /* 4416 * A packet using TSO will use up an entire descriptor for the 4417 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 
4418 * If this descriptor is the last descriptor in the ring, wrap 4419 * around to the front of the ring explicitly for the start of 4420 * the sgl. 4421 */ 4422 if (dst == (void *)&eq->desc[eq->sidx]) { 4423 dst = (void *)&eq->desc[0]; 4424 write_gl_to_txd(txq, m0, &dst, 0); 4425 } else 4426 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4427 txq->sgl_wrs++; 4428 4429 txq->txpkt_wrs++; 4430 4431 txsd = &txq->sdesc[eq->pidx]; 4432 txsd->m = m0; 4433 txsd->desc_used = ndesc; 4434 4435 return (ndesc); 4436 } 4437 4438 /* 4439 * Write a txpkt WR for this packet to the hardware descriptors, update the 4440 * software descriptor, and advance the pidx. It is guaranteed that enough 4441 * descriptors are available. 4442 * 4443 * The return value is the # of hardware descriptors used. 4444 */ 4445 static u_int 4446 write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, 4447 struct mbuf *m0, u_int available) 4448 { 4449 struct sge_eq *eq = &txq->eq; 4450 struct tx_sdesc *txsd; 4451 struct cpl_tx_pkt_core *cpl; 4452 uint32_t ctrl; /* used in many unrelated places */ 4453 uint64_t ctrl1; 4454 int len16, ndesc, pktlen, nsegs; 4455 caddr_t dst; 4456 4457 TXQ_LOCK_ASSERT_OWNED(txq); 4458 M_ASSERTPKTHDR(m0); 4459 MPASS(available > 0 && available < eq->sidx); 4460 4461 len16 = mbuf_len16(m0); 4462 nsegs = mbuf_nsegs(m0); 4463 pktlen = m0->m_pkthdr.len; 4464 ctrl = sizeof(struct cpl_tx_pkt_core); 4465 if (needs_tso(m0)) 4466 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4467 else if (pktlen <= imm_payload(2) && available >= 2) { 4468 /* Immediate data. Recalculate len16 and set nsegs to 0. */ 4469 ctrl += pktlen; 4470 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 4471 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 4472 nsegs = 0; 4473 } 4474 ndesc = howmany(len16, EQ_ESIZE / 16); 4475 MPASS(ndesc <= available); 4476 4477 /* Firmware work request header */ 4478 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4479 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 4480 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4481 4482 ctrl = V_FW_WR_LEN16(len16); 4483 wr->equiq_to_len16 = htobe32(ctrl); 4484 wr->r3 = 0; 4485 4486 if (needs_tso(m0)) { 4487 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4488 4489 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4490 m0->m_pkthdr.l4hlen > 0, 4491 ("%s: mbuf %p needs TSO but missing header lengths", 4492 __func__, m0)); 4493 4494 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4495 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4496 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4497 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4498 ctrl |= V_LSO_ETHHDR_LEN(1); 4499 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4500 ctrl |= F_LSO_IPV6; 4501 4502 lso->lso_ctrl = htobe32(ctrl); 4503 lso->ipid_ofst = htobe16(0); 4504 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4505 lso->seqno_offset = htobe32(0); 4506 lso->len = htobe32(pktlen); 4507 4508 cpl = (void *)(lso + 1); 4509 4510 txq->tso_wrs++; 4511 } else 4512 cpl = (void *)(wr + 1); 4513 4514 /* Checksum offload */ 4515 ctrl1 = 0; 4516 if (needs_l3_csum(m0) == 0) 4517 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4518 if (needs_l4_csum(m0) == 0) 4519 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4520 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4521 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4522 txq->txcsum++; /* some hardware assistance provided */ 4523 4524 /* VLAN tag insertion */ 4525 if (needs_vlan_insertion(m0)) { 4526 ctrl1 |= F_TXPKT_VLAN_VLD | 
V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4527 txq->vlan_insertion++; 4528 } 4529 4530 /* CPL header */ 4531 cpl->ctrl0 = txq->cpl_ctrl0; 4532 cpl->pack = 0; 4533 cpl->len = htobe16(pktlen); 4534 cpl->ctrl1 = htobe64(ctrl1); 4535 4536 /* SGL */ 4537 dst = (void *)(cpl + 1); 4538 if (nsegs > 0) { 4539 4540 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4541 txq->sgl_wrs++; 4542 } else { 4543 struct mbuf *m; 4544 4545 for (m = m0; m != NULL; m = m->m_next) { 4546 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 4547 #ifdef INVARIANTS 4548 pktlen -= m->m_len; 4549 #endif 4550 } 4551 #ifdef INVARIANTS 4552 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 4553 #endif 4554 txq->imm_wrs++; 4555 } 4556 4557 txq->txpkt_wrs++; 4558 4559 txsd = &txq->sdesc[eq->pidx]; 4560 txsd->m = m0; 4561 txsd->desc_used = ndesc; 4562 4563 return (ndesc); 4564 } 4565 4566 static int 4567 try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) 4568 { 4569 u_int needed, nsegs1, nsegs2, l1, l2; 4570 4571 if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) 4572 return (1); 4573 4574 nsegs1 = mbuf_nsegs(m); 4575 nsegs2 = mbuf_nsegs(n); 4576 if (nsegs1 + nsegs2 == 2) { 4577 txp->wr_type = 1; 4578 l1 = l2 = txpkts1_len16(); 4579 } else { 4580 txp->wr_type = 0; 4581 l1 = txpkts0_len16(nsegs1); 4582 l2 = txpkts0_len16(nsegs2); 4583 } 4584 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; 4585 needed = howmany(txp->len16, EQ_ESIZE / 16); 4586 if (needed > SGE_MAX_WR_NDESC || needed > available) 4587 return (1); 4588 4589 txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; 4590 if (txp->plen > 65535) 4591 return (1); 4592 4593 txp->npkt = 2; 4594 set_mbuf_len16(m, l1); 4595 set_mbuf_len16(n, l2); 4596 4597 return (0); 4598 } 4599 4600 static int 4601 add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) 4602 { 4603 u_int plen, len16, needed, nsegs; 4604 4605 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 4606 4607 nsegs = mbuf_nsegs(m); 4608 if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1)) 4609 return (1); 4610 4611 plen = txp->plen + m->m_pkthdr.len; 4612 if (plen > 65535) 4613 return (1); 4614 4615 if (txp->wr_type == 0) 4616 len16 = txpkts0_len16(nsegs); 4617 else 4618 len16 = txpkts1_len16(); 4619 needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); 4620 if (needed > SGE_MAX_WR_NDESC || needed > available) 4621 return (1); 4622 4623 txp->npkt++; 4624 txp->plen = plen; 4625 txp->len16 += len16; 4626 set_mbuf_len16(m, len16); 4627 4628 return (0); 4629 } 4630 4631 /* 4632 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 4633 * the software descriptor, and advance the pidx. It is guaranteed that enough 4634 * descriptors are available. 4635 * 4636 * The return value is the # of hardware descriptors used. 
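 *
 * Type 1 txpkts work requests carry just a CPL header and SGL per frame and
 * are limited to single-segment packets; type 0 wraps each frame in a
 * ulp_txpkt and ulptx_idata header, which is what allows multi-segment
 * packets to be coalesced.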
4637 */ 4638 static u_int 4639 write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, 4640 struct mbuf *m0, const struct txpkts *txp, u_int available) 4641 { 4642 struct sge_eq *eq = &txq->eq; 4643 struct tx_sdesc *txsd; 4644 struct cpl_tx_pkt_core *cpl; 4645 uint32_t ctrl; 4646 uint64_t ctrl1; 4647 int ndesc, checkwrap; 4648 struct mbuf *m; 4649 void *flitp; 4650 4651 TXQ_LOCK_ASSERT_OWNED(txq); 4652 MPASS(txp->npkt > 0); 4653 MPASS(txp->plen < 65536); 4654 MPASS(m0 != NULL); 4655 MPASS(m0->m_nextpkt != NULL); 4656 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 4657 MPASS(available > 0 && available < eq->sidx); 4658 4659 ndesc = howmany(txp->len16, EQ_ESIZE / 16); 4660 MPASS(ndesc <= available); 4661 4662 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4663 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4664 ctrl = V_FW_WR_LEN16(txp->len16); 4665 wr->equiq_to_len16 = htobe32(ctrl); 4666 wr->plen = htobe16(txp->plen); 4667 wr->npkt = txp->npkt; 4668 wr->r3 = 0; 4669 wr->type = txp->wr_type; 4670 flitp = wr + 1; 4671 4672 /* 4673 * At this point we are 16B into a hardware descriptor. If checkwrap is 4674 * set then we know the WR is going to wrap around somewhere. We'll 4675 * check for that at appropriate points. 4676 */ 4677 checkwrap = eq->sidx - ndesc < eq->pidx; 4678 for (m = m0; m != NULL; m = m->m_nextpkt) { 4679 if (txp->wr_type == 0) { 4680 struct ulp_txpkt *ulpmc; 4681 struct ulptx_idata *ulpsc; 4682 4683 /* ULP master command */ 4684 ulpmc = flitp; 4685 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 4686 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 4687 ulpmc->len = htobe32(mbuf_len16(m)); 4688 4689 /* ULP subcommand */ 4690 ulpsc = (void *)(ulpmc + 1); 4691 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 4692 F_ULP_TX_SC_MORE); 4693 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 4694 4695 cpl = (void *)(ulpsc + 1); 4696 if (checkwrap && 4697 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 4698 cpl = (void *)&eq->desc[0]; 4699 } else { 4700 cpl = flitp; 4701 } 4702 4703 /* Checksum offload */ 4704 ctrl1 = 0; 4705 if (needs_l3_csum(m) == 0) 4706 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4707 if (needs_l4_csum(m) == 0) 4708 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4709 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4710 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4711 txq->txcsum++; /* some hardware assistance provided */ 4712 4713 /* VLAN tag insertion */ 4714 if (needs_vlan_insertion(m)) { 4715 ctrl1 |= F_TXPKT_VLAN_VLD | 4716 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 4717 txq->vlan_insertion++; 4718 } 4719 4720 /* CPL header */ 4721 cpl->ctrl0 = txq->cpl_ctrl0; 4722 cpl->pack = 0; 4723 cpl->len = htobe16(m->m_pkthdr.len); 4724 cpl->ctrl1 = htobe64(ctrl1); 4725 4726 flitp = cpl + 1; 4727 if (checkwrap && 4728 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 4729 flitp = (void *)&eq->desc[0]; 4730 4731 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 4732 4733 } 4734 4735 if (txp->wr_type == 0) { 4736 txq->txpkts0_pkts += txp->npkt; 4737 txq->txpkts0_wrs++; 4738 } else { 4739 txq->txpkts1_pkts += txp->npkt; 4740 txq->txpkts1_wrs++; 4741 } 4742 4743 txsd = &txq->sdesc[eq->pidx]; 4744 txsd->m = m0; 4745 txsd->desc_used = ndesc; 4746 4747 return (ndesc); 4748 } 4749 4750 /* 4751 * If the SGL ends on an address that is not 16 byte aligned, this function will 4752 * add a 0 filled flit at the end. 
4753 */ 4754 static void 4755 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 4756 { 4757 struct sge_eq *eq = &txq->eq; 4758 struct sglist *gl = txq->gl; 4759 struct sglist_seg *seg; 4760 __be64 *flitp, *wrap; 4761 struct ulptx_sgl *usgl; 4762 int i, nflits, nsegs; 4763 4764 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 4765 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 4766 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4767 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4768 4769 get_pkt_gl(m, gl); 4770 nsegs = gl->sg_nseg; 4771 MPASS(nsegs > 0); 4772 4773 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 4774 flitp = (__be64 *)(*to); 4775 wrap = (__be64 *)(&eq->desc[eq->sidx]); 4776 seg = &gl->sg_segs[0]; 4777 usgl = (void *)flitp; 4778 4779 /* 4780 * We start at a 16 byte boundary somewhere inside the tx descriptor 4781 * ring, so we're at least 16 bytes away from the status page. There is 4782 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 4783 */ 4784 4785 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 4786 V_ULPTX_NSGE(nsegs)); 4787 usgl->len0 = htobe32(seg->ss_len); 4788 usgl->addr0 = htobe64(seg->ss_paddr); 4789 seg++; 4790 4791 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 4792 4793 /* Won't wrap around at all */ 4794 4795 for (i = 0; i < nsegs - 1; i++, seg++) { 4796 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 4797 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 4798 } 4799 if (i & 1) 4800 usgl->sge[i / 2].len[1] = htobe32(0); 4801 flitp += nflits; 4802 } else { 4803 4804 /* Will wrap somewhere in the rest of the SGL */ 4805 4806 /* 2 flits already written, write the rest flit by flit */ 4807 flitp = (void *)(usgl + 1); 4808 for (i = 0; i < nflits - 2; i++) { 4809 if (flitp == wrap) 4810 flitp = (void *)eq->desc; 4811 *flitp++ = get_flit(seg, nsegs - 1, i); 4812 } 4813 } 4814 4815 if (nflits & 1) { 4816 MPASS(((uintptr_t)flitp) & 0xf); 4817 *flitp++ = 0; 4818 } 4819 4820 MPASS((((uintptr_t)flitp) & 0xf) == 0); 4821 if (__predict_false(flitp == wrap)) 4822 *to = (void *)eq->desc; 4823 else 4824 *to = (void *)flitp; 4825 } 4826 4827 static inline void 4828 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 4829 { 4830 4831 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 4832 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 4833 4834 if (__predict_true((uintptr_t)(*to) + len <= 4835 (uintptr_t)&eq->desc[eq->sidx])) { 4836 bcopy(from, *to, len); 4837 (*to) += len; 4838 } else { 4839 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 4840 4841 bcopy(from, *to, portion); 4842 from += portion; 4843 portion = len - portion; /* remaining */ 4844 bcopy(from, (void *)eq->desc, portion); 4845 (*to) = (caddr_t)eq->desc + portion; 4846 } 4847 } 4848 4849 static inline void 4850 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 4851 { 4852 u_int db; 4853 4854 MPASS(n > 0); 4855 4856 db = eq->doorbells; 4857 if (n > 1) 4858 clrbit(&db, DOORBELL_WCWR); 4859 wmb(); 4860 4861 switch (ffs(db) - 1) { 4862 case DOORBELL_UDB: 4863 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4864 break; 4865 4866 case DOORBELL_WCWR: { 4867 volatile uint64_t *dst, *src; 4868 int i; 4869 4870 /* 4871 * Queues whose 128B doorbell segment fits in the page do not 4872 * use relative qid (udb_qid is always 0). Only queues with 4873 * doorbell segments can do WCWR. 
4874 */ 4875 KASSERT(eq->udb_qid == 0 && n == 1, 4876 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 4877 __func__, eq->doorbells, n, eq->dbidx, eq)); 4878 4879 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 4880 UDBS_DB_OFFSET); 4881 i = eq->dbidx; 4882 src = (void *)&eq->desc[i]; 4883 while (src != (void *)&eq->desc[i + 1]) 4884 *dst++ = *src++; 4885 wmb(); 4886 break; 4887 } 4888 4889 case DOORBELL_UDBWC: 4890 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 4891 wmb(); 4892 break; 4893 4894 case DOORBELL_KDB: 4895 t4_write_reg(sc, sc->sge_kdoorbell_reg, 4896 V_QID(eq->cntxt_id) | V_PIDX(n)); 4897 break; 4898 } 4899 4900 IDXINCR(eq->dbidx, n, eq->sidx); 4901 } 4902 4903 static inline u_int 4904 reclaimable_tx_desc(struct sge_eq *eq) 4905 { 4906 uint16_t hw_cidx; 4907 4908 hw_cidx = read_hw_cidx(eq); 4909 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 4910 } 4911 4912 static inline u_int 4913 total_available_tx_desc(struct sge_eq *eq) 4914 { 4915 uint16_t hw_cidx, pidx; 4916 4917 hw_cidx = read_hw_cidx(eq); 4918 pidx = eq->pidx; 4919 4920 if (pidx == hw_cidx) 4921 return (eq->sidx - 1); 4922 else 4923 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 4924 } 4925 4926 static inline uint16_t 4927 read_hw_cidx(struct sge_eq *eq) 4928 { 4929 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 4930 uint16_t cidx = spg->cidx; /* stable snapshot */ 4931 4932 return (be16toh(cidx)); 4933 } 4934 4935 /* 4936 * Reclaim 'n' descriptors approximately. 4937 */ 4938 static u_int 4939 reclaim_tx_descs(struct sge_txq *txq, u_int n) 4940 { 4941 struct tx_sdesc *txsd; 4942 struct sge_eq *eq = &txq->eq; 4943 u_int can_reclaim, reclaimed; 4944 4945 TXQ_LOCK_ASSERT_OWNED(txq); 4946 MPASS(n > 0); 4947 4948 reclaimed = 0; 4949 can_reclaim = reclaimable_tx_desc(eq); 4950 while (can_reclaim && reclaimed < n) { 4951 int ndesc; 4952 struct mbuf *m, *nextpkt; 4953 4954 txsd = &txq->sdesc[eq->cidx]; 4955 ndesc = txsd->desc_used; 4956 4957 /* Firmware doesn't return "partial" credits. 
*/ 4958 KASSERT(can_reclaim >= ndesc, 4959 ("%s: unexpected number of credits: %d, %d", 4960 __func__, can_reclaim, ndesc)); 4961 4962 for (m = txsd->m; m != NULL; m = nextpkt) { 4963 nextpkt = m->m_nextpkt; 4964 m->m_nextpkt = NULL; 4965 m_freem(m); 4966 } 4967 reclaimed += ndesc; 4968 can_reclaim -= ndesc; 4969 IDXINCR(eq->cidx, ndesc, eq->sidx); 4970 } 4971 4972 return (reclaimed); 4973 } 4974 4975 static void 4976 tx_reclaim(void *arg, int n) 4977 { 4978 struct sge_txq *txq = arg; 4979 struct sge_eq *eq = &txq->eq; 4980 4981 do { 4982 if (TXQ_TRYLOCK(txq) == 0) 4983 break; 4984 n = reclaim_tx_descs(txq, 32); 4985 if (eq->cidx == eq->pidx) 4986 eq->equeqidx = eq->pidx; 4987 TXQ_UNLOCK(txq); 4988 } while (n > 0); 4989 } 4990 4991 static __be64 4992 get_flit(struct sglist_seg *segs, int nsegs, int idx) 4993 { 4994 int i = (idx / 3) * 2; 4995 4996 switch (idx % 3) { 4997 case 0: { 4998 uint64_t rc; 4999 5000 rc = (uint64_t)segs[i].ss_len << 32; 5001 if (i + 1 < nsegs) 5002 rc |= (uint64_t)(segs[i + 1].ss_len); 5003 5004 return (htobe64(rc)); 5005 } 5006 case 1: 5007 return (htobe64(segs[i].ss_paddr)); 5008 case 2: 5009 return (htobe64(segs[i + 1].ss_paddr)); 5010 } 5011 5012 return (0); 5013 } 5014 5015 static void 5016 find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) 5017 { 5018 int8_t zidx, hwidx, idx; 5019 uint16_t region1, region3; 5020 int spare, spare_needed, n; 5021 struct sw_zone_info *swz; 5022 struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; 5023 5024 /* 5025 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize 5026 * large enough for the max payload and cluster metadata. Otherwise 5027 * settle for the largest bufsize that leaves enough room in the cluster 5028 * for metadata. 5029 * 5030 * Without buffer packing: Look for the smallest zone which has a 5031 * bufsize large enough for the max payload. Settle for the largest 5032 * bufsize available if there's nothing big enough for max payload. 5033 */ 5034 spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; 5035 swz = &sc->sge.sw_zone_info[0]; 5036 hwidx = -1; 5037 for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { 5038 if (swz->size > largest_rx_cluster) { 5039 if (__predict_true(hwidx != -1)) 5040 break; 5041 5042 /* 5043 * This is a misconfiguration. largest_rx_cluster is 5044 * preventing us from finding a refill source. See 5045 * dev.t5nex.<n>.buffer_sizes to figure out why. 5046 */ 5047 device_printf(sc->dev, "largest_rx_cluster=%u leaves no" 5048 " refill source for fl %p (dma %u). Ignored.\n", 5049 largest_rx_cluster, fl, maxp); 5050 } 5051 for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { 5052 hwb = &hwb_list[idx]; 5053 spare = swz->size - hwb->size; 5054 if (spare < spare_needed) 5055 continue; 5056 5057 hwidx = idx; /* best option so far */ 5058 if (hwb->size >= maxp) { 5059 5060 if ((fl->flags & FL_BUF_PACKING) == 0) 5061 goto done; /* stop looking (not packing) */ 5062 5063 if (swz->size >= safest_rx_cluster) 5064 goto done; /* stop looking (packing) */ 5065 } 5066 break; /* keep looking, next zone */ 5067 } 5068 } 5069 done: 5070 /* A usable hwidx has been located. */ 5071 MPASS(hwidx != -1); 5072 hwb = &hwb_list[hwidx]; 5073 zidx = hwb->zidx; 5074 swz = &sc->sge.sw_zone_info[zidx]; 5075 region1 = 0; 5076 region3 = swz->size - hwb->size; 5077 5078 /* 5079 * Stay within this zone and see if there is a better match when mbuf 5080 * inlining is allowed. 
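	 * Inlining carves region1 (a whole number of mbufs) out of the front
	 * of the cluster and leaves region3 at the end for the cluster
	 * metadata, with the rx payload in between.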
Remember that the hwidx's are sorted in 5081 * decreasing order of size (so in increasing order of spare area). 5082 */ 5083 for (idx = hwidx; idx != -1; idx = hwb->next) { 5084 hwb = &hwb_list[idx]; 5085 spare = swz->size - hwb->size; 5086 5087 if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) 5088 break; 5089 5090 /* 5091 * Do not inline mbufs if doing so would violate the pad/pack 5092 * boundary alignment requirement. 5093 */ 5094 if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) 5095 continue; 5096 if (fl->flags & FL_BUF_PACKING && 5097 (MSIZE % sc->params.sge.pack_boundary) != 0) 5098 continue; 5099 5100 if (spare < CL_METADATA_SIZE + MSIZE) 5101 continue; 5102 n = (spare - CL_METADATA_SIZE) / MSIZE; 5103 if (n > howmany(hwb->size, maxp)) 5104 break; 5105 5106 hwidx = idx; 5107 if (fl->flags & FL_BUF_PACKING) { 5108 region1 = n * MSIZE; 5109 region3 = spare - region1; 5110 } else { 5111 region1 = MSIZE; 5112 region3 = spare - region1; 5113 break; 5114 } 5115 } 5116 5117 KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, 5118 ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); 5119 KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, 5120 ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); 5121 KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == 5122 sc->sge.sw_zone_info[zidx].size, 5123 ("%s: bad buffer layout for fl %p, maxp %d. " 5124 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5125 sc->sge.sw_zone_info[zidx].size, region1, 5126 sc->sge.hw_buf_info[hwidx].size, region3)); 5127 if (fl->flags & FL_BUF_PACKING || region1 > 0) { 5128 KASSERT(region3 >= CL_METADATA_SIZE, 5129 ("%s: no room for metadata. fl %p, maxp %d; " 5130 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5131 sc->sge.sw_zone_info[zidx].size, region1, 5132 sc->sge.hw_buf_info[hwidx].size, region3)); 5133 KASSERT(region1 % MSIZE == 0, 5134 ("%s: bad mbuf region for fl %p, maxp %d. 
" 5135 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5136 sc->sge.sw_zone_info[zidx].size, region1, 5137 sc->sge.hw_buf_info[hwidx].size, region3)); 5138 } 5139 5140 fl->cll_def.zidx = zidx; 5141 fl->cll_def.hwidx = hwidx; 5142 fl->cll_def.region1 = region1; 5143 fl->cll_def.region3 = region3; 5144 } 5145 5146 static void 5147 find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) 5148 { 5149 struct sge *s = &sc->sge; 5150 struct hw_buf_info *hwb; 5151 struct sw_zone_info *swz; 5152 int spare; 5153 int8_t hwidx; 5154 5155 if (fl->flags & FL_BUF_PACKING) 5156 hwidx = s->safe_hwidx2; /* with room for metadata */ 5157 else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { 5158 hwidx = s->safe_hwidx2; 5159 hwb = &s->hw_buf_info[hwidx]; 5160 swz = &s->sw_zone_info[hwb->zidx]; 5161 spare = swz->size - hwb->size; 5162 5163 /* no good if there isn't room for an mbuf as well */ 5164 if (spare < CL_METADATA_SIZE + MSIZE) 5165 hwidx = s->safe_hwidx1; 5166 } else 5167 hwidx = s->safe_hwidx1; 5168 5169 if (hwidx == -1) { 5170 /* No fallback source */ 5171 fl->cll_alt.hwidx = -1; 5172 fl->cll_alt.zidx = -1; 5173 5174 return; 5175 } 5176 5177 hwb = &s->hw_buf_info[hwidx]; 5178 swz = &s->sw_zone_info[hwb->zidx]; 5179 spare = swz->size - hwb->size; 5180 fl->cll_alt.hwidx = hwidx; 5181 fl->cll_alt.zidx = hwb->zidx; 5182 if (allow_mbufs_in_cluster && 5183 (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) 5184 fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; 5185 else 5186 fl->cll_alt.region1 = 0; 5187 fl->cll_alt.region3 = spare - fl->cll_alt.region1; 5188 } 5189 5190 static void 5191 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 5192 { 5193 mtx_lock(&sc->sfl_lock); 5194 FL_LOCK(fl); 5195 if ((fl->flags & FL_DOOMED) == 0) { 5196 fl->flags |= FL_STARVING; 5197 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 5198 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 5199 } 5200 FL_UNLOCK(fl); 5201 mtx_unlock(&sc->sfl_lock); 5202 } 5203 5204 static void 5205 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 5206 { 5207 struct sge_wrq *wrq = (void *)eq; 5208 5209 atomic_readandclear_int(&eq->equiq); 5210 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 5211 } 5212 5213 static void 5214 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 5215 { 5216 struct sge_txq *txq = (void *)eq; 5217 5218 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 5219 5220 atomic_readandclear_int(&eq->equiq); 5221 mp_ring_check_drainage(txq->r, 0); 5222 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 5223 } 5224 5225 static int 5226 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 5227 struct mbuf *m) 5228 { 5229 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 5230 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 5231 struct adapter *sc = iq->adapter; 5232 struct sge *s = &sc->sge; 5233 struct sge_eq *eq; 5234 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 5235 &handle_wrq_egr_update, &handle_eth_egr_update, 5236 &handle_wrq_egr_update}; 5237 5238 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 5239 rss->opcode)); 5240 5241 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 5242 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 5243 5244 return (0); 5245 } 5246 5247 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 5248 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 5249 offsetof(struct cpl_fw6_msg, data)); 5250 5251 static int 5252 handle_fw_msg(struct sge_iq 
*iq, const struct rss_header *rss, struct mbuf *m) 5253 { 5254 struct adapter *sc = iq->adapter; 5255 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 5256 5257 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 5258 rss->opcode)); 5259 5260 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 5261 const struct rss_header *rss2; 5262 5263 rss2 = (const struct rss_header *)&cpl->data[0]; 5264 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 5265 } 5266 5267 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 5268 } 5269 5270 /** 5271 * t4_handle_wrerr_rpl - process a FW work request error message 5272 * @adap: the adapter 5273 * @rpl: start of the FW message 5274 */ 5275 static int 5276 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 5277 { 5278 u8 opcode = *(const u8 *)rpl; 5279 const struct fw_error_cmd *e = (const void *)rpl; 5280 unsigned int i; 5281 5282 if (opcode != FW_ERROR_CMD) { 5283 log(LOG_ERR, 5284 "%s: Received WRERR_RPL message with opcode %#x\n", 5285 device_get_nameunit(adap->dev), opcode); 5286 return (EINVAL); 5287 } 5288 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 5289 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 5290 "non-fatal"); 5291 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 5292 case FW_ERROR_TYPE_EXCEPTION: 5293 log(LOG_ERR, "exception info:\n"); 5294 for (i = 0; i < nitems(e->u.exception.info); i++) 5295 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 5296 be32toh(e->u.exception.info[i])); 5297 log(LOG_ERR, "\n"); 5298 break; 5299 case FW_ERROR_TYPE_HWMODULE: 5300 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 5301 be32toh(e->u.hwmodule.regaddr), 5302 be32toh(e->u.hwmodule.regval)); 5303 break; 5304 case FW_ERROR_TYPE_WR: 5305 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 5306 be16toh(e->u.wr.cidx), 5307 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 5308 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 5309 be32toh(e->u.wr.eqid)); 5310 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 5311 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 5312 e->u.wr.wrhdr[i]); 5313 log(LOG_ERR, "\n"); 5314 break; 5315 case FW_ERROR_TYPE_ACL: 5316 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 5317 be16toh(e->u.acl.cidx), 5318 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 5319 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 5320 be32toh(e->u.acl.eqid), 5321 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 5322 "MAC"); 5323 for (i = 0; i < nitems(e->u.acl.val); i++) 5324 log(LOG_ERR, " %02x", e->u.acl.val[i]); 5325 log(LOG_ERR, "\n"); 5326 break; 5327 default: 5328 log(LOG_ERR, "type %#x\n", 5329 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 5330 return (EINVAL); 5331 } 5332 return (0); 5333 } 5334 5335 static int 5336 sysctl_uint16(SYSCTL_HANDLER_ARGS) 5337 { 5338 uint16_t *id = arg1; 5339 int i = *id; 5340 5341 return sysctl_handle_int(oidp, &i, 0, req); 5342 } 5343 5344 static int 5345 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 5346 { 5347 struct sge *s = arg1; 5348 struct hw_buf_info *hwb = &s->hw_buf_info[0]; 5349 struct sw_zone_info *swz = &s->sw_zone_info[0]; 5350 int i, rc; 5351 struct sbuf sb; 5352 char c; 5353 5354 sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); 5355 for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { 5356 if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) 5357 c = '*'; 5358 else 5359 c = '\0'; 5360 5361 sbuf_printf(&sb, "%u%c ", hwb->size, c); 5362 } 5363 sbuf_trim(&sb); 5364 sbuf_finish(&sb); 5365 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 5366 sbuf_delete(&sb); 5367 return (rc); 5368 } 5369 5370 #ifdef RATELIMIT 5371 /* 5372 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 5373 */ 5374 static inline u_int 5375 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 5376 { 5377 u_int n; 5378 5379 MPASS(immhdrs > 0); 5380 5381 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 5382 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 5383 if (__predict_false(nsegs == 0)) 5384 goto done; 5385 5386 nsegs--; /* first segment is part of ulptx_sgl */ 5387 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5388 if (tso) 5389 n += sizeof(struct cpl_tx_pkt_lso_core); 5390 5391 done: 5392 return (howmany(n, 16)); 5393 } 5394 5395 #define ETID_FLOWC_NPARAMS 6 5396 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 5397 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 5398 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 5399 5400 static int 5401 send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi, 5402 struct vi_info *vi) 5403 { 5404 struct wrq_cookie cookie; 5405 u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; 5406 struct fw_flowc_wr *flowc; 5407 5408 mtx_assert(&cst->lock, MA_OWNED); 5409 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 5410 EO_FLOWC_PENDING); 5411 5412 flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie); 5413 if (__predict_false(flowc == NULL)) 5414 return (ENOMEM); 5415 5416 bzero(flowc, ETID_FLOWC_LEN); 5417 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 5418 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 5419 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 5420 V_FW_WR_FLOWID(cst->etid)); 5421 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 5422 flowc->mnemval[0].val = htobe32(pfvf); 5423 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 5424 flowc->mnemval[1].val = htobe32(pi->tx_chan); 5425 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 5426 flowc->mnemval[2].val = htobe32(pi->tx_chan); 5427 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 5428 flowc->mnemval[3].val = htobe32(cst->iqid); 5429 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 5430 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 5431 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 5432 flowc->mnemval[5].val = htobe32(cst->schedcl); 5433 5434 commit_wrq_wr(cst->eo_txq, 
flowc, &cookie); 5435 5436 cst->flags &= ~EO_FLOWC_PENDING; 5437 cst->flags |= EO_FLOWC_RPL_PENDING; 5438 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 5439 cst->tx_credits -= ETID_FLOWC_LEN16; 5440 5441 return (0); 5442 } 5443 5444 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 5445 5446 void 5447 send_etid_flush_wr(struct cxgbe_snd_tag *cst) 5448 { 5449 struct fw_flowc_wr *flowc; 5450 struct wrq_cookie cookie; 5451 5452 mtx_assert(&cst->lock, MA_OWNED); 5453 5454 flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie); 5455 if (__predict_false(flowc == NULL)) 5456 CXGBE_UNIMPLEMENTED(__func__); 5457 5458 bzero(flowc, ETID_FLUSH_LEN16 * 16); 5459 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 5460 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 5461 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 5462 V_FW_WR_FLOWID(cst->etid)); 5463 5464 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 5465 5466 cst->flags |= EO_FLUSH_RPL_PENDING; 5467 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 5468 cst->tx_credits -= ETID_FLUSH_LEN16; 5469 cst->ncompl++; 5470 } 5471 5472 static void 5473 write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr, 5474 struct mbuf *m0, int compl) 5475 { 5476 struct cpl_tx_pkt_core *cpl; 5477 uint64_t ctrl1; 5478 uint32_t ctrl; /* used in many unrelated places */ 5479 int len16, pktlen, nsegs, immhdrs; 5480 caddr_t dst; 5481 uintptr_t p; 5482 struct ulptx_sgl *usgl; 5483 struct sglist sg; 5484 struct sglist_seg segs[38]; /* XXX: find real limit. XXX: get off the stack */ 5485 5486 mtx_assert(&cst->lock, MA_OWNED); 5487 M_ASSERTPKTHDR(m0); 5488 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5489 m0->m_pkthdr.l4hlen > 0, 5490 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 5491 5492 if (needs_udp_csum(m0)) { 5493 CXGBE_UNIMPLEMENTED("UDP ethofld"); 5494 } 5495 5496 len16 = mbuf_eo_len16(m0); 5497 nsegs = mbuf_eo_nsegs(m0); 5498 pktlen = m0->m_pkthdr.len; 5499 ctrl = sizeof(struct cpl_tx_pkt_core); 5500 if (needs_tso(m0)) 5501 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5502 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 5503 ctrl += immhdrs; 5504 5505 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 5506 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 5507 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 5508 V_FW_WR_FLOWID(cst->etid)); 5509 wr->r3 = 0; 5510 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 5511 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 5512 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 5513 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 5514 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 5515 wr->u.tcpseg.r4 = 0; 5516 wr->u.tcpseg.r5 = 0; 5517 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 5518 5519 if (needs_tso(m0)) { 5520 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 5521 5522 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 5523 5524 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 5525 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 5526 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 5527 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 5528 ctrl |= V_LSO_ETHHDR_LEN(1); 5529 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5530 ctrl |= F_LSO_IPV6; 5531 lso->lso_ctrl = htobe32(ctrl); 5532 lso->ipid_ofst = htobe16(0); 5533 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 5534 lso->seqno_offset = htobe32(0); 5535 lso->len = htobe32(pktlen); 5536 5537 cpl = 
(void *)(lso + 1); 5538 } else { 5539 wr->u.tcpseg.mss = htobe16(0xffff); 5540 cpl = (void *)(wr + 1); 5541 } 5542 5543 /* Checksum offload must be requested for ethofld. */ 5544 ctrl1 = 0; 5545 MPASS(needs_l4_csum(m0)); 5546 5547 /* VLAN tag insertion */ 5548 if (needs_vlan_insertion(m0)) { 5549 ctrl1 |= F_TXPKT_VLAN_VLD | 5550 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5551 } 5552 5553 /* CPL header */ 5554 cpl->ctrl0 = cst->ctrl0; 5555 cpl->pack = 0; 5556 cpl->len = htobe16(pktlen); 5557 cpl->ctrl1 = htobe64(ctrl1); 5558 5559 /* Copy Ethernet, IP & TCP hdrs as immediate data */ 5560 p = (uintptr_t)(cpl + 1); 5561 m_copydata(m0, 0, immhdrs, (void *)p); 5562 5563 /* SGL */ 5564 dst = (void *)(cpl + 1); 5565 if (nsegs > 0) { 5566 int i, pad; 5567 5568 /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ 5569 p += immhdrs; 5570 pad = 16 - (immhdrs & 0xf); 5571 bzero((void *)p, pad); 5572 5573 usgl = (void *)(p + pad); 5574 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 5575 V_ULPTX_NSGE(nsegs)); 5576 5577 sglist_init(&sg, nitems(segs), segs); 5578 for (; m0 != NULL; m0 = m0->m_next) { 5579 if (__predict_false(m0->m_len == 0)) 5580 continue; 5581 if (immhdrs >= m0->m_len) { 5582 immhdrs -= m0->m_len; 5583 continue; 5584 } 5585 5586 sglist_append(&sg, mtod(m0, char *) + immhdrs, 5587 m0->m_len - immhdrs); 5588 immhdrs = 0; 5589 } 5590 MPASS(sg.sg_nseg == nsegs); 5591 5592 /* 5593 * Zero pad last 8B in case the WR doesn't end on a 16B 5594 * boundary. 5595 */ 5596 *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; 5597 5598 usgl->len0 = htobe32(segs[0].ss_len); 5599 usgl->addr0 = htobe64(segs[0].ss_paddr); 5600 for (i = 0; i < nsegs - 1; i++) { 5601 usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); 5602 usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); 5603 } 5604 if (i & 1) 5605 usgl->sge[i / 2].len[1] = htobe32(0); 5606 } 5607 5608 } 5609 5610 static void 5611 ethofld_tx(struct cxgbe_snd_tag *cst) 5612 { 5613 struct mbuf *m; 5614 struct wrq_cookie cookie; 5615 int next_credits, compl; 5616 struct fw_eth_tx_eo_wr *wr; 5617 5618 mtx_assert(&cst->lock, MA_OWNED); 5619 5620 while ((m = mbufq_first(&cst->pending_tx)) != NULL) { 5621 M_ASSERTPKTHDR(m); 5622 5623 /* How many len16 credits do we need to send this mbuf. */ 5624 next_credits = mbuf_eo_len16(m); 5625 MPASS(next_credits > 0); 5626 if (next_credits > cst->tx_credits) { 5627 /* 5628 * Tx will make progress eventually because there is at 5629 * least one outstanding fw4_ack that will return 5630 * credits and kick the tx. 5631 */ 5632 MPASS(cst->ncompl > 0); 5633 return; 5634 } 5635 wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie); 5636 if (__predict_false(wr == NULL)) { 5637 /* XXX: wishful thinking, not a real assertion. 
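			 * If start_wrq_wr() fails we just stop and rely on a
			 * pending fw4_ack to return credits and restart tx.
			 * The MPASS below records that expectation, but
			 * nothing here actually guarantees that a completion
			 * is outstanding.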
*/ 5638 MPASS(cst->ncompl > 0); 5639 return; 5640 } 5641 cst->tx_credits -= next_credits; 5642 cst->tx_nocompl += next_credits; 5643 compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; 5644 ETHER_BPF_MTAP(cst->com.ifp, m); 5645 write_ethofld_wr(cst, wr, m, compl); 5646 commit_wrq_wr(cst->eo_txq, wr, &cookie); 5647 if (compl) { 5648 cst->ncompl++; 5649 cst->tx_nocompl = 0; 5650 } 5651 (void) mbufq_dequeue(&cst->pending_tx); 5652 mbufq_enqueue(&cst->pending_fwack, m); 5653 } 5654 } 5655 5656 int 5657 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 5658 { 5659 struct cxgbe_snd_tag *cst; 5660 int rc; 5661 5662 MPASS(m0->m_nextpkt == NULL); 5663 MPASS(m0->m_pkthdr.snd_tag != NULL); 5664 cst = mst_to_cst(m0->m_pkthdr.snd_tag); 5665 5666 mtx_lock(&cst->lock); 5667 MPASS(cst->flags & EO_SND_TAG_REF); 5668 5669 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 5670 struct vi_info *vi = ifp->if_softc; 5671 struct port_info *pi = vi->pi; 5672 struct adapter *sc = pi->adapter; 5673 const uint32_t rss_mask = vi->rss_size - 1; 5674 uint32_t rss_hash; 5675 5676 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 5677 if (M_HASHTYPE_ISHASH(m0)) 5678 rss_hash = m0->m_pkthdr.flowid; 5679 else 5680 rss_hash = arc4random(); 5681 /* We assume RSS hashing */ 5682 cst->iqid = vi->rss[rss_hash & rss_mask]; 5683 cst->eo_txq += rss_hash % vi->nofldtxq; 5684 rc = send_etid_flowc_wr(cst, pi, vi); 5685 if (rc != 0) 5686 goto done; 5687 } 5688 5689 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 5690 rc = ENOBUFS; 5691 goto done; 5692 } 5693 5694 mbufq_enqueue(&cst->pending_tx, m0); 5695 cst->plen += m0->m_pkthdr.len; 5696 5697 ethofld_tx(cst); 5698 rc = 0; 5699 done: 5700 mtx_unlock(&cst->lock); 5701 if (__predict_false(rc != 0)) 5702 m_freem(m0); 5703 return (rc); 5704 } 5705 5706 static int 5707 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 5708 { 5709 struct adapter *sc = iq->adapter; 5710 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 5711 struct mbuf *m; 5712 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 5713 struct cxgbe_snd_tag *cst; 5714 uint8_t credits = cpl->credits; 5715 5716 cst = lookup_etid(sc, etid); 5717 mtx_lock(&cst->lock); 5718 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 5719 MPASS(credits >= ETID_FLOWC_LEN16); 5720 credits -= ETID_FLOWC_LEN16; 5721 cst->flags &= ~EO_FLOWC_RPL_PENDING; 5722 } 5723 5724 KASSERT(cst->ncompl > 0, 5725 ("%s: etid %u (%p) wasn't expecting completion.", 5726 __func__, etid, cst)); 5727 cst->ncompl--; 5728 5729 while (credits > 0) { 5730 m = mbufq_dequeue(&cst->pending_fwack); 5731 if (__predict_false(m == NULL)) { 5732 /* 5733 * The remaining credits are for the final flush that 5734 * was issued when the tag was freed by the kernel. 5735 */ 5736 MPASS((cst->flags & 5737 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 5738 EO_FLUSH_RPL_PENDING); 5739 MPASS(credits == ETID_FLUSH_LEN16); 5740 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 5741 MPASS(cst->ncompl == 0); 5742 5743 cst->flags &= ~EO_FLUSH_RPL_PENDING; 5744 cst->tx_credits += cpl->credits; 5745 freetag: 5746 cxgbe_snd_tag_free_locked(cst); 5747 return (0); /* cst is gone. 
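			 * cxgbe_snd_tag_free_locked() disposed of the tag and
			 * its lock, so return here without touching cst or
			 * calling mtx_unlock().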
*/ 5748 } 5749 KASSERT(m != NULL, 5750 ("%s: too many credits (%u, %u)", __func__, cpl->credits, 5751 credits)); 5752 KASSERT(credits >= mbuf_eo_len16(m), 5753 ("%s: too few credits (%u, %u, %u)", __func__, 5754 cpl->credits, credits, mbuf_eo_len16(m))); 5755 credits -= mbuf_eo_len16(m); 5756 cst->plen -= m->m_pkthdr.len; 5757 m_freem(m); 5758 } 5759 5760 cst->tx_credits += cpl->credits; 5761 MPASS(cst->tx_credits <= cst->tx_total); 5762 5763 m = mbufq_first(&cst->pending_tx); 5764 if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) 5765 ethofld_tx(cst); 5766 5767 if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) && 5768 cst->ncompl == 0) { 5769 if (cst->tx_credits == cst->tx_total) 5770 goto freetag; 5771 else { 5772 MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0); 5773 send_etid_flush_wr(cst); 5774 } 5775 } 5776 5777 mtx_unlock(&cst->lock); 5778 5779 return (0); 5780 } 5781 #endif 5782