1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_kern_tls.h" 36 #include "opt_ratelimit.h" 37 38 #include <sys/types.h> 39 #include <sys/eventhandler.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/ktls.h> 44 #include <sys/malloc.h> 45 #include <sys/queue.h> 46 #include <sys/sbuf.h> 47 #include <sys/taskqueue.h> 48 #include <sys/time.h> 49 #include <sys/sglist.h> 50 #include <sys/sysctl.h> 51 #include <sys/smp.h> 52 #include <sys/socketvar.h> 53 #include <sys/counter.h> 54 #include <net/bpf.h> 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_vlan_var.h> 58 #include <net/if_vxlan.h> 59 #include <netinet/in.h> 60 #include <netinet/ip.h> 61 #include <netinet/ip6.h> 62 #include <netinet/tcp.h> 63 #include <netinet/udp.h> 64 #include <machine/in_cksum.h> 65 #include <machine/md_var.h> 66 #include <vm/vm.h> 67 #include <vm/pmap.h> 68 #ifdef DEV_NETMAP 69 #include <machine/bus.h> 70 #include <sys/selinfo.h> 71 #include <net/if_var.h> 72 #include <net/netmap.h> 73 #include <dev/netmap/netmap_kern.h> 74 #endif 75 76 #include "common/common.h" 77 #include "common/t4_regs.h" 78 #include "common/t4_regs_values.h" 79 #include "common/t4_msg.h" 80 #include "t4_l2t.h" 81 #include "t4_mp_ring.h" 82 83 #ifdef T4_PKT_TIMESTAMP 84 #define RX_COPY_THRESHOLD (MINCLSIZE - 8) 85 #else 86 #define RX_COPY_THRESHOLD MINCLSIZE 87 #endif 88 89 /* Internal mbuf flags stored in PH_loc.eight[1]. */ 90 #define MC_NOMAP 0x01 91 #define MC_RAW_WR 0x02 92 #define MC_TLS 0x04 93 94 /* 95 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 96 * 0-7 are valid values. 97 */ 98 static int fl_pktshift = 0; 99 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0, 100 "payload DMA offset in rx buffer (bytes)"); 101 102 /* 103 * Pad ethernet payload up to this boundary. 104 * -1: driver should figure out a good value. 105 * 0: disable padding. 
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
    "payload pad boundary (bytes)");

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
    "status page size (bytes)");

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
    "Congestion control for RX queues (0 = backpressure, 1 = drop)");

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
    0, "Enable buffer packing");

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
    "payload pack boundary (bytes)");

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
    &largest_rx_cluster, 0, "Largest rx cluster (bytes)");

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
    &safest_rx_cluster, 0, "Safe rx cluster (bytes)");

#ifdef RATELIMIT
/*
 * Knob to control TCP timestamp rewriting, and the granularity of the tick
 * used for rewriting.  -1 and 0-3 are all valid values.
 * -1: hardware should leave the TCP timestamps alone.
 * 0: 1ms
 * 1: 100us
 * 2: 10us
 * 3: 1us
 */
static int tsclk = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0,
    "Control TCP timestamp rewriting when using pacing");

static int eo_max_backlog = 1024 * 1024;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog,
    0, "Maximum backlog of ratelimited data per flow");
#endif

/*
 * The interrupt holdoff timers are multiplied by this value on T6+.
 * 1 and 3-17 (both inclusive) are legal values.
 */
static int tscale = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0,
    "Interrupt holdoff timer scale on T6+");

/*
 * Number of LRO entries in the lro_ctrl structure per rx queue.
195 */ 196 static int lro_entries = TCP_LRO_ENTRIES; 197 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0, 198 "Number of LRO entries per RX queue"); 199 200 /* 201 * This enables presorting of frames before they're fed into tcp_lro_rx. 202 */ 203 static int lro_mbufs = 0; 204 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0, 205 "Enable presorting of LRO frames"); 206 207 static int service_iq(struct sge_iq *, int); 208 static int service_iq_fl(struct sge_iq *, int); 209 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 210 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *, 211 u_int); 212 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); 213 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 214 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 215 uint16_t, char *); 216 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 217 bus_addr_t *, void **); 218 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 219 void *); 220 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 221 int, int); 222 static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); 223 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 224 struct sge_iq *); 225 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 226 struct sysctl_oid *, struct sge_fl *); 227 static int alloc_fwq(struct adapter *); 228 static int free_fwq(struct adapter *); 229 static int alloc_ctrlq(struct adapter *, struct sge_wrq *, int, 230 struct sysctl_oid *); 231 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, 232 struct sysctl_oid *); 233 static int free_rxq(struct vi_info *, struct sge_rxq *); 234 #ifdef TCP_OFFLOAD 235 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 236 struct sysctl_oid *); 237 static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 238 #endif 239 #ifdef DEV_NETMAP 240 static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, 241 struct sysctl_oid *); 242 static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); 243 static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, 244 struct sysctl_oid *); 245 static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); 246 #endif 247 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 248 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 249 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 250 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 251 #endif 252 static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); 253 static int free_eq(struct adapter *, struct sge_eq *); 254 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 255 struct sysctl_oid *); 256 static int free_wrq(struct adapter *, struct sge_wrq *); 257 static int alloc_txq(struct vi_info *, struct sge_txq *, int, 258 struct sysctl_oid *); 259 static int free_txq(struct vi_info *, struct sge_txq *); 260 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 261 static inline void ring_fl_db(struct adapter *, struct sge_fl *); 262 static int refill_fl(struct adapter *, struct sge_fl *, int); 263 static void refill_sfl(void *); 264 static int alloc_fl_sdesc(struct sge_fl *); 265 
static void free_fl_sdesc(struct adapter *, struct sge_fl *); 266 static int find_refill_source(struct adapter *, int, bool); 267 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 268 269 static inline void get_pkt_gl(struct mbuf *, struct sglist *); 270 static inline u_int txpkt_len16(u_int, const u_int); 271 static inline u_int txpkt_vm_len16(u_int, const u_int); 272 static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *); 273 static inline u_int txpkts0_len16(u_int); 274 static inline u_int txpkts1_len16(void); 275 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); 276 static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *, 277 u_int); 278 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 279 struct mbuf *); 280 static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *, 281 int, bool *); 282 static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *, 283 int, bool *); 284 static u_int write_txpkts_wr(struct adapter *, struct sge_txq *); 285 static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *); 286 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 287 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 288 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 289 static inline uint16_t read_hw_cidx(struct sge_eq *); 290 static inline u_int reclaimable_tx_desc(struct sge_eq *); 291 static inline u_int total_available_tx_desc(struct sge_eq *); 292 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 293 static void tx_reclaim(void *, int); 294 static __be64 get_flit(struct sglist_seg *, int, int); 295 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 296 struct mbuf *); 297 static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 298 struct mbuf *); 299 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 300 static void wrq_tx_drain(void *, int); 301 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 302 303 static int sysctl_uint16(SYSCTL_HANDLER_ARGS); 304 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 305 #ifdef RATELIMIT 306 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 307 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 308 struct mbuf *); 309 #endif 310 311 static counter_u64_t extfree_refs; 312 static counter_u64_t extfree_rels; 313 314 an_handler_t t4_an_handler; 315 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 316 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 317 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 318 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 319 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 320 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 321 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 322 323 void 324 t4_register_an_handler(an_handler_t h) 325 { 326 uintptr_t *loc; 327 328 MPASS(h == NULL || t4_an_handler == NULL); 329 330 loc = (uintptr_t *)&t4_an_handler; 331 atomic_store_rel_ptr(loc, (uintptr_t)h); 332 } 333 334 void 335 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 336 { 337 uintptr_t *loc; 338 339 MPASS(type < nitems(t4_fw_msg_handler)); 340 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 341 /* 342 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 343 * handler dispatch table. Reject any attempt to install a handler for 344 * this subtype. 
345 */ 346 MPASS(type != FW_TYPE_RSSCPL); 347 MPASS(type != FW6_TYPE_RSSCPL); 348 349 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 350 atomic_store_rel_ptr(loc, (uintptr_t)h); 351 } 352 353 void 354 t4_register_cpl_handler(int opcode, cpl_handler_t h) 355 { 356 uintptr_t *loc; 357 358 MPASS(opcode < nitems(t4_cpl_handler)); 359 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 360 361 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 362 atomic_store_rel_ptr(loc, (uintptr_t)h); 363 } 364 365 static int 366 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 367 struct mbuf *m) 368 { 369 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 370 u_int tid; 371 int cookie; 372 373 MPASS(m == NULL); 374 375 tid = GET_TID(cpl); 376 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { 377 /* 378 * The return code for filter-write is put in the CPL cookie so 379 * we have to rely on the hardware tid (is_ftid) to determine 380 * that this is a response to a filter. 381 */ 382 cookie = CPL_COOKIE_FILTER; 383 } else { 384 cookie = G_COOKIE(cpl->cookie); 385 } 386 MPASS(cookie > CPL_COOKIE_RESERVED); 387 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 388 389 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 390 } 391 392 static int 393 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 394 struct mbuf *m) 395 { 396 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 397 unsigned int cookie; 398 399 MPASS(m == NULL); 400 401 cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 402 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 403 } 404 405 static int 406 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 407 struct mbuf *m) 408 { 409 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 410 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 411 412 MPASS(m == NULL); 413 MPASS(cookie != CPL_COOKIE_RESERVED); 414 415 return (act_open_rpl_handlers[cookie](iq, rss, m)); 416 } 417 418 static int 419 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 420 struct mbuf *m) 421 { 422 struct adapter *sc = iq->adapter; 423 u_int cookie; 424 425 MPASS(m == NULL); 426 if (is_hashfilter(sc)) 427 cookie = CPL_COOKIE_HASHFILTER; 428 else 429 cookie = CPL_COOKIE_TOM; 430 431 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 432 } 433 434 static int 435 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 436 { 437 struct adapter *sc = iq->adapter; 438 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 439 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 440 u_int cookie; 441 442 MPASS(m == NULL); 443 if (is_etid(sc, tid)) 444 cookie = CPL_COOKIE_ETHOFLD; 445 else 446 cookie = CPL_COOKIE_TOM; 447 448 return (fw4_ack_handlers[cookie](iq, rss, m)); 449 } 450 451 static void 452 t4_init_shared_cpl_handlers(void) 453 { 454 455 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 456 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 457 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 458 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 459 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 460 } 461 462 void 463 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 464 { 465 uintptr_t *loc; 466 467 MPASS(opcode < nitems(t4_cpl_handler)); 468 MPASS(cookie > CPL_COOKIE_RESERVED); 469 MPASS(cookie < NUM_CPL_COOKIES); 470 
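	/*
	 * The base dispatcher for this shared opcode must already be in place
	 * (see t4_init_shared_cpl_handlers); only the per-cookie slot is
	 * filled in here.  A consumer such as the TOE module would register
	 * its CPL_ACT_OPEN_RPL handler under its own cookie rather than
	 * directly in t4_cpl_handler[].
	 */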
MPASS(t4_cpl_handler[opcode] != NULL); 471 472 switch (opcode) { 473 case CPL_SET_TCB_RPL: 474 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 475 break; 476 case CPL_L2T_WRITE_RPL: 477 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 478 break; 479 case CPL_ACT_OPEN_RPL: 480 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 481 break; 482 case CPL_ABORT_RPL_RSS: 483 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 484 break; 485 case CPL_FW4_ACK: 486 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 487 break; 488 default: 489 MPASS(0); 490 return; 491 } 492 MPASS(h == NULL || *loc == (uintptr_t)NULL); 493 atomic_store_rel_ptr(loc, (uintptr_t)h); 494 } 495 496 /* 497 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 498 */ 499 void 500 t4_sge_modload(void) 501 { 502 503 if (fl_pktshift < 0 || fl_pktshift > 7) { 504 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 505 " using 0 instead.\n", fl_pktshift); 506 fl_pktshift = 0; 507 } 508 509 if (spg_len != 64 && spg_len != 128) { 510 int len; 511 512 #if defined(__i386__) || defined(__amd64__) 513 len = cpu_clflush_line_size > 64 ? 128 : 64; 514 #else 515 len = 64; 516 #endif 517 if (spg_len != -1) { 518 printf("Invalid hw.cxgbe.spg_len value (%d)," 519 " using %d instead.\n", spg_len, len); 520 } 521 spg_len = len; 522 } 523 524 if (cong_drop < -1 || cong_drop > 1) { 525 printf("Invalid hw.cxgbe.cong_drop value (%d)," 526 " using 0 instead.\n", cong_drop); 527 cong_drop = 0; 528 } 529 530 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 531 printf("Invalid hw.cxgbe.tscale value (%d)," 532 " using 1 instead.\n", tscale); 533 tscale = 1; 534 } 535 536 extfree_refs = counter_u64_alloc(M_WAITOK); 537 extfree_rels = counter_u64_alloc(M_WAITOK); 538 counter_u64_zero(extfree_refs); 539 counter_u64_zero(extfree_rels); 540 541 t4_init_shared_cpl_handlers(); 542 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 543 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 544 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 545 #ifdef RATELIMIT 546 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 547 CPL_COOKIE_ETHOFLD); 548 #endif 549 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 550 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 551 } 552 553 void 554 t4_sge_modunload(void) 555 { 556 557 counter_u64_free(extfree_refs); 558 counter_u64_free(extfree_rels); 559 } 560 561 uint64_t 562 t4_sge_extfree_refs(void) 563 { 564 uint64_t refs, rels; 565 566 rels = counter_u64_fetch(extfree_rels); 567 refs = counter_u64_fetch(extfree_refs); 568 569 return (refs - rels); 570 } 571 572 /* max 4096 */ 573 #define MAX_PACK_BOUNDARY 512 574 575 static inline void 576 setup_pad_and_pack_boundaries(struct adapter *sc) 577 { 578 uint32_t v, m; 579 int pad, pack, pad_shift; 580 581 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 582 X_INGPADBOUNDARY_SHIFT; 583 pad = fl_pad; 584 if (fl_pad < (1 << pad_shift) || 585 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 586 !powerof2(fl_pad)) { 587 /* 588 * If there is any chance that we might use buffer packing and 589 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 590 * it to the minimum allowed in all other cases. 591 */ 592 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 593 594 /* 595 * For fl_pad = 0 we'll still write a reasonable value to the 596 * register but all the freelists will opt out of padding. 
597 * We'll complain here only if the user tried to set it to a 598 * value greater than 0 that was invalid. 599 */ 600 if (fl_pad > 0) { 601 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 602 " (%d), using %d instead.\n", fl_pad, pad); 603 } 604 } 605 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 606 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 607 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 608 609 if (is_t4(sc)) { 610 if (fl_pack != -1 && fl_pack != pad) { 611 /* Complain but carry on. */ 612 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 613 " using %d instead.\n", fl_pack, pad); 614 } 615 return; 616 } 617 618 pack = fl_pack; 619 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 620 !powerof2(fl_pack)) { 621 if (sc->params.pci.mps > MAX_PACK_BOUNDARY) 622 pack = MAX_PACK_BOUNDARY; 623 else 624 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 625 MPASS(powerof2(pack)); 626 if (pack < 16) 627 pack = 16; 628 if (pack == 32) 629 pack = 64; 630 if (pack > 4096) 631 pack = 4096; 632 if (fl_pack != -1) { 633 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 634 " (%d), using %d instead.\n", fl_pack, pack); 635 } 636 } 637 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 638 if (pack == 16) 639 v = V_INGPACKBOUNDARY(0); 640 else 641 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 642 643 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 644 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 645 } 646 647 /* 648 * adap->params.vpd.cclk must be set up before this is called. 649 */ 650 void 651 t4_tweak_chip_settings(struct adapter *sc) 652 { 653 int i, reg; 654 uint32_t v, m; 655 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 656 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 657 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 658 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 659 static int sw_buf_sizes[] = { 660 MCLBYTES, 661 #if MJUMPAGESIZE != MCLBYTES 662 MJUMPAGESIZE, 663 #endif 664 MJUM9BYTES, 665 MJUM16BYTES 666 }; 667 668 KASSERT(sc->flags & MASTER_PF, 669 ("%s: trying to change chip settings when not master.", __func__)); 670 671 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 672 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 673 V_EGRSTATUSPAGESIZE(spg_len == 128); 674 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 675 676 setup_pad_and_pack_boundaries(sc); 677 678 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 679 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 680 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 681 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 682 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 683 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 684 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 685 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 686 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 687 688 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096); 689 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536); 690 reg = A_SGE_FL_BUFFER_SIZE2; 691 for (i = 0; i < nitems(sw_buf_sizes); i++) { 692 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 693 t4_write_reg(sc, reg, sw_buf_sizes[i]); 694 reg += 4; 695 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 696 t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE); 697 reg += 4; 698 } 699 700 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 701 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 702 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 703 704 KASSERT(intr_timer[0] <= timer_max, 705 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 706 timer_max)); 707 for (i = 1; i < nitems(intr_timer); i++) 
	{
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		/*
		 * A timer that is too large is averaged with the previous
		 * (smaller) timer until it fits; the last timer is simply
		 * clamped to timer_max.
		 */
		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (chip_id(sc) >= CHELSIO_T6) {
		m = V_TSCALE(M_TSCALE);
		if (tscale == 1)
			v = 0;
		else
			v = V_TSCALE(tscale - 2);
		t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v);

		if (sc->debug_flags & DF_DISABLE_TCB_CACHE) {
			m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(M_WRTHRTHRESH);
			t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1);
			v &= ~m;
			v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(16);
			t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1);
		}
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	/*
	 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP.  These have been
	 * chosen with MAXPHYS = 128K in mind.  The largest DDP buffer that we
	 * may have to deal with is MAXPHYS + 1 page.
	 */
	v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4);
	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v);

	/* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */
	m = v = F_TDDPTAGTCB | F_ISCSITAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of 16.  Its
 * address must be 16B aligned.  If padding is in use the buffer's start and end
 * need to be aligned to the pad boundary as well.  We'll just make sure that
 * the size is a multiple of the pad boundary here, it is up to the buffer
 * allocation code to make sure the start of the buffer is aligned.
 */
static inline int
hwsz_ok(struct adapter *sc, int hwsz)
{
	int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}

/*
 * XXX: driver really should be able to deal with unexpected settings.
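 * As it is, t4_read_chip_settings() below only validates the settings it
 * finds and returns EINVAL on any mismatch.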
790 */ 791 int 792 t4_read_chip_settings(struct adapter *sc) 793 { 794 struct sge *s = &sc->sge; 795 struct sge_params *sp = &sc->params.sge; 796 int i, j, n, rc = 0; 797 uint32_t m, v, r; 798 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 799 static int sw_buf_sizes[] = { /* Sorted by size */ 800 MCLBYTES, 801 #if MJUMPAGESIZE != MCLBYTES 802 MJUMPAGESIZE, 803 #endif 804 MJUM9BYTES, 805 MJUM16BYTES 806 }; 807 struct rx_buf_info *rxb; 808 809 m = F_RXPKTCPLMODE; 810 v = F_RXPKTCPLMODE; 811 r = sc->params.sge.sge_control; 812 if ((r & m) != v) { 813 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 814 rc = EINVAL; 815 } 816 817 /* 818 * If this changes then every single use of PAGE_SHIFT in the driver 819 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 820 */ 821 if (sp->page_shift != PAGE_SHIFT) { 822 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 823 rc = EINVAL; 824 } 825 826 s->safe_zidx = -1; 827 rxb = &s->rx_buf_info[0]; 828 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 829 rxb->size1 = sw_buf_sizes[i]; 830 rxb->zone = m_getzone(rxb->size1); 831 rxb->type = m_gettype(rxb->size1); 832 rxb->size2 = 0; 833 rxb->hwidx1 = -1; 834 rxb->hwidx2 = -1; 835 for (j = 0; j < SGE_FLBUF_SIZES; j++) { 836 int hwsize = sp->sge_fl_buffer_size[j]; 837 838 if (!hwsz_ok(sc, hwsize)) 839 continue; 840 841 /* hwidx for size1 */ 842 if (rxb->hwidx1 == -1 && rxb->size1 == hwsize) 843 rxb->hwidx1 = j; 844 845 /* hwidx for size2 (buffer packing) */ 846 if (rxb->size1 - CL_METADATA_SIZE < hwsize) 847 continue; 848 n = rxb->size1 - hwsize - CL_METADATA_SIZE; 849 if (n == 0) { 850 rxb->hwidx2 = j; 851 rxb->size2 = hwsize; 852 break; /* stop looking */ 853 } 854 if (rxb->hwidx2 != -1) { 855 if (n < sp->sge_fl_buffer_size[rxb->hwidx2] - 856 hwsize - CL_METADATA_SIZE) { 857 rxb->hwidx2 = j; 858 rxb->size2 = hwsize; 859 } 860 } else if (n <= 2 * CL_METADATA_SIZE) { 861 rxb->hwidx2 = j; 862 rxb->size2 = hwsize; 863 } 864 } 865 if (rxb->hwidx2 != -1) 866 sc->flags |= BUF_PACKING_OK; 867 if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster) 868 s->safe_zidx = i; 869 } 870 871 if (sc->flags & IS_VF) 872 return (0); 873 874 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 875 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 876 if (r != v) { 877 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 878 rc = EINVAL; 879 } 880 881 m = v = F_TDDPTAGTCB; 882 r = t4_read_reg(sc, A_ULP_RX_CTL); 883 if ((r & m) != v) { 884 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 885 rc = EINVAL; 886 } 887 888 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 889 F_RESETDDPOFFSET; 890 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 891 r = t4_read_reg(sc, A_TP_PARA_REG5); 892 if ((r & m) != v) { 893 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 894 rc = EINVAL; 895 } 896 897 t4_init_tp_params(sc, 1); 898 899 t4_read_mtu_tbl(sc, sc->params.mtus, NULL); 900 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); 901 902 return (rc); 903 } 904 905 int 906 t4_create_dma_tag(struct adapter *sc) 907 { 908 int rc; 909 910 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 911 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 912 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 913 NULL, &sc->dmat); 914 if (rc != 0) { 915 device_printf(sc->dev, 916 "failed to create main DMA tag: %d\n", rc); 917 } 918 919 return (rc); 920 } 921 922 void 923 t4_sge_sysctls(struct adapter *sc, struct 
sysctl_ctx_list *ctx, 924 struct sysctl_oid_list *children) 925 { 926 struct sge_params *sp = &sc->params.sge; 927 928 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 929 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, sc, 0, 930 sysctl_bufsizes, "A", "freelist buffer sizes"); 931 932 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 933 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 934 935 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 936 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 937 938 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 939 NULL, sp->spg_len, "status page size (bytes)"); 940 941 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 942 NULL, cong_drop, "congestion drop setting"); 943 944 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 945 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 946 } 947 948 int 949 t4_destroy_dma_tag(struct adapter *sc) 950 { 951 if (sc->dmat) 952 bus_dma_tag_destroy(sc->dmat); 953 954 return (0); 955 } 956 957 /* 958 * Allocate and initialize the firmware event queue, control queues, and special 959 * purpose rx queues owned by the adapter. 960 * 961 * Returns errno on failure. Resources allocated up to that point may still be 962 * allocated. Caller is responsible for cleanup in case this function fails. 963 */ 964 int 965 t4_setup_adapter_queues(struct adapter *sc) 966 { 967 struct sysctl_oid *oid; 968 struct sysctl_oid_list *children; 969 int rc, i; 970 971 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 972 973 sysctl_ctx_init(&sc->ctx); 974 sc->flags |= ADAP_SYSCTL_CTX; 975 976 /* 977 * Firmware event queue 978 */ 979 rc = alloc_fwq(sc); 980 if (rc != 0) 981 return (rc); 982 983 /* 984 * That's all for the VF driver. 985 */ 986 if (sc->flags & IS_VF) 987 return (rc); 988 989 oid = device_get_sysctl_tree(sc->dev); 990 children = SYSCTL_CHILDREN(oid); 991 992 /* 993 * XXX: General purpose rx queues, one per port. 994 */ 995 996 /* 997 * Control queues, one per port. 998 */ 999 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq", 1000 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "control queues"); 1001 for_each_port(sc, i) { 1002 struct sge_wrq *ctrlq = &sc->sge.ctrlq[i]; 1003 1004 rc = alloc_ctrlq(sc, ctrlq, i, oid); 1005 if (rc != 0) 1006 return (rc); 1007 } 1008 1009 return (rc); 1010 } 1011 1012 /* 1013 * Idempotent 1014 */ 1015 int 1016 t4_teardown_adapter_queues(struct adapter *sc) 1017 { 1018 int i; 1019 1020 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1021 1022 /* Do this before freeing the queue */ 1023 if (sc->flags & ADAP_SYSCTL_CTX) { 1024 sysctl_ctx_free(&sc->ctx); 1025 sc->flags &= ~ADAP_SYSCTL_CTX; 1026 } 1027 1028 if (!(sc->flags & IS_VF)) { 1029 for_each_port(sc, i) 1030 free_wrq(sc, &sc->sge.ctrlq[i]); 1031 } 1032 free_fwq(sc); 1033 1034 return (0); 1035 } 1036 1037 /* Maximum payload that could arrive with a single iq descriptor. 
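 * The result is passed to init_fl() when the rx queues are created in
 * t4_setup_vi_queues().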
*/ 1038 static inline int 1039 max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld) 1040 { 1041 int maxp; 1042 1043 /* large enough even when hw VLAN extraction is disabled */ 1044 maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 1045 ETHER_VLAN_ENCAP_LEN + ifp->if_mtu; 1046 if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS && 1047 maxp < sc->params.tp.max_rx_pdu) 1048 maxp = sc->params.tp.max_rx_pdu; 1049 return (maxp); 1050 } 1051 1052 int 1053 t4_setup_vi_queues(struct vi_info *vi) 1054 { 1055 int rc = 0, i, intr_idx, iqidx; 1056 struct sge_rxq *rxq; 1057 struct sge_txq *txq; 1058 #ifdef TCP_OFFLOAD 1059 struct sge_ofld_rxq *ofld_rxq; 1060 #endif 1061 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1062 struct sge_wrq *ofld_txq; 1063 #endif 1064 #ifdef DEV_NETMAP 1065 int saved_idx; 1066 struct sge_nm_rxq *nm_rxq; 1067 struct sge_nm_txq *nm_txq; 1068 #endif 1069 char name[16]; 1070 struct port_info *pi = vi->pi; 1071 struct adapter *sc = pi->adapter; 1072 struct ifnet *ifp = vi->ifp; 1073 struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); 1074 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1075 int maxp; 1076 1077 /* Interrupt vector to start from (when using multiple vectors) */ 1078 intr_idx = vi->first_intr; 1079 1080 #ifdef DEV_NETMAP 1081 saved_idx = intr_idx; 1082 if (ifp->if_capabilities & IFCAP_NETMAP) { 1083 1084 /* netmap is supported with direct interrupts only. */ 1085 MPASS(!forwarding_intr_to_fwq(sc)); 1086 1087 /* 1088 * We don't have buffers to back the netmap rx queues 1089 * right now so we create the queues in a way that 1090 * doesn't set off any congestion signal in the chip. 1091 */ 1092 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", 1093 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues"); 1094 for_each_nm_rxq(vi, i, nm_rxq) { 1095 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); 1096 if (rc != 0) 1097 goto done; 1098 intr_idx++; 1099 } 1100 1101 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", 1102 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues"); 1103 for_each_nm_txq(vi, i, nm_txq) { 1104 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); 1105 rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); 1106 if (rc != 0) 1107 goto done; 1108 } 1109 } 1110 1111 /* Normal rx queues and netmap rx queues share the same interrupts. */ 1112 intr_idx = saved_idx; 1113 #endif 1114 1115 /* 1116 * Allocate rx queues first because a default iqid is required when 1117 * creating a tx queue. 1118 */ 1119 maxp = max_rx_payload(sc, ifp, false); 1120 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", 1121 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues"); 1122 for_each_rxq(vi, i, rxq) { 1123 1124 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); 1125 1126 snprintf(name, sizeof(name), "%s rxq%d-fl", 1127 device_get_nameunit(vi->dev), i); 1128 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 1129 1130 rc = alloc_rxq(vi, rxq, 1131 forwarding_intr_to_fwq(sc) ? 
-1 : intr_idx, i, oid); 1132 if (rc != 0) 1133 goto done; 1134 intr_idx++; 1135 } 1136 #ifdef DEV_NETMAP 1137 if (ifp->if_capabilities & IFCAP_NETMAP) 1138 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1139 #endif 1140 #ifdef TCP_OFFLOAD 1141 maxp = max_rx_payload(sc, ifp, true); 1142 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", 1143 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues for offloaded TCP connections"); 1144 for_each_ofld_rxq(vi, i, ofld_rxq) { 1145 1146 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 1147 vi->qsize_rxq); 1148 1149 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 1150 device_get_nameunit(vi->dev), i); 1151 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 1152 1153 rc = alloc_ofld_rxq(vi, ofld_rxq, 1154 forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); 1155 if (rc != 0) 1156 goto done; 1157 intr_idx++; 1158 } 1159 #endif 1160 1161 /* 1162 * Now the tx queues. 1163 */ 1164 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", 1165 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues"); 1166 for_each_txq(vi, i, txq) { 1167 iqidx = vi->first_rxq + (i % vi->nrxq); 1168 snprintf(name, sizeof(name), "%s txq%d", 1169 device_get_nameunit(vi->dev), i); 1170 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 1171 sc->sge.rxq[iqidx].iq.cntxt_id, name); 1172 1173 rc = alloc_txq(vi, txq, i, oid); 1174 if (rc != 0) 1175 goto done; 1176 } 1177 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1178 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", 1179 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues for TOE/ETHOFLD"); 1180 for_each_ofld_txq(vi, i, ofld_txq) { 1181 struct sysctl_oid *oid2; 1182 1183 snprintf(name, sizeof(name), "%s ofld_txq%d", 1184 device_get_nameunit(vi->dev), i); 1185 if (vi->nofldrxq > 0) { 1186 iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); 1187 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, 1188 pi->tx_chan, sc->sge.ofld_rxq[iqidx].iq.cntxt_id, 1189 name); 1190 } else { 1191 iqidx = vi->first_rxq + (i % vi->nrxq); 1192 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, 1193 pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name); 1194 } 1195 1196 snprintf(name, sizeof(name), "%d", i); 1197 oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 1198 name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue"); 1199 1200 rc = alloc_wrq(sc, vi, ofld_txq, oid2); 1201 if (rc != 0) 1202 goto done; 1203 } 1204 #endif 1205 done: 1206 if (rc) 1207 t4_teardown_vi_queues(vi); 1208 1209 return (rc); 1210 } 1211 1212 /* 1213 * Idempotent 1214 */ 1215 int 1216 t4_teardown_vi_queues(struct vi_info *vi) 1217 { 1218 int i; 1219 struct sge_rxq *rxq; 1220 struct sge_txq *txq; 1221 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1222 struct port_info *pi = vi->pi; 1223 struct adapter *sc = pi->adapter; 1224 struct sge_wrq *ofld_txq; 1225 #endif 1226 #ifdef TCP_OFFLOAD 1227 struct sge_ofld_rxq *ofld_rxq; 1228 #endif 1229 #ifdef DEV_NETMAP 1230 struct sge_nm_rxq *nm_rxq; 1231 struct sge_nm_txq *nm_txq; 1232 #endif 1233 1234 /* Do this before freeing the queues */ 1235 if (vi->flags & VI_SYSCTL_CTX) { 1236 sysctl_ctx_free(&vi->ctx); 1237 vi->flags &= ~VI_SYSCTL_CTX; 1238 } 1239 1240 #ifdef DEV_NETMAP 1241 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1242 for_each_nm_txq(vi, i, nm_txq) { 1243 free_nm_txq(vi, nm_txq); 1244 } 1245 1246 for_each_nm_rxq(vi, i, nm_rxq) { 1247 free_nm_rxq(vi, nm_rxq); 1248 } 1249 } 1250 #endif 1251 1252 /* 1253 * Take down all the tx queues first, as they reference the rx queues 
1254 * (for egress updates, etc.). 1255 */ 1256 1257 for_each_txq(vi, i, txq) { 1258 free_txq(vi, txq); 1259 } 1260 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1261 for_each_ofld_txq(vi, i, ofld_txq) { 1262 free_wrq(sc, ofld_txq); 1263 } 1264 #endif 1265 1266 /* 1267 * Then take down the rx queues. 1268 */ 1269 1270 for_each_rxq(vi, i, rxq) { 1271 free_rxq(vi, rxq); 1272 } 1273 #ifdef TCP_OFFLOAD 1274 for_each_ofld_rxq(vi, i, ofld_rxq) { 1275 free_ofld_rxq(vi, ofld_rxq); 1276 } 1277 #endif 1278 1279 return (0); 1280 } 1281 1282 /* 1283 * Interrupt handler when the driver is using only 1 interrupt. This is a very 1284 * unusual scenario. 1285 * 1286 * a) Deals with errors, if any. 1287 * b) Services firmware event queue, which is taking interrupts for all other 1288 * queues. 1289 */ 1290 void 1291 t4_intr_all(void *arg) 1292 { 1293 struct adapter *sc = arg; 1294 struct sge_iq *fwq = &sc->sge.fwq; 1295 1296 MPASS(sc->intr_count == 1); 1297 1298 if (sc->intr_type == INTR_INTX) 1299 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1300 1301 t4_intr_err(arg); 1302 t4_intr_evt(fwq); 1303 } 1304 1305 /* 1306 * Interrupt handler for errors (installed directly when multiple interrupts are 1307 * being used, or called by t4_intr_all). 1308 */ 1309 void 1310 t4_intr_err(void *arg) 1311 { 1312 struct adapter *sc = arg; 1313 uint32_t v; 1314 const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0; 1315 1316 if (sc->flags & ADAP_ERR) 1317 return; 1318 1319 v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE)); 1320 if (v & F_PFSW) { 1321 sc->swintr++; 1322 t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v); 1323 } 1324 1325 t4_slow_intr_handler(sc, verbose); 1326 } 1327 1328 /* 1329 * Interrupt handler for iq-only queues. The firmware event queue is the only 1330 * such queue right now. 1331 */ 1332 void 1333 t4_intr_evt(void *arg) 1334 { 1335 struct sge_iq *iq = arg; 1336 1337 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1338 service_iq(iq, 0); 1339 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1340 } 1341 } 1342 1343 /* 1344 * Interrupt handler for iq+fl queues. 1345 */ 1346 void 1347 t4_intr(void *arg) 1348 { 1349 struct sge_iq *iq = arg; 1350 1351 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1352 service_iq_fl(iq, 0); 1353 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1354 } 1355 } 1356 1357 #ifdef DEV_NETMAP 1358 /* 1359 * Interrupt handler for netmap rx queues. 1360 */ 1361 void 1362 t4_nm_intr(void *arg) 1363 { 1364 struct sge_nm_rxq *nm_rxq = arg; 1365 1366 if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) { 1367 service_nm_rxq(nm_rxq); 1368 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON); 1369 } 1370 } 1371 1372 /* 1373 * Interrupt handler for vectors shared between NIC and netmap rx queues. 1374 */ 1375 void 1376 t4_vi_intr(void *arg) 1377 { 1378 struct irq *irq = arg; 1379 1380 MPASS(irq->nm_rxq != NULL); 1381 t4_nm_intr(irq->nm_rxq); 1382 1383 MPASS(irq->rxq != NULL); 1384 t4_intr(irq->rxq); 1385 } 1386 #endif 1387 1388 /* 1389 * Deals with interrupts on an iq-only (no freelist) queue. 
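 * A non-zero budget limits how many descriptors are processed before the
 * function returns EINPROGRESS.  With budget 0 (e.g. the call from
 * t4_intr_evt) the queue is serviced until it is empty, with the GTS register
 * updated every qsize / 16 descriptors.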
1390 */ 1391 static int 1392 service_iq(struct sge_iq *iq, int budget) 1393 { 1394 struct sge_iq *q; 1395 struct adapter *sc = iq->adapter; 1396 struct iq_desc *d = &iq->desc[iq->cidx]; 1397 int ndescs = 0, limit; 1398 int rsp_type; 1399 uint32_t lq; 1400 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1401 1402 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1403 KASSERT((iq->flags & IQ_HAS_FL) == 0, 1404 ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq, 1405 iq->flags)); 1406 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1407 MPASS((iq->flags & IQ_LRO_ENABLED) == 0); 1408 1409 limit = budget ? budget : iq->qsize / 16; 1410 1411 /* 1412 * We always come back and check the descriptor ring for new indirect 1413 * interrupts and other responses after running a single handler. 1414 */ 1415 for (;;) { 1416 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1417 1418 rmb(); 1419 1420 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1421 lq = be32toh(d->rsp.pldbuflen_qid); 1422 1423 switch (rsp_type) { 1424 case X_RSPD_TYPE_FLBUF: 1425 panic("%s: data for an iq (%p) with no freelist", 1426 __func__, iq); 1427 1428 /* NOTREACHED */ 1429 1430 case X_RSPD_TYPE_CPL: 1431 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1432 ("%s: bad opcode %02x.", __func__, 1433 d->rss.opcode)); 1434 t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL); 1435 break; 1436 1437 case X_RSPD_TYPE_INTR: 1438 /* 1439 * There are 1K interrupt-capable queues (qids 0 1440 * through 1023). A response type indicating a 1441 * forwarded interrupt with a qid >= 1K is an 1442 * iWARP async notification. 1443 */ 1444 if (__predict_true(lq >= 1024)) { 1445 t4_an_handler(iq, &d->rsp); 1446 break; 1447 } 1448 1449 q = sc->sge.iqmap[lq - sc->sge.iq_start - 1450 sc->sge.iq_base]; 1451 if (atomic_cmpset_int(&q->state, IQS_IDLE, 1452 IQS_BUSY)) { 1453 if (service_iq_fl(q, q->qsize / 16) == 0) { 1454 (void) atomic_cmpset_int(&q->state, 1455 IQS_BUSY, IQS_IDLE); 1456 } else { 1457 STAILQ_INSERT_TAIL(&iql, q, 1458 link); 1459 } 1460 } 1461 break; 1462 1463 default: 1464 KASSERT(0, 1465 ("%s: illegal response type %d on iq %p", 1466 __func__, rsp_type, iq)); 1467 log(LOG_ERR, 1468 "%s: illegal response type %d on iq %p", 1469 device_get_nameunit(sc->dev), rsp_type, iq); 1470 break; 1471 } 1472 1473 d++; 1474 if (__predict_false(++iq->cidx == iq->sidx)) { 1475 iq->cidx = 0; 1476 iq->gen ^= F_RSPD_GEN; 1477 d = &iq->desc[0]; 1478 } 1479 if (__predict_false(++ndescs == limit)) { 1480 t4_write_reg(sc, sc->sge_gts_reg, 1481 V_CIDXINC(ndescs) | 1482 V_INGRESSQID(iq->cntxt_id) | 1483 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1484 ndescs = 0; 1485 1486 if (budget) { 1487 return (EINPROGRESS); 1488 } 1489 } 1490 } 1491 1492 if (STAILQ_EMPTY(&iql)) 1493 break; 1494 1495 /* 1496 * Process the head only, and send it to the back of the list if 1497 * it's still not done. 
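 * This round-robins the backlogged queues so that a single busy queue cannot
 * monopolize this iq's service loop.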
1498 */ 1499 q = STAILQ_FIRST(&iql); 1500 STAILQ_REMOVE_HEAD(&iql, link); 1501 if (service_iq_fl(q, q->qsize / 8) == 0) 1502 (void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 1503 else 1504 STAILQ_INSERT_TAIL(&iql, q, link); 1505 } 1506 1507 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1508 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1509 1510 return (0); 1511 } 1512 1513 static inline int 1514 sort_before_lro(struct lro_ctrl *lro) 1515 { 1516 1517 return (lro->lro_mbuf_max != 0); 1518 } 1519 1520 static inline uint64_t 1521 last_flit_to_ns(struct adapter *sc, uint64_t lf) 1522 { 1523 uint64_t n = be64toh(lf) & 0xfffffffffffffff; /* 60b, not 64b. */ 1524 1525 if (n > UINT64_MAX / 1000000) 1526 return (n / sc->params.vpd.cclk * 1000000); 1527 else 1528 return (n * 1000000 / sc->params.vpd.cclk); 1529 } 1530 1531 static inline void 1532 move_to_next_rxbuf(struct sge_fl *fl) 1533 { 1534 1535 fl->rx_offset = 0; 1536 if (__predict_false((++fl->cidx & 7) == 0)) { 1537 uint16_t cidx = fl->cidx >> 3; 1538 1539 if (__predict_false(cidx == fl->sidx)) 1540 fl->cidx = cidx = 0; 1541 fl->hw_cidx = cidx; 1542 } 1543 } 1544 1545 /* 1546 * Deals with interrupts on an iq+fl queue. 1547 */ 1548 static int 1549 service_iq_fl(struct sge_iq *iq, int budget) 1550 { 1551 struct sge_rxq *rxq = iq_to_rxq(iq); 1552 struct sge_fl *fl; 1553 struct adapter *sc = iq->adapter; 1554 struct iq_desc *d = &iq->desc[iq->cidx]; 1555 int ndescs, limit; 1556 int rsp_type, starved; 1557 uint32_t lq; 1558 uint16_t fl_hw_cidx; 1559 struct mbuf *m0; 1560 #if defined(INET) || defined(INET6) 1561 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1562 struct lro_ctrl *lro = &rxq->lro; 1563 #endif 1564 1565 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1566 MPASS(iq->flags & IQ_HAS_FL); 1567 1568 ndescs = 0; 1569 #if defined(INET) || defined(INET6) 1570 if (iq->flags & IQ_ADJ_CREDIT) { 1571 MPASS(sort_before_lro(lro)); 1572 iq->flags &= ~IQ_ADJ_CREDIT; 1573 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { 1574 tcp_lro_flush_all(lro); 1575 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | 1576 V_INGRESSQID((u32)iq->cntxt_id) | 1577 V_SEINTARM(iq->intr_params)); 1578 return (0); 1579 } 1580 ndescs = 1; 1581 } 1582 #else 1583 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1584 #endif 1585 1586 limit = budget ? 
budget : iq->qsize / 16; 1587 fl = &rxq->fl; 1588 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1589 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1590 1591 rmb(); 1592 1593 m0 = NULL; 1594 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1595 lq = be32toh(d->rsp.pldbuflen_qid); 1596 1597 switch (rsp_type) { 1598 case X_RSPD_TYPE_FLBUF: 1599 if (lq & F_RSPD_NEWBUF) { 1600 if (fl->rx_offset > 0) 1601 move_to_next_rxbuf(fl); 1602 lq = G_RSPD_LEN(lq); 1603 } 1604 if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) { 1605 FL_LOCK(fl); 1606 refill_fl(sc, fl, 64); 1607 FL_UNLOCK(fl); 1608 fl_hw_cidx = fl->hw_cidx; 1609 } 1610 1611 if (d->rss.opcode == CPL_RX_PKT) { 1612 if (__predict_true(eth_rx(sc, rxq, d, lq) == 0)) 1613 break; 1614 goto out; 1615 } 1616 m0 = get_fl_payload(sc, fl, lq); 1617 if (__predict_false(m0 == NULL)) 1618 goto out; 1619 1620 /* fall through */ 1621 1622 case X_RSPD_TYPE_CPL: 1623 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1624 ("%s: bad opcode %02x.", __func__, d->rss.opcode)); 1625 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1626 break; 1627 1628 case X_RSPD_TYPE_INTR: 1629 1630 /* 1631 * There are 1K interrupt-capable queues (qids 0 1632 * through 1023). A response type indicating a 1633 * forwarded interrupt with a qid >= 1K is an 1634 * iWARP async notification. That is the only 1635 * acceptable indirect interrupt on this queue. 1636 */ 1637 if (__predict_false(lq < 1024)) { 1638 panic("%s: indirect interrupt on iq_fl %p " 1639 "with qid %u", __func__, iq, lq); 1640 } 1641 1642 t4_an_handler(iq, &d->rsp); 1643 break; 1644 1645 default: 1646 KASSERT(0, ("%s: illegal response type %d on iq %p", 1647 __func__, rsp_type, iq)); 1648 log(LOG_ERR, "%s: illegal response type %d on iq %p", 1649 device_get_nameunit(sc->dev), rsp_type, iq); 1650 break; 1651 } 1652 1653 d++; 1654 if (__predict_false(++iq->cidx == iq->sidx)) { 1655 iq->cidx = 0; 1656 iq->gen ^= F_RSPD_GEN; 1657 d = &iq->desc[0]; 1658 } 1659 if (__predict_false(++ndescs == limit)) { 1660 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1661 V_INGRESSQID(iq->cntxt_id) | 1662 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1663 1664 #if defined(INET) || defined(INET6) 1665 if (iq->flags & IQ_LRO_ENABLED && 1666 !sort_before_lro(lro) && 1667 sc->lro_timeout != 0) { 1668 tcp_lro_flush_inactive(lro, &lro_timeout); 1669 } 1670 #endif 1671 if (budget) 1672 return (EINPROGRESS); 1673 ndescs = 0; 1674 } 1675 } 1676 out: 1677 #if defined(INET) || defined(INET6) 1678 if (iq->flags & IQ_LRO_ENABLED) { 1679 if (ndescs > 0 && lro->lro_mbuf_count > 8) { 1680 MPASS(sort_before_lro(lro)); 1681 /* hold back one credit and don't flush LRO state */ 1682 iq->flags |= IQ_ADJ_CREDIT; 1683 ndescs--; 1684 } else { 1685 tcp_lro_flush_all(lro); 1686 } 1687 } 1688 #endif 1689 1690 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1691 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1692 1693 FL_LOCK(fl); 1694 starved = refill_fl(sc, fl, 64); 1695 FL_UNLOCK(fl); 1696 if (__predict_false(starved != 0)) 1697 add_fl_to_sfl(sc, fl); 1698 1699 return (0); 1700 } 1701 1702 static inline struct cluster_metadata * 1703 cl_metadata(struct fl_sdesc *sd) 1704 { 1705 1706 return ((void *)(sd->cl + sd->moff)); 1707 } 1708 1709 static void 1710 rxb_free(struct mbuf *m) 1711 { 1712 struct cluster_metadata *clm = m->m_ext.ext_arg1; 1713 1714 uma_zfree(clm->zone, clm->cl); 1715 counter_u64_add(extfree_rels, 1); 1716 } 1717 1718 /* 1719 * The mbuf returned comes from zone_muf and carries the payload in one of these 1720 * 
ways
 * a) complete frame inside the mbuf
 * b) m_cljset (for clusters without metadata)
 * c) m_extaddref (cluster with metadata)
 */
static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
	struct cluster_metadata *clm;
	int len, blen;
	caddr_t payload;

	if (fl->flags & FL_BUF_PACKING) {
		u_int l, pad;

		blen = rxb->size2 - fl->rx_offset;	/* max possible in this buf */
		len = min(remaining, blen);
		payload = sd->cl + fl->rx_offset;

		l = fr_offset + len;
		pad = roundup2(l, fl->buf_boundary) - l;
		if (fl->rx_offset + len + pad < rxb->size2)
			blen = len + pad;
		MPASS(fl->rx_offset + blen <= rxb->size2);
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
		blen = rxb->size1;
		len = min(remaining, blen);
		payload = sd->cl;
	}

	if (fr_offset == 0) {
		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
		m->m_pkthdr.len = remaining;
	} else {
		m = m_get(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
	}
	m->m_len = len;

	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);
		if (fl->flags & FL_BUF_PACKING) {
			fl->rx_offset += blen;
			MPASS(fl->rx_offset <= rxb->size2);
			if (fl->rx_offset < rxb->size2)
				return (m);	/* without advancing the cidx */
		}
	} else if (fl->flags & FL_BUF_PACKING) {
		clm = cl_metadata(sd);
		if (sd->nmbuf++ == 0) {
			/* First mbuf from this cluster: set up the shared refcount. */
			clm->refcount = 1;
			clm->zone = rxb->zone;
			clm->cl = sd->cl;
			counter_u64_add(extfree_refs, 1);
		}
		m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm,
		    NULL);

		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= rxb->size2);
		if (fl->rx_offset < rxb->size2)
			return (m);	/* without advancing the cidx */
	} else {
		m_cljset(m, sd->cl, rxb->type);
		sd->cl = NULL;	/* consumed, not a recycle candidate */
	}

	move_to_next_rxbuf(fl);

	return (m);
}

static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen)
{
	struct mbuf *m0, *m, **pnext;
	u_int remaining;

	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
		M_ASSERTPKTHDR(fl->m0);
		MPASS(fl->m0->m_pkthdr.len == plen);
		MPASS(fl->remaining < plen);

		m0 = fl->m0;
		pnext = fl->pnext;
		remaining = fl->remaining;
		fl->flags &= ~FL_BUF_RESUME;
		goto get_segment;
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'len' and it may span multiple hw buffers.
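	 * If an mbuf allocation fails partway through, the partial chain is
	 * parked in the fl (FL_BUF_RESUME) and reassembly resumes from that
	 * point on the next call.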
1822 */ 1823 1824 m0 = get_scatter_segment(sc, fl, 0, plen); 1825 if (m0 == NULL) 1826 return (NULL); 1827 remaining = plen - m0->m_len; 1828 pnext = &m0->m_next; 1829 while (remaining > 0) { 1830 get_segment: 1831 MPASS(fl->rx_offset == 0); 1832 m = get_scatter_segment(sc, fl, plen - remaining, remaining); 1833 if (__predict_false(m == NULL)) { 1834 fl->m0 = m0; 1835 fl->pnext = pnext; 1836 fl->remaining = remaining; 1837 fl->flags |= FL_BUF_RESUME; 1838 return (NULL); 1839 } 1840 *pnext = m; 1841 pnext = &m->m_next; 1842 remaining -= m->m_len; 1843 } 1844 *pnext = NULL; 1845 1846 M_ASSERTPKTHDR(m0); 1847 return (m0); 1848 } 1849 1850 static int 1851 skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1852 int remaining) 1853 { 1854 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1855 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1856 int len, blen; 1857 1858 if (fl->flags & FL_BUF_PACKING) { 1859 u_int l, pad; 1860 1861 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ 1862 len = min(remaining, blen); 1863 1864 l = fr_offset + len; 1865 pad = roundup2(l, fl->buf_boundary) - l; 1866 if (fl->rx_offset + len + pad < rxb->size2) 1867 blen = len + pad; 1868 fl->rx_offset += blen; 1869 MPASS(fl->rx_offset <= rxb->size2); 1870 if (fl->rx_offset < rxb->size2) 1871 return (len); /* without advancing the cidx */ 1872 } else { 1873 MPASS(fl->rx_offset == 0); /* not packing */ 1874 blen = rxb->size1; 1875 len = min(remaining, blen); 1876 } 1877 move_to_next_rxbuf(fl); 1878 return (len); 1879 } 1880 1881 static inline void 1882 skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen) 1883 { 1884 int remaining, fr_offset, len; 1885 1886 fr_offset = 0; 1887 remaining = plen; 1888 while (remaining > 0) { 1889 len = skip_scatter_segment(sc, fl, fr_offset, remaining); 1890 fr_offset += len; 1891 remaining -= len; 1892 } 1893 } 1894 1895 static inline int 1896 get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen) 1897 { 1898 int len; 1899 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1900 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1901 1902 if (fl->flags & FL_BUF_PACKING) 1903 len = rxb->size2 - fl->rx_offset; 1904 else 1905 len = rxb->size1; 1906 1907 return (min(plen, len)); 1908 } 1909 1910 static int 1911 eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, 1912 u_int plen) 1913 { 1914 struct mbuf *m0; 1915 struct ifnet *ifp = rxq->ifp; 1916 struct sge_fl *fl = &rxq->fl; 1917 struct vi_info *vi = ifp->if_softc; 1918 const struct cpl_rx_pkt *cpl; 1919 #if defined(INET) || defined(INET6) 1920 struct lro_ctrl *lro = &rxq->lro; 1921 #endif 1922 uint16_t err_vec, tnl_type, tnlhdr_len; 1923 static const int sw_hashtype[4][2] = { 1924 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1925 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1926 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1927 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1928 }; 1929 static const int sw_csum_flags[2][2] = { 1930 { 1931 /* IP, inner IP */ 1932 CSUM_ENCAP_VXLAN | 1933 CSUM_L3_CALC | CSUM_L3_VALID | 1934 CSUM_L4_CALC | CSUM_L4_VALID | 1935 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1936 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1937 1938 /* IP, inner IP6 */ 1939 CSUM_ENCAP_VXLAN | 1940 CSUM_L3_CALC | CSUM_L3_VALID | 1941 CSUM_L4_CALC | CSUM_L4_VALID | 1942 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1943 }, 1944 { 1945 /* IP6, inner IP */ 1946 CSUM_ENCAP_VXLAN | 1947 CSUM_L4_CALC | CSUM_L4_VALID | 1948 CSUM_INNER_L3_CALC | 
CSUM_INNER_L3_VALID | 1949 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1950 1951 /* IP6, inner IP6 */ 1952 CSUM_ENCAP_VXLAN | 1953 CSUM_L4_CALC | CSUM_L4_VALID | 1954 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1955 }, 1956 }; 1957 1958 MPASS(plen > sc->params.sge.fl_pktshift); 1959 if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) && 1960 __predict_true((fl->flags & FL_BUF_RESUME) == 0)) { 1961 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1962 caddr_t frame; 1963 int rc, slen; 1964 1965 slen = get_segment_len(sc, fl, plen) - 1966 sc->params.sge.fl_pktshift; 1967 frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift; 1968 CURVNET_SET_QUIET(ifp->if_vnet); 1969 rc = pfil_run_hooks(vi->pfil, frame, ifp, 1970 slen | PFIL_MEMPTR | PFIL_IN, NULL); 1971 CURVNET_RESTORE(); 1972 if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) { 1973 skip_fl_payload(sc, fl, plen); 1974 return (0); 1975 } 1976 if (rc == PFIL_REALLOCED) { 1977 skip_fl_payload(sc, fl, plen); 1978 m0 = pfil_mem2mbuf(frame); 1979 goto have_mbuf; 1980 } 1981 } 1982 1983 m0 = get_fl_payload(sc, fl, plen); 1984 if (__predict_false(m0 == NULL)) 1985 return (ENOMEM); 1986 1987 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 1988 m0->m_len -= sc->params.sge.fl_pktshift; 1989 m0->m_data += sc->params.sge.fl_pktshift; 1990 1991 have_mbuf: 1992 m0->m_pkthdr.rcvif = ifp; 1993 M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]); 1994 m0->m_pkthdr.flowid = be32toh(d->rss.hash_val); 1995 1996 cpl = (const void *)(&d->rss + 1); 1997 if (sc->params.tp.rx_pkt_encap) { 1998 const uint16_t ev = be16toh(cpl->err_vec); 1999 2000 err_vec = G_T6_COMPR_RXERR_VEC(ev); 2001 tnl_type = G_T6_RX_TNL_TYPE(ev); 2002 tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev); 2003 } else { 2004 err_vec = be16toh(cpl->err_vec); 2005 tnl_type = 0; 2006 tnlhdr_len = 0; 2007 } 2008 if (cpl->csum_calc && err_vec == 0) { 2009 int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6)); 2010 2011 /* checksum(s) calculated and found to be correct. */ 2012 2013 MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^ 2014 (cpl->l2info & htobe32(F_RXF_IP6))); 2015 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 2016 if (tnl_type == 0) { 2017 if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) { 2018 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2019 CSUM_L3_VALID | CSUM_L4_CALC | 2020 CSUM_L4_VALID; 2021 } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) { 2022 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2023 CSUM_L4_VALID; 2024 } 2025 rxq->rxcsum++; 2026 } else { 2027 MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN); 2028 if (__predict_false(cpl->ip_frag)) { 2029 /* 2030 * csum_data is for the inner frame (which is an 2031 * IP fragment) and is not 0xffff. There is no 2032 * way to pass the inner csum_data to the stack. 2033 * We don't want the stack to use the inner 2034 * csum_data to validate the outer frame or it 2035 * will get rejected. So we fix csum_data here 2036 * and let sw do the checksum of inner IP 2037 * fragments. 2038 * 2039 * XXX: Need 32b for csum_data2 in an rx mbuf. 2040 * Maybe stuff it into rcv_tstmp? 
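 * The code below therefore reports only the outer checksum as verified
 * (csum_data is forced to 0xffff) and leaves the inner IP fragment for
 * the stack to checksum in software.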
2041 */ 2042 m0->m_pkthdr.csum_data = 0xffff; 2043 if (ipv6) { 2044 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2045 CSUM_L4_VALID; 2046 } else { 2047 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2048 CSUM_L3_VALID | CSUM_L4_CALC | 2049 CSUM_L4_VALID; 2050 } 2051 } else { 2052 int outer_ipv6; 2053 2054 MPASS(m0->m_pkthdr.csum_data == 0xffff); 2055 2056 outer_ipv6 = tnlhdr_len >= 2057 sizeof(struct ether_header) + 2058 sizeof(struct ip6_hdr); 2059 m0->m_pkthdr.csum_flags = 2060 sw_csum_flags[outer_ipv6][ipv6]; 2061 } 2062 rxq->vxlan_rxcsum++; 2063 } 2064 } 2065 2066 if (cpl->vlan_ex) { 2067 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 2068 m0->m_flags |= M_VLANTAG; 2069 rxq->vlan_extraction++; 2070 } 2071 2072 if (rxq->iq.flags & IQ_RX_TIMESTAMP) { 2073 /* 2074 * Fill up rcv_tstmp but do not set M_TSTMP. 2075 * rcv_tstmp is not in the format that the 2076 * kernel expects and we don't want to mislead 2077 * it. For now this is only for custom code 2078 * that knows how to interpret cxgbe's stamp. 2079 */ 2080 m0->m_pkthdr.rcv_tstmp = 2081 last_flit_to_ns(sc, d->rsp.u.last_flit); 2082 #ifdef notyet 2083 m0->m_flags |= M_TSTMP; 2084 #endif 2085 } 2086 2087 #ifdef NUMA 2088 m0->m_pkthdr.numa_domain = ifp->if_numa_domain; 2089 #endif 2090 #if defined(INET) || defined(INET6) 2091 if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 && 2092 (M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 || 2093 M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) { 2094 if (sort_before_lro(lro)) { 2095 tcp_lro_queue_mbuf(lro, m0); 2096 return (0); /* queued for sort, then LRO */ 2097 } 2098 if (tcp_lro_rx(lro, m0, 0) == 0) 2099 return (0); /* queued for LRO */ 2100 } 2101 #endif 2102 ifp->if_input(ifp, m0); 2103 2104 return (0); 2105 } 2106 2107 /* 2108 * Must drain the wrq or make sure that someone else will. 2109 */ 2110 static void 2111 wrq_tx_drain(void *arg, int n) 2112 { 2113 struct sge_wrq *wrq = arg; 2114 struct sge_eq *eq = &wrq->eq; 2115 2116 EQ_LOCK(eq); 2117 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2118 drain_wrq_wr_list(wrq->adapter, wrq); 2119 EQ_UNLOCK(eq); 2120 } 2121 2122 static void 2123 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 2124 { 2125 struct sge_eq *eq = &wrq->eq; 2126 u_int available, dbdiff; /* # of hardware descriptors */ 2127 u_int n; 2128 struct wrqe *wr; 2129 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2130 2131 EQ_LOCK_ASSERT_OWNED(eq); 2132 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 2133 wr = STAILQ_FIRST(&wrq->wr_list); 2134 MPASS(wr != NULL); /* Must be called with something useful to do */ 2135 MPASS(eq->pidx == eq->dbidx); 2136 dbdiff = 0; 2137 2138 do { 2139 eq->cidx = read_hw_cidx(eq); 2140 if (eq->pidx == eq->cidx) 2141 available = eq->sidx - 1; 2142 else 2143 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2144 2145 MPASS(wr->wrq == wrq); 2146 n = howmany(wr->wr_len, EQ_ESIZE); 2147 if (available < n) 2148 break; 2149 2150 dst = (void *)&eq->desc[eq->pidx]; 2151 if (__predict_true(eq->sidx - eq->pidx > n)) { 2152 /* Won't wrap, won't end exactly at the status page. 
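 * A single bcopy suffices and pidx can simply be advanced by n.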
*/ 2153 bcopy(&wr->wr[0], dst, wr->wr_len); 2154 eq->pidx += n; 2155 } else { 2156 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 2157 2158 bcopy(&wr->wr[0], dst, first_portion); 2159 if (wr->wr_len > first_portion) { 2160 bcopy(&wr->wr[first_portion], &eq->desc[0], 2161 wr->wr_len - first_portion); 2162 } 2163 eq->pidx = n - (eq->sidx - eq->pidx); 2164 } 2165 wrq->tx_wrs_copied++; 2166 2167 if (available < eq->sidx / 4 && 2168 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2169 /* 2170 * XXX: This is not 100% reliable with some 2171 * types of WRs. But this is a very unusual 2172 * situation for an ofld/ctrl queue anyway. 2173 */ 2174 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2175 F_FW_WR_EQUEQ); 2176 } 2177 2178 dbdiff += n; 2179 if (dbdiff >= 16) { 2180 ring_eq_db(sc, eq, dbdiff); 2181 dbdiff = 0; 2182 } 2183 2184 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 2185 free_wrqe(wr); 2186 MPASS(wrq->nwr_pending > 0); 2187 wrq->nwr_pending--; 2188 MPASS(wrq->ndesc_needed >= n); 2189 wrq->ndesc_needed -= n; 2190 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 2191 2192 if (dbdiff) 2193 ring_eq_db(sc, eq, dbdiff); 2194 } 2195 2196 /* 2197 * Doesn't fail. Holds on to work requests it can't send right away. 2198 */ 2199 void 2200 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 2201 { 2202 #ifdef INVARIANTS 2203 struct sge_eq *eq = &wrq->eq; 2204 #endif 2205 2206 EQ_LOCK_ASSERT_OWNED(eq); 2207 MPASS(wr != NULL); 2208 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 2209 MPASS((wr->wr_len & 0x7) == 0); 2210 2211 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 2212 wrq->nwr_pending++; 2213 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 2214 2215 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 2216 return; /* commit_wrq_wr will drain wr_list as well. */ 2217 2218 drain_wrq_wr_list(sc, wrq); 2219 2220 /* Doorbell must have caught up to the pidx. 
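 * drain_wrq_wr_list rings the doorbell for everything it copies into
 * the descriptor ring, so nothing should be left pending at this point.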
*/ 2221 MPASS(eq->pidx == eq->dbidx); 2222 } 2223 2224 void 2225 t4_update_fl_bufsize(struct ifnet *ifp) 2226 { 2227 struct vi_info *vi = ifp->if_softc; 2228 struct adapter *sc = vi->adapter; 2229 struct sge_rxq *rxq; 2230 #ifdef TCP_OFFLOAD 2231 struct sge_ofld_rxq *ofld_rxq; 2232 #endif 2233 struct sge_fl *fl; 2234 int i, maxp; 2235 2236 maxp = max_rx_payload(sc, ifp, false); 2237 for_each_rxq(vi, i, rxq) { 2238 fl = &rxq->fl; 2239 2240 FL_LOCK(fl); 2241 fl->zidx = find_refill_source(sc, maxp, 2242 fl->flags & FL_BUF_PACKING); 2243 FL_UNLOCK(fl); 2244 } 2245 #ifdef TCP_OFFLOAD 2246 maxp = max_rx_payload(sc, ifp, true); 2247 for_each_ofld_rxq(vi, i, ofld_rxq) { 2248 fl = &ofld_rxq->fl; 2249 2250 FL_LOCK(fl); 2251 fl->zidx = find_refill_source(sc, maxp, 2252 fl->flags & FL_BUF_PACKING); 2253 FL_UNLOCK(fl); 2254 } 2255 #endif 2256 } 2257 2258 static inline int 2259 mbuf_nsegs(struct mbuf *m) 2260 { 2261 2262 M_ASSERTPKTHDR(m); 2263 KASSERT(m->m_pkthdr.inner_l5hlen > 0, 2264 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 2265 2266 return (m->m_pkthdr.inner_l5hlen); 2267 } 2268 2269 static inline void 2270 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 2271 { 2272 2273 M_ASSERTPKTHDR(m); 2274 m->m_pkthdr.inner_l5hlen = nsegs; 2275 } 2276 2277 static inline int 2278 mbuf_cflags(struct mbuf *m) 2279 { 2280 2281 M_ASSERTPKTHDR(m); 2282 return (m->m_pkthdr.PH_loc.eight[4]); 2283 } 2284 2285 static inline void 2286 set_mbuf_cflags(struct mbuf *m, uint8_t flags) 2287 { 2288 2289 M_ASSERTPKTHDR(m); 2290 m->m_pkthdr.PH_loc.eight[4] = flags; 2291 } 2292 2293 static inline int 2294 mbuf_len16(struct mbuf *m) 2295 { 2296 int n; 2297 2298 M_ASSERTPKTHDR(m); 2299 n = m->m_pkthdr.PH_loc.eight[0]; 2300 if (!(mbuf_cflags(m) & MC_TLS)) 2301 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2302 2303 return (n); 2304 } 2305 2306 static inline void 2307 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2308 { 2309 2310 M_ASSERTPKTHDR(m); 2311 m->m_pkthdr.PH_loc.eight[0] = len16; 2312 } 2313 2314 #ifdef RATELIMIT 2315 static inline int 2316 mbuf_eo_nsegs(struct mbuf *m) 2317 { 2318 2319 M_ASSERTPKTHDR(m); 2320 return (m->m_pkthdr.PH_loc.eight[1]); 2321 } 2322 2323 static inline void 2324 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2325 { 2326 2327 M_ASSERTPKTHDR(m); 2328 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2329 } 2330 2331 static inline int 2332 mbuf_eo_len16(struct mbuf *m) 2333 { 2334 int n; 2335 2336 M_ASSERTPKTHDR(m); 2337 n = m->m_pkthdr.PH_loc.eight[2]; 2338 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2339 2340 return (n); 2341 } 2342 2343 static inline void 2344 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2345 { 2346 2347 M_ASSERTPKTHDR(m); 2348 m->m_pkthdr.PH_loc.eight[2] = len16; 2349 } 2350 2351 static inline int 2352 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2353 { 2354 2355 M_ASSERTPKTHDR(m); 2356 return (m->m_pkthdr.PH_loc.eight[3]); 2357 } 2358 2359 static inline void 2360 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2361 { 2362 2363 M_ASSERTPKTHDR(m); 2364 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2365 } 2366 2367 static inline int 2368 needs_eo(struct cxgbe_snd_tag *cst) 2369 { 2370 2371 return (cst != NULL && cst->type == IF_SND_TAG_TYPE_RATE_LIMIT); 2372 } 2373 #endif 2374 2375 /* 2376 * Try to allocate an mbuf to contain a raw work request. To make it 2377 * easy to construct the work request, don't allocate a chain but a 2378 * single mbuf. 
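 * This limits a raw work request to what fits in a single cluster
 * (MCLBYTES); anything larger makes this function return NULL.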
2379 */ 2380 struct mbuf * 2381 alloc_wr_mbuf(int len, int how) 2382 { 2383 struct mbuf *m; 2384 2385 if (len <= MHLEN) 2386 m = m_gethdr(how, MT_DATA); 2387 else if (len <= MCLBYTES) 2388 m = m_getcl(how, MT_DATA, M_PKTHDR); 2389 else 2390 m = NULL; 2391 if (m == NULL) 2392 return (NULL); 2393 m->m_pkthdr.len = len; 2394 m->m_len = len; 2395 set_mbuf_cflags(m, MC_RAW_WR); 2396 set_mbuf_len16(m, howmany(len, 16)); 2397 return (m); 2398 } 2399 2400 static inline bool 2401 needs_hwcsum(struct mbuf *m) 2402 { 2403 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | 2404 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP | 2405 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP | 2406 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP | 2407 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO; 2408 2409 M_ASSERTPKTHDR(m); 2410 2411 return (m->m_pkthdr.csum_flags & csum_flags); 2412 } 2413 2414 static inline bool 2415 needs_tso(struct mbuf *m) 2416 { 2417 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO | 2418 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2419 2420 M_ASSERTPKTHDR(m); 2421 2422 return (m->m_pkthdr.csum_flags & csum_flags); 2423 } 2424 2425 static inline bool 2426 needs_vxlan_csum(struct mbuf *m) 2427 { 2428 2429 M_ASSERTPKTHDR(m); 2430 2431 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN); 2432 } 2433 2434 static inline bool 2435 needs_vxlan_tso(struct mbuf *m) 2436 { 2437 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO | 2438 CSUM_INNER_IP6_TSO; 2439 2440 M_ASSERTPKTHDR(m); 2441 2442 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 && 2443 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN); 2444 } 2445 2446 static inline bool 2447 needs_inner_tcp_csum(struct mbuf *m) 2448 { 2449 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2450 2451 M_ASSERTPKTHDR(m); 2452 2453 return (m->m_pkthdr.csum_flags & csum_flags); 2454 } 2455 2456 static inline bool 2457 needs_l3_csum(struct mbuf *m) 2458 { 2459 const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP | 2460 CSUM_INNER_IP_TSO; 2461 2462 M_ASSERTPKTHDR(m); 2463 2464 return (m->m_pkthdr.csum_flags & csum_flags); 2465 } 2466 2467 static inline bool 2468 needs_outer_tcp_csum(struct mbuf *m) 2469 { 2470 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP | 2471 CSUM_IP6_TSO; 2472 2473 M_ASSERTPKTHDR(m); 2474 2475 return (m->m_pkthdr.csum_flags & csum_flags); 2476 } 2477 2478 #ifdef RATELIMIT 2479 static inline bool 2480 needs_outer_l4_csum(struct mbuf *m) 2481 { 2482 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO | 2483 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO; 2484 2485 M_ASSERTPKTHDR(m); 2486 2487 return (m->m_pkthdr.csum_flags & csum_flags); 2488 } 2489 2490 static inline bool 2491 needs_outer_udp_csum(struct mbuf *m) 2492 { 2493 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP; 2494 2495 M_ASSERTPKTHDR(m); 2496 2497 return (m->m_pkthdr.csum_flags & csum_flags); 2498 } 2499 #endif 2500 2501 static inline bool 2502 needs_vlan_insertion(struct mbuf *m) 2503 { 2504 2505 M_ASSERTPKTHDR(m); 2506 2507 return (m->m_flags & M_VLANTAG); 2508 } 2509 2510 static void * 2511 m_advance(struct mbuf **pm, int *poffset, int len) 2512 { 2513 struct mbuf *m = *pm; 2514 int offset = *poffset; 2515 uintptr_t p = 0; 2516 2517 MPASS(len > 0); 2518 2519 for (;;) { 2520 if (offset + len < m->m_len) { 2521 offset += len; 2522 p = mtod(m, uintptr_t) + offset; 2523 break; 2524 } 2525 len -= m->m_len - offset; 2526 m = m->m_next; 2527 offset = 0; 2528 MPASS(m 
!= NULL); 2529 } 2530 *poffset = offset; 2531 *pm = m; 2532 return ((void *)p); 2533 } 2534 2535 static inline int 2536 count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) 2537 { 2538 vm_paddr_t paddr; 2539 int i, len, off, pglen, pgoff, seglen, segoff; 2540 int nsegs = 0; 2541 2542 M_ASSERTEXTPG(m); 2543 off = mtod(m, vm_offset_t); 2544 len = m->m_len; 2545 off += skip; 2546 len -= skip; 2547 2548 if (m->m_epg_hdrlen != 0) { 2549 if (off >= m->m_epg_hdrlen) { 2550 off -= m->m_epg_hdrlen; 2551 } else { 2552 seglen = m->m_epg_hdrlen - off; 2553 segoff = off; 2554 seglen = min(seglen, len); 2555 off = 0; 2556 len -= seglen; 2557 paddr = pmap_kextract( 2558 (vm_offset_t)&m->m_epg_hdr[segoff]); 2559 if (*nextaddr != paddr) 2560 nsegs++; 2561 *nextaddr = paddr + seglen; 2562 } 2563 } 2564 pgoff = m->m_epg_1st_off; 2565 for (i = 0; i < m->m_epg_npgs && len > 0; i++) { 2566 pglen = m_epg_pagelen(m, i, pgoff); 2567 if (off >= pglen) { 2568 off -= pglen; 2569 pgoff = 0; 2570 continue; 2571 } 2572 seglen = pglen - off; 2573 segoff = pgoff + off; 2574 off = 0; 2575 seglen = min(seglen, len); 2576 len -= seglen; 2577 paddr = m->m_epg_pa[i] + segoff; 2578 if (*nextaddr != paddr) 2579 nsegs++; 2580 *nextaddr = paddr + seglen; 2581 pgoff = 0; 2582 }; 2583 if (len != 0) { 2584 seglen = min(len, m->m_epg_trllen - off); 2585 len -= seglen; 2586 paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]); 2587 if (*nextaddr != paddr) 2588 nsegs++; 2589 *nextaddr = paddr + seglen; 2590 } 2591 2592 return (nsegs); 2593 } 2594 2595 2596 /* 2597 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2598 * must have at least one mbuf that's not empty. It is possible for this 2599 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 2600 */ 2601 static inline int 2602 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) 2603 { 2604 vm_paddr_t nextaddr, paddr; 2605 vm_offset_t va; 2606 int len, nsegs; 2607 2608 M_ASSERTPKTHDR(m); 2609 MPASS(m->m_pkthdr.len > 0); 2610 MPASS(m->m_pkthdr.len >= skip); 2611 2612 nsegs = 0; 2613 nextaddr = 0; 2614 for (; m; m = m->m_next) { 2615 len = m->m_len; 2616 if (__predict_false(len == 0)) 2617 continue; 2618 if (skip >= len) { 2619 skip -= len; 2620 continue; 2621 } 2622 if ((m->m_flags & M_EXTPG) != 0) { 2623 *cflags |= MC_NOMAP; 2624 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); 2625 skip = 0; 2626 continue; 2627 } 2628 va = mtod(m, vm_offset_t) + skip; 2629 len -= skip; 2630 skip = 0; 2631 paddr = pmap_kextract(va); 2632 nsegs += sglist_count((void *)(uintptr_t)va, len); 2633 if (paddr == nextaddr) 2634 nsegs--; 2635 nextaddr = pmap_kextract(va + len - 1) + 1; 2636 } 2637 2638 return (nsegs); 2639 } 2640 2641 /* 2642 * The maximum number of segments that can fit in a WR. 2643 */ 2644 static int 2645 max_nsegs_allowed(struct mbuf *m) 2646 { 2647 2648 if (needs_tso(m)) { 2649 if (needs_vxlan_tso(m)) 2650 return (TX_SGL_SEGS_VXLAN_TSO); 2651 else 2652 return (TX_SGL_SEGS_TSO); 2653 } 2654 2655 return (TX_SGL_SEGS); 2656 } 2657 2658 /* 2659 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2660 * a) caller can assume it's been freed if this function returns with an error. 2661 * b) it may get defragged up if the gather list is too long for the hardware. 
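 * c) it may get pulled up into a single mbuf if it is small but carries
 *    an excessively long gather list.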
2662 */ 2663 int 2664 parse_pkt(struct adapter *sc, struct mbuf **mp) 2665 { 2666 struct mbuf *m0 = *mp, *m; 2667 int rc, nsegs, defragged = 0, offset; 2668 struct ether_header *eh; 2669 void *l3hdr; 2670 #if defined(INET) || defined(INET6) 2671 struct tcphdr *tcp; 2672 #endif 2673 #if defined(KERN_TLS) || defined(RATELIMIT) 2674 struct cxgbe_snd_tag *cst; 2675 #endif 2676 uint16_t eh_type; 2677 uint8_t cflags; 2678 2679 cflags = 0; 2680 M_ASSERTPKTHDR(m0); 2681 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2682 rc = EINVAL; 2683 fail: 2684 m_freem(m0); 2685 *mp = NULL; 2686 return (rc); 2687 } 2688 restart: 2689 /* 2690 * First count the number of gather list segments in the payload. 2691 * Defrag the mbuf if nsegs exceeds the hardware limit. 2692 */ 2693 M_ASSERTPKTHDR(m0); 2694 MPASS(m0->m_pkthdr.len > 0); 2695 nsegs = count_mbuf_nsegs(m0, 0, &cflags); 2696 #if defined(KERN_TLS) || defined(RATELIMIT) 2697 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) 2698 cst = mst_to_cst(m0->m_pkthdr.snd_tag); 2699 else 2700 cst = NULL; 2701 #endif 2702 #ifdef KERN_TLS 2703 if (cst != NULL && cst->type == IF_SND_TAG_TYPE_TLS) { 2704 int len16; 2705 2706 cflags |= MC_TLS; 2707 set_mbuf_cflags(m0, cflags); 2708 rc = t6_ktls_parse_pkt(m0, &nsegs, &len16); 2709 if (rc != 0) 2710 goto fail; 2711 set_mbuf_nsegs(m0, nsegs); 2712 set_mbuf_len16(m0, len16); 2713 return (0); 2714 } 2715 #endif 2716 if (nsegs > max_nsegs_allowed(m0)) { 2717 if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { 2718 rc = EFBIG; 2719 goto fail; 2720 } 2721 *mp = m0 = m; /* update caller's copy after defrag */ 2722 goto restart; 2723 } 2724 2725 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && 2726 !(cflags & MC_NOMAP))) { 2727 m0 = m_pullup(m0, m0->m_pkthdr.len); 2728 if (m0 == NULL) { 2729 /* Should have left well enough alone. */ 2730 rc = EFBIG; 2731 goto fail; 2732 } 2733 *mp = m0; /* update caller's copy after pullup */ 2734 goto restart; 2735 } 2736 set_mbuf_nsegs(m0, nsegs); 2737 set_mbuf_cflags(m0, cflags); 2738 calculate_mbuf_len16(sc, m0); 2739 2740 #ifdef RATELIMIT 2741 /* 2742 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2743 * checksumming is enabled. needs_outer_l4_csum happens to check for 2744 * all the right things. 2745 */ 2746 if (__predict_false(needs_eo(cst) && !needs_outer_l4_csum(m0))) { 2747 m_snd_tag_rele(m0->m_pkthdr.snd_tag); 2748 m0->m_pkthdr.snd_tag = NULL; 2749 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 2750 cst = NULL; 2751 } 2752 #endif 2753 2754 if (!needs_hwcsum(m0) 2755 #ifdef RATELIMIT 2756 && !needs_eo(cst) 2757 #endif 2758 ) 2759 return (0); 2760 2761 m = m0; 2762 eh = mtod(m, struct ether_header *); 2763 eh_type = ntohs(eh->ether_type); 2764 if (eh_type == ETHERTYPE_VLAN) { 2765 struct ether_vlan_header *evh = (void *)eh; 2766 2767 eh_type = ntohs(evh->evl_proto); 2768 m0->m_pkthdr.l2hlen = sizeof(*evh); 2769 } else 2770 m0->m_pkthdr.l2hlen = sizeof(*eh); 2771 2772 offset = 0; 2773 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2774 2775 switch (eh_type) { 2776 #ifdef INET6 2777 case ETHERTYPE_IPV6: 2778 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); 2779 break; 2780 #endif 2781 #ifdef INET 2782 case ETHERTYPE_IP: 2783 { 2784 struct ip *ip = l3hdr; 2785 2786 if (needs_vxlan_csum(m0)) { 2787 /* Driver will do the outer IP hdr checksum. 
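 * For TSO the checksum is pre-computed in complemented form with ip_len
 * temporarily zeroed; for the non-TSO case the final header checksum is
 * filled in directly.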
*/ 2788 ip->ip_sum = 0; 2789 if (needs_vxlan_tso(m0)) { 2790 const uint16_t ipl = ip->ip_len; 2791 2792 ip->ip_len = 0; 2793 ip->ip_sum = ~in_cksum_hdr(ip); 2794 ip->ip_len = ipl; 2795 } else 2796 ip->ip_sum = in_cksum_hdr(ip); 2797 } 2798 m0->m_pkthdr.l3hlen = ip->ip_hl << 2; 2799 break; 2800 } 2801 #endif 2802 default: 2803 panic("%s: ethertype 0x%04x unknown. if_cxgbe must be compiled" 2804 " with the same INET/INET6 options as the kernel.", 2805 __func__, eh_type); 2806 } 2807 2808 if (needs_vxlan_csum(m0)) { 2809 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2810 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header); 2811 2812 /* Inner headers. */ 2813 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen + 2814 sizeof(struct udphdr) + sizeof(struct vxlan_header)); 2815 eh_type = ntohs(eh->ether_type); 2816 if (eh_type == ETHERTYPE_VLAN) { 2817 struct ether_vlan_header *evh = (void *)eh; 2818 2819 eh_type = ntohs(evh->evl_proto); 2820 m0->m_pkthdr.inner_l2hlen = sizeof(*evh); 2821 } else 2822 m0->m_pkthdr.inner_l2hlen = sizeof(*eh); 2823 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2824 2825 switch (eh_type) { 2826 #ifdef INET6 2827 case ETHERTYPE_IPV6: 2828 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr); 2829 break; 2830 #endif 2831 #ifdef INET 2832 case ETHERTYPE_IP: 2833 { 2834 struct ip *ip = l3hdr; 2835 2836 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2; 2837 break; 2838 } 2839 #endif 2840 default: 2841 panic("%s: VXLAN hw offload requested with unknown " 2842 "ethertype 0x%04x. if_cxgbe must be compiled" 2843 " with the same INET/INET6 options as the kernel.", 2844 __func__, eh_type); 2845 } 2846 #if defined(INET) || defined(INET6) 2847 if (needs_inner_tcp_csum(m0)) { 2848 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen); 2849 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4; 2850 } 2851 #endif 2852 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 2853 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP | 2854 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP | 2855 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | 2856 CSUM_ENCAP_VXLAN; 2857 } 2858 2859 #if defined(INET) || defined(INET6) 2860 if (needs_outer_tcp_csum(m0)) { 2861 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2862 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2863 #ifdef RATELIMIT 2864 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2865 set_mbuf_eo_tsclk_tsoff(m0, 2866 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2867 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2868 } else 2869 set_mbuf_eo_tsclk_tsoff(m0, 0); 2870 } else if (needs_outer_udp_csum(m0)) { 2871 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2872 #endif 2873 } 2874 #ifdef RATELIMIT 2875 if (needs_eo(cst)) { 2876 u_int immhdrs; 2877 2878 /* EO WRs have the headers in the WR and not the GL. 
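 * The Ethernet/IP/L4 headers are sent as immediate data, so they are
 * skipped when recounting the SGL segments for the EO WR below.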
*/ 2879 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2880 m0->m_pkthdr.l4hlen; 2881 cflags = 0; 2882 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); 2883 MPASS(cflags == mbuf_cflags(m0)); 2884 set_mbuf_eo_nsegs(m0, nsegs); 2885 set_mbuf_eo_len16(m0, 2886 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2887 } 2888 #endif 2889 #endif 2890 MPASS(m0 == *mp); 2891 return (0); 2892 } 2893 2894 void * 2895 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2896 { 2897 struct sge_eq *eq = &wrq->eq; 2898 struct adapter *sc = wrq->adapter; 2899 int ndesc, available; 2900 struct wrqe *wr; 2901 void *w; 2902 2903 MPASS(len16 > 0); 2904 ndesc = tx_len16_to_desc(len16); 2905 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2906 2907 EQ_LOCK(eq); 2908 2909 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2910 drain_wrq_wr_list(sc, wrq); 2911 2912 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2913 slowpath: 2914 EQ_UNLOCK(eq); 2915 wr = alloc_wrqe(len16 * 16, wrq); 2916 if (__predict_false(wr == NULL)) 2917 return (NULL); 2918 cookie->pidx = -1; 2919 cookie->ndesc = ndesc; 2920 return (&wr->wr); 2921 } 2922 2923 eq->cidx = read_hw_cidx(eq); 2924 if (eq->pidx == eq->cidx) 2925 available = eq->sidx - 1; 2926 else 2927 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2928 if (available < ndesc) 2929 goto slowpath; 2930 2931 cookie->pidx = eq->pidx; 2932 cookie->ndesc = ndesc; 2933 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2934 2935 w = &eq->desc[eq->pidx]; 2936 IDXINCR(eq->pidx, ndesc, eq->sidx); 2937 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2938 w = &wrq->ss[0]; 2939 wrq->ss_pidx = cookie->pidx; 2940 wrq->ss_len = len16 * 16; 2941 } 2942 2943 EQ_UNLOCK(eq); 2944 2945 return (w); 2946 } 2947 2948 void 2949 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2950 { 2951 struct sge_eq *eq = &wrq->eq; 2952 struct adapter *sc = wrq->adapter; 2953 int ndesc, pidx; 2954 struct wrq_cookie *prev, *next; 2955 2956 if (cookie->pidx == -1) { 2957 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2958 2959 t4_wrq_tx(sc, wr); 2960 return; 2961 } 2962 2963 if (__predict_false(w == &wrq->ss[0])) { 2964 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2965 2966 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2967 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2968 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2969 wrq->tx_wrs_ss++; 2970 } else 2971 wrq->tx_wrs_direct++; 2972 2973 EQ_LOCK(eq); 2974 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 2975 pidx = cookie->pidx; 2976 MPASS(pidx >= 0 && pidx < eq->sidx); 2977 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2978 next = TAILQ_NEXT(cookie, link); 2979 if (prev == NULL) { 2980 MPASS(pidx == eq->dbidx); 2981 if (next == NULL || ndesc >= 16) { 2982 int available; 2983 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2984 2985 /* 2986 * Note that the WR via which we'll request tx updates 2987 * is at pidx and not eq->pidx, which has moved on 2988 * already. 2989 */ 2990 dst = (void *)&eq->desc[pidx]; 2991 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2992 if (available < eq->sidx / 4 && 2993 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2994 /* 2995 * XXX: This is not 100% reliable with some 2996 * types of WRs. But this is a very unusual 2997 * situation for an ofld/ctrl queue anyway. 
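 * Fewer than a quarter of the descriptors are free here, so ask the
 * hardware for an egress update on this WR.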
2998 */ 2999 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 3000 F_FW_WR_EQUEQ); 3001 } 3002 3003 ring_eq_db(wrq->adapter, eq, ndesc); 3004 } else { 3005 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 3006 next->pidx = pidx; 3007 next->ndesc += ndesc; 3008 } 3009 } else { 3010 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 3011 prev->ndesc += ndesc; 3012 } 3013 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 3014 3015 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 3016 drain_wrq_wr_list(sc, wrq); 3017 3018 #ifdef INVARIANTS 3019 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 3020 /* Doorbell must have caught up to the pidx. */ 3021 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 3022 } 3023 #endif 3024 EQ_UNLOCK(eq); 3025 } 3026 3027 static u_int 3028 can_resume_eth_tx(struct mp_ring *r) 3029 { 3030 struct sge_eq *eq = r->cookie; 3031 3032 return (total_available_tx_desc(eq) > eq->sidx / 8); 3033 } 3034 3035 static inline bool 3036 cannot_use_txpkts(struct mbuf *m) 3037 { 3038 /* maybe put a GL limit too, to avoid silliness? */ 3039 3040 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); 3041 } 3042 3043 static inline int 3044 discard_tx(struct sge_eq *eq) 3045 { 3046 3047 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 3048 } 3049 3050 static inline int 3051 wr_can_update_eq(void *p) 3052 { 3053 struct fw_eth_tx_pkts_wr *wr = p; 3054 3055 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 3056 case FW_ULPTX_WR: 3057 case FW_ETH_TX_PKT_WR: 3058 case FW_ETH_TX_PKTS_WR: 3059 case FW_ETH_TX_PKTS2_WR: 3060 case FW_ETH_TX_PKT_VM_WR: 3061 case FW_ETH_TX_PKTS_VM_WR: 3062 return (1); 3063 default: 3064 return (0); 3065 } 3066 } 3067 3068 static inline void 3069 set_txupdate_flags(struct sge_txq *txq, u_int avail, 3070 struct fw_eth_tx_pkt_wr *wr) 3071 { 3072 struct sge_eq *eq = &txq->eq; 3073 struct txpkts *txp = &txq->txp; 3074 3075 if ((txp->npkt > 0 || avail < eq->sidx / 2) && 3076 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3077 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3078 eq->equeqidx = eq->pidx; 3079 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 3080 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 3081 eq->equeqidx = eq->pidx; 3082 } 3083 } 3084 3085 /* 3086 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 3087 * be consumed. Return the actual number consumed. 0 indicates a stall. 3088 */ 3089 static u_int 3090 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing) 3091 { 3092 struct sge_txq *txq = r->cookie; 3093 struct ifnet *ifp = txq->ifp; 3094 struct sge_eq *eq = &txq->eq; 3095 struct txpkts *txp = &txq->txp; 3096 struct vi_info *vi = ifp->if_softc; 3097 struct adapter *sc = vi->adapter; 3098 u_int total, remaining; /* # of packets */ 3099 u_int n, avail, dbdiff; /* # of hardware descriptors */ 3100 int i, rc; 3101 struct mbuf *m0; 3102 bool snd; 3103 void *wr; /* start of the last WR written to the ring */ 3104 3105 TXQ_LOCK_ASSERT_OWNED(txq); 3106 3107 remaining = IDXDIFF(pidx, cidx, r->size); 3108 if (__predict_false(discard_tx(eq))) { 3109 for (i = 0; i < txp->npkt; i++) 3110 m_freem(txp->mb[i]); 3111 txp->npkt = 0; 3112 while (cidx != pidx) { 3113 m0 = r->items[cidx]; 3114 m_freem(m0); 3115 if (++cidx == r->size) 3116 cidx = 0; 3117 } 3118 reclaim_tx_descs(txq, eq->sidx); 3119 *coalescing = false; 3120 return (remaining); /* emptied */ 3121 } 3122 3123 /* How many hardware descriptors do we have readily available. 
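 * One descriptor is always left unused so that a completely full ring
 * can be distinguished from an empty one, hence the -1 below.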
*/ 3124 if (eq->pidx == eq->cidx) { 3125 avail = eq->sidx - 1; 3126 if (txp->score++ >= 5) 3127 txp->score = 5; /* tx is completely idle, reset. */ 3128 } else 3129 avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3130 3131 total = 0; 3132 if (remaining == 0) { 3133 if (txp->score-- == 1) /* egr_update had to drain txpkts */ 3134 txp->score = 1; 3135 goto send_txpkts; 3136 } 3137 3138 dbdiff = 0; 3139 MPASS(remaining > 0); 3140 while (remaining > 0) { 3141 m0 = r->items[cidx]; 3142 M_ASSERTPKTHDR(m0); 3143 MPASS(m0->m_nextpkt == NULL); 3144 3145 if (avail < 2 * SGE_MAX_WR_NDESC) 3146 avail += reclaim_tx_descs(txq, 64); 3147 3148 if (txp->npkt > 0 || remaining > 1 || txp->score > 3 || 3149 atomic_load_int(&txq->eq.equiq) != 0) { 3150 if (sc->flags & IS_VF) 3151 rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd); 3152 else 3153 rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd); 3154 } else { 3155 snd = false; 3156 rc = EINVAL; 3157 } 3158 if (snd) { 3159 MPASS(txp->npkt > 0); 3160 for (i = 0; i < txp->npkt; i++) 3161 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3162 if (txp->npkt > 1) { 3163 if (txp->score++ >= 10) 3164 txp->score = 10; 3165 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3166 if (sc->flags & IS_VF) 3167 n = write_txpkts_vm_wr(sc, txq); 3168 else 3169 n = write_txpkts_wr(sc, txq); 3170 } else { 3171 MPASS(avail >= 3172 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3173 if (sc->flags & IS_VF) 3174 n = write_txpkt_vm_wr(sc, txq, 3175 txp->mb[0]); 3176 else 3177 n = write_txpkt_wr(sc, txq, txp->mb[0], 3178 avail); 3179 } 3180 MPASS(n <= SGE_MAX_WR_NDESC); 3181 avail -= n; 3182 dbdiff += n; 3183 wr = &eq->desc[eq->pidx]; 3184 IDXINCR(eq->pidx, n, eq->sidx); 3185 txp->npkt = 0; /* emptied */ 3186 } 3187 if (rc == 0) { 3188 /* m0 was coalesced into txq->txpkts. */ 3189 goto next_mbuf; 3190 } 3191 if (rc == EAGAIN) { 3192 /* 3193 * m0 is suitable for tx coalescing but could not be 3194 * combined with the existing txq->txpkts, which has now 3195 * been transmitted. Start a new txpkts with m0. 
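 * The loop continues without advancing cidx, so this same mbuf is
 * offered to the now-empty txpkts on the next iteration.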
3196 */ 3197 MPASS(snd); 3198 MPASS(txp->npkt == 0); 3199 continue; 3200 } 3201 3202 MPASS(rc != 0 && rc != EAGAIN); 3203 MPASS(txp->npkt == 0); 3204 3205 n = tx_len16_to_desc(mbuf_len16(m0)); 3206 if (__predict_false(avail < n)) { 3207 avail += reclaim_tx_descs(txq, min(n, 32)); 3208 if (avail < n) 3209 break; /* out of descriptors */ 3210 } 3211 3212 wr = &eq->desc[eq->pidx]; 3213 if (mbuf_cflags(m0) & MC_RAW_WR) { 3214 n = write_raw_wr(txq, wr, m0, avail); 3215 #ifdef KERN_TLS 3216 } else if (mbuf_cflags(m0) & MC_TLS) { 3217 ETHER_BPF_MTAP(ifp, m0); 3218 n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0), 3219 avail); 3220 #endif 3221 } else { 3222 ETHER_BPF_MTAP(ifp, m0); 3223 if (sc->flags & IS_VF) 3224 n = write_txpkt_vm_wr(sc, txq, m0); 3225 else 3226 n = write_txpkt_wr(sc, txq, m0, avail); 3227 } 3228 MPASS(n >= 1 && n <= avail); 3229 if (!(mbuf_cflags(m0) & MC_TLS)) 3230 MPASS(n <= SGE_MAX_WR_NDESC); 3231 3232 avail -= n; 3233 dbdiff += n; 3234 IDXINCR(eq->pidx, n, eq->sidx); 3235 3236 if (dbdiff >= 512 / EQ_ESIZE) { /* X_FETCHBURSTMAX_512B */ 3237 if (wr_can_update_eq(wr)) 3238 set_txupdate_flags(txq, avail, wr); 3239 ring_eq_db(sc, eq, dbdiff); 3240 avail += reclaim_tx_descs(txq, 32); 3241 dbdiff = 0; 3242 } 3243 next_mbuf: 3244 total++; 3245 remaining--; 3246 if (__predict_false(++cidx == r->size)) 3247 cidx = 0; 3248 } 3249 if (dbdiff != 0) { 3250 if (wr_can_update_eq(wr)) 3251 set_txupdate_flags(txq, avail, wr); 3252 ring_eq_db(sc, eq, dbdiff); 3253 reclaim_tx_descs(txq, 32); 3254 } else if (eq->pidx == eq->cidx && txp->npkt > 0 && 3255 atomic_load_int(&txq->eq.equiq) == 0) { 3256 /* 3257 * If nothing was submitted to the chip for tx (it was coalesced 3258 * into txpkts instead) and there is no tx update outstanding 3259 * then we need to send txpkts now. 
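 * (The send_txpkts label below is also entered directly via a goto when
 * there were no new packets on the ring to begin with.)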
3260 */ 3261 send_txpkts: 3262 MPASS(txp->npkt > 0); 3263 for (i = 0; i < txp->npkt; i++) 3264 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3265 if (txp->npkt > 1) { 3266 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3267 if (sc->flags & IS_VF) 3268 n = write_txpkts_vm_wr(sc, txq); 3269 else 3270 n = write_txpkts_wr(sc, txq); 3271 } else { 3272 MPASS(avail >= 3273 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3274 if (sc->flags & IS_VF) 3275 n = write_txpkt_vm_wr(sc, txq, txp->mb[0]); 3276 else 3277 n = write_txpkt_wr(sc, txq, txp->mb[0], avail); 3278 } 3279 MPASS(n <= SGE_MAX_WR_NDESC); 3280 wr = &eq->desc[eq->pidx]; 3281 IDXINCR(eq->pidx, n, eq->sidx); 3282 txp->npkt = 0; /* emptied */ 3283 3284 MPASS(wr_can_update_eq(wr)); 3285 set_txupdate_flags(txq, avail - n, wr); 3286 ring_eq_db(sc, eq, n); 3287 reclaim_tx_descs(txq, 32); 3288 } 3289 *coalescing = txp->npkt > 0; 3290 3291 return (total); 3292 } 3293 3294 static inline void 3295 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 3296 int qsize) 3297 { 3298 3299 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 3300 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 3301 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 3302 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 3303 3304 iq->flags = 0; 3305 iq->adapter = sc; 3306 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 3307 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 3308 if (pktc_idx >= 0) { 3309 iq->intr_params |= F_QINTR_CNT_EN; 3310 iq->intr_pktc_idx = pktc_idx; 3311 } 3312 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 3313 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 3314 } 3315 3316 static inline void 3317 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 3318 { 3319 3320 fl->qsize = qsize; 3321 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3322 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 3323 if (sc->flags & BUF_PACKING_OK && 3324 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 3325 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 3326 fl->flags |= FL_BUF_PACKING; 3327 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); 3328 fl->safe_zidx = sc->sge.safe_zidx; 3329 } 3330 3331 static inline void 3332 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 3333 uint8_t tx_chan, uint16_t iqid, char *name) 3334 { 3335 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 3336 3337 eq->flags = eqtype & EQ_TYPEMASK; 3338 eq->tx_chan = tx_chan; 3339 eq->iqid = iqid; 3340 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3341 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 3342 } 3343 3344 static int 3345 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 3346 bus_dmamap_t *map, bus_addr_t *pa, void **va) 3347 { 3348 int rc; 3349 3350 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 3351 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 3352 if (rc != 0) { 3353 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 3354 goto done; 3355 } 3356 3357 rc = bus_dmamem_alloc(*tag, va, 3358 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 3359 if (rc != 0) { 3360 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 3361 goto done; 3362 } 3363 3364 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 3365 if (rc != 0) { 3366 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 3367 goto done; 3368 } 3369 done: 3370 if (rc) 3371 free_ring(sc, 
*tag, *map, *pa, *va); 3372 3373 return (rc); 3374 } 3375 3376 static int 3377 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3378 bus_addr_t pa, void *va) 3379 { 3380 if (pa) 3381 bus_dmamap_unload(tag, map); 3382 if (va) 3383 bus_dmamem_free(tag, va, map); 3384 if (tag) 3385 bus_dma_tag_destroy(tag); 3386 3387 return (0); 3388 } 3389 3390 /* 3391 * Allocates the ring for an ingress queue and an optional freelist. If the 3392 * freelist is specified it will be allocated and then associated with the 3393 * ingress queue. 3394 * 3395 * Returns errno on failure. Resources allocated up to that point may still be 3396 * allocated. Caller is responsible for cleanup in case this function fails. 3397 * 3398 * If the ingress queue will take interrupts directly then the intr_idx 3399 * specifies the vector, starting from 0. -1 means the interrupts for this 3400 * queue should be forwarded to the fwq. 3401 */ 3402 static int 3403 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3404 int intr_idx, int cong) 3405 { 3406 int rc, i, cntxt_id; 3407 size_t len; 3408 struct fw_iq_cmd c; 3409 struct port_info *pi = vi->pi; 3410 struct adapter *sc = iq->adapter; 3411 struct sge_params *sp = &sc->params.sge; 3412 __be32 v = 0; 3413 3414 len = iq->qsize * IQ_ESIZE; 3415 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3416 (void **)&iq->desc); 3417 if (rc != 0) 3418 return (rc); 3419 3420 bzero(&c, sizeof(c)); 3421 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3422 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3423 V_FW_IQ_CMD_VFN(0)); 3424 3425 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3426 FW_LEN16(c)); 3427 3428 /* Special handling for firmware event queue */ 3429 if (iq == &sc->sge.fwq) 3430 v |= F_FW_IQ_CMD_IQASYNCH; 3431 3432 if (intr_idx < 0) { 3433 /* Forwarded interrupts, all headed to fwq */ 3434 v |= F_FW_IQ_CMD_IQANDST; 3435 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3436 } else { 3437 KASSERT(intr_idx < sc->intr_count, 3438 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 3439 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 3440 } 3441 3442 c.type_to_iqandstindex = htobe32(v | 3443 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3444 V_FW_IQ_CMD_VIID(vi->viid) | 3445 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3446 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 3447 F_FW_IQ_CMD_IQGTSMODE | 3448 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3449 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3450 c.iqsize = htobe16(iq->qsize); 3451 c.iqaddr = htobe64(iq->ba); 3452 if (cong >= 0) 3453 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3454 3455 if (fl) { 3456 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3457 3458 len = fl->qsize * EQ_ESIZE; 3459 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3460 &fl->ba, (void **)&fl->desc); 3461 if (rc) 3462 return (rc); 3463 3464 /* Allocate space for one software descriptor per buffer. 
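 * (the fl_sdesc array that tracks the cluster backing each entry)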
*/ 3465 rc = alloc_fl_sdesc(fl); 3466 if (rc != 0) { 3467 device_printf(sc->dev, 3468 "failed to setup fl software descriptors: %d\n", 3469 rc); 3470 return (rc); 3471 } 3472 3473 if (fl->flags & FL_BUF_PACKING) { 3474 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3475 fl->buf_boundary = sp->pack_boundary; 3476 } else { 3477 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3478 fl->buf_boundary = 16; 3479 } 3480 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3481 fl->buf_boundary = sp->pad_boundary; 3482 3483 c.iqns_to_fl0congen |= 3484 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3485 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3486 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3487 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3488 0)); 3489 if (cong >= 0) { 3490 c.iqns_to_fl0congen |= 3491 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 3492 F_FW_IQ_CMD_FL0CONGCIF | 3493 F_FW_IQ_CMD_FL0CONGEN); 3494 } 3495 c.fl0dcaen_to_fl0cidxfthresh = 3496 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3497 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 3498 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 3499 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3500 c.fl0size = htobe16(fl->qsize); 3501 c.fl0addr = htobe64(fl->ba); 3502 } 3503 3504 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3505 if (rc != 0) { 3506 device_printf(sc->dev, 3507 "failed to create ingress queue: %d\n", rc); 3508 return (rc); 3509 } 3510 3511 iq->cidx = 0; 3512 iq->gen = F_RSPD_GEN; 3513 iq->intr_next = iq->intr_params; 3514 iq->cntxt_id = be16toh(c.iqid); 3515 iq->abs_id = be16toh(c.physiqid); 3516 iq->flags |= IQ_ALLOCATED; 3517 3518 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3519 if (cntxt_id >= sc->sge.niq) { 3520 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3521 cntxt_id, sc->sge.niq - 1); 3522 } 3523 sc->sge.iqmap[cntxt_id] = iq; 3524 3525 if (fl) { 3526 u_int qid; 3527 3528 iq->flags |= IQ_HAS_FL; 3529 fl->cntxt_id = be16toh(c.fl0id); 3530 fl->pidx = fl->cidx = 0; 3531 3532 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3533 if (cntxt_id >= sc->sge.neq) { 3534 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 3535 __func__, cntxt_id, sc->sge.neq - 1); 3536 } 3537 sc->sge.eqmap[cntxt_id] = (void *)fl; 3538 3539 qid = fl->cntxt_id; 3540 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3541 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3542 uint32_t mask = (1 << s_qpp) - 1; 3543 volatile uint8_t *udb; 3544 3545 udb = sc->udbs_base + UDBS_DB_OFFSET; 3546 udb += (qid >> s_qpp) << PAGE_SHIFT; 3547 qid &= mask; 3548 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3549 udb += qid << UDBS_SEG_SHIFT; 3550 qid = 0; 3551 } 3552 fl->udb = (volatile void *)udb; 3553 } 3554 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3555 3556 FL_LOCK(fl); 3557 /* Enough to make sure the SGE doesn't think it's starved */ 3558 refill_fl(sc, fl, fl->lowat); 3559 FL_UNLOCK(fl); 3560 } 3561 3562 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { 3563 uint32_t param, val; 3564 3565 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3566 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3567 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 3568 if (cong == 0) 3569 val = 1 << 19; 3570 else { 3571 val = 2 << 19; 3572 for (i = 0; i < 4; i++) { 3573 if (cong & (1 << i)) 3574 val |= 1 << (i << 2); 3575 } 3576 } 3577 3578 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3579 if (rc != 0) { 3580 /* report error but carry on */ 3581 device_printf(sc->dev, 3582 "failed to set congestion manager context 
for " 3583 "ingress queue %d: %d\n", iq->cntxt_id, rc); 3584 } 3585 } 3586 3587 /* Enable IQ interrupts */ 3588 atomic_store_rel_int(&iq->state, IQS_IDLE); 3589 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3590 V_INGRESSQID(iq->cntxt_id)); 3591 3592 return (0); 3593 } 3594 3595 static int 3596 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3597 { 3598 int rc; 3599 struct adapter *sc = iq->adapter; 3600 device_t dev; 3601 3602 if (sc == NULL) 3603 return (0); /* nothing to do */ 3604 3605 dev = vi ? vi->dev : sc->dev; 3606 3607 if (iq->flags & IQ_ALLOCATED) { 3608 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 3609 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 3610 fl ? fl->cntxt_id : 0xffff, 0xffff); 3611 if (rc != 0) { 3612 device_printf(dev, 3613 "failed to free queue %p: %d\n", iq, rc); 3614 return (rc); 3615 } 3616 iq->flags &= ~IQ_ALLOCATED; 3617 } 3618 3619 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3620 3621 bzero(iq, sizeof(*iq)); 3622 3623 if (fl) { 3624 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 3625 fl->desc); 3626 3627 if (fl->sdesc) 3628 free_fl_sdesc(sc, fl); 3629 3630 if (mtx_initialized(&fl->fl_lock)) 3631 mtx_destroy(&fl->fl_lock); 3632 3633 bzero(fl, sizeof(*fl)); 3634 } 3635 3636 return (0); 3637 } 3638 3639 static void 3640 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3641 struct sge_iq *iq) 3642 { 3643 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3644 3645 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3646 "bus address of descriptor ring"); 3647 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3648 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3649 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3650 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &iq->abs_id, 0, 3651 sysctl_uint16, "I", "absolute id of the queue"); 3652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3653 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &iq->cntxt_id, 0, 3654 sysctl_uint16, "I", "SGE context id of the queue"); 3655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3656 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &iq->cidx, 0, 3657 sysctl_uint16, "I", "consumer index"); 3658 } 3659 3660 static void 3661 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3662 struct sysctl_oid *oid, struct sge_fl *fl) 3663 { 3664 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3665 3666 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3667 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3668 children = SYSCTL_CHILDREN(oid); 3669 3670 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3671 &fl->ba, "bus address of descriptor ring"); 3672 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3673 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3674 "desc ring size in bytes"); 3675 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3676 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &fl->cntxt_id, 0, 3677 sysctl_uint16, "I", "SGE context id of the freelist"); 3678 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3679 fl_pad ? 1 : 0, "padding enabled"); 3680 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3681 fl->flags & FL_BUF_PACKING ? 
1 : 0, "packing enabled"); 3682 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3683 0, "consumer index"); 3684 if (fl->flags & FL_BUF_PACKING) { 3685 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3686 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3687 } 3688 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3689 0, "producer index"); 3690 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3691 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3692 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3693 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3694 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3695 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3696 } 3697 3698 static int 3699 alloc_fwq(struct adapter *sc) 3700 { 3701 int rc, intr_idx; 3702 struct sge_iq *fwq = &sc->sge.fwq; 3703 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 3704 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3705 3706 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 3707 if (sc->flags & IS_VF) 3708 intr_idx = 0; 3709 else 3710 intr_idx = sc->intr_count > 1 ? 1 : 0; 3711 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 3712 if (rc != 0) { 3713 device_printf(sc->dev, 3714 "failed to create firmware event queue: %d\n", rc); 3715 return (rc); 3716 } 3717 3718 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", 3719 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "firmware event queue"); 3720 add_iq_sysctls(&sc->ctx, oid, fwq); 3721 3722 return (0); 3723 } 3724 3725 static int 3726 free_fwq(struct adapter *sc) 3727 { 3728 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 3729 } 3730 3731 static int 3732 alloc_ctrlq(struct adapter *sc, struct sge_wrq *ctrlq, int idx, 3733 struct sysctl_oid *oid) 3734 { 3735 int rc; 3736 char name[16]; 3737 struct sysctl_oid_list *children; 3738 3739 snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), 3740 idx); 3741 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[idx]->tx_chan, 3742 sc->sge.fwq.cntxt_id, name); 3743 3744 children = SYSCTL_CHILDREN(oid); 3745 snprintf(name, sizeof(name), "%d", idx); 3746 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, 3747 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ctrl queue"); 3748 rc = alloc_wrq(sc, NULL, ctrlq, oid); 3749 3750 return (rc); 3751 } 3752 3753 int 3754 tnl_cong(struct port_info *pi, int drop) 3755 { 3756 3757 if (drop == -1) 3758 return (-1); 3759 else if (drop == 1) 3760 return (0); 3761 else 3762 return (pi->rx_e_chan_map); 3763 } 3764 3765 static int 3766 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 3767 struct sysctl_oid *oid) 3768 { 3769 int rc; 3770 struct adapter *sc = vi->adapter; 3771 struct sysctl_oid_list *children; 3772 char name[16]; 3773 3774 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 3775 tnl_cong(vi->pi, cong_drop)); 3776 if (rc != 0) 3777 return (rc); 3778 3779 if (idx == 0) 3780 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3781 else 3782 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3783 ("iq_base mismatch")); 3784 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3785 ("PF with non-zero iq_base")); 3786 3787 /* 3788 * The freelist is just barely above the starvation threshold right now, 3789 * fill it up a bit more. 
3790 */ 3791 FL_LOCK(&rxq->fl); 3792 refill_fl(sc, &rxq->fl, 128); 3793 FL_UNLOCK(&rxq->fl); 3794 3795 #if defined(INET) || defined(INET6) 3796 rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); 3797 if (rc != 0) 3798 return (rc); 3799 MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ 3800 3801 if (vi->ifp->if_capenable & IFCAP_LRO) 3802 rxq->iq.flags |= IQ_LRO_ENABLED; 3803 #endif 3804 if (vi->ifp->if_capenable & IFCAP_HWRXTSTMP) 3805 rxq->iq.flags |= IQ_RX_TIMESTAMP; 3806 rxq->ifp = vi->ifp; 3807 3808 children = SYSCTL_CHILDREN(oid); 3809 3810 snprintf(name, sizeof(name), "%d", idx); 3811 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 3812 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3813 children = SYSCTL_CHILDREN(oid); 3814 3815 add_iq_sysctls(&vi->ctx, oid, &rxq->iq); 3816 #if defined(INET) || defined(INET6) 3817 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3818 &rxq->lro.lro_queued, 0, NULL); 3819 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3820 &rxq->lro.lro_flushed, 0, NULL); 3821 #endif 3822 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3823 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3824 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3825 CTLFLAG_RD, &rxq->vlan_extraction, 3826 "# of times hardware extracted 802.1Q tag"); 3827 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_rxcsum", 3828 CTLFLAG_RD, &rxq->vxlan_rxcsum, 3829 "# of times hardware assisted with inner checksum (VXLAN) "); 3830 3831 add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); 3832 3833 return (rc); 3834 } 3835 3836 static int 3837 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3838 { 3839 int rc; 3840 3841 #if defined(INET) || defined(INET6) 3842 if (rxq->lro.ifp) { 3843 tcp_lro_free(&rxq->lro); 3844 rxq->lro.ifp = NULL; 3845 } 3846 #endif 3847 3848 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3849 if (rc == 0) 3850 bzero(rxq, sizeof(*rxq)); 3851 3852 return (rc); 3853 } 3854 3855 #ifdef TCP_OFFLOAD 3856 static int 3857 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3858 int intr_idx, int idx, struct sysctl_oid *oid) 3859 { 3860 struct port_info *pi = vi->pi; 3861 int rc; 3862 struct sysctl_oid_list *children; 3863 char name[16]; 3864 3865 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); 3866 if (rc != 0) 3867 return (rc); 3868 3869 children = SYSCTL_CHILDREN(oid); 3870 3871 snprintf(name, sizeof(name), "%d", idx); 3872 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 3873 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3874 add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); 3875 add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); 3876 3877 return (rc); 3878 } 3879 3880 static int 3881 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3882 { 3883 int rc; 3884 3885 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3886 if (rc == 0) 3887 bzero(ofld_rxq, sizeof(*ofld_rxq)); 3888 3889 return (rc); 3890 } 3891 #endif 3892 3893 #ifdef DEV_NETMAP 3894 static int 3895 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3896 int idx, struct sysctl_oid *oid) 3897 { 3898 int rc; 3899 struct sysctl_oid_list *children; 3900 struct sysctl_ctx_list *ctx; 3901 char name[16]; 3902 size_t len; 3903 struct adapter *sc = vi->adapter; 3904 struct netmap_adapter *na = NA(vi->ifp); 3905 3906 MPASS(na != NULL); 3907 3908 len = vi->qsize_rxq * IQ_ESIZE; 3909 rc = alloc_ring(sc, len, 
&nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3910 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3911 if (rc != 0) 3912 return (rc); 3913 3914 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3915 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 3916 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3917 if (rc != 0) 3918 return (rc); 3919 3920 nm_rxq->vi = vi; 3921 nm_rxq->nid = idx; 3922 nm_rxq->iq_cidx = 0; 3923 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3924 nm_rxq->iq_gen = F_RSPD_GEN; 3925 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3926 nm_rxq->fl_sidx = na->num_rx_desc; 3927 nm_rxq->fl_sidx2 = nm_rxq->fl_sidx; /* copy for rxsync cacheline */ 3928 nm_rxq->intr_idx = intr_idx; 3929 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 3930 3931 ctx = &vi->ctx; 3932 children = SYSCTL_CHILDREN(oid); 3933 3934 snprintf(name, sizeof(name), "%d", idx); 3935 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, 3936 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3937 children = SYSCTL_CHILDREN(oid); 3938 3939 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3940 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_rxq->iq_abs_id, 3941 0, sysctl_uint16, "I", "absolute id of the queue"); 3942 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3943 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_rxq->iq_cntxt_id, 3944 0, sysctl_uint16, "I", "SGE context id of the queue"); 3945 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3946 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_rxq->iq_cidx, 0, 3947 sysctl_uint16, "I", "consumer index"); 3948 3949 children = SYSCTL_CHILDREN(oid); 3950 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3951 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3952 children = SYSCTL_CHILDREN(oid); 3953 3954 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3955 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_rxq->fl_cntxt_id, 3956 0, sysctl_uint16, "I", "SGE context id of the freelist"); 3957 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 3958 &nm_rxq->fl_cidx, 0, "consumer index"); 3959 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 3960 &nm_rxq->fl_pidx, 0, "producer index"); 3961 3962 return (rc); 3963 } 3964 3965 3966 static int 3967 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 3968 { 3969 struct adapter *sc = vi->adapter; 3970 3971 if (vi->flags & VI_INIT_DONE) 3972 MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); 3973 else 3974 MPASS(nm_rxq->iq_cntxt_id == 0); 3975 3976 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 3977 nm_rxq->iq_desc); 3978 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 3979 nm_rxq->fl_desc); 3980 3981 return (0); 3982 } 3983 3984 static int 3985 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 3986 struct sysctl_oid *oid) 3987 { 3988 int rc; 3989 size_t len; 3990 struct port_info *pi = vi->pi; 3991 struct adapter *sc = pi->adapter; 3992 struct netmap_adapter *na = NA(vi->ifp); 3993 char name[16]; 3994 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3995 3996 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3997 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 3998 &nm_txq->ba, (void **)&nm_txq->desc); 3999 if (rc) 4000 return (rc); 4001 4002 nm_txq->pidx = nm_txq->cidx = 0; 4003 nm_txq->sidx = na->num_tx_desc; 4004 nm_txq->nid = idx; 4005 nm_txq->iqidx = iqidx; 4006 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 4007 
V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4008 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4009 if (sc->params.fw_vers >= FW_VERSION32(1, 24, 11, 0)) 4010 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); 4011 else 4012 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4013 nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; 4014 4015 snprintf(name, sizeof(name), "%d", idx); 4016 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 4017 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netmap tx queue"); 4018 children = SYSCTL_CHILDREN(oid); 4019 4020 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4021 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 4022 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 4023 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_txq->cidx, 0, 4024 sysctl_uint16, "I", "consumer index"); 4025 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 4026 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &nm_txq->pidx, 0, 4027 sysctl_uint16, "I", "producer index"); 4028 4029 return (rc); 4030 } 4031 4032 static int 4033 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 4034 { 4035 struct adapter *sc = vi->adapter; 4036 4037 if (vi->flags & VI_INIT_DONE) 4038 MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); 4039 else 4040 MPASS(nm_txq->cntxt_id == 0); 4041 4042 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 4043 nm_txq->desc); 4044 4045 return (0); 4046 } 4047 #endif 4048 4049 /* 4050 * Returns a reasonable automatic cidx flush threshold for a given queue size. 4051 */ 4052 static u_int 4053 qsize_to_fthresh(int qsize) 4054 { 4055 u_int fthresh; 4056 4057 while (!powerof2(qsize)) 4058 qsize++; 4059 fthresh = ilog2(qsize); 4060 if (fthresh > X_CIDXFLUSHTHRESH_128) 4061 fthresh = X_CIDXFLUSHTHRESH_128; 4062 4063 return (fthresh); 4064 } 4065 4066 static int 4067 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 4068 { 4069 int rc, cntxt_id; 4070 struct fw_eq_ctrl_cmd c; 4071 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4072 4073 bzero(&c, sizeof(c)); 4074 4075 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 4076 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 4077 V_FW_EQ_CTRL_CMD_VFN(0)); 4078 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 4079 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 4080 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 4081 c.physeqid_pkd = htobe32(0); 4082 c.fetchszm_to_iqid = 4083 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4084 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 4085 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 4086 c.dcaen_to_eqsize = 4087 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4088 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4089 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4090 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4091 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 4092 c.eqaddr = htobe64(eq->ba); 4093 4094 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4095 if (rc != 0) { 4096 device_printf(sc->dev, 4097 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 4098 return (rc); 4099 } 4100 eq->flags |= EQ_ALLOCATED; 4101 4102 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 4103 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4104 if (cntxt_id >= sc->sge.neq) 4105 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4106 cntxt_id, sc->sge.neq - 1); 4107 sc->sge.eqmap[cntxt_id] = eq; 4108 4109 return (rc); 4110 } 4111 4112 static int 4113 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4114 { 4115 int rc, cntxt_id; 4116 struct fw_eq_eth_cmd c; 4117 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4118 4119 bzero(&c, sizeof(c)); 4120 4121 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 4122 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 4123 V_FW_EQ_ETH_CMD_VFN(0)); 4124 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 4125 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 4126 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 4127 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 4128 c.fetchszm_to_iqid = 4129 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 4130 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 4131 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 4132 c.dcaen_to_eqsize = 4133 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 4134 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4135 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4136 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 4137 c.eqaddr = htobe64(eq->ba); 4138 4139 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4140 if (rc != 0) { 4141 device_printf(vi->dev, 4142 "failed to create Ethernet egress queue: %d\n", rc); 4143 return (rc); 4144 } 4145 eq->flags |= EQ_ALLOCATED; 4146 4147 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 4148 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4149 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4150 if (cntxt_id >= sc->sge.neq) 4151 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4152 cntxt_id, sc->sge.neq - 1); 4153 sc->sge.eqmap[cntxt_id] = eq; 4154 4155 return (rc); 4156 } 4157 4158 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4159 static int 4160 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4161 { 4162 int rc, cntxt_id; 4163 struct fw_eq_ofld_cmd c; 4164 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4165 4166 bzero(&c, sizeof(c)); 4167 4168 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 4169 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 4170 V_FW_EQ_OFLD_CMD_VFN(0)); 4171 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 4172 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 4173 c.fetchszm_to_iqid = 4174 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4175 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 4176 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 4177 c.dcaen_to_eqsize = 4178 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4179 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4180 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4181 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4182 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 4183 c.eqaddr = htobe64(eq->ba); 4184 4185 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4186 if (rc != 0) { 4187 device_printf(vi->dev, 4188 "failed to create egress queue for TCP offload: %d\n", rc); 4189 return (rc); 4190 } 4191 eq->flags |= EQ_ALLOCATED; 4192 4193 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 4194 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4195 if (cntxt_id >= sc->sge.neq) 4196 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4197 cntxt_id, sc->sge.neq - 1); 4198 sc->sge.eqmap[cntxt_id] = eq; 4199 4200 return (rc); 4201 } 4202 #endif 4203 4204 static int 4205 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4206 { 4207 int rc, qsize; 4208 size_t len; 4209 4210 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 4211 4212 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4213 len = qsize * EQ_ESIZE; 4214 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 4215 &eq->ba, (void **)&eq->desc); 4216 if (rc) 4217 return (rc); 4218 4219 eq->pidx = eq->cidx = eq->dbidx = 0; 4220 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. */ 4221 eq->equeqidx = 0; 4222 eq->doorbells = sc->doorbells; 4223 4224 switch (eq->flags & EQ_TYPEMASK) { 4225 case EQ_CTRL: 4226 rc = ctrl_eq_alloc(sc, eq); 4227 break; 4228 4229 case EQ_ETH: 4230 rc = eth_eq_alloc(sc, vi, eq); 4231 break; 4232 4233 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4234 case EQ_OFLD: 4235 rc = ofld_eq_alloc(sc, vi, eq); 4236 break; 4237 #endif 4238 4239 default: 4240 panic("%s: invalid eq type %d.", __func__, 4241 eq->flags & EQ_TYPEMASK); 4242 } 4243 if (rc != 0) { 4244 device_printf(sc->dev, 4245 "failed to allocate egress queue(%d): %d\n", 4246 eq->flags & EQ_TYPEMASK, rc); 4247 } 4248 4249 if (isset(&eq->doorbells, DOORBELL_UDB) || 4250 isset(&eq->doorbells, DOORBELL_UDBWC) || 4251 isset(&eq->doorbells, DOORBELL_WCWR)) { 4252 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 4253 uint32_t mask = (1 << s_qpp) - 1; 4254 volatile uint8_t *udb; 4255 4256 udb = sc->udbs_base + UDBS_DB_OFFSET; 4257 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 4258 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 4259 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 4260 clrbit(&eq->doorbells, DOORBELL_WCWR); 4261 else { 4262 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 4263 eq->udb_qid = 0; 4264 } 4265 eq->udb = (volatile void *)udb; 4266 } 4267 4268 return (rc); 4269 } 4270 4271 static int 4272 free_eq(struct adapter *sc, struct sge_eq *eq) 4273 { 4274 int rc; 4275 4276 if (eq->flags & EQ_ALLOCATED) { 4277 switch (eq->flags & EQ_TYPEMASK) { 4278 case EQ_CTRL: 4279 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 4280 eq->cntxt_id); 4281 break; 4282 4283 case EQ_ETH: 4284 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 4285 eq->cntxt_id); 4286 break; 4287 4288 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4289 case EQ_OFLD: 4290 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 4291 eq->cntxt_id); 4292 break; 4293 #endif 4294 4295 default: 4296 panic("%s: invalid eq type %d.", __func__, 4297 eq->flags & EQ_TYPEMASK); 4298 } 4299 if (rc != 0) { 4300 device_printf(sc->dev, 4301 "failed to free egress queue (%d): %d\n", 4302 eq->flags & EQ_TYPEMASK, rc); 4303 return (rc); 4304 } 4305 eq->flags &= ~EQ_ALLOCATED; 4306 } 4307 4308 free_ring(sc, 
eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 4309 4310 if (mtx_initialized(&eq->eq_lock)) 4311 mtx_destroy(&eq->eq_lock); 4312 4313 bzero(eq, sizeof(*eq)); 4314 return (0); 4315 } 4316 4317 static int 4318 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 4319 struct sysctl_oid *oid) 4320 { 4321 int rc; 4322 struct sysctl_ctx_list *ctx = vi ? &vi->ctx : &sc->ctx; 4323 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4324 4325 rc = alloc_eq(sc, vi, &wrq->eq); 4326 if (rc) 4327 return (rc); 4328 4329 wrq->adapter = sc; 4330 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 4331 TAILQ_INIT(&wrq->incomplete_wrs); 4332 STAILQ_INIT(&wrq->wr_list); 4333 wrq->nwr_pending = 0; 4334 wrq->ndesc_needed = 0; 4335 4336 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 4337 &wrq->eq.ba, "bus address of descriptor ring"); 4338 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4339 wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, 4340 "desc ring size in bytes"); 4341 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4342 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 4343 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 4344 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &wrq->eq.cidx, 0, 4345 sysctl_uint16, "I", "consumer index"); 4346 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 4347 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &wrq->eq.pidx, 0, 4348 sysctl_uint16, "I", "producer index"); 4349 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4350 wrq->eq.sidx, "status page index"); 4351 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 4352 &wrq->tx_wrs_direct, "# of work requests (direct)"); 4353 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 4354 &wrq->tx_wrs_copied, "# of work requests (copied)"); 4355 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 4356 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 4357 4358 return (rc); 4359 } 4360 4361 static int 4362 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 4363 { 4364 int rc; 4365 4366 rc = free_eq(sc, &wrq->eq); 4367 if (rc) 4368 return (rc); 4369 4370 bzero(wrq, sizeof(*wrq)); 4371 return (0); 4372 } 4373 4374 static int 4375 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 4376 struct sysctl_oid *oid) 4377 { 4378 int rc; 4379 struct port_info *pi = vi->pi; 4380 struct adapter *sc = pi->adapter; 4381 struct sge_eq *eq = &txq->eq; 4382 struct txpkts *txp; 4383 char name[16]; 4384 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4385 4386 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 4387 M_CXGBE, &eq->eq_lock, M_WAITOK); 4388 if (rc != 0) { 4389 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 4390 return (rc); 4391 } 4392 4393 rc = alloc_eq(sc, vi, eq); 4394 if (rc != 0) { 4395 mp_ring_free(txq->r); 4396 txq->r = NULL; 4397 return (rc); 4398 } 4399 4400 /* Can't fail after this point. 
*/ 4401 4402 if (idx == 0) 4403 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4404 else 4405 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4406 ("eq_base mismatch")); 4407 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4408 ("PF with non-zero eq_base")); 4409 4410 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4411 txq->ifp = vi->ifp; 4412 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4413 if (sc->flags & IS_VF) 4414 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4415 V_TXPKT_INTF(pi->tx_chan)); 4416 else 4417 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4418 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4419 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4420 txq->tc_idx = -1; 4421 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4422 M_ZERO | M_WAITOK); 4423 4424 txp = &txq->txp; 4425 txp->score = 5; 4426 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr); 4427 txq->txp.max_npkt = min(nitems(txp->mb), 4428 sc->params.max_pkts_per_eth_tx_pkts_wr); 4429 4430 snprintf(name, sizeof(name), "%d", idx); 4431 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 4432 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queue"); 4433 children = SYSCTL_CHILDREN(oid); 4434 4435 SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 4436 &eq->ba, "bus address of descriptor ring"); 4437 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4438 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4439 "desc ring size in bytes"); 4440 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4441 &eq->abs_id, 0, "absolute id of the queue"); 4442 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4443 &eq->cntxt_id, 0, "SGE context id of the queue"); 4444 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 4445 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &eq->cidx, 0, 4446 sysctl_uint16, "I", "consumer index"); 4447 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 4448 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, &eq->pidx, 0, 4449 sysctl_uint16, "I", "producer index"); 4450 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4451 eq->sidx, "status page index"); 4452 4453 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 4454 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vi, idx, sysctl_tc, 4455 "I", "traffic class (-1 means none)"); 4456 4457 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4458 &txq->txcsum, "# of times hardware assisted with checksum"); 4459 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 4460 CTLFLAG_RD, &txq->vlan_insertion, 4461 "# of times hardware inserted 802.1Q tag"); 4462 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4463 &txq->tso_wrs, "# of TSO work requests"); 4464 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4465 &txq->imm_wrs, "# of work requests with immediate data"); 4466 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4467 &txq->sgl_wrs, "# of work requests with direct SGL"); 4468 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4469 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4470 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 4471 CTLFLAG_RD, &txq->txpkts0_wrs, 4472 "# of txpkts (type 0) work requests"); 4473 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 4474 CTLFLAG_RD, &txq->txpkts1_wrs, 4475 "# of txpkts (type 1) work requests"); 4476 
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 4477 CTLFLAG_RD, &txq->txpkts0_pkts, 4478 "# of frames tx'd using type0 txpkts work requests"); 4479 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 4480 CTLFLAG_RD, &txq->txpkts1_pkts, 4481 "# of frames tx'd using type1 txpkts work requests"); 4482 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, 4483 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4484 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs", 4485 CTLFLAG_RD, &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests"); 4486 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_txcsum", 4487 CTLFLAG_RD, &txq->vxlan_txcsum, 4488 "# of times hardware assisted with inner checksums (VXLAN)"); 4489 4490 #ifdef KERN_TLS 4491 if (sc->flags & KERN_TLS_OK) { 4492 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4493 "kern_tls_records", CTLFLAG_RD, &txq->kern_tls_records, 4494 "# of NIC TLS records transmitted"); 4495 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4496 "kern_tls_short", CTLFLAG_RD, &txq->kern_tls_short, 4497 "# of short NIC TLS records transmitted"); 4498 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4499 "kern_tls_partial", CTLFLAG_RD, &txq->kern_tls_partial, 4500 "# of partial NIC TLS records transmitted"); 4501 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4502 "kern_tls_full", CTLFLAG_RD, &txq->kern_tls_full, 4503 "# of full NIC TLS records transmitted"); 4504 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4505 "kern_tls_octets", CTLFLAG_RD, &txq->kern_tls_octets, 4506 "# of payload octets in transmitted NIC TLS records"); 4507 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4508 "kern_tls_waste", CTLFLAG_RD, &txq->kern_tls_waste, 4509 "# of octets DMAd but not transmitted in NIC TLS records"); 4510 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4511 "kern_tls_options", CTLFLAG_RD, &txq->kern_tls_options, 4512 "# of NIC TLS options-only packets transmitted"); 4513 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4514 "kern_tls_header", CTLFLAG_RD, &txq->kern_tls_header, 4515 "# of NIC TLS header-only packets transmitted"); 4516 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4517 "kern_tls_fin", CTLFLAG_RD, &txq->kern_tls_fin, 4518 "# of NIC TLS FIN-only packets transmitted"); 4519 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4520 "kern_tls_fin_short", CTLFLAG_RD, &txq->kern_tls_fin_short, 4521 "# of NIC TLS padded FIN packets on short TLS records"); 4522 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4523 "kern_tls_cbc", CTLFLAG_RD, &txq->kern_tls_cbc, 4524 "# of NIC TLS sessions using AES-CBC"); 4525 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4526 "kern_tls_gcm", CTLFLAG_RD, &txq->kern_tls_gcm, 4527 "# of NIC TLS sessions using AES-GCM"); 4528 } 4529 #endif 4530 mp_ring_sysctls(txq->r, &vi->ctx, children); 4531 4532 return (0); 4533 } 4534 4535 static int 4536 free_txq(struct vi_info *vi, struct sge_txq *txq) 4537 { 4538 int rc; 4539 struct adapter *sc = vi->adapter; 4540 struct sge_eq *eq = &txq->eq; 4541 4542 rc = free_eq(sc, eq); 4543 if (rc) 4544 return (rc); 4545 4546 sglist_free(txq->gl); 4547 free(txq->sdesc, M_CXGBE); 4548 mp_ring_free(txq->r); 4549 4550 bzero(txq, sizeof(*txq)); 4551 return (0); 4552 } 4553 4554 static void 4555 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4556 { 4557 bus_addr_t *ba = arg; 4558 4559 KASSERT(nseg == 1, 4560 ("%s meant for single segment mappings only.", __func__)); 4561 4562 *ba = error ? 
0 : segs->ds_addr; 4563 } 4564 4565 static inline void 4566 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4567 { 4568 uint32_t n, v; 4569 4570 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); 4571 MPASS(n > 0); 4572 4573 wmb(); 4574 v = fl->dbval | V_PIDX(n); 4575 if (fl->udb) 4576 *fl->udb = htole32(v); 4577 else 4578 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4579 IDXINCR(fl->dbidx, n, fl->sidx); 4580 } 4581 4582 /* 4583 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4584 * recycled do not count towards this allocation budget. 4585 * 4586 * Returns non-zero to indicate that this freelist should be added to the list 4587 * of starving freelists. 4588 */ 4589 static int 4590 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 4591 { 4592 __be64 *d; 4593 struct fl_sdesc *sd; 4594 uintptr_t pa; 4595 caddr_t cl; 4596 struct rx_buf_info *rxb; 4597 struct cluster_metadata *clm; 4598 uint16_t max_pidx; 4599 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 4600 4601 FL_LOCK_ASSERT_OWNED(fl); 4602 4603 /* 4604 * We always stop at the beginning of the hardware descriptor that's just 4605 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 4606 * which would mean an empty freelist to the chip. 4607 */ 4608 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 4609 if (fl->pidx == max_pidx * 8) 4610 return (0); 4611 4612 d = &fl->desc[fl->pidx]; 4613 sd = &fl->sdesc[fl->pidx]; 4614 4615 while (n > 0) { 4616 4617 if (sd->cl != NULL) { 4618 4619 if (sd->nmbuf == 0) { 4620 /* 4621 * Fast recycle without involving any atomics on 4622 * the cluster's metadata (if the cluster has 4623 * metadata). This happens when all frames 4624 * received in the cluster were small enough to 4625 * fit within a single mbuf each. 4626 */ 4627 fl->cl_fast_recycled++; 4628 goto recycled; 4629 } 4630 4631 /* 4632 * Cluster is guaranteed to have metadata. Clusters 4633 * without metadata always take the fast recycle path 4634 * when they're recycled. 
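 *
 * The atomic decrement below drops this freelist's reference on the
 * cluster: if it was the last reference the cluster is reused in
 * place, otherwise an mbuf up the stack still owns the cluster, so
 * sd->cl is cleared and a fresh cluster is allocated instead.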
4635 */ 4636 clm = cl_metadata(sd); 4637 MPASS(clm != NULL); 4638 4639 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4640 fl->cl_recycled++; 4641 counter_u64_add(extfree_rels, 1); 4642 goto recycled; 4643 } 4644 sd->cl = NULL; /* gave up my reference */ 4645 } 4646 MPASS(sd->cl == NULL); 4647 rxb = &sc->sge.rx_buf_info[fl->zidx]; 4648 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4649 if (__predict_false(cl == NULL)) { 4650 if (fl->zidx != fl->safe_zidx) { 4651 rxb = &sc->sge.rx_buf_info[fl->safe_zidx]; 4652 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4653 } 4654 if (cl == NULL) 4655 break; 4656 } 4657 fl->cl_allocated++; 4658 n--; 4659 4660 pa = pmap_kextract((vm_offset_t)cl); 4661 sd->cl = cl; 4662 sd->zidx = fl->zidx; 4663 4664 if (fl->flags & FL_BUF_PACKING) { 4665 *d = htobe64(pa | rxb->hwidx2); 4666 sd->moff = rxb->size2; 4667 } else { 4668 *d = htobe64(pa | rxb->hwidx1); 4669 sd->moff = 0; 4670 } 4671 recycled: 4672 sd->nmbuf = 0; 4673 d++; 4674 sd++; 4675 if (__predict_false((++fl->pidx & 7) == 0)) { 4676 uint16_t pidx = fl->pidx >> 3; 4677 4678 if (__predict_false(pidx == fl->sidx)) { 4679 fl->pidx = 0; 4680 pidx = 0; 4681 sd = fl->sdesc; 4682 d = fl->desc; 4683 } 4684 if (n < 8 || pidx == max_pidx) 4685 break; 4686 4687 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 4688 ring_fl_db(sc, fl); 4689 } 4690 } 4691 4692 if ((fl->pidx >> 3) != fl->dbidx) 4693 ring_fl_db(sc, fl); 4694 4695 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 4696 } 4697 4698 /* 4699 * Attempt to refill all starving freelists. 4700 */ 4701 static void 4702 refill_sfl(void *arg) 4703 { 4704 struct adapter *sc = arg; 4705 struct sge_fl *fl, *fl_temp; 4706 4707 mtx_assert(&sc->sfl_lock, MA_OWNED); 4708 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 4709 FL_LOCK(fl); 4710 refill_fl(sc, fl, 64); 4711 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 4712 TAILQ_REMOVE(&sc->sfl, fl, link); 4713 fl->flags &= ~FL_STARVING; 4714 } 4715 FL_UNLOCK(fl); 4716 } 4717 4718 if (!TAILQ_EMPTY(&sc->sfl)) 4719 callout_schedule(&sc->sfl_callout, hz / 5); 4720 } 4721 4722 static int 4723 alloc_fl_sdesc(struct sge_fl *fl) 4724 { 4725 4726 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 4727 M_ZERO | M_WAITOK); 4728 4729 return (0); 4730 } 4731 4732 static void 4733 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 4734 { 4735 struct fl_sdesc *sd; 4736 struct cluster_metadata *clm; 4737 int i; 4738 4739 sd = fl->sdesc; 4740 for (i = 0; i < fl->sidx * 8; i++, sd++) { 4741 if (sd->cl == NULL) 4742 continue; 4743 4744 if (sd->nmbuf == 0) 4745 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); 4746 else if (fl->flags & FL_BUF_PACKING) { 4747 clm = cl_metadata(sd); 4748 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4749 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, 4750 sd->cl); 4751 counter_u64_add(extfree_rels, 1); 4752 } 4753 } 4754 sd->cl = NULL; 4755 } 4756 4757 free(fl->sdesc, M_CXGBE); 4758 fl->sdesc = NULL; 4759 } 4760 4761 static inline void 4762 get_pkt_gl(struct mbuf *m, struct sglist *gl) 4763 { 4764 int rc; 4765 4766 M_ASSERTPKTHDR(m); 4767 4768 sglist_reset(gl); 4769 rc = sglist_append_mbuf(gl, m); 4770 if (__predict_false(rc != 0)) { 4771 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 4772 "with %d.", __func__, m, mbuf_nsegs(m), rc); 4773 } 4774 4775 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 4776 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 4777 mbuf_nsegs(m), gl->sg_nseg)); 4778 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m), 4779 ("%s: 
%d segments, should have been 1 <= nsegs <= %d", __func__, 4780 gl->sg_nseg, max_nsegs_allowed(m))); 4781 } 4782 4783 /* 4784 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 4785 */ 4786 static inline u_int 4787 txpkt_len16(u_int nsegs, const u_int extra) 4788 { 4789 u_int n; 4790 4791 MPASS(nsegs > 0); 4792 4793 nsegs--; /* first segment is part of ulptx_sgl */ 4794 n = extra + sizeof(struct fw_eth_tx_pkt_wr) + 4795 sizeof(struct cpl_tx_pkt_core) + 4796 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4797 4798 return (howmany(n, 16)); 4799 } 4800 4801 /* 4802 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 4803 * request header. 4804 */ 4805 static inline u_int 4806 txpkt_vm_len16(u_int nsegs, const u_int extra) 4807 { 4808 u_int n; 4809 4810 MPASS(nsegs > 0); 4811 4812 nsegs--; /* first segment is part of ulptx_sgl */ 4813 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) + 4814 sizeof(struct cpl_tx_pkt_core) + 4815 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4816 4817 return (howmany(n, 16)); 4818 } 4819 4820 static inline void 4821 calculate_mbuf_len16(struct adapter *sc, struct mbuf *m) 4822 { 4823 const int lso = sizeof(struct cpl_tx_pkt_lso_core); 4824 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso); 4825 4826 if (sc->flags & IS_VF) { 4827 if (needs_tso(m)) 4828 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso)); 4829 else 4830 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0)); 4831 return; 4832 } 4833 4834 if (needs_tso(m)) { 4835 if (needs_vxlan_tso(m)) 4836 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso)); 4837 else 4838 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso)); 4839 } else 4840 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0)); 4841 } 4842 4843 /* 4844 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 4845 * request header. 4846 */ 4847 static inline u_int 4848 txpkts0_len16(u_int nsegs) 4849 { 4850 u_int n; 4851 4852 MPASS(nsegs > 0); 4853 4854 nsegs--; /* first segment is part of ulptx_sgl */ 4855 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 4856 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 4857 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4858 4859 return (howmany(n, 16)); 4860 } 4861 4862 /* 4863 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 4864 * request header. 
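 *
 * With a one-segment GL this works out to 2 (one cpl_tx_pkt_core plus
 * a single-entry ulptx_sgl, 32 bytes total), which matches the
 * 32B-per-mbuf accounting noted in write_txpkts_vm_wr.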
4865 */ 4866 static inline u_int 4867 txpkts1_len16(void) 4868 { 4869 u_int n; 4870 4871 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 4872 4873 return (howmany(n, 16)); 4874 } 4875 4876 static inline u_int 4877 imm_payload(u_int ndesc) 4878 { 4879 u_int n; 4880 4881 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 4882 sizeof(struct cpl_tx_pkt_core); 4883 4884 return (n); 4885 } 4886 4887 static inline uint64_t 4888 csum_to_ctrl(struct adapter *sc, struct mbuf *m) 4889 { 4890 uint64_t ctrl; 4891 int csum_type, l2hlen, l3hlen; 4892 int x, y; 4893 static const int csum_types[3][2] = { 4894 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6}, 4895 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6}, 4896 {TX_CSUM_IP, 0} 4897 }; 4898 4899 M_ASSERTPKTHDR(m); 4900 4901 if (!needs_hwcsum(m)) 4902 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); 4903 4904 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN); 4905 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip)); 4906 4907 if (needs_vxlan_csum(m)) { 4908 MPASS(m->m_pkthdr.l4hlen > 0); 4909 MPASS(m->m_pkthdr.l5hlen > 0); 4910 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN); 4911 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip)); 4912 4913 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + 4914 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen + 4915 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN; 4916 l3hlen = m->m_pkthdr.inner_l3hlen; 4917 } else { 4918 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN; 4919 l3hlen = m->m_pkthdr.l3hlen; 4920 } 4921 4922 ctrl = 0; 4923 if (!needs_l3_csum(m)) 4924 ctrl |= F_TXPKT_IPCSUM_DIS; 4925 4926 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP | 4927 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP)) 4928 x = 0; /* TCP */ 4929 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP | 4930 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP)) 4931 x = 1; /* UDP */ 4932 else 4933 x = 2; 4934 4935 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | 4936 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) 4937 y = 0; /* IPv4 */ 4938 else { 4939 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | 4940 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)); 4941 y = 1; /* IPv6 */ 4942 } 4943 /* 4944 * needs_hwcsum returned true earlier so there must be some kind of 4945 * checksum to calculate. 
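 *
 * Note that csum_types[2][1] is 0: IPv6 has no header checksum, so an
 * "IP checksum only" request can only be for IPv4.  The MPASS on
 * csum_type below relies on that combination never occurring here.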
4946 */ 4947 csum_type = csum_types[x][y]; 4948 MPASS(csum_type != 0); 4949 if (csum_type == TX_CSUM_IP) 4950 ctrl |= F_TXPKT_L4CSUM_DIS; 4951 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen); 4952 if (chip_id(sc) <= CHELSIO_T5) 4953 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen); 4954 else 4955 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen); 4956 4957 return (ctrl); 4958 } 4959 4960 static inline void * 4961 write_lso_cpl(void *cpl, struct mbuf *m0) 4962 { 4963 struct cpl_tx_pkt_lso_core *lso; 4964 uint32_t ctrl; 4965 4966 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4967 m0->m_pkthdr.l4hlen > 0, 4968 ("%s: mbuf %p needs TSO but missing header lengths", 4969 __func__, m0)); 4970 4971 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 4972 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 4973 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 4974 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 4975 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4976 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4977 ctrl |= F_LSO_IPV6; 4978 4979 lso = cpl; 4980 lso->lso_ctrl = htobe32(ctrl); 4981 lso->ipid_ofst = htobe16(0); 4982 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4983 lso->seqno_offset = htobe32(0); 4984 lso->len = htobe32(m0->m_pkthdr.len); 4985 4986 return (lso + 1); 4987 } 4988 4989 static void * 4990 write_tnl_lso_cpl(void *cpl, struct mbuf *m0) 4991 { 4992 struct cpl_tx_tnl_lso *tnl_lso = cpl; 4993 uint32_t ctrl; 4994 4995 KASSERT(m0->m_pkthdr.inner_l2hlen > 0 && 4996 m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 && 4997 m0->m_pkthdr.inner_l5hlen > 0, 4998 ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths", 4999 __func__, m0)); 5000 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5001 m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0, 5002 ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths", 5003 __func__, m0)); 5004 5005 /* Outer headers. */ 5006 ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) | 5007 F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST | 5008 V_CPL_TX_TNL_LSO_ETHHDRLENOUT( 5009 (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5010 V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) | 5011 F_CPL_TX_TNL_LSO_IPLENSETOUT; 5012 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5013 ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT; 5014 else { 5015 ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT | 5016 F_CPL_TX_TNL_LSO_IPIDINCOUT; 5017 } 5018 tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl); 5019 tnl_lso->IpIdOffsetOut = 0; 5020 tnl_lso->UdpLenSetOut_to_TnlHdrLen = 5021 htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT | 5022 F_CPL_TX_TNL_LSO_UDPLENSETOUT | 5023 V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen + 5024 m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen + 5025 m0->m_pkthdr.l5hlen) | 5026 V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN)); 5027 tnl_lso->r1 = 0; 5028 5029 /* Inner headers. 
*/ 5030 ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN( 5031 (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) | 5032 V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) | 5033 V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2); 5034 if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr)) 5035 ctrl |= F_CPL_TX_TNL_LSO_IPV6; 5036 tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl); 5037 tnl_lso->IpIdOffset = 0; 5038 tnl_lso->IpIdSplit_to_Mss = 5039 htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz)); 5040 tnl_lso->TCPSeqOffset = 0; 5041 tnl_lso->EthLenOffset_Size = 5042 htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len)); 5043 5044 return (tnl_lso + 1); 5045 } 5046 5047 #define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */ 5048 5049 /* 5050 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 5051 * software descriptor, and advance the pidx. It is guaranteed that enough 5052 * descriptors are available. 5053 * 5054 * The return value is the # of hardware descriptors used. 5055 */ 5056 static u_int 5057 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0) 5058 { 5059 struct sge_eq *eq; 5060 struct fw_eth_tx_pkt_vm_wr *wr; 5061 struct tx_sdesc *txsd; 5062 struct cpl_tx_pkt_core *cpl; 5063 uint32_t ctrl; /* used in many unrelated places */ 5064 uint64_t ctrl1; 5065 int len16, ndesc, pktlen, nsegs; 5066 caddr_t dst; 5067 5068 TXQ_LOCK_ASSERT_OWNED(txq); 5069 M_ASSERTPKTHDR(m0); 5070 5071 len16 = mbuf_len16(m0); 5072 nsegs = mbuf_nsegs(m0); 5073 pktlen = m0->m_pkthdr.len; 5074 ctrl = sizeof(struct cpl_tx_pkt_core); 5075 if (needs_tso(m0)) 5076 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5077 ndesc = tx_len16_to_desc(len16); 5078 5079 /* Firmware work request header */ 5080 eq = &txq->eq; 5081 wr = (void *)&eq->desc[eq->pidx]; 5082 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 5083 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5084 5085 ctrl = V_FW_WR_LEN16(len16); 5086 wr->equiq_to_len16 = htobe32(ctrl); 5087 wr->r3[0] = 0; 5088 wr->r3[1] = 0; 5089 5090 /* 5091 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 5092 * vlantci is ignored unless the ethtype is 0x8100, so it's 5093 * simpler to always copy it rather than making it 5094 * conditional. Also, it seems that we do not have to set 5095 * vlantci or fake the ethtype when doing VLAN tag insertion. 5096 */ 5097 m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst); 5098 5099 if (needs_tso(m0)) { 5100 cpl = write_lso_cpl(wr + 1, m0); 5101 txq->tso_wrs++; 5102 } else 5103 cpl = (void *)(wr + 1); 5104 5105 /* Checksum offload */ 5106 ctrl1 = csum_to_ctrl(sc, m0); 5107 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5108 txq->txcsum++; /* some hardware assistance provided */ 5109 5110 /* VLAN tag insertion */ 5111 if (needs_vlan_insertion(m0)) { 5112 ctrl1 |= F_TXPKT_VLAN_VLD | 5113 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5114 txq->vlan_insertion++; 5115 } 5116 5117 /* CPL header */ 5118 cpl->ctrl0 = txq->cpl_ctrl0; 5119 cpl->pack = 0; 5120 cpl->len = htobe16(pktlen); 5121 cpl->ctrl1 = htobe64(ctrl1); 5122 5123 /* SGL */ 5124 dst = (void *)(cpl + 1); 5125 5126 /* 5127 * A packet using TSO will use up an entire descriptor for the 5128 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 5129 * If this descriptor is the last descriptor in the ring, wrap 5130 * around to the front of the ring explicitly for the start of 5131 * the sgl. 
5132 */ 5133 if (dst == (void *)&eq->desc[eq->sidx]) { 5134 dst = (void *)&eq->desc[0]; 5135 write_gl_to_txd(txq, m0, &dst, 0); 5136 } else 5137 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5138 txq->sgl_wrs++; 5139 txq->txpkt_wrs++; 5140 5141 txsd = &txq->sdesc[eq->pidx]; 5142 txsd->m = m0; 5143 txsd->desc_used = ndesc; 5144 5145 return (ndesc); 5146 } 5147 5148 /* 5149 * Write a raw WR to the hardware descriptors, update the software 5150 * descriptor, and advance the pidx. It is guaranteed that enough 5151 * descriptors are available. 5152 * 5153 * The return value is the # of hardware descriptors used. 5154 */ 5155 static u_int 5156 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 5157 { 5158 struct sge_eq *eq = &txq->eq; 5159 struct tx_sdesc *txsd; 5160 struct mbuf *m; 5161 caddr_t dst; 5162 int len16, ndesc; 5163 5164 len16 = mbuf_len16(m0); 5165 ndesc = tx_len16_to_desc(len16); 5166 MPASS(ndesc <= available); 5167 5168 dst = wr; 5169 for (m = m0; m != NULL; m = m->m_next) 5170 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5171 5172 txq->raw_wrs++; 5173 5174 txsd = &txq->sdesc[eq->pidx]; 5175 txsd->m = m0; 5176 txsd->desc_used = ndesc; 5177 5178 return (ndesc); 5179 } 5180 5181 /* 5182 * Write a txpkt WR for this packet to the hardware descriptors, update the 5183 * software descriptor, and advance the pidx. It is guaranteed that enough 5184 * descriptors are available. 5185 * 5186 * The return value is the # of hardware descriptors used. 5187 */ 5188 static u_int 5189 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0, 5190 u_int available) 5191 { 5192 struct sge_eq *eq; 5193 struct fw_eth_tx_pkt_wr *wr; 5194 struct tx_sdesc *txsd; 5195 struct cpl_tx_pkt_core *cpl; 5196 uint32_t ctrl; /* used in many unrelated places */ 5197 uint64_t ctrl1; 5198 int len16, ndesc, pktlen, nsegs; 5199 caddr_t dst; 5200 5201 TXQ_LOCK_ASSERT_OWNED(txq); 5202 M_ASSERTPKTHDR(m0); 5203 5204 len16 = mbuf_len16(m0); 5205 nsegs = mbuf_nsegs(m0); 5206 pktlen = m0->m_pkthdr.len; 5207 ctrl = sizeof(struct cpl_tx_pkt_core); 5208 if (needs_tso(m0)) { 5209 if (needs_vxlan_tso(m0)) 5210 ctrl += sizeof(struct cpl_tx_tnl_lso); 5211 else 5212 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5213 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && 5214 available >= 2) { 5215 /* Immediate data. Recalculate len16 and set nsegs to 0. 
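 * The payload of such a packet is copied straight into the descriptors
 * (the imm_wrs path further down) instead of being described by an
 * SGL, so the chip does not have to gather it separately.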
*/ 5216 ctrl += pktlen; 5217 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 5218 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 5219 nsegs = 0; 5220 } 5221 ndesc = tx_len16_to_desc(len16); 5222 MPASS(ndesc <= available); 5223 5224 /* Firmware work request header */ 5225 eq = &txq->eq; 5226 wr = (void *)&eq->desc[eq->pidx]; 5227 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 5228 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5229 5230 ctrl = V_FW_WR_LEN16(len16); 5231 wr->equiq_to_len16 = htobe32(ctrl); 5232 wr->r3 = 0; 5233 5234 if (needs_tso(m0)) { 5235 if (needs_vxlan_tso(m0)) { 5236 cpl = write_tnl_lso_cpl(wr + 1, m0); 5237 txq->vxlan_tso_wrs++; 5238 } else { 5239 cpl = write_lso_cpl(wr + 1, m0); 5240 txq->tso_wrs++; 5241 } 5242 } else 5243 cpl = (void *)(wr + 1); 5244 5245 /* Checksum offload */ 5246 ctrl1 = csum_to_ctrl(sc, m0); 5247 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5248 /* some hardware assistance provided */ 5249 if (needs_vxlan_csum(m0)) 5250 txq->vxlan_txcsum++; 5251 else 5252 txq->txcsum++; 5253 } 5254 5255 /* VLAN tag insertion */ 5256 if (needs_vlan_insertion(m0)) { 5257 ctrl1 |= F_TXPKT_VLAN_VLD | 5258 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5259 txq->vlan_insertion++; 5260 } 5261 5262 /* CPL header */ 5263 cpl->ctrl0 = txq->cpl_ctrl0; 5264 cpl->pack = 0; 5265 cpl->len = htobe16(pktlen); 5266 cpl->ctrl1 = htobe64(ctrl1); 5267 5268 /* SGL */ 5269 dst = (void *)(cpl + 1); 5270 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx])) 5271 dst = (caddr_t)&eq->desc[0]; 5272 if (nsegs > 0) { 5273 5274 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5275 txq->sgl_wrs++; 5276 } else { 5277 struct mbuf *m; 5278 5279 for (m = m0; m != NULL; m = m->m_next) { 5280 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5281 #ifdef INVARIANTS 5282 pktlen -= m->m_len; 5283 #endif 5284 } 5285 #ifdef INVARIANTS 5286 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 5287 #endif 5288 txq->imm_wrs++; 5289 } 5290 5291 txq->txpkt_wrs++; 5292 5293 txsd = &txq->sdesc[eq->pidx]; 5294 txsd->m = m0; 5295 txsd->desc_used = ndesc; 5296 5297 return (ndesc); 5298 } 5299 5300 static inline bool 5301 cmp_l2hdr(struct txpkts *txp, struct mbuf *m) 5302 { 5303 int len; 5304 5305 MPASS(txp->npkt > 0); 5306 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5307 5308 if (txp->ethtype == be16toh(ETHERTYPE_VLAN)) 5309 len = VM_TX_L2HDR_LEN; 5310 else 5311 len = sizeof(struct ether_header); 5312 5313 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0); 5314 } 5315 5316 static inline void 5317 save_l2hdr(struct txpkts *txp, struct mbuf *m) 5318 { 5319 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5320 5321 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN); 5322 } 5323 5324 static int 5325 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5326 int avail, bool *send) 5327 { 5328 struct txpkts *txp = &txq->txp; 5329 5330 MPASS(sc->flags & IS_VF); 5331 5332 /* Cannot have TSO and coalesce at the same time. 
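 * A packet that cannot be coalesced still flushes whatever has been
 * collected so far: *send is set if there are pending packets and the
 * non-zero return tells the caller not to add this mbuf to a txpkts WR.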
*/ 5333 if (cannot_use_txpkts(m)) { 5334 cannot_coalesce: 5335 *send = txp->npkt > 0; 5336 return (EINVAL); 5337 } 5338 5339 /* VF allows coalescing of type 1 (1 GL) only */ 5340 if (mbuf_nsegs(m) > 1) 5341 goto cannot_coalesce; 5342 5343 *send = false; 5344 if (txp->npkt > 0) { 5345 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5346 MPASS(txp->npkt < txp->max_npkt); 5347 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5348 5349 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) { 5350 retry_after_send: 5351 *send = true; 5352 return (EAGAIN); 5353 } 5354 if (m->m_pkthdr.len + txp->plen > 65535) 5355 goto retry_after_send; 5356 if (cmp_l2hdr(txp, m)) 5357 goto retry_after_send; 5358 5359 txp->len16 += txpkts1_len16(); 5360 txp->plen += m->m_pkthdr.len; 5361 txp->mb[txp->npkt++] = m; 5362 if (txp->npkt == txp->max_npkt) 5363 *send = true; 5364 } else { 5365 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) + 5366 txpkts1_len16(); 5367 if (tx_len16_to_desc(txp->len16) > avail) 5368 goto cannot_coalesce; 5369 txp->npkt = 1; 5370 txp->wr_type = 1; 5371 txp->plen = m->m_pkthdr.len; 5372 txp->mb[0] = m; 5373 save_l2hdr(txp, m); 5374 } 5375 return (0); 5376 } 5377 5378 static int 5379 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5380 int avail, bool *send) 5381 { 5382 struct txpkts *txp = &txq->txp; 5383 int nsegs; 5384 5385 MPASS(!(sc->flags & IS_VF)); 5386 5387 /* Cannot have TSO and coalesce at the same time. */ 5388 if (cannot_use_txpkts(m)) { 5389 cannot_coalesce: 5390 *send = txp->npkt > 0; 5391 return (EINVAL); 5392 } 5393 5394 *send = false; 5395 nsegs = mbuf_nsegs(m); 5396 if (txp->npkt == 0) { 5397 if (m->m_pkthdr.len > 65535) 5398 goto cannot_coalesce; 5399 if (nsegs > 1) { 5400 txp->wr_type = 0; 5401 txp->len16 = 5402 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5403 txpkts0_len16(nsegs); 5404 } else { 5405 txp->wr_type = 1; 5406 txp->len16 = 5407 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5408 txpkts1_len16(); 5409 } 5410 if (tx_len16_to_desc(txp->len16) > avail) 5411 goto cannot_coalesce; 5412 txp->npkt = 1; 5413 txp->plen = m->m_pkthdr.len; 5414 txp->mb[0] = m; 5415 } else { 5416 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5417 MPASS(txp->npkt < txp->max_npkt); 5418 5419 if (m->m_pkthdr.len + txp->plen > 65535) { 5420 retry_after_send: 5421 *send = true; 5422 return (EAGAIN); 5423 } 5424 5425 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 5426 if (txp->wr_type == 0) { 5427 if (tx_len16_to_desc(txp->len16 + 5428 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC)) 5429 goto retry_after_send; 5430 txp->len16 += txpkts0_len16(nsegs); 5431 } else { 5432 if (nsegs != 1) 5433 goto retry_after_send; 5434 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > 5435 avail) 5436 goto retry_after_send; 5437 txp->len16 += txpkts1_len16(); 5438 } 5439 5440 txp->plen += m->m_pkthdr.len; 5441 txp->mb[txp->npkt++] = m; 5442 if (txp->npkt == txp->max_npkt) 5443 *send = true; 5444 } 5445 return (0); 5446 } 5447 5448 /* 5449 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 5450 * the software descriptor, and advance the pidx. It is guaranteed that enough 5451 * descriptors are available. 5452 * 5453 * The return value is the # of hardware descriptors used. 
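 *
 * A type 0 WR carries a ulp_txpkt + ulptx_idata header ahead of each
 * packet's CPL, while a type 1 WR packs just the CPL and a one-segment
 * SGL per packet; the loop below handles both layouts.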
5454 */ 5455 static u_int 5456 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq) 5457 { 5458 const struct txpkts *txp = &txq->txp; 5459 struct sge_eq *eq = &txq->eq; 5460 struct fw_eth_tx_pkts_wr *wr; 5461 struct tx_sdesc *txsd; 5462 struct cpl_tx_pkt_core *cpl; 5463 uint64_t ctrl1; 5464 int ndesc, i, checkwrap; 5465 struct mbuf *m, *last; 5466 void *flitp; 5467 5468 TXQ_LOCK_ASSERT_OWNED(txq); 5469 MPASS(txp->npkt > 0); 5470 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5471 5472 wr = (void *)&eq->desc[eq->pidx]; 5473 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 5474 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5475 wr->plen = htobe16(txp->plen); 5476 wr->npkt = txp->npkt; 5477 wr->r3 = 0; 5478 wr->type = txp->wr_type; 5479 flitp = wr + 1; 5480 5481 /* 5482 * At this point we are 16B into a hardware descriptor. If checkwrap is 5483 * set then we know the WR is going to wrap around somewhere. We'll 5484 * check for that at appropriate points. 5485 */ 5486 ndesc = tx_len16_to_desc(txp->len16); 5487 last = NULL; 5488 checkwrap = eq->sidx - ndesc < eq->pidx; 5489 for (i = 0; i < txp->npkt; i++) { 5490 m = txp->mb[i]; 5491 if (txp->wr_type == 0) { 5492 struct ulp_txpkt *ulpmc; 5493 struct ulptx_idata *ulpsc; 5494 5495 /* ULP master command */ 5496 ulpmc = flitp; 5497 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 5498 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 5499 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m))); 5500 5501 /* ULP subcommand */ 5502 ulpsc = (void *)(ulpmc + 1); 5503 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 5504 F_ULP_TX_SC_MORE); 5505 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 5506 5507 cpl = (void *)(ulpsc + 1); 5508 if (checkwrap && 5509 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 5510 cpl = (void *)&eq->desc[0]; 5511 } else { 5512 cpl = flitp; 5513 } 5514 5515 /* Checksum offload */ 5516 ctrl1 = csum_to_ctrl(sc, m); 5517 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5518 /* some hardware assistance provided */ 5519 if (needs_vxlan_csum(m)) 5520 txq->vxlan_txcsum++; 5521 else 5522 txq->txcsum++; 5523 } 5524 5525 /* VLAN tag insertion */ 5526 if (needs_vlan_insertion(m)) { 5527 ctrl1 |= F_TXPKT_VLAN_VLD | 5528 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5529 txq->vlan_insertion++; 5530 } 5531 5532 /* CPL header */ 5533 cpl->ctrl0 = txq->cpl_ctrl0; 5534 cpl->pack = 0; 5535 cpl->len = htobe16(m->m_pkthdr.len); 5536 cpl->ctrl1 = htobe64(ctrl1); 5537 5538 flitp = cpl + 1; 5539 if (checkwrap && 5540 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5541 flitp = (void *)&eq->desc[0]; 5542 5543 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 5544 5545 if (last != NULL) 5546 last->m_nextpkt = m; 5547 last = m; 5548 } 5549 5550 txq->sgl_wrs++; 5551 if (txp->wr_type == 0) { 5552 txq->txpkts0_pkts += txp->npkt; 5553 txq->txpkts0_wrs++; 5554 } else { 5555 txq->txpkts1_pkts += txp->npkt; 5556 txq->txpkts1_wrs++; 5557 } 5558 5559 txsd = &txq->sdesc[eq->pidx]; 5560 txsd->m = txp->mb[0]; 5561 txsd->desc_used = ndesc; 5562 5563 return (ndesc); 5564 } 5565 5566 static u_int 5567 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq) 5568 { 5569 const struct txpkts *txp = &txq->txp; 5570 struct sge_eq *eq = &txq->eq; 5571 struct fw_eth_tx_pkts_vm_wr *wr; 5572 struct tx_sdesc *txsd; 5573 struct cpl_tx_pkt_core *cpl; 5574 uint64_t ctrl1; 5575 int ndesc, i; 5576 struct mbuf *m, *last; 5577 void *flitp; 5578 5579 TXQ_LOCK_ASSERT_OWNED(txq); 5580 MPASS(txp->npkt > 0); 5581 MPASS(txp->wr_type == 1); /* 
VF supports type 1 only */ 5582 MPASS(txp->mb[0] != NULL); 5583 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5584 5585 wr = (void *)&eq->desc[eq->pidx]; 5586 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); 5587 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5588 wr->r3 = 0; 5589 wr->plen = htobe16(txp->plen); 5590 wr->npkt = txp->npkt; 5591 wr->r4 = 0; 5592 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16); 5593 flitp = wr + 1; 5594 5595 /* 5596 * At this point we are 32B into a hardware descriptor. Each mbuf in 5597 * the WR will take 32B so we check for the end of the descriptor ring 5598 * before writing odd mbufs (mb[1], 3, 5, ..) 5599 */ 5600 ndesc = tx_len16_to_desc(txp->len16); 5601 last = NULL; 5602 for (i = 0; i < txp->npkt; i++) { 5603 m = txp->mb[i]; 5604 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5605 flitp = &eq->desc[0]; 5606 cpl = flitp; 5607 5608 /* Checksum offload */ 5609 ctrl1 = csum_to_ctrl(sc, m); 5610 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5611 txq->txcsum++; /* some hardware assistance provided */ 5612 5613 /* VLAN tag insertion */ 5614 if (needs_vlan_insertion(m)) { 5615 ctrl1 |= F_TXPKT_VLAN_VLD | 5616 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5617 txq->vlan_insertion++; 5618 } 5619 5620 /* CPL header */ 5621 cpl->ctrl0 = txq->cpl_ctrl0; 5622 cpl->pack = 0; 5623 cpl->len = htobe16(m->m_pkthdr.len); 5624 cpl->ctrl1 = htobe64(ctrl1); 5625 5626 flitp = cpl + 1; 5627 MPASS(mbuf_nsegs(m) == 1); 5628 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0); 5629 5630 if (last != NULL) 5631 last->m_nextpkt = m; 5632 last = m; 5633 } 5634 5635 txq->sgl_wrs++; 5636 txq->txpkts1_pkts += txp->npkt; 5637 txq->txpkts1_wrs++; 5638 5639 txsd = &txq->sdesc[eq->pidx]; 5640 txsd->m = txp->mb[0]; 5641 txsd->desc_used = ndesc; 5642 5643 return (ndesc); 5644 } 5645 5646 /* 5647 * If the SGL ends on an address that is not 16 byte aligned, this function will 5648 * add a 0 filled flit at the end. 5649 */ 5650 static void 5651 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 5652 { 5653 struct sge_eq *eq = &txq->eq; 5654 struct sglist *gl = txq->gl; 5655 struct sglist_seg *seg; 5656 __be64 *flitp, *wrap; 5657 struct ulptx_sgl *usgl; 5658 int i, nflits, nsegs; 5659 5660 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 5661 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 5662 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5663 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5664 5665 get_pkt_gl(m, gl); 5666 nsegs = gl->sg_nseg; 5667 MPASS(nsegs > 0); 5668 5669 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 5670 flitp = (__be64 *)(*to); 5671 wrap = (__be64 *)(&eq->desc[eq->sidx]); 5672 seg = &gl->sg_segs[0]; 5673 usgl = (void *)flitp; 5674 5675 /* 5676 * We start at a 16 byte boundary somewhere inside the tx descriptor 5677 * ring, so we're at least 16 bytes away from the status page. There is 5678 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
5679 */ 5680 5681 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 5682 V_ULPTX_NSGE(nsegs)); 5683 usgl->len0 = htobe32(seg->ss_len); 5684 usgl->addr0 = htobe64(seg->ss_paddr); 5685 seg++; 5686 5687 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 5688 5689 /* Won't wrap around at all */ 5690 5691 for (i = 0; i < nsegs - 1; i++, seg++) { 5692 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 5693 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 5694 } 5695 if (i & 1) 5696 usgl->sge[i / 2].len[1] = htobe32(0); 5697 flitp += nflits; 5698 } else { 5699 5700 /* Will wrap somewhere in the rest of the SGL */ 5701 5702 /* 2 flits already written, write the rest flit by flit */ 5703 flitp = (void *)(usgl + 1); 5704 for (i = 0; i < nflits - 2; i++) { 5705 if (flitp == wrap) 5706 flitp = (void *)eq->desc; 5707 *flitp++ = get_flit(seg, nsegs - 1, i); 5708 } 5709 } 5710 5711 if (nflits & 1) { 5712 MPASS(((uintptr_t)flitp) & 0xf); 5713 *flitp++ = 0; 5714 } 5715 5716 MPASS((((uintptr_t)flitp) & 0xf) == 0); 5717 if (__predict_false(flitp == wrap)) 5718 *to = (void *)eq->desc; 5719 else 5720 *to = (void *)flitp; 5721 } 5722 5723 static inline void 5724 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 5725 { 5726 5727 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5728 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5729 5730 if (__predict_true((uintptr_t)(*to) + len <= 5731 (uintptr_t)&eq->desc[eq->sidx])) { 5732 bcopy(from, *to, len); 5733 (*to) += len; 5734 } else { 5735 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 5736 5737 bcopy(from, *to, portion); 5738 from += portion; 5739 portion = len - portion; /* remaining */ 5740 bcopy(from, (void *)eq->desc, portion); 5741 (*to) = (caddr_t)eq->desc + portion; 5742 } 5743 } 5744 5745 static inline void 5746 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 5747 { 5748 u_int db; 5749 5750 MPASS(n > 0); 5751 5752 db = eq->doorbells; 5753 if (n > 1) 5754 clrbit(&db, DOORBELL_WCWR); 5755 wmb(); 5756 5757 switch (ffs(db) - 1) { 5758 case DOORBELL_UDB: 5759 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5760 break; 5761 5762 case DOORBELL_WCWR: { 5763 volatile uint64_t *dst, *src; 5764 int i; 5765 5766 /* 5767 * Queues whose 128B doorbell segment fits in the page do not 5768 * use relative qid (udb_qid is always 0). Only queues with 5769 * doorbell segments can do WCWR. 
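 *
 * A WCWR doorbell pushes the entire 64B descriptor at dbidx through
 * the write-combined doorbell region, so the hardware can start on the
 * work request without first fetching the descriptor from host memory.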
5770 */ 5771 KASSERT(eq->udb_qid == 0 && n == 1, 5772 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 5773 __func__, eq->doorbells, n, eq->dbidx, eq)); 5774 5775 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 5776 UDBS_DB_OFFSET); 5777 i = eq->dbidx; 5778 src = (void *)&eq->desc[i]; 5779 while (src != (void *)&eq->desc[i + 1]) 5780 *dst++ = *src++; 5781 wmb(); 5782 break; 5783 } 5784 5785 case DOORBELL_UDBWC: 5786 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5787 wmb(); 5788 break; 5789 5790 case DOORBELL_KDB: 5791 t4_write_reg(sc, sc->sge_kdoorbell_reg, 5792 V_QID(eq->cntxt_id) | V_PIDX(n)); 5793 break; 5794 } 5795 5796 IDXINCR(eq->dbidx, n, eq->sidx); 5797 } 5798 5799 static inline u_int 5800 reclaimable_tx_desc(struct sge_eq *eq) 5801 { 5802 uint16_t hw_cidx; 5803 5804 hw_cidx = read_hw_cidx(eq); 5805 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 5806 } 5807 5808 static inline u_int 5809 total_available_tx_desc(struct sge_eq *eq) 5810 { 5811 uint16_t hw_cidx, pidx; 5812 5813 hw_cidx = read_hw_cidx(eq); 5814 pidx = eq->pidx; 5815 5816 if (pidx == hw_cidx) 5817 return (eq->sidx - 1); 5818 else 5819 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 5820 } 5821 5822 static inline uint16_t 5823 read_hw_cidx(struct sge_eq *eq) 5824 { 5825 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 5826 uint16_t cidx = spg->cidx; /* stable snapshot */ 5827 5828 return (be16toh(cidx)); 5829 } 5830 5831 /* 5832 * Reclaim 'n' descriptors approximately. 5833 */ 5834 static u_int 5835 reclaim_tx_descs(struct sge_txq *txq, u_int n) 5836 { 5837 struct tx_sdesc *txsd; 5838 struct sge_eq *eq = &txq->eq; 5839 u_int can_reclaim, reclaimed; 5840 5841 TXQ_LOCK_ASSERT_OWNED(txq); 5842 MPASS(n > 0); 5843 5844 reclaimed = 0; 5845 can_reclaim = reclaimable_tx_desc(eq); 5846 while (can_reclaim && reclaimed < n) { 5847 int ndesc; 5848 struct mbuf *m, *nextpkt; 5849 5850 txsd = &txq->sdesc[eq->cidx]; 5851 ndesc = txsd->desc_used; 5852 5853 /* Firmware doesn't return "partial" credits. 
*/ 5854 KASSERT(can_reclaim >= ndesc, 5855 ("%s: unexpected number of credits: %d, %d", 5856 __func__, can_reclaim, ndesc)); 5857 KASSERT(ndesc != 0, 5858 ("%s: descriptor with no credits: cidx %d", 5859 __func__, eq->cidx)); 5860 5861 for (m = txsd->m; m != NULL; m = nextpkt) { 5862 nextpkt = m->m_nextpkt; 5863 m->m_nextpkt = NULL; 5864 m_freem(m); 5865 } 5866 reclaimed += ndesc; 5867 can_reclaim -= ndesc; 5868 IDXINCR(eq->cidx, ndesc, eq->sidx); 5869 } 5870 5871 return (reclaimed); 5872 } 5873 5874 static void 5875 tx_reclaim(void *arg, int n) 5876 { 5877 struct sge_txq *txq = arg; 5878 struct sge_eq *eq = &txq->eq; 5879 5880 do { 5881 if (TXQ_TRYLOCK(txq) == 0) 5882 break; 5883 n = reclaim_tx_descs(txq, 32); 5884 if (eq->cidx == eq->pidx) 5885 eq->equeqidx = eq->pidx; 5886 TXQ_UNLOCK(txq); 5887 } while (n > 0); 5888 } 5889 5890 static __be64 5891 get_flit(struct sglist_seg *segs, int nsegs, int idx) 5892 { 5893 int i = (idx / 3) * 2; 5894 5895 switch (idx % 3) { 5896 case 0: { 5897 uint64_t rc; 5898 5899 rc = (uint64_t)segs[i].ss_len << 32; 5900 if (i + 1 < nsegs) 5901 rc |= (uint64_t)(segs[i + 1].ss_len); 5902 5903 return (htobe64(rc)); 5904 } 5905 case 1: 5906 return (htobe64(segs[i].ss_paddr)); 5907 case 2: 5908 return (htobe64(segs[i + 1].ss_paddr)); 5909 } 5910 5911 return (0); 5912 } 5913 5914 static int 5915 find_refill_source(struct adapter *sc, int maxp, bool packing) 5916 { 5917 int i, zidx = -1; 5918 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 5919 5920 if (packing) { 5921 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 5922 if (rxb->hwidx2 == -1) 5923 continue; 5924 if (rxb->size1 < PAGE_SIZE && 5925 rxb->size1 < largest_rx_cluster) 5926 continue; 5927 if (rxb->size1 > largest_rx_cluster) 5928 break; 5929 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE); 5930 if (rxb->size2 >= maxp) 5931 return (i); 5932 zidx = i; 5933 } 5934 } else { 5935 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 5936 if (rxb->hwidx1 == -1) 5937 continue; 5938 if (rxb->size1 > largest_rx_cluster) 5939 break; 5940 if (rxb->size1 >= maxp) 5941 return (i); 5942 zidx = i; 5943 } 5944 } 5945 5946 return (zidx); 5947 } 5948 5949 static void 5950 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 5951 { 5952 mtx_lock(&sc->sfl_lock); 5953 FL_LOCK(fl); 5954 if ((fl->flags & FL_DOOMED) == 0) { 5955 fl->flags |= FL_STARVING; 5956 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 5957 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 5958 } 5959 FL_UNLOCK(fl); 5960 mtx_unlock(&sc->sfl_lock); 5961 } 5962 5963 static void 5964 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 5965 { 5966 struct sge_wrq *wrq = (void *)eq; 5967 5968 atomic_readandclear_int(&eq->equiq); 5969 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 5970 } 5971 5972 static void 5973 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 5974 { 5975 struct sge_txq *txq = (void *)eq; 5976 5977 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 5978 5979 atomic_readandclear_int(&eq->equiq); 5980 if (mp_ring_is_idle(txq->r)) 5981 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 5982 else 5983 mp_ring_check_drainage(txq->r, 64); 5984 } 5985 5986 static int 5987 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 5988 struct mbuf *m) 5989 { 5990 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 5991 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 5992 struct adapter *sc = iq->adapter; 5993 struct sge *s = &sc->sge; 5994 struct sge_eq *eq; 5995 static void (*h[])(struct adapter 
*, struct sge_eq *) = {NULL, 5996 &handle_wrq_egr_update, &handle_eth_egr_update, 5997 &handle_wrq_egr_update}; 5998 5999 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6000 rss->opcode)); 6001 6002 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 6003 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 6004 6005 return (0); 6006 } 6007 6008 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 6009 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 6010 offsetof(struct cpl_fw6_msg, data)); 6011 6012 static int 6013 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 6014 { 6015 struct adapter *sc = iq->adapter; 6016 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 6017 6018 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6019 rss->opcode)); 6020 6021 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 6022 const struct rss_header *rss2; 6023 6024 rss2 = (const struct rss_header *)&cpl->data[0]; 6025 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 6026 } 6027 6028 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 6029 } 6030 6031 /** 6032 * t4_handle_wrerr_rpl - process a FW work request error message 6033 * @adap: the adapter 6034 * @rpl: start of the FW message 6035 */ 6036 static int 6037 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 6038 { 6039 u8 opcode = *(const u8 *)rpl; 6040 const struct fw_error_cmd *e = (const void *)rpl; 6041 unsigned int i; 6042 6043 if (opcode != FW_ERROR_CMD) { 6044 log(LOG_ERR, 6045 "%s: Received WRERR_RPL message with opcode %#x\n", 6046 device_get_nameunit(adap->dev), opcode); 6047 return (EINVAL); 6048 } 6049 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 6050 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 6051 "non-fatal"); 6052 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 6053 case FW_ERROR_TYPE_EXCEPTION: 6054 log(LOG_ERR, "exception info:\n"); 6055 for (i = 0; i < nitems(e->u.exception.info); i++) 6056 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 6057 be32toh(e->u.exception.info[i])); 6058 log(LOG_ERR, "\n"); 6059 break; 6060 case FW_ERROR_TYPE_HWMODULE: 6061 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 6062 be32toh(e->u.hwmodule.regaddr), 6063 be32toh(e->u.hwmodule.regval)); 6064 break; 6065 case FW_ERROR_TYPE_WR: 6066 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 6067 be16toh(e->u.wr.cidx), 6068 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 6069 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 6070 be32toh(e->u.wr.eqid)); 6071 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 6072 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 6073 e->u.wr.wrhdr[i]); 6074 log(LOG_ERR, "\n"); 6075 break; 6076 case FW_ERROR_TYPE_ACL: 6077 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 6078 be16toh(e->u.acl.cidx), 6079 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 6080 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 6081 be32toh(e->u.acl.eqid), 6082 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 6083 "MAC"); 6084 for (i = 0; i < nitems(e->u.acl.val); i++) 6085 log(LOG_ERR, " %02x", e->u.acl.val[i]); 6086 log(LOG_ERR, "\n"); 6087 break; 6088 default: 6089 log(LOG_ERR, "type %#x\n", 6090 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 6091 return (EINVAL); 6092 } 6093 return (0); 6094 } 6095 6096 static int 6097 sysctl_uint16(SYSCTL_HANDLER_ARGS) 6098 { 6099 uint16_t *id = arg1; 6100 int i = *id; 6101 6102 return sysctl_handle_int(oidp, &i, 0, req); 6103 } 6104 6105 static inline bool 6106 bufidx_used(struct adapter *sc, int idx) 6107 { 6108 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6109 int i; 6110 6111 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6112 if (rxb->size1 > largest_rx_cluster) 6113 continue; 6114 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx) 6115 return (true); 6116 } 6117 6118 return (false); 6119 } 6120 6121 static int 6122 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 6123 { 6124 struct adapter *sc = arg1; 6125 struct sge_params *sp = &sc->params.sge; 6126 int i, rc; 6127 struct sbuf sb; 6128 char c; 6129 6130 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); 6131 for (i = 0; i < SGE_FLBUF_SIZES; i++) { 6132 if (bufidx_used(sc, i)) 6133 c = '*'; 6134 else 6135 c = '\0'; 6136 6137 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c); 6138 } 6139 sbuf_trim(&sb); 6140 sbuf_finish(&sb); 6141 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 6142 sbuf_delete(&sb); 6143 return (rc); 6144 } 6145 6146 #ifdef RATELIMIT 6147 /* 6148 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 6149 */ 6150 static inline u_int 6151 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 6152 { 6153 u_int n; 6154 6155 MPASS(immhdrs > 0); 6156 6157 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 6158 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 6159 if (__predict_false(nsegs == 0)) 6160 goto done; 6161 6162 nsegs--; /* first segment is part of ulptx_sgl */ 6163 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 6164 if (tso) 6165 n += sizeof(struct cpl_tx_pkt_lso_core); 6166 6167 done: 6168 return (howmany(n, 16)); 6169 } 6170 6171 #define ETID_FLOWC_NPARAMS 6 6172 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 6173 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 6174 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 6175 6176 static int 6177 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, 6178 struct vi_info *vi) 6179 { 6180 struct wrq_cookie cookie; 6181 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN; 6182 struct fw_flowc_wr *flowc; 6183 6184 mtx_assert(&cst->lock, MA_OWNED); 6185 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 6186 EO_FLOWC_PENDING); 6187 6188 flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie); 6189 if (__predict_false(flowc == NULL)) 6190 return (ENOMEM); 6191 6192 bzero(flowc, ETID_FLOWC_LEN); 6193 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6194 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 6195 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 6196 V_FW_WR_FLOWID(cst->etid)); 6197 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 6198 flowc->mnemval[0].val = htobe32(pfvf); 6199 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 6200 flowc->mnemval[1].val = htobe32(pi->tx_chan); 6201 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 6202 flowc->mnemval[2].val = htobe32(pi->tx_chan); 6203 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 6204 flowc->mnemval[3].val = htobe32(cst->iqid); 
6205 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 6206 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 6207 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 6208 flowc->mnemval[5].val = htobe32(cst->schedcl); 6209 6210 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 6211 6212 cst->flags &= ~EO_FLOWC_PENDING; 6213 cst->flags |= EO_FLOWC_RPL_PENDING; 6214 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 6215 cst->tx_credits -= ETID_FLOWC_LEN16; 6216 6217 return (0); 6218 } 6219 6220 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 6221 6222 void 6223 send_etid_flush_wr(struct cxgbe_rate_tag *cst) 6224 { 6225 struct fw_flowc_wr *flowc; 6226 struct wrq_cookie cookie; 6227 6228 mtx_assert(&cst->lock, MA_OWNED); 6229 6230 flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie); 6231 if (__predict_false(flowc == NULL)) 6232 CXGBE_UNIMPLEMENTED(__func__); 6233 6234 bzero(flowc, ETID_FLUSH_LEN16 * 16); 6235 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6236 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 6237 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 6238 V_FW_WR_FLOWID(cst->etid)); 6239 6240 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 6241 6242 cst->flags |= EO_FLUSH_RPL_PENDING; 6243 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 6244 cst->tx_credits -= ETID_FLUSH_LEN16; 6245 cst->ncompl++; 6246 } 6247 6248 static void 6249 write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, 6250 struct mbuf *m0, int compl) 6251 { 6252 struct cpl_tx_pkt_core *cpl; 6253 uint64_t ctrl1; 6254 uint32_t ctrl; /* used in many unrelated places */ 6255 int len16, pktlen, nsegs, immhdrs; 6256 caddr_t dst; 6257 uintptr_t p; 6258 struct ulptx_sgl *usgl; 6259 struct sglist sg; 6260 struct sglist_seg segs[38]; /* XXX: find real limit. 
XXX: get off the stack */ 6261 6262 mtx_assert(&cst->lock, MA_OWNED); 6263 M_ASSERTPKTHDR(m0); 6264 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 6265 m0->m_pkthdr.l4hlen > 0, 6266 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 6267 6268 len16 = mbuf_eo_len16(m0); 6269 nsegs = mbuf_eo_nsegs(m0); 6270 pktlen = m0->m_pkthdr.len; 6271 ctrl = sizeof(struct cpl_tx_pkt_core); 6272 if (needs_tso(m0)) 6273 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 6274 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 6275 ctrl += immhdrs; 6276 6277 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 6278 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 6279 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 6280 V_FW_WR_FLOWID(cst->etid)); 6281 wr->r3 = 0; 6282 if (needs_outer_udp_csum(m0)) { 6283 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 6284 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 6285 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6286 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 6287 wr->u.udpseg.rtplen = 0; 6288 wr->u.udpseg.r4 = 0; 6289 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 6290 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 6291 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 6292 cpl = (void *)(wr + 1); 6293 } else { 6294 MPASS(needs_outer_tcp_csum(m0)); 6295 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 6296 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 6297 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6298 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 6299 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 6300 wr->u.tcpseg.r4 = 0; 6301 wr->u.tcpseg.r5 = 0; 6302 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 6303 6304 if (needs_tso(m0)) { 6305 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 6306 6307 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 6308 6309 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 6310 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 6311 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - 6312 ETHER_HDR_LEN) >> 2) | 6313 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 6314 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 6315 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 6316 ctrl |= F_LSO_IPV6; 6317 lso->lso_ctrl = htobe32(ctrl); 6318 lso->ipid_ofst = htobe16(0); 6319 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 6320 lso->seqno_offset = htobe32(0); 6321 lso->len = htobe32(pktlen); 6322 6323 cpl = (void *)(lso + 1); 6324 } else { 6325 wr->u.tcpseg.mss = htobe16(0xffff); 6326 cpl = (void *)(wr + 1); 6327 } 6328 } 6329 6330 /* Checksum offload must be requested for ethofld. 
 */
	MPASS(needs_outer_l4_csum(m0));
	ctrl1 = csum_to_ctrl(cst->adapter, m0);

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	}

	/* CPL header */
	cpl->ctrl0 = cst->ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */
	p = (uintptr_t)(cpl + 1);
	m_copydata(m0, 0, immhdrs, (void *)p);

	/* SGL */
	dst = (void *)(cpl + 1);
	if (nsegs > 0) {
		int i, pad;

		/* zero-pad up to the next 16-byte boundary, if not already aligned */
		p += immhdrs;
		pad = 16 - (immhdrs & 0xf);
		bzero((void *)p, pad);

		usgl = (void *)(p + pad);
		usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
		    V_ULPTX_NSGE(nsegs));

		sglist_init(&sg, nitems(segs), segs);
		for (; m0 != NULL; m0 = m0->m_next) {
			if (__predict_false(m0->m_len == 0))
				continue;
			if (immhdrs >= m0->m_len) {
				immhdrs -= m0->m_len;
				continue;
			}
			if (m0->m_flags & M_EXTPG)
				sglist_append_mbuf_epg(&sg, m0,
				    mtod(m0, vm_offset_t), m0->m_len);
			else
				sglist_append(&sg, mtod(m0, char *) + immhdrs,
				    m0->m_len - immhdrs);
			immhdrs = 0;
		}
		MPASS(sg.sg_nseg == nsegs);

		/*
		 * Zero pad last 8B in case the WR doesn't end on a 16B
		 * boundary.
		 */
		*(uint64_t *)((char *)wr + len16 * 16 - 8) = 0;

		usgl->len0 = htobe32(segs[0].ss_len);
		usgl->addr0 = htobe64(segs[0].ss_paddr);
		for (i = 0; i < nsegs - 1; i++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	}

}

static void
ethofld_tx(struct cxgbe_rate_tag *cst)
{
	struct mbuf *m;
	struct wrq_cookie cookie;
	int next_credits, compl;
	struct fw_eth_tx_eo_wr *wr;

	mtx_assert(&cst->lock, MA_OWNED);

	while ((m = mbufq_first(&cst->pending_tx)) != NULL) {
		M_ASSERTPKTHDR(m);

		/* How many len16 credits do we need to send this mbuf? */
		next_credits = mbuf_eo_len16(m);
		MPASS(next_credits > 0);
		if (next_credits > cst->tx_credits) {
			/*
			 * Tx will make progress eventually because there is at
			 * least one outstanding fw4_ack that will return
			 * credits and kick the tx.
			 */
			MPASS(cst->ncompl > 0);
			return;
		}
		wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie);
		if (__predict_false(wr == NULL)) {
			/* XXX: wishful thinking, not a real assertion. */
			MPASS(cst->ncompl > 0);
			return;
		}
		cst->tx_credits -= next_credits;
		cst->tx_nocompl += next_credits;
		compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2;
		ETHER_BPF_MTAP(cst->com.com.ifp, m);
		write_ethofld_wr(cst, wr, m, compl);
		commit_wrq_wr(cst->eo_txq, wr, &cookie);
		if (compl) {
			cst->ncompl++;
			cst->tx_nocompl = 0;
		}
		(void) mbufq_dequeue(&cst->pending_tx);

		/*
		 * Drop the mbuf's reference on the tag now rather
		 * than waiting until m_freem(). This ensures that
		 * cxgbe_rate_tag_free gets called when the inp drops
		 * its reference on the tag and there are no more
		 * mbufs in the pending_tx queue, at which point any
		 * pending flush request can be sent.
Otherwise if the last mbuf 6450 * doesn't request a completion the etid will never be 6451 * released. 6452 */ 6453 m->m_pkthdr.snd_tag = NULL; 6454 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 6455 m_snd_tag_rele(&cst->com.com); 6456 6457 mbufq_enqueue(&cst->pending_fwack, m); 6458 } 6459 } 6460 6461 int 6462 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 6463 { 6464 struct cxgbe_rate_tag *cst; 6465 int rc; 6466 6467 MPASS(m0->m_nextpkt == NULL); 6468 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); 6469 MPASS(m0->m_pkthdr.snd_tag != NULL); 6470 cst = mst_to_crt(m0->m_pkthdr.snd_tag); 6471 6472 mtx_lock(&cst->lock); 6473 MPASS(cst->flags & EO_SND_TAG_REF); 6474 6475 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 6476 struct vi_info *vi = ifp->if_softc; 6477 struct port_info *pi = vi->pi; 6478 struct adapter *sc = pi->adapter; 6479 const uint32_t rss_mask = vi->rss_size - 1; 6480 uint32_t rss_hash; 6481 6482 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 6483 if (M_HASHTYPE_ISHASH(m0)) 6484 rss_hash = m0->m_pkthdr.flowid; 6485 else 6486 rss_hash = arc4random(); 6487 /* We assume RSS hashing */ 6488 cst->iqid = vi->rss[rss_hash & rss_mask]; 6489 cst->eo_txq += rss_hash % vi->nofldtxq; 6490 rc = send_etid_flowc_wr(cst, pi, vi); 6491 if (rc != 0) 6492 goto done; 6493 } 6494 6495 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 6496 rc = ENOBUFS; 6497 goto done; 6498 } 6499 6500 mbufq_enqueue(&cst->pending_tx, m0); 6501 cst->plen += m0->m_pkthdr.len; 6502 6503 /* 6504 * Hold an extra reference on the tag while generating work 6505 * requests to ensure that we don't try to free the tag during 6506 * ethofld_tx() in case we are sending the final mbuf after 6507 * the inp was freed. 6508 */ 6509 m_snd_tag_ref(&cst->com.com); 6510 ethofld_tx(cst); 6511 mtx_unlock(&cst->lock); 6512 m_snd_tag_rele(&cst->com.com); 6513 return (0); 6514 6515 done: 6516 mtx_unlock(&cst->lock); 6517 if (__predict_false(rc != 0)) 6518 m_freem(m0); 6519 return (rc); 6520 } 6521 6522 static int 6523 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 6524 { 6525 struct adapter *sc = iq->adapter; 6526 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 6527 struct mbuf *m; 6528 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 6529 struct cxgbe_rate_tag *cst; 6530 uint8_t credits = cpl->credits; 6531 6532 cst = lookup_etid(sc, etid); 6533 mtx_lock(&cst->lock); 6534 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 6535 MPASS(credits >= ETID_FLOWC_LEN16); 6536 credits -= ETID_FLOWC_LEN16; 6537 cst->flags &= ~EO_FLOWC_RPL_PENDING; 6538 } 6539 6540 KASSERT(cst->ncompl > 0, 6541 ("%s: etid %u (%p) wasn't expecting completion.", 6542 __func__, etid, cst)); 6543 cst->ncompl--; 6544 6545 while (credits > 0) { 6546 m = mbufq_dequeue(&cst->pending_fwack); 6547 if (__predict_false(m == NULL)) { 6548 /* 6549 * The remaining credits are for the final flush that 6550 * was issued when the tag was freed by the kernel. 6551 */ 6552 MPASS((cst->flags & 6553 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 6554 EO_FLUSH_RPL_PENDING); 6555 MPASS(credits == ETID_FLUSH_LEN16); 6556 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 6557 MPASS(cst->ncompl == 0); 6558 6559 cst->flags &= ~EO_FLUSH_RPL_PENDING; 6560 cst->tx_credits += cpl->credits; 6561 cxgbe_rate_tag_free_locked(cst); 6562 return (0); /* cst is gone. 
 */
		}
		KASSERT(m != NULL,
		    ("%s: too many credits (%u, %u)", __func__, cpl->credits,
		    credits));
		KASSERT(credits >= mbuf_eo_len16(m),
		    ("%s: too few credits (%u, %u, %u)", __func__,
		    cpl->credits, credits, mbuf_eo_len16(m)));
		credits -= mbuf_eo_len16(m);
		cst->plen -= m->m_pkthdr.len;
		m_freem(m);
	}

	cst->tx_credits += cpl->credits;
	MPASS(cst->tx_credits <= cst->tx_total);

	if (cst->flags & EO_SND_TAG_REF) {
		/*
		 * As with ethofld_transmit(), hold an extra reference
		 * so that the tag is stable across ethofld_tx().
		 */
		m_snd_tag_ref(&cst->com.com);
		m = mbufq_first(&cst->pending_tx);
		if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
			ethofld_tx(cst);
		mtx_unlock(&cst->lock);
		m_snd_tag_rele(&cst->com.com);
	} else {
		/*
		 * There shouldn't be any pending packets if the tag
		 * was freed by the kernel since any pending packet
		 * should hold a reference to the tag.
		 */
		MPASS(mbufq_first(&cst->pending_tx) == NULL);
		mtx_unlock(&cst->lock);
	}

	return (0);
}
#endif	/* RATELIMIT */