1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_kern_tls.h" 36 #include "opt_ratelimit.h" 37 38 #include <sys/types.h> 39 #include <sys/eventhandler.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/ktls.h> 44 #include <sys/malloc.h> 45 #include <sys/queue.h> 46 #include <sys/sbuf.h> 47 #include <sys/taskqueue.h> 48 #include <sys/time.h> 49 #include <sys/sglist.h> 50 #include <sys/sysctl.h> 51 #include <sys/smp.h> 52 #include <sys/socketvar.h> 53 #include <sys/counter.h> 54 #include <net/bpf.h> 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_vlan_var.h> 58 #include <net/if_vxlan.h> 59 #include <netinet/in.h> 60 #include <netinet/ip.h> 61 #include <netinet/ip6.h> 62 #include <netinet/tcp.h> 63 #include <netinet/udp.h> 64 #include <machine/in_cksum.h> 65 #include <machine/md_var.h> 66 #include <vm/vm.h> 67 #include <vm/pmap.h> 68 #ifdef DEV_NETMAP 69 #include <machine/bus.h> 70 #include <sys/selinfo.h> 71 #include <net/if_var.h> 72 #include <net/netmap.h> 73 #include <dev/netmap/netmap_kern.h> 74 #endif 75 76 #include "common/common.h" 77 #include "common/t4_regs.h" 78 #include "common/t4_regs_values.h" 79 #include "common/t4_msg.h" 80 #include "t4_l2t.h" 81 #include "t4_mp_ring.h" 82 83 #ifdef T4_PKT_TIMESTAMP 84 #define RX_COPY_THRESHOLD (MINCLSIZE - 8) 85 #else 86 #define RX_COPY_THRESHOLD MINCLSIZE 87 #endif 88 89 /* Internal mbuf flags stored in PH_loc.eight[1]. */ 90 #define MC_NOMAP 0x01 91 #define MC_RAW_WR 0x02 92 #define MC_TLS 0x04 93 94 /* 95 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 96 * 0-7 are valid values. 97 */ 98 static int fl_pktshift = 0; 99 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0, 100 "payload DMA offset in rx buffer (bytes)"); 101 102 /* 103 * Pad ethernet payload up to this boundary. 
104 * -1: driver should figure out a good value. 105 * 0: disable padding. 106 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. 107 */ 108 int fl_pad = -1; 109 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0, 110 "payload pad boundary (bytes)"); 111 112 /* 113 * Status page length. 114 * -1: driver should figure out a good value. 115 * 64 or 128 are the only other valid values. 116 */ 117 static int spg_len = -1; 118 SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0, 119 "status page size (bytes)"); 120 121 /* 122 * Congestion drops. 123 * -1: no congestion feedback (not recommended). 124 * 0: backpressure the channel instead of dropping packets right away. 125 * 1: no backpressure, drop packets for the congested queue immediately. 126 */ 127 static int cong_drop = 0; 128 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0, 129 "Congestion control for RX queues (0 = backpressure, 1 = drop"); 130 131 /* 132 * Deliver multiple frames in the same free list buffer if they fit. 133 * -1: let the driver decide whether to enable buffer packing or not. 134 * 0: disable buffer packing. 135 * 1: enable buffer packing. 136 */ 137 static int buffer_packing = -1; 138 SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing, 139 0, "Enable buffer packing"); 140 141 /* 142 * Start next frame in a packed buffer at this boundary. 143 * -1: driver should figure out a good value. 144 * T4: driver will ignore this and use the same value as fl_pad above. 145 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. 146 */ 147 static int fl_pack = -1; 148 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0, 149 "payload pack boundary (bytes)"); 150 151 /* 152 * Largest rx cluster size that the driver is allowed to allocate. 
153 */ 154 static int largest_rx_cluster = MJUM16BYTES; 155 SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN, 156 &largest_rx_cluster, 0, "Largest rx cluster (bytes)"); 157 158 /* 159 * Size of cluster allocation that's most likely to succeed. The driver will 160 * fall back to this size if it fails to allocate clusters larger than this. 161 */ 162 static int safest_rx_cluster = PAGE_SIZE; 163 SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN, 164 &safest_rx_cluster, 0, "Safe rx cluster (bytes)"); 165 166 #ifdef RATELIMIT 167 /* 168 * Knob to control TCP timestamp rewriting, and the granularity of the tick used 169 * for rewriting. -1 and 0-3 are all valid values. 170 * -1: hardware should leave the TCP timestamps alone. 171 * 0: 1ms 172 * 1: 100us 173 * 2: 10us 174 * 3: 1us 175 */ 176 static int tsclk = -1; 177 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0, 178 "Control TCP timestamp rewriting when using pacing"); 179 180 static int eo_max_backlog = 1024 * 1024; 181 SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog, 182 0, "Maximum backlog of ratelimited data per flow"); 183 #endif 184 185 /* 186 * The interrupt holdoff timers are multiplied by this value on T6+. 187 * 1 and 3-17 (both inclusive) are legal values. 188 */ 189 static int tscale = 1; 190 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0, 191 "Interrupt holdoff timer scale on T6+"); 192 193 /* 194 * Number of LRO entries in the lro_ctrl structure per rx queue. 195 */ 196 static int lro_entries = TCP_LRO_ENTRIES; 197 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0, 198 "Number of LRO entries per RX queue"); 199 200 /* 201 * This enables presorting of frames before they're fed into tcp_lro_rx. 
202 */ 203 static int lro_mbufs = 0; 204 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0, 205 "Enable presorting of LRO frames"); 206 207 static counter_u64_t pullups; 208 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, pullups, CTLFLAG_RD, &pullups, 209 "Number of mbuf pullups performed"); 210 211 static counter_u64_t defrags; 212 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags, 213 "Number of mbuf defrags performed"); 214 215 216 static int service_iq(struct sge_iq *, int); 217 static int service_iq_fl(struct sge_iq *, int); 218 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 219 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *, 220 u_int); 221 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); 222 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 223 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 224 uint16_t, char *); 225 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 226 bus_addr_t *, void **); 227 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 228 void *); 229 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 230 int, int); 231 static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); 232 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 233 struct sge_iq *); 234 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 235 struct sysctl_oid *, struct sge_fl *); 236 static int alloc_fwq(struct adapter *); 237 static int free_fwq(struct adapter *); 238 static int alloc_ctrlq(struct adapter *, struct sge_wrq *, int, 239 struct sysctl_oid *); 240 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, 241 struct sysctl_oid *); 242 static int free_rxq(struct vi_info *, struct sge_rxq *); 243 #ifdef TCP_OFFLOAD 244 static 
int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 245 struct sysctl_oid *); 246 static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 247 #endif 248 #ifdef DEV_NETMAP 249 static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, 250 struct sysctl_oid *); 251 static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); 252 static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, 253 struct sysctl_oid *); 254 static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); 255 #endif 256 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 257 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 258 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 259 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 260 #endif 261 static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); 262 static int free_eq(struct adapter *, struct sge_eq *); 263 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 264 struct sysctl_oid *); 265 static int free_wrq(struct adapter *, struct sge_wrq *); 266 static int alloc_txq(struct vi_info *, struct sge_txq *, int, 267 struct sysctl_oid *); 268 static int free_txq(struct vi_info *, struct sge_txq *); 269 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 270 static inline void ring_fl_db(struct adapter *, struct sge_fl *); 271 static int refill_fl(struct adapter *, struct sge_fl *, int); 272 static void refill_sfl(void *); 273 static int alloc_fl_sdesc(struct sge_fl *); 274 static void free_fl_sdesc(struct adapter *, struct sge_fl *); 275 static int find_refill_source(struct adapter *, int, bool); 276 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 277 278 static inline void get_pkt_gl(struct mbuf *, struct sglist *); 279 static inline u_int txpkt_len16(u_int, const u_int); 280 static inline u_int txpkt_vm_len16(u_int, const u_int); 281 static inline 
void calculate_mbuf_len16(struct mbuf *, bool); 282 static inline u_int txpkts0_len16(u_int); 283 static inline u_int txpkts1_len16(void); 284 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); 285 static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *, 286 u_int); 287 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 288 struct mbuf *); 289 static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *, 290 int, bool *); 291 static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *, 292 int, bool *); 293 static u_int write_txpkts_wr(struct adapter *, struct sge_txq *); 294 static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *); 295 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 296 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 297 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 298 static inline uint16_t read_hw_cidx(struct sge_eq *); 299 static inline u_int reclaimable_tx_desc(struct sge_eq *); 300 static inline u_int total_available_tx_desc(struct sge_eq *); 301 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 302 static void tx_reclaim(void *, int); 303 static __be64 get_flit(struct sglist_seg *, int, int); 304 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 305 struct mbuf *); 306 static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 307 struct mbuf *); 308 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 309 static void wrq_tx_drain(void *, int); 310 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 311 312 static int sysctl_uint16(SYSCTL_HANDLER_ARGS); 313 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 314 #ifdef RATELIMIT 315 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 316 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 317 struct mbuf *); 318 
#endif 319 320 static counter_u64_t extfree_refs; 321 static counter_u64_t extfree_rels; 322 323 an_handler_t t4_an_handler; 324 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 325 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 326 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 327 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 328 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 329 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 330 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 331 332 void 333 t4_register_an_handler(an_handler_t h) 334 { 335 uintptr_t *loc; 336 337 MPASS(h == NULL || t4_an_handler == NULL); 338 339 loc = (uintptr_t *)&t4_an_handler; 340 atomic_store_rel_ptr(loc, (uintptr_t)h); 341 } 342 343 void 344 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 345 { 346 uintptr_t *loc; 347 348 MPASS(type < nitems(t4_fw_msg_handler)); 349 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 350 /* 351 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 352 * handler dispatch table. Reject any attempt to install a handler for 353 * this subtype. 
354 */ 355 MPASS(type != FW_TYPE_RSSCPL); 356 MPASS(type != FW6_TYPE_RSSCPL); 357 358 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 359 atomic_store_rel_ptr(loc, (uintptr_t)h); 360 } 361 362 void 363 t4_register_cpl_handler(int opcode, cpl_handler_t h) 364 { 365 uintptr_t *loc; 366 367 MPASS(opcode < nitems(t4_cpl_handler)); 368 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 369 370 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 371 atomic_store_rel_ptr(loc, (uintptr_t)h); 372 } 373 374 static int 375 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 376 struct mbuf *m) 377 { 378 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 379 u_int tid; 380 int cookie; 381 382 MPASS(m == NULL); 383 384 tid = GET_TID(cpl); 385 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { 386 /* 387 * The return code for filter-write is put in the CPL cookie so 388 * we have to rely on the hardware tid (is_ftid) to determine 389 * that this is a response to a filter. 390 */ 391 cookie = CPL_COOKIE_FILTER; 392 } else { 393 cookie = G_COOKIE(cpl->cookie); 394 } 395 MPASS(cookie > CPL_COOKIE_RESERVED); 396 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 397 398 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 399 } 400 401 static int 402 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 403 struct mbuf *m) 404 { 405 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 406 unsigned int cookie; 407 408 MPASS(m == NULL); 409 410 cookie = GET_TID(rpl) & F_SYNC_WR ? 
CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 411 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 412 } 413 414 static int 415 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 416 struct mbuf *m) 417 { 418 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 419 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 420 421 MPASS(m == NULL); 422 MPASS(cookie != CPL_COOKIE_RESERVED); 423 424 return (act_open_rpl_handlers[cookie](iq, rss, m)); 425 } 426 427 static int 428 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 429 struct mbuf *m) 430 { 431 struct adapter *sc = iq->adapter; 432 u_int cookie; 433 434 MPASS(m == NULL); 435 if (is_hashfilter(sc)) 436 cookie = CPL_COOKIE_HASHFILTER; 437 else 438 cookie = CPL_COOKIE_TOM; 439 440 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 441 } 442 443 static int 444 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 445 { 446 struct adapter *sc = iq->adapter; 447 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 448 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 449 u_int cookie; 450 451 MPASS(m == NULL); 452 if (is_etid(sc, tid)) 453 cookie = CPL_COOKIE_ETHOFLD; 454 else 455 cookie = CPL_COOKIE_TOM; 456 457 return (fw4_ack_handlers[cookie](iq, rss, m)); 458 } 459 460 static void 461 t4_init_shared_cpl_handlers(void) 462 { 463 464 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 465 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 466 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 467 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 468 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 469 } 470 471 void 472 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 473 { 474 uintptr_t *loc; 475 476 MPASS(opcode < nitems(t4_cpl_handler)); 477 MPASS(cookie > CPL_COOKIE_RESERVED); 478 MPASS(cookie < NUM_CPL_COOKIES); 479 
MPASS(t4_cpl_handler[opcode] != NULL); 480 481 switch (opcode) { 482 case CPL_SET_TCB_RPL: 483 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 484 break; 485 case CPL_L2T_WRITE_RPL: 486 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 487 break; 488 case CPL_ACT_OPEN_RPL: 489 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 490 break; 491 case CPL_ABORT_RPL_RSS: 492 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 493 break; 494 case CPL_FW4_ACK: 495 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 496 break; 497 default: 498 MPASS(0); 499 return; 500 } 501 MPASS(h == NULL || *loc == (uintptr_t)NULL); 502 atomic_store_rel_ptr(loc, (uintptr_t)h); 503 } 504 505 /* 506 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 507 */ 508 void 509 t4_sge_modload(void) 510 { 511 512 if (fl_pktshift < 0 || fl_pktshift > 7) { 513 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 514 " using 0 instead.\n", fl_pktshift); 515 fl_pktshift = 0; 516 } 517 518 if (spg_len != 64 && spg_len != 128) { 519 int len; 520 521 #if defined(__i386__) || defined(__amd64__) 522 len = cpu_clflush_line_size > 64 ? 
128 : 64; 523 #else 524 len = 64; 525 #endif 526 if (spg_len != -1) { 527 printf("Invalid hw.cxgbe.spg_len value (%d)," 528 " using %d instead.\n", spg_len, len); 529 } 530 spg_len = len; 531 } 532 533 if (cong_drop < -1 || cong_drop > 1) { 534 printf("Invalid hw.cxgbe.cong_drop value (%d)," 535 " using 0 instead.\n", cong_drop); 536 cong_drop = 0; 537 } 538 539 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 540 printf("Invalid hw.cxgbe.tscale value (%d)," 541 " using 1 instead.\n", tscale); 542 tscale = 1; 543 } 544 545 if (largest_rx_cluster != MCLBYTES && 546 #if MJUMPAGESIZE != MCLBYTES 547 largest_rx_cluster != MJUMPAGESIZE && 548 #endif 549 largest_rx_cluster != MJUM9BYTES && 550 largest_rx_cluster != MJUM16BYTES) { 551 printf("Invalid hw.cxgbe.largest_rx_cluster value (%d)," 552 " using %d instead.\n", largest_rx_cluster, MJUM16BYTES); 553 largest_rx_cluster = MJUM16BYTES; 554 } 555 556 if (safest_rx_cluster != MCLBYTES && 557 #if MJUMPAGESIZE != MCLBYTES 558 safest_rx_cluster != MJUMPAGESIZE && 559 #endif 560 safest_rx_cluster != MJUM9BYTES && 561 safest_rx_cluster != MJUM16BYTES) { 562 printf("Invalid hw.cxgbe.safest_rx_cluster value (%d)," 563 " using %d instead.\n", safest_rx_cluster, MJUMPAGESIZE); 564 safest_rx_cluster = MJUMPAGESIZE; 565 } 566 567 extfree_refs = counter_u64_alloc(M_WAITOK); 568 extfree_rels = counter_u64_alloc(M_WAITOK); 569 pullups = counter_u64_alloc(M_WAITOK); 570 defrags = counter_u64_alloc(M_WAITOK); 571 counter_u64_zero(extfree_refs); 572 counter_u64_zero(extfree_rels); 573 counter_u64_zero(pullups); 574 counter_u64_zero(defrags); 575 576 t4_init_shared_cpl_handlers(); 577 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 578 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 579 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 580 #ifdef RATELIMIT 581 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 582 CPL_COOKIE_ETHOFLD); 583 #endif 584 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, 
t4_handle_fw_rpl); 585 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 586 } 587 588 void 589 t4_sge_modunload(void) 590 { 591 592 counter_u64_free(extfree_refs); 593 counter_u64_free(extfree_rels); 594 counter_u64_free(pullups); 595 counter_u64_free(defrags); 596 } 597 598 uint64_t 599 t4_sge_extfree_refs(void) 600 { 601 uint64_t refs, rels; 602 603 rels = counter_u64_fetch(extfree_rels); 604 refs = counter_u64_fetch(extfree_refs); 605 606 return (refs - rels); 607 } 608 609 /* max 4096 */ 610 #define MAX_PACK_BOUNDARY 512 611 612 static inline void 613 setup_pad_and_pack_boundaries(struct adapter *sc) 614 { 615 uint32_t v, m; 616 int pad, pack, pad_shift; 617 618 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 619 X_INGPADBOUNDARY_SHIFT; 620 pad = fl_pad; 621 if (fl_pad < (1 << pad_shift) || 622 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 623 !powerof2(fl_pad)) { 624 /* 625 * If there is any chance that we might use buffer packing and 626 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 627 * it to the minimum allowed in all other cases. 628 */ 629 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 630 631 /* 632 * For fl_pad = 0 we'll still write a reasonable value to the 633 * register but all the freelists will opt out of padding. 634 * We'll complain here only if the user tried to set it to a 635 * value greater than 0 that was invalid. 636 */ 637 if (fl_pad > 0) { 638 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 639 " (%d), using %d instead.\n", fl_pad, pad); 640 } 641 } 642 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 643 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 644 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 645 646 if (is_t4(sc)) { 647 if (fl_pack != -1 && fl_pack != pad) { 648 /* Complain but carry on. 
*/ 649 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 650 " using %d instead.\n", fl_pack, pad); 651 } 652 return; 653 } 654 655 pack = fl_pack; 656 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 657 !powerof2(fl_pack)) { 658 if (sc->params.pci.mps > MAX_PACK_BOUNDARY) 659 pack = MAX_PACK_BOUNDARY; 660 else 661 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 662 MPASS(powerof2(pack)); 663 if (pack < 16) 664 pack = 16; 665 if (pack == 32) 666 pack = 64; 667 if (pack > 4096) 668 pack = 4096; 669 if (fl_pack != -1) { 670 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 671 " (%d), using %d instead.\n", fl_pack, pack); 672 } 673 } 674 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 675 if (pack == 16) 676 v = V_INGPACKBOUNDARY(0); 677 else 678 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 679 680 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 681 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 682 } 683 684 /* 685 * adap->params.vpd.cclk must be set up before this is called. 
686 */ 687 void 688 t4_tweak_chip_settings(struct adapter *sc) 689 { 690 int i, reg; 691 uint32_t v, m; 692 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 693 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 694 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 695 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 696 static int sw_buf_sizes[] = { 697 MCLBYTES, 698 #if MJUMPAGESIZE != MCLBYTES 699 MJUMPAGESIZE, 700 #endif 701 MJUM9BYTES, 702 MJUM16BYTES 703 }; 704 705 KASSERT(sc->flags & MASTER_PF, 706 ("%s: trying to change chip settings when not master.", __func__)); 707 708 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 709 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 710 V_EGRSTATUSPAGESIZE(spg_len == 128); 711 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 712 713 setup_pad_and_pack_boundaries(sc); 714 715 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 716 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 717 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 718 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 719 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 720 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 721 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 722 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 723 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 724 725 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096); 726 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536); 727 reg = A_SGE_FL_BUFFER_SIZE2; 728 for (i = 0; i < nitems(sw_buf_sizes); i++) { 729 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 730 t4_write_reg(sc, reg, sw_buf_sizes[i]); 731 reg += 4; 732 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 733 t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE); 734 reg += 4; 735 } 736 737 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 738 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 739 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 740 741 KASSERT(intr_timer[0] <= timer_max, 742 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 743 
timer_max)); 744 for (i = 1; i < nitems(intr_timer); i++) { 745 KASSERT(intr_timer[i] >= intr_timer[i - 1], 746 ("%s: timers not listed in increasing order (%d)", 747 __func__, i)); 748 749 while (intr_timer[i] > timer_max) { 750 if (i == nitems(intr_timer) - 1) { 751 intr_timer[i] = timer_max; 752 break; 753 } 754 intr_timer[i] += intr_timer[i - 1]; 755 intr_timer[i] /= 2; 756 } 757 } 758 759 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | 760 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); 761 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); 762 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | 763 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); 764 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); 765 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | 766 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); 767 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); 768 769 if (chip_id(sc) >= CHELSIO_T6) { 770 m = V_TSCALE(M_TSCALE); 771 if (tscale == 1) 772 v = 0; 773 else 774 v = V_TSCALE(tscale - 2); 775 t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); 776 777 if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { 778 m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | 779 V_WRTHRTHRESH(M_WRTHRTHRESH); 780 t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); 781 v &= ~m; 782 v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | 783 V_WRTHRTHRESH(16); 784 t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); 785 } 786 } 787 788 /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ 789 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 790 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); 791 792 /* 793 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been 794 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we 795 * may have to deal with is MAXPHYS + 1 page. 796 */ 797 v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); 798 t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); 799 800 /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. 
*/ 801 m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; 802 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); 803 804 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 805 F_RESETDDPOFFSET; 806 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 807 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); 808 } 809 810 /* 811 * SGE wants the buffer to be at least 64B and then a multiple of 16. Its 812 * address mut be 16B aligned. If padding is in use the buffer's start and end 813 * need to be aligned to the pad boundary as well. We'll just make sure that 814 * the size is a multiple of the pad boundary here, it is up to the buffer 815 * allocation code to make sure the start of the buffer is aligned. 816 */ 817 static inline int 818 hwsz_ok(struct adapter *sc, int hwsz) 819 { 820 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; 821 822 return (hwsz >= 64 && (hwsz & mask) == 0); 823 } 824 825 /* 826 * XXX: driver really should be able to deal with unexpected settings. 827 */ 828 int 829 t4_read_chip_settings(struct adapter *sc) 830 { 831 struct sge *s = &sc->sge; 832 struct sge_params *sp = &sc->params.sge; 833 int i, j, n, rc = 0; 834 uint32_t m, v, r; 835 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 836 static int sw_buf_sizes[] = { /* Sorted by size */ 837 MCLBYTES, 838 #if MJUMPAGESIZE != MCLBYTES 839 MJUMPAGESIZE, 840 #endif 841 MJUM9BYTES, 842 MJUM16BYTES 843 }; 844 struct rx_buf_info *rxb; 845 846 m = F_RXPKTCPLMODE; 847 v = F_RXPKTCPLMODE; 848 r = sc->params.sge.sge_control; 849 if ((r & m) != v) { 850 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 851 rc = EINVAL; 852 } 853 854 /* 855 * If this changes then every single use of PAGE_SHIFT in the driver 856 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 
857 */ 858 if (sp->page_shift != PAGE_SHIFT) { 859 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 860 rc = EINVAL; 861 } 862 863 s->safe_zidx = -1; 864 rxb = &s->rx_buf_info[0]; 865 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 866 rxb->size1 = sw_buf_sizes[i]; 867 rxb->zone = m_getzone(rxb->size1); 868 rxb->type = m_gettype(rxb->size1); 869 rxb->size2 = 0; 870 rxb->hwidx1 = -1; 871 rxb->hwidx2 = -1; 872 for (j = 0; j < SGE_FLBUF_SIZES; j++) { 873 int hwsize = sp->sge_fl_buffer_size[j]; 874 875 if (!hwsz_ok(sc, hwsize)) 876 continue; 877 878 /* hwidx for size1 */ 879 if (rxb->hwidx1 == -1 && rxb->size1 == hwsize) 880 rxb->hwidx1 = j; 881 882 /* hwidx for size2 (buffer packing) */ 883 if (rxb->size1 - CL_METADATA_SIZE < hwsize) 884 continue; 885 n = rxb->size1 - hwsize - CL_METADATA_SIZE; 886 if (n == 0) { 887 rxb->hwidx2 = j; 888 rxb->size2 = hwsize; 889 break; /* stop looking */ 890 } 891 if (rxb->hwidx2 != -1) { 892 if (n < sp->sge_fl_buffer_size[rxb->hwidx2] - 893 hwsize - CL_METADATA_SIZE) { 894 rxb->hwidx2 = j; 895 rxb->size2 = hwsize; 896 } 897 } else if (n <= 2 * CL_METADATA_SIZE) { 898 rxb->hwidx2 = j; 899 rxb->size2 = hwsize; 900 } 901 } 902 if (rxb->hwidx2 != -1) 903 sc->flags |= BUF_PACKING_OK; 904 if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster) 905 s->safe_zidx = i; 906 } 907 908 if (sc->flags & IS_VF) 909 return (0); 910 911 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 912 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 913 if (r != v) { 914 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 915 rc = EINVAL; 916 } 917 918 m = v = F_TDDPTAGTCB; 919 r = t4_read_reg(sc, A_ULP_RX_CTL); 920 if ((r & m) != v) { 921 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 922 rc = EINVAL; 923 } 924 925 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 926 F_RESETDDPOFFSET; 927 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 928 r = t4_read_reg(sc, A_TP_PARA_REG5); 929 if ((r & m) != v) { 930 
device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 931 rc = EINVAL; 932 } 933 934 t4_init_tp_params(sc, 1); 935 936 t4_read_mtu_tbl(sc, sc->params.mtus, NULL); 937 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); 938 939 return (rc); 940 } 941 942 int 943 t4_create_dma_tag(struct adapter *sc) 944 { 945 int rc; 946 947 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 948 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 949 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 950 NULL, &sc->dmat); 951 if (rc != 0) { 952 device_printf(sc->dev, 953 "failed to create main DMA tag: %d\n", rc); 954 } 955 956 return (rc); 957 } 958 959 void 960 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 961 struct sysctl_oid_list *children) 962 { 963 struct sge_params *sp = &sc->params.sge; 964 965 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 966 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 967 sysctl_bufsizes, "A", "freelist buffer sizes"); 968 969 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 970 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 971 972 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 973 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 974 975 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 976 NULL, sp->spg_len, "status page size (bytes)"); 977 978 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 979 NULL, cong_drop, "congestion drop setting"); 980 981 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 982 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 983 } 984 985 int 986 t4_destroy_dma_tag(struct adapter *sc) 987 { 988 if (sc->dmat) 989 bus_dma_tag_destroy(sc->dmat); 990 991 return (0); 992 } 993 994 /* 995 * Allocate and initialize the firmware event queue, control queues, and special 996 * purpose rx queues owned by the adapter. 
997 * 998 * Returns errno on failure. Resources allocated up to that point may still be 999 * allocated. Caller is responsible for cleanup in case this function fails. 1000 */ 1001 int 1002 t4_setup_adapter_queues(struct adapter *sc) 1003 { 1004 struct sysctl_oid *oid; 1005 struct sysctl_oid_list *children; 1006 int rc, i; 1007 1008 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1009 1010 sysctl_ctx_init(&sc->ctx); 1011 sc->flags |= ADAP_SYSCTL_CTX; 1012 1013 /* 1014 * Firmware event queue 1015 */ 1016 rc = alloc_fwq(sc); 1017 if (rc != 0) 1018 return (rc); 1019 1020 /* 1021 * That's all for the VF driver. 1022 */ 1023 if (sc->flags & IS_VF) 1024 return (rc); 1025 1026 oid = device_get_sysctl_tree(sc->dev); 1027 children = SYSCTL_CHILDREN(oid); 1028 1029 /* 1030 * XXX: General purpose rx queues, one per port. 1031 */ 1032 1033 /* 1034 * Control queues, one per port. 1035 */ 1036 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq", 1037 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "control queues"); 1038 for_each_port(sc, i) { 1039 struct sge_wrq *ctrlq = &sc->sge.ctrlq[i]; 1040 1041 rc = alloc_ctrlq(sc, ctrlq, i, oid); 1042 if (rc != 0) 1043 return (rc); 1044 } 1045 1046 return (rc); 1047 } 1048 1049 /* 1050 * Idempotent 1051 */ 1052 int 1053 t4_teardown_adapter_queues(struct adapter *sc) 1054 { 1055 int i; 1056 1057 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1058 1059 /* Do this before freeing the queue */ 1060 if (sc->flags & ADAP_SYSCTL_CTX) { 1061 sysctl_ctx_free(&sc->ctx); 1062 sc->flags &= ~ADAP_SYSCTL_CTX; 1063 } 1064 1065 if (!(sc->flags & IS_VF)) { 1066 for_each_port(sc, i) 1067 free_wrq(sc, &sc->sge.ctrlq[i]); 1068 } 1069 free_fwq(sc); 1070 1071 return (0); 1072 } 1073 1074 /* Maximum payload that could arrive with a single iq descriptor. 
*/
/*
 * The base value is fl_pktshift plus an Ethernet header, a VLAN encapsulation
 * and the interface MTU.  When 'ofld' is true, sc->tt.tls is set and the
 * adapter has the FW_CAPS_CONFIG_TLSKEYS crypto capability, the result is
 * raised to tp.max_rx_pdu if that is larger.
 */
static inline int
max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
{
	int maxp;

	/* large enough even when hw VLAN extraction is disabled */
	maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
	    ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
	if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
	    maxp < sc->params.tp.max_rx_pdu)
		maxp = sc->params.tp.max_rx_pdu;
	return (maxp);
}

/*
 * Allocate all the queues of a virtual interface and create their sysctl
 * nodes under the VI's device tree.  The order is: netmap rx/tx queues (only
 * with DEV_NETMAP and when the ifnet has IFCAP_NETMAP), NIC rx queues,
 * offload rx queues (TCP_OFFLOAD), NIC tx queues, and offload tx queues
 * (TCP_OFFLOAD or RATELIMIT).
 *
 * intr_idx is the interrupt vector handed to the next rx queue.  It is passed
 * as -1 when interrupts are being forwarded to the firmware event queue.
 *
 * Returns 0 on success.  On failure t4_teardown_vi_queues() is called and the
 * errno of the allocation that failed is returned.
 */
int
t4_setup_vi_queues(struct vi_info *vi)
{
	int rc = 0, i, intr_idx, iqidx;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	struct sge_wrq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	int saved_idx;
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif
	char name[16];
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = vi->ifp;
	struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev);
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
	int maxp;

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = vi->first_intr;

#ifdef DEV_NETMAP
	saved_idx = intr_idx;
	if (ifp->if_capabilities & IFCAP_NETMAP) {

		/* netmap is supported with direct interrupts only. */
		MPASS(!forwarding_intr_to_fwq(sc));

		/*
		 * We don't have buffers to back the netmap rx queues
		 * right now so we create the queues in a way that
		 * doesn't set off any congestion signal in the chip.
		 */
		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues");
		for_each_nm_rxq(vi, i, nm_rxq) {
			rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid);
			if (rc != 0)
				goto done;
			intr_idx++;
		}

		oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq",
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues");
		for_each_nm_txq(vi, i, nm_txq) {
			/* netmap tx queues are spread over the netmap rx queues. */
			iqidx = vi->first_nm_rxq + (i % vi->nnmrxq);
			rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid);
			if (rc != 0)
				goto done;
		}
	}

	/* Normal rx queues and netmap rx queues share the same interrupts. */
	intr_idx = saved_idx;
#endif

	/*
	 * Allocate rx queues first because a default iqid is required when
	 * creating a tx queue.
	 */
	maxp = max_rx_payload(sc, ifp, false);
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues");
	for_each_rxq(vi, i, rxq) {

		init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq);

		/* NOTE(review): name[] is 16 bytes; snprintf truncates longer names. */
		snprintf(name, sizeof(name), "%s rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name);

		rc = alloc_rxq(vi, rxq,
		    forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid);
		if (rc != 0)
			goto done;
		intr_idx++;
	}
#ifdef DEV_NETMAP
	/* Skip past the vectors used by whichever set of rx queues is larger. */
	if (ifp->if_capabilities & IFCAP_NETMAP)
		intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
#endif
#ifdef TCP_OFFLOAD
	maxp = max_rx_payload(sc, ifp, true);
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queues for offloaded TCP connections");
	for_each_ofld_rxq(vi, i, ofld_rxq) {

		init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx,
		    vi->qsize_rxq);

		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
		    device_get_nameunit(vi->dev), i);
		init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name);

		rc = alloc_ofld_rxq(vi, ofld_rxq,
		    forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid);
		if (rc != 0)
			goto done;
		intr_idx++;
	}
#endif

	/*
	 * Now the tx queues.
	 */
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues");
	for_each_txq(vi, i, txq) {
		/* Each tx queue is associated with one of the VI's rx queues. */
		iqidx = vi->first_rxq + (i % vi->nrxq);
		snprintf(name, sizeof(name), "%s txq%d",
		    device_get_nameunit(vi->dev), i);
		init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan,
		    sc->sge.rxq[iqidx].iq.cntxt_id, name);

		rc = alloc_txq(vi, txq, i, oid);
		if (rc != 0)
			goto done;
	}
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queues for TOE/ETHOFLD");
	for_each_ofld_txq(vi, i, ofld_txq) {
		struct sysctl_oid *oid2;

		snprintf(name, sizeof(name), "%s ofld_txq%d",
		    device_get_nameunit(vi->dev), i);
		/*
		 * Use an offload rx queue's iq if the VI has any, otherwise
		 * fall back to one of the NIC rx queues.
		 */
		if (vi->nofldrxq > 0) {
			iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq);
			init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq,
			    pi->tx_chan, sc->sge.ofld_rxq[iqidx].iq.cntxt_id,
			    name);
		} else {
			iqidx = vi->first_rxq + (i % vi->nrxq);
			init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq,
			    pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name);
		}

		/* name[] is reused for the per-queue sysctl node ("0", "1", ...). */
		snprintf(name, sizeof(name), "%d", i);
		oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
		    name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue");

		rc = alloc_wrq(sc, vi, ofld_txq, oid2);
		if (rc != 0)
			goto done;
	}
#endif
done:
	if (rc)
		t4_teardown_vi_queues(vi);

	return (rc);
}

/*
 * Free all the queues of a virtual interface: the VI's sysctl context first,
 * then netmap queues, tx queues, and finally rx queues.  Always returns 0.
 *
 * Idempotent
 */
int
t4_teardown_vi_queues(struct vi_info *vi)
{
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_wrq *ofld_txq;
#endif
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif

	/* Do this before freeing the queues */
	if (vi->flags & VI_SYSCTL_CTX) {
		sysctl_ctx_free(&vi->ctx);
		vi->flags &= ~VI_SYSCTL_CTX;
	}

#ifdef DEV_NETMAP
	if (vi->ifp->if_capabilities & IFCAP_NETMAP) {
		for_each_nm_txq(vi, i, nm_txq) {
			free_nm_txq(vi, nm_txq);
		}

		for_each_nm_rxq(vi, i, nm_rxq) {
			free_nm_rxq(vi, nm_rxq);
		}
	}
#endif

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	for_each_txq(vi, i, txq) {
		free_txq(vi, txq);
	}
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	for_each_ofld_txq(vi, i, ofld_txq) {
		free_wrq(sc, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues.
	 */

	for_each_rxq(vi, i, rxq) {
		free_rxq(vi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		free_ofld_rxq(vi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Interrupt handler when the driver is using only 1 interrupt.  This is a very
 * unusual scenario.
 *
 * a) Deals with errors, if any.
 * b) Services firmware event queue, which is taking interrupts for all other
 *    queues.
 *
 * 'arg' is the adapter.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	MPASS(sc->intr_count == 1);

	/* Only done for INTx: write 0 to this PF's PCIE_PF_CLI register. */
	if (sc->intr_type == INTR_INTX)
		t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);

	t4_intr_err(arg);
	t4_intr_evt(fwq);
}

/*
 * Interrupt handler for errors (installed directly when multiple interrupts are
 * being used, or called by t4_intr_all).
 *
 * 'arg' is the adapter.  Does nothing if the adapter is already flagged with
 * ADAP_ERR.
 */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;
	uint32_t v;
	const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;

	if (sc->flags & ADAP_ERR)
		return;

	/* Count a software interrupt and write the cause value back. */
	v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE));
	if (v & F_PFSW) {
		sc->swintr++;
		t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v);
	}

	t4_slow_intr_handler(sc, verbose);
}

/*
 * Interrupt handler for iq-only queues.  The firmware event queue is the only
 * such queue right now.
 *
 * 'arg' is the iq.  The queue is serviced only if its state can be switched
 * from IQS_IDLE to IQS_BUSY; otherwise this call does nothing.
 */
void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

/*
 * Interrupt handler for iq+fl queues.
*/
/*
 * 'arg' is the iq.  The queue is serviced only if its state can be switched
 * from IQS_IDLE to IQS_BUSY; otherwise this call does nothing.
 */
void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq_fl(iq, 0);
		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

#ifdef DEV_NETMAP
/*
 * Interrupt handler for netmap rx queues.
 *
 * 'arg' is the netmap rx queue.  It is serviced only if nm_state can be
 * switched from NM_ON to NM_BUSY.
 */
void
t4_nm_intr(void *arg)
{
	struct sge_nm_rxq *nm_rxq = arg;

	if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) {
		service_nm_rxq(nm_rxq);
		(void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON);
	}
}

/*
 * Interrupt handler for vectors shared between NIC and netmap rx queues.
 *
 * 'arg' is the irq.  Its netmap rx queue is handled first, then its NIC rx
 * queue.
 */
void
t4_vi_intr(void *arg)
{
	struct irq *irq = arg;

	MPASS(irq->nm_rxq != NULL);
	t4_nm_intr(irq->nm_rxq);

	MPASS(irq->rxq != NULL);
	t4_intr(irq->rxq);
}
#endif

/*
 * Deals with interrupts on an iq-only (no freelist) queue.
 *
 * The caller must have set iq->state to IQS_BUSY.  Response descriptors are
 * processed for as long as their generation bit matches iq->gen.  Forwarded
 * interrupts for other queues are handled by calling service_iq_fl on the
 * target queue; a queue that does not finish is parked on a local list and
 * revisited until it is done.
 *
 * 'budget' is the maximum number of descriptors to process.  0 means no
 * limit, in which case the hardware cidx is still updated every
 * iq->qsize / 16 descriptors.
 *
 * Returns EINPROGRESS if a non-zero budget was used up, 0 otherwise.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs = 0, limit;
	int rsp_type;
	uint32_t lq;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
	KASSERT((iq->flags & IQ_HAS_FL) == 0,
	    ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq,
	    iq->flags));
	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
	MPASS((iq->flags & IQ_LRO_ENABLED) == 0);

	limit = budget ? budget : iq->qsize / 16;

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

			/* Read the rest of the descriptor only after the gen check. */
			rmb();

			rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
			lq = be32toh(d->rsp.pldbuflen_qid);

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:
				panic("%s: data for an iq (%p) with no freelist",
				    __func__, iq);

				/* NOTREACHED */

			case X_RSPD_TYPE_CPL:
				KASSERT(d->rss.opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    d->rss.opcode));
				t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL);
				break;

			case X_RSPD_TYPE_INTR:
				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (__predict_true(lq >= 1024)) {
					t4_an_handler(iq, &d->rsp);
					break;
				}

				/* Look up the queue the interrupt was forwarded for. */
				q = sc->sge.iqmap[lq - sc->sge.iq_start -
				    sc->sge.iq_base];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq_fl(q, q->qsize / 16) == 0) {
						(void) atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						/* Not done; stays BUSY on the list. */
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			/* Advance to the next descriptor, wrapping and flipping gen. */
			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				/* Report the consumed descriptors to the hardware. */
				t4_write_reg(sc, sc->sge_gts_reg,
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (budget) {
					return (EINPROGRESS);
				}
			}
		}

		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq_fl(q, q->qsize / 8) == 0)
			(void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

	/* Final cidx update, using the queue's own interrupt parameters. */
	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	return (0);
}

/*
 * Returns non-zero if lro_mbuf_max is non-zero for this LRO control block.
 * Callers in this file use that to queue mbufs with tcp_lro_queue_mbuf
 * instead of handing them to tcp_lro_rx.
 */
static inline int
sort_before_lro(struct lro_ctrl *lro)
{

	return (lro->lro_mbuf_max != 0);
}

/*
 * Convert the timestamp in a response descriptor's last flit (big-endian, low
 * 60 bits) by scaling it with 1000000 / vpd.cclk.  The division is done first
 * when the multiplication would overflow 64 bits.
 *
 * NOTE(review): the name implies the result is in nanoseconds, which would
 * require cclk to be in kHz -- confirm.
 */
static inline uint64_t
last_flit_to_ns(struct adapter *sc, uint64_t lf)
{
	uint64_t n = be64toh(lf) & 0xfffffffffffffff;	/* 60b, not 64b. */

	if (n > UINT64_MAX / 1000000)
		return (n / sc->params.vpd.cclk * 1000000);
	else
		return (n * 1000000 / sc->params.vpd.cclk);
}

/*
 * Move on to the next freelist buffer: reset rx_offset and advance fl->cidx.
 * Every time cidx reaches a multiple of 8, hw_cidx is set to cidx / 8, and
 * both wrap to 0 when that value reaches fl->sidx.
 */
static inline void
move_to_next_rxbuf(struct sge_fl *fl)
{

	fl->rx_offset = 0;
	if (__predict_false((++fl->cidx & 7) == 0)) {
		uint16_t cidx = fl->cidx >> 3;

		if (__predict_false(cidx == fl->sidx))
			fl->cidx = cidx = 0;
		fl->hw_cidx = cidx;
	}
}

/*
 * Deals with interrupts on an iq+fl queue.
*/
/*
 * The caller must have set iq->state to IQS_BUSY.  Response descriptors are
 * processed for as long as their generation bit matches iq->gen.  Processing
 * stops early if eth_rx fails or get_fl_payload cannot produce an mbuf; the
 * descriptor that could not be processed is not consumed.
 *
 * 'budget' is the maximum number of descriptors to process.  0 means no
 * limit, in which case the hardware cidx is still updated every
 * iq->qsize / 16 descriptors.
 *
 * Returns EINPROGRESS if a non-zero budget was used up, 0 otherwise.
 */
static int
service_iq_fl(struct sge_iq *iq, int budget)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct sge_fl *fl;
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs, limit;
	int rsp_type, starved;
	uint32_t lq;
	uint16_t fl_hw_cidx;
	struct mbuf *m0;
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
	struct lro_ctrl *lro = &rxq->lro;
#endif

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
	MPASS(iq->flags & IQ_HAS_FL);

	ndescs = 0;
#if defined(INET) || defined(INET6)
	/*
	 * A previous run held back one credit (see the end of this function).
	 * If nothing new has arrived, flush LRO and return that credit now;
	 * otherwise count it along with the descriptors processed below.
	 */
	if (iq->flags & IQ_ADJ_CREDIT) {
		MPASS(sort_before_lro(lro));
		iq->flags &= ~IQ_ADJ_CREDIT;
		if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) {
			tcp_lro_flush_all(lro);
			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) |
			    V_INGRESSQID((u32)iq->cntxt_id) |
			    V_SEINTARM(iq->intr_params));
			return (0);
		}
		ndescs = 1;
	}
#else
	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
#endif

	limit = budget ? budget : iq->qsize / 16;
	fl = &rxq->fl;
	fl_hw_cidx = fl->hw_cidx;	/* stable snapshot */
	while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

		/* Read the rest of the descriptor only after the gen check. */
		rmb();

		m0 = NULL;
		rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
		lq = be32toh(d->rsp.pldbuflen_qid);

		switch (rsp_type) {
		case X_RSPD_TYPE_FLBUF:
			if (lq & F_RSPD_NEWBUF) {
				if (fl->rx_offset > 0)
					move_to_next_rxbuf(fl);
				lq = G_RSPD_LEN(lq);
			}
			/* Refill once hw_cidx has moved more than 4 past the snapshot. */
			if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) {
				FL_LOCK(fl);
				refill_fl(sc, fl, 64);
				FL_UNLOCK(fl);
				fl_hw_cidx = fl->hw_cidx;
			}

			/* Ethernet frames are handled by eth_rx, not a CPL handler. */
			if (d->rss.opcode == CPL_RX_PKT) {
				if (__predict_true(eth_rx(sc, rxq, d, lq) == 0))
					break;
				goto out;
			}
			m0 = get_fl_payload(sc, fl, lq);
			if (__predict_false(m0 == NULL))
				goto out;

			/* fall through */

		case X_RSPD_TYPE_CPL:
			KASSERT(d->rss.opcode < NUM_CPL_CMDS,
			    ("%s: bad opcode %02x.", __func__, d->rss.opcode));
			t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0);
			break;

		case X_RSPD_TYPE_INTR:

			/*
			 * There are 1K interrupt-capable queues (qids 0
			 * through 1023).  A response type indicating a
			 * forwarded interrupt with a qid >= 1K is an
			 * iWARP async notification.  That is the only
			 * acceptable indirect interrupt on this queue.
			 */
			if (__predict_false(lq < 1024)) {
				panic("%s: indirect interrupt on iq_fl %p "
				    "with qid %u", __func__, iq, lq);
			}

			t4_an_handler(iq, &d->rsp);
			break;

		default:
			KASSERT(0, ("%s: illegal response type %d on iq %p",
			    __func__, rsp_type, iq));
			log(LOG_ERR, "%s: illegal response type %d on iq %p",
			    device_get_nameunit(sc->dev), rsp_type, iq);
			break;
		}

		/* Advance to the next descriptor, wrapping and flipping gen. */
		d++;
		if (__predict_false(++iq->cidx == iq->sidx)) {
			iq->cidx = 0;
			iq->gen ^= F_RSPD_GEN;
			d = &iq->desc[0];
		}
		if (__predict_false(++ndescs == limit)) {
			/* Report the consumed descriptors to the hardware. */
			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
			    V_INGRESSQID(iq->cntxt_id) |
			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));

#if defined(INET) || defined(INET6)
			if (iq->flags & IQ_LRO_ENABLED &&
			    !sort_before_lro(lro) &&
			    sc->lro_timeout != 0) {
				tcp_lro_flush_inactive(lro, &lro_timeout);
			}
#endif
			if (budget)
				return (EINPROGRESS);
			ndescs = 0;
		}
	}
out:
#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		if (ndescs > 0 && lro->lro_mbuf_count > 8) {
			MPASS(sort_before_lro(lro));
			/* hold back one credit and don't flush LRO state */
			iq->flags |= IQ_ADJ_CREDIT;
			ndescs--;
		} else {
			tcp_lro_flush_all(lro);
		}
	}
#endif

	/* Final cidx update, using the queue's own interrupt parameters. */
	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	/* Top up the freelist; hand it to add_fl_to_sfl if refill_fl says so. */
	FL_LOCK(fl);
	starved = refill_fl(sc, fl, 64);
	FL_UNLOCK(fl);
	if (__predict_false(starved != 0))
		add_fl_to_sfl(sc, fl);

	return (0);
}

/*
 * Returns the metadata area of a freelist cluster, located sd->moff bytes
 * into the cluster.
 */
static inline struct cluster_metadata *
cl_metadata(struct fl_sdesc *sd)
{

	return ((void *)(sd->cl + sd->moff));
}

/*
 * External-storage free routine installed by get_scatter_segment via
 * m_extaddref.  Returns the cluster recorded in the metadata (ext_arg1) to
 * its UMA zone and bumps the extfree_rels counter.
 */
static void
rxb_free(struct mbuf *m)
{
	struct cluster_metadata *clm = m->m_ext.ext_arg1;

	uma_zfree(clm->zone, clm->cl);
	counter_u64_add(extfree_rels, 1);
}

/*
 * Returns one mbuf that covers as much of the 'remaining' bytes of the current
 * frame as the current freelist buffer holds, or NULL if no mbuf could be
 * allocated.  'fr_offset' is how far into the frame this segment starts; the
 * segment at offset 0 gets a packet header with m_pkthdr.len = remaining.
 *
 * The mbuf returned comes from zone_mbuf and carries the payload in one of
 * these ways
 * a) complete frame inside the mbuf
 * b) m_cljset (for clusters without metadata)
 * c) m_extaddref (cluster with metadata)
 */
static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
	struct cluster_metadata *clm;
	int len, blen;
	caddr_t payload;

	/*
	 * len is the payload carried by this segment.  blen is how much of
	 * the hw buffer the segment uses up, including any padding.
	 */
	if (fl->flags & FL_BUF_PACKING) {
		u_int l, pad;

		blen = rxb->size2 - fl->rx_offset;	/* max possible in this buf */
		len = min(remaining, blen);
		payload = sd->cl + fl->rx_offset;

		l = fr_offset + len;
		pad = roundup2(l, fl->buf_boundary) - l;
		if (fl->rx_offset + len + pad < rxb->size2)
			blen = len + pad;
		MPASS(fl->rx_offset + blen <= rxb->size2);
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
		blen = rxb->size1;
		len = min(remaining, blen);
		payload = sd->cl;
	}

	if (fr_offset == 0) {
		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
		m->m_pkthdr.len = remaining;
	} else {
		m = m_get(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
	}
	m->m_len = len;

	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);
		if (fl->flags & FL_BUF_PACKING) {
			fl->rx_offset += blen;
			MPASS(fl->rx_offset <= rxb->size2);
			if (fl->rx_offset < rxb->size2)
				return (m);	/* without advancing the cidx */
		}
	} else if (fl->flags & FL_BUF_PACKING) {
		clm = cl_metadata(sd);
		/* First mbuf to reference this cluster sets up its metadata. */
		if (sd->nmbuf++ == 0) {
			clm->refcount = 1;
			clm->zone = rxb->zone;
			clm->cl = sd->cl;
			counter_u64_add(extfree_refs, 1);
		}
		m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm,
		    NULL);

		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= rxb->size2);
		if (fl->rx_offset < rxb->size2)
			return (m);	/* without advancing the cidx */
	} else {
		m_cljset(m, sd->cl, rxb->type);
		sd->cl = NULL;	/* consumed, not a recycle candidate */
	}

	move_to_next_rxbuf(fl);

	return (m);
}

/*
 * Builds the mbuf chain for a payload of 'plen' bytes out of the freelist.
 *
 * Returns the head of the chain, or NULL if an mbuf could not be allocated.
 * If that happens after the first segment, the partial chain is saved in the
 * freelist (m0, pnext, remaining) with FL_BUF_RESUME set, and the next call
 * continues from there.
 */
static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen)
{
	struct mbuf *m0, *m, **pnext;
	u_int remaining;

	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
		M_ASSERTPKTHDR(fl->m0);
		MPASS(fl->m0->m_pkthdr.len == plen);
		MPASS(fl->remaining < plen);

		m0 = fl->m0;
		pnext = fl->pnext;
		remaining = fl->remaining;
		fl->flags &= ~FL_BUF_RESUME;
		goto get_segment;
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'plen' and it may span multiple hw buffers.
	 */

	m0 = get_scatter_segment(sc, fl, 0, plen);
	if (m0 == NULL)
		return (NULL);
	remaining = plen - m0->m_len;
	pnext = &m0->m_next;
	while (remaining > 0) {
get_segment:
		MPASS(fl->rx_offset == 0);
		m = get_scatter_segment(sc, fl, plen - remaining, remaining);
		if (__predict_false(m == NULL)) {
			/* Save the state so that the next call can resume. */
			fl->m0 = m0;
			fl->pnext = pnext;
			fl->remaining = remaining;
			fl->flags |= FL_BUF_RESUME;
			return (NULL);
		}
		*pnext = m;
		pnext = &m->m_next;
		remaining -= m->m_len;
	}
	*pnext = NULL;

	M_ASSERTPKTHDR(m0);
	return (m0);
}

/*
 * Same freelist bookkeeping as get_scatter_segment but without producing an
 * mbuf.  Returns the number of payload bytes skipped in the current buffer.
 */
static int
skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
	int len, blen;

	if (fl->flags & FL_BUF_PACKING) {
		u_int l, pad;

		blen = rxb->size2 - fl->rx_offset;	/* max possible in this buf */
		len = min(remaining, blen);

		l = fr_offset + len;
		pad = roundup2(l, fl->buf_boundary) - l;
		if (fl->rx_offset + len + pad < rxb->size2)
			blen = len + pad;
		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= rxb->size2);
		if (fl->rx_offset < rxb->size2)
			return (len);	/* without advancing the cidx */
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
		blen = rxb->size1;
		len = min(remaining, blen);
	}
	move_to_next_rxbuf(fl);
	return (len);
}

/*
 * Skips over a payload of 'plen' bytes in the freelist, one segment at a time.
 */
static inline void
skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen)
{
	int remaining, fr_offset, len;

	fr_offset = 0;
	remaining = plen;
	while (remaining > 0) {
		len = skip_scatter_segment(sc, fl, fr_offset, remaining);
		fr_offset += len;
		remaining -= len;
	}
}

/*
 * Returns how many of the 'plen' payload bytes are in the current freelist
 * buffer: the space left in it when packing, its full size otherwise, capped
 * at plen.
 */
static inline int
get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen)
{
	int len;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];

	if (fl->flags & FL_BUF_PACKING)
		len = rxb->size2 - fl->rx_offset;
	else
		len = rxb->size1;

	return (min(plen, len));
}

/*
 * Receive one Ethernet frame of 'plen' bytes (fl_pktshift included) described
 * by response descriptor 'd' on rx queue 'rxq'.
 *
 * If input pfil hooks are present they run on the first segment while it is
 * still in the rx buffer.  A dropped or consumed frame is skipped in the
 * freelist; a reallocated frame continues with the mbuf supplied by pfil.
 * Otherwise the frame is turned into an mbuf chain, gets its RSS hash,
 * checksum flags, VLAN tag and timestamp filled in from the CPL, and is given
 * to LRO or to the interface's if_input.
 *
 * Returns 0 when the frame has been dealt with, ENOMEM if an mbuf could not
 * be obtained for it.
 */
static int
eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d,
    u_int plen)
{
	struct mbuf *m0;
	struct ifnet *ifp = rxq->ifp;
	struct sge_fl *fl = &rxq->fl;
	struct vi_info *vi = ifp->if_softc;
	const struct cpl_rx_pkt *cpl;
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxq->lro;
#endif
	uint16_t err_vec, tnl_type, tnlhdr_len;
	/* Indexed by [rss.hash_type][rss.ipv6]. */
	static const int sw_hashtype[4][2] = {
		{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
		{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
		{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
		{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
	};
	/* Indexed by [outer header is IPv6][inner header is IPv6]. */
	static const int sw_csum_flags[2][2] = {
		{
			/* IP, inner IP */
			CSUM_ENCAP_VXLAN |
			    CSUM_L3_CALC | CSUM_L3_VALID |
			    CSUM_L4_CALC | CSUM_L4_VALID |
			    CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
			    CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,

			/* IP, inner IP6 */
			CSUM_ENCAP_VXLAN |
			    CSUM_L3_CALC | CSUM_L3_VALID |
			    CSUM_L4_CALC | CSUM_L4_VALID |
			    CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
		},
		{
			/* IP6, inner IP */
			CSUM_ENCAP_VXLAN |
			    CSUM_L4_CALC | CSUM_L4_VALID |
			    CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
			    CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,

			/* IP6, inner IP6 */
			CSUM_ENCAP_VXLAN |
			    CSUM_L4_CALC | CSUM_L4_VALID |
			    CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
		},
	};

	MPASS(plen > sc->params.sge.fl_pktshift);
	/* pfil is not run when resuming a frame that was partially received. */
	if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) &&
	    __predict_true((fl->flags & FL_BUF_RESUME) == 0)) {
		struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
		caddr_t frame;
		int rc, slen;

		/* The hooks see only the part of the frame in the first buffer. */
		slen = get_segment_len(sc, fl, plen) -
		    sc->params.sge.fl_pktshift;
		frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift;
		CURVNET_SET_QUIET(ifp->if_vnet);
		rc = pfil_run_hooks(vi->pfil, frame, ifp,
		    slen | PFIL_MEMPTR | PFIL_IN, NULL);
		CURVNET_RESTORE();
		if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) {
			skip_fl_payload(sc, fl, plen);
			return (0);
		}
		if (rc == PFIL_REALLOCED) {
			skip_fl_payload(sc, fl, plen);
			m0 = pfil_mem2mbuf(frame);
			goto have_mbuf;
		}
	}

	m0 = get_fl_payload(sc, fl, plen);
	if (__predict_false(m0 == NULL))
		return (ENOMEM);

	/* Trim the pktshift bytes off the front of the frame. */
	m0->m_pkthdr.len -= sc->params.sge.fl_pktshift;
	m0->m_len -= sc->params.sge.fl_pktshift;
	m0->m_data += sc->params.sge.fl_pktshift;

have_mbuf:
	m0->m_pkthdr.rcvif = ifp;
	M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]);
	m0->m_pkthdr.flowid = be32toh(d->rss.hash_val);

	/* The CPL_RX_PKT message follows the RSS header in the descriptor. */
	cpl = (const void *)(&d->rss + 1);
	if (sc->params.tp.rx_pkt_encap) {
		const uint16_t ev = be16toh(cpl->err_vec);

		err_vec = G_T6_COMPR_RXERR_VEC(ev);
		tnl_type = G_T6_RX_TNL_TYPE(ev);
		tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);
	} else {
		err_vec = be16toh(cpl->err_vec);
		tnl_type = 0;
		tnlhdr_len = 0;
	}
	if (cpl->csum_calc && err_vec == 0) {
		int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6));

		/* checksum(s) calculated and found to be correct. */

		MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
		    (cpl->l2info & htobe32(F_RXF_IP6)));
		m0->m_pkthdr.csum_data = be16toh(cpl->csum);
		if (tnl_type == 0) {
			if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
				m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
				    CSUM_L3_VALID | CSUM_L4_CALC |
				    CSUM_L4_VALID;
			} else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
				m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
				    CSUM_L4_VALID;
			}
			rxq->rxcsum++;
		} else {
			MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
			if (__predict_false(cpl->ip_frag)) {
				/*
				 * csum_data is for the inner frame (which is an
				 * IP fragment) and is not 0xffff.  There is no
				 * way to pass the inner csum_data to the stack.
				 * We don't want the stack to use the inner
				 * csum_data to validate the outer frame or it
				 * will get rejected.  So we fix csum_data here
				 * and let sw do the checksum of inner IP
				 * fragments.
				 *
				 * XXX: Need 32b for csum_data2 in an rx mbuf.
				 * Maybe stuff it into rcv_tstmp?
				 */
				m0->m_pkthdr.csum_data = 0xffff;
				if (ipv6) {
					m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
					    CSUM_L4_VALID;
				} else {
					m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
					    CSUM_L3_VALID | CSUM_L4_CALC |
					    CSUM_L4_VALID;
				}
			} else {
				int outer_ipv6;

				MPASS(m0->m_pkthdr.csum_data == 0xffff);

				/*
				 * The outer header is taken to be IPv6 if the
				 * tunnel header is long enough to hold an
				 * Ethernet header and an IPv6 header.
				 */
				outer_ipv6 = tnlhdr_len >=
				    sizeof(struct ether_header) +
				    sizeof(struct ip6_hdr);
				m0->m_pkthdr.csum_flags =
				    sw_csum_flags[outer_ipv6][ipv6];
			}
			rxq->vxlan_rxcsum++;
		}
	}

	if (cpl->vlan_ex) {
		m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
		m0->m_flags |= M_VLANTAG;
		rxq->vlan_extraction++;
	}

	if (rxq->iq.flags & IQ_RX_TIMESTAMP) {
		/*
		 * Fill up rcv_tstmp but do not set M_TSTMP.
		 * rcv_tstmp is not in the format that the
		 * kernel expects and we don't want to mislead
		 * it.  For now this is only for custom code
		 * that knows how to interpret cxgbe's stamp.
		 */
		m0->m_pkthdr.rcv_tstmp =
		    last_flit_to_ns(sc, d->rsp.u.last_flit);
#ifdef notyet
		m0->m_flags |= M_TSTMP;
#endif
	}

#ifdef NUMA
	m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
#endif
#if defined(INET) || defined(INET6)
	/* Only non-tunneled frames with a TCP RSS hash type go to LRO. */
	if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
	    (M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
	    M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
		if (sort_before_lro(lro)) {
			tcp_lro_queue_mbuf(lro, m0);
			return (0); /* queued for sort, then LRO */
		}
		if (tcp_lro_rx(lro, m0, 0) == 0)
			return (0); /* queued for LRO */
	}
#endif
	ifp->if_input(ifp, m0);

	return (0);
}

/*
 * Must drain the wrq or make sure that someone else will.
 *
 * 'arg' is the wrq; 'n' is not used.  The list is drained under the eq lock,
 * and only if there are no incomplete work requests and the list is not
 * empty.
 */
static void
wrq_tx_drain(void *arg, int n)
{
	struct sge_wrq *wrq = arg;
	struct sge_eq *eq = &wrq->eq;

	EQ_LOCK(eq);
	if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
		drain_wrq_wr_list(wrq->adapter, wrq);
	EQ_UNLOCK(eq);
}

/*
 * Copies work requests from wrq->wr_list into the egress queue's descriptor
 * ring, in order, until the list is empty or the next work request does not
 * fit in the available descriptors.  Each work request that is copied is
 * removed from the list and freed.  The doorbell is rung every time 16 or
 * more descriptors have accumulated, and once more at the end for any that
 * remain.
 *
 * Must be called with the eq lock held, a non-empty wr_list, no incomplete
 * work requests, and the doorbell caught up with the pidx.
 */
static void
drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
{
	struct sge_eq *eq = &wrq->eq;
	u_int available, dbdiff;	/* # of hardware descriptors */
	u_int n;
	struct wrqe *wr;
	struct fw_eth_tx_pkt_wr *dst;	/* any fw WR struct will do */

	EQ_LOCK_ASSERT_OWNED(eq);
	MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
	wr = STAILQ_FIRST(&wrq->wr_list);
	MPASS(wr != NULL);	/* Must be called with something useful to do */
	MPASS(eq->pidx == eq->dbidx);
	dbdiff = 0;

	do {
		/* One descriptor is always left unused. */
		eq->cidx = read_hw_cidx(eq);
		if (eq->pidx == eq->cidx)
			available = eq->sidx - 1;
		else
			available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;

		MPASS(wr->wrq == wrq);
		n = howmany(wr->wr_len, EQ_ESIZE);
		if (available < n)
			break;

		dst = (void *)&eq->desc[eq->pidx];
		if (__predict_true(eq->sidx - eq->pidx > n)) {
			/* Won't wrap, won't end exactly at the status page. */
			bcopy(&wr->wr[0], dst, wr->wr_len);
			eq->pidx += n;
		} else {
			/* Copy up to the end of the ring, then the rest at the start. */
			int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;

			bcopy(&wr->wr[0], dst, first_portion);
			if (wr->wr_len > first_portion) {
				bcopy(&wr->wr[first_portion], &eq->desc[0],
				    wr->wr_len - first_portion);
			}
			eq->pidx = n - (eq->sidx - eq->pidx);
		}
		wrq->tx_wrs_copied++;

		/*
		 * Running low on descriptors: set the EQUIQ and EQUEQ flags in
		 * this work request, unless eq->equiq is already set.
		 */
		if (available < eq->sidx / 4 &&
		    atomic_cmpset_int(&eq->equiq, 0, 1)) {
			/*
			 * XXX: This is not 100% reliable with some
			 * types of WRs.  But this is a very unusual
			 * situation for an ofld/ctrl queue anyway.
			 */
			dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
			    F_FW_WR_EQUEQ);
		}

		dbdiff += n;
		if (dbdiff >= 16) {
			ring_eq_db(sc, eq, dbdiff);
			dbdiff = 0;
		}

		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);
		MPASS(wrq->nwr_pending > 0);
		wrq->nwr_pending--;
		MPASS(wrq->ndesc_needed >= n);
		wrq->ndesc_needed -= n;
	} while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);

	if (dbdiff)
		ring_eq_db(sc, eq, dbdiff);
}

/*
 * Doesn't fail.  Holds on to work requests it can't send right away.
 *
 * Queues 'wr' at the tail of wrq->wr_list and then drains the list, unless
 * there are incomplete work requests outstanding.  Must be called with the
 * eq lock held.  wr_len must be a non-zero multiple of 8 no larger than
 * SGE_MAX_WR_LEN.
 */
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
#ifdef INVARIANTS
	struct sge_eq *eq = &wrq->eq;
#endif

	EQ_LOCK_ASSERT_OWNED(eq);
	MPASS(wr != NULL);
	MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN);
	MPASS((wr->wr_len & 0x7) == 0);

	STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
	wrq->nwr_pending++;
	wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE);

	if (!TAILQ_EMPTY(&wrq->incomplete_wrs))
		return;	/* commit_wrq_wr will drain wr_list as well. */

	drain_wrq_wr_list(sc, wrq);

	/* Doorbell must have caught up to the pidx. */
	MPASS(eq->pidx == eq->dbidx);
}

/*
 * Recompute the refill source (fl->zidx) of every rx freelist of the
 * interface from the current maximum rx payload.  Offload rx queues are
 * included with TCP_OFFLOAD.  Each freelist is updated under its own lock.
 */
void
t4_update_fl_bufsize(struct ifnet *ifp)
{
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;
	struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
	struct sge_fl *fl;
	int i, maxp;

	maxp = max_rx_payload(sc, ifp, false);
	for_each_rxq(vi, i, rxq) {
		fl = &rxq->fl;

		FL_LOCK(fl);
		fl->zidx = find_refill_source(sc, maxp,
		    fl->flags & FL_BUF_PACKING);
		FL_UNLOCK(fl);
	}
#ifdef TCP_OFFLOAD
	maxp = max_rx_payload(sc, ifp, true);
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		fl = &ofld_rxq->fl;

		FL_LOCK(fl);
		fl->zidx = find_refill_source(sc, maxp,
		    fl->flags & FL_BUF_PACKING);
		FL_UNLOCK(fl);
	}
#endif
}

/*
 * Returns the segment count that set_mbuf_nsegs stored in the packet header
 * (in the inner_l5hlen field).  Asserts that it is non-zero.
 */
static inline int
mbuf_nsegs(struct mbuf *m)
{

	M_ASSERTPKTHDR(m);
	KASSERT(m->m_pkthdr.inner_l5hlen > 0,
	    ("%s: mbuf %p missing information on # of segments.", __func__, m));

	return (m->m_pkthdr.inner_l5hlen);
}

/*
 * Stores the segment count in the packet header's inner_l5hlen field.
 */
static inline void
set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
{

	M_ASSERTPKTHDR(m);
	m->m_pkthdr.inner_l5hlen = nsegs;
}

/*
 * Returns the driver's internal flags for the mbuf, kept in PH_loc.eight[4]
 * of the packet header.
 */
static inline int
mbuf_cflags(struct mbuf *m)
{

	M_ASSERTPKTHDR(m);
	return (m->m_pkthdr.PH_loc.eight[4]);
}

/*
 * Stores the driver's internal flags in PH_loc.eight[4] of the packet header.
 */
static inline void
set_mbuf_cflags(struct mbuf *m, uint8_t flags)
{

	M_ASSERTPKTHDR(m);
	m->m_pkthdr.PH_loc.eight[4] = flags;
}

/*
 * Returns the value kept in PH_loc.eight[0] of the packet header.  Unless the
 * mbuf has MC_TLS set, it is asserted to be in the range
 * 1 .. SGE_MAX_WR_LEN / 16.
 */
static inline int
mbuf_len16(struct mbuf *m)
{
	int n;

	M_ASSERTPKTHDR(m);
	n = m->m_pkthdr.PH_loc.eight[0];
	if (!(mbuf_cflags(m) & MC_TLS))
		MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);

	return (n);
}

static inline void
set_mbuf_len16(struct mbuf
*m, uint8_t len16) 2345 { 2346 2347 M_ASSERTPKTHDR(m); 2348 if (!(mbuf_cflags(m) & MC_TLS)) 2349 MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16); 2350 m->m_pkthdr.PH_loc.eight[0] = len16; 2351 } 2352 2353 #ifdef RATELIMIT 2354 static inline int 2355 mbuf_eo_nsegs(struct mbuf *m) 2356 { 2357 2358 M_ASSERTPKTHDR(m); 2359 return (m->m_pkthdr.PH_loc.eight[1]); 2360 } 2361 2362 static inline void 2363 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2364 { 2365 2366 M_ASSERTPKTHDR(m); 2367 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2368 } 2369 2370 static inline int 2371 mbuf_eo_len16(struct mbuf *m) 2372 { 2373 int n; 2374 2375 M_ASSERTPKTHDR(m); 2376 n = m->m_pkthdr.PH_loc.eight[2]; 2377 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2378 2379 return (n); 2380 } 2381 2382 static inline void 2383 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2384 { 2385 2386 M_ASSERTPKTHDR(m); 2387 m->m_pkthdr.PH_loc.eight[2] = len16; 2388 } 2389 2390 static inline int 2391 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2392 { 2393 2394 M_ASSERTPKTHDR(m); 2395 return (m->m_pkthdr.PH_loc.eight[3]); 2396 } 2397 2398 static inline void 2399 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2400 { 2401 2402 M_ASSERTPKTHDR(m); 2403 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2404 } 2405 2406 static inline int 2407 needs_eo(struct m_snd_tag *mst) 2408 { 2409 2410 return (mst != NULL && mst->type == IF_SND_TAG_TYPE_RATE_LIMIT); 2411 } 2412 #endif 2413 2414 /* 2415 * Try to allocate an mbuf to contain a raw work request. To make it 2416 * easy to construct the work request, don't allocate a chain but a 2417 * single mbuf. 
2418 */ 2419 struct mbuf * 2420 alloc_wr_mbuf(int len, int how) 2421 { 2422 struct mbuf *m; 2423 2424 if (len <= MHLEN) 2425 m = m_gethdr(how, MT_DATA); 2426 else if (len <= MCLBYTES) 2427 m = m_getcl(how, MT_DATA, M_PKTHDR); 2428 else 2429 m = NULL; 2430 if (m == NULL) 2431 return (NULL); 2432 m->m_pkthdr.len = len; 2433 m->m_len = len; 2434 set_mbuf_cflags(m, MC_RAW_WR); 2435 set_mbuf_len16(m, howmany(len, 16)); 2436 return (m); 2437 } 2438 2439 static inline bool 2440 needs_hwcsum(struct mbuf *m) 2441 { 2442 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | 2443 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP | 2444 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP | 2445 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP | 2446 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO; 2447 2448 M_ASSERTPKTHDR(m); 2449 2450 return (m->m_pkthdr.csum_flags & csum_flags); 2451 } 2452 2453 static inline bool 2454 needs_tso(struct mbuf *m) 2455 { 2456 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO | 2457 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2458 2459 M_ASSERTPKTHDR(m); 2460 2461 return (m->m_pkthdr.csum_flags & csum_flags); 2462 } 2463 2464 static inline bool 2465 needs_vxlan_csum(struct mbuf *m) 2466 { 2467 2468 M_ASSERTPKTHDR(m); 2469 2470 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN); 2471 } 2472 2473 static inline bool 2474 needs_vxlan_tso(struct mbuf *m) 2475 { 2476 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO | 2477 CSUM_INNER_IP6_TSO; 2478 2479 M_ASSERTPKTHDR(m); 2480 2481 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 && 2482 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN); 2483 } 2484 2485 static inline bool 2486 needs_inner_tcp_csum(struct mbuf *m) 2487 { 2488 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2489 2490 M_ASSERTPKTHDR(m); 2491 2492 return (m->m_pkthdr.csum_flags & csum_flags); 2493 } 2494 2495 static inline bool 2496 needs_l3_csum(struct mbuf *m) 2497 { 2498 const uint32_t 
csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP | 2499 CSUM_INNER_IP_TSO; 2500 2501 M_ASSERTPKTHDR(m); 2502 2503 return (m->m_pkthdr.csum_flags & csum_flags); 2504 } 2505 2506 static inline bool 2507 needs_outer_tcp_csum(struct mbuf *m) 2508 { 2509 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP | 2510 CSUM_IP6_TSO; 2511 2512 M_ASSERTPKTHDR(m); 2513 2514 return (m->m_pkthdr.csum_flags & csum_flags); 2515 } 2516 2517 #ifdef RATELIMIT 2518 static inline bool 2519 needs_outer_l4_csum(struct mbuf *m) 2520 { 2521 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO | 2522 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO; 2523 2524 M_ASSERTPKTHDR(m); 2525 2526 return (m->m_pkthdr.csum_flags & csum_flags); 2527 } 2528 2529 static inline bool 2530 needs_outer_udp_csum(struct mbuf *m) 2531 { 2532 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP; 2533 2534 M_ASSERTPKTHDR(m); 2535 2536 return (m->m_pkthdr.csum_flags & csum_flags); 2537 } 2538 #endif 2539 2540 static inline bool 2541 needs_vlan_insertion(struct mbuf *m) 2542 { 2543 2544 M_ASSERTPKTHDR(m); 2545 2546 return (m->m_flags & M_VLANTAG); 2547 } 2548 2549 static void * 2550 m_advance(struct mbuf **pm, int *poffset, int len) 2551 { 2552 struct mbuf *m = *pm; 2553 int offset = *poffset; 2554 uintptr_t p = 0; 2555 2556 MPASS(len > 0); 2557 2558 for (;;) { 2559 if (offset + len < m->m_len) { 2560 offset += len; 2561 p = mtod(m, uintptr_t) + offset; 2562 break; 2563 } 2564 len -= m->m_len - offset; 2565 m = m->m_next; 2566 offset = 0; 2567 MPASS(m != NULL); 2568 } 2569 *poffset = offset; 2570 *pm = m; 2571 return ((void *)p); 2572 } 2573 2574 static inline int 2575 count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) 2576 { 2577 vm_paddr_t paddr; 2578 int i, len, off, pglen, pgoff, seglen, segoff; 2579 int nsegs = 0; 2580 2581 M_ASSERTEXTPG(m); 2582 off = mtod(m, vm_offset_t); 2583 len = m->m_len; 2584 off += skip; 2585 len -= skip; 2586 2587 if (m->m_epg_hdrlen != 0) { 
2588 if (off >= m->m_epg_hdrlen) { 2589 off -= m->m_epg_hdrlen; 2590 } else { 2591 seglen = m->m_epg_hdrlen - off; 2592 segoff = off; 2593 seglen = min(seglen, len); 2594 off = 0; 2595 len -= seglen; 2596 paddr = pmap_kextract( 2597 (vm_offset_t)&m->m_epg_hdr[segoff]); 2598 if (*nextaddr != paddr) 2599 nsegs++; 2600 *nextaddr = paddr + seglen; 2601 } 2602 } 2603 pgoff = m->m_epg_1st_off; 2604 for (i = 0; i < m->m_epg_npgs && len > 0; i++) { 2605 pglen = m_epg_pagelen(m, i, pgoff); 2606 if (off >= pglen) { 2607 off -= pglen; 2608 pgoff = 0; 2609 continue; 2610 } 2611 seglen = pglen - off; 2612 segoff = pgoff + off; 2613 off = 0; 2614 seglen = min(seglen, len); 2615 len -= seglen; 2616 paddr = m->m_epg_pa[i] + segoff; 2617 if (*nextaddr != paddr) 2618 nsegs++; 2619 *nextaddr = paddr + seglen; 2620 pgoff = 0; 2621 }; 2622 if (len != 0) { 2623 seglen = min(len, m->m_epg_trllen - off); 2624 len -= seglen; 2625 paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]); 2626 if (*nextaddr != paddr) 2627 nsegs++; 2628 *nextaddr = paddr + seglen; 2629 } 2630 2631 return (nsegs); 2632 } 2633 2634 2635 /* 2636 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2637 * must have at least one mbuf that's not empty. It is possible for this 2638 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 
2639 */ 2640 static inline int 2641 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) 2642 { 2643 vm_paddr_t nextaddr, paddr; 2644 vm_offset_t va; 2645 int len, nsegs; 2646 2647 M_ASSERTPKTHDR(m); 2648 MPASS(m->m_pkthdr.len > 0); 2649 MPASS(m->m_pkthdr.len >= skip); 2650 2651 nsegs = 0; 2652 nextaddr = 0; 2653 for (; m; m = m->m_next) { 2654 len = m->m_len; 2655 if (__predict_false(len == 0)) 2656 continue; 2657 if (skip >= len) { 2658 skip -= len; 2659 continue; 2660 } 2661 if ((m->m_flags & M_EXTPG) != 0) { 2662 *cflags |= MC_NOMAP; 2663 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); 2664 skip = 0; 2665 continue; 2666 } 2667 va = mtod(m, vm_offset_t) + skip; 2668 len -= skip; 2669 skip = 0; 2670 paddr = pmap_kextract(va); 2671 nsegs += sglist_count((void *)(uintptr_t)va, len); 2672 if (paddr == nextaddr) 2673 nsegs--; 2674 nextaddr = pmap_kextract(va + len - 1) + 1; 2675 } 2676 2677 return (nsegs); 2678 } 2679 2680 /* 2681 * The maximum number of segments that can fit in a WR. 2682 */ 2683 static int 2684 max_nsegs_allowed(struct mbuf *m, bool vm_wr) 2685 { 2686 2687 if (vm_wr) { 2688 if (needs_tso(m)) 2689 return (TX_SGL_SEGS_VM_TSO); 2690 return (TX_SGL_SEGS_VM); 2691 } 2692 2693 if (needs_tso(m)) { 2694 if (needs_vxlan_tso(m)) 2695 return (TX_SGL_SEGS_VXLAN_TSO); 2696 else 2697 return (TX_SGL_SEGS_TSO); 2698 } 2699 2700 return (TX_SGL_SEGS); 2701 } 2702 2703 /* 2704 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2705 * a) caller can assume it's been freed if this function returns with an error. 2706 * b) it may get defragged up if the gather list is too long for the hardware. 
2707 */ 2708 int 2709 parse_pkt(struct mbuf **mp, bool vm_wr) 2710 { 2711 struct mbuf *m0 = *mp, *m; 2712 int rc, nsegs, defragged = 0, offset; 2713 struct ether_header *eh; 2714 void *l3hdr; 2715 #if defined(INET) || defined(INET6) 2716 struct tcphdr *tcp; 2717 #endif 2718 #if defined(KERN_TLS) || defined(RATELIMIT) 2719 struct m_snd_tag *mst; 2720 #endif 2721 uint16_t eh_type; 2722 uint8_t cflags; 2723 2724 cflags = 0; 2725 M_ASSERTPKTHDR(m0); 2726 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2727 rc = EINVAL; 2728 fail: 2729 m_freem(m0); 2730 *mp = NULL; 2731 return (rc); 2732 } 2733 restart: 2734 /* 2735 * First count the number of gather list segments in the payload. 2736 * Defrag the mbuf if nsegs exceeds the hardware limit. 2737 */ 2738 M_ASSERTPKTHDR(m0); 2739 MPASS(m0->m_pkthdr.len > 0); 2740 nsegs = count_mbuf_nsegs(m0, 0, &cflags); 2741 #if defined(KERN_TLS) || defined(RATELIMIT) 2742 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) 2743 mst = m0->m_pkthdr.snd_tag; 2744 else 2745 mst = NULL; 2746 #endif 2747 #ifdef KERN_TLS 2748 if (mst != NULL && mst->type == IF_SND_TAG_TYPE_TLS) { 2749 int len16; 2750 2751 cflags |= MC_TLS; 2752 set_mbuf_cflags(m0, cflags); 2753 rc = t6_ktls_parse_pkt(m0, &nsegs, &len16); 2754 if (rc != 0) 2755 goto fail; 2756 set_mbuf_nsegs(m0, nsegs); 2757 set_mbuf_len16(m0, len16); 2758 return (0); 2759 } 2760 #endif 2761 if (nsegs > max_nsegs_allowed(m0, vm_wr)) { 2762 if (defragged++ > 0) { 2763 rc = EFBIG; 2764 goto fail; 2765 } 2766 counter_u64_add(defrags, 1); 2767 if ((m = m_defrag(m0, M_NOWAIT)) == NULL) { 2768 rc = ENOMEM; 2769 goto fail; 2770 } 2771 *mp = m0 = m; /* update caller's copy after defrag */ 2772 goto restart; 2773 } 2774 2775 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && 2776 !(cflags & MC_NOMAP))) { 2777 counter_u64_add(pullups, 1); 2778 m0 = m_pullup(m0, m0->m_pkthdr.len); 2779 if (m0 == NULL) { 2780 /* Should have left well enough alone. 
*/ 2781 rc = EFBIG; 2782 goto fail; 2783 } 2784 *mp = m0; /* update caller's copy after pullup */ 2785 goto restart; 2786 } 2787 set_mbuf_nsegs(m0, nsegs); 2788 set_mbuf_cflags(m0, cflags); 2789 calculate_mbuf_len16(m0, vm_wr); 2790 2791 #ifdef RATELIMIT 2792 /* 2793 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2794 * checksumming is enabled. needs_outer_l4_csum happens to check for 2795 * all the right things. 2796 */ 2797 if (__predict_false(needs_eo(mst) && !needs_outer_l4_csum(m0))) { 2798 m_snd_tag_rele(m0->m_pkthdr.snd_tag); 2799 m0->m_pkthdr.snd_tag = NULL; 2800 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 2801 mst = NULL; 2802 } 2803 #endif 2804 2805 if (!needs_hwcsum(m0) 2806 #ifdef RATELIMIT 2807 && !needs_eo(mst) 2808 #endif 2809 ) 2810 return (0); 2811 2812 m = m0; 2813 eh = mtod(m, struct ether_header *); 2814 eh_type = ntohs(eh->ether_type); 2815 if (eh_type == ETHERTYPE_VLAN) { 2816 struct ether_vlan_header *evh = (void *)eh; 2817 2818 eh_type = ntohs(evh->evl_proto); 2819 m0->m_pkthdr.l2hlen = sizeof(*evh); 2820 } else 2821 m0->m_pkthdr.l2hlen = sizeof(*eh); 2822 2823 offset = 0; 2824 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2825 2826 switch (eh_type) { 2827 #ifdef INET6 2828 case ETHERTYPE_IPV6: 2829 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); 2830 break; 2831 #endif 2832 #ifdef INET 2833 case ETHERTYPE_IP: 2834 { 2835 struct ip *ip = l3hdr; 2836 2837 if (needs_vxlan_csum(m0)) { 2838 /* Driver will do the outer IP hdr checksum. */ 2839 ip->ip_sum = 0; 2840 if (needs_vxlan_tso(m0)) { 2841 const uint16_t ipl = ip->ip_len; 2842 2843 ip->ip_len = 0; 2844 ip->ip_sum = ~in_cksum_hdr(ip); 2845 ip->ip_len = ipl; 2846 } else 2847 ip->ip_sum = in_cksum_hdr(ip); 2848 } 2849 m0->m_pkthdr.l3hlen = ip->ip_hl << 2; 2850 break; 2851 } 2852 #endif 2853 default: 2854 panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" 2855 " with the same INET/INET6 options as the kernel.", 2856 __func__, eh_type); 2857 } 2858 2859 if (needs_vxlan_csum(m0)) { 2860 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2861 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header); 2862 2863 /* Inner headers. */ 2864 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen + 2865 sizeof(struct udphdr) + sizeof(struct vxlan_header)); 2866 eh_type = ntohs(eh->ether_type); 2867 if (eh_type == ETHERTYPE_VLAN) { 2868 struct ether_vlan_header *evh = (void *)eh; 2869 2870 eh_type = ntohs(evh->evl_proto); 2871 m0->m_pkthdr.inner_l2hlen = sizeof(*evh); 2872 } else 2873 m0->m_pkthdr.inner_l2hlen = sizeof(*eh); 2874 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2875 2876 switch (eh_type) { 2877 #ifdef INET6 2878 case ETHERTYPE_IPV6: 2879 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr); 2880 break; 2881 #endif 2882 #ifdef INET 2883 case ETHERTYPE_IP: 2884 { 2885 struct ip *ip = l3hdr; 2886 2887 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2; 2888 break; 2889 } 2890 #endif 2891 default: 2892 panic("%s: VXLAN hw offload requested with unknown " 2893 "ethertype 0x%04x. 
if_cxgbe must be compiled" 2894 " with the same INET/INET6 options as the kernel.", 2895 __func__, eh_type); 2896 } 2897 #if defined(INET) || defined(INET6) 2898 if (needs_inner_tcp_csum(m0)) { 2899 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen); 2900 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4; 2901 } 2902 #endif 2903 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 2904 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP | 2905 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP | 2906 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | 2907 CSUM_ENCAP_VXLAN; 2908 } 2909 2910 #if defined(INET) || defined(INET6) 2911 if (needs_outer_tcp_csum(m0)) { 2912 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2913 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2914 #ifdef RATELIMIT 2915 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2916 set_mbuf_eo_tsclk_tsoff(m0, 2917 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2918 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2919 } else 2920 set_mbuf_eo_tsclk_tsoff(m0, 0); 2921 } else if (needs_outer_udp_csum(m0)) { 2922 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2923 #endif 2924 } 2925 #ifdef RATELIMIT 2926 if (needs_eo(mst)) { 2927 u_int immhdrs; 2928 2929 /* EO WRs have the headers in the WR and not the GL. 
*/ 2930 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2931 m0->m_pkthdr.l4hlen; 2932 cflags = 0; 2933 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); 2934 MPASS(cflags == mbuf_cflags(m0)); 2935 set_mbuf_eo_nsegs(m0, nsegs); 2936 set_mbuf_eo_len16(m0, 2937 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2938 } 2939 #endif 2940 #endif 2941 MPASS(m0 == *mp); 2942 return (0); 2943 } 2944 2945 void * 2946 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2947 { 2948 struct sge_eq *eq = &wrq->eq; 2949 struct adapter *sc = wrq->adapter; 2950 int ndesc, available; 2951 struct wrqe *wr; 2952 void *w; 2953 2954 MPASS(len16 > 0); 2955 ndesc = tx_len16_to_desc(len16); 2956 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2957 2958 EQ_LOCK(eq); 2959 2960 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2961 drain_wrq_wr_list(sc, wrq); 2962 2963 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2964 slowpath: 2965 EQ_UNLOCK(eq); 2966 wr = alloc_wrqe(len16 * 16, wrq); 2967 if (__predict_false(wr == NULL)) 2968 return (NULL); 2969 cookie->pidx = -1; 2970 cookie->ndesc = ndesc; 2971 return (&wr->wr); 2972 } 2973 2974 eq->cidx = read_hw_cidx(eq); 2975 if (eq->pidx == eq->cidx) 2976 available = eq->sidx - 1; 2977 else 2978 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2979 if (available < ndesc) 2980 goto slowpath; 2981 2982 cookie->pidx = eq->pidx; 2983 cookie->ndesc = ndesc; 2984 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2985 2986 w = &eq->desc[eq->pidx]; 2987 IDXINCR(eq->pidx, ndesc, eq->sidx); 2988 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2989 w = &wrq->ss[0]; 2990 wrq->ss_pidx = cookie->pidx; 2991 wrq->ss_len = len16 * 16; 2992 } 2993 2994 EQ_UNLOCK(eq); 2995 2996 return (w); 2997 } 2998 2999 void 3000 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 3001 { 3002 struct sge_eq *eq = &wrq->eq; 3003 struct adapter *sc = wrq->adapter; 3004 int ndesc, pidx; 3005 struct wrq_cookie *prev, 
*next; 3006 3007 if (cookie->pidx == -1) { 3008 struct wrqe *wr = __containerof(w, struct wrqe, wr); 3009 3010 t4_wrq_tx(sc, wr); 3011 return; 3012 } 3013 3014 if (__predict_false(w == &wrq->ss[0])) { 3015 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 3016 3017 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 3018 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 3019 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 3020 wrq->tx_wrs_ss++; 3021 } else 3022 wrq->tx_wrs_direct++; 3023 3024 EQ_LOCK(eq); 3025 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 3026 pidx = cookie->pidx; 3027 MPASS(pidx >= 0 && pidx < eq->sidx); 3028 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 3029 next = TAILQ_NEXT(cookie, link); 3030 if (prev == NULL) { 3031 MPASS(pidx == eq->dbidx); 3032 if (next == NULL || ndesc >= 16) { 3033 int available; 3034 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 3035 3036 /* 3037 * Note that the WR via which we'll request tx updates 3038 * is at pidx and not eq->pidx, which has moved on 3039 * already. 3040 */ 3041 dst = (void *)&eq->desc[pidx]; 3042 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3043 if (available < eq->sidx / 4 && 3044 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3045 /* 3046 * XXX: This is not 100% reliable with some 3047 * types of WRs. But this is a very unusual 3048 * situation for an ofld/ctrl queue anyway. 
3049 */ 3050 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 3051 F_FW_WR_EQUEQ); 3052 } 3053 3054 ring_eq_db(wrq->adapter, eq, ndesc); 3055 } else { 3056 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 3057 next->pidx = pidx; 3058 next->ndesc += ndesc; 3059 } 3060 } else { 3061 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 3062 prev->ndesc += ndesc; 3063 } 3064 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 3065 3066 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 3067 drain_wrq_wr_list(sc, wrq); 3068 3069 #ifdef INVARIANTS 3070 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 3071 /* Doorbell must have caught up to the pidx. */ 3072 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 3073 } 3074 #endif 3075 EQ_UNLOCK(eq); 3076 } 3077 3078 static u_int 3079 can_resume_eth_tx(struct mp_ring *r) 3080 { 3081 struct sge_eq *eq = r->cookie; 3082 3083 return (total_available_tx_desc(eq) > eq->sidx / 8); 3084 } 3085 3086 static inline bool 3087 cannot_use_txpkts(struct mbuf *m) 3088 { 3089 /* maybe put a GL limit too, to avoid silliness? 
*/ 3090 3091 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); 3092 } 3093 3094 static inline int 3095 discard_tx(struct sge_eq *eq) 3096 { 3097 3098 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 3099 } 3100 3101 static inline int 3102 wr_can_update_eq(void *p) 3103 { 3104 struct fw_eth_tx_pkts_wr *wr = p; 3105 3106 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 3107 case FW_ULPTX_WR: 3108 case FW_ETH_TX_PKT_WR: 3109 case FW_ETH_TX_PKTS_WR: 3110 case FW_ETH_TX_PKTS2_WR: 3111 case FW_ETH_TX_PKT_VM_WR: 3112 case FW_ETH_TX_PKTS_VM_WR: 3113 return (1); 3114 default: 3115 return (0); 3116 } 3117 } 3118 3119 static inline void 3120 set_txupdate_flags(struct sge_txq *txq, u_int avail, 3121 struct fw_eth_tx_pkt_wr *wr) 3122 { 3123 struct sge_eq *eq = &txq->eq; 3124 struct txpkts *txp = &txq->txp; 3125 3126 if ((txp->npkt > 0 || avail < eq->sidx / 2) && 3127 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3128 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3129 eq->equeqidx = eq->pidx; 3130 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 3131 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 3132 eq->equeqidx = eq->pidx; 3133 } 3134 } 3135 3136 /* 3137 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 3138 * be consumed. Return the actual number consumed. 0 indicates a stall. 
3139 */ 3140 static u_int 3141 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing) 3142 { 3143 struct sge_txq *txq = r->cookie; 3144 struct ifnet *ifp = txq->ifp; 3145 struct sge_eq *eq = &txq->eq; 3146 struct txpkts *txp = &txq->txp; 3147 struct vi_info *vi = ifp->if_softc; 3148 struct adapter *sc = vi->adapter; 3149 u_int total, remaining; /* # of packets */ 3150 u_int n, avail, dbdiff; /* # of hardware descriptors */ 3151 int i, rc; 3152 struct mbuf *m0; 3153 bool snd; 3154 void *wr; /* start of the last WR written to the ring */ 3155 3156 TXQ_LOCK_ASSERT_OWNED(txq); 3157 3158 remaining = IDXDIFF(pidx, cidx, r->size); 3159 if (__predict_false(discard_tx(eq))) { 3160 for (i = 0; i < txp->npkt; i++) 3161 m_freem(txp->mb[i]); 3162 txp->npkt = 0; 3163 while (cidx != pidx) { 3164 m0 = r->items[cidx]; 3165 m_freem(m0); 3166 if (++cidx == r->size) 3167 cidx = 0; 3168 } 3169 reclaim_tx_descs(txq, eq->sidx); 3170 *coalescing = false; 3171 return (remaining); /* emptied */ 3172 } 3173 3174 /* How many hardware descriptors do we have readily available. */ 3175 if (eq->pidx == eq->cidx) { 3176 avail = eq->sidx - 1; 3177 if (txp->score++ >= 5) 3178 txp->score = 5; /* tx is completely idle, reset. 
*/ 3179 } else 3180 avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3181 3182 total = 0; 3183 if (remaining == 0) { 3184 if (txp->score-- == 1) /* egr_update had to drain txpkts */ 3185 txp->score = 1; 3186 goto send_txpkts; 3187 } 3188 3189 dbdiff = 0; 3190 MPASS(remaining > 0); 3191 while (remaining > 0) { 3192 m0 = r->items[cidx]; 3193 M_ASSERTPKTHDR(m0); 3194 MPASS(m0->m_nextpkt == NULL); 3195 3196 if (avail < 2 * SGE_MAX_WR_NDESC) 3197 avail += reclaim_tx_descs(txq, 64); 3198 3199 if (txp->npkt > 0 || remaining > 1 || txp->score > 3 || 3200 atomic_load_int(&txq->eq.equiq) != 0) { 3201 if (vi->flags & TX_USES_VM_WR) 3202 rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd); 3203 else 3204 rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd); 3205 } else { 3206 snd = false; 3207 rc = EINVAL; 3208 } 3209 if (snd) { 3210 MPASS(txp->npkt > 0); 3211 for (i = 0; i < txp->npkt; i++) 3212 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3213 if (txp->npkt > 1) { 3214 if (txp->score++ >= 10) 3215 txp->score = 10; 3216 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3217 if (vi->flags & TX_USES_VM_WR) 3218 n = write_txpkts_vm_wr(sc, txq); 3219 else 3220 n = write_txpkts_wr(sc, txq); 3221 } else { 3222 MPASS(avail >= 3223 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3224 if (vi->flags & TX_USES_VM_WR) 3225 n = write_txpkt_vm_wr(sc, txq, 3226 txp->mb[0]); 3227 else 3228 n = write_txpkt_wr(sc, txq, txp->mb[0], 3229 avail); 3230 } 3231 MPASS(n <= SGE_MAX_WR_NDESC); 3232 avail -= n; 3233 dbdiff += n; 3234 wr = &eq->desc[eq->pidx]; 3235 IDXINCR(eq->pidx, n, eq->sidx); 3236 txp->npkt = 0; /* emptied */ 3237 } 3238 if (rc == 0) { 3239 /* m0 was coalesced into txq->txpkts. */ 3240 goto next_mbuf; 3241 } 3242 if (rc == EAGAIN) { 3243 /* 3244 * m0 is suitable for tx coalescing but could not be 3245 * combined with the existing txq->txpkts, which has now 3246 * been transmitted. Start a new txpkts with m0. 
3247 */ 3248 MPASS(snd); 3249 MPASS(txp->npkt == 0); 3250 continue; 3251 } 3252 3253 MPASS(rc != 0 && rc != EAGAIN); 3254 MPASS(txp->npkt == 0); 3255 3256 n = tx_len16_to_desc(mbuf_len16(m0)); 3257 if (__predict_false(avail < n)) { 3258 avail += reclaim_tx_descs(txq, min(n, 32)); 3259 if (avail < n) 3260 break; /* out of descriptors */ 3261 } 3262 3263 wr = &eq->desc[eq->pidx]; 3264 if (mbuf_cflags(m0) & MC_RAW_WR) { 3265 n = write_raw_wr(txq, wr, m0, avail); 3266 #ifdef KERN_TLS 3267 } else if (mbuf_cflags(m0) & MC_TLS) { 3268 ETHER_BPF_MTAP(ifp, m0); 3269 n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0), 3270 avail); 3271 #endif 3272 } else { 3273 ETHER_BPF_MTAP(ifp, m0); 3274 if (vi->flags & TX_USES_VM_WR) 3275 n = write_txpkt_vm_wr(sc, txq, m0); 3276 else 3277 n = write_txpkt_wr(sc, txq, m0, avail); 3278 } 3279 MPASS(n >= 1 && n <= avail); 3280 if (!(mbuf_cflags(m0) & MC_TLS)) 3281 MPASS(n <= SGE_MAX_WR_NDESC); 3282 3283 avail -= n; 3284 dbdiff += n; 3285 IDXINCR(eq->pidx, n, eq->sidx); 3286 3287 if (dbdiff >= 512 / EQ_ESIZE) { /* X_FETCHBURSTMAX_512B */ 3288 if (wr_can_update_eq(wr)) 3289 set_txupdate_flags(txq, avail, wr); 3290 ring_eq_db(sc, eq, dbdiff); 3291 avail += reclaim_tx_descs(txq, 32); 3292 dbdiff = 0; 3293 } 3294 next_mbuf: 3295 total++; 3296 remaining--; 3297 if (__predict_false(++cidx == r->size)) 3298 cidx = 0; 3299 } 3300 if (dbdiff != 0) { 3301 if (wr_can_update_eq(wr)) 3302 set_txupdate_flags(txq, avail, wr); 3303 ring_eq_db(sc, eq, dbdiff); 3304 reclaim_tx_descs(txq, 32); 3305 } else if (eq->pidx == eq->cidx && txp->npkt > 0 && 3306 atomic_load_int(&txq->eq.equiq) == 0) { 3307 /* 3308 * If nothing was submitted to the chip for tx (it was coalesced 3309 * into txpkts instead) and there is no tx update outstanding 3310 * then we need to send txpkts now. 
3311 */ 3312 send_txpkts: 3313 MPASS(txp->npkt > 0); 3314 for (i = 0; i < txp->npkt; i++) 3315 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3316 if (txp->npkt > 1) { 3317 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3318 if (vi->flags & TX_USES_VM_WR) 3319 n = write_txpkts_vm_wr(sc, txq); 3320 else 3321 n = write_txpkts_wr(sc, txq); 3322 } else { 3323 MPASS(avail >= 3324 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3325 if (vi->flags & TX_USES_VM_WR) 3326 n = write_txpkt_vm_wr(sc, txq, txp->mb[0]); 3327 else 3328 n = write_txpkt_wr(sc, txq, txp->mb[0], avail); 3329 } 3330 MPASS(n <= SGE_MAX_WR_NDESC); 3331 wr = &eq->desc[eq->pidx]; 3332 IDXINCR(eq->pidx, n, eq->sidx); 3333 txp->npkt = 0; /* emptied */ 3334 3335 MPASS(wr_can_update_eq(wr)); 3336 set_txupdate_flags(txq, avail - n, wr); 3337 ring_eq_db(sc, eq, n); 3338 reclaim_tx_descs(txq, 32); 3339 } 3340 *coalescing = txp->npkt > 0; 3341 3342 return (total); 3343 } 3344 3345 static inline void 3346 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 3347 int qsize) 3348 { 3349 3350 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 3351 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 3352 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 3353 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 3354 3355 iq->flags = 0; 3356 iq->adapter = sc; 3357 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 3358 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 3359 if (pktc_idx >= 0) { 3360 iq->intr_params |= F_QINTR_CNT_EN; 3361 iq->intr_pktc_idx = pktc_idx; 3362 } 3363 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 3364 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 3365 } 3366 3367 static inline void 3368 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 3369 { 3370 3371 fl->qsize = qsize; 3372 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3373 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 3374 if (sc->flags & BUF_PACKING_OK && 3375 ((!is_t4(sc) && 
buffer_packing) || /* T5+: enabled unless 0 */ 3376 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 3377 fl->flags |= FL_BUF_PACKING; 3378 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); 3379 fl->safe_zidx = sc->sge.safe_zidx; 3380 } 3381 3382 static inline void 3383 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 3384 uint8_t tx_chan, uint16_t iqid, char *name) 3385 { 3386 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 3387 3388 eq->flags = eqtype & EQ_TYPEMASK; 3389 eq->tx_chan = tx_chan; 3390 eq->iqid = iqid; 3391 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3392 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 3393 } 3394 3395 static int 3396 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 3397 bus_dmamap_t *map, bus_addr_t *pa, void **va) 3398 { 3399 int rc; 3400 3401 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 3402 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 3403 if (rc != 0) { 3404 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 3405 goto done; 3406 } 3407 3408 rc = bus_dmamem_alloc(*tag, va, 3409 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 3410 if (rc != 0) { 3411 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 3412 goto done; 3413 } 3414 3415 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 3416 if (rc != 0) { 3417 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 3418 goto done; 3419 } 3420 done: 3421 if (rc) 3422 free_ring(sc, *tag, *map, *pa, *va); 3423 3424 return (rc); 3425 } 3426 3427 static int 3428 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3429 bus_addr_t pa, void *va) 3430 { 3431 if (pa) 3432 bus_dmamap_unload(tag, map); 3433 if (va) 3434 bus_dmamem_free(tag, va, map); 3435 if (tag) 3436 bus_dma_tag_destroy(tag); 3437 3438 return (0); 3439 } 3440 3441 /* 3442 * Allocates the ring for an ingress queue and an 
optional freelist. If the 3443 * freelist is specified it will be allocated and then associated with the 3444 * ingress queue. 3445 * 3446 * Returns errno on failure. Resources allocated up to that point may still be 3447 * allocated. Caller is responsible for cleanup in case this function fails. 3448 * 3449 * If the ingress queue will take interrupts directly then the intr_idx 3450 * specifies the vector, starting from 0. -1 means the interrupts for this 3451 * queue should be forwarded to the fwq. 3452 */ 3453 static int 3454 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3455 int intr_idx, int cong) 3456 { 3457 int rc, i, cntxt_id; 3458 size_t len; 3459 struct fw_iq_cmd c; 3460 struct port_info *pi = vi->pi; 3461 struct adapter *sc = iq->adapter; 3462 struct sge_params *sp = &sc->params.sge; 3463 __be32 v = 0; 3464 3465 len = iq->qsize * IQ_ESIZE; 3466 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3467 (void **)&iq->desc); 3468 if (rc != 0) 3469 return (rc); 3470 3471 bzero(&c, sizeof(c)); 3472 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3473 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3474 V_FW_IQ_CMD_VFN(0)); 3475 3476 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3477 FW_LEN16(c)); 3478 3479 /* Special handling for firmware event queue */ 3480 if (iq == &sc->sge.fwq) 3481 v |= F_FW_IQ_CMD_IQASYNCH; 3482 3483 if (intr_idx < 0) { 3484 /* Forwarded interrupts, all headed to fwq */ 3485 v |= F_FW_IQ_CMD_IQANDST; 3486 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3487 } else { 3488 KASSERT(intr_idx < sc->intr_count, 3489 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 3490 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 3491 } 3492 3493 c.type_to_iqandstindex = htobe32(v | 3494 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3495 V_FW_IQ_CMD_VIID(vi->viid) | 3496 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3497 c.iqdroprss_to_iqesize = 
htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 3498 F_FW_IQ_CMD_IQGTSMODE | 3499 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3500 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3501 c.iqsize = htobe16(iq->qsize); 3502 c.iqaddr = htobe64(iq->ba); 3503 if (cong >= 0) 3504 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3505 3506 if (fl) { 3507 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3508 3509 len = fl->qsize * EQ_ESIZE; 3510 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3511 &fl->ba, (void **)&fl->desc); 3512 if (rc) 3513 return (rc); 3514 3515 /* Allocate space for one software descriptor per buffer. */ 3516 rc = alloc_fl_sdesc(fl); 3517 if (rc != 0) { 3518 device_printf(sc->dev, 3519 "failed to setup fl software descriptors: %d\n", 3520 rc); 3521 return (rc); 3522 } 3523 3524 if (fl->flags & FL_BUF_PACKING) { 3525 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3526 fl->buf_boundary = sp->pack_boundary; 3527 } else { 3528 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3529 fl->buf_boundary = 16; 3530 } 3531 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3532 fl->buf_boundary = sp->pad_boundary; 3533 3534 c.iqns_to_fl0congen |= 3535 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3536 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3537 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3538 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3539 0)); 3540 if (cong >= 0) { 3541 c.iqns_to_fl0congen |= 3542 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 3543 F_FW_IQ_CMD_FL0CONGCIF | 3544 F_FW_IQ_CMD_FL0CONGEN); 3545 } 3546 c.fl0dcaen_to_fl0cidxfthresh = 3547 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3548 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 3549 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
3550 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3551 c.fl0size = htobe16(fl->qsize); 3552 c.fl0addr = htobe64(fl->ba); 3553 } 3554 3555 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3556 if (rc != 0) { 3557 device_printf(sc->dev, 3558 "failed to create ingress queue: %d\n", rc); 3559 return (rc); 3560 } 3561 3562 iq->cidx = 0; 3563 iq->gen = F_RSPD_GEN; 3564 iq->intr_next = iq->intr_params; 3565 iq->cntxt_id = be16toh(c.iqid); 3566 iq->abs_id = be16toh(c.physiqid); 3567 iq->flags |= IQ_ALLOCATED; 3568 3569 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3570 if (cntxt_id >= sc->sge.iqmap_sz) { 3571 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3572 cntxt_id, sc->sge.iqmap_sz - 1); 3573 } 3574 sc->sge.iqmap[cntxt_id] = iq; 3575 3576 if (fl) { 3577 u_int qid; 3578 3579 iq->flags |= IQ_HAS_FL; 3580 fl->cntxt_id = be16toh(c.fl0id); 3581 fl->pidx = fl->cidx = 0; 3582 3583 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3584 if (cntxt_id >= sc->sge.eqmap_sz) { 3585 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 3586 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 3587 } 3588 sc->sge.eqmap[cntxt_id] = (void *)fl; 3589 3590 qid = fl->cntxt_id; 3591 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3592 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3593 uint32_t mask = (1 << s_qpp) - 1; 3594 volatile uint8_t *udb; 3595 3596 udb = sc->udbs_base + UDBS_DB_OFFSET; 3597 udb += (qid >> s_qpp) << PAGE_SHIFT; 3598 qid &= mask; 3599 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3600 udb += qid << UDBS_SEG_SHIFT; 3601 qid = 0; 3602 } 3603 fl->udb = (volatile void *)udb; 3604 } 3605 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3606 3607 FL_LOCK(fl); 3608 /* Enough to make sure the SGE doesn't think it's starved */ 3609 refill_fl(sc, fl, fl->lowat); 3610 FL_UNLOCK(fl); 3611 } 3612 3613 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { 3614 uint32_t param, val; 3615 3616 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3617 
V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3618 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 3619 if (cong == 0) 3620 val = 1 << 19; 3621 else { 3622 val = 2 << 19; 3623 for (i = 0; i < 4; i++) { 3624 if (cong & (1 << i)) 3625 val |= 1 << (i << 2); 3626 } 3627 } 3628 3629 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3630 if (rc != 0) { 3631 /* report error but carry on */ 3632 device_printf(sc->dev, 3633 "failed to set congestion manager context for " 3634 "ingress queue %d: %d\n", iq->cntxt_id, rc); 3635 } 3636 } 3637 3638 /* Enable IQ interrupts */ 3639 atomic_store_rel_int(&iq->state, IQS_IDLE); 3640 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3641 V_INGRESSQID(iq->cntxt_id)); 3642 3643 return (0); 3644 } 3645 3646 static int 3647 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3648 { 3649 int rc; 3650 struct adapter *sc = iq->adapter; 3651 device_t dev; 3652 3653 if (sc == NULL) 3654 return (0); /* nothing to do */ 3655 3656 dev = vi ? vi->dev : sc->dev; 3657 3658 if (iq->flags & IQ_ALLOCATED) { 3659 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 3660 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 3661 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3662 if (rc != 0) { 3663 device_printf(dev, 3664 "failed to free queue %p: %d\n", iq, rc); 3665 return (rc); 3666 } 3667 iq->flags &= ~IQ_ALLOCATED; 3668 } 3669 3670 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3671 3672 bzero(iq, sizeof(*iq)); 3673 3674 if (fl) { 3675 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 3676 fl->desc); 3677 3678 if (fl->sdesc) 3679 free_fl_sdesc(sc, fl); 3680 3681 if (mtx_initialized(&fl->fl_lock)) 3682 mtx_destroy(&fl->fl_lock); 3683 3684 bzero(fl, sizeof(*fl)); 3685 } 3686 3687 return (0); 3688 } 3689 3690 static void 3691 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3692 struct sge_iq *iq) 3693 { 3694 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3695 3696 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3697 "bus address of descriptor ring"); 3698 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3699 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3700 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3701 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &iq->abs_id, 0, 3702 sysctl_uint16, "I", "absolute id of the queue"); 3703 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3704 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &iq->cntxt_id, 0, 3705 sysctl_uint16, "I", "SGE context id of the queue"); 3706 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3707 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &iq->cidx, 0, 3708 sysctl_uint16, "I", "consumer index"); 3709 } 3710 3711 static void 3712 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3713 struct sysctl_oid *oid, struct sge_fl *fl) 3714 { 3715 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3716 3717 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3718 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3719 children = SYSCTL_CHILDREN(oid); 3720 3721 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3722 &fl->ba, "bus 
address of descriptor ring"); 3723 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3724 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3725 "desc ring size in bytes"); 3726 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3727 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &fl->cntxt_id, 0, 3728 sysctl_uint16, "I", "SGE context id of the freelist"); 3729 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3730 fl_pad ? 1 : 0, "padding enabled"); 3731 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3732 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 3733 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3734 0, "consumer index"); 3735 if (fl->flags & FL_BUF_PACKING) { 3736 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3737 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3738 } 3739 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3740 0, "producer index"); 3741 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3742 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3743 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3744 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3745 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3746 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3747 } 3748 3749 static int 3750 alloc_fwq(struct adapter *sc) 3751 { 3752 int rc, intr_idx; 3753 struct sge_iq *fwq = &sc->sge.fwq; 3754 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 3755 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3756 3757 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 3758 if (sc->flags & IS_VF) 3759 intr_idx = 0; 3760 else 3761 intr_idx = sc->intr_count > 1 ? 
1 : 0; 3762 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 3763 if (rc != 0) { 3764 device_printf(sc->dev, 3765 "failed to create firmware event queue: %d\n", rc); 3766 return (rc); 3767 } 3768 3769 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", 3770 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "firmware event queue"); 3771 add_iq_sysctls(&sc->ctx, oid, fwq); 3772 3773 return (0); 3774 } 3775 3776 static int 3777 free_fwq(struct adapter *sc) 3778 { 3779 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 3780 } 3781 3782 static int 3783 alloc_ctrlq(struct adapter *sc, struct sge_wrq *ctrlq, int idx, 3784 struct sysctl_oid *oid) 3785 { 3786 int rc; 3787 char name[16]; 3788 struct sysctl_oid_list *children; 3789 3790 snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), 3791 idx); 3792 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[idx]->tx_chan, 3793 sc->sge.fwq.cntxt_id, name); 3794 3795 children = SYSCTL_CHILDREN(oid); 3796 snprintf(name, sizeof(name), "%d", idx); 3797 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, 3798 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ctrl queue"); 3799 rc = alloc_wrq(sc, NULL, ctrlq, oid); 3800 3801 return (rc); 3802 } 3803 3804 int 3805 tnl_cong(struct port_info *pi, int drop) 3806 { 3807 3808 if (drop == -1) 3809 return (-1); 3810 else if (drop == 1) 3811 return (0); 3812 else 3813 return (pi->rx_e_chan_map); 3814 } 3815 3816 static int 3817 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 3818 struct sysctl_oid *oid) 3819 { 3820 int rc; 3821 struct adapter *sc = vi->adapter; 3822 struct sysctl_oid_list *children; 3823 char name[16]; 3824 3825 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 3826 tnl_cong(vi->pi, cong_drop)); 3827 if (rc != 0) 3828 return (rc); 3829 3830 if (idx == 0) 3831 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3832 else 3833 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3834 ("iq_base mismatch")); 3835 
KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3836 ("PF with non-zero iq_base")); 3837 3838 /* 3839 * The freelist is just barely above the starvation threshold right now, 3840 * fill it up a bit more. 3841 */ 3842 FL_LOCK(&rxq->fl); 3843 refill_fl(sc, &rxq->fl, 128); 3844 FL_UNLOCK(&rxq->fl); 3845 3846 #if defined(INET) || defined(INET6) 3847 rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); 3848 if (rc != 0) 3849 return (rc); 3850 MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ 3851 3852 if (vi->ifp->if_capenable & IFCAP_LRO) 3853 rxq->iq.flags |= IQ_LRO_ENABLED; 3854 #endif 3855 if (vi->ifp->if_capenable & IFCAP_HWRXTSTMP) 3856 rxq->iq.flags |= IQ_RX_TIMESTAMP; 3857 rxq->ifp = vi->ifp; 3858 3859 children = SYSCTL_CHILDREN(oid); 3860 3861 snprintf(name, sizeof(name), "%d", idx); 3862 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 3863 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3864 children = SYSCTL_CHILDREN(oid); 3865 3866 add_iq_sysctls(&vi->ctx, oid, &rxq->iq); 3867 #if defined(INET) || defined(INET6) 3868 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3869 &rxq->lro.lro_queued, 0, NULL); 3870 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3871 &rxq->lro.lro_flushed, 0, NULL); 3872 #endif 3873 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3874 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3875 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3876 CTLFLAG_RD, &rxq->vlan_extraction, 3877 "# of times hardware extracted 802.1Q tag"); 3878 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_rxcsum", 3879 CTLFLAG_RD, &rxq->vxlan_rxcsum, 3880 "# of times hardware assisted with inner checksum (VXLAN) "); 3881 3882 add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); 3883 3884 return (rc); 3885 } 3886 3887 static int 3888 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3889 { 3890 int rc; 3891 3892 #if defined(INET) 
|| defined(INET6) 3893 if (rxq->lro.ifp) { 3894 tcp_lro_free(&rxq->lro); 3895 rxq->lro.ifp = NULL; 3896 } 3897 #endif 3898 3899 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3900 if (rc == 0) 3901 bzero(rxq, sizeof(*rxq)); 3902 3903 return (rc); 3904 } 3905 3906 #ifdef TCP_OFFLOAD 3907 static int 3908 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3909 int intr_idx, int idx, struct sysctl_oid *oid) 3910 { 3911 struct port_info *pi = vi->pi; 3912 int rc; 3913 struct sysctl_oid_list *children; 3914 char name[16]; 3915 3916 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); 3917 if (rc != 0) 3918 return (rc); 3919 3920 children = SYSCTL_CHILDREN(oid); 3921 3922 snprintf(name, sizeof(name), "%d", idx); 3923 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 3924 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3925 add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); 3926 add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); 3927 3928 return (rc); 3929 } 3930 3931 static int 3932 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3933 { 3934 int rc; 3935 3936 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3937 if (rc == 0) 3938 bzero(ofld_rxq, sizeof(*ofld_rxq)); 3939 3940 return (rc); 3941 } 3942 #endif 3943 3944 #ifdef DEV_NETMAP 3945 static int 3946 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3947 int idx, struct sysctl_oid *oid) 3948 { 3949 int rc; 3950 struct sysctl_oid_list *children; 3951 struct sysctl_ctx_list *ctx; 3952 char name[16]; 3953 size_t len; 3954 struct adapter *sc = vi->adapter; 3955 struct netmap_adapter *na = NA(vi->ifp); 3956 3957 MPASS(na != NULL); 3958 3959 len = vi->qsize_rxq * IQ_ESIZE; 3960 rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3961 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3962 if (rc != 0) 3963 return (rc); 3964 3965 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3966 rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, 
&nm_rxq->fl_desc_map, 3967 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3968 if (rc != 0) 3969 return (rc); 3970 3971 nm_rxq->vi = vi; 3972 nm_rxq->nid = idx; 3973 nm_rxq->iq_cidx = 0; 3974 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3975 nm_rxq->iq_gen = F_RSPD_GEN; 3976 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3977 nm_rxq->fl_sidx = na->num_rx_desc; 3978 nm_rxq->fl_sidx2 = nm_rxq->fl_sidx; /* copy for rxsync cacheline */ 3979 nm_rxq->intr_idx = intr_idx; 3980 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 3981 3982 ctx = &vi->ctx; 3983 children = SYSCTL_CHILDREN(oid); 3984 3985 snprintf(name, sizeof(name), "%d", idx); 3986 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, 3987 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue"); 3988 children = SYSCTL_CHILDREN(oid); 3989 3990 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3991 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_rxq->iq_abs_id, 3992 0, sysctl_uint16, "I", "absolute id of the queue"); 3993 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3994 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_rxq->iq_cntxt_id, 3995 0, sysctl_uint16, "I", "SGE context id of the queue"); 3996 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3997 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_rxq->iq_cidx, 0, 3998 sysctl_uint16, "I", "consumer index"); 3999 4000 children = SYSCTL_CHILDREN(oid); 4001 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 4002 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 4003 children = SYSCTL_CHILDREN(oid); 4004 4005 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 4006 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_rxq->fl_cntxt_id, 4007 0, sysctl_uint16, "I", "SGE context id of the freelist"); 4008 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 4009 &nm_rxq->fl_cidx, 0, "consumer index"); 4010 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 4011 &nm_rxq->fl_pidx, 0, "producer index"); 4012 4013 return (rc); 4014 } 4015 4016 4017 
static int 4018 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 4019 { 4020 struct adapter *sc = vi->adapter; 4021 4022 if (vi->flags & VI_INIT_DONE) 4023 MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); 4024 else 4025 MPASS(nm_rxq->iq_cntxt_id == 0); 4026 4027 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 4028 nm_rxq->iq_desc); 4029 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 4030 nm_rxq->fl_desc); 4031 4032 return (0); 4033 } 4034 4035 static int 4036 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 4037 struct sysctl_oid *oid) 4038 { 4039 int rc; 4040 size_t len; 4041 struct port_info *pi = vi->pi; 4042 struct adapter *sc = pi->adapter; 4043 struct netmap_adapter *na = NA(vi->ifp); 4044 char name[16]; 4045 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4046 4047 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 4048 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 4049 &nm_txq->ba, (void **)&nm_txq->desc); 4050 if (rc) 4051 return (rc); 4052 4053 nm_txq->pidx = nm_txq->cidx = 0; 4054 nm_txq->sidx = na->num_tx_desc; 4055 nm_txq->nid = idx; 4056 nm_txq->iqidx = iqidx; 4057 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 4058 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4059 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4060 if (sc->params.fw_vers >= FW_VERSION32(1, 24, 11, 0)) 4061 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)); 4062 else 4063 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4064 nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; 4065 4066 snprintf(name, sizeof(name), "%d", idx); 4067 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 4068 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netmap tx queue"); 4069 children = SYSCTL_CHILDREN(oid); 4070 4071 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4072 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 4073 
SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 4074 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_txq->cidx, 0, 4075 sysctl_uint16, "I", "consumer index"); 4076 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 4077 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &nm_txq->pidx, 0, 4078 sysctl_uint16, "I", "producer index"); 4079 4080 return (rc); 4081 } 4082 4083 static int 4084 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 4085 { 4086 struct adapter *sc = vi->adapter; 4087 4088 if (vi->flags & VI_INIT_DONE) 4089 MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); 4090 else 4091 MPASS(nm_txq->cntxt_id == 0); 4092 4093 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 4094 nm_txq->desc); 4095 4096 return (0); 4097 } 4098 #endif 4099 4100 /* 4101 * Returns a reasonable automatic cidx flush threshold for a given queue size. 4102 */ 4103 static u_int 4104 qsize_to_fthresh(int qsize) 4105 { 4106 u_int fthresh; 4107 4108 while (!powerof2(qsize)) 4109 qsize++; 4110 fthresh = ilog2(qsize); 4111 if (fthresh > X_CIDXFLUSHTHRESH_128) 4112 fthresh = X_CIDXFLUSHTHRESH_128; 4113 4114 return (fthresh); 4115 } 4116 4117 static int 4118 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 4119 { 4120 int rc, cntxt_id; 4121 struct fw_eq_ctrl_cmd c; 4122 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4123 4124 bzero(&c, sizeof(c)); 4125 4126 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 4127 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 4128 V_FW_EQ_CTRL_CMD_VFN(0)); 4129 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 4130 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 4131 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 4132 c.physeqid_pkd = htobe32(0); 4133 c.fetchszm_to_iqid = 4134 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4135 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 4136 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 4137 c.dcaen_to_eqsize = 4138 
htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 4139 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4140 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4141 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4142 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 4143 c.eqaddr = htobe64(eq->ba); 4144 4145 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4146 if (rc != 0) { 4147 device_printf(sc->dev, 4148 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 4149 return (rc); 4150 } 4151 eq->flags |= EQ_ALLOCATED; 4152 4153 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 4154 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4155 if (cntxt_id >= sc->sge.eqmap_sz) 4156 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4157 cntxt_id, sc->sge.eqmap_sz - 1); 4158 sc->sge.eqmap[cntxt_id] = eq; 4159 4160 return (rc); 4161 } 4162 4163 static int 4164 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4165 { 4166 int rc, cntxt_id; 4167 struct fw_eq_eth_cmd c; 4168 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4169 4170 bzero(&c, sizeof(c)); 4171 4172 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 4173 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 4174 V_FW_EQ_ETH_CMD_VFN(0)); 4175 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 4176 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 4177 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 4178 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 4179 c.fetchszm_to_iqid = 4180 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 4181 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 4182 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 4183 c.dcaen_to_eqsize = 4184 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4185 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4186 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4187 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 4188 c.eqaddr = htobe64(eq->ba); 4189 4190 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4191 if (rc != 0) { 4192 device_printf(vi->dev, 4193 "failed to create Ethernet egress queue: %d\n", rc); 4194 return (rc); 4195 } 4196 eq->flags |= EQ_ALLOCATED; 4197 4198 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 4199 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4200 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4201 if (cntxt_id >= sc->sge.eqmap_sz) 4202 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4203 cntxt_id, sc->sge.eqmap_sz - 1); 4204 sc->sge.eqmap[cntxt_id] = eq; 4205 4206 return (rc); 4207 } 4208 4209 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4210 static int 4211 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4212 { 4213 int rc, cntxt_id; 4214 struct fw_eq_ofld_cmd c; 4215 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4216 4217 bzero(&c, sizeof(c)); 4218 4219 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 4220 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 4221 V_FW_EQ_OFLD_CMD_VFN(0)); 4222 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 4223 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 4224 c.fetchszm_to_iqid = 4225 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4226 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 4227 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 4228 c.dcaen_to_eqsize = 4229 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4230 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4231 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4232 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4233 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 4234 c.eqaddr = htobe64(eq->ba); 4235 4236 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4237 if (rc != 0) { 4238 device_printf(vi->dev, 4239 "failed to create egress queue for TCP offload: %d\n", rc); 4240 return (rc); 4241 } 4242 eq->flags |= EQ_ALLOCATED; 4243 4244 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 4245 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4246 if (cntxt_id >= sc->sge.eqmap_sz) 4247 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4248 cntxt_id, sc->sge.eqmap_sz - 1); 4249 sc->sge.eqmap[cntxt_id] = eq; 4250 4251 return (rc); 4252 } 4253 #endif 4254 4255 static int 4256 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4257 { 4258 int rc, qsize; 4259 size_t len; 4260 4261 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 4262 4263 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4264 len = qsize * EQ_ESIZE; 4265 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 4266 &eq->ba, (void **)&eq->desc); 4267 if (rc) 4268 return (rc); 4269 4270 eq->pidx = eq->cidx = eq->dbidx = 0; 4271 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. 
*/ 4272 eq->equeqidx = 0; 4273 eq->doorbells = sc->doorbells; 4274 4275 switch (eq->flags & EQ_TYPEMASK) { 4276 case EQ_CTRL: 4277 rc = ctrl_eq_alloc(sc, eq); 4278 break; 4279 4280 case EQ_ETH: 4281 rc = eth_eq_alloc(sc, vi, eq); 4282 break; 4283 4284 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4285 case EQ_OFLD: 4286 rc = ofld_eq_alloc(sc, vi, eq); 4287 break; 4288 #endif 4289 4290 default: 4291 panic("%s: invalid eq type %d.", __func__, 4292 eq->flags & EQ_TYPEMASK); 4293 } 4294 if (rc != 0) { 4295 device_printf(sc->dev, 4296 "failed to allocate egress queue(%d): %d\n", 4297 eq->flags & EQ_TYPEMASK, rc); 4298 } 4299 4300 if (isset(&eq->doorbells, DOORBELL_UDB) || 4301 isset(&eq->doorbells, DOORBELL_UDBWC) || 4302 isset(&eq->doorbells, DOORBELL_WCWR)) { 4303 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 4304 uint32_t mask = (1 << s_qpp) - 1; 4305 volatile uint8_t *udb; 4306 4307 udb = sc->udbs_base + UDBS_DB_OFFSET; 4308 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 4309 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 4310 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 4311 clrbit(&eq->doorbells, DOORBELL_WCWR); 4312 else { 4313 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 4314 eq->udb_qid = 0; 4315 } 4316 eq->udb = (volatile void *)udb; 4317 } 4318 4319 return (rc); 4320 } 4321 4322 static int 4323 free_eq(struct adapter *sc, struct sge_eq *eq) 4324 { 4325 int rc; 4326 4327 if (eq->flags & EQ_ALLOCATED) { 4328 switch (eq->flags & EQ_TYPEMASK) { 4329 case EQ_CTRL: 4330 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 4331 eq->cntxt_id); 4332 break; 4333 4334 case EQ_ETH: 4335 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 4336 eq->cntxt_id); 4337 break; 4338 4339 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4340 case EQ_OFLD: 4341 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 4342 eq->cntxt_id); 4343 break; 4344 #endif 4345 4346 default: 4347 panic("%s: invalid eq type %d.", __func__, 4348 eq->flags & EQ_TYPEMASK); 4349 } 4350 if (rc != 
0) { 4351 device_printf(sc->dev, 4352 "failed to free egress queue (%d): %d\n", 4353 eq->flags & EQ_TYPEMASK, rc); 4354 return (rc); 4355 } 4356 eq->flags &= ~EQ_ALLOCATED; 4357 } 4358 4359 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 4360 4361 if (mtx_initialized(&eq->eq_lock)) 4362 mtx_destroy(&eq->eq_lock); 4363 4364 bzero(eq, sizeof(*eq)); 4365 return (0); 4366 } 4367 4368 static int 4369 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 4370 struct sysctl_oid *oid) 4371 { 4372 int rc; 4373 struct sysctl_ctx_list *ctx = vi ? &vi->ctx : &sc->ctx; 4374 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4375 4376 rc = alloc_eq(sc, vi, &wrq->eq); 4377 if (rc) 4378 return (rc); 4379 4380 wrq->adapter = sc; 4381 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 4382 TAILQ_INIT(&wrq->incomplete_wrs); 4383 STAILQ_INIT(&wrq->wr_list); 4384 wrq->nwr_pending = 0; 4385 wrq->ndesc_needed = 0; 4386 4387 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 4388 &wrq->eq.ba, "bus address of descriptor ring"); 4389 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4390 wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, 4391 "desc ring size in bytes"); 4392 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4393 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 4394 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 4395 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &wrq->eq.cidx, 0, 4396 sysctl_uint16, "I", "consumer index"); 4397 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 4398 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &wrq->eq.pidx, 0, 4399 sysctl_uint16, "I", "producer index"); 4400 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4401 wrq->eq.sidx, "status page index"); 4402 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 4403 &wrq->tx_wrs_direct, "# of work requests (direct)"); 4404 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", 
CTLFLAG_RD, 4405 &wrq->tx_wrs_copied, "# of work requests (copied)"); 4406 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 4407 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 4408 4409 return (rc); 4410 } 4411 4412 static int 4413 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 4414 { 4415 int rc; 4416 4417 rc = free_eq(sc, &wrq->eq); 4418 if (rc) 4419 return (rc); 4420 4421 bzero(wrq, sizeof(*wrq)); 4422 return (0); 4423 } 4424 4425 static int 4426 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 4427 struct sysctl_oid *oid) 4428 { 4429 int rc; 4430 struct port_info *pi = vi->pi; 4431 struct adapter *sc = pi->adapter; 4432 struct sge_eq *eq = &txq->eq; 4433 struct txpkts *txp; 4434 char name[16]; 4435 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4436 4437 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 4438 M_CXGBE, &eq->eq_lock, M_WAITOK); 4439 if (rc != 0) { 4440 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 4441 return (rc); 4442 } 4443 4444 rc = alloc_eq(sc, vi, eq); 4445 if (rc != 0) { 4446 mp_ring_free(txq->r); 4447 txq->r = NULL; 4448 return (rc); 4449 } 4450 4451 /* Can't fail after this point. 
*/ 4452 4453 if (idx == 0) 4454 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4455 else 4456 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4457 ("eq_base mismatch")); 4458 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4459 ("PF with non-zero eq_base")); 4460 4461 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4462 txq->ifp = vi->ifp; 4463 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4464 if (vi->flags & TX_USES_VM_WR) 4465 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4466 V_TXPKT_INTF(pi->tx_chan)); 4467 else 4468 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4469 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4470 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4471 txq->tc_idx = -1; 4472 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4473 M_ZERO | M_WAITOK); 4474 4475 txp = &txq->txp; 4476 txp->score = 5; 4477 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr); 4478 txq->txp.max_npkt = min(nitems(txp->mb), 4479 sc->params.max_pkts_per_eth_tx_pkts_wr); 4480 if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF)) 4481 txq->txp.max_npkt--; 4482 4483 snprintf(name, sizeof(name), "%d", idx); 4484 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, 4485 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "tx queue"); 4486 children = SYSCTL_CHILDREN(oid); 4487 4488 SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 4489 &eq->ba, "bus address of descriptor ring"); 4490 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4491 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4492 "desc ring size in bytes"); 4493 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4494 &eq->abs_id, 0, "absolute id of the queue"); 4495 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4496 &eq->cntxt_id, 0, "SGE context id of the queue"); 4497 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 4498 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &eq->cidx, 0, 
4499 sysctl_uint16, "I", "consumer index"); 4500 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 4501 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &eq->pidx, 0, 4502 sysctl_uint16, "I", "producer index"); 4503 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4504 eq->sidx, "status page index"); 4505 4506 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 4507 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, idx, sysctl_tc, 4508 "I", "traffic class (-1 means none)"); 4509 4510 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4511 &txq->txcsum, "# of times hardware assisted with checksum"); 4512 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 4513 CTLFLAG_RD, &txq->vlan_insertion, 4514 "# of times hardware inserted 802.1Q tag"); 4515 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4516 &txq->tso_wrs, "# of TSO work requests"); 4517 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4518 &txq->imm_wrs, "# of work requests with immediate data"); 4519 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4520 &txq->sgl_wrs, "# of work requests with direct SGL"); 4521 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4522 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4523 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 4524 CTLFLAG_RD, &txq->txpkts0_wrs, 4525 "# of txpkts (type 0) work requests"); 4526 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 4527 CTLFLAG_RD, &txq->txpkts1_wrs, 4528 "# of txpkts (type 1) work requests"); 4529 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 4530 CTLFLAG_RD, &txq->txpkts0_pkts, 4531 "# of frames tx'd using type0 txpkts work requests"); 4532 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 4533 CTLFLAG_RD, &txq->txpkts1_pkts, 4534 "# of frames tx'd using type1 txpkts work requests"); 4535 SYSCTL_ADD_UQUAD(&vi->ctx, children, 
OID_AUTO, "raw_wrs", CTLFLAG_RD, 4536 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4537 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs", 4538 CTLFLAG_RD, &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests"); 4539 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_txcsum", 4540 CTLFLAG_RD, &txq->vxlan_txcsum, 4541 "# of times hardware assisted with inner checksums (VXLAN)"); 4542 4543 #ifdef KERN_TLS 4544 if (sc->flags & KERN_TLS_OK) { 4545 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4546 "kern_tls_records", CTLFLAG_RD, &txq->kern_tls_records, 4547 "# of NIC TLS records transmitted"); 4548 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4549 "kern_tls_short", CTLFLAG_RD, &txq->kern_tls_short, 4550 "# of short NIC TLS records transmitted"); 4551 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4552 "kern_tls_partial", CTLFLAG_RD, &txq->kern_tls_partial, 4553 "# of partial NIC TLS records transmitted"); 4554 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4555 "kern_tls_full", CTLFLAG_RD, &txq->kern_tls_full, 4556 "# of full NIC TLS records transmitted"); 4557 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4558 "kern_tls_octets", CTLFLAG_RD, &txq->kern_tls_octets, 4559 "# of payload octets in transmitted NIC TLS records"); 4560 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4561 "kern_tls_waste", CTLFLAG_RD, &txq->kern_tls_waste, 4562 "# of octets DMAd but not transmitted in NIC TLS records"); 4563 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4564 "kern_tls_options", CTLFLAG_RD, &txq->kern_tls_options, 4565 "# of NIC TLS options-only packets transmitted"); 4566 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4567 "kern_tls_header", CTLFLAG_RD, &txq->kern_tls_header, 4568 "# of NIC TLS header-only packets transmitted"); 4569 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4570 "kern_tls_fin", CTLFLAG_RD, &txq->kern_tls_fin, 4571 "# of NIC TLS FIN-only packets transmitted"); 4572 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4573 
"kern_tls_fin_short", CTLFLAG_RD, &txq->kern_tls_fin_short, 4574 "# of NIC TLS padded FIN packets on short TLS records"); 4575 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4576 "kern_tls_cbc", CTLFLAG_RD, &txq->kern_tls_cbc, 4577 "# of NIC TLS sessions using AES-CBC"); 4578 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, 4579 "kern_tls_gcm", CTLFLAG_RD, &txq->kern_tls_gcm, 4580 "# of NIC TLS sessions using AES-GCM"); 4581 } 4582 #endif 4583 mp_ring_sysctls(txq->r, &vi->ctx, children); 4584 4585 return (0); 4586 } 4587 4588 static int 4589 free_txq(struct vi_info *vi, struct sge_txq *txq) 4590 { 4591 int rc; 4592 struct adapter *sc = vi->adapter; 4593 struct sge_eq *eq = &txq->eq; 4594 4595 rc = free_eq(sc, eq); 4596 if (rc) 4597 return (rc); 4598 4599 sglist_free(txq->gl); 4600 free(txq->sdesc, M_CXGBE); 4601 mp_ring_free(txq->r); 4602 4603 bzero(txq, sizeof(*txq)); 4604 return (0); 4605 } 4606 4607 static void 4608 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4609 { 4610 bus_addr_t *ba = arg; 4611 4612 KASSERT(nseg == 1, 4613 ("%s meant for single segment mappings only.", __func__)); 4614 4615 *ba = error ? 0 : segs->ds_addr; 4616 } 4617 4618 static inline void 4619 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4620 { 4621 uint32_t n, v; 4622 4623 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); 4624 MPASS(n > 0); 4625 4626 wmb(); 4627 v = fl->dbval | V_PIDX(n); 4628 if (fl->udb) 4629 *fl->udb = htole32(v); 4630 else 4631 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4632 IDXINCR(fl->dbidx, n, fl->sidx); 4633 } 4634 4635 /* 4636 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4637 * recycled do not count towards this allocation budget. 4638 * 4639 * Returns non-zero to indicate that this freelist should be added to the list 4640 * of starving freelists. 
4641 */ 4642 static int 4643 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 4644 { 4645 __be64 *d; 4646 struct fl_sdesc *sd; 4647 uintptr_t pa; 4648 caddr_t cl; 4649 struct rx_buf_info *rxb; 4650 struct cluster_metadata *clm; 4651 uint16_t max_pidx; 4652 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 4653 4654 FL_LOCK_ASSERT_OWNED(fl); 4655 4656 /* 4657 * We always stop at the beginning of the hardware descriptor that's just 4658 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 4659 * which would mean an empty freelist to the chip. 4660 */ 4661 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 4662 if (fl->pidx == max_pidx * 8) 4663 return (0); 4664 4665 d = &fl->desc[fl->pidx]; 4666 sd = &fl->sdesc[fl->pidx]; 4667 4668 while (n > 0) { 4669 4670 if (sd->cl != NULL) { 4671 4672 if (sd->nmbuf == 0) { 4673 /* 4674 * Fast recycle without involving any atomics on 4675 * the cluster's metadata (if the cluster has 4676 * metadata). This happens when all frames 4677 * received in the cluster were small enough to 4678 * fit within a single mbuf each. 4679 */ 4680 fl->cl_fast_recycled++; 4681 goto recycled; 4682 } 4683 4684 /* 4685 * Cluster is guaranteed to have metadata. Clusters 4686 * without metadata always take the fast recycle path 4687 * when they're recycled. 
4688 */ 4689 clm = cl_metadata(sd); 4690 MPASS(clm != NULL); 4691 4692 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4693 fl->cl_recycled++; 4694 counter_u64_add(extfree_rels, 1); 4695 goto recycled; 4696 } 4697 sd->cl = NULL; /* gave up my reference */ 4698 } 4699 MPASS(sd->cl == NULL); 4700 rxb = &sc->sge.rx_buf_info[fl->zidx]; 4701 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4702 if (__predict_false(cl == NULL)) { 4703 if (fl->zidx != fl->safe_zidx) { 4704 rxb = &sc->sge.rx_buf_info[fl->safe_zidx]; 4705 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4706 } 4707 if (cl == NULL) 4708 break; 4709 } 4710 fl->cl_allocated++; 4711 n--; 4712 4713 pa = pmap_kextract((vm_offset_t)cl); 4714 sd->cl = cl; 4715 sd->zidx = fl->zidx; 4716 4717 if (fl->flags & FL_BUF_PACKING) { 4718 *d = htobe64(pa | rxb->hwidx2); 4719 sd->moff = rxb->size2; 4720 } else { 4721 *d = htobe64(pa | rxb->hwidx1); 4722 sd->moff = 0; 4723 } 4724 recycled: 4725 sd->nmbuf = 0; 4726 d++; 4727 sd++; 4728 if (__predict_false((++fl->pidx & 7) == 0)) { 4729 uint16_t pidx = fl->pidx >> 3; 4730 4731 if (__predict_false(pidx == fl->sidx)) { 4732 fl->pidx = 0; 4733 pidx = 0; 4734 sd = fl->sdesc; 4735 d = fl->desc; 4736 } 4737 if (n < 8 || pidx == max_pidx) 4738 break; 4739 4740 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 4741 ring_fl_db(sc, fl); 4742 } 4743 } 4744 4745 if ((fl->pidx >> 3) != fl->dbidx) 4746 ring_fl_db(sc, fl); 4747 4748 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 4749 } 4750 4751 /* 4752 * Attempt to refill all starving freelists. 
4753 */ 4754 static void 4755 refill_sfl(void *arg) 4756 { 4757 struct adapter *sc = arg; 4758 struct sge_fl *fl, *fl_temp; 4759 4760 mtx_assert(&sc->sfl_lock, MA_OWNED); 4761 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 4762 FL_LOCK(fl); 4763 refill_fl(sc, fl, 64); 4764 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 4765 TAILQ_REMOVE(&sc->sfl, fl, link); 4766 fl->flags &= ~FL_STARVING; 4767 } 4768 FL_UNLOCK(fl); 4769 } 4770 4771 if (!TAILQ_EMPTY(&sc->sfl)) 4772 callout_schedule(&sc->sfl_callout, hz / 5); 4773 } 4774 4775 static int 4776 alloc_fl_sdesc(struct sge_fl *fl) 4777 { 4778 4779 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 4780 M_ZERO | M_WAITOK); 4781 4782 return (0); 4783 } 4784 4785 static void 4786 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 4787 { 4788 struct fl_sdesc *sd; 4789 struct cluster_metadata *clm; 4790 int i; 4791 4792 sd = fl->sdesc; 4793 for (i = 0; i < fl->sidx * 8; i++, sd++) { 4794 if (sd->cl == NULL) 4795 continue; 4796 4797 if (sd->nmbuf == 0) 4798 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); 4799 else if (fl->flags & FL_BUF_PACKING) { 4800 clm = cl_metadata(sd); 4801 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4802 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, 4803 sd->cl); 4804 counter_u64_add(extfree_rels, 1); 4805 } 4806 } 4807 sd->cl = NULL; 4808 } 4809 4810 free(fl->sdesc, M_CXGBE); 4811 fl->sdesc = NULL; 4812 } 4813 4814 static inline void 4815 get_pkt_gl(struct mbuf *m, struct sglist *gl) 4816 { 4817 int rc; 4818 4819 M_ASSERTPKTHDR(m); 4820 4821 sglist_reset(gl); 4822 rc = sglist_append_mbuf(gl, m); 4823 if (__predict_false(rc != 0)) { 4824 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 4825 "with %d.", __func__, m, mbuf_nsegs(m), rc); 4826 } 4827 4828 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 4829 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 4830 mbuf_nsegs(m), gl->sg_nseg)); 4831 #if 0 /* vm_wr not readily available here. 
*/ 4832 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr), 4833 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 4834 gl->sg_nseg, max_nsegs_allowed(m, vm_wr))); 4835 #endif 4836 } 4837 4838 /* 4839 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 4840 */ 4841 static inline u_int 4842 txpkt_len16(u_int nsegs, const u_int extra) 4843 { 4844 u_int n; 4845 4846 MPASS(nsegs > 0); 4847 4848 nsegs--; /* first segment is part of ulptx_sgl */ 4849 n = extra + sizeof(struct fw_eth_tx_pkt_wr) + 4850 sizeof(struct cpl_tx_pkt_core) + 4851 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4852 4853 return (howmany(n, 16)); 4854 } 4855 4856 /* 4857 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 4858 * request header. 4859 */ 4860 static inline u_int 4861 txpkt_vm_len16(u_int nsegs, const u_int extra) 4862 { 4863 u_int n; 4864 4865 MPASS(nsegs > 0); 4866 4867 nsegs--; /* first segment is part of ulptx_sgl */ 4868 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) + 4869 sizeof(struct cpl_tx_pkt_core) + 4870 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4871 4872 return (howmany(n, 16)); 4873 } 4874 4875 static inline void 4876 calculate_mbuf_len16(struct mbuf *m, bool vm_wr) 4877 { 4878 const int lso = sizeof(struct cpl_tx_pkt_lso_core); 4879 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso); 4880 4881 if (vm_wr) { 4882 if (needs_tso(m)) 4883 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso)); 4884 else 4885 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0)); 4886 return; 4887 } 4888 4889 if (needs_tso(m)) { 4890 if (needs_vxlan_tso(m)) 4891 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso)); 4892 else 4893 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso)); 4894 } else 4895 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0)); 4896 } 4897 4898 /* 4899 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 4900 * request header. 
4901 */ 4902 static inline u_int 4903 txpkts0_len16(u_int nsegs) 4904 { 4905 u_int n; 4906 4907 MPASS(nsegs > 0); 4908 4909 nsegs--; /* first segment is part of ulptx_sgl */ 4910 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 4911 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 4912 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4913 4914 return (howmany(n, 16)); 4915 } 4916 4917 /* 4918 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 4919 * request header. 4920 */ 4921 static inline u_int 4922 txpkts1_len16(void) 4923 { 4924 u_int n; 4925 4926 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 4927 4928 return (howmany(n, 16)); 4929 } 4930 4931 static inline u_int 4932 imm_payload(u_int ndesc) 4933 { 4934 u_int n; 4935 4936 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 4937 sizeof(struct cpl_tx_pkt_core); 4938 4939 return (n); 4940 } 4941 4942 static inline uint64_t 4943 csum_to_ctrl(struct adapter *sc, struct mbuf *m) 4944 { 4945 uint64_t ctrl; 4946 int csum_type, l2hlen, l3hlen; 4947 int x, y; 4948 static const int csum_types[3][2] = { 4949 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6}, 4950 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6}, 4951 {TX_CSUM_IP, 0} 4952 }; 4953 4954 M_ASSERTPKTHDR(m); 4955 4956 if (!needs_hwcsum(m)) 4957 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); 4958 4959 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN); 4960 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip)); 4961 4962 if (needs_vxlan_csum(m)) { 4963 MPASS(m->m_pkthdr.l4hlen > 0); 4964 MPASS(m->m_pkthdr.l5hlen > 0); 4965 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN); 4966 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip)); 4967 4968 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + 4969 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen + 4970 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN; 4971 l3hlen = m->m_pkthdr.inner_l3hlen; 4972 } else { 4973 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN; 4974 l3hlen = m->m_pkthdr.l3hlen; 4975 } 4976 4977 ctrl = 0; 4978 
if (!needs_l3_csum(m)) 4979 ctrl |= F_TXPKT_IPCSUM_DIS; 4980 4981 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP | 4982 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP)) 4983 x = 0; /* TCP */ 4984 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP | 4985 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP)) 4986 x = 1; /* UDP */ 4987 else 4988 x = 2; 4989 4990 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | 4991 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) 4992 y = 0; /* IPv4 */ 4993 else { 4994 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | 4995 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)); 4996 y = 1; /* IPv6 */ 4997 } 4998 /* 4999 * needs_hwcsum returned true earlier so there must be some kind of 5000 * checksum to calculate. 5001 */ 5002 csum_type = csum_types[x][y]; 5003 MPASS(csum_type != 0); 5004 if (csum_type == TX_CSUM_IP) 5005 ctrl |= F_TXPKT_L4CSUM_DIS; 5006 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen); 5007 if (chip_id(sc) <= CHELSIO_T5) 5008 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen); 5009 else 5010 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen); 5011 5012 return (ctrl); 5013 } 5014 5015 static inline void * 5016 write_lso_cpl(void *cpl, struct mbuf *m0) 5017 { 5018 struct cpl_tx_pkt_lso_core *lso; 5019 uint32_t ctrl; 5020 5021 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5022 m0->m_pkthdr.l4hlen > 0, 5023 ("%s: mbuf %p needs TSO but missing header lengths", 5024 __func__, m0)); 5025 5026 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 5027 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 5028 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5029 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 5030 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 5031 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5032 ctrl |= F_LSO_IPV6; 5033 5034 lso = cpl; 5035 lso->lso_ctrl = htobe32(ctrl); 5036 lso->ipid_ofst = htobe16(0); 5037 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 5038 lso->seqno_offset = htobe32(0); 5039 
lso->len = htobe32(m0->m_pkthdr.len); 5040 5041 return (lso + 1); 5042 } 5043 5044 static void * 5045 write_tnl_lso_cpl(void *cpl, struct mbuf *m0) 5046 { 5047 struct cpl_tx_tnl_lso *tnl_lso = cpl; 5048 uint32_t ctrl; 5049 5050 KASSERT(m0->m_pkthdr.inner_l2hlen > 0 && 5051 m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 && 5052 m0->m_pkthdr.inner_l5hlen > 0, 5053 ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths", 5054 __func__, m0)); 5055 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5056 m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0, 5057 ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths", 5058 __func__, m0)); 5059 5060 /* Outer headers. */ 5061 ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) | 5062 F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST | 5063 V_CPL_TX_TNL_LSO_ETHHDRLENOUT( 5064 (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5065 V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) | 5066 F_CPL_TX_TNL_LSO_IPLENSETOUT; 5067 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5068 ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT; 5069 else { 5070 ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT | 5071 F_CPL_TX_TNL_LSO_IPIDINCOUT; 5072 } 5073 tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl); 5074 tnl_lso->IpIdOffsetOut = 0; 5075 tnl_lso->UdpLenSetOut_to_TnlHdrLen = 5076 htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT | 5077 F_CPL_TX_TNL_LSO_UDPLENSETOUT | 5078 V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen + 5079 m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen + 5080 m0->m_pkthdr.l5hlen) | 5081 V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN)); 5082 tnl_lso->r1 = 0; 5083 5084 /* Inner headers. 
*/ 5085 ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN( 5086 (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) | 5087 V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) | 5088 V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2); 5089 if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr)) 5090 ctrl |= F_CPL_TX_TNL_LSO_IPV6; 5091 tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl); 5092 tnl_lso->IpIdOffset = 0; 5093 tnl_lso->IpIdSplit_to_Mss = 5094 htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz)); 5095 tnl_lso->TCPSeqOffset = 0; 5096 tnl_lso->EthLenOffset_Size = 5097 htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len)); 5098 5099 return (tnl_lso + 1); 5100 } 5101 5102 #define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */ 5103 5104 /* 5105 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 5106 * software descriptor, and advance the pidx. It is guaranteed that enough 5107 * descriptors are available. 5108 * 5109 * The return value is the # of hardware descriptors used. 
5110 */ 5111 static u_int 5112 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0) 5113 { 5114 struct sge_eq *eq; 5115 struct fw_eth_tx_pkt_vm_wr *wr; 5116 struct tx_sdesc *txsd; 5117 struct cpl_tx_pkt_core *cpl; 5118 uint32_t ctrl; /* used in many unrelated places */ 5119 uint64_t ctrl1; 5120 int len16, ndesc, pktlen, nsegs; 5121 caddr_t dst; 5122 5123 TXQ_LOCK_ASSERT_OWNED(txq); 5124 M_ASSERTPKTHDR(m0); 5125 5126 len16 = mbuf_len16(m0); 5127 nsegs = mbuf_nsegs(m0); 5128 pktlen = m0->m_pkthdr.len; 5129 ctrl = sizeof(struct cpl_tx_pkt_core); 5130 if (needs_tso(m0)) 5131 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5132 ndesc = tx_len16_to_desc(len16); 5133 5134 /* Firmware work request header */ 5135 eq = &txq->eq; 5136 wr = (void *)&eq->desc[eq->pidx]; 5137 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 5138 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5139 5140 ctrl = V_FW_WR_LEN16(len16); 5141 wr->equiq_to_len16 = htobe32(ctrl); 5142 wr->r3[0] = 0; 5143 wr->r3[1] = 0; 5144 5145 /* 5146 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 5147 * vlantci is ignored unless the ethtype is 0x8100, so it's 5148 * simpler to always copy it rather than making it 5149 * conditional. Also, it seems that we do not have to set 5150 * vlantci or fake the ethtype when doing VLAN tag insertion. 
5151 */ 5152 m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst); 5153 5154 if (needs_tso(m0)) { 5155 cpl = write_lso_cpl(wr + 1, m0); 5156 txq->tso_wrs++; 5157 } else 5158 cpl = (void *)(wr + 1); 5159 5160 /* Checksum offload */ 5161 ctrl1 = csum_to_ctrl(sc, m0); 5162 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5163 txq->txcsum++; /* some hardware assistance provided */ 5164 5165 /* VLAN tag insertion */ 5166 if (needs_vlan_insertion(m0)) { 5167 ctrl1 |= F_TXPKT_VLAN_VLD | 5168 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5169 txq->vlan_insertion++; 5170 } 5171 5172 /* CPL header */ 5173 cpl->ctrl0 = txq->cpl_ctrl0; 5174 cpl->pack = 0; 5175 cpl->len = htobe16(pktlen); 5176 cpl->ctrl1 = htobe64(ctrl1); 5177 5178 /* SGL */ 5179 dst = (void *)(cpl + 1); 5180 5181 /* 5182 * A packet using TSO will use up an entire descriptor for the 5183 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 5184 * If this descriptor is the last descriptor in the ring, wrap 5185 * around to the front of the ring explicitly for the start of 5186 * the sgl. 5187 */ 5188 if (dst == (void *)&eq->desc[eq->sidx]) { 5189 dst = (void *)&eq->desc[0]; 5190 write_gl_to_txd(txq, m0, &dst, 0); 5191 } else 5192 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5193 txq->sgl_wrs++; 5194 txq->txpkt_wrs++; 5195 5196 txsd = &txq->sdesc[eq->pidx]; 5197 txsd->m = m0; 5198 txsd->desc_used = ndesc; 5199 5200 return (ndesc); 5201 } 5202 5203 /* 5204 * Write a raw WR to the hardware descriptors, update the software 5205 * descriptor, and advance the pidx. It is guaranteed that enough 5206 * descriptors are available. 5207 * 5208 * The return value is the # of hardware descriptors used. 
5209 */ 5210 static u_int 5211 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 5212 { 5213 struct sge_eq *eq = &txq->eq; 5214 struct tx_sdesc *txsd; 5215 struct mbuf *m; 5216 caddr_t dst; 5217 int len16, ndesc; 5218 5219 len16 = mbuf_len16(m0); 5220 ndesc = tx_len16_to_desc(len16); 5221 MPASS(ndesc <= available); 5222 5223 dst = wr; 5224 for (m = m0; m != NULL; m = m->m_next) 5225 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5226 5227 txq->raw_wrs++; 5228 5229 txsd = &txq->sdesc[eq->pidx]; 5230 txsd->m = m0; 5231 txsd->desc_used = ndesc; 5232 5233 return (ndesc); 5234 } 5235 5236 /* 5237 * Write a txpkt WR for this packet to the hardware descriptors, update the 5238 * software descriptor, and advance the pidx. It is guaranteed that enough 5239 * descriptors are available. 5240 * 5241 * The return value is the # of hardware descriptors used. 5242 */ 5243 static u_int 5244 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0, 5245 u_int available) 5246 { 5247 struct sge_eq *eq; 5248 struct fw_eth_tx_pkt_wr *wr; 5249 struct tx_sdesc *txsd; 5250 struct cpl_tx_pkt_core *cpl; 5251 uint32_t ctrl; /* used in many unrelated places */ 5252 uint64_t ctrl1; 5253 int len16, ndesc, pktlen, nsegs; 5254 caddr_t dst; 5255 5256 TXQ_LOCK_ASSERT_OWNED(txq); 5257 M_ASSERTPKTHDR(m0); 5258 5259 len16 = mbuf_len16(m0); 5260 nsegs = mbuf_nsegs(m0); 5261 pktlen = m0->m_pkthdr.len; 5262 ctrl = sizeof(struct cpl_tx_pkt_core); 5263 if (needs_tso(m0)) { 5264 if (needs_vxlan_tso(m0)) 5265 ctrl += sizeof(struct cpl_tx_tnl_lso); 5266 else 5267 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5268 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && 5269 available >= 2) { 5270 /* Immediate data. Recalculate len16 and set nsegs to 0. 
*/ 5271 ctrl += pktlen; 5272 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 5273 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 5274 nsegs = 0; 5275 } 5276 ndesc = tx_len16_to_desc(len16); 5277 MPASS(ndesc <= available); 5278 5279 /* Firmware work request header */ 5280 eq = &txq->eq; 5281 wr = (void *)&eq->desc[eq->pidx]; 5282 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 5283 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5284 5285 ctrl = V_FW_WR_LEN16(len16); 5286 wr->equiq_to_len16 = htobe32(ctrl); 5287 wr->r3 = 0; 5288 5289 if (needs_tso(m0)) { 5290 if (needs_vxlan_tso(m0)) { 5291 cpl = write_tnl_lso_cpl(wr + 1, m0); 5292 txq->vxlan_tso_wrs++; 5293 } else { 5294 cpl = write_lso_cpl(wr + 1, m0); 5295 txq->tso_wrs++; 5296 } 5297 } else 5298 cpl = (void *)(wr + 1); 5299 5300 /* Checksum offload */ 5301 ctrl1 = csum_to_ctrl(sc, m0); 5302 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5303 /* some hardware assistance provided */ 5304 if (needs_vxlan_csum(m0)) 5305 txq->vxlan_txcsum++; 5306 else 5307 txq->txcsum++; 5308 } 5309 5310 /* VLAN tag insertion */ 5311 if (needs_vlan_insertion(m0)) { 5312 ctrl1 |= F_TXPKT_VLAN_VLD | 5313 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5314 txq->vlan_insertion++; 5315 } 5316 5317 /* CPL header */ 5318 cpl->ctrl0 = txq->cpl_ctrl0; 5319 cpl->pack = 0; 5320 cpl->len = htobe16(pktlen); 5321 cpl->ctrl1 = htobe64(ctrl1); 5322 5323 /* SGL */ 5324 dst = (void *)(cpl + 1); 5325 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx])) 5326 dst = (caddr_t)&eq->desc[0]; 5327 if (nsegs > 0) { 5328 5329 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5330 txq->sgl_wrs++; 5331 } else { 5332 struct mbuf *m; 5333 5334 for (m = m0; m != NULL; m = m->m_next) { 5335 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5336 #ifdef INVARIANTS 5337 pktlen -= m->m_len; 5338 #endif 5339 } 5340 #ifdef INVARIANTS 5341 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 5342 #endif 5343 txq->imm_wrs++; 5344 } 5345 
5346 txq->txpkt_wrs++; 5347 5348 txsd = &txq->sdesc[eq->pidx]; 5349 txsd->m = m0; 5350 txsd->desc_used = ndesc; 5351 5352 return (ndesc); 5353 } 5354 5355 static inline bool 5356 cmp_l2hdr(struct txpkts *txp, struct mbuf *m) 5357 { 5358 int len; 5359 5360 MPASS(txp->npkt > 0); 5361 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5362 5363 if (txp->ethtype == be16toh(ETHERTYPE_VLAN)) 5364 len = VM_TX_L2HDR_LEN; 5365 else 5366 len = sizeof(struct ether_header); 5367 5368 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0); 5369 } 5370 5371 static inline void 5372 save_l2hdr(struct txpkts *txp, struct mbuf *m) 5373 { 5374 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5375 5376 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN); 5377 } 5378 5379 static int 5380 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5381 int avail, bool *send) 5382 { 5383 struct txpkts *txp = &txq->txp; 5384 5385 /* Cannot have TSO and coalesce at the same time. */ 5386 if (cannot_use_txpkts(m)) { 5387 cannot_coalesce: 5388 *send = txp->npkt > 0; 5389 return (EINVAL); 5390 } 5391 5392 /* VF allows coalescing of type 1 (1 GL) only */ 5393 if (mbuf_nsegs(m) > 1) 5394 goto cannot_coalesce; 5395 5396 *send = false; 5397 if (txp->npkt > 0) { 5398 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5399 MPASS(txp->npkt < txp->max_npkt); 5400 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5401 5402 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) { 5403 retry_after_send: 5404 *send = true; 5405 return (EAGAIN); 5406 } 5407 if (m->m_pkthdr.len + txp->plen > 65535) 5408 goto retry_after_send; 5409 if (cmp_l2hdr(txp, m)) 5410 goto retry_after_send; 5411 5412 txp->len16 += txpkts1_len16(); 5413 txp->plen += m->m_pkthdr.len; 5414 txp->mb[txp->npkt++] = m; 5415 if (txp->npkt == txp->max_npkt) 5416 *send = true; 5417 } else { 5418 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) + 5419 txpkts1_len16(); 5420 if (tx_len16_to_desc(txp->len16) > avail) 5421 
goto cannot_coalesce; 5422 txp->npkt = 1; 5423 txp->wr_type = 1; 5424 txp->plen = m->m_pkthdr.len; 5425 txp->mb[0] = m; 5426 save_l2hdr(txp, m); 5427 } 5428 return (0); 5429 } 5430 5431 static int 5432 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5433 int avail, bool *send) 5434 { 5435 struct txpkts *txp = &txq->txp; 5436 int nsegs; 5437 5438 MPASS(!(sc->flags & IS_VF)); 5439 5440 /* Cannot have TSO and coalesce at the same time. */ 5441 if (cannot_use_txpkts(m)) { 5442 cannot_coalesce: 5443 *send = txp->npkt > 0; 5444 return (EINVAL); 5445 } 5446 5447 *send = false; 5448 nsegs = mbuf_nsegs(m); 5449 if (txp->npkt == 0) { 5450 if (m->m_pkthdr.len > 65535) 5451 goto cannot_coalesce; 5452 if (nsegs > 1) { 5453 txp->wr_type = 0; 5454 txp->len16 = 5455 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5456 txpkts0_len16(nsegs); 5457 } else { 5458 txp->wr_type = 1; 5459 txp->len16 = 5460 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5461 txpkts1_len16(); 5462 } 5463 if (tx_len16_to_desc(txp->len16) > avail) 5464 goto cannot_coalesce; 5465 txp->npkt = 1; 5466 txp->plen = m->m_pkthdr.len; 5467 txp->mb[0] = m; 5468 } else { 5469 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5470 MPASS(txp->npkt < txp->max_npkt); 5471 5472 if (m->m_pkthdr.len + txp->plen > 65535) { 5473 retry_after_send: 5474 *send = true; 5475 return (EAGAIN); 5476 } 5477 5478 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 5479 if (txp->wr_type == 0) { 5480 if (tx_len16_to_desc(txp->len16 + 5481 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC)) 5482 goto retry_after_send; 5483 txp->len16 += txpkts0_len16(nsegs); 5484 } else { 5485 if (nsegs != 1) 5486 goto retry_after_send; 5487 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > 5488 avail) 5489 goto retry_after_send; 5490 txp->len16 += txpkts1_len16(); 5491 } 5492 5493 txp->plen += m->m_pkthdr.len; 5494 txp->mb[txp->npkt++] = m; 5495 if (txp->npkt == txp->max_npkt) 5496 *send = true; 5497 } 5498 return (0); 5499 } 5500 
5501 /* 5502 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 5503 * the software descriptor, and advance the pidx. It is guaranteed that enough 5504 * descriptors are available. 5505 * 5506 * The return value is the # of hardware descriptors used. 5507 */ 5508 static u_int 5509 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq) 5510 { 5511 const struct txpkts *txp = &txq->txp; 5512 struct sge_eq *eq = &txq->eq; 5513 struct fw_eth_tx_pkts_wr *wr; 5514 struct tx_sdesc *txsd; 5515 struct cpl_tx_pkt_core *cpl; 5516 uint64_t ctrl1; 5517 int ndesc, i, checkwrap; 5518 struct mbuf *m, *last; 5519 void *flitp; 5520 5521 TXQ_LOCK_ASSERT_OWNED(txq); 5522 MPASS(txp->npkt > 0); 5523 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5524 5525 wr = (void *)&eq->desc[eq->pidx]; 5526 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 5527 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5528 wr->plen = htobe16(txp->plen); 5529 wr->npkt = txp->npkt; 5530 wr->r3 = 0; 5531 wr->type = txp->wr_type; 5532 flitp = wr + 1; 5533 5534 /* 5535 * At this point we are 16B into a hardware descriptor. If checkwrap is 5536 * set then we know the WR is going to wrap around somewhere. We'll 5537 * check for that at appropriate points. 
5538 */ 5539 ndesc = tx_len16_to_desc(txp->len16); 5540 last = NULL; 5541 checkwrap = eq->sidx - ndesc < eq->pidx; 5542 for (i = 0; i < txp->npkt; i++) { 5543 m = txp->mb[i]; 5544 if (txp->wr_type == 0) { 5545 struct ulp_txpkt *ulpmc; 5546 struct ulptx_idata *ulpsc; 5547 5548 /* ULP master command */ 5549 ulpmc = flitp; 5550 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 5551 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 5552 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m))); 5553 5554 /* ULP subcommand */ 5555 ulpsc = (void *)(ulpmc + 1); 5556 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 5557 F_ULP_TX_SC_MORE); 5558 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 5559 5560 cpl = (void *)(ulpsc + 1); 5561 if (checkwrap && 5562 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 5563 cpl = (void *)&eq->desc[0]; 5564 } else { 5565 cpl = flitp; 5566 } 5567 5568 /* Checksum offload */ 5569 ctrl1 = csum_to_ctrl(sc, m); 5570 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5571 /* some hardware assistance provided */ 5572 if (needs_vxlan_csum(m)) 5573 txq->vxlan_txcsum++; 5574 else 5575 txq->txcsum++; 5576 } 5577 5578 /* VLAN tag insertion */ 5579 if (needs_vlan_insertion(m)) { 5580 ctrl1 |= F_TXPKT_VLAN_VLD | 5581 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5582 txq->vlan_insertion++; 5583 } 5584 5585 /* CPL header */ 5586 cpl->ctrl0 = txq->cpl_ctrl0; 5587 cpl->pack = 0; 5588 cpl->len = htobe16(m->m_pkthdr.len); 5589 cpl->ctrl1 = htobe64(ctrl1); 5590 5591 flitp = cpl + 1; 5592 if (checkwrap && 5593 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5594 flitp = (void *)&eq->desc[0]; 5595 5596 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 5597 5598 if (last != NULL) 5599 last->m_nextpkt = m; 5600 last = m; 5601 } 5602 5603 txq->sgl_wrs++; 5604 if (txp->wr_type == 0) { 5605 txq->txpkts0_pkts += txp->npkt; 5606 txq->txpkts0_wrs++; 5607 } else { 5608 txq->txpkts1_pkts += txp->npkt; 5609 txq->txpkts1_wrs++; 5610 } 5611 5612 txsd = 
&txq->sdesc[eq->pidx]; 5613 txsd->m = txp->mb[0]; 5614 txsd->desc_used = ndesc; 5615 5616 return (ndesc); 5617 } 5618 5619 static u_int 5620 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq) 5621 { 5622 const struct txpkts *txp = &txq->txp; 5623 struct sge_eq *eq = &txq->eq; 5624 struct fw_eth_tx_pkts_vm_wr *wr; 5625 struct tx_sdesc *txsd; 5626 struct cpl_tx_pkt_core *cpl; 5627 uint64_t ctrl1; 5628 int ndesc, i; 5629 struct mbuf *m, *last; 5630 void *flitp; 5631 5632 TXQ_LOCK_ASSERT_OWNED(txq); 5633 MPASS(txp->npkt > 0); 5634 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5635 MPASS(txp->mb[0] != NULL); 5636 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5637 5638 wr = (void *)&eq->desc[eq->pidx]; 5639 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); 5640 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5641 wr->r3 = 0; 5642 wr->plen = htobe16(txp->plen); 5643 wr->npkt = txp->npkt; 5644 wr->r4 = 0; 5645 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16); 5646 flitp = wr + 1; 5647 5648 /* 5649 * At this point we are 32B into a hardware descriptor. Each mbuf in 5650 * the WR will take 32B so we check for the end of the descriptor ring 5651 * before writing odd mbufs (mb[1], 3, 5, ..) 
5652 */ 5653 ndesc = tx_len16_to_desc(txp->len16); 5654 last = NULL; 5655 for (i = 0; i < txp->npkt; i++) { 5656 m = txp->mb[i]; 5657 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5658 flitp = &eq->desc[0]; 5659 cpl = flitp; 5660 5661 /* Checksum offload */ 5662 ctrl1 = csum_to_ctrl(sc, m); 5663 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5664 txq->txcsum++; /* some hardware assistance provided */ 5665 5666 /* VLAN tag insertion */ 5667 if (needs_vlan_insertion(m)) { 5668 ctrl1 |= F_TXPKT_VLAN_VLD | 5669 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5670 txq->vlan_insertion++; 5671 } 5672 5673 /* CPL header */ 5674 cpl->ctrl0 = txq->cpl_ctrl0; 5675 cpl->pack = 0; 5676 cpl->len = htobe16(m->m_pkthdr.len); 5677 cpl->ctrl1 = htobe64(ctrl1); 5678 5679 flitp = cpl + 1; 5680 MPASS(mbuf_nsegs(m) == 1); 5681 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0); 5682 5683 if (last != NULL) 5684 last->m_nextpkt = m; 5685 last = m; 5686 } 5687 5688 txq->sgl_wrs++; 5689 txq->txpkts1_pkts += txp->npkt; 5690 txq->txpkts1_wrs++; 5691 5692 txsd = &txq->sdesc[eq->pidx]; 5693 txsd->m = txp->mb[0]; 5694 txsd->desc_used = ndesc; 5695 5696 return (ndesc); 5697 } 5698 5699 /* 5700 * If the SGL ends on an address that is not 16 byte aligned, this function will 5701 * add a 0 filled flit at the end. 
5702 */ 5703 static void 5704 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 5705 { 5706 struct sge_eq *eq = &txq->eq; 5707 struct sglist *gl = txq->gl; 5708 struct sglist_seg *seg; 5709 __be64 *flitp, *wrap; 5710 struct ulptx_sgl *usgl; 5711 int i, nflits, nsegs; 5712 5713 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 5714 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 5715 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5716 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5717 5718 get_pkt_gl(m, gl); 5719 nsegs = gl->sg_nseg; 5720 MPASS(nsegs > 0); 5721 5722 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 5723 flitp = (__be64 *)(*to); 5724 wrap = (__be64 *)(&eq->desc[eq->sidx]); 5725 seg = &gl->sg_segs[0]; 5726 usgl = (void *)flitp; 5727 5728 /* 5729 * We start at a 16 byte boundary somewhere inside the tx descriptor 5730 * ring, so we're at least 16 bytes away from the status page. There is 5731 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
5732 */ 5733 5734 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 5735 V_ULPTX_NSGE(nsegs)); 5736 usgl->len0 = htobe32(seg->ss_len); 5737 usgl->addr0 = htobe64(seg->ss_paddr); 5738 seg++; 5739 5740 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 5741 5742 /* Won't wrap around at all */ 5743 5744 for (i = 0; i < nsegs - 1; i++, seg++) { 5745 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 5746 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 5747 } 5748 if (i & 1) 5749 usgl->sge[i / 2].len[1] = htobe32(0); 5750 flitp += nflits; 5751 } else { 5752 5753 /* Will wrap somewhere in the rest of the SGL */ 5754 5755 /* 2 flits already written, write the rest flit by flit */ 5756 flitp = (void *)(usgl + 1); 5757 for (i = 0; i < nflits - 2; i++) { 5758 if (flitp == wrap) 5759 flitp = (void *)eq->desc; 5760 *flitp++ = get_flit(seg, nsegs - 1, i); 5761 } 5762 } 5763 5764 if (nflits & 1) { 5765 MPASS(((uintptr_t)flitp) & 0xf); 5766 *flitp++ = 0; 5767 } 5768 5769 MPASS((((uintptr_t)flitp) & 0xf) == 0); 5770 if (__predict_false(flitp == wrap)) 5771 *to = (void *)eq->desc; 5772 else 5773 *to = (void *)flitp; 5774 } 5775 5776 static inline void 5777 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 5778 { 5779 5780 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5781 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5782 5783 if (__predict_true((uintptr_t)(*to) + len <= 5784 (uintptr_t)&eq->desc[eq->sidx])) { 5785 bcopy(from, *to, len); 5786 (*to) += len; 5787 } else { 5788 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 5789 5790 bcopy(from, *to, portion); 5791 from += portion; 5792 portion = len - portion; /* remaining */ 5793 bcopy(from, (void *)eq->desc, portion); 5794 (*to) = (caddr_t)eq->desc + portion; 5795 } 5796 } 5797 5798 static inline void 5799 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 5800 { 5801 u_int db; 5802 5803 MPASS(n > 0); 5804 5805 db = eq->doorbells; 5806 if 
(n > 1) 5807 clrbit(&db, DOORBELL_WCWR); 5808 wmb(); 5809 5810 switch (ffs(db) - 1) { 5811 case DOORBELL_UDB: 5812 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5813 break; 5814 5815 case DOORBELL_WCWR: { 5816 volatile uint64_t *dst, *src; 5817 int i; 5818 5819 /* 5820 * Queues whose 128B doorbell segment fits in the page do not 5821 * use relative qid (udb_qid is always 0). Only queues with 5822 * doorbell segments can do WCWR. 5823 */ 5824 KASSERT(eq->udb_qid == 0 && n == 1, 5825 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 5826 __func__, eq->doorbells, n, eq->dbidx, eq)); 5827 5828 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 5829 UDBS_DB_OFFSET); 5830 i = eq->dbidx; 5831 src = (void *)&eq->desc[i]; 5832 while (src != (void *)&eq->desc[i + 1]) 5833 *dst++ = *src++; 5834 wmb(); 5835 break; 5836 } 5837 5838 case DOORBELL_UDBWC: 5839 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5840 wmb(); 5841 break; 5842 5843 case DOORBELL_KDB: 5844 t4_write_reg(sc, sc->sge_kdoorbell_reg, 5845 V_QID(eq->cntxt_id) | V_PIDX(n)); 5846 break; 5847 } 5848 5849 IDXINCR(eq->dbidx, n, eq->sidx); 5850 } 5851 5852 static inline u_int 5853 reclaimable_tx_desc(struct sge_eq *eq) 5854 { 5855 uint16_t hw_cidx; 5856 5857 hw_cidx = read_hw_cidx(eq); 5858 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 5859 } 5860 5861 static inline u_int 5862 total_available_tx_desc(struct sge_eq *eq) 5863 { 5864 uint16_t hw_cidx, pidx; 5865 5866 hw_cidx = read_hw_cidx(eq); 5867 pidx = eq->pidx; 5868 5869 if (pidx == hw_cidx) 5870 return (eq->sidx - 1); 5871 else 5872 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 5873 } 5874 5875 static inline uint16_t 5876 read_hw_cidx(struct sge_eq *eq) 5877 { 5878 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 5879 uint16_t cidx = spg->cidx; /* stable snapshot */ 5880 5881 return (be16toh(cidx)); 5882 } 5883 5884 /* 5885 * Reclaim 'n' descriptors approximately. 
5886 */ 5887 static u_int 5888 reclaim_tx_descs(struct sge_txq *txq, u_int n) 5889 { 5890 struct tx_sdesc *txsd; 5891 struct sge_eq *eq = &txq->eq; 5892 u_int can_reclaim, reclaimed; 5893 5894 TXQ_LOCK_ASSERT_OWNED(txq); 5895 MPASS(n > 0); 5896 5897 reclaimed = 0; 5898 can_reclaim = reclaimable_tx_desc(eq); 5899 while (can_reclaim && reclaimed < n) { 5900 int ndesc; 5901 struct mbuf *m, *nextpkt; 5902 5903 txsd = &txq->sdesc[eq->cidx]; 5904 ndesc = txsd->desc_used; 5905 5906 /* Firmware doesn't return "partial" credits. */ 5907 KASSERT(can_reclaim >= ndesc, 5908 ("%s: unexpected number of credits: %d, %d", 5909 __func__, can_reclaim, ndesc)); 5910 KASSERT(ndesc != 0, 5911 ("%s: descriptor with no credits: cidx %d", 5912 __func__, eq->cidx)); 5913 5914 for (m = txsd->m; m != NULL; m = nextpkt) { 5915 nextpkt = m->m_nextpkt; 5916 m->m_nextpkt = NULL; 5917 m_freem(m); 5918 } 5919 reclaimed += ndesc; 5920 can_reclaim -= ndesc; 5921 IDXINCR(eq->cidx, ndesc, eq->sidx); 5922 } 5923 5924 return (reclaimed); 5925 } 5926 5927 static void 5928 tx_reclaim(void *arg, int n) 5929 { 5930 struct sge_txq *txq = arg; 5931 struct sge_eq *eq = &txq->eq; 5932 5933 do { 5934 if (TXQ_TRYLOCK(txq) == 0) 5935 break; 5936 n = reclaim_tx_descs(txq, 32); 5937 if (eq->cidx == eq->pidx) 5938 eq->equeqidx = eq->pidx; 5939 TXQ_UNLOCK(txq); 5940 } while (n > 0); 5941 } 5942 5943 static __be64 5944 get_flit(struct sglist_seg *segs, int nsegs, int idx) 5945 { 5946 int i = (idx / 3) * 2; 5947 5948 switch (idx % 3) { 5949 case 0: { 5950 uint64_t rc; 5951 5952 rc = (uint64_t)segs[i].ss_len << 32; 5953 if (i + 1 < nsegs) 5954 rc |= (uint64_t)(segs[i + 1].ss_len); 5955 5956 return (htobe64(rc)); 5957 } 5958 case 1: 5959 return (htobe64(segs[i].ss_paddr)); 5960 case 2: 5961 return (htobe64(segs[i + 1].ss_paddr)); 5962 } 5963 5964 return (0); 5965 } 5966 5967 static int 5968 find_refill_source(struct adapter *sc, int maxp, bool packing) 5969 { 5970 int i, zidx = -1; 5971 struct rx_buf_info *rxb = 
&sc->sge.rx_buf_info[0]; 5972 5973 if (packing) { 5974 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 5975 if (rxb->hwidx2 == -1) 5976 continue; 5977 if (rxb->size1 < PAGE_SIZE && 5978 rxb->size1 < largest_rx_cluster) 5979 continue; 5980 if (rxb->size1 > largest_rx_cluster) 5981 break; 5982 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE); 5983 if (rxb->size2 >= maxp) 5984 return (i); 5985 zidx = i; 5986 } 5987 } else { 5988 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 5989 if (rxb->hwidx1 == -1) 5990 continue; 5991 if (rxb->size1 > largest_rx_cluster) 5992 break; 5993 if (rxb->size1 >= maxp) 5994 return (i); 5995 zidx = i; 5996 } 5997 } 5998 5999 return (zidx); 6000 } 6001 6002 static void 6003 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 6004 { 6005 mtx_lock(&sc->sfl_lock); 6006 FL_LOCK(fl); 6007 if ((fl->flags & FL_DOOMED) == 0) { 6008 fl->flags |= FL_STARVING; 6009 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 6010 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 6011 } 6012 FL_UNLOCK(fl); 6013 mtx_unlock(&sc->sfl_lock); 6014 } 6015 6016 static void 6017 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 6018 { 6019 struct sge_wrq *wrq = (void *)eq; 6020 6021 atomic_readandclear_int(&eq->equiq); 6022 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 6023 } 6024 6025 static void 6026 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 6027 { 6028 struct sge_txq *txq = (void *)eq; 6029 6030 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 6031 6032 atomic_readandclear_int(&eq->equiq); 6033 if (mp_ring_is_idle(txq->r)) 6034 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 6035 else 6036 mp_ring_check_drainage(txq->r, 64); 6037 } 6038 6039 static int 6040 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 6041 struct mbuf *m) 6042 { 6043 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 6044 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 6045 struct adapter *sc = iq->adapter; 
6046 struct sge *s = &sc->sge; 6047 struct sge_eq *eq; 6048 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 6049 &handle_wrq_egr_update, &handle_eth_egr_update, 6050 &handle_wrq_egr_update}; 6051 6052 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6053 rss->opcode)); 6054 6055 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 6056 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 6057 6058 return (0); 6059 } 6060 6061 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 6062 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 6063 offsetof(struct cpl_fw6_msg, data)); 6064 6065 static int 6066 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 6067 { 6068 struct adapter *sc = iq->adapter; 6069 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 6070 6071 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6072 rss->opcode)); 6073 6074 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 6075 const struct rss_header *rss2; 6076 6077 rss2 = (const struct rss_header *)&cpl->data[0]; 6078 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 6079 } 6080 6081 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 6082 } 6083 6084 /** 6085 * t4_handle_wrerr_rpl - process a FW work request error message 6086 * @adap: the adapter 6087 * @rpl: start of the FW message 6088 */ 6089 static int 6090 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 6091 { 6092 u8 opcode = *(const u8 *)rpl; 6093 const struct fw_error_cmd *e = (const void *)rpl; 6094 unsigned int i; 6095 6096 if (opcode != FW_ERROR_CMD) { 6097 log(LOG_ERR, 6098 "%s: Received WRERR_RPL message with opcode %#x\n", 6099 device_get_nameunit(adap->dev), opcode); 6100 return (EINVAL); 6101 } 6102 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 6103 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? 
"fatal" : 6104 "non-fatal"); 6105 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 6106 case FW_ERROR_TYPE_EXCEPTION: 6107 log(LOG_ERR, "exception info:\n"); 6108 for (i = 0; i < nitems(e->u.exception.info); i++) 6109 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 6110 be32toh(e->u.exception.info[i])); 6111 log(LOG_ERR, "\n"); 6112 break; 6113 case FW_ERROR_TYPE_HWMODULE: 6114 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 6115 be32toh(e->u.hwmodule.regaddr), 6116 be32toh(e->u.hwmodule.regval)); 6117 break; 6118 case FW_ERROR_TYPE_WR: 6119 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 6120 be16toh(e->u.wr.cidx), 6121 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 6122 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 6123 be32toh(e->u.wr.eqid)); 6124 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 6125 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 6126 e->u.wr.wrhdr[i]); 6127 log(LOG_ERR, "\n"); 6128 break; 6129 case FW_ERROR_TYPE_ACL: 6130 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 6131 be16toh(e->u.acl.cidx), 6132 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 6133 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 6134 be32toh(e->u.acl.eqid), 6135 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 6136 "MAC"); 6137 for (i = 0; i < nitems(e->u.acl.val); i++) 6138 log(LOG_ERR, " %02x", e->u.acl.val[i]); 6139 log(LOG_ERR, "\n"); 6140 break; 6141 default: 6142 log(LOG_ERR, "type %#x\n", 6143 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 6144 return (EINVAL); 6145 } 6146 return (0); 6147 } 6148 6149 static int 6150 sysctl_uint16(SYSCTL_HANDLER_ARGS) 6151 { 6152 uint16_t *id = arg1; 6153 int i = *id; 6154 6155 return sysctl_handle_int(oidp, &i, 0, req); 6156 } 6157 6158 static inline bool 6159 bufidx_used(struct adapter *sc, int idx) 6160 { 6161 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6162 int i; 6163 6164 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6165 if (rxb->size1 > largest_rx_cluster) 6166 continue; 6167 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx) 6168 return (true); 6169 } 6170 6171 return (false); 6172 } 6173 6174 static int 6175 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 6176 { 6177 struct adapter *sc = arg1; 6178 struct sge_params *sp = &sc->params.sge; 6179 int i, rc; 6180 struct sbuf sb; 6181 char c; 6182 6183 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); 6184 for (i = 0; i < SGE_FLBUF_SIZES; i++) { 6185 if (bufidx_used(sc, i)) 6186 c = '*'; 6187 else 6188 c = '\0'; 6189 6190 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c); 6191 } 6192 sbuf_trim(&sb); 6193 sbuf_finish(&sb); 6194 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 6195 sbuf_delete(&sb); 6196 return (rc); 6197 } 6198 6199 #ifdef RATELIMIT 6200 /* 6201 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 
6202 */ 6203 static inline u_int 6204 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 6205 { 6206 u_int n; 6207 6208 MPASS(immhdrs > 0); 6209 6210 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 6211 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 6212 if (__predict_false(nsegs == 0)) 6213 goto done; 6214 6215 nsegs--; /* first segment is part of ulptx_sgl */ 6216 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 6217 if (tso) 6218 n += sizeof(struct cpl_tx_pkt_lso_core); 6219 6220 done: 6221 return (howmany(n, 16)); 6222 } 6223 6224 #define ETID_FLOWC_NPARAMS 6 6225 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 6226 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 6227 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 6228 6229 static int 6230 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, 6231 struct vi_info *vi) 6232 { 6233 struct wrq_cookie cookie; 6234 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN; 6235 struct fw_flowc_wr *flowc; 6236 6237 mtx_assert(&cst->lock, MA_OWNED); 6238 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 6239 EO_FLOWC_PENDING); 6240 6241 flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie); 6242 if (__predict_false(flowc == NULL)) 6243 return (ENOMEM); 6244 6245 bzero(flowc, ETID_FLOWC_LEN); 6246 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6247 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 6248 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 6249 V_FW_WR_FLOWID(cst->etid)); 6250 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 6251 flowc->mnemval[0].val = htobe32(pfvf); 6252 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 6253 flowc->mnemval[1].val = htobe32(pi->tx_chan); 6254 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 6255 flowc->mnemval[2].val = htobe32(pi->tx_chan); 6256 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 6257 flowc->mnemval[3].val = htobe32(cst->iqid); 6258 
flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 6259 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 6260 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 6261 flowc->mnemval[5].val = htobe32(cst->schedcl); 6262 6263 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 6264 6265 cst->flags &= ~EO_FLOWC_PENDING; 6266 cst->flags |= EO_FLOWC_RPL_PENDING; 6267 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 6268 cst->tx_credits -= ETID_FLOWC_LEN16; 6269 6270 return (0); 6271 } 6272 6273 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 6274 6275 void 6276 send_etid_flush_wr(struct cxgbe_rate_tag *cst) 6277 { 6278 struct fw_flowc_wr *flowc; 6279 struct wrq_cookie cookie; 6280 6281 mtx_assert(&cst->lock, MA_OWNED); 6282 6283 flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie); 6284 if (__predict_false(flowc == NULL)) 6285 CXGBE_UNIMPLEMENTED(__func__); 6286 6287 bzero(flowc, ETID_FLUSH_LEN16 * 16); 6288 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6289 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 6290 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 6291 V_FW_WR_FLOWID(cst->etid)); 6292 6293 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 6294 6295 cst->flags |= EO_FLUSH_RPL_PENDING; 6296 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 6297 cst->tx_credits -= ETID_FLUSH_LEN16; 6298 cst->ncompl++; 6299 } 6300 6301 static void 6302 write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, 6303 struct mbuf *m0, int compl) 6304 { 6305 struct cpl_tx_pkt_core *cpl; 6306 uint64_t ctrl1; 6307 uint32_t ctrl; /* used in many unrelated places */ 6308 int len16, pktlen, nsegs, immhdrs; 6309 caddr_t dst; 6310 uintptr_t p; 6311 struct ulptx_sgl *usgl; 6312 struct sglist sg; 6313 struct sglist_seg segs[38]; /* XXX: find real limit. 
XXX: get off the stack */ 6314 6315 mtx_assert(&cst->lock, MA_OWNED); 6316 M_ASSERTPKTHDR(m0); 6317 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 6318 m0->m_pkthdr.l4hlen > 0, 6319 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 6320 6321 len16 = mbuf_eo_len16(m0); 6322 nsegs = mbuf_eo_nsegs(m0); 6323 pktlen = m0->m_pkthdr.len; 6324 ctrl = sizeof(struct cpl_tx_pkt_core); 6325 if (needs_tso(m0)) 6326 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 6327 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 6328 ctrl += immhdrs; 6329 6330 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 6331 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 6332 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 6333 V_FW_WR_FLOWID(cst->etid)); 6334 wr->r3 = 0; 6335 if (needs_outer_udp_csum(m0)) { 6336 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 6337 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 6338 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6339 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 6340 wr->u.udpseg.rtplen = 0; 6341 wr->u.udpseg.r4 = 0; 6342 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 6343 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 6344 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 6345 cpl = (void *)(wr + 1); 6346 } else { 6347 MPASS(needs_outer_tcp_csum(m0)); 6348 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 6349 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 6350 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6351 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 6352 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 6353 wr->u.tcpseg.r4 = 0; 6354 wr->u.tcpseg.r5 = 0; 6355 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 6356 6357 if (needs_tso(m0)) { 6358 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 6359 6360 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 6361 6362 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 6363 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 6364 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen 
- 6365 ETHER_HDR_LEN) >> 2) | 6366 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 6367 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 6368 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 6369 ctrl |= F_LSO_IPV6; 6370 lso->lso_ctrl = htobe32(ctrl); 6371 lso->ipid_ofst = htobe16(0); 6372 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 6373 lso->seqno_offset = htobe32(0); 6374 lso->len = htobe32(pktlen); 6375 6376 cpl = (void *)(lso + 1); 6377 } else { 6378 wr->u.tcpseg.mss = htobe16(0xffff); 6379 cpl = (void *)(wr + 1); 6380 } 6381 } 6382 6383 /* Checksum offload must be requested for ethofld. */ 6384 MPASS(needs_outer_l4_csum(m0)); 6385 ctrl1 = csum_to_ctrl(cst->adapter, m0); 6386 6387 /* VLAN tag insertion */ 6388 if (needs_vlan_insertion(m0)) { 6389 ctrl1 |= F_TXPKT_VLAN_VLD | 6390 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 6391 } 6392 6393 /* CPL header */ 6394 cpl->ctrl0 = cst->ctrl0; 6395 cpl->pack = 0; 6396 cpl->len = htobe16(pktlen); 6397 cpl->ctrl1 = htobe64(ctrl1); 6398 6399 /* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */ 6400 p = (uintptr_t)(cpl + 1); 6401 m_copydata(m0, 0, immhdrs, (void *)p); 6402 6403 /* SGL */ 6404 dst = (void *)(cpl + 1); 6405 if (nsegs > 0) { 6406 int i, pad; 6407 6408 /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ 6409 p += immhdrs; 6410 pad = 16 - (immhdrs & 0xf); 6411 bzero((void *)p, pad); 6412 6413 usgl = (void *)(p + pad); 6414 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6415 V_ULPTX_NSGE(nsegs)); 6416 6417 sglist_init(&sg, nitems(segs), segs); 6418 for (; m0 != NULL; m0 = m0->m_next) { 6419 if (__predict_false(m0->m_len == 0)) 6420 continue; 6421 if (immhdrs >= m0->m_len) { 6422 immhdrs -= m0->m_len; 6423 continue; 6424 } 6425 if (m0->m_flags & M_EXTPG) 6426 sglist_append_mbuf_epg(&sg, m0, 6427 mtod(m0, vm_offset_t), m0->m_len); 6428 else 6429 sglist_append(&sg, mtod(m0, char *) + immhdrs, 6430 m0->m_len - immhdrs); 6431 immhdrs = 0; 6432 } 6433 MPASS(sg.sg_nseg == nsegs); 6434 6435 /* 6436 * Zero pad 
last 8B in case the WR doesn't end on a 16B 6437 * boundary. 6438 */ 6439 *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; 6440 6441 usgl->len0 = htobe32(segs[0].ss_len); 6442 usgl->addr0 = htobe64(segs[0].ss_paddr); 6443 for (i = 0; i < nsegs - 1; i++) { 6444 usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); 6445 usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); 6446 } 6447 if (i & 1) 6448 usgl->sge[i / 2].len[1] = htobe32(0); 6449 } 6450 6451 } 6452 6453 static void 6454 ethofld_tx(struct cxgbe_rate_tag *cst) 6455 { 6456 struct mbuf *m; 6457 struct wrq_cookie cookie; 6458 int next_credits, compl; 6459 struct fw_eth_tx_eo_wr *wr; 6460 6461 mtx_assert(&cst->lock, MA_OWNED); 6462 6463 while ((m = mbufq_first(&cst->pending_tx)) != NULL) { 6464 M_ASSERTPKTHDR(m); 6465 6466 /* How many len16 credits do we need to send this mbuf. */ 6467 next_credits = mbuf_eo_len16(m); 6468 MPASS(next_credits > 0); 6469 if (next_credits > cst->tx_credits) { 6470 /* 6471 * Tx will make progress eventually because there is at 6472 * least one outstanding fw4_ack that will return 6473 * credits and kick the tx. 6474 */ 6475 MPASS(cst->ncompl > 0); 6476 return; 6477 } 6478 wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie); 6479 if (__predict_false(wr == NULL)) { 6480 /* XXX: wishful thinking, not a real assertion. */ 6481 MPASS(cst->ncompl > 0); 6482 return; 6483 } 6484 cst->tx_credits -= next_credits; 6485 cst->tx_nocompl += next_credits; 6486 compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; 6487 ETHER_BPF_MTAP(cst->com.ifp, m); 6488 write_ethofld_wr(cst, wr, m, compl); 6489 commit_wrq_wr(cst->eo_txq, wr, &cookie); 6490 if (compl) { 6491 cst->ncompl++; 6492 cst->tx_nocompl = 0; 6493 } 6494 (void) mbufq_dequeue(&cst->pending_tx); 6495 6496 /* 6497 * Drop the mbuf's reference on the tag now rather 6498 * than waiting until m_freem(). 
This ensures that 6499 * cxgbe_rate_tag_free gets called when the inp drops 6500 * its reference on the tag and there are no more 6501 * mbufs in the pending_tx queue and can flush any 6502 * pending requests. Otherwise if the last mbuf 6503 * doesn't request a completion the etid will never be 6504 * released. 6505 */ 6506 m->m_pkthdr.snd_tag = NULL; 6507 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 6508 m_snd_tag_rele(&cst->com); 6509 6510 mbufq_enqueue(&cst->pending_fwack, m); 6511 } 6512 } 6513 6514 int 6515 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 6516 { 6517 struct cxgbe_rate_tag *cst; 6518 int rc; 6519 6520 MPASS(m0->m_nextpkt == NULL); 6521 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); 6522 MPASS(m0->m_pkthdr.snd_tag != NULL); 6523 cst = mst_to_crt(m0->m_pkthdr.snd_tag); 6524 6525 mtx_lock(&cst->lock); 6526 MPASS(cst->flags & EO_SND_TAG_REF); 6527 6528 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 6529 struct vi_info *vi = ifp->if_softc; 6530 struct port_info *pi = vi->pi; 6531 struct adapter *sc = pi->adapter; 6532 const uint32_t rss_mask = vi->rss_size - 1; 6533 uint32_t rss_hash; 6534 6535 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 6536 if (M_HASHTYPE_ISHASH(m0)) 6537 rss_hash = m0->m_pkthdr.flowid; 6538 else 6539 rss_hash = arc4random(); 6540 /* We assume RSS hashing */ 6541 cst->iqid = vi->rss[rss_hash & rss_mask]; 6542 cst->eo_txq += rss_hash % vi->nofldtxq; 6543 rc = send_etid_flowc_wr(cst, pi, vi); 6544 if (rc != 0) 6545 goto done; 6546 } 6547 6548 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 6549 rc = ENOBUFS; 6550 goto done; 6551 } 6552 6553 mbufq_enqueue(&cst->pending_tx, m0); 6554 cst->plen += m0->m_pkthdr.len; 6555 6556 /* 6557 * Hold an extra reference on the tag while generating work 6558 * requests to ensure that we don't try to free the tag during 6559 * ethofld_tx() in case we are sending the final mbuf after 6560 * the inp was freed. 
6561 */ 6562 m_snd_tag_ref(&cst->com); 6563 ethofld_tx(cst); 6564 mtx_unlock(&cst->lock); 6565 m_snd_tag_rele(&cst->com); 6566 return (0); 6567 6568 done: 6569 mtx_unlock(&cst->lock); 6570 if (__predict_false(rc != 0)) 6571 m_freem(m0); 6572 return (rc); 6573 } 6574 6575 static int 6576 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 6577 { 6578 struct adapter *sc = iq->adapter; 6579 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 6580 struct mbuf *m; 6581 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 6582 struct cxgbe_rate_tag *cst; 6583 uint8_t credits = cpl->credits; 6584 6585 cst = lookup_etid(sc, etid); 6586 mtx_lock(&cst->lock); 6587 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 6588 MPASS(credits >= ETID_FLOWC_LEN16); 6589 credits -= ETID_FLOWC_LEN16; 6590 cst->flags &= ~EO_FLOWC_RPL_PENDING; 6591 } 6592 6593 KASSERT(cst->ncompl > 0, 6594 ("%s: etid %u (%p) wasn't expecting completion.", 6595 __func__, etid, cst)); 6596 cst->ncompl--; 6597 6598 while (credits > 0) { 6599 m = mbufq_dequeue(&cst->pending_fwack); 6600 if (__predict_false(m == NULL)) { 6601 /* 6602 * The remaining credits are for the final flush that 6603 * was issued when the tag was freed by the kernel. 6604 */ 6605 MPASS((cst->flags & 6606 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 6607 EO_FLUSH_RPL_PENDING); 6608 MPASS(credits == ETID_FLUSH_LEN16); 6609 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 6610 MPASS(cst->ncompl == 0); 6611 6612 cst->flags &= ~EO_FLUSH_RPL_PENDING; 6613 cst->tx_credits += cpl->credits; 6614 cxgbe_rate_tag_free_locked(cst); 6615 return (0); /* cst is gone. 
*/ 6616 } 6617 KASSERT(m != NULL, 6618 ("%s: too many credits (%u, %u)", __func__, cpl->credits, 6619 credits)); 6620 KASSERT(credits >= mbuf_eo_len16(m), 6621 ("%s: too few credits (%u, %u, %u)", __func__, 6622 cpl->credits, credits, mbuf_eo_len16(m))); 6623 credits -= mbuf_eo_len16(m); 6624 cst->plen -= m->m_pkthdr.len; 6625 m_freem(m); 6626 } 6627 6628 cst->tx_credits += cpl->credits; 6629 MPASS(cst->tx_credits <= cst->tx_total); 6630 6631 if (cst->flags & EO_SND_TAG_REF) { 6632 /* 6633 * As with ethofld_transmit(), hold an extra reference 6634 * so that the tag is stable across ethold_tx(). 6635 */ 6636 m_snd_tag_ref(&cst->com); 6637 m = mbufq_first(&cst->pending_tx); 6638 if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) 6639 ethofld_tx(cst); 6640 mtx_unlock(&cst->lock); 6641 m_snd_tag_rele(&cst->com); 6642 } else { 6643 /* 6644 * There shouldn't be any pending packets if the tag 6645 * was freed by the kernel since any pending packet 6646 * should hold a reference to the tag. 6647 */ 6648 MPASS(mbufq_first(&cst->pending_tx) == NULL); 6649 mtx_unlock(&cst->lock); 6650 } 6651 6652 return (0); 6653 } 6654 #endif 6655