/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_kern_tls.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/eventhandler.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/ktls.h>
#include <sys/malloc.h>
#include <sys/msan.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/socketvar.h>
#include <sys/counter.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <net/if_vxlan.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <machine/in_cksum.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#ifdef DEV_NETMAP
#include <machine/bus.h>
#include <sys/selinfo.h>
#include <net/if_var.h>
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#endif

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"

#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif

/* Internal mbuf flags stored in PH_loc.eight[1]. */
#define	MC_NOMAP	0x01
#define	MC_RAW_WR	0x02
#define	MC_TLS		0x04

/*
 * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
 * 0-7 are valid values.
 */
static int fl_pktshift = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0,
    "payload DMA offset in rx buffer (bytes)");

/*
 * Pad ethernet payload up to this boundary.
 * -1: driver should figure out a good value.
 * 0: disable padding.
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
    "payload pad boundary (bytes)");

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
    "status page size (bytes)");

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
    "Congestion control for RX queues (0 = backpressure, 1 = drop)");

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
    0, "Enable buffer packing");

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
    "payload pack boundary (bytes)");

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
    &largest_rx_cluster, 0, "Largest rx cluster (bytes)");

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
    &safest_rx_cluster, 0, "Safe rx cluster (bytes)");

#ifdef RATELIMIT
/*
 * Knob to control TCP timestamp rewriting, and the granularity of the tick
 * used for rewriting.  -1 and 0-3 are all valid values.
 * -1: hardware should leave the TCP timestamps alone.
 * 0: 1ms
 * 1: 100us
 * 2: 10us
 * 3: 1us
 */
static int tsclk = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0,
    "Control TCP timestamp rewriting when using pacing");

static int eo_max_backlog = 1024 * 1024;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog,
    0, "Maximum backlog of ratelimited data per flow");
#endif

/*
 * The interrupt holdoff timers are multiplied by this value on T6+.
 * 1 and 3-17 (both inclusive) are legal values.
 */
static int tscale = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0,
    "Interrupt holdoff timer scale on T6+");
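
/*
 * Example (illustrative only, values are hypothetical): the knobs above are
 * loader tunables under hw.cxgbe.  A /boot/loader.conf entry such as
 *
 *	hw.cxgbe.fl_pad="0"
 *	hw.cxgbe.largest_rx_cluster="4096"
 *
 * would disable rx payload padding and cap rx cluster allocations at 4KB.
 */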

/*
 * Number of LRO entries in the lro_ctrl structure per rx queue.
 */
static int lro_entries = TCP_LRO_ENTRIES;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0,
    "Number of LRO entries per RX queue");

/*
 * This enables presorting of frames before they're fed into tcp_lro_rx.
 */
static int lro_mbufs = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0,
    "Enable presorting of LRO frames");

static counter_u64_t pullups;
SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, pullups, CTLFLAG_RD, &pullups,
    "Number of mbuf pullups performed");

static counter_u64_t defrags;
SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags,
    "Number of mbuf defrags performed");

static int t4_tx_coalesce = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce, CTLFLAG_RWTUN, &t4_tx_coalesce, 0,
    "tx coalescing allowed");

/*
 * The driver will make aggressive attempts at tx coalescing if it sees this
 * many packets eligible for coalescing in quick succession, with no more than
 * the specified gap in between the eth_tx calls that delivered the packets.
 */
static int t4_tx_coalesce_pkts = 32;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_pkts, CTLFLAG_RWTUN,
    &t4_tx_coalesce_pkts, 0,
    "# of consecutive packets (1 - 255) that will trigger tx coalescing");
static int t4_tx_coalesce_gap = 5;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_gap, CTLFLAG_RWTUN,
    &t4_tx_coalesce_gap, 0, "tx gap (in microseconds)");

static int service_iq(struct sge_iq *, int);
static int service_iq_fl(struct sge_iq *, int);
static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *,
    u_int);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
    int, int);
static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t,
    struct sge_iq *, char *);
static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *,
    struct sysctl_ctx_list *, struct sysctl_oid *);
static void free_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *);
static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_iq *);
static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *,
    struct sysctl_oid *, struct sge_fl *);
static int alloc_iq_fl_hwq(struct vi_info *, struct sge_iq *, struct sge_fl *);
static int free_iq_fl_hwq(struct adapter *, struct sge_iq *, struct sge_fl *);
static int alloc_fwq(struct adapter *);
static void free_fwq(struct adapter *);
static int alloc_ctrlq(struct adapter *, int);
static void free_ctrlq(struct adapter *, int);
static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, int);
static void free_rxq(struct vi_info *, struct sge_rxq *);
static void add_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_rxq *);
#ifdef TCP_OFFLOAD
static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int,
    int);
static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *);
static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_ofld_rxq *);
#endif
static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *);
#endif
static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *,
    struct sysctl_oid *);
static void free_eq(struct adapter *, struct sge_eq *);
static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *,
    struct sysctl_oid *, struct sge_eq *);
static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
static int free_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *);
static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *,
    struct sysctl_ctx_list *, struct sysctl_oid *);
static void free_wrq(struct adapter *, struct sge_wrq *);
static void add_wrq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_wrq *);
static int alloc_txq(struct vi_info *, struct sge_txq *, int);
static void free_txq(struct vi_info *, struct sge_txq *);
static void add_txq_sysctls(struct vi_info *, struct sysctl_ctx_list *,
    struct sysctl_oid *, struct sge_txq *);
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
static int alloc_ofld_txq(struct vi_info *, struct sge_ofld_txq *, int);
static void free_ofld_txq(struct vi_info *, struct sge_ofld_txq *);
static void add_ofld_txq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
    struct sge_ofld_txq *);
#endif
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int find_refill_source(struct adapter *, int, bool);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);

static inline void get_pkt_gl(struct mbuf *, struct sglist *);
static inline u_int txpkt_len16(u_int, const u_int);
static inline u_int txpkt_vm_len16(u_int, const u_int);
static inline void calculate_mbuf_len16(struct mbuf *, bool);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *,
    u_int);
static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *,
    struct mbuf *);
static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *,
    int, bool *);
static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *,
    int, bool *);
static u_int write_txpkts_wr(struct adapter *, struct sge_txq *);
static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *);
static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
static inline uint16_t read_hw_cidx(struct sge_eq *);
static inline u_int reclaimable_tx_desc(struct sge_eq *);
static inline u_int total_available_tx_desc(struct sge_eq *);
static u_int reclaim_tx_descs(struct sge_txq *, u_int);
static void tx_reclaim(void *, int);
static __be64 get_flit(struct sglist_seg *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *);
static void wrq_tx_drain(void *, int);
static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *);

static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
#ifdef RATELIMIT
#if defined(INET) || defined(INET6)
static inline u_int txpkt_eo_len16(u_int, u_int, u_int);
#endif
static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
#endif

static counter_u64_t extfree_refs;
static counter_u64_t extfree_rels;

an_handler_t t4_an_handler;
fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES];
cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS];
cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES];
cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES];
cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES];

void
t4_register_an_handler(an_handler_t h)
{
	uintptr_t *loc;

	MPASS(h == NULL || t4_an_handler == NULL);

	loc = (uintptr_t *)&t4_an_handler;
	atomic_store_rel_ptr(loc, (uintptr_t)h);
}

void
t4_register_fw_msg_handler(int type, fw_msg_handler_t h)
{
	uintptr_t *loc;

	MPASS(type < nitems(t4_fw_msg_handler));
	MPASS(h == NULL || t4_fw_msg_handler[type] == NULL);
	/*
	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the
	 * CPL handler dispatch table.  Reject any attempt to install a handler
	 * for this subtype.
	 */
	MPASS(type != FW_TYPE_RSSCPL);
	MPASS(type != FW6_TYPE_RSSCPL);

	loc = (uintptr_t *)&t4_fw_msg_handler[type];
	atomic_store_rel_ptr(loc, (uintptr_t)h);
}

void
t4_register_cpl_handler(int opcode, cpl_handler_t h)
{
	uintptr_t *loc;

	MPASS(opcode < nitems(t4_cpl_handler));
	MPASS(h == NULL || t4_cpl_handler[opcode] == NULL);

	loc = (uintptr_t *)&t4_cpl_handler[opcode];
	atomic_store_rel_ptr(loc, (uintptr_t)h);
}
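
/*
 * Illustrative usage (the handler name is hypothetical): an offload module
 * installs a CPL handler once at load time and clears it again on unload:
 *
 *	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
 *	...
 *	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
 */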

static int
set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
	u_int tid;
	int cookie;

	MPASS(m == NULL);

	tid = GET_TID(cpl);
	if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) {
		/*
		 * The return code for filter-write is put in the CPL cookie so
		 * we have to rely on the hardware tid (is_ftid) to determine
		 * that this is a response to a filter.
		 */
		cookie = CPL_COOKIE_FILTER;
	} else {
		cookie = G_COOKIE(cpl->cookie);
	}
	MPASS(cookie > CPL_COOKIE_RESERVED);
	MPASS(cookie < nitems(set_tcb_rpl_handlers));

	return (set_tcb_rpl_handlers[cookie](iq, rss, m));
}

static int
l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
	unsigned int cookie;

	MPASS(m == NULL);

	cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER;
	return (l2t_write_rpl_handlers[cookie](iq, rss, m));
}

static int
act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1);
	u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status)));

	MPASS(m == NULL);
	MPASS(cookie != CPL_COOKIE_RESERVED);

	return (act_open_rpl_handlers[cookie](iq, rss, m));
}

static int
abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	u_int cookie;

	MPASS(m == NULL);
	if (is_hashfilter(sc))
		cookie = CPL_COOKIE_HASHFILTER;
	else
		cookie = CPL_COOKIE_TOM;

	return (abort_rpl_rss_handlers[cookie](iq, rss, m));
}

static int
fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	u_int cookie;

	MPASS(m == NULL);
	if (is_etid(sc, tid))
		cookie = CPL_COOKIE_ETHOFLD;
	else
		cookie = CPL_COOKIE_TOM;

	return (fw4_ack_handlers[cookie](iq, rss, m));
}

static void
t4_init_shared_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler);
	t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler);
	t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler);
	t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler);
}

void
t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie)
{
	uintptr_t *loc;

	MPASS(opcode < nitems(t4_cpl_handler));
	MPASS(cookie > CPL_COOKIE_RESERVED);
	MPASS(cookie < NUM_CPL_COOKIES);
	MPASS(t4_cpl_handler[opcode] != NULL);

	switch (opcode) {
	case CPL_SET_TCB_RPL:
		loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie];
		break;
	case CPL_L2T_WRITE_RPL:
		loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie];
		break;
	case CPL_ACT_OPEN_RPL:
		loc = (uintptr_t *)&act_open_rpl_handlers[cookie];
		break;
	case CPL_ABORT_RPL_RSS:
		loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie];
		break;
	case CPL_FW4_ACK:
		loc = (uintptr_t *)&fw4_ack_handlers[cookie];
		break;
	default:
		MPASS(0);
		return;
	}
	MPASS(h == NULL || *loc == (uintptr_t)NULL);
	atomic_store_rel_ptr(loc, (uintptr_t)h);
}
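
/*
 * Illustrative usage (handler name is hypothetical): when two consumers share
 * an opcode, each registers with its own cookie and the dispatcher above
 * (e.g. set_tcb_rpl_handler) demultiplexes on that cookie:
 *
 *	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, tom_set_tcb_rpl,
 *	    CPL_COOKIE_TOM);
 */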

/*
 * Called on MOD_LOAD.  Validates and calculates the SGE tunables.
 */
void
t4_sge_modload(void)
{

	if (fl_pktshift < 0 || fl_pktshift > 7) {
		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
		    " using 0 instead.\n", fl_pktshift);
		fl_pktshift = 0;
	}

	if (spg_len != 64 && spg_len != 128) {
		int len;

#if defined(__i386__) || defined(__amd64__)
		len = cpu_clflush_line_size > 64 ? 128 : 64;
#else
		len = 64;
#endif
		if (spg_len != -1) {
			printf("Invalid hw.cxgbe.spg_len value (%d),"
			    " using %d instead.\n", spg_len, len);
		}
		spg_len = len;
	}

	if (cong_drop < -1 || cong_drop > 1) {
		printf("Invalid hw.cxgbe.cong_drop value (%d),"
		    " using 0 instead.\n", cong_drop);
		cong_drop = 0;
	}

	if (tscale != 1 && (tscale < 3 || tscale > 17)) {
		printf("Invalid hw.cxgbe.tscale value (%d),"
		    " using 1 instead.\n", tscale);
		tscale = 1;
	}

	if (largest_rx_cluster != MCLBYTES &&
	    largest_rx_cluster != MJUMPAGESIZE &&
	    largest_rx_cluster != MJUM9BYTES &&
	    largest_rx_cluster != MJUM16BYTES) {
		printf("Invalid hw.cxgbe.largest_rx_cluster value (%d),"
		    " using %d instead.\n", largest_rx_cluster, MJUM16BYTES);
		largest_rx_cluster = MJUM16BYTES;
	}

	if (safest_rx_cluster != MCLBYTES &&
	    safest_rx_cluster != MJUMPAGESIZE &&
	    safest_rx_cluster != MJUM9BYTES &&
	    safest_rx_cluster != MJUM16BYTES) {
		printf("Invalid hw.cxgbe.safest_rx_cluster value (%d),"
		    " using %d instead.\n", safest_rx_cluster, MJUMPAGESIZE);
		safest_rx_cluster = MJUMPAGESIZE;
	}

	extfree_refs = counter_u64_alloc(M_WAITOK);
	extfree_rels = counter_u64_alloc(M_WAITOK);
	pullups = counter_u64_alloc(M_WAITOK);
	defrags = counter_u64_alloc(M_WAITOK);
	counter_u64_zero(extfree_refs);
	counter_u64_zero(extfree_rels);
	counter_u64_zero(pullups);
	counter_u64_zero(defrags);

	t4_init_shared_cpl_handlers();
	t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg);
	t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg);
	t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
#ifdef RATELIMIT
	t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack,
	    CPL_COOKIE_ETHOFLD);
#endif
	t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
	t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl);
}

void
t4_sge_modunload(void)
{

	counter_u64_free(extfree_refs);
	counter_u64_free(extfree_rels);
	counter_u64_free(pullups);
	counter_u64_free(defrags);
}

uint64_t
t4_sge_extfree_refs(void)
{
	uint64_t refs, rels;

	rels = counter_u64_fetch(extfree_rels);
	refs = counter_u64_fetch(extfree_refs);

	return (refs - rels);
}

/* max 4096 */
#define MAX_PACK_BOUNDARY 512

static inline void
setup_pad_and_pack_boundaries(struct adapter *sc)
{
	uint32_t v, m;
	int pad, pack, pad_shift;

	pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT :
	    X_INGPADBOUNDARY_SHIFT;
	pad = fl_pad;
	if (fl_pad < (1 << pad_shift) ||
	    fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) ||
	    !powerof2(fl_pad)) {
		/*
		 * If there is any chance that we might use buffer packing and
		 * the chip is a T4, then pick 64 as the pad/pack boundary.
		 * Set it to the minimum allowed in all other cases.
		 */
		pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift;

		/*
		 * For fl_pad = 0 we'll still write a reasonable value to the
		 * register but all the freelists will opt out of padding.
		 * We'll complain here only if the user tried to set it to a
		 * value greater than 0 that was invalid.
		 */
		if (fl_pad > 0) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
			    " (%d), using %d instead.\n", fl_pad, pad);
		}
	}
	m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
	v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	if (is_t4(sc)) {
		if (fl_pack != -1 && fl_pack != pad) {
			/* Complain but carry on. */
			device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
			    " using %d instead.\n", fl_pack, pad);
		}
		return;
	}

	pack = fl_pack;
	if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
	    !powerof2(fl_pack)) {
		if (sc->params.pci.mps > MAX_PACK_BOUNDARY)
			pack = MAX_PACK_BOUNDARY;
		else
			pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
		MPASS(powerof2(pack));
		if (pack < 16)
			pack = 16;
		if (pack == 32)
			pack = 64;
		if (pack > 4096)
			pack = 4096;
		if (fl_pack != -1) {
			device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
			    " (%d), using %d instead.\n", fl_pack, pack);
		}
	}
	m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
	if (pack == 16)
		v = V_INGPACKBOUNDARY(0);
	else
		v = V_INGPACKBOUNDARY(ilog2(pack) - 5);

	MPASS(!is_t4(sc));	/* T4 doesn't have SGE_CONTROL2 */
	t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
}

/*
 * adap->params.vpd.cclk must be set up before this is called.
 */
void
t4_tweak_chip_settings(struct adapter *sc)
{
	int i, reg;
	uint32_t v, m;
	int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
	int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
	int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32};	/* 63 max */
	uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
	static int sw_buf_sizes[] = {
		MCLBYTES,
		MJUMPAGESIZE,
		MJUM9BYTES,
		MJUM16BYTES
	};

	KASSERT(sc->flags & MASTER_PF,
	    ("%s: trying to change chip settings when not master.", __func__));

	m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
	v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
	    V_EGRSTATUSPAGESIZE(spg_len == 128);
	t4_set_reg_field(sc, A_SGE_CONTROL, m, v);

	setup_pad_and_pack_boundaries(sc);

	v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
	    V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
	t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);

	t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096);
	t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536);
	reg = A_SGE_FL_BUFFER_SIZE2;
	for (i = 0; i < nitems(sw_buf_sizes); i++) {
		MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
		t4_write_reg(sc, reg, sw_buf_sizes[i]);
		reg += 4;
		MPASS(reg <= A_SGE_FL_BUFFER_SIZE15);
		t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE);
		reg += 4;
	}

	v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
	    V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]);
	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v);

	KASSERT(intr_timer[0] <= timer_max,
	    ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0],
	    timer_max));
	for (i = 1; i < nitems(intr_timer); i++) {
		KASSERT(intr_timer[i] >= intr_timer[i - 1],
		    ("%s: timers not listed in increasing order (%d)",
		    __func__, i));

		while (intr_timer[i] > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (chip_id(sc) >= CHELSIO_T6) {
		m = V_TSCALE(M_TSCALE);
		if (tscale == 1)
			v = 0;
		else
			v = V_TSCALE(tscale - 2);
		t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v);

		if (sc->debug_flags & DF_DISABLE_TCB_CACHE) {
			m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(M_WRTHRTHRESH);
			t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1);
			v &= ~m;
			v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(16);
			t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1);
		}
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	/*
	 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP.  These have been
	 * chosen with MAXPHYS = 128K in mind.  The largest DDP buffer that we
	 * may have to deal with is MAXPHYS + 1 page.
	 */
	v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4);
	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v);

	/* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */
	m = v = F_TDDPTAGTCB | F_ISCSITAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of 16.  Its
 * address must be 16B aligned.  If padding is in use the buffer's start and
 * end need to be aligned to the pad boundary as well.  We'll just make sure
 * that the size is a multiple of the pad boundary here, it is up to the
 * buffer allocation code to make sure the start of the buffer is aligned.
 */
static inline int
hwsz_ok(struct adapter *sc, int hwsz)
{
	int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}
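
/*
 * Example: with padding enabled and a pad boundary of 64, hwsz_ok accepts
 * 4096 (>= 64 and 4096 & 63 == 0) but rejects 4000; with fl_pad == 0 only
 * 16-byte multiples of at least 64 bytes are required, so 4000 would pass.
 */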

/*
 * Initialize the rx buffer sizes and figure out which zones the buffers will
 * be allocated from.
 */
void
t4_init_rx_buf_info(struct adapter *sc)
{
	struct sge *s = &sc->sge;
	struct sge_params *sp = &sc->params.sge;
	int i, j, n;
	static int sw_buf_sizes[] = {	/* Sorted by size */
		MCLBYTES,
		MJUMPAGESIZE,
		MJUM9BYTES,
		MJUM16BYTES
	};
	struct rx_buf_info *rxb;

	s->safe_zidx = -1;
	rxb = &s->rx_buf_info[0];
	for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
		rxb->size1 = sw_buf_sizes[i];
		rxb->zone = m_getzone(rxb->size1);
		rxb->type = m_gettype(rxb->size1);
		rxb->size2 = 0;
		rxb->hwidx1 = -1;
		rxb->hwidx2 = -1;
		for (j = 0; j < SGE_FLBUF_SIZES; j++) {
			int hwsize = sp->sge_fl_buffer_size[j];

			if (!hwsz_ok(sc, hwsize))
				continue;

			/* hwidx for size1 */
			if (rxb->hwidx1 == -1 && rxb->size1 == hwsize)
				rxb->hwidx1 = j;

			/* hwidx for size2 (buffer packing) */
			if (rxb->size1 - CL_METADATA_SIZE < hwsize)
				continue;
			n = rxb->size1 - hwsize - CL_METADATA_SIZE;
			if (n == 0) {
				rxb->hwidx2 = j;
				rxb->size2 = hwsize;
				break;	/* stop looking */
			}
			if (rxb->hwidx2 != -1) {
				if (n < sp->sge_fl_buffer_size[rxb->hwidx2] -
				    hwsize - CL_METADATA_SIZE) {
					rxb->hwidx2 = j;
					rxb->size2 = hwsize;
				}
			} else if (n <= 2 * CL_METADATA_SIZE) {
				rxb->hwidx2 = j;
				rxb->size2 = hwsize;
			}
		}
		if (rxb->hwidx2 != -1)
			sc->flags |= BUF_PACKING_OK;
		if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster)
			s->safe_zidx = i;
	}
}
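
/*
 * Example of the pairing above: for a 4KB cluster (MJUMPAGESIZE) size1
 * matches the hw buffer size 4096 programmed earlier, while size2 prefers
 * the 4096 - CL_METADATA_SIZE entry so that a packed buffer still leaves
 * room for the cluster metadata at the end; if no suitable entry passes
 * hwsz_ok, hwidx2 stays -1 and that zone is not used for buffer packing.
 */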

/*
 * Verify some basic SGE settings for the PF and VF driver, and other
 * miscellaneous settings for the PF driver.
 */
int
t4_verify_chip_settings(struct adapter *sc)
{
	struct sge_params *sp = &sc->params.sge;
	uint32_t m, v, r;
	int rc = 0;
	const uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);

	m = F_RXPKTCPLMODE;
	v = F_RXPKTCPLMODE;
	r = sp->sge_control;
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
		rc = EINVAL;
	}

	/*
	 * If this changes then every single use of PAGE_SHIFT in the driver
	 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift.
	 */
	if (sp->page_shift != PAGE_SHIFT) {
		device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r);
		rc = EINVAL;
	}

	if (sc->flags & IS_VF)
		return (0);

	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ);
	if (r != v) {
		device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r);
		if (sc->vres.ddp.size != 0)
			rc = EINVAL;
	}

	m = v = F_TDDPTAGTCB;
	r = t4_read_reg(sc, A_ULP_RX_CTL);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r);
		if (sc->vres.ddp.size != 0)
			rc = EINVAL;
	}

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	r = t4_read_reg(sc, A_TP_PARA_REG5);
	if ((r & m) != v) {
		device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r);
		if (sc->vres.ddp.size != 0)
			rc = EINVAL;
	}

	return (rc);
}

int
t4_create_dma_tag(struct adapter *sc)
{
	int rc;

	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
	    NULL, &sc->dmat);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create main DMA tag: %d\n", rc);
	}

	return (rc);
}

void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{
	struct sge_params *sp = &sc->params.sge;

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    sysctl_bufsizes, "A", "freelist buffer sizes");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
	    NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
	    NULL, sp->pad_boundary, "payload pad boundary (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
	    NULL, sp->spg_len, "status page size (bytes)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
	    NULL, cong_drop, "congestion drop setting");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
	    NULL, sp->pack_boundary, "payload pack boundary (bytes)");
}
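
/*
 * The nodes added above are read-only reflections of the values the chip was
 * actually configured with, e.g. (device name is illustrative and depends on
 * where the caller attaches these):
 *
 *	# sysctl dev.t4nex.0.fl_pktshift dev.t4nex.0.buffer_sizes
 */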

int
t4_destroy_dma_tag(struct adapter *sc)
{
	if (sc->dmat)
		bus_dma_tag_destroy(sc->dmat);

	return (0);
}

/*
 * Allocate and initialize the firmware event queue, control queues, and
 * special purpose rx queues owned by the adapter.
 *
 * Returns errno on failure.  Resources allocated up to that point may still
 * be allocated.  Caller is responsible for cleanup in case this function
 * fails.
 */
int
t4_setup_adapter_queues(struct adapter *sc)
{
	int rc, i;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	/*
	 * Firmware event queue
	 */
	rc = alloc_fwq(sc);
	if (rc != 0)
		return (rc);

	/*
	 * That's all for the VF driver.
	 */
	if (sc->flags & IS_VF)
		return (rc);

	/*
	 * XXX: General purpose rx queues, one per port.
	 */

	/*
	 * Control queues, one per port.
	 */
	for_each_port(sc, i) {
		rc = alloc_ctrlq(sc, i);
		if (rc != 0)
			return (rc);
	}

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_adapter_queues(struct adapter *sc)
{
	int i;

	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);

	if (sc->sge.ctrlq != NULL) {
		MPASS(!(sc->flags & IS_VF));	/* VFs don't allocate ctrlq. */
		for_each_port(sc, i)
			free_ctrlq(sc, i);
	}
	free_fwq(sc);

	return (0);
}

/* Maximum payload that could arrive with a single iq descriptor. */
static inline int
max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld)
{
	int maxp;

	/* large enough even when hw VLAN extraction is disabled */
	maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN +
	    ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
	if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
	    maxp < sc->params.tp.max_rx_pdu)
		maxp = sc->params.tp.max_rx_pdu;
	return (maxp);
}
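
/*
 * Example: with fl_pktshift 0 and a 1500 byte MTU the non-offload maximum is
 * 0 + 14 (Ethernet header) + 4 (VLAN tag) + 1500 = 1518 bytes; for TOE
 * queues this may be raised to the max_rx_pdu limit reported by the chip.
 */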

int
t4_setup_vi_queues(struct vi_info *vi)
{
	int rc = 0, i, intr_idx;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	struct sge_ofld_txq *ofld_txq;
#endif
#ifdef DEV_NETMAP
	int saved_idx, iqidx;
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif
	struct adapter *sc = vi->adapter;
	struct ifnet *ifp = vi->ifp;
	int maxp;

	/* Interrupt vector to start from (when using multiple vectors) */
	intr_idx = vi->first_intr;

#ifdef DEV_NETMAP
	saved_idx = intr_idx;
	if (ifp->if_capabilities & IFCAP_NETMAP) {

		/* netmap is supported with direct interrupts only. */
		MPASS(!forwarding_intr_to_fwq(sc));
		MPASS(vi->first_intr >= 0);

		/*
		 * We don't have buffers to back the netmap rx queues
		 * right now so we create the queues in a way that
		 * doesn't set off any congestion signal in the chip.
		 */
		for_each_nm_rxq(vi, i, nm_rxq) {
			rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i);
			if (rc != 0)
				goto done;
			intr_idx++;
		}

		for_each_nm_txq(vi, i, nm_txq) {
			iqidx = vi->first_nm_rxq + (i % vi->nnmrxq);
			rc = alloc_nm_txq(vi, nm_txq, iqidx, i);
			if (rc != 0)
				goto done;
		}
	}

	/* Normal rx queues and netmap rx queues share the same interrupts. */
	intr_idx = saved_idx;
#endif

	/*
	 * Allocate rx queues first because a default iqid is required when
	 * creating a tx queue.
	 */
	maxp = max_rx_payload(sc, ifp, false);
	for_each_rxq(vi, i, rxq) {
		rc = alloc_rxq(vi, rxq, i, intr_idx, maxp);
		if (rc != 0)
			goto done;
		if (!forwarding_intr_to_fwq(sc))
			intr_idx++;
	}
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq);
#endif
#ifdef TCP_OFFLOAD
	maxp = max_rx_payload(sc, ifp, true);
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		rc = alloc_ofld_rxq(vi, ofld_rxq, i, intr_idx, maxp);
		if (rc != 0)
			goto done;
		if (!forwarding_intr_to_fwq(sc))
			intr_idx++;
	}
#endif

	/*
	 * Now the tx queues.
	 */
	for_each_txq(vi, i, txq) {
		rc = alloc_txq(vi, txq, i);
		if (rc != 0)
			goto done;
	}
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	for_each_ofld_txq(vi, i, ofld_txq) {
		rc = alloc_ofld_txq(vi, ofld_txq, i);
		if (rc != 0)
			goto done;
	}
#endif
done:
	if (rc)
		t4_teardown_vi_queues(vi);

	return (rc);
}

/*
 * Idempotent
 */
int
t4_teardown_vi_queues(struct vi_info *vi)
{
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	struct sge_ofld_txq *ofld_txq;
#endif
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
	struct sge_nm_txq *nm_txq;
#endif

#ifdef DEV_NETMAP
	if (vi->ifp->if_capabilities & IFCAP_NETMAP) {
		for_each_nm_txq(vi, i, nm_txq) {
			free_nm_txq(vi, nm_txq);
		}

		for_each_nm_rxq(vi, i, nm_rxq) {
			free_nm_rxq(vi, nm_rxq);
		}
	}
#endif

	/*
	 * Take down all the tx queues first, as they reference the rx queues
	 * (for egress updates, etc.).
	 */

	for_each_txq(vi, i, txq) {
		free_txq(vi, txq);
	}
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	for_each_ofld_txq(vi, i, ofld_txq) {
		free_ofld_txq(vi, ofld_txq);
	}
#endif

	/*
	 * Then take down the rx queues.
	 */

	for_each_rxq(vi, i, rxq) {
		free_rxq(vi, rxq);
	}
#ifdef TCP_OFFLOAD
	for_each_ofld_rxq(vi, i, ofld_rxq) {
		free_ofld_rxq(vi, ofld_rxq);
	}
#endif

	return (0);
}

/*
 * Interrupt handler when the driver is using only 1 interrupt.  This is a
 * very unusual scenario.
 *
 * a) Deals with errors, if any.
 * b) Services firmware event queue, which is taking interrupts for all other
 *    queues.
 */
void
t4_intr_all(void *arg)
{
	struct adapter *sc = arg;
	struct sge_iq *fwq = &sc->sge.fwq;

	MPASS(sc->intr_count == 1);

	if (sc->intr_type == INTR_INTX)
		t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);

	t4_intr_err(arg);
	t4_intr_evt(fwq);
}

/*
 * Interrupt handler for errors (installed directly when multiple interrupts
 * are being used, or called by t4_intr_all).
 */
void
t4_intr_err(void *arg)
{
	struct adapter *sc = arg;
	uint32_t v;
	const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0;

	if (atomic_load_int(&sc->error_flags) & ADAP_FATAL_ERR)
		return;

	v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE));
	if (v & F_PFSW) {
		sc->swintr++;
		t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v);
	}

	if (t4_slow_intr_handler(sc, verbose))
		t4_fatal_err(sc, false);
}

/*
 * Interrupt handler for iq-only queues.  The firmware event queue is the only
 * such queue right now.
 */
void
t4_intr_evt(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq(iq, 0);
		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}
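
/*
 * Note: the interrupt handlers above and below serialize on the iq's state
 * with atomic_cmpset (IQS_IDLE -> IQS_BUSY), so a queue is only ever
 * serviced by one thread at a time and a handler that loses the race simply
 * returns.
 */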

/*
 * Interrupt handler for iq+fl queues.
 */
void
t4_intr(void *arg)
{
	struct sge_iq *iq = arg;

	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
		service_iq_fl(iq, 0);
		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
	}
}

#ifdef DEV_NETMAP
/*
 * Interrupt handler for netmap rx queues.
 */
void
t4_nm_intr(void *arg)
{
	struct sge_nm_rxq *nm_rxq = arg;

	if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) {
		service_nm_rxq(nm_rxq);
		(void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON);
	}
}

/*
 * Interrupt handler for vectors shared between NIC and netmap rx queues.
 */
void
t4_vi_intr(void *arg)
{
	struct irq *irq = arg;

	MPASS(irq->nm_rxq != NULL);
	t4_nm_intr(irq->nm_rxq);

	MPASS(irq->rxq != NULL);
	t4_intr(irq->rxq);
}
#endif

/*
 * Deals with interrupts on an iq-only (no freelist) queue.
 */
static int
service_iq(struct sge_iq *iq, int budget)
{
	struct sge_iq *q;
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs = 0, limit;
	int rsp_type;
	uint32_t lq;
	STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
	KASSERT((iq->flags & IQ_HAS_FL) == 0,
	    ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq,
	    iq->flags));
	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
	MPASS((iq->flags & IQ_LRO_ENABLED) == 0);

	limit = budget ? budget : iq->qsize / 16;

	/*
	 * We always come back and check the descriptor ring for new indirect
	 * interrupts and other responses after running a single handler.
	 */
	for (;;) {
		while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

			rmb();

			rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
			lq = be32toh(d->rsp.pldbuflen_qid);

			switch (rsp_type) {
			case X_RSPD_TYPE_FLBUF:
				panic("%s: data for an iq (%p) with no freelist",
				    __func__, iq);

				/* NOTREACHED */

			case X_RSPD_TYPE_CPL:
				KASSERT(d->rss.opcode < NUM_CPL_CMDS,
				    ("%s: bad opcode %02x.", __func__,
				    d->rss.opcode));
				t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL);
				break;

			case X_RSPD_TYPE_INTR:
				/*
				 * There are 1K interrupt-capable queues (qids 0
				 * through 1023).  A response type indicating a
				 * forwarded interrupt with a qid >= 1K is an
				 * iWARP async notification.
				 */
				if (__predict_true(lq >= 1024)) {
					t4_an_handler(iq, &d->rsp);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start -
				    sc->sge.iq_base];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq_fl(q, q->qsize / 16) == 0) {
						(void) atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				t4_write_reg(sc, sc->sge_gts_reg,
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (budget) {
					return (EINPROGRESS);
				}
			}
		}

		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq_fl(q, q->qsize / 8) == 0)
			(void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	return (0);
}

#if defined(INET) || defined(INET6)
static inline int
sort_before_lro(struct lro_ctrl *lro)
{

	return (lro->lro_mbuf_max != 0);
}
#endif

static inline uint64_t
last_flit_to_ns(struct adapter *sc, uint64_t lf)
{
	uint64_t n = be64toh(lf) & 0xfffffffffffffff;	/* 60b, not 64b. */

	if (n > UINT64_MAX / 1000000)
		return (n / sc->params.vpd.cclk * 1000000);
	else
		return (n * 1000000 / sc->params.vpd.cclk);
}

static inline void
move_to_next_rxbuf(struct sge_fl *fl)
{

	fl->rx_offset = 0;
	if (__predict_false((++fl->cidx & 7) == 0)) {
		uint16_t cidx = fl->cidx >> 3;

		if (__predict_false(cidx == fl->sidx))
			fl->cidx = cidx = 0;
		fl->hw_cidx = cidx;
	}
}
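
/*
 * Note: fl->cidx above counts individual buffers while the hardware's view
 * (fl->hw_cidx) advances in units of 8, which is why it is updated only when
 * the low 3 bits of cidx wrap back to 0.
 */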

/*
 * Deals with interrupts on an iq+fl queue.
 */
static int
service_iq_fl(struct sge_iq *iq, int budget)
{
	struct sge_rxq *rxq = iq_to_rxq(iq);
	struct sge_fl *fl;
	struct adapter *sc = iq->adapter;
	struct iq_desc *d = &iq->desc[iq->cidx];
	int ndescs, limit;
	int rsp_type, starved;
	uint32_t lq;
	uint16_t fl_hw_cidx;
	struct mbuf *m0;
#if defined(INET) || defined(INET6)
	const struct timeval lro_timeout = {0, sc->lro_timeout};
	struct lro_ctrl *lro = &rxq->lro;
#endif

	KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
	MPASS(iq->flags & IQ_HAS_FL);

	ndescs = 0;
#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_ADJ_CREDIT) {
		MPASS(sort_before_lro(lro));
		iq->flags &= ~IQ_ADJ_CREDIT;
		if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) {
			tcp_lro_flush_all(lro);
			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) |
			    V_INGRESSQID((u32)iq->cntxt_id) |
			    V_SEINTARM(iq->intr_params));
			return (0);
		}
		ndescs = 1;
	}
#else
	MPASS((iq->flags & IQ_ADJ_CREDIT) == 0);
#endif

	limit = budget ? budget : iq->qsize / 16;
	fl = &rxq->fl;
	fl_hw_cidx = fl->hw_cidx;	/* stable snapshot */
	while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) {

		rmb();

		m0 = NULL;
		rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen);
		lq = be32toh(d->rsp.pldbuflen_qid);

		switch (rsp_type) {
		case X_RSPD_TYPE_FLBUF:
			if (lq & F_RSPD_NEWBUF) {
				if (fl->rx_offset > 0)
					move_to_next_rxbuf(fl);
				lq = G_RSPD_LEN(lq);
			}
			if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) {
				FL_LOCK(fl);
				refill_fl(sc, fl, 64);
				FL_UNLOCK(fl);
				fl_hw_cidx = fl->hw_cidx;
			}

			if (d->rss.opcode == CPL_RX_PKT) {
				if (__predict_true(eth_rx(sc, rxq, d, lq) == 0))
					break;
				goto out;
			}
			m0 = get_fl_payload(sc, fl, lq);
			if (__predict_false(m0 == NULL))
				goto out;

			/* fall through */

		case X_RSPD_TYPE_CPL:
			KASSERT(d->rss.opcode < NUM_CPL_CMDS,
			    ("%s: bad opcode %02x.", __func__, d->rss.opcode));
			t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0);
			break;

		case X_RSPD_TYPE_INTR:

			/*
			 * There are 1K interrupt-capable queues (qids 0
			 * through 1023).  A response type indicating a
			 * forwarded interrupt with a qid >= 1K is an
			 * iWARP async notification.  That is the only
			 * acceptable indirect interrupt on this queue.
			 */
			if (__predict_false(lq < 1024)) {
				panic("%s: indirect interrupt on iq_fl %p "
				    "with qid %u", __func__, iq, lq);
			}

			t4_an_handler(iq, &d->rsp);
			break;

		default:
			KASSERT(0, ("%s: illegal response type %d on iq %p",
			    __func__, rsp_type, iq));
			log(LOG_ERR, "%s: illegal response type %d on iq %p",
			    device_get_nameunit(sc->dev), rsp_type, iq);
			break;
		}

		d++;
		if (__predict_false(++iq->cidx == iq->sidx)) {
			iq->cidx = 0;
			iq->gen ^= F_RSPD_GEN;
			d = &iq->desc[0];
		}
		if (__predict_false(++ndescs == limit)) {
			t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
			    V_INGRESSQID(iq->cntxt_id) |
			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));

#if defined(INET) || defined(INET6)
			if (iq->flags & IQ_LRO_ENABLED &&
			    !sort_before_lro(lro) &&
			    sc->lro_timeout != 0) {
				tcp_lro_flush_inactive(lro, &lro_timeout);
			}
#endif
			if (budget)
				return (EINPROGRESS);
			ndescs = 0;
		}
	}
out:
#if defined(INET) || defined(INET6)
	if (iq->flags & IQ_LRO_ENABLED) {
		if (ndescs > 0 && lro->lro_mbuf_count > 8) {
			MPASS(sort_before_lro(lro));
			/* hold back one credit and don't flush LRO state */
			iq->flags |= IQ_ADJ_CREDIT;
			ndescs--;
		} else {
			tcp_lro_flush_all(lro);
		}
	}
#endif

	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	FL_LOCK(fl);
	starved = refill_fl(sc, fl, 64);
	FL_UNLOCK(fl);
	if (__predict_false(starved != 0))
		add_fl_to_sfl(sc, fl);

	return (0);
}
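
/*
 * Note on the IQ_ADJ_CREDIT dance above: when LRO presorting is in use, one
 * descriptor's worth of credit is held back instead of flushing the LRO
 * state, so the queue is re-entered (and the flush happens) the next time
 * the interrupt fires even if no new work has arrived.
 */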

static inline struct cluster_metadata *
cl_metadata(struct fl_sdesc *sd)
{

	return ((void *)(sd->cl + sd->moff));
}

static void
rxb_free(struct mbuf *m)
{
	struct cluster_metadata *clm = m->m_ext.ext_arg1;

	uma_zfree(clm->zone, clm->cl);
	counter_u64_add(extfree_rels, 1);
}

/*
 * The mbuf returned comes from zone_mbuf and carries the payload in one of
 * these ways
 * a) complete frame inside the mbuf
 * b) m_cljset (for clusters without metadata)
 * c) m_extaddref (cluster with metadata)
 */
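
/*
 * In practice (a) is taken for segments shorter than RX_COPY_THRESHOLD when
 * rx copying is enabled, (b) hands the whole cluster to a single mbuf, and
 * (c) is the buffer-packing case where several mbufs share one cluster via
 * the refcount in the cluster metadata (tracked by the extfree_refs /
 * extfree_rels counters).
 */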

static struct mbuf *
get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct mbuf *m;
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
	struct cluster_metadata *clm;
	int len, blen;
	caddr_t payload;

	if (fl->flags & FL_BUF_PACKING) {
		u_int l, pad;

		blen = rxb->size2 - fl->rx_offset;	/* max possible in this buf */
		len = min(remaining, blen);
		payload = sd->cl + fl->rx_offset;

		l = fr_offset + len;
		pad = roundup2(l, fl->buf_boundary) - l;
		if (fl->rx_offset + len + pad < rxb->size2)
			blen = len + pad;
		MPASS(fl->rx_offset + blen <= rxb->size2);
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
		blen = rxb->size1;
		len = min(remaining, blen);
		payload = sd->cl;
	}

	if (fr_offset == 0) {
		m = m_gethdr(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
		m->m_pkthdr.len = remaining;
	} else {
		m = m_get(M_NOWAIT, MT_DATA);
		if (__predict_false(m == NULL))
			return (NULL);
	}
	m->m_len = len;
	kmsan_mark(payload, len, KMSAN_STATE_INITED);

	if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
		/* copy data to mbuf */
		bcopy(payload, mtod(m, caddr_t), len);
		if (fl->flags & FL_BUF_PACKING) {
			fl->rx_offset += blen;
			MPASS(fl->rx_offset <= rxb->size2);
			if (fl->rx_offset < rxb->size2)
				return (m);	/* without advancing the cidx */
		}
	} else if (fl->flags & FL_BUF_PACKING) {
		clm = cl_metadata(sd);
		if (sd->nmbuf++ == 0) {
			clm->refcount = 1;
			clm->zone = rxb->zone;
			clm->cl = sd->cl;
			counter_u64_add(extfree_refs, 1);
		}
		m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm,
		    NULL);

		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= rxb->size2);
		if (fl->rx_offset < rxb->size2)
			return (m);	/* without advancing the cidx */
	} else {
		m_cljset(m, sd->cl, rxb->type);
		sd->cl = NULL;	/* consumed, not a recycle candidate */
	}

	move_to_next_rxbuf(fl);

	return (m);
}

static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen)
{
	struct mbuf *m0, *m, **pnext;
	u_int remaining;

	if (__predict_false(fl->flags & FL_BUF_RESUME)) {
		M_ASSERTPKTHDR(fl->m0);
		MPASS(fl->m0->m_pkthdr.len == plen);
		MPASS(fl->remaining < plen);

		m0 = fl->m0;
		pnext = fl->pnext;
		remaining = fl->remaining;
		fl->flags &= ~FL_BUF_RESUME;
		goto get_segment;
	}

	/*
	 * Payload starts at rx_offset in the current hw buffer.  Its length is
	 * 'len' and it may span multiple hw buffers.
	 */

	m0 = get_scatter_segment(sc, fl, 0, plen);
	if (m0 == NULL)
		return (NULL);
	remaining = plen - m0->m_len;
	pnext = &m0->m_next;
	while (remaining > 0) {
get_segment:
		MPASS(fl->rx_offset == 0);
		m = get_scatter_segment(sc, fl, plen - remaining, remaining);
		if (__predict_false(m == NULL)) {
			fl->m0 = m0;
			fl->pnext = pnext;
			fl->remaining = remaining;
			fl->flags |= FL_BUF_RESUME;
			return (NULL);
		}
		*pnext = m;
		pnext = &m->m_next;
		remaining -= m->m_len;
	}
	*pnext = NULL;

	M_ASSERTPKTHDR(m0);
	return (m0);
}

static int
skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
    int remaining)
{
	struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
	struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx];
	int len, blen;

	if (fl->flags & FL_BUF_PACKING) {
		u_int l, pad;

		blen = rxb->size2 - fl->rx_offset;	/* max possible in this buf */
		len = min(remaining, blen);

		l = fr_offset + len;
		pad = roundup2(l, fl->buf_boundary) - l;
		if (fl->rx_offset + len + pad < rxb->size2)
			blen = len + pad;
		fl->rx_offset += blen;
		MPASS(fl->rx_offset <= rxb->size2);
		if (fl->rx_offset < rxb->size2)
			return (len);	/* without advancing the cidx */
	} else {
		MPASS(fl->rx_offset == 0);	/* not packing */
		blen = rxb->size1;
		len = min(remaining, blen);
	}
	move_to_next_rxbuf(fl);
	return (len);
}

static inline void
skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen)
{
	int remaining, fr_offset, len;

	fr_offset = 0;
	remaining = plen;
	while (remaining > 0) {
		len = skip_scatter_segment(sc, fl, fr_offset, remaining);
		fr_offset += len;
		remaining -= len;
	}
}
get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen) 1884 { 1885 int len; 1886 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1887 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1888 1889 if (fl->flags & FL_BUF_PACKING) 1890 len = rxb->size2 - fl->rx_offset; 1891 else 1892 len = rxb->size1; 1893 1894 return (min(plen, len)); 1895 } 1896 1897 static int 1898 eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, 1899 u_int plen) 1900 { 1901 struct mbuf *m0; 1902 struct ifnet *ifp = rxq->ifp; 1903 struct sge_fl *fl = &rxq->fl; 1904 struct vi_info *vi = ifp->if_softc; 1905 const struct cpl_rx_pkt *cpl; 1906 #if defined(INET) || defined(INET6) 1907 struct lro_ctrl *lro = &rxq->lro; 1908 #endif 1909 uint16_t err_vec, tnl_type, tnlhdr_len; 1910 static const int sw_hashtype[4][2] = { 1911 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1912 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1913 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1914 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1915 }; 1916 static const int sw_csum_flags[2][2] = { 1917 { 1918 /* IP, inner IP */ 1919 CSUM_ENCAP_VXLAN | 1920 CSUM_L3_CALC | CSUM_L3_VALID | 1921 CSUM_L4_CALC | CSUM_L4_VALID | 1922 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1923 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1924 1925 /* IP, inner IP6 */ 1926 CSUM_ENCAP_VXLAN | 1927 CSUM_L3_CALC | CSUM_L3_VALID | 1928 CSUM_L4_CALC | CSUM_L4_VALID | 1929 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1930 }, 1931 { 1932 /* IP6, inner IP */ 1933 CSUM_ENCAP_VXLAN | 1934 CSUM_L4_CALC | CSUM_L4_VALID | 1935 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1936 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1937 1938 /* IP6, inner IP6 */ 1939 CSUM_ENCAP_VXLAN | 1940 CSUM_L4_CALC | CSUM_L4_VALID | 1941 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1942 }, 1943 }; 1944 1945 MPASS(plen > sc->params.sge.fl_pktshift); 1946 if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) && 1947 __predict_true((fl->flags & FL_BUF_RESUME) == 0)) { 1948 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1949 caddr_t frame; 1950 int rc, slen; 1951 1952 slen = get_segment_len(sc, fl, plen) - 1953 sc->params.sge.fl_pktshift; 1954 frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift; 1955 CURVNET_SET_QUIET(ifp->if_vnet); 1956 rc = pfil_run_hooks(vi->pfil, frame, ifp, 1957 slen | PFIL_MEMPTR | PFIL_IN, NULL); 1958 CURVNET_RESTORE(); 1959 if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) { 1960 skip_fl_payload(sc, fl, plen); 1961 return (0); 1962 } 1963 if (rc == PFIL_REALLOCED) { 1964 skip_fl_payload(sc, fl, plen); 1965 m0 = pfil_mem2mbuf(frame); 1966 goto have_mbuf; 1967 } 1968 } 1969 1970 m0 = get_fl_payload(sc, fl, plen); 1971 if (__predict_false(m0 == NULL)) 1972 return (ENOMEM); 1973 1974 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 1975 m0->m_len -= sc->params.sge.fl_pktshift; 1976 m0->m_data += sc->params.sge.fl_pktshift; 1977 1978 have_mbuf: 1979 m0->m_pkthdr.rcvif = ifp; 1980 M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]); 1981 m0->m_pkthdr.flowid = be32toh(d->rss.hash_val); 1982 1983 cpl = (const void *)(&d->rss + 1); 1984 if (sc->params.tp.rx_pkt_encap) { 1985 const uint16_t ev = be16toh(cpl->err_vec); 1986 1987 err_vec = G_T6_COMPR_RXERR_VEC(ev); 1988 tnl_type = G_T6_RX_TNL_TYPE(ev); 1989 tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev); 1990 } else { 1991 err_vec = be16toh(cpl->err_vec); 1992 tnl_type = 0; 1993 tnlhdr_len = 0; 1994 } 1995 if (cpl->csum_calc && err_vec == 0) { 1996 int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6)); 1997 1998 /* checksum(s) calculated and 
found to be correct. */ 1999 2000 MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^ 2001 (cpl->l2info & htobe32(F_RXF_IP6))); 2002 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 2003 if (tnl_type == 0) { 2004 if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) { 2005 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2006 CSUM_L3_VALID | CSUM_L4_CALC | 2007 CSUM_L4_VALID; 2008 } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) { 2009 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2010 CSUM_L4_VALID; 2011 } 2012 rxq->rxcsum++; 2013 } else { 2014 MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN); 2015 2016 M_HASHTYPE_SETINNER(m0); 2017 if (__predict_false(cpl->ip_frag)) { 2018 /* 2019 * csum_data is for the inner frame (which is an 2020 * IP fragment) and is not 0xffff. There is no 2021 * way to pass the inner csum_data to the stack. 2022 * We don't want the stack to use the inner 2023 * csum_data to validate the outer frame or it 2024 * will get rejected. So we fix csum_data here 2025 * and let sw do the checksum of inner IP 2026 * fragments. 2027 * 2028 * XXX: Need 32b for csum_data2 in an rx mbuf. 2029 * Maybe stuff it into rcv_tstmp? 2030 */ 2031 m0->m_pkthdr.csum_data = 0xffff; 2032 if (ipv6) { 2033 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2034 CSUM_L4_VALID; 2035 } else { 2036 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2037 CSUM_L3_VALID | CSUM_L4_CALC | 2038 CSUM_L4_VALID; 2039 } 2040 } else { 2041 int outer_ipv6; 2042 2043 MPASS(m0->m_pkthdr.csum_data == 0xffff); 2044 2045 outer_ipv6 = tnlhdr_len >= 2046 sizeof(struct ether_header) + 2047 sizeof(struct ip6_hdr); 2048 m0->m_pkthdr.csum_flags = 2049 sw_csum_flags[outer_ipv6][ipv6]; 2050 } 2051 rxq->vxlan_rxcsum++; 2052 } 2053 } 2054 2055 if (cpl->vlan_ex) { 2056 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 2057 m0->m_flags |= M_VLANTAG; 2058 rxq->vlan_extraction++; 2059 } 2060 2061 if (rxq->iq.flags & IQ_RX_TIMESTAMP) { 2062 /* 2063 * Fill up rcv_tstmp but do not set M_TSTMP. 2064 * rcv_tstmp is not in the format that the 2065 * kernel expects and we don't want to mislead 2066 * it. For now this is only for custom code 2067 * that knows how to interpret cxgbe's stamp. 2068 */ 2069 m0->m_pkthdr.rcv_tstmp = 2070 last_flit_to_ns(sc, d->rsp.u.last_flit); 2071 #ifdef notyet 2072 m0->m_flags |= M_TSTMP; 2073 #endif 2074 } 2075 2076 #ifdef NUMA 2077 m0->m_pkthdr.numa_domain = ifp->if_numa_domain; 2078 #endif 2079 #if defined(INET) || defined(INET6) 2080 if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 && 2081 (M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 || 2082 M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) { 2083 if (sort_before_lro(lro)) { 2084 tcp_lro_queue_mbuf(lro, m0); 2085 return (0); /* queued for sort, then LRO */ 2086 } 2087 if (tcp_lro_rx(lro, m0, 0) == 0) 2088 return (0); /* queued for LRO */ 2089 } 2090 #endif 2091 ifp->if_input(ifp, m0); 2092 2093 return (0); 2094 } 2095 2096 /* 2097 * Must drain the wrq or make sure that someone else will. 
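 * This runs as a deferred task.  It drains the queued work requests only when
 * there are no partially written WRs outstanding; otherwise commit_wrq_wr will
 * drain wr_list itself once the last in-flight WR is committed.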
2098 */ 2099 static void 2100 wrq_tx_drain(void *arg, int n) 2101 { 2102 struct sge_wrq *wrq = arg; 2103 struct sge_eq *eq = &wrq->eq; 2104 2105 EQ_LOCK(eq); 2106 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2107 drain_wrq_wr_list(wrq->adapter, wrq); 2108 EQ_UNLOCK(eq); 2109 } 2110 2111 static void 2112 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 2113 { 2114 struct sge_eq *eq = &wrq->eq; 2115 u_int available, dbdiff; /* # of hardware descriptors */ 2116 u_int n; 2117 struct wrqe *wr; 2118 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2119 2120 EQ_LOCK_ASSERT_OWNED(eq); 2121 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 2122 wr = STAILQ_FIRST(&wrq->wr_list); 2123 MPASS(wr != NULL); /* Must be called with something useful to do */ 2124 MPASS(eq->pidx == eq->dbidx); 2125 dbdiff = 0; 2126 2127 do { 2128 eq->cidx = read_hw_cidx(eq); 2129 if (eq->pidx == eq->cidx) 2130 available = eq->sidx - 1; 2131 else 2132 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2133 2134 MPASS(wr->wrq == wrq); 2135 n = howmany(wr->wr_len, EQ_ESIZE); 2136 if (available < n) 2137 break; 2138 2139 dst = (void *)&eq->desc[eq->pidx]; 2140 if (__predict_true(eq->sidx - eq->pidx > n)) { 2141 /* Won't wrap, won't end exactly at the status page. */ 2142 bcopy(&wr->wr[0], dst, wr->wr_len); 2143 eq->pidx += n; 2144 } else { 2145 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 2146 2147 bcopy(&wr->wr[0], dst, first_portion); 2148 if (wr->wr_len > first_portion) { 2149 bcopy(&wr->wr[first_portion], &eq->desc[0], 2150 wr->wr_len - first_portion); 2151 } 2152 eq->pidx = n - (eq->sidx - eq->pidx); 2153 } 2154 wrq->tx_wrs_copied++; 2155 2156 if (available < eq->sidx / 4 && 2157 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2158 /* 2159 * XXX: This is not 100% reliable with some 2160 * types of WRs. But this is a very unusual 2161 * situation for an ofld/ctrl queue anyway. 2162 */ 2163 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2164 F_FW_WR_EQUEQ); 2165 } 2166 2167 dbdiff += n; 2168 if (dbdiff >= 16) { 2169 ring_eq_db(sc, eq, dbdiff); 2170 dbdiff = 0; 2171 } 2172 2173 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 2174 free_wrqe(wr); 2175 MPASS(wrq->nwr_pending > 0); 2176 wrq->nwr_pending--; 2177 MPASS(wrq->ndesc_needed >= n); 2178 wrq->ndesc_needed -= n; 2179 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 2180 2181 if (dbdiff) 2182 ring_eq_db(sc, eq, dbdiff); 2183 } 2184 2185 /* 2186 * Doesn't fail. Holds on to work requests it can't send right away. 2187 */ 2188 void 2189 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 2190 { 2191 #ifdef INVARIANTS 2192 struct sge_eq *eq = &wrq->eq; 2193 #endif 2194 2195 EQ_LOCK_ASSERT_OWNED(eq); 2196 MPASS(wr != NULL); 2197 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 2198 MPASS((wr->wr_len & 0x7) == 0); 2199 2200 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 2201 wrq->nwr_pending++; 2202 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 2203 2204 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 2205 return; /* commit_wrq_wr will drain wr_list as well. */ 2206 2207 drain_wrq_wr_list(sc, wrq); 2208 2209 /* Doorbell must have caught up to the pidx. 
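 * drain_wrq_wr_list rings the doorbell for everything it copies into the ring,
 * so by the time it returns the doorbell index matches the producer index.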
*/ 2210 MPASS(eq->pidx == eq->dbidx); 2211 } 2212 2213 void 2214 t4_update_fl_bufsize(struct ifnet *ifp) 2215 { 2216 struct vi_info *vi = ifp->if_softc; 2217 struct adapter *sc = vi->adapter; 2218 struct sge_rxq *rxq; 2219 #ifdef TCP_OFFLOAD 2220 struct sge_ofld_rxq *ofld_rxq; 2221 #endif 2222 struct sge_fl *fl; 2223 int i, maxp; 2224 2225 maxp = max_rx_payload(sc, ifp, false); 2226 for_each_rxq(vi, i, rxq) { 2227 fl = &rxq->fl; 2228 2229 FL_LOCK(fl); 2230 fl->zidx = find_refill_source(sc, maxp, 2231 fl->flags & FL_BUF_PACKING); 2232 FL_UNLOCK(fl); 2233 } 2234 #ifdef TCP_OFFLOAD 2235 maxp = max_rx_payload(sc, ifp, true); 2236 for_each_ofld_rxq(vi, i, ofld_rxq) { 2237 fl = &ofld_rxq->fl; 2238 2239 FL_LOCK(fl); 2240 fl->zidx = find_refill_source(sc, maxp, 2241 fl->flags & FL_BUF_PACKING); 2242 FL_UNLOCK(fl); 2243 } 2244 #endif 2245 } 2246 2247 static inline int 2248 mbuf_nsegs(struct mbuf *m) 2249 { 2250 2251 M_ASSERTPKTHDR(m); 2252 KASSERT(m->m_pkthdr.inner_l5hlen > 0, 2253 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 2254 2255 return (m->m_pkthdr.inner_l5hlen); 2256 } 2257 2258 static inline void 2259 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 2260 { 2261 2262 M_ASSERTPKTHDR(m); 2263 m->m_pkthdr.inner_l5hlen = nsegs; 2264 } 2265 2266 static inline int 2267 mbuf_cflags(struct mbuf *m) 2268 { 2269 2270 M_ASSERTPKTHDR(m); 2271 return (m->m_pkthdr.PH_loc.eight[4]); 2272 } 2273 2274 static inline void 2275 set_mbuf_cflags(struct mbuf *m, uint8_t flags) 2276 { 2277 2278 M_ASSERTPKTHDR(m); 2279 m->m_pkthdr.PH_loc.eight[4] = flags; 2280 } 2281 2282 static inline int 2283 mbuf_len16(struct mbuf *m) 2284 { 2285 int n; 2286 2287 M_ASSERTPKTHDR(m); 2288 n = m->m_pkthdr.PH_loc.eight[0]; 2289 if (!(mbuf_cflags(m) & MC_TLS)) 2290 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2291 2292 return (n); 2293 } 2294 2295 static inline void 2296 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2297 { 2298 2299 M_ASSERTPKTHDR(m); 2300 if (!(mbuf_cflags(m) & MC_TLS)) 2301 MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16); 2302 m->m_pkthdr.PH_loc.eight[0] = len16; 2303 } 2304 2305 #ifdef RATELIMIT 2306 static inline int 2307 mbuf_eo_nsegs(struct mbuf *m) 2308 { 2309 2310 M_ASSERTPKTHDR(m); 2311 return (m->m_pkthdr.PH_loc.eight[1]); 2312 } 2313 2314 #if defined(INET) || defined(INET6) 2315 static inline void 2316 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2317 { 2318 2319 M_ASSERTPKTHDR(m); 2320 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2321 } 2322 #endif 2323 2324 static inline int 2325 mbuf_eo_len16(struct mbuf *m) 2326 { 2327 int n; 2328 2329 M_ASSERTPKTHDR(m); 2330 n = m->m_pkthdr.PH_loc.eight[2]; 2331 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2332 2333 return (n); 2334 } 2335 2336 #if defined(INET) || defined(INET6) 2337 static inline void 2338 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2339 { 2340 2341 M_ASSERTPKTHDR(m); 2342 m->m_pkthdr.PH_loc.eight[2] = len16; 2343 } 2344 #endif 2345 2346 static inline int 2347 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2348 { 2349 2350 M_ASSERTPKTHDR(m); 2351 return (m->m_pkthdr.PH_loc.eight[3]); 2352 } 2353 2354 #if defined(INET) || defined(INET6) 2355 static inline void 2356 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2357 { 2358 2359 M_ASSERTPKTHDR(m); 2360 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2361 } 2362 #endif 2363 2364 static inline int 2365 needs_eo(struct m_snd_tag *mst) 2366 { 2367 2368 return (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_RATE_LIMIT); 2369 } 2370 #endif 2371 2372 /* 2373 * Try to allocate an mbuf to 
contain a raw work request. To make it 2374 * easy to construct the work request, don't allocate a chain but a 2375 * single mbuf. 2376 */ 2377 struct mbuf * 2378 alloc_wr_mbuf(int len, int how) 2379 { 2380 struct mbuf *m; 2381 2382 if (len <= MHLEN) 2383 m = m_gethdr(how, MT_DATA); 2384 else if (len <= MCLBYTES) 2385 m = m_getcl(how, MT_DATA, M_PKTHDR); 2386 else 2387 m = NULL; 2388 if (m == NULL) 2389 return (NULL); 2390 m->m_pkthdr.len = len; 2391 m->m_len = len; 2392 set_mbuf_cflags(m, MC_RAW_WR); 2393 set_mbuf_len16(m, howmany(len, 16)); 2394 return (m); 2395 } 2396 2397 static inline bool 2398 needs_hwcsum(struct mbuf *m) 2399 { 2400 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | 2401 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP | 2402 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP | 2403 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP | 2404 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO; 2405 2406 M_ASSERTPKTHDR(m); 2407 2408 return (m->m_pkthdr.csum_flags & csum_flags); 2409 } 2410 2411 static inline bool 2412 needs_tso(struct mbuf *m) 2413 { 2414 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO | 2415 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2416 2417 M_ASSERTPKTHDR(m); 2418 2419 return (m->m_pkthdr.csum_flags & csum_flags); 2420 } 2421 2422 static inline bool 2423 needs_vxlan_csum(struct mbuf *m) 2424 { 2425 2426 M_ASSERTPKTHDR(m); 2427 2428 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN); 2429 } 2430 2431 static inline bool 2432 needs_vxlan_tso(struct mbuf *m) 2433 { 2434 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO | 2435 CSUM_INNER_IP6_TSO; 2436 2437 M_ASSERTPKTHDR(m); 2438 2439 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 && 2440 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN); 2441 } 2442 2443 #if defined(INET) || defined(INET6) 2444 static inline bool 2445 needs_inner_tcp_csum(struct mbuf *m) 2446 { 2447 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2448 2449 M_ASSERTPKTHDR(m); 2450 2451 return (m->m_pkthdr.csum_flags & csum_flags); 2452 } 2453 #endif 2454 2455 static inline bool 2456 needs_l3_csum(struct mbuf *m) 2457 { 2458 const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP | 2459 CSUM_INNER_IP_TSO; 2460 2461 M_ASSERTPKTHDR(m); 2462 2463 return (m->m_pkthdr.csum_flags & csum_flags); 2464 } 2465 2466 static inline bool 2467 needs_outer_tcp_csum(struct mbuf *m) 2468 { 2469 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP | 2470 CSUM_IP6_TSO; 2471 2472 M_ASSERTPKTHDR(m); 2473 2474 return (m->m_pkthdr.csum_flags & csum_flags); 2475 } 2476 2477 #ifdef RATELIMIT 2478 static inline bool 2479 needs_outer_l4_csum(struct mbuf *m) 2480 { 2481 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO | 2482 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO; 2483 2484 M_ASSERTPKTHDR(m); 2485 2486 return (m->m_pkthdr.csum_flags & csum_flags); 2487 } 2488 2489 static inline bool 2490 needs_outer_udp_csum(struct mbuf *m) 2491 { 2492 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP; 2493 2494 M_ASSERTPKTHDR(m); 2495 2496 return (m->m_pkthdr.csum_flags & csum_flags); 2497 } 2498 #endif 2499 2500 static inline bool 2501 needs_vlan_insertion(struct mbuf *m) 2502 { 2503 2504 M_ASSERTPKTHDR(m); 2505 2506 return (m->m_flags & M_VLANTAG); 2507 } 2508 2509 #if defined(INET) || defined(INET6) 2510 static void * 2511 m_advance(struct mbuf **pm, int *poffset, int len) 2512 { 2513 struct mbuf *m = *pm; 2514 int offset = *poffset; 2515 uintptr_t p = 0; 2516 2517 
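	/*
	 * Skip 'len' bytes from the current position in the chain and return a
	 * pointer to the data that follows them.  *pm and *poffset are updated
	 * so the caller can keep advancing from that point; the chain must be
	 * long enough to cover the distance.
	 */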
MPASS(len > 0); 2518 2519 for (;;) { 2520 if (offset + len < m->m_len) { 2521 offset += len; 2522 p = mtod(m, uintptr_t) + offset; 2523 break; 2524 } 2525 len -= m->m_len - offset; 2526 m = m->m_next; 2527 offset = 0; 2528 MPASS(m != NULL); 2529 } 2530 *poffset = offset; 2531 *pm = m; 2532 return ((void *)p); 2533 } 2534 #endif 2535 2536 static inline int 2537 count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) 2538 { 2539 vm_paddr_t paddr; 2540 int i, len, off, pglen, pgoff, seglen, segoff; 2541 int nsegs = 0; 2542 2543 M_ASSERTEXTPG(m); 2544 off = mtod(m, vm_offset_t); 2545 len = m->m_len; 2546 off += skip; 2547 len -= skip; 2548 2549 if (m->m_epg_hdrlen != 0) { 2550 if (off >= m->m_epg_hdrlen) { 2551 off -= m->m_epg_hdrlen; 2552 } else { 2553 seglen = m->m_epg_hdrlen - off; 2554 segoff = off; 2555 seglen = min(seglen, len); 2556 off = 0; 2557 len -= seglen; 2558 paddr = pmap_kextract( 2559 (vm_offset_t)&m->m_epg_hdr[segoff]); 2560 if (*nextaddr != paddr) 2561 nsegs++; 2562 *nextaddr = paddr + seglen; 2563 } 2564 } 2565 pgoff = m->m_epg_1st_off; 2566 for (i = 0; i < m->m_epg_npgs && len > 0; i++) { 2567 pglen = m_epg_pagelen(m, i, pgoff); 2568 if (off >= pglen) { 2569 off -= pglen; 2570 pgoff = 0; 2571 continue; 2572 } 2573 seglen = pglen - off; 2574 segoff = pgoff + off; 2575 off = 0; 2576 seglen = min(seglen, len); 2577 len -= seglen; 2578 paddr = m->m_epg_pa[i] + segoff; 2579 if (*nextaddr != paddr) 2580 nsegs++; 2581 *nextaddr = paddr + seglen; 2582 pgoff = 0; 2583 }; 2584 if (len != 0) { 2585 seglen = min(len, m->m_epg_trllen - off); 2586 len -= seglen; 2587 paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]); 2588 if (*nextaddr != paddr) 2589 nsegs++; 2590 *nextaddr = paddr + seglen; 2591 } 2592 2593 return (nsegs); 2594 } 2595 2596 2597 /* 2598 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2599 * must have at least one mbuf that's not empty. It is possible for this 2600 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 2601 */ 2602 static inline int 2603 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) 2604 { 2605 vm_paddr_t nextaddr, paddr; 2606 vm_offset_t va; 2607 int len, nsegs; 2608 2609 M_ASSERTPKTHDR(m); 2610 MPASS(m->m_pkthdr.len > 0); 2611 MPASS(m->m_pkthdr.len >= skip); 2612 2613 nsegs = 0; 2614 nextaddr = 0; 2615 for (; m; m = m->m_next) { 2616 len = m->m_len; 2617 if (__predict_false(len == 0)) 2618 continue; 2619 if (skip >= len) { 2620 skip -= len; 2621 continue; 2622 } 2623 if ((m->m_flags & M_EXTPG) != 0) { 2624 *cflags |= MC_NOMAP; 2625 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); 2626 skip = 0; 2627 continue; 2628 } 2629 va = mtod(m, vm_offset_t) + skip; 2630 len -= skip; 2631 skip = 0; 2632 paddr = pmap_kextract(va); 2633 nsegs += sglist_count((void *)(uintptr_t)va, len); 2634 if (paddr == nextaddr) 2635 nsegs--; 2636 nextaddr = pmap_kextract(va + len - 1) + 1; 2637 } 2638 2639 return (nsegs); 2640 } 2641 2642 /* 2643 * The maximum number of segments that can fit in a WR. 
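 * The limit depends on the flavor of work request that will be used: VM work
 * requests and TSO (plain or VXLAN) each have their own SGL capacity.
 * parse_pkt defrags the mbuf chain if it has more segments than this.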
2644 */ 2645 static int 2646 max_nsegs_allowed(struct mbuf *m, bool vm_wr) 2647 { 2648 2649 if (vm_wr) { 2650 if (needs_tso(m)) 2651 return (TX_SGL_SEGS_VM_TSO); 2652 return (TX_SGL_SEGS_VM); 2653 } 2654 2655 if (needs_tso(m)) { 2656 if (needs_vxlan_tso(m)) 2657 return (TX_SGL_SEGS_VXLAN_TSO); 2658 else 2659 return (TX_SGL_SEGS_TSO); 2660 } 2661 2662 return (TX_SGL_SEGS); 2663 } 2664 2665 static struct timeval txerr_ratecheck = {0}; 2666 static const struct timeval txerr_interval = {3, 0}; 2667 2668 /* 2669 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2670 * a) caller can assume it's been freed if this function returns with an error. 2671 * b) it may get defragged up if the gather list is too long for the hardware. 2672 */ 2673 int 2674 parse_pkt(struct mbuf **mp, bool vm_wr) 2675 { 2676 struct mbuf *m0 = *mp, *m; 2677 int rc, nsegs, defragged = 0; 2678 struct ether_header *eh; 2679 #ifdef INET 2680 void *l3hdr; 2681 #endif 2682 #if defined(INET) || defined(INET6) 2683 int offset; 2684 struct tcphdr *tcp; 2685 #endif 2686 #if defined(KERN_TLS) || defined(RATELIMIT) 2687 struct m_snd_tag *mst; 2688 #endif 2689 uint16_t eh_type; 2690 uint8_t cflags; 2691 2692 cflags = 0; 2693 M_ASSERTPKTHDR(m0); 2694 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2695 rc = EINVAL; 2696 fail: 2697 m_freem(m0); 2698 *mp = NULL; 2699 return (rc); 2700 } 2701 restart: 2702 /* 2703 * First count the number of gather list segments in the payload. 2704 * Defrag the mbuf if nsegs exceeds the hardware limit. 2705 */ 2706 M_ASSERTPKTHDR(m0); 2707 MPASS(m0->m_pkthdr.len > 0); 2708 nsegs = count_mbuf_nsegs(m0, 0, &cflags); 2709 #if defined(KERN_TLS) || defined(RATELIMIT) 2710 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) 2711 mst = m0->m_pkthdr.snd_tag; 2712 else 2713 mst = NULL; 2714 #endif 2715 #ifdef KERN_TLS 2716 if (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_TLS) { 2717 int len16; 2718 2719 cflags |= MC_TLS; 2720 set_mbuf_cflags(m0, cflags); 2721 rc = t6_ktls_parse_pkt(m0, &nsegs, &len16); 2722 if (rc != 0) 2723 goto fail; 2724 set_mbuf_nsegs(m0, nsegs); 2725 set_mbuf_len16(m0, len16); 2726 return (0); 2727 } 2728 #endif 2729 if (nsegs > max_nsegs_allowed(m0, vm_wr)) { 2730 if (defragged++ > 0) { 2731 rc = EFBIG; 2732 goto fail; 2733 } 2734 counter_u64_add(defrags, 1); 2735 if ((m = m_defrag(m0, M_NOWAIT)) == NULL) { 2736 rc = ENOMEM; 2737 goto fail; 2738 } 2739 *mp = m0 = m; /* update caller's copy after defrag */ 2740 goto restart; 2741 } 2742 2743 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && 2744 !(cflags & MC_NOMAP))) { 2745 counter_u64_add(pullups, 1); 2746 m0 = m_pullup(m0, m0->m_pkthdr.len); 2747 if (m0 == NULL) { 2748 /* Should have left well enough alone. */ 2749 rc = EFBIG; 2750 goto fail; 2751 } 2752 *mp = m0; /* update caller's copy after pullup */ 2753 goto restart; 2754 } 2755 set_mbuf_nsegs(m0, nsegs); 2756 set_mbuf_cflags(m0, cflags); 2757 calculate_mbuf_len16(m0, vm_wr); 2758 2759 #ifdef RATELIMIT 2760 /* 2761 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2762 * checksumming is enabled. needs_outer_l4_csum happens to check for 2763 * all the right things. 
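 * If the packet doesn't qualify then the rate-limit send tag is released right
 * here and the packet goes out via the normal tx path instead.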
2764 */ 2765 if (__predict_false(needs_eo(mst) && !needs_outer_l4_csum(m0))) { 2766 m_snd_tag_rele(m0->m_pkthdr.snd_tag); 2767 m0->m_pkthdr.snd_tag = NULL; 2768 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 2769 mst = NULL; 2770 } 2771 #endif 2772 2773 if (!needs_hwcsum(m0) 2774 #ifdef RATELIMIT 2775 && !needs_eo(mst) 2776 #endif 2777 ) 2778 return (0); 2779 2780 m = m0; 2781 eh = mtod(m, struct ether_header *); 2782 eh_type = ntohs(eh->ether_type); 2783 if (eh_type == ETHERTYPE_VLAN) { 2784 struct ether_vlan_header *evh = (void *)eh; 2785 2786 eh_type = ntohs(evh->evl_proto); 2787 m0->m_pkthdr.l2hlen = sizeof(*evh); 2788 } else 2789 m0->m_pkthdr.l2hlen = sizeof(*eh); 2790 2791 #if defined(INET) || defined(INET6) 2792 offset = 0; 2793 #ifdef INET 2794 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2795 #else 2796 m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2797 #endif 2798 #endif 2799 2800 switch (eh_type) { 2801 #ifdef INET6 2802 case ETHERTYPE_IPV6: 2803 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); 2804 break; 2805 #endif 2806 #ifdef INET 2807 case ETHERTYPE_IP: 2808 { 2809 struct ip *ip = l3hdr; 2810 2811 if (needs_vxlan_csum(m0)) { 2812 /* Driver will do the outer IP hdr checksum. */ 2813 ip->ip_sum = 0; 2814 if (needs_vxlan_tso(m0)) { 2815 const uint16_t ipl = ip->ip_len; 2816 2817 ip->ip_len = 0; 2818 ip->ip_sum = ~in_cksum_hdr(ip); 2819 ip->ip_len = ipl; 2820 } else 2821 ip->ip_sum = in_cksum_hdr(ip); 2822 } 2823 m0->m_pkthdr.l3hlen = ip->ip_hl << 2; 2824 break; 2825 } 2826 #endif 2827 default: 2828 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2829 log(LOG_ERR, "%s: ethertype 0x%04x unknown. " 2830 "if_cxgbe must be compiled with the same " 2831 "INET/INET6 options as the kernel.\n", __func__, 2832 eh_type); 2833 } 2834 rc = EINVAL; 2835 goto fail; 2836 } 2837 2838 #if defined(INET) || defined(INET6) 2839 if (needs_vxlan_csum(m0)) { 2840 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2841 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header); 2842 2843 /* Inner headers. */ 2844 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen + 2845 sizeof(struct udphdr) + sizeof(struct vxlan_header)); 2846 eh_type = ntohs(eh->ether_type); 2847 if (eh_type == ETHERTYPE_VLAN) { 2848 struct ether_vlan_header *evh = (void *)eh; 2849 2850 eh_type = ntohs(evh->evl_proto); 2851 m0->m_pkthdr.inner_l2hlen = sizeof(*evh); 2852 } else 2853 m0->m_pkthdr.inner_l2hlen = sizeof(*eh); 2854 #ifdef INET 2855 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2856 #else 2857 m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2858 #endif 2859 2860 switch (eh_type) { 2861 #ifdef INET6 2862 case ETHERTYPE_IPV6: 2863 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr); 2864 break; 2865 #endif 2866 #ifdef INET 2867 case ETHERTYPE_IP: 2868 { 2869 struct ip *ip = l3hdr; 2870 2871 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2; 2872 break; 2873 } 2874 #endif 2875 default: 2876 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2877 log(LOG_ERR, "%s: VXLAN hw offload requested" 2878 "with unknown ethertype 0x%04x. 
if_cxgbe " 2879 "must be compiled with the same INET/INET6 " 2880 "options as the kernel.\n", __func__, 2881 eh_type); 2882 } 2883 rc = EINVAL; 2884 goto fail; 2885 } 2886 if (needs_inner_tcp_csum(m0)) { 2887 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen); 2888 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4; 2889 } 2890 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 2891 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP | 2892 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP | 2893 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | 2894 CSUM_ENCAP_VXLAN; 2895 } 2896 2897 if (needs_outer_tcp_csum(m0)) { 2898 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2899 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2900 #ifdef RATELIMIT 2901 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2902 set_mbuf_eo_tsclk_tsoff(m0, 2903 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2904 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2905 } else 2906 set_mbuf_eo_tsclk_tsoff(m0, 0); 2907 } else if (needs_outer_udp_csum(m0)) { 2908 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2909 #endif 2910 } 2911 #ifdef RATELIMIT 2912 if (needs_eo(mst)) { 2913 u_int immhdrs; 2914 2915 /* EO WRs have the headers in the WR and not the GL. */ 2916 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2917 m0->m_pkthdr.l4hlen; 2918 cflags = 0; 2919 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); 2920 MPASS(cflags == mbuf_cflags(m0)); 2921 set_mbuf_eo_nsegs(m0, nsegs); 2922 set_mbuf_eo_len16(m0, 2923 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2924 } 2925 #endif 2926 #endif 2927 MPASS(m0 == *mp); 2928 return (0); 2929 } 2930 2931 void * 2932 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2933 { 2934 struct sge_eq *eq = &wrq->eq; 2935 struct adapter *sc = wrq->adapter; 2936 int ndesc, available; 2937 struct wrqe *wr; 2938 void *w; 2939 2940 MPASS(len16 > 0); 2941 ndesc = tx_len16_to_desc(len16); 2942 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2943 2944 EQ_LOCK(eq); 2945 2946 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2947 drain_wrq_wr_list(sc, wrq); 2948 2949 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2950 slowpath: 2951 EQ_UNLOCK(eq); 2952 wr = alloc_wrqe(len16 * 16, wrq); 2953 if (__predict_false(wr == NULL)) 2954 return (NULL); 2955 cookie->pidx = -1; 2956 cookie->ndesc = ndesc; 2957 return (&wr->wr); 2958 } 2959 2960 eq->cidx = read_hw_cidx(eq); 2961 if (eq->pidx == eq->cidx) 2962 available = eq->sidx - 1; 2963 else 2964 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2965 if (available < ndesc) 2966 goto slowpath; 2967 2968 cookie->pidx = eq->pidx; 2969 cookie->ndesc = ndesc; 2970 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2971 2972 w = &eq->desc[eq->pidx]; 2973 IDXINCR(eq->pidx, ndesc, eq->sidx); 2974 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2975 w = &wrq->ss[0]; 2976 wrq->ss_pidx = cookie->pidx; 2977 wrq->ss_len = len16 * 16; 2978 } 2979 2980 EQ_UNLOCK(eq); 2981 2982 return (w); 2983 } 2984 2985 void 2986 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2987 { 2988 struct sge_eq *eq = &wrq->eq; 2989 struct adapter *sc = wrq->adapter; 2990 int ndesc, pidx; 2991 struct wrq_cookie *prev, *next; 2992 2993 if (cookie->pidx == -1) { 2994 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2995 2996 t4_wrq_tx(sc, wr); 2997 return; 2998 } 2999 3000 if (__predict_false(w == &wrq->ss[0])) { 3001 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 3002 3003 MPASS(wrq->ss_len > n); /* WR had better wrap 
around. */ 3004 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 3005 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 3006 wrq->tx_wrs_ss++; 3007 } else 3008 wrq->tx_wrs_direct++; 3009 3010 EQ_LOCK(eq); 3011 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 3012 pidx = cookie->pidx; 3013 MPASS(pidx >= 0 && pidx < eq->sidx); 3014 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 3015 next = TAILQ_NEXT(cookie, link); 3016 if (prev == NULL) { 3017 MPASS(pidx == eq->dbidx); 3018 if (next == NULL || ndesc >= 16) { 3019 int available; 3020 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 3021 3022 /* 3023 * Note that the WR via which we'll request tx updates 3024 * is at pidx and not eq->pidx, which has moved on 3025 * already. 3026 */ 3027 dst = (void *)&eq->desc[pidx]; 3028 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3029 if (available < eq->sidx / 4 && 3030 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3031 /* 3032 * XXX: This is not 100% reliable with some 3033 * types of WRs. But this is a very unusual 3034 * situation for an ofld/ctrl queue anyway. 3035 */ 3036 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 3037 F_FW_WR_EQUEQ); 3038 } 3039 3040 ring_eq_db(wrq->adapter, eq, ndesc); 3041 } else { 3042 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 3043 next->pidx = pidx; 3044 next->ndesc += ndesc; 3045 } 3046 } else { 3047 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 3048 prev->ndesc += ndesc; 3049 } 3050 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 3051 3052 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 3053 drain_wrq_wr_list(sc, wrq); 3054 3055 #ifdef INVARIANTS 3056 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 3057 /* Doorbell must have caught up to the pidx. */ 3058 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 3059 } 3060 #endif 3061 EQ_UNLOCK(eq); 3062 } 3063 3064 static u_int 3065 can_resume_eth_tx(struct mp_ring *r) 3066 { 3067 struct sge_eq *eq = r->cookie; 3068 3069 return (total_available_tx_desc(eq) > eq->sidx / 8); 3070 } 3071 3072 static inline bool 3073 cannot_use_txpkts(struct mbuf *m) 3074 { 3075 /* maybe put a GL limit too, to avoid silliness? 
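 * TSO packets, raw work requests, and TLS records each need a work request of
 * their own and are never coalesced into a txpkts work request.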
*/ 3076 3077 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); 3078 } 3079 3080 static inline int 3081 discard_tx(struct sge_eq *eq) 3082 { 3083 3084 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 3085 } 3086 3087 static inline int 3088 wr_can_update_eq(void *p) 3089 { 3090 struct fw_eth_tx_pkts_wr *wr = p; 3091 3092 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 3093 case FW_ULPTX_WR: 3094 case FW_ETH_TX_PKT_WR: 3095 case FW_ETH_TX_PKTS_WR: 3096 case FW_ETH_TX_PKTS2_WR: 3097 case FW_ETH_TX_PKT_VM_WR: 3098 case FW_ETH_TX_PKTS_VM_WR: 3099 return (1); 3100 default: 3101 return (0); 3102 } 3103 } 3104 3105 static inline void 3106 set_txupdate_flags(struct sge_txq *txq, u_int avail, 3107 struct fw_eth_tx_pkt_wr *wr) 3108 { 3109 struct sge_eq *eq = &txq->eq; 3110 struct txpkts *txp = &txq->txp; 3111 3112 if ((txp->npkt > 0 || avail < eq->sidx / 2) && 3113 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3114 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3115 eq->equeqidx = eq->pidx; 3116 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 3117 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 3118 eq->equeqidx = eq->pidx; 3119 } 3120 } 3121 3122 #if defined(__i386__) || defined(__amd64__) 3123 extern uint64_t tsc_freq; 3124 #endif 3125 3126 static inline bool 3127 record_eth_tx_time(struct sge_txq *txq) 3128 { 3129 const uint64_t cycles = get_cyclecount(); 3130 const uint64_t last_tx = txq->last_tx; 3131 #if defined(__i386__) || defined(__amd64__) 3132 const uint64_t itg = tsc_freq * t4_tx_coalesce_gap / 1000000; 3133 #else 3134 const uint64_t itg = 0; 3135 #endif 3136 3137 MPASS(cycles >= last_tx); 3138 txq->last_tx = cycles; 3139 return (cycles - last_tx < itg); 3140 } 3141 3142 /* 3143 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 3144 * be consumed. Return the actual number consumed. 0 indicates a stall. 3145 */ 3146 static u_int 3147 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing) 3148 { 3149 struct sge_txq *txq = r->cookie; 3150 struct ifnet *ifp = txq->ifp; 3151 struct sge_eq *eq = &txq->eq; 3152 struct txpkts *txp = &txq->txp; 3153 struct vi_info *vi = ifp->if_softc; 3154 struct adapter *sc = vi->adapter; 3155 u_int total, remaining; /* # of packets */ 3156 u_int n, avail, dbdiff; /* # of hardware descriptors */ 3157 int i, rc; 3158 struct mbuf *m0; 3159 bool snd, recent_tx; 3160 void *wr; /* start of the last WR written to the ring */ 3161 3162 TXQ_LOCK_ASSERT_OWNED(txq); 3163 recent_tx = record_eth_tx_time(txq); 3164 3165 remaining = IDXDIFF(pidx, cidx, r->size); 3166 if (__predict_false(discard_tx(eq))) { 3167 for (i = 0; i < txp->npkt; i++) 3168 m_freem(txp->mb[i]); 3169 txp->npkt = 0; 3170 while (cidx != pidx) { 3171 m0 = r->items[cidx]; 3172 m_freem(m0); 3173 if (++cidx == r->size) 3174 cidx = 0; 3175 } 3176 reclaim_tx_descs(txq, eq->sidx); 3177 *coalescing = false; 3178 return (remaining); /* emptied */ 3179 } 3180 3181 /* How many hardware descriptors do we have readily available. 
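 * One descriptor is always left unused so that a full ring isn't mistaken for
 * an empty one: an empty ring (pidx == cidx) has sidx - 1 usable entries, and
 * otherwise it's the distance from pidx forward to cidx minus that reserved
 * slot.  For example, with sidx 512, pidx 10, and cidx 4 there are
 * 512 - 10 + 4 - 1 = 505 descriptors available.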
*/ 3182 if (eq->pidx == eq->cidx) 3183 avail = eq->sidx - 1; 3184 else 3185 avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3186 3187 total = 0; 3188 if (remaining == 0) { 3189 txp->score = 0; 3190 txq->txpkts_flush++; 3191 goto send_txpkts; 3192 } 3193 3194 dbdiff = 0; 3195 MPASS(remaining > 0); 3196 while (remaining > 0) { 3197 m0 = r->items[cidx]; 3198 M_ASSERTPKTHDR(m0); 3199 MPASS(m0->m_nextpkt == NULL); 3200 3201 if (avail < 2 * SGE_MAX_WR_NDESC) 3202 avail += reclaim_tx_descs(txq, 64); 3203 3204 if (t4_tx_coalesce == 0 && txp->npkt == 0) 3205 goto skip_coalescing; 3206 if (cannot_use_txpkts(m0)) 3207 txp->score = 0; 3208 else if (recent_tx) { 3209 if (++txp->score == 0) 3210 txp->score = UINT8_MAX; 3211 } else 3212 txp->score = 1; 3213 if (txp->npkt > 0 || remaining > 1 || 3214 txp->score >= t4_tx_coalesce_pkts || 3215 atomic_load_int(&txq->eq.equiq) != 0) { 3216 if (vi->flags & TX_USES_VM_WR) 3217 rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd); 3218 else 3219 rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd); 3220 } else { 3221 snd = false; 3222 rc = EINVAL; 3223 } 3224 if (snd) { 3225 MPASS(txp->npkt > 0); 3226 for (i = 0; i < txp->npkt; i++) 3227 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3228 if (txp->npkt > 1) { 3229 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3230 if (vi->flags & TX_USES_VM_WR) 3231 n = write_txpkts_vm_wr(sc, txq); 3232 else 3233 n = write_txpkts_wr(sc, txq); 3234 } else { 3235 MPASS(avail >= 3236 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3237 if (vi->flags & TX_USES_VM_WR) 3238 n = write_txpkt_vm_wr(sc, txq, 3239 txp->mb[0]); 3240 else 3241 n = write_txpkt_wr(sc, txq, txp->mb[0], 3242 avail); 3243 } 3244 MPASS(n <= SGE_MAX_WR_NDESC); 3245 avail -= n; 3246 dbdiff += n; 3247 wr = &eq->desc[eq->pidx]; 3248 IDXINCR(eq->pidx, n, eq->sidx); 3249 txp->npkt = 0; /* emptied */ 3250 } 3251 if (rc == 0) { 3252 /* m0 was coalesced into txq->txpkts. */ 3253 goto next_mbuf; 3254 } 3255 if (rc == EAGAIN) { 3256 /* 3257 * m0 is suitable for tx coalescing but could not be 3258 * combined with the existing txq->txpkts, which has now 3259 * been transmitted. Start a new txpkts with m0. 
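 * cidx is not advanced here; the loop comes back around and retries this
 * same mbuf against the now-empty txpkts.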
3260 */ 3261 MPASS(snd); 3262 MPASS(txp->npkt == 0); 3263 continue; 3264 } 3265 3266 MPASS(rc != 0 && rc != EAGAIN); 3267 MPASS(txp->npkt == 0); 3268 skip_coalescing: 3269 n = tx_len16_to_desc(mbuf_len16(m0)); 3270 if (__predict_false(avail < n)) { 3271 avail += reclaim_tx_descs(txq, min(n, 32)); 3272 if (avail < n) 3273 break; /* out of descriptors */ 3274 } 3275 3276 wr = &eq->desc[eq->pidx]; 3277 if (mbuf_cflags(m0) & MC_RAW_WR) { 3278 n = write_raw_wr(txq, wr, m0, avail); 3279 #ifdef KERN_TLS 3280 } else if (mbuf_cflags(m0) & MC_TLS) { 3281 ETHER_BPF_MTAP(ifp, m0); 3282 n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0), 3283 avail); 3284 #endif 3285 } else { 3286 ETHER_BPF_MTAP(ifp, m0); 3287 if (vi->flags & TX_USES_VM_WR) 3288 n = write_txpkt_vm_wr(sc, txq, m0); 3289 else 3290 n = write_txpkt_wr(sc, txq, m0, avail); 3291 } 3292 MPASS(n >= 1 && n <= avail); 3293 if (!(mbuf_cflags(m0) & MC_TLS)) 3294 MPASS(n <= SGE_MAX_WR_NDESC); 3295 3296 avail -= n; 3297 dbdiff += n; 3298 IDXINCR(eq->pidx, n, eq->sidx); 3299 3300 if (dbdiff >= 512 / EQ_ESIZE) { /* X_FETCHBURSTMAX_512B */ 3301 if (wr_can_update_eq(wr)) 3302 set_txupdate_flags(txq, avail, wr); 3303 ring_eq_db(sc, eq, dbdiff); 3304 avail += reclaim_tx_descs(txq, 32); 3305 dbdiff = 0; 3306 } 3307 next_mbuf: 3308 total++; 3309 remaining--; 3310 if (__predict_false(++cidx == r->size)) 3311 cidx = 0; 3312 } 3313 if (dbdiff != 0) { 3314 if (wr_can_update_eq(wr)) 3315 set_txupdate_flags(txq, avail, wr); 3316 ring_eq_db(sc, eq, dbdiff); 3317 reclaim_tx_descs(txq, 32); 3318 } else if (eq->pidx == eq->cidx && txp->npkt > 0 && 3319 atomic_load_int(&txq->eq.equiq) == 0) { 3320 /* 3321 * If nothing was submitted to the chip for tx (it was coalesced 3322 * into txpkts instead) and there is no tx update outstanding 3323 * then we need to send txpkts now. 
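 * Tap each frame for BPF, write a single txpkts work request (or a plain
 * txpkt if only one frame accumulated), ring the doorbell, and let
 * set_txupdate_flags decide whether to ask the hardware for a tx update.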
3324 */ 3325 send_txpkts: 3326 MPASS(txp->npkt > 0); 3327 for (i = 0; i < txp->npkt; i++) 3328 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3329 if (txp->npkt > 1) { 3330 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3331 if (vi->flags & TX_USES_VM_WR) 3332 n = write_txpkts_vm_wr(sc, txq); 3333 else 3334 n = write_txpkts_wr(sc, txq); 3335 } else { 3336 MPASS(avail >= 3337 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3338 if (vi->flags & TX_USES_VM_WR) 3339 n = write_txpkt_vm_wr(sc, txq, txp->mb[0]); 3340 else 3341 n = write_txpkt_wr(sc, txq, txp->mb[0], avail); 3342 } 3343 MPASS(n <= SGE_MAX_WR_NDESC); 3344 wr = &eq->desc[eq->pidx]; 3345 IDXINCR(eq->pidx, n, eq->sidx); 3346 txp->npkt = 0; /* emptied */ 3347 3348 MPASS(wr_can_update_eq(wr)); 3349 set_txupdate_flags(txq, avail - n, wr); 3350 ring_eq_db(sc, eq, n); 3351 reclaim_tx_descs(txq, 32); 3352 } 3353 *coalescing = txp->npkt > 0; 3354 3355 return (total); 3356 } 3357 3358 static inline void 3359 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 3360 int qsize, int intr_idx, int cong) 3361 { 3362 3363 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 3364 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 3365 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 3366 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 3367 KASSERT(intr_idx >= -1 && intr_idx < sc->intr_count, 3368 ("%s: bad intr_idx %d", __func__, intr_idx)); 3369 3370 iq->flags = 0; 3371 iq->state = IQS_DISABLED; 3372 iq->adapter = sc; 3373 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 3374 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 3375 if (pktc_idx >= 0) { 3376 iq->intr_params |= F_QINTR_CNT_EN; 3377 iq->intr_pktc_idx = pktc_idx; 3378 } 3379 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 3380 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 3381 iq->intr_idx = intr_idx; 3382 iq->cong = cong; 3383 } 3384 3385 static inline void 3386 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 3387 { 3388 struct sge_params *sp = &sc->params.sge; 3389 3390 fl->qsize = qsize; 3391 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3392 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 3393 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3394 if (sc->flags & BUF_PACKING_OK && 3395 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 3396 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 3397 fl->flags |= FL_BUF_PACKING; 3398 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); 3399 fl->safe_zidx = sc->sge.safe_zidx; 3400 if (fl->flags & FL_BUF_PACKING) { 3401 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3402 fl->buf_boundary = sp->pack_boundary; 3403 } else { 3404 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3405 fl->buf_boundary = 16; 3406 } 3407 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3408 fl->buf_boundary = sp->pad_boundary; 3409 } 3410 3411 static inline void 3412 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 3413 uint8_t tx_chan, struct sge_iq *iq, char *name) 3414 { 3415 KASSERT(eqtype >= EQ_CTRL && eqtype <= EQ_OFLD, 3416 ("%s: bad qtype %d", __func__, eqtype)); 3417 3418 eq->type = eqtype; 3419 eq->tx_chan = tx_chan; 3420 eq->iq = iq; 3421 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3422 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 3423 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3424 } 3425 3426 int 3427 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 3428 bus_dmamap_t *map, bus_addr_t 
*pa, void **va) 3429 { 3430 int rc; 3431 3432 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 3433 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 3434 if (rc != 0) { 3435 CH_ERR(sc, "cannot allocate DMA tag: %d\n", rc); 3436 goto done; 3437 } 3438 3439 rc = bus_dmamem_alloc(*tag, va, 3440 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 3441 if (rc != 0) { 3442 CH_ERR(sc, "cannot allocate DMA memory: %d\n", rc); 3443 goto done; 3444 } 3445 3446 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 3447 if (rc != 0) { 3448 CH_ERR(sc, "cannot load DMA map: %d\n", rc); 3449 goto done; 3450 } 3451 done: 3452 if (rc) 3453 free_ring(sc, *tag, *map, *pa, *va); 3454 3455 return (rc); 3456 } 3457 3458 int 3459 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3460 bus_addr_t pa, void *va) 3461 { 3462 if (pa) 3463 bus_dmamap_unload(tag, map); 3464 if (va) 3465 bus_dmamem_free(tag, va, map); 3466 if (tag) 3467 bus_dma_tag_destroy(tag); 3468 3469 return (0); 3470 } 3471 3472 /* 3473 * Allocates the software resources (mainly memory and sysctl nodes) for an 3474 * ingress queue and an optional freelist. 3475 * 3476 * Sets IQ_SW_ALLOCATED and returns 0 on success. 3477 */ 3478 static int 3479 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3480 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid) 3481 { 3482 int rc; 3483 size_t len; 3484 struct adapter *sc = vi->adapter; 3485 3486 MPASS(!(iq->flags & IQ_SW_ALLOCATED)); 3487 3488 len = iq->qsize * IQ_ESIZE; 3489 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3490 (void **)&iq->desc); 3491 if (rc != 0) 3492 return (rc); 3493 3494 if (fl) { 3495 len = fl->qsize * EQ_ESIZE; 3496 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3497 &fl->ba, (void **)&fl->desc); 3498 if (rc) { 3499 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, 3500 iq->desc); 3501 return (rc); 3502 } 3503 3504 /* Allocate space for one software descriptor per buffer. */ 3505 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), 3506 M_CXGBE, M_ZERO | M_WAITOK); 3507 3508 add_fl_sysctls(sc, ctx, oid, fl); 3509 iq->flags |= IQ_HAS_FL; 3510 } 3511 add_iq_sysctls(ctx, oid, iq); 3512 iq->flags |= IQ_SW_ALLOCATED; 3513 3514 return (0); 3515 } 3516 3517 /* 3518 * Frees all software resources (memory and locks) associated with an ingress 3519 * queue and an optional freelist. 3520 */ 3521 static void 3522 free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3523 { 3524 MPASS(iq->flags & IQ_SW_ALLOCATED); 3525 3526 if (fl) { 3527 MPASS(iq->flags & IQ_HAS_FL); 3528 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); 3529 free_fl_buffers(sc, fl); 3530 free(fl->sdesc, M_CXGBE); 3531 mtx_destroy(&fl->fl_lock); 3532 bzero(fl, sizeof(*fl)); 3533 } 3534 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3535 bzero(iq, sizeof(*iq)); 3536 } 3537 3538 /* 3539 * Allocates a hardware ingress queue and an optional freelist that will be 3540 * associated with it. 3541 * 3542 * Returns errno on failure. Resources allocated up to that point may still be 3543 * allocated. Caller is responsible for cleanup in case this function fails. 
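 * A single FW_IQ_CMD creates the ingress queue (and the freelist, if any) in
 * the hardware.  On success the new contexts are entered into the iqmap/eqmap
 * lookup tables, the freelist is refilled enough to keep the SGE from seeing
 * it as starved, and the queue's interrupt is armed.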
3544 */ 3545 static int 3546 alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3547 { 3548 int rc, i, cntxt_id; 3549 struct fw_iq_cmd c; 3550 struct adapter *sc = vi->adapter; 3551 __be32 v = 0; 3552 3553 MPASS (!(iq->flags & IQ_HW_ALLOCATED)); 3554 3555 bzero(&c, sizeof(c)); 3556 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3557 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3558 V_FW_IQ_CMD_VFN(0)); 3559 3560 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3561 FW_LEN16(c)); 3562 3563 /* Special handling for firmware event queue */ 3564 if (iq == &sc->sge.fwq) 3565 v |= F_FW_IQ_CMD_IQASYNCH; 3566 3567 if (iq->intr_idx < 0) { 3568 /* Forwarded interrupts, all headed to fwq */ 3569 v |= F_FW_IQ_CMD_IQANDST; 3570 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3571 } else { 3572 KASSERT(iq->intr_idx < sc->intr_count, 3573 ("%s: invalid direct intr_idx %d", __func__, iq->intr_idx)); 3574 v |= V_FW_IQ_CMD_IQANDSTINDEX(iq->intr_idx); 3575 } 3576 3577 bzero(iq->desc, iq->qsize * IQ_ESIZE); 3578 c.type_to_iqandstindex = htobe32(v | 3579 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3580 V_FW_IQ_CMD_VIID(vi->viid) | 3581 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3582 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) | 3583 F_FW_IQ_CMD_IQGTSMODE | 3584 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3585 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3586 c.iqsize = htobe16(iq->qsize); 3587 c.iqaddr = htobe64(iq->ba); 3588 if (iq->cong >= 0) 3589 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3590 3591 if (fl) { 3592 bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len); 3593 c.iqns_to_fl0congen |= 3594 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3595 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3596 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3597 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3598 0)); 3599 if (iq->cong >= 0) { 3600 c.iqns_to_fl0congen |= 3601 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(iq->cong) | 3602 F_FW_IQ_CMD_FL0CONGCIF | 3603 F_FW_IQ_CMD_FL0CONGEN); 3604 } 3605 c.fl0dcaen_to_fl0cidxfthresh = 3606 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3607 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 3608 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
3609 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3610 c.fl0size = htobe16(fl->qsize); 3611 c.fl0addr = htobe64(fl->ba); 3612 } 3613 3614 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3615 if (rc != 0) { 3616 CH_ERR(sc, "failed to create hw ingress queue: %d\n", rc); 3617 return (rc); 3618 } 3619 3620 iq->cidx = 0; 3621 iq->gen = F_RSPD_GEN; 3622 iq->cntxt_id = be16toh(c.iqid); 3623 iq->abs_id = be16toh(c.physiqid); 3624 3625 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3626 if (cntxt_id >= sc->sge.iqmap_sz) { 3627 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3628 cntxt_id, sc->sge.iqmap_sz - 1); 3629 } 3630 sc->sge.iqmap[cntxt_id] = iq; 3631 3632 if (fl) { 3633 u_int qid; 3634 #ifdef INVARIANTS 3635 MPASS(!(fl->flags & FL_BUF_RESUME)); 3636 for (i = 0; i < fl->sidx * 8; i++) 3637 MPASS(fl->sdesc[i].cl == NULL); 3638 #endif 3639 fl->cntxt_id = be16toh(c.fl0id); 3640 fl->pidx = fl->cidx = fl->hw_cidx = fl->dbidx = 0; 3641 fl->rx_offset = 0; 3642 fl->flags &= ~(FL_STARVING | FL_DOOMED); 3643 3644 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3645 if (cntxt_id >= sc->sge.eqmap_sz) { 3646 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 3647 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 3648 } 3649 sc->sge.eqmap[cntxt_id] = (void *)fl; 3650 3651 qid = fl->cntxt_id; 3652 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3653 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3654 uint32_t mask = (1 << s_qpp) - 1; 3655 volatile uint8_t *udb; 3656 3657 udb = sc->udbs_base + UDBS_DB_OFFSET; 3658 udb += (qid >> s_qpp) << PAGE_SHIFT; 3659 qid &= mask; 3660 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3661 udb += qid << UDBS_SEG_SHIFT; 3662 qid = 0; 3663 } 3664 fl->udb = (volatile void *)udb; 3665 } 3666 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3667 3668 FL_LOCK(fl); 3669 /* Enough to make sure the SGE doesn't think it's starved */ 3670 refill_fl(sc, fl, fl->lowat); 3671 FL_UNLOCK(fl); 3672 } 3673 3674 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && iq->cong >= 0) { 3675 uint32_t param, val; 3676 3677 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3678 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3679 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 3680 if (iq->cong == 0) 3681 val = 1 << 19; 3682 else { 3683 val = 2 << 19; 3684 for (i = 0; i < 4; i++) { 3685 if (iq->cong & (1 << i)) 3686 val |= 1 << (i << 2); 3687 } 3688 } 3689 3690 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3691 if (rc != 0) { 3692 /* report error but carry on */ 3693 CH_ERR(sc, "failed to set congestion manager context " 3694 "for ingress queue %d: %d\n", iq->cntxt_id, rc); 3695 } 3696 } 3697 3698 /* Enable IQ interrupts */ 3699 atomic_store_rel_int(&iq->state, IQS_IDLE); 3700 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3701 V_INGRESSQID(iq->cntxt_id)); 3702 3703 iq->flags |= IQ_HW_ALLOCATED; 3704 3705 return (0); 3706 } 3707 3708 static int 3709 free_iq_fl_hwq(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3710 { 3711 int rc; 3712 3713 MPASS(iq->flags & IQ_HW_ALLOCATED); 3714 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, 3715 iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3716 if (rc != 0) { 3717 CH_ERR(sc, "failed to free iq %p: %d\n", iq, rc); 3718 return (rc); 3719 } 3720 iq->flags &= ~IQ_HW_ALLOCATED; 3721 3722 return (0); 3723 } 3724 3725 static void 3726 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3727 struct sge_iq *iq) 3728 { 3729 struct sysctl_oid_list *children; 3730 3731 if (ctx == NULL || oid == NULL) 3732 return; 3733 3734 children = SYSCTL_CHILDREN(oid); 3735 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3736 "bus address of descriptor ring"); 3737 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3738 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3739 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3740 &iq->abs_id, 0, "absolute id of the queue"); 3741 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3742 &iq->cntxt_id, 0, "SGE context id of the queue"); 3743 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &iq->cidx, 3744 0, "consumer index"); 3745 } 3746 3747 static void 3748 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3749 struct sysctl_oid *oid, struct sge_fl *fl) 3750 { 3751 struct sysctl_oid_list *children; 3752 3753 if (ctx == NULL || oid == NULL) 3754 return; 3755 3756 children = SYSCTL_CHILDREN(oid); 3757 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3758 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3759 children = SYSCTL_CHILDREN(oid); 3760 3761 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3762 &fl->ba, "bus address of descriptor ring"); 3763 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3764 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3765 "desc ring size in bytes"); 3766 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3767 &fl->cntxt_id, 0, "SGE context id of the freelist"); 3768 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3769 fl_pad ? 1 : 0, "padding enabled"); 3770 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3771 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 3772 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3773 0, "consumer index"); 3774 if (fl->flags & FL_BUF_PACKING) { 3775 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3776 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3777 } 3778 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3779 0, "producer index"); 3780 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3781 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3782 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3783 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3784 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3785 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3786 } 3787 3788 /* 3789 * Idempotent. 3790 */ 3791 static int 3792 alloc_fwq(struct adapter *sc) 3793 { 3794 int rc, intr_idx; 3795 struct sge_iq *fwq = &sc->sge.fwq; 3796 struct vi_info *vi = &sc->port[0]->vi[0]; 3797 3798 if (!(fwq->flags & IQ_SW_ALLOCATED)) { 3799 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3800 3801 if (sc->flags & IS_VF) 3802 intr_idx = 0; 3803 else 3804 intr_idx = sc->intr_count > 1 ? 
1 : 0; 3805 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, intr_idx, -1); 3806 rc = alloc_iq_fl(vi, fwq, NULL, &sc->ctx, sc->fwq_oid); 3807 if (rc != 0) { 3808 CH_ERR(sc, "failed to allocate fwq: %d\n", rc); 3809 return (rc); 3810 } 3811 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3812 } 3813 3814 if (!(fwq->flags & IQ_HW_ALLOCATED)) { 3815 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3816 3817 rc = alloc_iq_fl_hwq(vi, fwq, NULL); 3818 if (rc != 0) { 3819 CH_ERR(sc, "failed to create hw fwq: %d\n", rc); 3820 return (rc); 3821 } 3822 MPASS(fwq->flags & IQ_HW_ALLOCATED); 3823 } 3824 3825 return (0); 3826 } 3827 3828 /* 3829 * Idempotent. 3830 */ 3831 static void 3832 free_fwq(struct adapter *sc) 3833 { 3834 struct sge_iq *fwq = &sc->sge.fwq; 3835 3836 if (fwq->flags & IQ_HW_ALLOCATED) { 3837 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3838 free_iq_fl_hwq(sc, fwq, NULL); 3839 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3840 } 3841 3842 if (fwq->flags & IQ_SW_ALLOCATED) { 3843 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3844 free_iq_fl(sc, fwq, NULL); 3845 MPASS(!(fwq->flags & IQ_SW_ALLOCATED)); 3846 } 3847 } 3848 3849 /* 3850 * Idempotent. 3851 */ 3852 static int 3853 alloc_ctrlq(struct adapter *sc, int idx) 3854 { 3855 int rc; 3856 char name[16]; 3857 struct sysctl_oid *oid; 3858 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3859 3860 MPASS(idx < sc->params.nports); 3861 3862 if (!(ctrlq->eq.flags & EQ_SW_ALLOCATED)) { 3863 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3864 3865 snprintf(name, sizeof(name), "%d", idx); 3866 oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->ctrlq_oid), 3867 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3868 "ctrl queue"); 3869 3870 snprintf(name, sizeof(name), "%s ctrlq%d", 3871 device_get_nameunit(sc->dev), idx); 3872 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, 3873 sc->port[idx]->tx_chan, &sc->sge.fwq, name); 3874 rc = alloc_wrq(sc, NULL, ctrlq, &sc->ctx, oid); 3875 if (rc != 0) { 3876 CH_ERR(sc, "failed to allocate ctrlq%d: %d\n", idx, rc); 3877 sysctl_remove_oid(oid, 1, 1); 3878 return (rc); 3879 } 3880 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3881 } 3882 3883 if (!(ctrlq->eq.flags & EQ_HW_ALLOCATED)) { 3884 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3885 3886 rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq); 3887 if (rc != 0) { 3888 CH_ERR(sc, "failed to create hw ctrlq%d: %d\n", idx, rc); 3889 return (rc); 3890 } 3891 MPASS(ctrlq->eq.flags & EQ_HW_ALLOCATED); 3892 } 3893 3894 return (0); 3895 } 3896 3897 /* 3898 * Idempotent. 3899 */ 3900 static void 3901 free_ctrlq(struct adapter *sc, int idx) 3902 { 3903 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3904 3905 if (ctrlq->eq.flags & EQ_HW_ALLOCATED) { 3906 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3907 free_eq_hwq(sc, NULL, &ctrlq->eq); 3908 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3909 } 3910 3911 if (ctrlq->eq.flags & EQ_SW_ALLOCATED) { 3912 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3913 free_wrq(sc, ctrlq); 3914 MPASS(!(ctrlq->eq.flags & EQ_SW_ALLOCATED)); 3915 } 3916 } 3917 3918 int 3919 tnl_cong(struct port_info *pi, int drop) 3920 { 3921 3922 if (drop == -1) 3923 return (-1); 3924 else if (drop == 1) 3925 return (0); 3926 else 3927 return (pi->rx_e_chan_map); 3928 } 3929 3930 /* 3931 * Idempotent. 
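 * Software state (IQ_SW_ALLOCATED) and the hardware context (IQ_HW_ALLOCATED)
 * are tracked separately, so a failed call can be retried and only whatever is
 * still missing gets created.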
3932 */ 3933 static int 3934 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx, 3935 int maxp) 3936 { 3937 int rc; 3938 struct adapter *sc = vi->adapter; 3939 struct ifnet *ifp = vi->ifp; 3940 struct sysctl_oid *oid; 3941 char name[16]; 3942 3943 if (!(rxq->iq.flags & IQ_SW_ALLOCATED)) { 3944 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 3945 #if defined(INET) || defined(INET6) 3946 rc = tcp_lro_init_args(&rxq->lro, ifp, lro_entries, lro_mbufs); 3947 if (rc != 0) 3948 return (rc); 3949 MPASS(rxq->lro.ifp == ifp); /* also indicates LRO init'ed */ 3950 #endif 3951 rxq->ifp = ifp; 3952 3953 snprintf(name, sizeof(name), "%d", idx); 3954 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->rxq_oid), 3955 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3956 "rx queue"); 3957 3958 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq, 3959 intr_idx, tnl_cong(vi->pi, cong_drop)); 3960 #if defined(INET) || defined(INET6) 3961 if (ifp->if_capenable & IFCAP_LRO) 3962 rxq->iq.flags |= IQ_LRO_ENABLED; 3963 #endif 3964 if (ifp->if_capenable & IFCAP_HWRXTSTMP) 3965 rxq->iq.flags |= IQ_RX_TIMESTAMP; 3966 snprintf(name, sizeof(name), "%s rxq%d-fl", 3967 device_get_nameunit(vi->dev), idx); 3968 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 3969 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, &vi->ctx, oid); 3970 if (rc != 0) { 3971 CH_ERR(vi, "failed to allocate rxq%d: %d\n", idx, rc); 3972 sysctl_remove_oid(oid, 1, 1); 3973 #if defined(INET) || defined(INET6) 3974 tcp_lro_free(&rxq->lro); 3975 rxq->lro.ifp = NULL; 3976 #endif 3977 return (rc); 3978 } 3979 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 3980 add_rxq_sysctls(&vi->ctx, oid, rxq); 3981 } 3982 3983 if (!(rxq->iq.flags & IQ_HW_ALLOCATED)) { 3984 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 3985 rc = alloc_iq_fl_hwq(vi, &rxq->iq, &rxq->fl); 3986 if (rc != 0) { 3987 CH_ERR(vi, "failed to create hw rxq%d: %d\n", idx, rc); 3988 return (rc); 3989 } 3990 MPASS(rxq->iq.flags & IQ_HW_ALLOCATED); 3991 3992 if (idx == 0) 3993 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3994 else 3995 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3996 ("iq_base mismatch")); 3997 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3998 ("PF with non-zero iq_base")); 3999 4000 /* 4001 * The freelist is just barely above the starvation threshold 4002 * right now, fill it up a bit more. 4003 */ 4004 FL_LOCK(&rxq->fl); 4005 refill_fl(sc, &rxq->fl, 128); 4006 FL_UNLOCK(&rxq->fl); 4007 } 4008 4009 return (0); 4010 } 4011 4012 /* 4013 * Idempotent. 
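 * Teardown mirrors alloc_rxq in reverse: the hardware context is destroyed
 * first, then the LRO state and the software rings, and the structure is
 * zeroed so a later alloc_rxq starts from a clean slate.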
4014 */ 4015 static void 4016 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 4017 { 4018 if (rxq->iq.flags & IQ_HW_ALLOCATED) { 4019 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 4020 free_iq_fl_hwq(vi->adapter, &rxq->iq, &rxq->fl); 4021 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4022 } 4023 4024 if (rxq->iq.flags & IQ_SW_ALLOCATED) { 4025 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4026 #if defined(INET) || defined(INET6) 4027 tcp_lro_free(&rxq->lro); 4028 #endif 4029 free_iq_fl(vi->adapter, &rxq->iq, &rxq->fl); 4030 MPASS(!(rxq->iq.flags & IQ_SW_ALLOCATED)); 4031 bzero(rxq, sizeof(*rxq)); 4032 } 4033 } 4034 4035 static void 4036 add_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4037 struct sge_rxq *rxq) 4038 { 4039 struct sysctl_oid_list *children; 4040 4041 if (ctx == NULL || oid == NULL) 4042 return; 4043 4044 children = SYSCTL_CHILDREN(oid); 4045 #if defined(INET) || defined(INET6) 4046 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 4047 &rxq->lro.lro_queued, 0, NULL); 4048 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 4049 &rxq->lro.lro_flushed, 0, NULL); 4050 #endif 4051 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 4052 &rxq->rxcsum, "# of times hardware assisted with checksum"); 4053 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, 4054 &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); 4055 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_rxcsum", CTLFLAG_RD, 4056 &rxq->vxlan_rxcsum, 4057 "# of times hardware assisted with inner checksum (VXLAN)"); 4058 } 4059 4060 #ifdef TCP_OFFLOAD 4061 /* 4062 * Idempotent. 4063 */ 4064 static int 4065 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int idx, 4066 int intr_idx, int maxp) 4067 { 4068 int rc; 4069 struct adapter *sc = vi->adapter; 4070 struct sysctl_oid *oid; 4071 char name[16]; 4072 4073 if (!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)) { 4074 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4075 4076 snprintf(name, sizeof(name), "%d", idx); 4077 oid = SYSCTL_ADD_NODE(&vi->ctx, 4078 SYSCTL_CHILDREN(vi->ofld_rxq_oid), OID_AUTO, name, 4079 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload rx queue"); 4080 4081 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 4082 vi->qsize_rxq, intr_idx, 0); 4083 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 4084 device_get_nameunit(vi->dev), idx); 4085 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 4086 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, &vi->ctx, 4087 oid); 4088 if (rc != 0) { 4089 CH_ERR(vi, "failed to allocate ofld_rxq%d: %d\n", idx, 4090 rc); 4091 sysctl_remove_oid(oid, 1, 1); 4092 return (rc); 4093 } 4094 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4095 ofld_rxq->rx_iscsi_ddp_setup_ok = counter_u64_alloc(M_WAITOK); 4096 ofld_rxq->rx_iscsi_ddp_setup_error = 4097 counter_u64_alloc(M_WAITOK); 4098 add_ofld_rxq_sysctls(&vi->ctx, oid, ofld_rxq); 4099 } 4100 4101 if (!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)) { 4102 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4103 rc = alloc_iq_fl_hwq(vi, &ofld_rxq->iq, &ofld_rxq->fl); 4104 if (rc != 0) { 4105 CH_ERR(vi, "failed to create hw ofld_rxq%d: %d\n", idx, 4106 rc); 4107 return (rc); 4108 } 4109 MPASS(ofld_rxq->iq.flags & IQ_HW_ALLOCATED); 4110 } 4111 return (rc); 4112 } 4113 4114 /* 4115 * Idempotent. 
4116 */ 4117 static void 4118 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 4119 { 4120 if (ofld_rxq->iq.flags & IQ_HW_ALLOCATED) { 4121 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4122 free_iq_fl_hwq(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4123 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4124 } 4125 4126 if (ofld_rxq->iq.flags & IQ_SW_ALLOCATED) { 4127 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4128 free_iq_fl(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4129 MPASS(!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)); 4130 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_ok); 4131 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_error); 4132 bzero(ofld_rxq, sizeof(*ofld_rxq)); 4133 } 4134 } 4135 4136 static void 4137 add_ofld_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4138 struct sge_ofld_rxq *ofld_rxq) 4139 { 4140 struct sysctl_oid_list *children; 4141 4142 if (ctx == NULL || oid == NULL) 4143 return; 4144 4145 children = SYSCTL_CHILDREN(oid); 4146 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4147 "rx_toe_tls_records", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_records, 4148 "# of TOE TLS records received"); 4149 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4150 "rx_toe_tls_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_octets, 4151 "# of payload octets in received TOE TLS records"); 4152 4153 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "iscsi", 4154 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE iSCSI statistics"); 4155 children = SYSCTL_CHILDREN(oid); 4156 4157 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_ok", 4158 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_ok, 4159 "# of times DDP buffer was setup successfully."); 4160 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_error", 4161 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_error, 4162 "# of times DDP buffer setup failed."); 4163 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_octets", 4164 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_octets, 0, 4165 "# of octets placed directly"); 4166 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_pdus", 4167 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_pdus, 0, 4168 "# of PDUs with data placed directly."); 4169 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_octets", 4170 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_octets, 0, 4171 "# of data octets delivered in freelist"); 4172 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_pdus", 4173 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_pdus, 0, 4174 "# of PDUs with data delivered in freelist"); 4175 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "padding_errors", 4176 CTLFLAG_RD, &ofld_rxq->rx_iscsi_padding_errors, 0, 4177 "# of PDUs with invalid padding"); 4178 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "header_digest_errors", 4179 CTLFLAG_RD, &ofld_rxq->rx_iscsi_header_digest_errors, 0, 4180 "# of PDUs with invalid header digests"); 4181 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "data_digest_errors", 4182 CTLFLAG_RD, &ofld_rxq->rx_iscsi_data_digest_errors, 0, 4183 "# of PDUs with invalid data digests"); 4184 } 4185 #endif 4186 4187 /* 4188 * Returns a reasonable automatic cidx flush threshold for a given queue size. 
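 * The size is rounded up to a power of 2 and its log2 is used as the
 * CIDXFLUSHTHRESH encoding, subject to the X_CIDXFLUSHTHRESH_128 cap.  For
 * example, a qsize of 1040 rounds up to 2048 and yields ilog2(2048) = 11
 * before the cap is applied.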
4189 */ 4190 static u_int 4191 qsize_to_fthresh(int qsize) 4192 { 4193 u_int fthresh; 4194 4195 while (!powerof2(qsize)) 4196 qsize++; 4197 fthresh = ilog2(qsize); 4198 if (fthresh > X_CIDXFLUSHTHRESH_128) 4199 fthresh = X_CIDXFLUSHTHRESH_128; 4200 4201 return (fthresh); 4202 } 4203 4204 static int 4205 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 4206 { 4207 int rc, cntxt_id; 4208 struct fw_eq_ctrl_cmd c; 4209 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4210 4211 bzero(&c, sizeof(c)); 4212 4213 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 4214 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 4215 V_FW_EQ_CTRL_CMD_VFN(0)); 4216 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 4217 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 4218 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 4219 c.physeqid_pkd = htobe32(0); 4220 c.fetchszm_to_iqid = 4221 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4222 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 4223 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 4224 c.dcaen_to_eqsize = 4225 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 4226 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4227 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4228 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4229 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 4230 c.eqaddr = htobe64(eq->ba); 4231 4232 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4233 if (rc != 0) { 4234 CH_ERR(sc, "failed to create hw ctrlq for tx_chan %d: %d\n", 4235 eq->tx_chan, rc); 4236 return (rc); 4237 } 4238 4239 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 4240 eq->abs_id = G_FW_EQ_CTRL_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4241 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4242 if (cntxt_id >= sc->sge.eqmap_sz) 4243 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4244 cntxt_id, sc->sge.eqmap_sz - 1); 4245 sc->sge.eqmap[cntxt_id] = eq; 4246 4247 return (rc); 4248 } 4249 4250 static int 4251 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4252 { 4253 int rc, cntxt_id; 4254 struct fw_eq_eth_cmd c; 4255 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4256 4257 bzero(&c, sizeof(c)); 4258 4259 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 4260 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 4261 V_FW_EQ_ETH_CMD_VFN(0)); 4262 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 4263 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 4264 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 4265 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 4266 c.fetchszm_to_iqid = 4267 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 4268 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 4269 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 4270 c.dcaen_to_eqsize = 4271 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4272 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4273 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4274 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 4275 c.eqaddr = htobe64(eq->ba); 4276 4277 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4278 if (rc != 0) { 4279 device_printf(vi->dev, 4280 "failed to create Ethernet egress queue: %d\n", rc); 4281 return (rc); 4282 } 4283 4284 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 4285 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4286 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4287 if (cntxt_id >= sc->sge.eqmap_sz) 4288 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4289 cntxt_id, sc->sge.eqmap_sz - 1); 4290 sc->sge.eqmap[cntxt_id] = eq; 4291 4292 return (rc); 4293 } 4294 4295 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4296 static int 4297 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4298 { 4299 int rc, cntxt_id; 4300 struct fw_eq_ofld_cmd c; 4301 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4302 4303 bzero(&c, sizeof(c)); 4304 4305 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 4306 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 4307 V_FW_EQ_OFLD_CMD_VFN(0)); 4308 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 4309 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 4310 c.fetchszm_to_iqid = 4311 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4312 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 4313 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 4314 c.dcaen_to_eqsize = 4315 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 4316 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4317 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4318 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4319 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 4320 c.eqaddr = htobe64(eq->ba); 4321 4322 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4323 if (rc != 0) { 4324 device_printf(vi->dev, 4325 "failed to create egress queue for TCP offload: %d\n", rc); 4326 return (rc); 4327 } 4328 4329 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 4330 eq->abs_id = G_FW_EQ_OFLD_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4331 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4332 if (cntxt_id >= sc->sge.eqmap_sz) 4333 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4334 cntxt_id, sc->sge.eqmap_sz - 1); 4335 sc->sge.eqmap[cntxt_id] = eq; 4336 4337 return (rc); 4338 } 4339 #endif 4340 4341 /* SW only */ 4342 static int 4343 alloc_eq(struct adapter *sc, struct sge_eq *eq, struct sysctl_ctx_list *ctx, 4344 struct sysctl_oid *oid) 4345 { 4346 int rc, qsize; 4347 size_t len; 4348 4349 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4350 4351 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4352 len = qsize * EQ_ESIZE; 4353 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, 4354 (void **)&eq->desc); 4355 if (rc) 4356 return (rc); 4357 if (ctx != NULL && oid != NULL) 4358 add_eq_sysctls(sc, ctx, oid, eq); 4359 eq->flags |= EQ_SW_ALLOCATED; 4360 4361 return (0); 4362 } 4363 4364 /* SW only */ 4365 static void 4366 free_eq(struct adapter *sc, struct sge_eq *eq) 4367 { 4368 MPASS(eq->flags & EQ_SW_ALLOCATED); 4369 if (eq->type == EQ_ETH) 4370 MPASS(eq->pidx == eq->cidx); 4371 4372 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 4373 mtx_destroy(&eq->eq_lock); 4374 bzero(eq, sizeof(*eq)); 4375 } 4376 4377 static void 4378 add_eq_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 4379 struct sysctl_oid *oid, 
struct sge_eq *eq) 4380 { 4381 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4382 4383 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, 4384 "bus address of descriptor ring"); 4385 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4386 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4387 "desc ring size in bytes"); 4388 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4389 &eq->abs_id, 0, "absolute id of the queue"); 4390 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4391 &eq->cntxt_id, 0, "SGE context id of the queue"); 4392 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &eq->cidx, 4393 0, "consumer index"); 4394 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &eq->pidx, 4395 0, "producer index"); 4396 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4397 eq->sidx, "status page index"); 4398 } 4399 4400 static int 4401 alloc_eq_hwq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4402 { 4403 int rc; 4404 4405 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4406 4407 eq->iqid = eq->iq->cntxt_id; 4408 eq->pidx = eq->cidx = eq->dbidx = 0; 4409 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. */ 4410 eq->equeqidx = 0; 4411 eq->doorbells = sc->doorbells; 4412 bzero(eq->desc, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len); 4413 4414 switch (eq->type) { 4415 case EQ_CTRL: 4416 rc = ctrl_eq_alloc(sc, eq); 4417 break; 4418 4419 case EQ_ETH: 4420 rc = eth_eq_alloc(sc, vi, eq); 4421 break; 4422 4423 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4424 case EQ_OFLD: 4425 rc = ofld_eq_alloc(sc, vi, eq); 4426 break; 4427 #endif 4428 4429 default: 4430 panic("%s: invalid eq type %d.", __func__, eq->type); 4431 } 4432 if (rc != 0) { 4433 CH_ERR(sc, "failed to allocate egress queue(%d): %d\n", 4434 eq->type, rc); 4435 return (rc); 4436 } 4437 4438 if (isset(&eq->doorbells, DOORBELL_UDB) || 4439 isset(&eq->doorbells, DOORBELL_UDBWC) || 4440 isset(&eq->doorbells, DOORBELL_WCWR)) { 4441 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 4442 uint32_t mask = (1 << s_qpp) - 1; 4443 volatile uint8_t *udb; 4444 4445 udb = sc->udbs_base + UDBS_DB_OFFSET; 4446 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 4447 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 4448 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 4449 clrbit(&eq->doorbells, DOORBELL_WCWR); 4450 else { 4451 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 4452 eq->udb_qid = 0; 4453 } 4454 eq->udb = (volatile void *)udb; 4455 } 4456 4457 eq->flags |= EQ_HW_ALLOCATED; 4458 return (0); 4459 } 4460 4461 static int 4462 free_eq_hwq(struct adapter *sc, struct vi_info *vi __unused, struct sge_eq *eq) 4463 { 4464 int rc; 4465 4466 MPASS(eq->flags & EQ_HW_ALLOCATED); 4467 4468 switch (eq->type) { 4469 case EQ_CTRL: 4470 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4471 break; 4472 case EQ_ETH: 4473 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4474 break; 4475 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4476 case EQ_OFLD: 4477 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4478 break; 4479 #endif 4480 default: 4481 panic("%s: invalid eq type %d.", __func__, eq->type); 4482 } 4483 if (rc != 0) { 4484 CH_ERR(sc, "failed to free eq (type %d): %d\n", eq->type, rc); 4485 return (rc); 4486 } 4487 eq->flags &= ~EQ_HW_ALLOCATED; 4488 4489 return (0); 4490 } 4491 4492 static int 4493 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 4494 struct 
sysctl_ctx_list *ctx, struct sysctl_oid *oid) 4495 { 4496 struct sge_eq *eq = &wrq->eq; 4497 int rc; 4498 4499 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4500 4501 rc = alloc_eq(sc, eq, ctx, oid); 4502 if (rc) 4503 return (rc); 4504 MPASS(eq->flags & EQ_SW_ALLOCATED); 4505 /* Can't fail after this. */ 4506 4507 wrq->adapter = sc; 4508 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 4509 TAILQ_INIT(&wrq->incomplete_wrs); 4510 STAILQ_INIT(&wrq->wr_list); 4511 wrq->nwr_pending = 0; 4512 wrq->ndesc_needed = 0; 4513 add_wrq_sysctls(ctx, oid, wrq); 4514 4515 return (0); 4516 } 4517 4518 static void 4519 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 4520 { 4521 free_eq(sc, &wrq->eq); 4522 MPASS(wrq->nwr_pending == 0); 4523 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 4524 MPASS(STAILQ_EMPTY(&wrq->wr_list)); 4525 bzero(wrq, sizeof(*wrq)); 4526 } 4527 4528 static void 4529 add_wrq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4530 struct sge_wrq *wrq) 4531 { 4532 struct sysctl_oid_list *children; 4533 4534 if (ctx == NULL || oid == NULL) 4535 return; 4536 4537 children = SYSCTL_CHILDREN(oid); 4538 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 4539 &wrq->tx_wrs_direct, "# of work requests (direct)"); 4540 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 4541 &wrq->tx_wrs_copied, "# of work requests (copied)"); 4542 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 4543 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 4544 } 4545 4546 /* 4547 * Idempotent. 4548 */ 4549 static int 4550 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx) 4551 { 4552 int rc, iqidx; 4553 struct port_info *pi = vi->pi; 4554 struct adapter *sc = vi->adapter; 4555 struct sge_eq *eq = &txq->eq; 4556 struct txpkts *txp; 4557 char name[16]; 4558 struct sysctl_oid *oid; 4559 4560 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4561 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4562 4563 snprintf(name, sizeof(name), "%d", idx); 4564 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->txq_oid), 4565 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 4566 "tx queue"); 4567 4568 iqidx = vi->first_rxq + (idx % vi->nrxq); 4569 snprintf(name, sizeof(name), "%s txq%d", 4570 device_get_nameunit(vi->dev), idx); 4571 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 4572 &sc->sge.rxq[iqidx].iq, name); 4573 4574 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, 4575 can_resume_eth_tx, M_CXGBE, &eq->eq_lock, M_WAITOK); 4576 if (rc != 0) { 4577 CH_ERR(vi, "failed to allocate mp_ring for txq%d: %d\n", 4578 idx, rc); 4579 failed: 4580 sysctl_remove_oid(oid, 1, 1); 4581 return (rc); 4582 } 4583 4584 rc = alloc_eq(sc, eq, &vi->ctx, oid); 4585 if (rc) { 4586 CH_ERR(vi, "failed to allocate txq%d: %d\n", idx, rc); 4587 mp_ring_free(txq->r); 4588 goto failed; 4589 } 4590 MPASS(eq->flags & EQ_SW_ALLOCATED); 4591 /* Can't fail after this point. */ 4592 4593 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4594 txq->ifp = vi->ifp; 4595 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4596 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4597 M_ZERO | M_WAITOK); 4598 4599 add_txq_sysctls(vi, &vi->ctx, oid, txq); 4600 } 4601 4602 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4603 MPASS(eq->flags & EQ_SW_ALLOCATED); 4604 rc = alloc_eq_hwq(sc, vi, eq); 4605 if (rc != 0) { 4606 CH_ERR(vi, "failed to create hw txq%d: %d\n", idx, rc); 4607 return (rc); 4608 } 4609 MPASS(eq->flags & EQ_HW_ALLOCATED); 4610 /* Can't fail after this point. 
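 * Everything below only records bookkeeping derived from the ids the
 * firmware returned (eq_base, the precomputed CPL control word, the
 * txpkts packet limit) and cannot fail.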
*/ 4611 4612 if (idx == 0) 4613 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4614 else 4615 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4616 ("eq_base mismatch")); 4617 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4618 ("PF with non-zero eq_base")); 4619 4620 txp = &txq->txp; 4621 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr); 4622 txq->txp.max_npkt = min(nitems(txp->mb), 4623 sc->params.max_pkts_per_eth_tx_pkts_wr); 4624 if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF)) 4625 txq->txp.max_npkt--; 4626 4627 if (vi->flags & TX_USES_VM_WR) 4628 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4629 V_TXPKT_INTF(pi->tx_chan)); 4630 else 4631 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4632 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4633 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4634 4635 txq->tc_idx = -1; 4636 } 4637 4638 return (0); 4639 } 4640 4641 /* 4642 * Idempotent. 4643 */ 4644 static void 4645 free_txq(struct vi_info *vi, struct sge_txq *txq) 4646 { 4647 struct adapter *sc = vi->adapter; 4648 struct sge_eq *eq = &txq->eq; 4649 4650 if (eq->flags & EQ_HW_ALLOCATED) { 4651 MPASS(eq->flags & EQ_SW_ALLOCATED); 4652 free_eq_hwq(sc, NULL, eq); 4653 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4654 } 4655 4656 if (eq->flags & EQ_SW_ALLOCATED) { 4657 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4658 sglist_free(txq->gl); 4659 free(txq->sdesc, M_CXGBE); 4660 mp_ring_free(txq->r); 4661 free_eq(sc, eq); 4662 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4663 bzero(txq, sizeof(*txq)); 4664 } 4665 } 4666 4667 static void 4668 add_txq_sysctls(struct vi_info *vi, struct sysctl_ctx_list *ctx, 4669 struct sysctl_oid *oid, struct sge_txq *txq) 4670 { 4671 struct adapter *sc; 4672 struct sysctl_oid_list *children; 4673 4674 if (ctx == NULL || oid == NULL) 4675 return; 4676 4677 sc = vi->adapter; 4678 children = SYSCTL_CHILDREN(oid); 4679 4680 mp_ring_sysctls(txq->r, ctx, children); 4681 4682 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tc", 4683 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, txq - sc->sge.txq, 4684 sysctl_tc, "I", "traffic class (-1 means none)"); 4685 4686 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4687 &txq->txcsum, "# of times hardware assisted with checksum"); 4688 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, 4689 &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); 4690 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4691 &txq->tso_wrs, "# of TSO work requests"); 4692 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4693 &txq->imm_wrs, "# of work requests with immediate data"); 4694 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4695 &txq->sgl_wrs, "# of work requests with direct SGL"); 4696 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4697 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4698 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, 4699 &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); 4700 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, 4701 &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); 4702 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, 4703 &txq->txpkts0_pkts, 4704 "# of frames tx'd using type0 txpkts work requests"); 4705 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, 4706 &txq->txpkts1_pkts, 4707 "# of frames tx'd using type1 txpkts work requests"); 4708 
SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts_flush", CTLFLAG_RD, 4709 &txq->txpkts_flush, 4710 "# of times txpkts had to be flushed out by an egress-update"); 4711 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, 4712 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4713 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_tso_wrs", CTLFLAG_RD, 4714 &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests"); 4715 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_txcsum", CTLFLAG_RD, 4716 &txq->vxlan_txcsum, 4717 "# of times hardware assisted with inner checksums (VXLAN)"); 4718 4719 #ifdef KERN_TLS 4720 if (is_ktls(sc)) { 4721 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_records", 4722 CTLFLAG_RD, &txq->kern_tls_records, 4723 "# of NIC TLS records transmitted"); 4724 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_short", 4725 CTLFLAG_RD, &txq->kern_tls_short, 4726 "# of short NIC TLS records transmitted"); 4727 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_partial", 4728 CTLFLAG_RD, &txq->kern_tls_partial, 4729 "# of partial NIC TLS records transmitted"); 4730 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_full", 4731 CTLFLAG_RD, &txq->kern_tls_full, 4732 "# of full NIC TLS records transmitted"); 4733 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_octets", 4734 CTLFLAG_RD, &txq->kern_tls_octets, 4735 "# of payload octets in transmitted NIC TLS records"); 4736 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_waste", 4737 CTLFLAG_RD, &txq->kern_tls_waste, 4738 "# of octets DMAd but not transmitted in NIC TLS records"); 4739 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_options", 4740 CTLFLAG_RD, &txq->kern_tls_options, 4741 "# of NIC TLS options-only packets transmitted"); 4742 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_header", 4743 CTLFLAG_RD, &txq->kern_tls_header, 4744 "# of NIC TLS header-only packets transmitted"); 4745 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin", 4746 CTLFLAG_RD, &txq->kern_tls_fin, 4747 "# of NIC TLS FIN-only packets transmitted"); 4748 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin_short", 4749 CTLFLAG_RD, &txq->kern_tls_fin_short, 4750 "# of NIC TLS padded FIN packets on short TLS records"); 4751 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_cbc", 4752 CTLFLAG_RD, &txq->kern_tls_cbc, 4753 "# of NIC TLS sessions using AES-CBC"); 4754 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_gcm", 4755 CTLFLAG_RD, &txq->kern_tls_gcm, 4756 "# of NIC TLS sessions using AES-GCM"); 4757 } 4758 #endif 4759 } 4760 4761 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4762 /* 4763 * Idempotent. 
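 * The offload tx queue is a wrq whose iqid comes from an offload rx queue if
 * the VI has any, or from a regular NIC rx queue otherwise.  Its software
 * stage (sysctl node, wrq, counters) and hardware stage (alloc_eq_hwq) are
 * guarded by EQ_SW_ALLOCATED and EQ_HW_ALLOCATED respectively.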
4764 */ 4765 static int 4766 alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx) 4767 { 4768 struct sysctl_oid *oid; 4769 struct port_info *pi = vi->pi; 4770 struct adapter *sc = vi->adapter; 4771 struct sge_eq *eq = &ofld_txq->wrq.eq; 4772 int rc, iqidx; 4773 char name[16]; 4774 4775 MPASS(idx >= 0); 4776 MPASS(idx < vi->nofldtxq); 4777 4778 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4779 snprintf(name, sizeof(name), "%d", idx); 4780 oid = SYSCTL_ADD_NODE(&vi->ctx, 4781 SYSCTL_CHILDREN(vi->ofld_txq_oid), OID_AUTO, name, 4782 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue"); 4783 4784 snprintf(name, sizeof(name), "%s ofld_txq%d", 4785 device_get_nameunit(vi->dev), idx); 4786 if (vi->nofldrxq > 0) { 4787 iqidx = vi->first_ofld_rxq + (idx % vi->nofldrxq); 4788 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4789 &sc->sge.ofld_rxq[iqidx].iq, name); 4790 } else { 4791 iqidx = vi->first_rxq + (idx % vi->nrxq); 4792 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4793 &sc->sge.rxq[iqidx].iq, name); 4794 } 4795 4796 rc = alloc_wrq(sc, vi, &ofld_txq->wrq, &vi->ctx, oid); 4797 if (rc != 0) { 4798 CH_ERR(vi, "failed to allocate ofld_txq%d: %d\n", idx, 4799 rc); 4800 sysctl_remove_oid(oid, 1, 1); 4801 return (rc); 4802 } 4803 MPASS(eq->flags & EQ_SW_ALLOCATED); 4804 /* Can't fail after this point. */ 4805 4806 ofld_txq->tx_iscsi_pdus = counter_u64_alloc(M_WAITOK); 4807 ofld_txq->tx_iscsi_octets = counter_u64_alloc(M_WAITOK); 4808 ofld_txq->tx_iscsi_iso_wrs = counter_u64_alloc(M_WAITOK); 4809 ofld_txq->tx_toe_tls_records = counter_u64_alloc(M_WAITOK); 4810 ofld_txq->tx_toe_tls_octets = counter_u64_alloc(M_WAITOK); 4811 add_ofld_txq_sysctls(&vi->ctx, oid, ofld_txq); 4812 } 4813 4814 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4815 rc = alloc_eq_hwq(sc, vi, eq); 4816 if (rc != 0) { 4817 CH_ERR(vi, "failed to create hw ofld_txq%d: %d\n", idx, 4818 rc); 4819 return (rc); 4820 } 4821 MPASS(eq->flags & EQ_HW_ALLOCATED); 4822 } 4823 4824 return (0); 4825 } 4826 4827 /* 4828 * Idempotent. 
4829 */ 4830 static void 4831 free_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq) 4832 { 4833 struct adapter *sc = vi->adapter; 4834 struct sge_eq *eq = &ofld_txq->wrq.eq; 4835 4836 if (eq->flags & EQ_HW_ALLOCATED) { 4837 MPASS(eq->flags & EQ_SW_ALLOCATED); 4838 free_eq_hwq(sc, NULL, eq); 4839 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4840 } 4841 4842 if (eq->flags & EQ_SW_ALLOCATED) { 4843 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4844 counter_u64_free(ofld_txq->tx_iscsi_pdus); 4845 counter_u64_free(ofld_txq->tx_iscsi_octets); 4846 counter_u64_free(ofld_txq->tx_iscsi_iso_wrs); 4847 counter_u64_free(ofld_txq->tx_toe_tls_records); 4848 counter_u64_free(ofld_txq->tx_toe_tls_octets); 4849 free_wrq(sc, &ofld_txq->wrq); 4850 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4851 bzero(ofld_txq, sizeof(*ofld_txq)); 4852 } 4853 } 4854 4855 static void 4856 add_ofld_txq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4857 struct sge_ofld_txq *ofld_txq) 4858 { 4859 struct sysctl_oid_list *children; 4860 4861 if (ctx == NULL || oid == NULL) 4862 return; 4863 4864 children = SYSCTL_CHILDREN(oid); 4865 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_pdus", 4866 CTLFLAG_RD, &ofld_txq->tx_iscsi_pdus, 4867 "# of iSCSI PDUs transmitted"); 4868 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_octets", 4869 CTLFLAG_RD, &ofld_txq->tx_iscsi_octets, 4870 "# of payload octets in transmitted iSCSI PDUs"); 4871 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_iso_wrs", 4872 CTLFLAG_RD, &ofld_txq->tx_iscsi_iso_wrs, 4873 "# of iSCSI segmentation offload work requests"); 4874 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_records", 4875 CTLFLAG_RD, &ofld_txq->tx_toe_tls_records, 4876 "# of TOE TLS records transmitted"); 4877 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_octets", 4878 CTLFLAG_RD, &ofld_txq->tx_toe_tls_octets, 4879 "# of payload octets in transmitted TOE TLS records"); 4880 } 4881 #endif 4882 4883 static void 4884 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4885 { 4886 bus_addr_t *ba = arg; 4887 4888 KASSERT(nseg == 1, 4889 ("%s meant for single segment mappings only.", __func__)); 4890 4891 *ba = error ? 0 : segs->ds_addr; 4892 } 4893 4894 static inline void 4895 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4896 { 4897 uint32_t n, v; 4898 4899 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); 4900 MPASS(n > 0); 4901 4902 wmb(); 4903 v = fl->dbval | V_PIDX(n); 4904 if (fl->udb) 4905 *fl->udb = htole32(v); 4906 else 4907 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4908 IDXINCR(fl->dbidx, n, fl->sidx); 4909 } 4910 4911 /* 4912 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4913 * recycled do not count towards this allocation budget. 4914 * 4915 * Returns non-zero to indicate that this freelist should be added to the list 4916 * of starving freelists. 4917 */ 4918 static int 4919 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 4920 { 4921 __be64 *d; 4922 struct fl_sdesc *sd; 4923 uintptr_t pa; 4924 caddr_t cl; 4925 struct rx_buf_info *rxb; 4926 struct cluster_metadata *clm; 4927 uint16_t max_pidx, zidx = fl->zidx; 4928 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 4929 4930 FL_LOCK_ASSERT_OWNED(fl); 4931 4932 /* 4933 * We always stop at the beginning of the hardware descriptor that's just 4934 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 4935 * which would mean an empty freelist to the chip. 
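 * (The chip reads pidx == cidx as an empty freelist, so a completely full
 * ring would be indistinguishable from an empty one; one hardware descriptor
 * is therefore always left unfilled.)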
4936 */ 4937 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 4938 if (fl->pidx == max_pidx * 8) 4939 return (0); 4940 4941 d = &fl->desc[fl->pidx]; 4942 sd = &fl->sdesc[fl->pidx]; 4943 rxb = &sc->sge.rx_buf_info[zidx]; 4944 4945 while (n > 0) { 4946 4947 if (sd->cl != NULL) { 4948 4949 if (sd->nmbuf == 0) { 4950 /* 4951 * Fast recycle without involving any atomics on 4952 * the cluster's metadata (if the cluster has 4953 * metadata). This happens when all frames 4954 * received in the cluster were small enough to 4955 * fit within a single mbuf each. 4956 */ 4957 fl->cl_fast_recycled++; 4958 goto recycled; 4959 } 4960 4961 /* 4962 * Cluster is guaranteed to have metadata. Clusters 4963 * without metadata always take the fast recycle path 4964 * when they're recycled. 4965 */ 4966 clm = cl_metadata(sd); 4967 MPASS(clm != NULL); 4968 4969 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4970 fl->cl_recycled++; 4971 counter_u64_add(extfree_rels, 1); 4972 goto recycled; 4973 } 4974 sd->cl = NULL; /* gave up my reference */ 4975 } 4976 MPASS(sd->cl == NULL); 4977 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4978 if (__predict_false(cl == NULL)) { 4979 if (zidx != fl->safe_zidx) { 4980 zidx = fl->safe_zidx; 4981 rxb = &sc->sge.rx_buf_info[zidx]; 4982 cl = uma_zalloc(rxb->zone, M_NOWAIT); 4983 } 4984 if (cl == NULL) 4985 break; 4986 } 4987 fl->cl_allocated++; 4988 n--; 4989 4990 pa = pmap_kextract((vm_offset_t)cl); 4991 sd->cl = cl; 4992 sd->zidx = zidx; 4993 4994 if (fl->flags & FL_BUF_PACKING) { 4995 *d = htobe64(pa | rxb->hwidx2); 4996 sd->moff = rxb->size2; 4997 } else { 4998 *d = htobe64(pa | rxb->hwidx1); 4999 sd->moff = 0; 5000 } 5001 recycled: 5002 sd->nmbuf = 0; 5003 d++; 5004 sd++; 5005 if (__predict_false((++fl->pidx & 7) == 0)) { 5006 uint16_t pidx = fl->pidx >> 3; 5007 5008 if (__predict_false(pidx == fl->sidx)) { 5009 fl->pidx = 0; 5010 pidx = 0; 5011 sd = fl->sdesc; 5012 d = fl->desc; 5013 } 5014 if (n < 8 || pidx == max_pidx) 5015 break; 5016 5017 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 5018 ring_fl_db(sc, fl); 5019 } 5020 } 5021 5022 if ((fl->pidx >> 3) != fl->dbidx) 5023 ring_fl_db(sc, fl); 5024 5025 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 5026 } 5027 5028 /* 5029 * Attempt to refill all starving freelists. 5030 */ 5031 static void 5032 refill_sfl(void *arg) 5033 { 5034 struct adapter *sc = arg; 5035 struct sge_fl *fl, *fl_temp; 5036 5037 mtx_assert(&sc->sfl_lock, MA_OWNED); 5038 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 5039 FL_LOCK(fl); 5040 refill_fl(sc, fl, 64); 5041 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 5042 TAILQ_REMOVE(&sc->sfl, fl, link); 5043 fl->flags &= ~FL_STARVING; 5044 } 5045 FL_UNLOCK(fl); 5046 } 5047 5048 if (!TAILQ_EMPTY(&sc->sfl)) 5049 callout_schedule(&sc->sfl_callout, hz / 5); 5050 } 5051 5052 /* 5053 * Release the driver's reference on all buffers in the given freelist. Buffers 5054 * with kernel references cannot be freed and will prevent the driver from being 5055 * unloaded safely. 
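 * A cluster is returned to its UMA zone only if no mbufs reference it
 * (nmbuf == 0) or, on a packing freelist, if dropping the driver's reference
 * here takes the metadata refcount to zero; otherwise only the driver's
 * reference is given up.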
5056 */ 5057 void 5058 free_fl_buffers(struct adapter *sc, struct sge_fl *fl) 5059 { 5060 struct fl_sdesc *sd; 5061 struct cluster_metadata *clm; 5062 int i; 5063 5064 sd = fl->sdesc; 5065 for (i = 0; i < fl->sidx * 8; i++, sd++) { 5066 if (sd->cl == NULL) 5067 continue; 5068 5069 if (sd->nmbuf == 0) 5070 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); 5071 else if (fl->flags & FL_BUF_PACKING) { 5072 clm = cl_metadata(sd); 5073 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 5074 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, 5075 sd->cl); 5076 counter_u64_add(extfree_rels, 1); 5077 } 5078 } 5079 sd->cl = NULL; 5080 } 5081 5082 if (fl->flags & FL_BUF_RESUME) { 5083 m_freem(fl->m0); 5084 fl->flags &= ~FL_BUF_RESUME; 5085 } 5086 } 5087 5088 static inline void 5089 get_pkt_gl(struct mbuf *m, struct sglist *gl) 5090 { 5091 int rc; 5092 5093 M_ASSERTPKTHDR(m); 5094 5095 sglist_reset(gl); 5096 rc = sglist_append_mbuf(gl, m); 5097 if (__predict_false(rc != 0)) { 5098 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 5099 "with %d.", __func__, m, mbuf_nsegs(m), rc); 5100 } 5101 5102 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 5103 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 5104 mbuf_nsegs(m), gl->sg_nseg)); 5105 #if 0 /* vm_wr not readily available here. */ 5106 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr), 5107 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 5108 gl->sg_nseg, max_nsegs_allowed(m, vm_wr))); 5109 #endif 5110 } 5111 5112 /* 5113 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 5114 */ 5115 static inline u_int 5116 txpkt_len16(u_int nsegs, const u_int extra) 5117 { 5118 u_int n; 5119 5120 MPASS(nsegs > 0); 5121 5122 nsegs--; /* first segment is part of ulptx_sgl */ 5123 n = extra + sizeof(struct fw_eth_tx_pkt_wr) + 5124 sizeof(struct cpl_tx_pkt_core) + 5125 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5126 5127 return (howmany(n, 16)); 5128 } 5129 5130 /* 5131 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 5132 * request header. 5133 */ 5134 static inline u_int 5135 txpkt_vm_len16(u_int nsegs, const u_int extra) 5136 { 5137 u_int n; 5138 5139 MPASS(nsegs > 0); 5140 5141 nsegs--; /* first segment is part of ulptx_sgl */ 5142 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) + 5143 sizeof(struct cpl_tx_pkt_core) + 5144 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5145 5146 return (howmany(n, 16)); 5147 } 5148 5149 static inline void 5150 calculate_mbuf_len16(struct mbuf *m, bool vm_wr) 5151 { 5152 const int lso = sizeof(struct cpl_tx_pkt_lso_core); 5153 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso); 5154 5155 if (vm_wr) { 5156 if (needs_tso(m)) 5157 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso)); 5158 else 5159 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0)); 5160 return; 5161 } 5162 5163 if (needs_tso(m)) { 5164 if (needs_vxlan_tso(m)) 5165 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso)); 5166 else 5167 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso)); 5168 } else 5169 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0)); 5170 } 5171 5172 /* 5173 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 5174 * request header. 
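 * Each type 0 packet carries its own ulp_txpkt, ulptx_idata, and
 * cpl_tx_pkt_core ahead of its SGL.  The first segment is part of the
 * ulptx_sgl; the rest are packed as sge pairs at three flits per pair, with a
 * lone leftover segment taking two, which is what the
 * 8 * ((3 * nsegs) / 2 + (nsegs & 1)) term below works out to.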
5175 */ 5176 static inline u_int 5177 txpkts0_len16(u_int nsegs) 5178 { 5179 u_int n; 5180 5181 MPASS(nsegs > 0); 5182 5183 nsegs--; /* first segment is part of ulptx_sgl */ 5184 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 5185 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 5186 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5187 5188 return (howmany(n, 16)); 5189 } 5190 5191 /* 5192 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 5193 * request header. 5194 */ 5195 static inline u_int 5196 txpkts1_len16(void) 5197 { 5198 u_int n; 5199 5200 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 5201 5202 return (howmany(n, 16)); 5203 } 5204 5205 static inline u_int 5206 imm_payload(u_int ndesc) 5207 { 5208 u_int n; 5209 5210 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 5211 sizeof(struct cpl_tx_pkt_core); 5212 5213 return (n); 5214 } 5215 5216 static inline uint64_t 5217 csum_to_ctrl(struct adapter *sc, struct mbuf *m) 5218 { 5219 uint64_t ctrl; 5220 int csum_type, l2hlen, l3hlen; 5221 int x, y; 5222 static const int csum_types[3][2] = { 5223 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6}, 5224 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6}, 5225 {TX_CSUM_IP, 0} 5226 }; 5227 5228 M_ASSERTPKTHDR(m); 5229 5230 if (!needs_hwcsum(m)) 5231 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); 5232 5233 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN); 5234 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip)); 5235 5236 if (needs_vxlan_csum(m)) { 5237 MPASS(m->m_pkthdr.l4hlen > 0); 5238 MPASS(m->m_pkthdr.l5hlen > 0); 5239 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN); 5240 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip)); 5241 5242 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + 5243 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen + 5244 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN; 5245 l3hlen = m->m_pkthdr.inner_l3hlen; 5246 } else { 5247 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN; 5248 l3hlen = m->m_pkthdr.l3hlen; 5249 } 5250 5251 ctrl = 0; 5252 if (!needs_l3_csum(m)) 5253 ctrl |= F_TXPKT_IPCSUM_DIS; 5254 5255 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP | 5256 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP)) 5257 x = 0; /* TCP */ 5258 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP | 5259 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP)) 5260 x = 1; /* UDP */ 5261 else 5262 x = 2; 5263 5264 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | 5265 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) 5266 y = 0; /* IPv4 */ 5267 else { 5268 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | 5269 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)); 5270 y = 1; /* IPv6 */ 5271 } 5272 /* 5273 * needs_hwcsum returned true earlier so there must be some kind of 5274 * checksum to calculate. 
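 * The (x, y) pair indexes csum_types by L4 protocol (TCP, UDP, or neither)
 * and IP version.  The one impossible combination, an IP-only checksum over
 * IPv6, maps to 0 and would trip the MPASS below.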
5275 */ 5276 csum_type = csum_types[x][y]; 5277 MPASS(csum_type != 0); 5278 if (csum_type == TX_CSUM_IP) 5279 ctrl |= F_TXPKT_L4CSUM_DIS; 5280 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen); 5281 if (chip_id(sc) <= CHELSIO_T5) 5282 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen); 5283 else 5284 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen); 5285 5286 return (ctrl); 5287 } 5288 5289 static inline void * 5290 write_lso_cpl(void *cpl, struct mbuf *m0) 5291 { 5292 struct cpl_tx_pkt_lso_core *lso; 5293 uint32_t ctrl; 5294 5295 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5296 m0->m_pkthdr.l4hlen > 0, 5297 ("%s: mbuf %p needs TSO but missing header lengths", 5298 __func__, m0)); 5299 5300 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 5301 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 5302 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5303 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 5304 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 5305 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5306 ctrl |= F_LSO_IPV6; 5307 5308 lso = cpl; 5309 lso->lso_ctrl = htobe32(ctrl); 5310 lso->ipid_ofst = htobe16(0); 5311 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 5312 lso->seqno_offset = htobe32(0); 5313 lso->len = htobe32(m0->m_pkthdr.len); 5314 5315 return (lso + 1); 5316 } 5317 5318 static void * 5319 write_tnl_lso_cpl(void *cpl, struct mbuf *m0) 5320 { 5321 struct cpl_tx_tnl_lso *tnl_lso = cpl; 5322 uint32_t ctrl; 5323 5324 KASSERT(m0->m_pkthdr.inner_l2hlen > 0 && 5325 m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 && 5326 m0->m_pkthdr.inner_l5hlen > 0, 5327 ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths", 5328 __func__, m0)); 5329 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5330 m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0, 5331 ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths", 5332 __func__, m0)); 5333 5334 /* Outer headers. */ 5335 ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) | 5336 F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST | 5337 V_CPL_TX_TNL_LSO_ETHHDRLENOUT( 5338 (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5339 V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) | 5340 F_CPL_TX_TNL_LSO_IPLENSETOUT; 5341 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5342 ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT; 5343 else { 5344 ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT | 5345 F_CPL_TX_TNL_LSO_IPIDINCOUT; 5346 } 5347 tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl); 5348 tnl_lso->IpIdOffsetOut = 0; 5349 tnl_lso->UdpLenSetOut_to_TnlHdrLen = 5350 htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT | 5351 F_CPL_TX_TNL_LSO_UDPLENSETOUT | 5352 V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen + 5353 m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen + 5354 m0->m_pkthdr.l5hlen) | 5355 V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN)); 5356 tnl_lso->r1 = 0; 5357 5358 /* Inner headers. 
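 * The inner Ethernet, IP, and TCP header lengths are encoded the same way as
 * the outer ones above: in 4-byte units, with the fixed-size Ethernet header
 * excluded from the L2 length.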
*/ 5359 ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN( 5360 (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) | 5361 V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) | 5362 V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2); 5363 if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr)) 5364 ctrl |= F_CPL_TX_TNL_LSO_IPV6; 5365 tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl); 5366 tnl_lso->IpIdOffset = 0; 5367 tnl_lso->IpIdSplit_to_Mss = 5368 htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz)); 5369 tnl_lso->TCPSeqOffset = 0; 5370 tnl_lso->EthLenOffset_Size = 5371 htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len)); 5372 5373 return (tnl_lso + 1); 5374 } 5375 5376 #define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */ 5377 5378 /* 5379 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 5380 * software descriptor, and advance the pidx. It is guaranteed that enough 5381 * descriptors are available. 5382 * 5383 * The return value is the # of hardware descriptors used. 5384 */ 5385 static u_int 5386 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0) 5387 { 5388 struct sge_eq *eq; 5389 struct fw_eth_tx_pkt_vm_wr *wr; 5390 struct tx_sdesc *txsd; 5391 struct cpl_tx_pkt_core *cpl; 5392 uint32_t ctrl; /* used in many unrelated places */ 5393 uint64_t ctrl1; 5394 int len16, ndesc, pktlen; 5395 caddr_t dst; 5396 5397 TXQ_LOCK_ASSERT_OWNED(txq); 5398 M_ASSERTPKTHDR(m0); 5399 5400 len16 = mbuf_len16(m0); 5401 pktlen = m0->m_pkthdr.len; 5402 ctrl = sizeof(struct cpl_tx_pkt_core); 5403 if (needs_tso(m0)) 5404 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5405 ndesc = tx_len16_to_desc(len16); 5406 5407 /* Firmware work request header */ 5408 eq = &txq->eq; 5409 wr = (void *)&eq->desc[eq->pidx]; 5410 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 5411 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5412 5413 ctrl = V_FW_WR_LEN16(len16); 5414 wr->equiq_to_len16 = htobe32(ctrl); 5415 wr->r3[0] = 0; 5416 wr->r3[1] = 0; 5417 5418 /* 5419 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 5420 * vlantci is ignored unless the ethtype is 0x8100, so it's 5421 * simpler to always copy it rather than making it 5422 * conditional. Also, it seems that we do not have to set 5423 * vlantci or fake the ethtype when doing VLAN tag insertion. 5424 */ 5425 m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst); 5426 5427 if (needs_tso(m0)) { 5428 cpl = write_lso_cpl(wr + 1, m0); 5429 txq->tso_wrs++; 5430 } else 5431 cpl = (void *)(wr + 1); 5432 5433 /* Checksum offload */ 5434 ctrl1 = csum_to_ctrl(sc, m0); 5435 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5436 txq->txcsum++; /* some hardware assistance provided */ 5437 5438 /* VLAN tag insertion */ 5439 if (needs_vlan_insertion(m0)) { 5440 ctrl1 |= F_TXPKT_VLAN_VLD | 5441 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5442 txq->vlan_insertion++; 5443 } 5444 5445 /* CPL header */ 5446 cpl->ctrl0 = txq->cpl_ctrl0; 5447 cpl->pack = 0; 5448 cpl->len = htobe16(pktlen); 5449 cpl->ctrl1 = htobe64(ctrl1); 5450 5451 /* SGL */ 5452 dst = (void *)(cpl + 1); 5453 5454 /* 5455 * A packet using TSO will use up an entire descriptor for the 5456 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 5457 * If this descriptor is the last descriptor in the ring, wrap 5458 * around to the front of the ring explicitly for the start of 5459 * the sgl. 
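 * When that happens 'dst' sits exactly on &eq->desc[eq->sidx] and is moved
 * back to the start of the ring; write_gl_to_txd is then called with
 * checkwrap disabled since the SGL starts at the ring's base.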
5460 */ 5461 if (dst == (void *)&eq->desc[eq->sidx]) { 5462 dst = (void *)&eq->desc[0]; 5463 write_gl_to_txd(txq, m0, &dst, 0); 5464 } else 5465 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5466 txq->sgl_wrs++; 5467 txq->txpkt_wrs++; 5468 5469 txsd = &txq->sdesc[eq->pidx]; 5470 txsd->m = m0; 5471 txsd->desc_used = ndesc; 5472 5473 return (ndesc); 5474 } 5475 5476 /* 5477 * Write a raw WR to the hardware descriptors, update the software 5478 * descriptor, and advance the pidx. It is guaranteed that enough 5479 * descriptors are available. 5480 * 5481 * The return value is the # of hardware descriptors used. 5482 */ 5483 static u_int 5484 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 5485 { 5486 struct sge_eq *eq = &txq->eq; 5487 struct tx_sdesc *txsd; 5488 struct mbuf *m; 5489 caddr_t dst; 5490 int len16, ndesc; 5491 5492 len16 = mbuf_len16(m0); 5493 ndesc = tx_len16_to_desc(len16); 5494 MPASS(ndesc <= available); 5495 5496 dst = wr; 5497 for (m = m0; m != NULL; m = m->m_next) 5498 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5499 5500 txq->raw_wrs++; 5501 5502 txsd = &txq->sdesc[eq->pidx]; 5503 txsd->m = m0; 5504 txsd->desc_used = ndesc; 5505 5506 return (ndesc); 5507 } 5508 5509 /* 5510 * Write a txpkt WR for this packet to the hardware descriptors, update the 5511 * software descriptor, and advance the pidx. It is guaranteed that enough 5512 * descriptors are available. 5513 * 5514 * The return value is the # of hardware descriptors used. 5515 */ 5516 static u_int 5517 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0, 5518 u_int available) 5519 { 5520 struct sge_eq *eq; 5521 struct fw_eth_tx_pkt_wr *wr; 5522 struct tx_sdesc *txsd; 5523 struct cpl_tx_pkt_core *cpl; 5524 uint32_t ctrl; /* used in many unrelated places */ 5525 uint64_t ctrl1; 5526 int len16, ndesc, pktlen, nsegs; 5527 caddr_t dst; 5528 5529 TXQ_LOCK_ASSERT_OWNED(txq); 5530 M_ASSERTPKTHDR(m0); 5531 5532 len16 = mbuf_len16(m0); 5533 nsegs = mbuf_nsegs(m0); 5534 pktlen = m0->m_pkthdr.len; 5535 ctrl = sizeof(struct cpl_tx_pkt_core); 5536 if (needs_tso(m0)) { 5537 if (needs_vxlan_tso(m0)) 5538 ctrl += sizeof(struct cpl_tx_tnl_lso); 5539 else 5540 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5541 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && 5542 available >= 2) { 5543 /* Immediate data. Recalculate len16 and set nsegs to 0. 
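 * Small packets that are not marked MC_NOMAP and fit within the immediate
 * payload of two descriptors are copied into the WR itself; no SGL is
 * written for them.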
*/ 5544 ctrl += pktlen; 5545 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 5546 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 5547 nsegs = 0; 5548 } 5549 ndesc = tx_len16_to_desc(len16); 5550 MPASS(ndesc <= available); 5551 5552 /* Firmware work request header */ 5553 eq = &txq->eq; 5554 wr = (void *)&eq->desc[eq->pidx]; 5555 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 5556 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5557 5558 ctrl = V_FW_WR_LEN16(len16); 5559 wr->equiq_to_len16 = htobe32(ctrl); 5560 wr->r3 = 0; 5561 5562 if (needs_tso(m0)) { 5563 if (needs_vxlan_tso(m0)) { 5564 cpl = write_tnl_lso_cpl(wr + 1, m0); 5565 txq->vxlan_tso_wrs++; 5566 } else { 5567 cpl = write_lso_cpl(wr + 1, m0); 5568 txq->tso_wrs++; 5569 } 5570 } else 5571 cpl = (void *)(wr + 1); 5572 5573 /* Checksum offload */ 5574 ctrl1 = csum_to_ctrl(sc, m0); 5575 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5576 /* some hardware assistance provided */ 5577 if (needs_vxlan_csum(m0)) 5578 txq->vxlan_txcsum++; 5579 else 5580 txq->txcsum++; 5581 } 5582 5583 /* VLAN tag insertion */ 5584 if (needs_vlan_insertion(m0)) { 5585 ctrl1 |= F_TXPKT_VLAN_VLD | 5586 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5587 txq->vlan_insertion++; 5588 } 5589 5590 /* CPL header */ 5591 cpl->ctrl0 = txq->cpl_ctrl0; 5592 cpl->pack = 0; 5593 cpl->len = htobe16(pktlen); 5594 cpl->ctrl1 = htobe64(ctrl1); 5595 5596 /* SGL */ 5597 dst = (void *)(cpl + 1); 5598 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx])) 5599 dst = (caddr_t)&eq->desc[0]; 5600 if (nsegs > 0) { 5601 5602 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5603 txq->sgl_wrs++; 5604 } else { 5605 struct mbuf *m; 5606 5607 for (m = m0; m != NULL; m = m->m_next) { 5608 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5609 #ifdef INVARIANTS 5610 pktlen -= m->m_len; 5611 #endif 5612 } 5613 #ifdef INVARIANTS 5614 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 5615 #endif 5616 txq->imm_wrs++; 5617 } 5618 5619 txq->txpkt_wrs++; 5620 5621 txsd = &txq->sdesc[eq->pidx]; 5622 txsd->m = m0; 5623 txsd->desc_used = ndesc; 5624 5625 return (ndesc); 5626 } 5627 5628 static inline bool 5629 cmp_l2hdr(struct txpkts *txp, struct mbuf *m) 5630 { 5631 int len; 5632 5633 MPASS(txp->npkt > 0); 5634 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5635 5636 if (txp->ethtype == be16toh(ETHERTYPE_VLAN)) 5637 len = VM_TX_L2HDR_LEN; 5638 else 5639 len = sizeof(struct ether_header); 5640 5641 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0); 5642 } 5643 5644 static inline void 5645 save_l2hdr(struct txpkts *txp, struct mbuf *m) 5646 { 5647 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5648 5649 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN); 5650 } 5651 5652 static int 5653 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5654 int avail, bool *send) 5655 { 5656 struct txpkts *txp = &txq->txp; 5657 5658 /* Cannot have TSO and coalesce at the same time. 
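 * When coalescing is ruled out EINVAL is returned, and *send tells the
 * caller whether a partially built txpkts WR has to be flushed first.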
*/ 5659 if (cannot_use_txpkts(m)) { 5660 cannot_coalesce: 5661 *send = txp->npkt > 0; 5662 return (EINVAL); 5663 } 5664 5665 /* VF allows coalescing of type 1 (1 GL) only */ 5666 if (mbuf_nsegs(m) > 1) 5667 goto cannot_coalesce; 5668 5669 *send = false; 5670 if (txp->npkt > 0) { 5671 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5672 MPASS(txp->npkt < txp->max_npkt); 5673 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5674 5675 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) { 5676 retry_after_send: 5677 *send = true; 5678 return (EAGAIN); 5679 } 5680 if (m->m_pkthdr.len + txp->plen > 65535) 5681 goto retry_after_send; 5682 if (cmp_l2hdr(txp, m)) 5683 goto retry_after_send; 5684 5685 txp->len16 += txpkts1_len16(); 5686 txp->plen += m->m_pkthdr.len; 5687 txp->mb[txp->npkt++] = m; 5688 if (txp->npkt == txp->max_npkt) 5689 *send = true; 5690 } else { 5691 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) + 5692 txpkts1_len16(); 5693 if (tx_len16_to_desc(txp->len16) > avail) 5694 goto cannot_coalesce; 5695 txp->npkt = 1; 5696 txp->wr_type = 1; 5697 txp->plen = m->m_pkthdr.len; 5698 txp->mb[0] = m; 5699 save_l2hdr(txp, m); 5700 } 5701 return (0); 5702 } 5703 5704 static int 5705 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5706 int avail, bool *send) 5707 { 5708 struct txpkts *txp = &txq->txp; 5709 int nsegs; 5710 5711 MPASS(!(sc->flags & IS_VF)); 5712 5713 /* Cannot have TSO and coalesce at the same time. */ 5714 if (cannot_use_txpkts(m)) { 5715 cannot_coalesce: 5716 *send = txp->npkt > 0; 5717 return (EINVAL); 5718 } 5719 5720 *send = false; 5721 nsegs = mbuf_nsegs(m); 5722 if (txp->npkt == 0) { 5723 if (m->m_pkthdr.len > 65535) 5724 goto cannot_coalesce; 5725 if (nsegs > 1) { 5726 txp->wr_type = 0; 5727 txp->len16 = 5728 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5729 txpkts0_len16(nsegs); 5730 } else { 5731 txp->wr_type = 1; 5732 txp->len16 = 5733 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5734 txpkts1_len16(); 5735 } 5736 if (tx_len16_to_desc(txp->len16) > avail) 5737 goto cannot_coalesce; 5738 txp->npkt = 1; 5739 txp->plen = m->m_pkthdr.len; 5740 txp->mb[0] = m; 5741 } else { 5742 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5743 MPASS(txp->npkt < txp->max_npkt); 5744 5745 if (m->m_pkthdr.len + txp->plen > 65535) { 5746 retry_after_send: 5747 *send = true; 5748 return (EAGAIN); 5749 } 5750 5751 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 5752 if (txp->wr_type == 0) { 5753 if (tx_len16_to_desc(txp->len16 + 5754 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC)) 5755 goto retry_after_send; 5756 txp->len16 += txpkts0_len16(nsegs); 5757 } else { 5758 if (nsegs != 1) 5759 goto retry_after_send; 5760 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > 5761 avail) 5762 goto retry_after_send; 5763 txp->len16 += txpkts1_len16(); 5764 } 5765 5766 txp->plen += m->m_pkthdr.len; 5767 txp->mb[txp->npkt++] = m; 5768 if (txp->npkt == txp->max_npkt) 5769 *send = true; 5770 } 5771 return (0); 5772 } 5773 5774 /* 5775 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 5776 * the software descriptor, and advance the pidx. It is guaranteed that enough 5777 * descriptors are available. 5778 * 5779 * The return value is the # of hardware descriptors used. 
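 * In a type 0 WR every packet gets its own ulp_txpkt + ulptx_idata headers in
 * front of its CPL and SGL; a type 1 WR has only the shared work request
 * header followed by a CPL + SGL per packet.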
5780 */ 5781 static u_int 5782 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq) 5783 { 5784 const struct txpkts *txp = &txq->txp; 5785 struct sge_eq *eq = &txq->eq; 5786 struct fw_eth_tx_pkts_wr *wr; 5787 struct tx_sdesc *txsd; 5788 struct cpl_tx_pkt_core *cpl; 5789 uint64_t ctrl1; 5790 int ndesc, i, checkwrap; 5791 struct mbuf *m, *last; 5792 void *flitp; 5793 5794 TXQ_LOCK_ASSERT_OWNED(txq); 5795 MPASS(txp->npkt > 0); 5796 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5797 5798 wr = (void *)&eq->desc[eq->pidx]; 5799 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 5800 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5801 wr->plen = htobe16(txp->plen); 5802 wr->npkt = txp->npkt; 5803 wr->r3 = 0; 5804 wr->type = txp->wr_type; 5805 flitp = wr + 1; 5806 5807 /* 5808 * At this point we are 16B into a hardware descriptor. If checkwrap is 5809 * set then we know the WR is going to wrap around somewhere. We'll 5810 * check for that at appropriate points. 5811 */ 5812 ndesc = tx_len16_to_desc(txp->len16); 5813 last = NULL; 5814 checkwrap = eq->sidx - ndesc < eq->pidx; 5815 for (i = 0; i < txp->npkt; i++) { 5816 m = txp->mb[i]; 5817 if (txp->wr_type == 0) { 5818 struct ulp_txpkt *ulpmc; 5819 struct ulptx_idata *ulpsc; 5820 5821 /* ULP master command */ 5822 ulpmc = flitp; 5823 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 5824 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 5825 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m))); 5826 5827 /* ULP subcommand */ 5828 ulpsc = (void *)(ulpmc + 1); 5829 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 5830 F_ULP_TX_SC_MORE); 5831 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 5832 5833 cpl = (void *)(ulpsc + 1); 5834 if (checkwrap && 5835 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 5836 cpl = (void *)&eq->desc[0]; 5837 } else { 5838 cpl = flitp; 5839 } 5840 5841 /* Checksum offload */ 5842 ctrl1 = csum_to_ctrl(sc, m); 5843 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5844 /* some hardware assistance provided */ 5845 if (needs_vxlan_csum(m)) 5846 txq->vxlan_txcsum++; 5847 else 5848 txq->txcsum++; 5849 } 5850 5851 /* VLAN tag insertion */ 5852 if (needs_vlan_insertion(m)) { 5853 ctrl1 |= F_TXPKT_VLAN_VLD | 5854 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5855 txq->vlan_insertion++; 5856 } 5857 5858 /* CPL header */ 5859 cpl->ctrl0 = txq->cpl_ctrl0; 5860 cpl->pack = 0; 5861 cpl->len = htobe16(m->m_pkthdr.len); 5862 cpl->ctrl1 = htobe64(ctrl1); 5863 5864 flitp = cpl + 1; 5865 if (checkwrap && 5866 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5867 flitp = (void *)&eq->desc[0]; 5868 5869 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 5870 5871 if (last != NULL) 5872 last->m_nextpkt = m; 5873 last = m; 5874 } 5875 5876 txq->sgl_wrs++; 5877 if (txp->wr_type == 0) { 5878 txq->txpkts0_pkts += txp->npkt; 5879 txq->txpkts0_wrs++; 5880 } else { 5881 txq->txpkts1_pkts += txp->npkt; 5882 txq->txpkts1_wrs++; 5883 } 5884 5885 txsd = &txq->sdesc[eq->pidx]; 5886 txsd->m = txp->mb[0]; 5887 txsd->desc_used = ndesc; 5888 5889 return (ndesc); 5890 } 5891 5892 static u_int 5893 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq) 5894 { 5895 const struct txpkts *txp = &txq->txp; 5896 struct sge_eq *eq = &txq->eq; 5897 struct fw_eth_tx_pkts_vm_wr *wr; 5898 struct tx_sdesc *txsd; 5899 struct cpl_tx_pkt_core *cpl; 5900 uint64_t ctrl1; 5901 int ndesc, i; 5902 struct mbuf *m, *last; 5903 void *flitp; 5904 5905 TXQ_LOCK_ASSERT_OWNED(txq); 5906 MPASS(txp->npkt > 0); 5907 MPASS(txp->wr_type == 1); /* 
VF supports type 1 only */ 5908 MPASS(txp->mb[0] != NULL); 5909 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5910 5911 wr = (void *)&eq->desc[eq->pidx]; 5912 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); 5913 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5914 wr->r3 = 0; 5915 wr->plen = htobe16(txp->plen); 5916 wr->npkt = txp->npkt; 5917 wr->r4 = 0; 5918 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16); 5919 flitp = wr + 1; 5920 5921 /* 5922 * At this point we are 32B into a hardware descriptor. Each mbuf in 5923 * the WR will take 32B so we check for the end of the descriptor ring 5924 * before writing odd mbufs (mb[1], 3, 5, ..) 5925 */ 5926 ndesc = tx_len16_to_desc(txp->len16); 5927 last = NULL; 5928 for (i = 0; i < txp->npkt; i++) { 5929 m = txp->mb[i]; 5930 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5931 flitp = &eq->desc[0]; 5932 cpl = flitp; 5933 5934 /* Checksum offload */ 5935 ctrl1 = csum_to_ctrl(sc, m); 5936 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5937 txq->txcsum++; /* some hardware assistance provided */ 5938 5939 /* VLAN tag insertion */ 5940 if (needs_vlan_insertion(m)) { 5941 ctrl1 |= F_TXPKT_VLAN_VLD | 5942 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5943 txq->vlan_insertion++; 5944 } 5945 5946 /* CPL header */ 5947 cpl->ctrl0 = txq->cpl_ctrl0; 5948 cpl->pack = 0; 5949 cpl->len = htobe16(m->m_pkthdr.len); 5950 cpl->ctrl1 = htobe64(ctrl1); 5951 5952 flitp = cpl + 1; 5953 MPASS(mbuf_nsegs(m) == 1); 5954 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0); 5955 5956 if (last != NULL) 5957 last->m_nextpkt = m; 5958 last = m; 5959 } 5960 5961 txq->sgl_wrs++; 5962 txq->txpkts1_pkts += txp->npkt; 5963 txq->txpkts1_wrs++; 5964 5965 txsd = &txq->sdesc[eq->pidx]; 5966 txsd->m = txp->mb[0]; 5967 txsd->desc_used = ndesc; 5968 5969 return (ndesc); 5970 } 5971 5972 /* 5973 * If the SGL ends on an address that is not 16 byte aligned, this function will 5974 * add a 0 filled flit at the end. 5975 */ 5976 static void 5977 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 5978 { 5979 struct sge_eq *eq = &txq->eq; 5980 struct sglist *gl = txq->gl; 5981 struct sglist_seg *seg; 5982 __be64 *flitp, *wrap; 5983 struct ulptx_sgl *usgl; 5984 int i, nflits, nsegs; 5985 5986 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 5987 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 5988 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5989 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5990 5991 get_pkt_gl(m, gl); 5992 nsegs = gl->sg_nseg; 5993 MPASS(nsegs > 0); 5994 5995 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 5996 flitp = (__be64 *)(*to); 5997 wrap = (__be64 *)(&eq->desc[eq->sidx]); 5998 seg = &gl->sg_segs[0]; 5999 usgl = (void *)flitp; 6000 6001 /* 6002 * We start at a 16 byte boundary somewhere inside the tx descriptor 6003 * ring, so we're at least 16 bytes away from the status page. There is 6004 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
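 *
 * For reference, the DSGL written below is laid out roughly like this
 * (one flit = 8B; segment 0 lives in the ulptx_sgl header itself):
 *
 *	flit 0: cmd_nsge | len(seg 0)
 *	flit 1: addr(seg 0)
 *	flit 2: len(seg 1) | len(seg 2)
 *	flit 3: addr(seg 1)
 *	flit 4: addr(seg 2)
 *	...
 *
 * which is where the nflits formula above comes from: 2 header flits,
 * 3 flits for each further pair of segments, and 2 for an odd trailing
 * segment (its unused length slot is zero-filled below).  E.g. nsegs = 3
 * gives nflits = 3 + 0 + 2 = 5 flits (40 bytes).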
6005 */ 6006 6007 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6008 V_ULPTX_NSGE(nsegs)); 6009 usgl->len0 = htobe32(seg->ss_len); 6010 usgl->addr0 = htobe64(seg->ss_paddr); 6011 seg++; 6012 6013 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 6014 6015 /* Won't wrap around at all */ 6016 6017 for (i = 0; i < nsegs - 1; i++, seg++) { 6018 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 6019 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 6020 } 6021 if (i & 1) 6022 usgl->sge[i / 2].len[1] = htobe32(0); 6023 flitp += nflits; 6024 } else { 6025 6026 /* Will wrap somewhere in the rest of the SGL */ 6027 6028 /* 2 flits already written, write the rest flit by flit */ 6029 flitp = (void *)(usgl + 1); 6030 for (i = 0; i < nflits - 2; i++) { 6031 if (flitp == wrap) 6032 flitp = (void *)eq->desc; 6033 *flitp++ = get_flit(seg, nsegs - 1, i); 6034 } 6035 } 6036 6037 if (nflits & 1) { 6038 MPASS(((uintptr_t)flitp) & 0xf); 6039 *flitp++ = 0; 6040 } 6041 6042 MPASS((((uintptr_t)flitp) & 0xf) == 0); 6043 if (__predict_false(flitp == wrap)) 6044 *to = (void *)eq->desc; 6045 else 6046 *to = (void *)flitp; 6047 } 6048 6049 static inline void 6050 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 6051 { 6052 6053 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 6054 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 6055 6056 if (__predict_true((uintptr_t)(*to) + len <= 6057 (uintptr_t)&eq->desc[eq->sidx])) { 6058 bcopy(from, *to, len); 6059 (*to) += len; 6060 } else { 6061 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 6062 6063 bcopy(from, *to, portion); 6064 from += portion; 6065 portion = len - portion; /* remaining */ 6066 bcopy(from, (void *)eq->desc, portion); 6067 (*to) = (caddr_t)eq->desc + portion; 6068 } 6069 } 6070 6071 static inline void 6072 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 6073 { 6074 u_int db; 6075 6076 MPASS(n > 0); 6077 6078 db = eq->doorbells; 6079 if (n > 1) 6080 clrbit(&db, DOORBELL_WCWR); 6081 wmb(); 6082 6083 switch (ffs(db) - 1) { 6084 case DOORBELL_UDB: 6085 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6086 break; 6087 6088 case DOORBELL_WCWR: { 6089 volatile uint64_t *dst, *src; 6090 int i; 6091 6092 /* 6093 * Queues whose 128B doorbell segment fits in the page do not 6094 * use relative qid (udb_qid is always 0). Only queues with 6095 * doorbell segments can do WCWR. 
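 *
 * In other words the WCWR case below is (very roughly) a sketch of
 * copying the descriptor that was just written into the write-combined
 * half of the queue's doorbell segment:
 *
 *	memcpy(udb + UDBS_WR_OFFSET - UDBS_DB_OFFSET,
 *	    &eq->desc[eq->dbidx], sizeof(eq->desc[0]));
 *
 * followed by a write barrier; the copy itself delivers the WR and rings
 * the doorbell, so no separate PIDX update is written in that case.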
6096 */ 6097 KASSERT(eq->udb_qid == 0 && n == 1, 6098 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 6099 __func__, eq->doorbells, n, eq->dbidx, eq)); 6100 6101 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 6102 UDBS_DB_OFFSET); 6103 i = eq->dbidx; 6104 src = (void *)&eq->desc[i]; 6105 while (src != (void *)&eq->desc[i + 1]) 6106 *dst++ = *src++; 6107 wmb(); 6108 break; 6109 } 6110 6111 case DOORBELL_UDBWC: 6112 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6113 wmb(); 6114 break; 6115 6116 case DOORBELL_KDB: 6117 t4_write_reg(sc, sc->sge_kdoorbell_reg, 6118 V_QID(eq->cntxt_id) | V_PIDX(n)); 6119 break; 6120 } 6121 6122 IDXINCR(eq->dbidx, n, eq->sidx); 6123 } 6124 6125 static inline u_int 6126 reclaimable_tx_desc(struct sge_eq *eq) 6127 { 6128 uint16_t hw_cidx; 6129 6130 hw_cidx = read_hw_cidx(eq); 6131 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 6132 } 6133 6134 static inline u_int 6135 total_available_tx_desc(struct sge_eq *eq) 6136 { 6137 uint16_t hw_cidx, pidx; 6138 6139 hw_cidx = read_hw_cidx(eq); 6140 pidx = eq->pidx; 6141 6142 if (pidx == hw_cidx) 6143 return (eq->sidx - 1); 6144 else 6145 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 6146 } 6147 6148 static inline uint16_t 6149 read_hw_cidx(struct sge_eq *eq) 6150 { 6151 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 6152 uint16_t cidx = spg->cidx; /* stable snapshot */ 6153 6154 return (be16toh(cidx)); 6155 } 6156 6157 /* 6158 * Reclaim 'n' descriptors approximately. 6159 */ 6160 static u_int 6161 reclaim_tx_descs(struct sge_txq *txq, u_int n) 6162 { 6163 struct tx_sdesc *txsd; 6164 struct sge_eq *eq = &txq->eq; 6165 u_int can_reclaim, reclaimed; 6166 6167 TXQ_LOCK_ASSERT_OWNED(txq); 6168 MPASS(n > 0); 6169 6170 reclaimed = 0; 6171 can_reclaim = reclaimable_tx_desc(eq); 6172 while (can_reclaim && reclaimed < n) { 6173 int ndesc; 6174 struct mbuf *m, *nextpkt; 6175 6176 txsd = &txq->sdesc[eq->cidx]; 6177 ndesc = txsd->desc_used; 6178 6179 /* Firmware doesn't return "partial" credits. 
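 * Each WR consumed a whole number of descriptors (txsd->desc_used) and
 * the hardware cidx only moves past complete WRs, so the ring distance
 * can_reclaim = IDXDIFF(hw_cidx, eq->cidx, eq->sidx), which behaves like
 * the modular difference
 *
 *	hw_cidx >= cidx ? hw_cidx - cidx : hw_cidx + sidx - cidx
 *
 * (a sketch of the intent, not the macro's exact text), is always a sum
 * of whole desc_used values; the assertions below rely on that.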
*/ 6180 KASSERT(can_reclaim >= ndesc, 6181 ("%s: unexpected number of credits: %d, %d", 6182 __func__, can_reclaim, ndesc)); 6183 KASSERT(ndesc != 0, 6184 ("%s: descriptor with no credits: cidx %d", 6185 __func__, eq->cidx)); 6186 6187 for (m = txsd->m; m != NULL; m = nextpkt) { 6188 nextpkt = m->m_nextpkt; 6189 m->m_nextpkt = NULL; 6190 m_freem(m); 6191 } 6192 reclaimed += ndesc; 6193 can_reclaim -= ndesc; 6194 IDXINCR(eq->cidx, ndesc, eq->sidx); 6195 } 6196 6197 return (reclaimed); 6198 } 6199 6200 static void 6201 tx_reclaim(void *arg, int n) 6202 { 6203 struct sge_txq *txq = arg; 6204 struct sge_eq *eq = &txq->eq; 6205 6206 do { 6207 if (TXQ_TRYLOCK(txq) == 0) 6208 break; 6209 n = reclaim_tx_descs(txq, 32); 6210 if (eq->cidx == eq->pidx) 6211 eq->equeqidx = eq->pidx; 6212 TXQ_UNLOCK(txq); 6213 } while (n > 0); 6214 } 6215 6216 static __be64 6217 get_flit(struct sglist_seg *segs, int nsegs, int idx) 6218 { 6219 int i = (idx / 3) * 2; 6220 6221 switch (idx % 3) { 6222 case 0: { 6223 uint64_t rc; 6224 6225 rc = (uint64_t)segs[i].ss_len << 32; 6226 if (i + 1 < nsegs) 6227 rc |= (uint64_t)(segs[i + 1].ss_len); 6228 6229 return (htobe64(rc)); 6230 } 6231 case 1: 6232 return (htobe64(segs[i].ss_paddr)); 6233 case 2: 6234 return (htobe64(segs[i + 1].ss_paddr)); 6235 } 6236 6237 return (0); 6238 } 6239 6240 static int 6241 find_refill_source(struct adapter *sc, int maxp, bool packing) 6242 { 6243 int i, zidx = -1; 6244 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6245 6246 if (packing) { 6247 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6248 if (rxb->hwidx2 == -1) 6249 continue; 6250 if (rxb->size1 < PAGE_SIZE && 6251 rxb->size1 < largest_rx_cluster) 6252 continue; 6253 if (rxb->size1 > largest_rx_cluster) 6254 break; 6255 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE); 6256 if (rxb->size2 >= maxp) 6257 return (i); 6258 zidx = i; 6259 } 6260 } else { 6261 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6262 if (rxb->hwidx1 == -1) 6263 continue; 6264 if (rxb->size1 > largest_rx_cluster) 6265 break; 6266 if (rxb->size1 >= maxp) 6267 return (i); 6268 zidx = i; 6269 } 6270 } 6271 6272 return (zidx); 6273 } 6274 6275 static void 6276 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 6277 { 6278 mtx_lock(&sc->sfl_lock); 6279 FL_LOCK(fl); 6280 if ((fl->flags & FL_DOOMED) == 0) { 6281 fl->flags |= FL_STARVING; 6282 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 6283 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 6284 } 6285 FL_UNLOCK(fl); 6286 mtx_unlock(&sc->sfl_lock); 6287 } 6288 6289 static void 6290 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 6291 { 6292 struct sge_wrq *wrq = (void *)eq; 6293 6294 atomic_readandclear_int(&eq->equiq); 6295 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 6296 } 6297 6298 static void 6299 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 6300 { 6301 struct sge_txq *txq = (void *)eq; 6302 6303 MPASS(eq->type == EQ_ETH); 6304 6305 atomic_readandclear_int(&eq->equiq); 6306 if (mp_ring_is_idle(txq->r)) 6307 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 6308 else 6309 mp_ring_check_drainage(txq->r, 64); 6310 } 6311 6312 static int 6313 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 6314 struct mbuf *m) 6315 { 6316 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 6317 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 6318 struct adapter *sc = iq->adapter; 6319 struct sge *s = &sc->sge; 6320 struct sge_eq *eq; 6321 static void (*h[])(struct adapter *, struct sge_eq 
*) = {NULL, 6322 &handle_wrq_egr_update, &handle_eth_egr_update, 6323 &handle_wrq_egr_update}; 6324 6325 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6326 rss->opcode)); 6327 6328 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 6329 (*h[eq->type])(sc, eq); 6330 6331 return (0); 6332 } 6333 6334 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 6335 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 6336 offsetof(struct cpl_fw6_msg, data)); 6337 6338 static int 6339 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 6340 { 6341 struct adapter *sc = iq->adapter; 6342 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 6343 6344 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6345 rss->opcode)); 6346 6347 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 6348 const struct rss_header *rss2; 6349 6350 rss2 = (const struct rss_header *)&cpl->data[0]; 6351 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 6352 } 6353 6354 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 6355 } 6356 6357 /** 6358 * t4_handle_wrerr_rpl - process a FW work request error message 6359 * @adap: the adapter 6360 * @rpl: start of the FW message 6361 */ 6362 static int 6363 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 6364 { 6365 u8 opcode = *(const u8 *)rpl; 6366 const struct fw_error_cmd *e = (const void *)rpl; 6367 unsigned int i; 6368 6369 if (opcode != FW_ERROR_CMD) { 6370 log(LOG_ERR, 6371 "%s: Received WRERR_RPL message with opcode %#x\n", 6372 device_get_nameunit(adap->dev), opcode); 6373 return (EINVAL); 6374 } 6375 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 6376 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 6377 "non-fatal"); 6378 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 6379 case FW_ERROR_TYPE_EXCEPTION: 6380 log(LOG_ERR, "exception info:\n"); 6381 for (i = 0; i < nitems(e->u.exception.info); i++) 6382 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 6383 be32toh(e->u.exception.info[i])); 6384 log(LOG_ERR, "\n"); 6385 break; 6386 case FW_ERROR_TYPE_HWMODULE: 6387 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 6388 be32toh(e->u.hwmodule.regaddr), 6389 be32toh(e->u.hwmodule.regval)); 6390 break; 6391 case FW_ERROR_TYPE_WR: 6392 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 6393 be16toh(e->u.wr.cidx), 6394 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 6395 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 6396 be32toh(e->u.wr.eqid)); 6397 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 6398 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 6399 e->u.wr.wrhdr[i]); 6400 log(LOG_ERR, "\n"); 6401 break; 6402 case FW_ERROR_TYPE_ACL: 6403 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 6404 be16toh(e->u.acl.cidx), 6405 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 6406 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 6407 be32toh(e->u.acl.eqid), 6408 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 6409 "MAC"); 6410 for (i = 0; i < nitems(e->u.acl.val); i++) 6411 log(LOG_ERR, " %02x", e->u.acl.val[i]); 6412 log(LOG_ERR, "\n"); 6413 break; 6414 default: 6415 log(LOG_ERR, "type %#x\n", 6416 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 6417 return (EINVAL); 6418 } 6419 return (0); 6420 } 6421 6422 static inline bool 6423 bufidx_used(struct adapter *sc, int idx) 6424 { 6425 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6426 int i; 6427 6428 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6429 if (rxb->size1 > largest_rx_cluster) 6430 continue; 6431 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx) 6432 return (true); 6433 } 6434 6435 return (false); 6436 } 6437 6438 static int 6439 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 6440 { 6441 struct adapter *sc = arg1; 6442 struct sge_params *sp = &sc->params.sge; 6443 int i, rc; 6444 struct sbuf sb; 6445 char c; 6446 6447 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); 6448 for (i = 0; i < SGE_FLBUF_SIZES; i++) { 6449 if (bufidx_used(sc, i)) 6450 c = '*'; 6451 else 6452 c = '\0'; 6453 6454 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c); 6455 } 6456 sbuf_trim(&sb); 6457 sbuf_finish(&sb); 6458 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 6459 sbuf_delete(&sb); 6460 return (rc); 6461 } 6462 6463 #ifdef RATELIMIT 6464 #if defined(INET) || defined(INET6) 6465 /* 6466 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 6467 */ 6468 static inline u_int 6469 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 6470 { 6471 u_int n; 6472 6473 MPASS(immhdrs > 0); 6474 6475 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 6476 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 6477 if (__predict_false(nsegs == 0)) 6478 goto done; 6479 6480 nsegs--; /* first segment is part of ulptx_sgl */ 6481 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 6482 if (tso) 6483 n += sizeof(struct cpl_tx_pkt_lso_core); 6484 6485 done: 6486 return (howmany(n, 16)); 6487 } 6488 #endif 6489 6490 #define ETID_FLOWC_NPARAMS 6 6491 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 6492 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 6493 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 6494 6495 static int 6496 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, 6497 struct vi_info *vi) 6498 { 6499 struct wrq_cookie cookie; 6500 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN; 6501 struct fw_flowc_wr *flowc; 6502 6503 mtx_assert(&cst->lock, MA_OWNED); 6504 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 6505 EO_FLOWC_PENDING); 6506 6507 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLOWC_LEN16, &cookie); 6508 if (__predict_false(flowc == NULL)) 6509 return (ENOMEM); 6510 6511 bzero(flowc, ETID_FLOWC_LEN); 6512 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6513 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 6514 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 6515 V_FW_WR_FLOWID(cst->etid)); 6516 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 6517 flowc->mnemval[0].val = htobe32(pfvf); 6518 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 6519 flowc->mnemval[1].val = htobe32(pi->tx_chan); 6520 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 6521 flowc->mnemval[2].val = htobe32(pi->tx_chan); 6522 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 6523 flowc->mnemval[3].val = htobe32(cst->iqid); 6524 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 6525 flowc->mnemval[4].val = 
htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 6526 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 6527 flowc->mnemval[5].val = htobe32(cst->schedcl); 6528 6529 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6530 6531 cst->flags &= ~EO_FLOWC_PENDING; 6532 cst->flags |= EO_FLOWC_RPL_PENDING; 6533 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 6534 cst->tx_credits -= ETID_FLOWC_LEN16; 6535 6536 return (0); 6537 } 6538 6539 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 6540 6541 void 6542 send_etid_flush_wr(struct cxgbe_rate_tag *cst) 6543 { 6544 struct fw_flowc_wr *flowc; 6545 struct wrq_cookie cookie; 6546 6547 mtx_assert(&cst->lock, MA_OWNED); 6548 6549 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLUSH_LEN16, &cookie); 6550 if (__predict_false(flowc == NULL)) 6551 CXGBE_UNIMPLEMENTED(__func__); 6552 6553 bzero(flowc, ETID_FLUSH_LEN16 * 16); 6554 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6555 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 6556 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 6557 V_FW_WR_FLOWID(cst->etid)); 6558 6559 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6560 6561 cst->flags |= EO_FLUSH_RPL_PENDING; 6562 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 6563 cst->tx_credits -= ETID_FLUSH_LEN16; 6564 cst->ncompl++; 6565 } 6566 6567 static void 6568 write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, 6569 struct mbuf *m0, int compl) 6570 { 6571 struct cpl_tx_pkt_core *cpl; 6572 uint64_t ctrl1; 6573 uint32_t ctrl; /* used in many unrelated places */ 6574 int len16, pktlen, nsegs, immhdrs; 6575 uintptr_t p; 6576 struct ulptx_sgl *usgl; 6577 struct sglist sg; 6578 struct sglist_seg segs[38]; /* XXX: find real limit. XXX: get off the stack */ 6579 6580 mtx_assert(&cst->lock, MA_OWNED); 6581 M_ASSERTPKTHDR(m0); 6582 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 6583 m0->m_pkthdr.l4hlen > 0, 6584 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 6585 6586 len16 = mbuf_eo_len16(m0); 6587 nsegs = mbuf_eo_nsegs(m0); 6588 pktlen = m0->m_pkthdr.len; 6589 ctrl = sizeof(struct cpl_tx_pkt_core); 6590 if (needs_tso(m0)) 6591 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 6592 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 6593 ctrl += immhdrs; 6594 6595 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 6596 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 6597 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 6598 V_FW_WR_FLOWID(cst->etid)); 6599 wr->r3 = 0; 6600 if (needs_outer_udp_csum(m0)) { 6601 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 6602 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 6603 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6604 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 6605 wr->u.udpseg.rtplen = 0; 6606 wr->u.udpseg.r4 = 0; 6607 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 6608 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 6609 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 6610 cpl = (void *)(wr + 1); 6611 } else { 6612 MPASS(needs_outer_tcp_csum(m0)); 6613 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 6614 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 6615 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6616 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 6617 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 6618 wr->u.tcpseg.r4 = 0; 6619 wr->u.tcpseg.r5 = 0; 6620 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 6621 6622 if (needs_tso(m0)) { 6623 struct cpl_tx_pkt_lso_core *lso = (void 
*)(wr + 1); 6624 6625 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 6626 6627 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 6628 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 6629 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - 6630 ETHER_HDR_LEN) >> 2) | 6631 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 6632 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 6633 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 6634 ctrl |= F_LSO_IPV6; 6635 lso->lso_ctrl = htobe32(ctrl); 6636 lso->ipid_ofst = htobe16(0); 6637 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 6638 lso->seqno_offset = htobe32(0); 6639 lso->len = htobe32(pktlen); 6640 6641 cpl = (void *)(lso + 1); 6642 } else { 6643 wr->u.tcpseg.mss = htobe16(0xffff); 6644 cpl = (void *)(wr + 1); 6645 } 6646 } 6647 6648 /* Checksum offload must be requested for ethofld. */ 6649 MPASS(needs_outer_l4_csum(m0)); 6650 ctrl1 = csum_to_ctrl(cst->adapter, m0); 6651 6652 /* VLAN tag insertion */ 6653 if (needs_vlan_insertion(m0)) { 6654 ctrl1 |= F_TXPKT_VLAN_VLD | 6655 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 6656 } 6657 6658 /* CPL header */ 6659 cpl->ctrl0 = cst->ctrl0; 6660 cpl->pack = 0; 6661 cpl->len = htobe16(pktlen); 6662 cpl->ctrl1 = htobe64(ctrl1); 6663 6664 /* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */ 6665 p = (uintptr_t)(cpl + 1); 6666 m_copydata(m0, 0, immhdrs, (void *)p); 6667 6668 /* SGL */ 6669 if (nsegs > 0) { 6670 int i, pad; 6671 6672 /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ 6673 p += immhdrs; 6674 pad = 16 - (immhdrs & 0xf); 6675 bzero((void *)p, pad); 6676 6677 usgl = (void *)(p + pad); 6678 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6679 V_ULPTX_NSGE(nsegs)); 6680 6681 sglist_init(&sg, nitems(segs), segs); 6682 for (; m0 != NULL; m0 = m0->m_next) { 6683 if (__predict_false(m0->m_len == 0)) 6684 continue; 6685 if (immhdrs >= m0->m_len) { 6686 immhdrs -= m0->m_len; 6687 continue; 6688 } 6689 if (m0->m_flags & M_EXTPG) 6690 sglist_append_mbuf_epg(&sg, m0, 6691 mtod(m0, vm_offset_t), m0->m_len); 6692 else 6693 sglist_append(&sg, mtod(m0, char *) + immhdrs, 6694 m0->m_len - immhdrs); 6695 immhdrs = 0; 6696 } 6697 MPASS(sg.sg_nseg == nsegs); 6698 6699 /* 6700 * Zero pad last 8B in case the WR doesn't end on a 16B 6701 * boundary. 6702 */ 6703 *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; 6704 6705 usgl->len0 = htobe32(segs[0].ss_len); 6706 usgl->addr0 = htobe64(segs[0].ss_paddr); 6707 for (i = 0; i < nsegs - 1; i++) { 6708 usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); 6709 usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); 6710 } 6711 if (i & 1) 6712 usgl->sge[i / 2].len[1] = htobe32(0); 6713 } 6714 6715 } 6716 6717 static void 6718 ethofld_tx(struct cxgbe_rate_tag *cst) 6719 { 6720 struct mbuf *m; 6721 struct wrq_cookie cookie; 6722 int next_credits, compl; 6723 struct fw_eth_tx_eo_wr *wr; 6724 6725 mtx_assert(&cst->lock, MA_OWNED); 6726 6727 while ((m = mbufq_first(&cst->pending_tx)) != NULL) { 6728 M_ASSERTPKTHDR(m); 6729 6730 /* How many len16 credits do we need to send this mbuf. */ 6731 next_credits = mbuf_eo_len16(m); 6732 MPASS(next_credits > 0); 6733 if (next_credits > cst->tx_credits) { 6734 /* 6735 * Tx will make progress eventually because there is at 6736 * least one outstanding fw4_ack that will return 6737 * credits and kick the tx. 6738 */ 6739 MPASS(cst->ncompl > 0); 6740 return; 6741 } 6742 wr = start_wrq_wr(&cst->eo_txq->wrq, next_credits, &cookie); 6743 if (__predict_false(wr == NULL)) { 6744 /* XXX: wishful thinking, not a real assertion. 
*/ 6745 MPASS(cst->ncompl > 0); 6746 return; 6747 } 6748 cst->tx_credits -= next_credits; 6749 cst->tx_nocompl += next_credits; 6750 compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; 6751 ETHER_BPF_MTAP(cst->com.ifp, m); 6752 write_ethofld_wr(cst, wr, m, compl); 6753 commit_wrq_wr(&cst->eo_txq->wrq, wr, &cookie); 6754 if (compl) { 6755 cst->ncompl++; 6756 cst->tx_nocompl = 0; 6757 } 6758 (void) mbufq_dequeue(&cst->pending_tx); 6759 6760 /* 6761 * Drop the mbuf's reference on the tag now rather 6762 * than waiting until m_freem(). This ensures that 6763 * cxgbe_rate_tag_free gets called when the inp drops 6764 * its reference on the tag and there are no more 6765 * mbufs in the pending_tx queue and can flush any 6766 * pending requests. Otherwise if the last mbuf 6767 * doesn't request a completion the etid will never be 6768 * released. 6769 */ 6770 m->m_pkthdr.snd_tag = NULL; 6771 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 6772 m_snd_tag_rele(&cst->com); 6773 6774 mbufq_enqueue(&cst->pending_fwack, m); 6775 } 6776 } 6777 6778 int 6779 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 6780 { 6781 struct cxgbe_rate_tag *cst; 6782 int rc; 6783 6784 MPASS(m0->m_nextpkt == NULL); 6785 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); 6786 MPASS(m0->m_pkthdr.snd_tag != NULL); 6787 cst = mst_to_crt(m0->m_pkthdr.snd_tag); 6788 6789 mtx_lock(&cst->lock); 6790 MPASS(cst->flags & EO_SND_TAG_REF); 6791 6792 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 6793 struct vi_info *vi = ifp->if_softc; 6794 struct port_info *pi = vi->pi; 6795 struct adapter *sc = pi->adapter; 6796 const uint32_t rss_mask = vi->rss_size - 1; 6797 uint32_t rss_hash; 6798 6799 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 6800 if (M_HASHTYPE_ISHASH(m0)) 6801 rss_hash = m0->m_pkthdr.flowid; 6802 else 6803 rss_hash = arc4random(); 6804 /* We assume RSS hashing */ 6805 cst->iqid = vi->rss[rss_hash & rss_mask]; 6806 cst->eo_txq += rss_hash % vi->nofldtxq; 6807 rc = send_etid_flowc_wr(cst, pi, vi); 6808 if (rc != 0) 6809 goto done; 6810 } 6811 6812 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 6813 rc = ENOBUFS; 6814 goto done; 6815 } 6816 6817 mbufq_enqueue(&cst->pending_tx, m0); 6818 cst->plen += m0->m_pkthdr.len; 6819 6820 /* 6821 * Hold an extra reference on the tag while generating work 6822 * requests to ensure that we don't try to free the tag during 6823 * ethofld_tx() in case we are sending the final mbuf after 6824 * the inp was freed. 
6825 */ 6826 m_snd_tag_ref(&cst->com); 6827 ethofld_tx(cst); 6828 mtx_unlock(&cst->lock); 6829 m_snd_tag_rele(&cst->com); 6830 return (0); 6831 6832 done: 6833 mtx_unlock(&cst->lock); 6834 if (__predict_false(rc != 0)) 6835 m_freem(m0); 6836 return (rc); 6837 } 6838 6839 static int 6840 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 6841 { 6842 struct adapter *sc = iq->adapter; 6843 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 6844 struct mbuf *m; 6845 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 6846 struct cxgbe_rate_tag *cst; 6847 uint8_t credits = cpl->credits; 6848 6849 cst = lookup_etid(sc, etid); 6850 mtx_lock(&cst->lock); 6851 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 6852 MPASS(credits >= ETID_FLOWC_LEN16); 6853 credits -= ETID_FLOWC_LEN16; 6854 cst->flags &= ~EO_FLOWC_RPL_PENDING; 6855 } 6856 6857 KASSERT(cst->ncompl > 0, 6858 ("%s: etid %u (%p) wasn't expecting completion.", 6859 __func__, etid, cst)); 6860 cst->ncompl--; 6861 6862 while (credits > 0) { 6863 m = mbufq_dequeue(&cst->pending_fwack); 6864 if (__predict_false(m == NULL)) { 6865 /* 6866 * The remaining credits are for the final flush that 6867 * was issued when the tag was freed by the kernel. 6868 */ 6869 MPASS((cst->flags & 6870 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 6871 EO_FLUSH_RPL_PENDING); 6872 MPASS(credits == ETID_FLUSH_LEN16); 6873 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 6874 MPASS(cst->ncompl == 0); 6875 6876 cst->flags &= ~EO_FLUSH_RPL_PENDING; 6877 cst->tx_credits += cpl->credits; 6878 cxgbe_rate_tag_free_locked(cst); 6879 return (0); /* cst is gone. */ 6880 } 6881 KASSERT(m != NULL, 6882 ("%s: too many credits (%u, %u)", __func__, cpl->credits, 6883 credits)); 6884 KASSERT(credits >= mbuf_eo_len16(m), 6885 ("%s: too few credits (%u, %u, %u)", __func__, 6886 cpl->credits, credits, mbuf_eo_len16(m))); 6887 credits -= mbuf_eo_len16(m); 6888 cst->plen -= m->m_pkthdr.len; 6889 m_freem(m); 6890 } 6891 6892 cst->tx_credits += cpl->credits; 6893 MPASS(cst->tx_credits <= cst->tx_total); 6894 6895 if (cst->flags & EO_SND_TAG_REF) { 6896 /* 6897 * As with ethofld_transmit(), hold an extra reference 6898 * so that the tag is stable across ethold_tx(). 6899 */ 6900 m_snd_tag_ref(&cst->com); 6901 m = mbufq_first(&cst->pending_tx); 6902 if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) 6903 ethofld_tx(cst); 6904 mtx_unlock(&cst->lock); 6905 m_snd_tag_rele(&cst->com); 6906 } else { 6907 /* 6908 * There shouldn't be any pending packets if the tag 6909 * was freed by the kernel since any pending packet 6910 * should hold a reference to the tag. 6911 */ 6912 MPASS(mbufq_first(&cst->pending_tx) == NULL); 6913 mtx_unlock(&cst->lock); 6914 } 6915 6916 return (0); 6917 } 6918 #endif 6919
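/*
 * Illustration only, never compiled into the driver: a self-contained
 * userland sketch of the size accounting used by the tx WR writers above.
 * sgl_nflits() mirrors the nflits formula in write_gl_to_txd() (first
 * segment in the 16B ulptx_sgl header, 3 flits per further pair of
 * segments, 2 for an odd trailing segment).  len16_to_ndesc() assumes the
 * usual 64B equeue descriptor (4 x 16B units per descriptor); the driver
 * itself uses tx_len16_to_desc() for this.  The txpkt WR size used in
 * main() (16B firmware header + 16B CPL + SGL) is an example, not an
 * exhaustive account of every WR type.
 */
#if 0
#include <stdio.h>

static unsigned int
sgl_nflits(unsigned int nsegs)			/* nsegs >= 1 */
{
	unsigned int extra = nsegs - 1;		/* beyond the first segment */

	return (3 * extra / 2 + (extra & 1) + 2);
}

static unsigned int
len16_to_ndesc(unsigned int len16)		/* 64B descriptors assumed */
{
	return ((len16 + 3) / 4);
}

int
main(void)
{
	unsigned int nsegs, flits, len16;

	for (nsegs = 1; nsegs <= 8; nsegs++) {
		flits = sgl_nflits(nsegs);
		/* example txpkt WR: 16B FW header + 16B CPL + the SGL */
		len16 = 2 + (flits + 1) / 2;
		printf("nsegs %u: SGL %u flits, WR ~%u x len16, %u desc\n",
		    nsegs, flits, len16, len16_to_ndesc(len16));
	}
	return (0);
}
#endif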