1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_kern_tls.h" 36 #include "opt_ratelimit.h" 37 38 #include <sys/types.h> 39 #include <sys/eventhandler.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/ktls.h> 44 #include <sys/malloc.h> 45 #include <sys/msan.h> 46 #include <sys/queue.h> 47 #include <sys/sbuf.h> 48 #include <sys/taskqueue.h> 49 #include <sys/time.h> 50 #include <sys/sglist.h> 51 #include <sys/sysctl.h> 52 #include <sys/smp.h> 53 #include <sys/socketvar.h> 54 #include <sys/counter.h> 55 #include <net/bpf.h> 56 #include <net/ethernet.h> 57 #include <net/if.h> 58 #include <net/if_vlan_var.h> 59 #include <net/if_vxlan.h> 60 #include <netinet/in.h> 61 #include <netinet/ip.h> 62 #include <netinet/ip6.h> 63 #include <netinet/tcp.h> 64 #include <netinet/udp.h> 65 #include <machine/in_cksum.h> 66 #include <machine/md_var.h> 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #ifdef DEV_NETMAP 70 #include <machine/bus.h> 71 #include <sys/selinfo.h> 72 #include <net/if_var.h> 73 #include <net/netmap.h> 74 #include <dev/netmap/netmap_kern.h> 75 #endif 76 77 #include "common/common.h" 78 #include "common/t4_regs.h" 79 #include "common/t4_regs_values.h" 80 #include "common/t4_msg.h" 81 #include "t4_l2t.h" 82 #include "t4_mp_ring.h" 83 84 #ifdef T4_PKT_TIMESTAMP 85 #define RX_COPY_THRESHOLD (MINCLSIZE - 8) 86 #else 87 #define RX_COPY_THRESHOLD MINCLSIZE 88 #endif 89 90 /* Internal mbuf flags stored in PH_loc.eight[1]. */ 91 #define MC_NOMAP 0x01 92 #define MC_RAW_WR 0x02 93 #define MC_TLS 0x04 94 95 /* 96 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 97 * 0-7 are valid values. 98 */ 99 static int fl_pktshift = 0; 100 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0, 101 "payload DMA offset in rx buffer (bytes)"); 102 103 /* 104 * Pad ethernet payload up to this boundary. 105 * -1: driver should figure out a good value. 
 * 0: disable padding.
 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
 */
int fl_pad = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
    "payload pad boundary (bytes)");

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
    "status page size (bytes)");

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 * 2: both backpressure and drop.
 */
static int cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
    "Congestion control for NIC RX queues (0 = backpressure, 1 = drop, 2 = both)");
#ifdef TCP_OFFLOAD
static int ofld_cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ofld_cong_drop, CTLFLAG_RDTUN, &ofld_cong_drop, 0,
    "Congestion control for TOE RX queues (0 = backpressure, 1 = drop, 2 = both)");
#endif

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
    0, "Enable buffer packing");

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
    "payload pack boundary (bytes)");

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
    &largest_rx_cluster, 0, "Largest rx cluster (bytes)");

/*
 * Size of cluster allocation that's most likely to succeed. The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
    &safest_rx_cluster, 0, "Safe rx cluster (bytes)");

#ifdef RATELIMIT
/*
 * Knob to control TCP timestamp rewriting, and the granularity of the tick used
 * for rewriting. -1 and 0-3 are all valid values.
 * -1: hardware should leave the TCP timestamps alone.
 * 0: 1ms
 * 1: 100us
 * 2: 10us
 * 3: 1us
 */
static int tsclk = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0,
    "Control TCP timestamp rewriting when using pacing");

static int eo_max_backlog = 1024 * 1024;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog,
    0, "Maximum backlog of ratelimited data per flow");
#endif

/*
 * The interrupt holdoff timers are multiplied by this value on T6+.
 * 1 and 3-17 (both inclusive) are legal values.
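 *
 * A minimal sketch of how this tunable is consumed by t4_tweak_chip_settings()
 * later in this file: tscale == 1 leaves the TSCALE field at 0, anything else
 * is written as tscale - 2.
 *
 *	v = (tscale == 1) ? 0 : V_TSCALE(tscale - 2);
 *	t4_set_reg_field(sc, A_SGE_ITP_CONTROL, V_TSCALE(M_TSCALE), v);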
195 */ 196 static int tscale = 1; 197 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0, 198 "Interrupt holdoff timer scale on T6+"); 199 200 /* 201 * Number of LRO entries in the lro_ctrl structure per rx queue. 202 */ 203 static int lro_entries = TCP_LRO_ENTRIES; 204 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0, 205 "Number of LRO entries per RX queue"); 206 207 /* 208 * This enables presorting of frames before they're fed into tcp_lro_rx. 209 */ 210 static int lro_mbufs = 0; 211 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0, 212 "Enable presorting of LRO frames"); 213 214 static counter_u64_t pullups; 215 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, pullups, CTLFLAG_RD, &pullups, 216 "Number of mbuf pullups performed"); 217 218 static counter_u64_t defrags; 219 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags, 220 "Number of mbuf defrags performed"); 221 222 static int t4_tx_coalesce = 1; 223 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce, CTLFLAG_RWTUN, &t4_tx_coalesce, 0, 224 "tx coalescing allowed"); 225 226 /* 227 * The driver will make aggressive attempts at tx coalescing if it sees these 228 * many packets eligible for coalescing in quick succession, with no more than 229 * the specified gap in between the eth_tx calls that delivered the packets. 230 */ 231 static int t4_tx_coalesce_pkts = 32; 232 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_pkts, CTLFLAG_RWTUN, 233 &t4_tx_coalesce_pkts, 0, 234 "# of consecutive packets (1 - 255) that will trigger tx coalescing"); 235 static int t4_tx_coalesce_gap = 5; 236 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_gap, CTLFLAG_RWTUN, 237 &t4_tx_coalesce_gap, 0, "tx gap (in microseconds)"); 238 239 static int service_iq(struct sge_iq *, int); 240 static int service_iq_fl(struct sge_iq *, int); 241 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 242 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *, 243 u_int); 244 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, 245 int, int, int); 246 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 247 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 248 struct sge_iq *, char *); 249 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 250 struct sysctl_ctx_list *, struct sysctl_oid *); 251 static void free_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *); 252 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 253 struct sge_iq *); 254 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 255 struct sysctl_oid *, struct sge_fl *); 256 static int alloc_iq_fl_hwq(struct vi_info *, struct sge_iq *, struct sge_fl *); 257 static int free_iq_fl_hwq(struct adapter *, struct sge_iq *, struct sge_fl *); 258 static int alloc_fwq(struct adapter *); 259 static void free_fwq(struct adapter *); 260 static int alloc_ctrlq(struct adapter *, int); 261 static void free_ctrlq(struct adapter *, int); 262 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, int); 263 static void free_rxq(struct vi_info *, struct sge_rxq *); 264 static void add_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 265 struct sge_rxq *); 266 #ifdef TCP_OFFLOAD 267 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 268 int); 269 static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 270 
static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 271 struct sge_ofld_rxq *); 272 #endif 273 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 274 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 275 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 276 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 277 #endif 278 static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *, 279 struct sysctl_oid *); 280 static void free_eq(struct adapter *, struct sge_eq *); 281 static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *, 282 struct sysctl_oid *, struct sge_eq *); 283 static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *); 284 static int free_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *); 285 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 286 struct sysctl_ctx_list *, struct sysctl_oid *); 287 static void free_wrq(struct adapter *, struct sge_wrq *); 288 static void add_wrq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 289 struct sge_wrq *); 290 static int alloc_txq(struct vi_info *, struct sge_txq *, int); 291 static void free_txq(struct vi_info *, struct sge_txq *); 292 static void add_txq_sysctls(struct vi_info *, struct sysctl_ctx_list *, 293 struct sysctl_oid *, struct sge_txq *); 294 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 295 static int alloc_ofld_txq(struct vi_info *, struct sge_ofld_txq *, int); 296 static void free_ofld_txq(struct vi_info *, struct sge_ofld_txq *); 297 static void add_ofld_txq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 298 struct sge_ofld_txq *); 299 #endif 300 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 301 static inline void ring_fl_db(struct adapter *, struct sge_fl *); 302 static int refill_fl(struct adapter *, struct sge_fl *, int); 303 static void refill_sfl(void *); 304 static int find_refill_source(struct adapter *, int, bool); 305 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 306 307 static inline void get_pkt_gl(struct mbuf *, struct sglist *); 308 static inline u_int txpkt_len16(u_int, const u_int); 309 static inline u_int txpkt_vm_len16(u_int, const u_int); 310 static inline void calculate_mbuf_len16(struct mbuf *, bool); 311 static inline u_int txpkts0_len16(u_int); 312 static inline u_int txpkts1_len16(void); 313 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); 314 static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *, 315 u_int); 316 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 317 struct mbuf *); 318 static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *, 319 int, bool *); 320 static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *, 321 int, bool *); 322 static u_int write_txpkts_wr(struct adapter *, struct sge_txq *); 323 static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *); 324 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 325 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 326 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 327 static inline uint16_t read_hw_cidx(struct sge_eq *); 328 static inline u_int reclaimable_tx_desc(struct sge_eq *); 329 static inline u_int total_available_tx_desc(struct sge_eq *); 330 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 331 
static void tx_reclaim(void *, int); 332 static __be64 get_flit(struct sglist_seg *, int, int); 333 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 334 struct mbuf *); 335 static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 336 struct mbuf *); 337 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 338 static void wrq_tx_drain(void *, int); 339 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 340 341 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 342 #ifdef RATELIMIT 343 #if defined(INET) || defined(INET6) 344 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 345 #endif 346 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 347 struct mbuf *); 348 #endif 349 350 static counter_u64_t extfree_refs; 351 static counter_u64_t extfree_rels; 352 353 an_handler_t t4_an_handler; 354 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 355 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 356 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 357 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 358 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 359 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 360 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 361 362 void 363 t4_register_an_handler(an_handler_t h) 364 { 365 uintptr_t *loc; 366 367 MPASS(h == NULL || t4_an_handler == NULL); 368 369 loc = (uintptr_t *)&t4_an_handler; 370 atomic_store_rel_ptr(loc, (uintptr_t)h); 371 } 372 373 void 374 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 375 { 376 uintptr_t *loc; 377 378 MPASS(type < nitems(t4_fw_msg_handler)); 379 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 380 /* 381 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 382 * handler dispatch table. Reject any attempt to install a handler for 383 * this subtype. 384 */ 385 MPASS(type != FW_TYPE_RSSCPL); 386 MPASS(type != FW6_TYPE_RSSCPL); 387 388 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 389 atomic_store_rel_ptr(loc, (uintptr_t)h); 390 } 391 392 void 393 t4_register_cpl_handler(int opcode, cpl_handler_t h) 394 { 395 uintptr_t *loc; 396 397 MPASS(opcode < nitems(t4_cpl_handler)); 398 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 399 400 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 401 atomic_store_rel_ptr(loc, (uintptr_t)h); 402 } 403 404 static int 405 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 406 struct mbuf *m) 407 { 408 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 409 u_int tid; 410 int cookie; 411 412 MPASS(m == NULL); 413 414 tid = GET_TID(cpl); 415 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { 416 /* 417 * The return code for filter-write is put in the CPL cookie so 418 * we have to rely on the hardware tid (is_ftid) to determine 419 * that this is a response to a filter. 420 */ 421 cookie = CPL_COOKIE_FILTER; 422 } else { 423 cookie = G_COOKIE(cpl->cookie); 424 } 425 MPASS(cookie > CPL_COOKIE_RESERVED); 426 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 427 428 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 429 } 430 431 static int 432 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 433 struct mbuf *m) 434 { 435 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 436 unsigned int cookie; 437 438 MPASS(m == NULL); 439 440 cookie = GET_TID(rpl) & F_SYNC_WR ? 
CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 441 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 442 } 443 444 static int 445 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 446 struct mbuf *m) 447 { 448 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 449 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 450 451 MPASS(m == NULL); 452 MPASS(cookie != CPL_COOKIE_RESERVED); 453 454 return (act_open_rpl_handlers[cookie](iq, rss, m)); 455 } 456 457 static int 458 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 459 struct mbuf *m) 460 { 461 struct adapter *sc = iq->adapter; 462 u_int cookie; 463 464 MPASS(m == NULL); 465 if (is_hashfilter(sc)) 466 cookie = CPL_COOKIE_HASHFILTER; 467 else 468 cookie = CPL_COOKIE_TOM; 469 470 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 471 } 472 473 static int 474 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 475 { 476 struct adapter *sc = iq->adapter; 477 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 478 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 479 u_int cookie; 480 481 MPASS(m == NULL); 482 if (is_etid(sc, tid)) 483 cookie = CPL_COOKIE_ETHOFLD; 484 else 485 cookie = CPL_COOKIE_TOM; 486 487 return (fw4_ack_handlers[cookie](iq, rss, m)); 488 } 489 490 static void 491 t4_init_shared_cpl_handlers(void) 492 { 493 494 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 495 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 496 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 497 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 498 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 499 } 500 501 void 502 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 503 { 504 uintptr_t *loc; 505 506 MPASS(opcode < nitems(t4_cpl_handler)); 507 MPASS(cookie > CPL_COOKIE_RESERVED); 508 MPASS(cookie < NUM_CPL_COOKIES); 509 MPASS(t4_cpl_handler[opcode] != NULL); 510 511 switch (opcode) { 512 case CPL_SET_TCB_RPL: 513 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 514 break; 515 case CPL_L2T_WRITE_RPL: 516 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 517 break; 518 case CPL_ACT_OPEN_RPL: 519 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 520 break; 521 case CPL_ABORT_RPL_RSS: 522 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 523 break; 524 case CPL_FW4_ACK: 525 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 526 break; 527 default: 528 MPASS(0); 529 return; 530 } 531 MPASS(h == NULL || *loc == (uintptr_t)NULL); 532 atomic_store_rel_ptr(loc, (uintptr_t)h); 533 } 534 535 /* 536 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 537 */ 538 void 539 t4_sge_modload(void) 540 { 541 542 if (fl_pktshift < 0 || fl_pktshift > 7) { 543 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 544 " using 0 instead.\n", fl_pktshift); 545 fl_pktshift = 0; 546 } 547 548 if (spg_len != 64 && spg_len != 128) { 549 int len; 550 551 #if defined(__i386__) || defined(__amd64__) 552 len = cpu_clflush_line_size > 64 ? 
128 : 64; 553 #else 554 len = 64; 555 #endif 556 if (spg_len != -1) { 557 printf("Invalid hw.cxgbe.spg_len value (%d)," 558 " using %d instead.\n", spg_len, len); 559 } 560 spg_len = len; 561 } 562 563 if (cong_drop < -1 || cong_drop > 2) { 564 printf("Invalid hw.cxgbe.cong_drop value (%d)," 565 " using 0 instead.\n", cong_drop); 566 cong_drop = 0; 567 } 568 #ifdef TCP_OFFLOAD 569 if (ofld_cong_drop < -1 || ofld_cong_drop > 2) { 570 printf("Invalid hw.cxgbe.ofld_cong_drop value (%d)," 571 " using 0 instead.\n", ofld_cong_drop); 572 ofld_cong_drop = 0; 573 } 574 #endif 575 576 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 577 printf("Invalid hw.cxgbe.tscale value (%d)," 578 " using 1 instead.\n", tscale); 579 tscale = 1; 580 } 581 582 if (largest_rx_cluster != MCLBYTES && 583 largest_rx_cluster != MJUMPAGESIZE && 584 largest_rx_cluster != MJUM9BYTES && 585 largest_rx_cluster != MJUM16BYTES) { 586 printf("Invalid hw.cxgbe.largest_rx_cluster value (%d)," 587 " using %d instead.\n", largest_rx_cluster, MJUM16BYTES); 588 largest_rx_cluster = MJUM16BYTES; 589 } 590 591 if (safest_rx_cluster != MCLBYTES && 592 safest_rx_cluster != MJUMPAGESIZE && 593 safest_rx_cluster != MJUM9BYTES && 594 safest_rx_cluster != MJUM16BYTES) { 595 printf("Invalid hw.cxgbe.safest_rx_cluster value (%d)," 596 " using %d instead.\n", safest_rx_cluster, MJUMPAGESIZE); 597 safest_rx_cluster = MJUMPAGESIZE; 598 } 599 600 extfree_refs = counter_u64_alloc(M_WAITOK); 601 extfree_rels = counter_u64_alloc(M_WAITOK); 602 pullups = counter_u64_alloc(M_WAITOK); 603 defrags = counter_u64_alloc(M_WAITOK); 604 counter_u64_zero(extfree_refs); 605 counter_u64_zero(extfree_rels); 606 counter_u64_zero(pullups); 607 counter_u64_zero(defrags); 608 609 t4_init_shared_cpl_handlers(); 610 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 611 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 612 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 613 #ifdef RATELIMIT 614 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 615 CPL_COOKIE_ETHOFLD); 616 #endif 617 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 618 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 619 } 620 621 void 622 t4_sge_modunload(void) 623 { 624 625 counter_u64_free(extfree_refs); 626 counter_u64_free(extfree_rels); 627 counter_u64_free(pullups); 628 counter_u64_free(defrags); 629 } 630 631 uint64_t 632 t4_sge_extfree_refs(void) 633 { 634 uint64_t refs, rels; 635 636 rels = counter_u64_fetch(extfree_rels); 637 refs = counter_u64_fetch(extfree_refs); 638 639 return (refs - rels); 640 } 641 642 /* max 4096 */ 643 #define MAX_PACK_BOUNDARY 512 644 645 static inline void 646 setup_pad_and_pack_boundaries(struct adapter *sc) 647 { 648 uint32_t v, m; 649 int pad, pack, pad_shift; 650 651 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 652 X_INGPADBOUNDARY_SHIFT; 653 pad = fl_pad; 654 if (fl_pad < (1 << pad_shift) || 655 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 656 !powerof2(fl_pad)) { 657 /* 658 * If there is any chance that we might use buffer packing and 659 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 660 * it to the minimum allowed in all other cases. 661 */ 662 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 663 664 /* 665 * For fl_pad = 0 we'll still write a reasonable value to the 666 * register but all the freelists will opt out of padding. 667 * We'll complain here only if the user tried to set it to a 668 * value greater than 0 that was invalid. 
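		 *
		 * As a sketch of the encoding applied just below this block
		 * (assuming the T4/T5 pad_shift of 5, which matches the
		 * 32-4096 range documented for hw.cxgbe.fl_pad above):
		 * pad = 32 is written to the INGPADBOUNDARY field as 0,
		 * pad = 64 as 1, and so on up to pad = 4096 as 7, i.e.
		 *
		 *	v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift);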
669 */ 670 if (fl_pad > 0) { 671 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 672 " (%d), using %d instead.\n", fl_pad, pad); 673 } 674 } 675 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 676 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 677 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 678 679 if (is_t4(sc)) { 680 if (fl_pack != -1 && fl_pack != pad) { 681 /* Complain but carry on. */ 682 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 683 " using %d instead.\n", fl_pack, pad); 684 } 685 return; 686 } 687 688 pack = fl_pack; 689 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 690 !powerof2(fl_pack)) { 691 if (sc->params.pci.mps > MAX_PACK_BOUNDARY) 692 pack = MAX_PACK_BOUNDARY; 693 else 694 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 695 MPASS(powerof2(pack)); 696 if (pack < 16) 697 pack = 16; 698 if (pack == 32) 699 pack = 64; 700 if (pack > 4096) 701 pack = 4096; 702 if (fl_pack != -1) { 703 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 704 " (%d), using %d instead.\n", fl_pack, pack); 705 } 706 } 707 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 708 if (pack == 16) 709 v = V_INGPACKBOUNDARY(0); 710 else 711 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 712 713 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 714 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 715 } 716 717 /* 718 * adap->params.vpd.cclk must be set up before this is called. 719 */ 720 void 721 t4_tweak_chip_settings(struct adapter *sc) 722 { 723 int i, reg; 724 uint32_t v, m; 725 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 726 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 727 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 728 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 729 static int sw_buf_sizes[] = { 730 MCLBYTES, 731 MJUMPAGESIZE, 732 MJUM9BYTES, 733 MJUM16BYTES 734 }; 735 736 KASSERT(sc->flags & MASTER_PF, 737 ("%s: trying to change chip settings when not master.", __func__)); 738 739 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 740 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 741 V_EGRSTATUSPAGESIZE(spg_len == 128); 742 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 743 744 setup_pad_and_pack_boundaries(sc); 745 746 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 747 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 748 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 749 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 750 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 751 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 752 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 753 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 754 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 755 756 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096); 757 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536); 758 reg = A_SGE_FL_BUFFER_SIZE2; 759 for (i = 0; i < nitems(sw_buf_sizes); i++) { 760 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 761 t4_write_reg(sc, reg, sw_buf_sizes[i]); 762 reg += 4; 763 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 764 t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE); 765 reg += 4; 766 } 767 768 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 769 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 770 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 771 772 KASSERT(intr_timer[0] <= timer_max, 773 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 774 timer_max)); 775 for (i = 1; i < nitems(intr_timer); i++) { 776 KASSERT(intr_timer[i] >= intr_timer[i - 1], 777 ("%s: timers not listed in increasing order (%d)", 778 __func__, i)); 779 780 while (intr_timer[i] 
		    > timer_max) {
			if (i == nitems(intr_timer) - 1) {
				intr_timer[i] = timer_max;
				break;
			}
			intr_timer[i] += intr_timer[i - 1];
			intr_timer[i] /= 2;
		}
	}

	v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
	    V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v);
	v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
	    V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v);
	v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
	    V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]));
	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v);

	if (chip_id(sc) >= CHELSIO_T6) {
		m = V_TSCALE(M_TSCALE);
		if (tscale == 1)
			v = 0;
		else
			v = V_TSCALE(tscale - 2);
		t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v);

		if (sc->debug_flags & DF_DISABLE_TCB_CACHE) {
			m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(M_WRTHRTHRESH);
			t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1);
			v &= ~m;
			v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN |
			    V_WRTHRTHRESH(16);
			t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1);
		}
	}

	/* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */
	v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6);
	t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v);

	/*
	 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been
	 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we
	 * may have to deal with is MAXPHYS + 1 page.
	 */
	v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4);
	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v);

	/* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */
	m = v = F_TDDPTAGTCB | F_ISCSITAGTCB;
	t4_set_reg_field(sc, A_ULP_RX_CTL, m, v);

	m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET |
	    F_RESETDDPOFFSET;
	v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET;
	t4_set_reg_field(sc, A_TP_PARA_REG5, m, v);
}

/*
 * SGE wants the buffer to be at least 64B and then a multiple of 16. Its
 * address must be 16B aligned. If padding is in use the buffer's start and end
 * need to be aligned to the pad boundary as well. We'll just make sure that
 * the size is a multiple of the pad boundary here; it is up to the buffer
 * allocation code to make sure the start of the buffer is aligned.
 */
static inline int
hwsz_ok(struct adapter *sc, int hwsz)
{
	int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1;

	return (hwsz >= 64 && (hwsz & mask) == 0);
}

/*
 * Initialize the rx buffer sizes and figure out which zones the buffers will
 * be allocated from.
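 *
 * Illustrative sketch (a summary, not extra configuration) of what the loop
 * below records for a single sw cluster zone when buffer packing is possible,
 * using MCLBYTES as an assumed example:
 *
 *	rxb->size1  = MCLBYTES;			sw cluster size
 *	rxb->hwidx1 = hw idx whose buffer size matches size1 exactly
 *	rxb->size2 <= size1 - CL_METADATA_SIZE;	payload area when packing
 *	rxb->hwidx2 = hw idx for size2 (leaves room for cluster metadata)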
859 */ 860 void 861 t4_init_rx_buf_info(struct adapter *sc) 862 { 863 struct sge *s = &sc->sge; 864 struct sge_params *sp = &sc->params.sge; 865 int i, j, n; 866 static int sw_buf_sizes[] = { /* Sorted by size */ 867 MCLBYTES, 868 MJUMPAGESIZE, 869 MJUM9BYTES, 870 MJUM16BYTES 871 }; 872 struct rx_buf_info *rxb; 873 874 s->safe_zidx = -1; 875 rxb = &s->rx_buf_info[0]; 876 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 877 rxb->size1 = sw_buf_sizes[i]; 878 rxb->zone = m_getzone(rxb->size1); 879 rxb->type = m_gettype(rxb->size1); 880 rxb->size2 = 0; 881 rxb->hwidx1 = -1; 882 rxb->hwidx2 = -1; 883 for (j = 0; j < SGE_FLBUF_SIZES; j++) { 884 int hwsize = sp->sge_fl_buffer_size[j]; 885 886 if (!hwsz_ok(sc, hwsize)) 887 continue; 888 889 /* hwidx for size1 */ 890 if (rxb->hwidx1 == -1 && rxb->size1 == hwsize) 891 rxb->hwidx1 = j; 892 893 /* hwidx for size2 (buffer packing) */ 894 if (rxb->size1 - CL_METADATA_SIZE < hwsize) 895 continue; 896 n = rxb->size1 - hwsize - CL_METADATA_SIZE; 897 if (n == 0) { 898 rxb->hwidx2 = j; 899 rxb->size2 = hwsize; 900 break; /* stop looking */ 901 } 902 if (rxb->hwidx2 != -1) { 903 if (n < sp->sge_fl_buffer_size[rxb->hwidx2] - 904 hwsize - CL_METADATA_SIZE) { 905 rxb->hwidx2 = j; 906 rxb->size2 = hwsize; 907 } 908 } else if (n <= 2 * CL_METADATA_SIZE) { 909 rxb->hwidx2 = j; 910 rxb->size2 = hwsize; 911 } 912 } 913 if (rxb->hwidx2 != -1) 914 sc->flags |= BUF_PACKING_OK; 915 if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster) 916 s->safe_zidx = i; 917 } 918 } 919 920 /* 921 * Verify some basic SGE settings for the PF and VF driver, and other 922 * miscellaneous settings for the PF driver. 923 */ 924 int 925 t4_verify_chip_settings(struct adapter *sc) 926 { 927 struct sge_params *sp = &sc->params.sge; 928 uint32_t m, v, r; 929 int rc = 0; 930 const uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 931 932 m = F_RXPKTCPLMODE; 933 v = F_RXPKTCPLMODE; 934 r = sp->sge_control; 935 if ((r & m) != v) { 936 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 937 rc = EINVAL; 938 } 939 940 /* 941 * If this changes then every single use of PAGE_SHIFT in the driver 942 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 
943 */ 944 if (sp->page_shift != PAGE_SHIFT) { 945 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 946 rc = EINVAL; 947 } 948 949 if (sc->flags & IS_VF) 950 return (0); 951 952 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 953 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 954 if (r != v) { 955 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 956 if (sc->vres.ddp.size != 0) 957 rc = EINVAL; 958 } 959 960 m = v = F_TDDPTAGTCB; 961 r = t4_read_reg(sc, A_ULP_RX_CTL); 962 if ((r & m) != v) { 963 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 964 if (sc->vres.ddp.size != 0) 965 rc = EINVAL; 966 } 967 968 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 969 F_RESETDDPOFFSET; 970 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 971 r = t4_read_reg(sc, A_TP_PARA_REG5); 972 if ((r & m) != v) { 973 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 974 if (sc->vres.ddp.size != 0) 975 rc = EINVAL; 976 } 977 978 return (rc); 979 } 980 981 int 982 t4_create_dma_tag(struct adapter *sc) 983 { 984 int rc; 985 986 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 987 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 988 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 989 NULL, &sc->dmat); 990 if (rc != 0) { 991 device_printf(sc->dev, 992 "failed to create main DMA tag: %d\n", rc); 993 } 994 995 return (rc); 996 } 997 998 void 999 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 1000 struct sysctl_oid_list *children) 1001 { 1002 struct sge_params *sp = &sc->params.sge; 1003 1004 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 1005 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1006 sysctl_bufsizes, "A", "freelist buffer sizes"); 1007 1008 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 1009 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 1010 1011 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 1012 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 1013 1014 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 1015 NULL, sp->spg_len, "status page size (bytes)"); 1016 1017 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 1018 NULL, cong_drop, "congestion drop setting"); 1019 #ifdef TCP_OFFLOAD 1020 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ofld_cong_drop", CTLFLAG_RD, 1021 NULL, ofld_cong_drop, "congestion drop setting"); 1022 #endif 1023 1024 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 1025 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 1026 } 1027 1028 int 1029 t4_destroy_dma_tag(struct adapter *sc) 1030 { 1031 if (sc->dmat) 1032 bus_dma_tag_destroy(sc->dmat); 1033 1034 return (0); 1035 } 1036 1037 /* 1038 * Allocate and initialize the firmware event queue, control queues, and special 1039 * purpose rx queues owned by the adapter. 1040 * 1041 * Returns errno on failure. Resources allocated up to that point may still be 1042 * allocated. Caller is responsible for cleanup in case this function fails. 1043 */ 1044 int 1045 t4_setup_adapter_queues(struct adapter *sc) 1046 { 1047 int rc, i; 1048 1049 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1050 1051 /* 1052 * Firmware event queue 1053 */ 1054 rc = alloc_fwq(sc); 1055 if (rc != 0) 1056 return (rc); 1057 1058 /* 1059 * That's all for the VF driver. 1060 */ 1061 if (sc->flags & IS_VF) 1062 return (rc); 1063 1064 /* 1065 * XXX: General purpose rx queues, one per port. 
1066 */ 1067 1068 /* 1069 * Control queues, one per port. 1070 */ 1071 for_each_port(sc, i) { 1072 rc = alloc_ctrlq(sc, i); 1073 if (rc != 0) 1074 return (rc); 1075 } 1076 1077 return (rc); 1078 } 1079 1080 /* 1081 * Idempotent 1082 */ 1083 int 1084 t4_teardown_adapter_queues(struct adapter *sc) 1085 { 1086 int i; 1087 1088 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1089 1090 if (sc->sge.ctrlq != NULL) { 1091 MPASS(!(sc->flags & IS_VF)); /* VFs don't allocate ctrlq. */ 1092 for_each_port(sc, i) 1093 free_ctrlq(sc, i); 1094 } 1095 free_fwq(sc); 1096 1097 return (0); 1098 } 1099 1100 /* Maximum payload that could arrive with a single iq descriptor. */ 1101 static inline int 1102 max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld) 1103 { 1104 int maxp; 1105 1106 /* large enough even when hw VLAN extraction is disabled */ 1107 maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 1108 ETHER_VLAN_ENCAP_LEN + ifp->if_mtu; 1109 if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS && 1110 maxp < sc->params.tp.max_rx_pdu) 1111 maxp = sc->params.tp.max_rx_pdu; 1112 return (maxp); 1113 } 1114 1115 int 1116 t4_setup_vi_queues(struct vi_info *vi) 1117 { 1118 int rc = 0, i, intr_idx; 1119 struct sge_rxq *rxq; 1120 struct sge_txq *txq; 1121 #ifdef TCP_OFFLOAD 1122 struct sge_ofld_rxq *ofld_rxq; 1123 #endif 1124 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1125 struct sge_ofld_txq *ofld_txq; 1126 #endif 1127 #ifdef DEV_NETMAP 1128 int saved_idx, iqidx; 1129 struct sge_nm_rxq *nm_rxq; 1130 struct sge_nm_txq *nm_txq; 1131 #endif 1132 struct adapter *sc = vi->adapter; 1133 struct ifnet *ifp = vi->ifp; 1134 int maxp; 1135 1136 /* Interrupt vector to start from (when using multiple vectors) */ 1137 intr_idx = vi->first_intr; 1138 1139 #ifdef DEV_NETMAP 1140 saved_idx = intr_idx; 1141 if (ifp->if_capabilities & IFCAP_NETMAP) { 1142 1143 /* netmap is supported with direct interrupts only. */ 1144 MPASS(!forwarding_intr_to_fwq(sc)); 1145 MPASS(vi->first_intr >= 0); 1146 1147 /* 1148 * We don't have buffers to back the netmap rx queues 1149 * right now so we create the queues in a way that 1150 * doesn't set off any congestion signal in the chip. 1151 */ 1152 for_each_nm_rxq(vi, i, nm_rxq) { 1153 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i); 1154 if (rc != 0) 1155 goto done; 1156 intr_idx++; 1157 } 1158 1159 for_each_nm_txq(vi, i, nm_txq) { 1160 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); 1161 rc = alloc_nm_txq(vi, nm_txq, iqidx, i); 1162 if (rc != 0) 1163 goto done; 1164 } 1165 } 1166 1167 /* Normal rx queues and netmap rx queues share the same interrupts. */ 1168 intr_idx = saved_idx; 1169 #endif 1170 1171 /* 1172 * Allocate rx queues first because a default iqid is required when 1173 * creating a tx queue. 1174 */ 1175 maxp = max_rx_payload(sc, ifp, false); 1176 for_each_rxq(vi, i, rxq) { 1177 rc = alloc_rxq(vi, rxq, i, intr_idx, maxp); 1178 if (rc != 0) 1179 goto done; 1180 if (!forwarding_intr_to_fwq(sc)) 1181 intr_idx++; 1182 } 1183 #ifdef DEV_NETMAP 1184 if (ifp->if_capabilities & IFCAP_NETMAP) 1185 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1186 #endif 1187 #ifdef TCP_OFFLOAD 1188 maxp = max_rx_payload(sc, ifp, true); 1189 for_each_ofld_rxq(vi, i, ofld_rxq) { 1190 rc = alloc_ofld_rxq(vi, ofld_rxq, i, intr_idx, maxp); 1191 if (rc != 0) 1192 goto done; 1193 if (!forwarding_intr_to_fwq(sc)) 1194 intr_idx++; 1195 } 1196 #endif 1197 1198 /* 1199 * Now the tx queues. 
1200 */ 1201 for_each_txq(vi, i, txq) { 1202 rc = alloc_txq(vi, txq, i); 1203 if (rc != 0) 1204 goto done; 1205 } 1206 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1207 for_each_ofld_txq(vi, i, ofld_txq) { 1208 rc = alloc_ofld_txq(vi, ofld_txq, i); 1209 if (rc != 0) 1210 goto done; 1211 } 1212 #endif 1213 done: 1214 if (rc) 1215 t4_teardown_vi_queues(vi); 1216 1217 return (rc); 1218 } 1219 1220 /* 1221 * Idempotent 1222 */ 1223 int 1224 t4_teardown_vi_queues(struct vi_info *vi) 1225 { 1226 int i; 1227 struct sge_rxq *rxq; 1228 struct sge_txq *txq; 1229 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1230 struct sge_ofld_txq *ofld_txq; 1231 #endif 1232 #ifdef TCP_OFFLOAD 1233 struct sge_ofld_rxq *ofld_rxq; 1234 #endif 1235 #ifdef DEV_NETMAP 1236 struct sge_nm_rxq *nm_rxq; 1237 struct sge_nm_txq *nm_txq; 1238 #endif 1239 1240 #ifdef DEV_NETMAP 1241 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1242 for_each_nm_txq(vi, i, nm_txq) { 1243 free_nm_txq(vi, nm_txq); 1244 } 1245 1246 for_each_nm_rxq(vi, i, nm_rxq) { 1247 free_nm_rxq(vi, nm_rxq); 1248 } 1249 } 1250 #endif 1251 1252 /* 1253 * Take down all the tx queues first, as they reference the rx queues 1254 * (for egress updates, etc.). 1255 */ 1256 1257 for_each_txq(vi, i, txq) { 1258 free_txq(vi, txq); 1259 } 1260 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1261 for_each_ofld_txq(vi, i, ofld_txq) { 1262 free_ofld_txq(vi, ofld_txq); 1263 } 1264 #endif 1265 1266 /* 1267 * Then take down the rx queues. 1268 */ 1269 1270 for_each_rxq(vi, i, rxq) { 1271 free_rxq(vi, rxq); 1272 } 1273 #ifdef TCP_OFFLOAD 1274 for_each_ofld_rxq(vi, i, ofld_rxq) { 1275 free_ofld_rxq(vi, ofld_rxq); 1276 } 1277 #endif 1278 1279 return (0); 1280 } 1281 1282 /* 1283 * Interrupt handler when the driver is using only 1 interrupt. This is a very 1284 * unusual scenario. 1285 * 1286 * a) Deals with errors, if any. 1287 * b) Services firmware event queue, which is taking interrupts for all other 1288 * queues. 1289 */ 1290 void 1291 t4_intr_all(void *arg) 1292 { 1293 struct adapter *sc = arg; 1294 struct sge_iq *fwq = &sc->sge.fwq; 1295 1296 MPASS(sc->intr_count == 1); 1297 1298 if (sc->intr_type == INTR_INTX) 1299 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1300 1301 t4_intr_err(arg); 1302 t4_intr_evt(fwq); 1303 } 1304 1305 /* 1306 * Interrupt handler for errors (installed directly when multiple interrupts are 1307 * being used, or called by t4_intr_all). 1308 */ 1309 void 1310 t4_intr_err(void *arg) 1311 { 1312 struct adapter *sc = arg; 1313 uint32_t v; 1314 const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0; 1315 1316 if (atomic_load_int(&sc->error_flags) & ADAP_FATAL_ERR) 1317 return; 1318 1319 v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE)); 1320 if (v & F_PFSW) { 1321 sc->swintr++; 1322 t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v); 1323 } 1324 1325 if (t4_slow_intr_handler(sc, verbose)) 1326 t4_fatal_err(sc, false); 1327 } 1328 1329 /* 1330 * Interrupt handler for iq-only queues. The firmware event queue is the only 1331 * such queue right now. 1332 */ 1333 void 1334 t4_intr_evt(void *arg) 1335 { 1336 struct sge_iq *iq = arg; 1337 1338 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1339 service_iq(iq, 0); 1340 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1341 } 1342 } 1343 1344 /* 1345 * Interrupt handler for iq+fl queues. 
1346 */ 1347 void 1348 t4_intr(void *arg) 1349 { 1350 struct sge_iq *iq = arg; 1351 1352 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1353 service_iq_fl(iq, 0); 1354 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1355 } 1356 } 1357 1358 #ifdef DEV_NETMAP 1359 /* 1360 * Interrupt handler for netmap rx queues. 1361 */ 1362 void 1363 t4_nm_intr(void *arg) 1364 { 1365 struct sge_nm_rxq *nm_rxq = arg; 1366 1367 if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) { 1368 service_nm_rxq(nm_rxq); 1369 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON); 1370 } 1371 } 1372 1373 /* 1374 * Interrupt handler for vectors shared between NIC and netmap rx queues. 1375 */ 1376 void 1377 t4_vi_intr(void *arg) 1378 { 1379 struct irq *irq = arg; 1380 1381 MPASS(irq->nm_rxq != NULL); 1382 t4_nm_intr(irq->nm_rxq); 1383 1384 MPASS(irq->rxq != NULL); 1385 t4_intr(irq->rxq); 1386 } 1387 #endif 1388 1389 /* 1390 * Deals with interrupts on an iq-only (no freelist) queue. 1391 */ 1392 static int 1393 service_iq(struct sge_iq *iq, int budget) 1394 { 1395 struct sge_iq *q; 1396 struct adapter *sc = iq->adapter; 1397 struct iq_desc *d = &iq->desc[iq->cidx]; 1398 int ndescs = 0, limit; 1399 int rsp_type; 1400 uint32_t lq; 1401 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1402 1403 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1404 KASSERT((iq->flags & IQ_HAS_FL) == 0, 1405 ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq, 1406 iq->flags)); 1407 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1408 MPASS((iq->flags & IQ_LRO_ENABLED) == 0); 1409 1410 limit = budget ? budget : iq->qsize / 16; 1411 1412 /* 1413 * We always come back and check the descriptor ring for new indirect 1414 * interrupts and other responses after running a single handler. 1415 */ 1416 for (;;) { 1417 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1418 1419 rmb(); 1420 1421 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1422 lq = be32toh(d->rsp.pldbuflen_qid); 1423 1424 switch (rsp_type) { 1425 case X_RSPD_TYPE_FLBUF: 1426 panic("%s: data for an iq (%p) with no freelist", 1427 __func__, iq); 1428 1429 /* NOTREACHED */ 1430 1431 case X_RSPD_TYPE_CPL: 1432 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1433 ("%s: bad opcode %02x.", __func__, 1434 d->rss.opcode)); 1435 t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL); 1436 break; 1437 1438 case X_RSPD_TYPE_INTR: 1439 /* 1440 * There are 1K interrupt-capable queues (qids 0 1441 * through 1023). A response type indicating a 1442 * forwarded interrupt with a qid >= 1K is an 1443 * iWARP async notification. 
				 */
				if (__predict_true(lq >= 1024)) {
					t4_an_handler(iq, &d->rsp);
					break;
				}

				q = sc->sge.iqmap[lq - sc->sge.iq_start -
				    sc->sge.iq_base];
				if (atomic_cmpset_int(&q->state, IQS_IDLE,
				    IQS_BUSY)) {
					if (service_iq_fl(q, q->qsize / 16) == 0) {
						(void) atomic_cmpset_int(&q->state,
						    IQS_BUSY, IQS_IDLE);
					} else {
						STAILQ_INSERT_TAIL(&iql, q,
						    link);
					}
				}
				break;

			default:
				KASSERT(0,
				    ("%s: illegal response type %d on iq %p",
				    __func__, rsp_type, iq));
				log(LOG_ERR,
				    "%s: illegal response type %d on iq %p",
				    device_get_nameunit(sc->dev), rsp_type, iq);
				break;
			}

			d++;
			if (__predict_false(++iq->cidx == iq->sidx)) {
				iq->cidx = 0;
				iq->gen ^= F_RSPD_GEN;
				d = &iq->desc[0];
			}
			if (__predict_false(++ndescs == limit)) {
				t4_write_reg(sc, sc->sge_gts_reg,
				    V_CIDXINC(ndescs) |
				    V_INGRESSQID(iq->cntxt_id) |
				    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
				ndescs = 0;

				if (budget) {
					return (EINPROGRESS);
				}
			}
		}

		if (STAILQ_EMPTY(&iql))
			break;

		/*
		 * Process the head only, and send it to the back of the list if
		 * it's still not done.
		 */
		q = STAILQ_FIRST(&iql);
		STAILQ_REMOVE_HEAD(&iql, link);
		if (service_iq_fl(q, q->qsize / 8) == 0)
			(void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
		else
			STAILQ_INSERT_TAIL(&iql, q, link);
	}

	t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) |
	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));

	return (0);
}

#if defined(INET) || defined(INET6)
static inline int
sort_before_lro(struct lro_ctrl *lro)
{

	return (lro->lro_mbuf_max != 0);
}
#endif

#define CGBE_SHIFT_SCALE 10

static inline uint64_t
t4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
{
	struct clock_sync *cur, dcur;
	uint64_t hw_clocks;
	uint64_t hw_clk_div;
	sbintime_t sbt_cur_to_prev, sbt;
	uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */
	seqc_t gen;

	for (;;) {
		cur = &sc->cal_info[sc->cal_current];
		gen = seqc_read(&cur->gen);
		if (gen == 0)
			return (0);
		dcur = *cur;
		if (seqc_consistent(&cur->gen, gen))
			break;
	}

	/*
	 * Our goal here is to have a result that is:
	 *
	 * (                             (cur_time - prev_time)   )
	 * ((hw_tstmp - hw_prev) *  ----------------------------- ) + prev_time
	 * (                             (hw_cur - hw_prev)       )
	 *
	 * With the constraints that we cannot use float and we
	 * don't want to overflow the uint64_t numbers we are using.
	 */
	hw_clocks = hw_tstmp - dcur.hw_prev;
	sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev);
	hw_clk_div = dcur.hw_cur - dcur.hw_prev;
	sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev;
	return (sbttons(sbt));
}

static inline void
move_to_next_rxbuf(struct sge_fl *fl)
{

	fl->rx_offset = 0;
	if (__predict_false((++fl->cidx & 7) == 0)) {
		uint16_t cidx = fl->cidx >> 3;

		if (__predict_false(cidx == fl->sidx))
			fl->cidx = cidx = 0;
		fl->hw_cidx = cidx;
	}
}

/*
 * Deals with interrupts on an iq+fl queue.
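 *
 * Callers are expected to mark the iq busy before calling and idle again when
 * done, the way t4_intr() above does it:
 *
 *	if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
 *		service_iq_fl(iq, 0);
 *		(void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
 *	}
 *
 * A non-zero budget caps the number of descriptors processed in one call and
 * makes the function return EINPROGRESS instead of 0 if it had to stop early.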
1578 */ 1579 static int 1580 service_iq_fl(struct sge_iq *iq, int budget) 1581 { 1582 struct sge_rxq *rxq = iq_to_rxq(iq); 1583 struct sge_fl *fl; 1584 struct adapter *sc = iq->adapter; 1585 struct iq_desc *d = &iq->desc[iq->cidx]; 1586 int ndescs, limit; 1587 int rsp_type, starved; 1588 uint32_t lq; 1589 uint16_t fl_hw_cidx; 1590 struct mbuf *m0; 1591 #if defined(INET) || defined(INET6) 1592 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1593 struct lro_ctrl *lro = &rxq->lro; 1594 #endif 1595 1596 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1597 MPASS(iq->flags & IQ_HAS_FL); 1598 1599 ndescs = 0; 1600 #if defined(INET) || defined(INET6) 1601 if (iq->flags & IQ_ADJ_CREDIT) { 1602 MPASS(sort_before_lro(lro)); 1603 iq->flags &= ~IQ_ADJ_CREDIT; 1604 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { 1605 tcp_lro_flush_all(lro); 1606 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | 1607 V_INGRESSQID((u32)iq->cntxt_id) | 1608 V_SEINTARM(iq->intr_params)); 1609 return (0); 1610 } 1611 ndescs = 1; 1612 } 1613 #else 1614 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1615 #endif 1616 1617 limit = budget ? budget : iq->qsize / 16; 1618 fl = &rxq->fl; 1619 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1620 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1621 1622 rmb(); 1623 1624 m0 = NULL; 1625 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1626 lq = be32toh(d->rsp.pldbuflen_qid); 1627 1628 switch (rsp_type) { 1629 case X_RSPD_TYPE_FLBUF: 1630 if (lq & F_RSPD_NEWBUF) { 1631 if (fl->rx_offset > 0) 1632 move_to_next_rxbuf(fl); 1633 lq = G_RSPD_LEN(lq); 1634 } 1635 if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) { 1636 FL_LOCK(fl); 1637 refill_fl(sc, fl, 64); 1638 FL_UNLOCK(fl); 1639 fl_hw_cidx = fl->hw_cidx; 1640 } 1641 1642 if (d->rss.opcode == CPL_RX_PKT) { 1643 if (__predict_true(eth_rx(sc, rxq, d, lq) == 0)) 1644 break; 1645 goto out; 1646 } 1647 m0 = get_fl_payload(sc, fl, lq); 1648 if (__predict_false(m0 == NULL)) 1649 goto out; 1650 1651 /* fall through */ 1652 1653 case X_RSPD_TYPE_CPL: 1654 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1655 ("%s: bad opcode %02x.", __func__, d->rss.opcode)); 1656 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1657 break; 1658 1659 case X_RSPD_TYPE_INTR: 1660 1661 /* 1662 * There are 1K interrupt-capable queues (qids 0 1663 * through 1023). A response type indicating a 1664 * forwarded interrupt with a qid >= 1K is an 1665 * iWARP async notification. That is the only 1666 * acceptable indirect interrupt on this queue. 
1667 */ 1668 if (__predict_false(lq < 1024)) { 1669 panic("%s: indirect interrupt on iq_fl %p " 1670 "with qid %u", __func__, iq, lq); 1671 } 1672 1673 t4_an_handler(iq, &d->rsp); 1674 break; 1675 1676 default: 1677 KASSERT(0, ("%s: illegal response type %d on iq %p", 1678 __func__, rsp_type, iq)); 1679 log(LOG_ERR, "%s: illegal response type %d on iq %p", 1680 device_get_nameunit(sc->dev), rsp_type, iq); 1681 break; 1682 } 1683 1684 d++; 1685 if (__predict_false(++iq->cidx == iq->sidx)) { 1686 iq->cidx = 0; 1687 iq->gen ^= F_RSPD_GEN; 1688 d = &iq->desc[0]; 1689 } 1690 if (__predict_false(++ndescs == limit)) { 1691 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1692 V_INGRESSQID(iq->cntxt_id) | 1693 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1694 1695 #if defined(INET) || defined(INET6) 1696 if (iq->flags & IQ_LRO_ENABLED && 1697 !sort_before_lro(lro) && 1698 sc->lro_timeout != 0) { 1699 tcp_lro_flush_inactive(lro, &lro_timeout); 1700 } 1701 #endif 1702 if (budget) 1703 return (EINPROGRESS); 1704 ndescs = 0; 1705 } 1706 } 1707 out: 1708 #if defined(INET) || defined(INET6) 1709 if (iq->flags & IQ_LRO_ENABLED) { 1710 if (ndescs > 0 && lro->lro_mbuf_count > 8) { 1711 MPASS(sort_before_lro(lro)); 1712 /* hold back one credit and don't flush LRO state */ 1713 iq->flags |= IQ_ADJ_CREDIT; 1714 ndescs--; 1715 } else { 1716 tcp_lro_flush_all(lro); 1717 } 1718 } 1719 #endif 1720 1721 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1722 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1723 1724 FL_LOCK(fl); 1725 starved = refill_fl(sc, fl, 64); 1726 FL_UNLOCK(fl); 1727 if (__predict_false(starved != 0)) 1728 add_fl_to_sfl(sc, fl); 1729 1730 return (0); 1731 } 1732 1733 static inline struct cluster_metadata * 1734 cl_metadata(struct fl_sdesc *sd) 1735 { 1736 1737 return ((void *)(sd->cl + sd->moff)); 1738 } 1739 1740 static void 1741 rxb_free(struct mbuf *m) 1742 { 1743 struct cluster_metadata *clm = m->m_ext.ext_arg1; 1744 1745 uma_zfree(clm->zone, clm->cl); 1746 counter_u64_add(extfree_rels, 1); 1747 } 1748 1749 /* 1750 * The mbuf returned comes from zone_muf and carries the payload in one of these 1751 * ways 1752 * a) complete frame inside the mbuf 1753 * b) m_cljset (for clusters without metadata) 1754 * d) m_extaddref (cluster with metadata) 1755 */ 1756 static struct mbuf * 1757 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1758 int remaining) 1759 { 1760 struct mbuf *m; 1761 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1762 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1763 struct cluster_metadata *clm; 1764 int len, blen; 1765 caddr_t payload; 1766 1767 if (fl->flags & FL_BUF_PACKING) { 1768 u_int l, pad; 1769 1770 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ 1771 len = min(remaining, blen); 1772 payload = sd->cl + fl->rx_offset; 1773 1774 l = fr_offset + len; 1775 pad = roundup2(l, fl->buf_boundary) - l; 1776 if (fl->rx_offset + len + pad < rxb->size2) 1777 blen = len + pad; 1778 MPASS(fl->rx_offset + blen <= rxb->size2); 1779 } else { 1780 MPASS(fl->rx_offset == 0); /* not packing */ 1781 blen = rxb->size1; 1782 len = min(remaining, blen); 1783 payload = sd->cl; 1784 } 1785 1786 if (fr_offset == 0) { 1787 m = m_gethdr(M_NOWAIT, MT_DATA); 1788 if (__predict_false(m == NULL)) 1789 return (NULL); 1790 m->m_pkthdr.len = remaining; 1791 } else { 1792 m = m_get(M_NOWAIT, MT_DATA); 1793 if (__predict_false(m == NULL)) 1794 return (NULL); 1795 } 1796 m->m_len = len; 1797 kmsan_mark(payload, len, 
KMSAN_STATE_INITED); 1798 1799 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { 1800 /* copy data to mbuf */ 1801 bcopy(payload, mtod(m, caddr_t), len); 1802 if (fl->flags & FL_BUF_PACKING) { 1803 fl->rx_offset += blen; 1804 MPASS(fl->rx_offset <= rxb->size2); 1805 if (fl->rx_offset < rxb->size2) 1806 return (m); /* without advancing the cidx */ 1807 } 1808 } else if (fl->flags & FL_BUF_PACKING) { 1809 clm = cl_metadata(sd); 1810 if (sd->nmbuf++ == 0) { 1811 clm->refcount = 1; 1812 clm->zone = rxb->zone; 1813 clm->cl = sd->cl; 1814 counter_u64_add(extfree_refs, 1); 1815 } 1816 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm, 1817 NULL); 1818 1819 fl->rx_offset += blen; 1820 MPASS(fl->rx_offset <= rxb->size2); 1821 if (fl->rx_offset < rxb->size2) 1822 return (m); /* without advancing the cidx */ 1823 } else { 1824 m_cljset(m, sd->cl, rxb->type); 1825 sd->cl = NULL; /* consumed, not a recycle candidate */ 1826 } 1827 1828 move_to_next_rxbuf(fl); 1829 1830 return (m); 1831 } 1832 1833 static struct mbuf * 1834 get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen) 1835 { 1836 struct mbuf *m0, *m, **pnext; 1837 u_int remaining; 1838 1839 if (__predict_false(fl->flags & FL_BUF_RESUME)) { 1840 M_ASSERTPKTHDR(fl->m0); 1841 MPASS(fl->m0->m_pkthdr.len == plen); 1842 MPASS(fl->remaining < plen); 1843 1844 m0 = fl->m0; 1845 pnext = fl->pnext; 1846 remaining = fl->remaining; 1847 fl->flags &= ~FL_BUF_RESUME; 1848 goto get_segment; 1849 } 1850 1851 /* 1852 * Payload starts at rx_offset in the current hw buffer. Its length is 1853 * 'len' and it may span multiple hw buffers. 1854 */ 1855 1856 m0 = get_scatter_segment(sc, fl, 0, plen); 1857 if (m0 == NULL) 1858 return (NULL); 1859 remaining = plen - m0->m_len; 1860 pnext = &m0->m_next; 1861 while (remaining > 0) { 1862 get_segment: 1863 MPASS(fl->rx_offset == 0); 1864 m = get_scatter_segment(sc, fl, plen - remaining, remaining); 1865 if (__predict_false(m == NULL)) { 1866 fl->m0 = m0; 1867 fl->pnext = pnext; 1868 fl->remaining = remaining; 1869 fl->flags |= FL_BUF_RESUME; 1870 return (NULL); 1871 } 1872 *pnext = m; 1873 pnext = &m->m_next; 1874 remaining -= m->m_len; 1875 } 1876 *pnext = NULL; 1877 1878 M_ASSERTPKTHDR(m0); 1879 return (m0); 1880 } 1881 1882 static int 1883 skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1884 int remaining) 1885 { 1886 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1887 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1888 int len, blen; 1889 1890 if (fl->flags & FL_BUF_PACKING) { 1891 u_int l, pad; 1892 1893 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ 1894 len = min(remaining, blen); 1895 1896 l = fr_offset + len; 1897 pad = roundup2(l, fl->buf_boundary) - l; 1898 if (fl->rx_offset + len + pad < rxb->size2) 1899 blen = len + pad; 1900 fl->rx_offset += blen; 1901 MPASS(fl->rx_offset <= rxb->size2); 1902 if (fl->rx_offset < rxb->size2) 1903 return (len); /* without advancing the cidx */ 1904 } else { 1905 MPASS(fl->rx_offset == 0); /* not packing */ 1906 blen = rxb->size1; 1907 len = min(remaining, blen); 1908 } 1909 move_to_next_rxbuf(fl); 1910 return (len); 1911 } 1912 1913 static inline void 1914 skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen) 1915 { 1916 int remaining, fr_offset, len; 1917 1918 fr_offset = 0; 1919 remaining = plen; 1920 while (remaining > 0) { 1921 len = skip_scatter_segment(sc, fl, fr_offset, remaining); 1922 fr_offset += len; 1923 remaining -= len; 1924 } 1925 } 1926 1927 static inline int 1928 
get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen) 1929 { 1930 int len; 1931 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1932 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1933 1934 if (fl->flags & FL_BUF_PACKING) 1935 len = rxb->size2 - fl->rx_offset; 1936 else 1937 len = rxb->size1; 1938 1939 return (min(plen, len)); 1940 } 1941 1942 static int 1943 eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, 1944 u_int plen) 1945 { 1946 struct mbuf *m0; 1947 struct ifnet *ifp = rxq->ifp; 1948 struct sge_fl *fl = &rxq->fl; 1949 struct vi_info *vi = ifp->if_softc; 1950 const struct cpl_rx_pkt *cpl; 1951 #if defined(INET) || defined(INET6) 1952 struct lro_ctrl *lro = &rxq->lro; 1953 #endif 1954 uint16_t err_vec, tnl_type, tnlhdr_len; 1955 static const int sw_hashtype[4][2] = { 1956 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1957 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1958 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1959 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1960 }; 1961 static const int sw_csum_flags[2][2] = { 1962 { 1963 /* IP, inner IP */ 1964 CSUM_ENCAP_VXLAN | 1965 CSUM_L3_CALC | CSUM_L3_VALID | 1966 CSUM_L4_CALC | CSUM_L4_VALID | 1967 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1968 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1969 1970 /* IP, inner IP6 */ 1971 CSUM_ENCAP_VXLAN | 1972 CSUM_L3_CALC | CSUM_L3_VALID | 1973 CSUM_L4_CALC | CSUM_L4_VALID | 1974 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1975 }, 1976 { 1977 /* IP6, inner IP */ 1978 CSUM_ENCAP_VXLAN | 1979 CSUM_L4_CALC | CSUM_L4_VALID | 1980 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1981 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1982 1983 /* IP6, inner IP6 */ 1984 CSUM_ENCAP_VXLAN | 1985 CSUM_L4_CALC | CSUM_L4_VALID | 1986 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1987 }, 1988 }; 1989 1990 MPASS(plen > sc->params.sge.fl_pktshift); 1991 if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) && 1992 __predict_true((fl->flags & FL_BUF_RESUME) == 0)) { 1993 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1994 caddr_t frame; 1995 int rc, slen; 1996 1997 slen = get_segment_len(sc, fl, plen) - 1998 sc->params.sge.fl_pktshift; 1999 frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift; 2000 CURVNET_SET_QUIET(ifp->if_vnet); 2001 rc = pfil_mem_in(vi->pfil, frame, slen, ifp, &m0); 2002 CURVNET_RESTORE(); 2003 if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) { 2004 skip_fl_payload(sc, fl, plen); 2005 return (0); 2006 } 2007 if (rc == PFIL_REALLOCED) { 2008 skip_fl_payload(sc, fl, plen); 2009 goto have_mbuf; 2010 } 2011 } 2012 2013 m0 = get_fl_payload(sc, fl, plen); 2014 if (__predict_false(m0 == NULL)) 2015 return (ENOMEM); 2016 2017 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 2018 m0->m_len -= sc->params.sge.fl_pktshift; 2019 m0->m_data += sc->params.sge.fl_pktshift; 2020 2021 have_mbuf: 2022 m0->m_pkthdr.rcvif = ifp; 2023 M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]); 2024 m0->m_pkthdr.flowid = be32toh(d->rss.hash_val); 2025 2026 cpl = (const void *)(&d->rss + 1); 2027 if (sc->params.tp.rx_pkt_encap) { 2028 const uint16_t ev = be16toh(cpl->err_vec); 2029 2030 err_vec = G_T6_COMPR_RXERR_VEC(ev); 2031 tnl_type = G_T6_RX_TNL_TYPE(ev); 2032 tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev); 2033 } else { 2034 err_vec = be16toh(cpl->err_vec); 2035 tnl_type = 0; 2036 tnlhdr_len = 0; 2037 } 2038 if (cpl->csum_calc && err_vec == 0) { 2039 int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6)); 2040 2041 /* checksum(s) calculated and found to be correct. 
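 * The code below translates the hardware's verdict into the csum_flags the
 * stack expects; csum_data carries the checksum value reported in the CPL.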
*/
2042
2043 MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
2044 (cpl->l2info & htobe32(F_RXF_IP6)));
2045 m0->m_pkthdr.csum_data = be16toh(cpl->csum);
2046 if (tnl_type == 0) {
2047 if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
2048 m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
2049 CSUM_L3_VALID | CSUM_L4_CALC |
2050 CSUM_L4_VALID;
2051 } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
2052 m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
2053 CSUM_L4_VALID;
2054 }
2055 rxq->rxcsum++;
2056 } else {
2057 MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
2058
2059 M_HASHTYPE_SETINNER(m0);
2060 if (__predict_false(cpl->ip_frag)) {
2061 /*
2062 * csum_data is for the inner frame (which is an
2063 * IP fragment) and is not 0xffff. There is no
2064 * way to pass the inner csum_data to the stack.
2065 * We don't want the stack to use the inner
2066 * csum_data to validate the outer frame or it
2067 * will get rejected. So we fix csum_data here
2068 * and let sw do the checksum of inner IP
2069 * fragments.
2070 *
2071 * XXX: Need 32b for csum_data2 in an rx mbuf.
2072 * Maybe stuff it into rcv_tstmp?
2073 */
2074 m0->m_pkthdr.csum_data = 0xffff;
2075 if (ipv6) {
2076 m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
2077 CSUM_L4_VALID;
2078 } else {
2079 m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
2080 CSUM_L3_VALID | CSUM_L4_CALC |
2081 CSUM_L4_VALID;
2082 }
2083 } else {
2084 int outer_ipv6;
2085
2086 MPASS(m0->m_pkthdr.csum_data == 0xffff);
2087
2088 outer_ipv6 = tnlhdr_len >=
2089 sizeof(struct ether_header) +
2090 sizeof(struct ip6_hdr);
2091 m0->m_pkthdr.csum_flags =
2092 sw_csum_flags[outer_ipv6][ipv6];
2093 }
2094 rxq->vxlan_rxcsum++;
2095 }
2096 }
2097
2098 if (cpl->vlan_ex) {
2099 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
2100 m0->m_flags |= M_VLANTAG;
2101 rxq->vlan_extraction++;
2102 }
2103
2104 if (rxq->iq.flags & IQ_RX_TIMESTAMP) {
2105 /*
2106 * Fill up rcv_tstmp and set M_TSTMP only if we get a
2107 * non-zero timestamp back from t4_tstmp_to_ns().
2108 */
2109 m0->m_pkthdr.rcv_tstmp = t4_tstmp_to_ns(sc,
2110 be64toh(d->rsp.u.last_flit));
2111 if (m0->m_pkthdr.rcv_tstmp != 0)
2112 m0->m_flags |= M_TSTMP;
2113 }
2114
2115 #ifdef NUMA
2116 m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
2117 #endif
2118 #if defined(INET) || defined(INET6)
2119 if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
2120 (M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
2121 M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
2122 if (sort_before_lro(lro)) {
2123 tcp_lro_queue_mbuf(lro, m0);
2124 return (0); /* queued for sort, then LRO */
2125 }
2126 if (tcp_lro_rx(lro, m0, 0) == 0)
2127 return (0); /* queued for LRO */
2128 }
2129 #endif
2130 ifp->if_input(ifp, m0);
2131
2132 return (0);
2133 }
2134
2135 /*
2136 * Must drain the wrq or make sure that someone else will.
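 * The list is drained from the wrq_tx_drain task below as well as from
 * t4_wrq_tx_locked, start_wrq_wr, and commit_wrq_wr.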
2137 */ 2138 static void 2139 wrq_tx_drain(void *arg, int n) 2140 { 2141 struct sge_wrq *wrq = arg; 2142 struct sge_eq *eq = &wrq->eq; 2143 2144 EQ_LOCK(eq); 2145 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2146 drain_wrq_wr_list(wrq->adapter, wrq); 2147 EQ_UNLOCK(eq); 2148 } 2149 2150 static void 2151 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 2152 { 2153 struct sge_eq *eq = &wrq->eq; 2154 u_int available, dbdiff; /* # of hardware descriptors */ 2155 u_int n; 2156 struct wrqe *wr; 2157 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2158 2159 EQ_LOCK_ASSERT_OWNED(eq); 2160 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 2161 wr = STAILQ_FIRST(&wrq->wr_list); 2162 MPASS(wr != NULL); /* Must be called with something useful to do */ 2163 MPASS(eq->pidx == eq->dbidx); 2164 dbdiff = 0; 2165 2166 do { 2167 eq->cidx = read_hw_cidx(eq); 2168 if (eq->pidx == eq->cidx) 2169 available = eq->sidx - 1; 2170 else 2171 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2172 2173 MPASS(wr->wrq == wrq); 2174 n = howmany(wr->wr_len, EQ_ESIZE); 2175 if (available < n) 2176 break; 2177 2178 dst = (void *)&eq->desc[eq->pidx]; 2179 if (__predict_true(eq->sidx - eq->pidx > n)) { 2180 /* Won't wrap, won't end exactly at the status page. */ 2181 bcopy(&wr->wr[0], dst, wr->wr_len); 2182 eq->pidx += n; 2183 } else { 2184 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 2185 2186 bcopy(&wr->wr[0], dst, first_portion); 2187 if (wr->wr_len > first_portion) { 2188 bcopy(&wr->wr[first_portion], &eq->desc[0], 2189 wr->wr_len - first_portion); 2190 } 2191 eq->pidx = n - (eq->sidx - eq->pidx); 2192 } 2193 wrq->tx_wrs_copied++; 2194 2195 if (available < eq->sidx / 4 && 2196 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2197 /* 2198 * XXX: This is not 100% reliable with some 2199 * types of WRs. But this is a very unusual 2200 * situation for an ofld/ctrl queue anyway. 2201 */ 2202 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2203 F_FW_WR_EQUEQ); 2204 } 2205 2206 dbdiff += n; 2207 if (dbdiff >= 16) { 2208 ring_eq_db(sc, eq, dbdiff); 2209 dbdiff = 0; 2210 } 2211 2212 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 2213 free_wrqe(wr); 2214 MPASS(wrq->nwr_pending > 0); 2215 wrq->nwr_pending--; 2216 MPASS(wrq->ndesc_needed >= n); 2217 wrq->ndesc_needed -= n; 2218 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 2219 2220 if (dbdiff) 2221 ring_eq_db(sc, eq, dbdiff); 2222 } 2223 2224 /* 2225 * Doesn't fail. Holds on to work requests it can't send right away. 2226 */ 2227 void 2228 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 2229 { 2230 #ifdef INVARIANTS 2231 struct sge_eq *eq = &wrq->eq; 2232 #endif 2233 2234 EQ_LOCK_ASSERT_OWNED(eq); 2235 MPASS(wr != NULL); 2236 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 2237 MPASS((wr->wr_len & 0x7) == 0); 2238 2239 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 2240 wrq->nwr_pending++; 2241 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 2242 2243 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 2244 return; /* commit_wrq_wr will drain wr_list as well. */ 2245 2246 drain_wrq_wr_list(sc, wrq); 2247 2248 /* Doorbell must have caught up to the pidx. 
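 * (drain_wrq_wr_list rings the doorbell for everything it copies into the
 * descriptor ring before it returns.)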
*/ 2249 MPASS(eq->pidx == eq->dbidx); 2250 } 2251 2252 void 2253 t4_update_fl_bufsize(struct ifnet *ifp) 2254 { 2255 struct vi_info *vi = ifp->if_softc; 2256 struct adapter *sc = vi->adapter; 2257 struct sge_rxq *rxq; 2258 #ifdef TCP_OFFLOAD 2259 struct sge_ofld_rxq *ofld_rxq; 2260 #endif 2261 struct sge_fl *fl; 2262 int i, maxp; 2263 2264 maxp = max_rx_payload(sc, ifp, false); 2265 for_each_rxq(vi, i, rxq) { 2266 fl = &rxq->fl; 2267 2268 FL_LOCK(fl); 2269 fl->zidx = find_refill_source(sc, maxp, 2270 fl->flags & FL_BUF_PACKING); 2271 FL_UNLOCK(fl); 2272 } 2273 #ifdef TCP_OFFLOAD 2274 maxp = max_rx_payload(sc, ifp, true); 2275 for_each_ofld_rxq(vi, i, ofld_rxq) { 2276 fl = &ofld_rxq->fl; 2277 2278 FL_LOCK(fl); 2279 fl->zidx = find_refill_source(sc, maxp, 2280 fl->flags & FL_BUF_PACKING); 2281 FL_UNLOCK(fl); 2282 } 2283 #endif 2284 } 2285 2286 static inline int 2287 mbuf_nsegs(struct mbuf *m) 2288 { 2289 2290 M_ASSERTPKTHDR(m); 2291 KASSERT(m->m_pkthdr.inner_l5hlen > 0, 2292 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 2293 2294 return (m->m_pkthdr.inner_l5hlen); 2295 } 2296 2297 static inline void 2298 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 2299 { 2300 2301 M_ASSERTPKTHDR(m); 2302 m->m_pkthdr.inner_l5hlen = nsegs; 2303 } 2304 2305 static inline int 2306 mbuf_cflags(struct mbuf *m) 2307 { 2308 2309 M_ASSERTPKTHDR(m); 2310 return (m->m_pkthdr.PH_loc.eight[4]); 2311 } 2312 2313 static inline void 2314 set_mbuf_cflags(struct mbuf *m, uint8_t flags) 2315 { 2316 2317 M_ASSERTPKTHDR(m); 2318 m->m_pkthdr.PH_loc.eight[4] = flags; 2319 } 2320 2321 static inline int 2322 mbuf_len16(struct mbuf *m) 2323 { 2324 int n; 2325 2326 M_ASSERTPKTHDR(m); 2327 n = m->m_pkthdr.PH_loc.eight[0]; 2328 if (!(mbuf_cflags(m) & MC_TLS)) 2329 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2330 2331 return (n); 2332 } 2333 2334 static inline void 2335 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2336 { 2337 2338 M_ASSERTPKTHDR(m); 2339 if (!(mbuf_cflags(m) & MC_TLS)) 2340 MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16); 2341 m->m_pkthdr.PH_loc.eight[0] = len16; 2342 } 2343 2344 #ifdef RATELIMIT 2345 static inline int 2346 mbuf_eo_nsegs(struct mbuf *m) 2347 { 2348 2349 M_ASSERTPKTHDR(m); 2350 return (m->m_pkthdr.PH_loc.eight[1]); 2351 } 2352 2353 #if defined(INET) || defined(INET6) 2354 static inline void 2355 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2356 { 2357 2358 M_ASSERTPKTHDR(m); 2359 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2360 } 2361 #endif 2362 2363 static inline int 2364 mbuf_eo_len16(struct mbuf *m) 2365 { 2366 int n; 2367 2368 M_ASSERTPKTHDR(m); 2369 n = m->m_pkthdr.PH_loc.eight[2]; 2370 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2371 2372 return (n); 2373 } 2374 2375 #if defined(INET) || defined(INET6) 2376 static inline void 2377 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2378 { 2379 2380 M_ASSERTPKTHDR(m); 2381 m->m_pkthdr.PH_loc.eight[2] = len16; 2382 } 2383 #endif 2384 2385 static inline int 2386 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2387 { 2388 2389 M_ASSERTPKTHDR(m); 2390 return (m->m_pkthdr.PH_loc.eight[3]); 2391 } 2392 2393 #if defined(INET) || defined(INET6) 2394 static inline void 2395 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2396 { 2397 2398 M_ASSERTPKTHDR(m); 2399 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2400 } 2401 #endif 2402 2403 static inline int 2404 needs_eo(struct m_snd_tag *mst) 2405 { 2406 2407 return (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_RATE_LIMIT); 2408 } 2409 #endif 2410 2411 /* 2412 * Try to allocate an mbuf to 
contain a raw work request. To make it 2413 * easy to construct the work request, don't allocate a chain but a 2414 * single mbuf. 2415 */ 2416 struct mbuf * 2417 alloc_wr_mbuf(int len, int how) 2418 { 2419 struct mbuf *m; 2420 2421 if (len <= MHLEN) 2422 m = m_gethdr(how, MT_DATA); 2423 else if (len <= MCLBYTES) 2424 m = m_getcl(how, MT_DATA, M_PKTHDR); 2425 else 2426 m = NULL; 2427 if (m == NULL) 2428 return (NULL); 2429 m->m_pkthdr.len = len; 2430 m->m_len = len; 2431 set_mbuf_cflags(m, MC_RAW_WR); 2432 set_mbuf_len16(m, howmany(len, 16)); 2433 return (m); 2434 } 2435 2436 static inline bool 2437 needs_hwcsum(struct mbuf *m) 2438 { 2439 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | 2440 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP | 2441 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP | 2442 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP | 2443 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO; 2444 2445 M_ASSERTPKTHDR(m); 2446 2447 return (m->m_pkthdr.csum_flags & csum_flags); 2448 } 2449 2450 static inline bool 2451 needs_tso(struct mbuf *m) 2452 { 2453 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO | 2454 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2455 2456 M_ASSERTPKTHDR(m); 2457 2458 return (m->m_pkthdr.csum_flags & csum_flags); 2459 } 2460 2461 static inline bool 2462 needs_vxlan_csum(struct mbuf *m) 2463 { 2464 2465 M_ASSERTPKTHDR(m); 2466 2467 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN); 2468 } 2469 2470 static inline bool 2471 needs_vxlan_tso(struct mbuf *m) 2472 { 2473 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO | 2474 CSUM_INNER_IP6_TSO; 2475 2476 M_ASSERTPKTHDR(m); 2477 2478 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 && 2479 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN); 2480 } 2481 2482 #if defined(INET) || defined(INET6) 2483 static inline bool 2484 needs_inner_tcp_csum(struct mbuf *m) 2485 { 2486 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2487 2488 M_ASSERTPKTHDR(m); 2489 2490 return (m->m_pkthdr.csum_flags & csum_flags); 2491 } 2492 #endif 2493 2494 static inline bool 2495 needs_l3_csum(struct mbuf *m) 2496 { 2497 const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP | 2498 CSUM_INNER_IP_TSO; 2499 2500 M_ASSERTPKTHDR(m); 2501 2502 return (m->m_pkthdr.csum_flags & csum_flags); 2503 } 2504 2505 static inline bool 2506 needs_outer_tcp_csum(struct mbuf *m) 2507 { 2508 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP | 2509 CSUM_IP6_TSO; 2510 2511 M_ASSERTPKTHDR(m); 2512 2513 return (m->m_pkthdr.csum_flags & csum_flags); 2514 } 2515 2516 #ifdef RATELIMIT 2517 static inline bool 2518 needs_outer_l4_csum(struct mbuf *m) 2519 { 2520 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO | 2521 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO; 2522 2523 M_ASSERTPKTHDR(m); 2524 2525 return (m->m_pkthdr.csum_flags & csum_flags); 2526 } 2527 2528 static inline bool 2529 needs_outer_udp_csum(struct mbuf *m) 2530 { 2531 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP; 2532 2533 M_ASSERTPKTHDR(m); 2534 2535 return (m->m_pkthdr.csum_flags & csum_flags); 2536 } 2537 #endif 2538 2539 static inline bool 2540 needs_vlan_insertion(struct mbuf *m) 2541 { 2542 2543 M_ASSERTPKTHDR(m); 2544 2545 return (m->m_flags & M_VLANTAG); 2546 } 2547 2548 #if defined(INET) || defined(INET6) 2549 static void * 2550 m_advance(struct mbuf **pm, int *poffset, int len) 2551 { 2552 struct mbuf *m = *pm; 2553 int offset = *poffset; 2554 uintptr_t p = 0; 2555 2556 
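/*
 * Advance the (mbuf, offset) cursor by 'len' bytes and return a pointer to
 * the new position. Callers never run past the end of the chain (see the
 * MPASS in the loop).
 */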
MPASS(len > 0); 2557 2558 for (;;) { 2559 if (offset + len < m->m_len) { 2560 offset += len; 2561 p = mtod(m, uintptr_t) + offset; 2562 break; 2563 } 2564 len -= m->m_len - offset; 2565 m = m->m_next; 2566 offset = 0; 2567 MPASS(m != NULL); 2568 } 2569 *poffset = offset; 2570 *pm = m; 2571 return ((void *)p); 2572 } 2573 #endif 2574 2575 static inline int 2576 count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) 2577 { 2578 vm_paddr_t paddr; 2579 int i, len, off, pglen, pgoff, seglen, segoff; 2580 int nsegs = 0; 2581 2582 M_ASSERTEXTPG(m); 2583 off = mtod(m, vm_offset_t); 2584 len = m->m_len; 2585 off += skip; 2586 len -= skip; 2587 2588 if (m->m_epg_hdrlen != 0) { 2589 if (off >= m->m_epg_hdrlen) { 2590 off -= m->m_epg_hdrlen; 2591 } else { 2592 seglen = m->m_epg_hdrlen - off; 2593 segoff = off; 2594 seglen = min(seglen, len); 2595 off = 0; 2596 len -= seglen; 2597 paddr = pmap_kextract( 2598 (vm_offset_t)&m->m_epg_hdr[segoff]); 2599 if (*nextaddr != paddr) 2600 nsegs++; 2601 *nextaddr = paddr + seglen; 2602 } 2603 } 2604 pgoff = m->m_epg_1st_off; 2605 for (i = 0; i < m->m_epg_npgs && len > 0; i++) { 2606 pglen = m_epg_pagelen(m, i, pgoff); 2607 if (off >= pglen) { 2608 off -= pglen; 2609 pgoff = 0; 2610 continue; 2611 } 2612 seglen = pglen - off; 2613 segoff = pgoff + off; 2614 off = 0; 2615 seglen = min(seglen, len); 2616 len -= seglen; 2617 paddr = m->m_epg_pa[i] + segoff; 2618 if (*nextaddr != paddr) 2619 nsegs++; 2620 *nextaddr = paddr + seglen; 2621 pgoff = 0; 2622 }; 2623 if (len != 0) { 2624 seglen = min(len, m->m_epg_trllen - off); 2625 len -= seglen; 2626 paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]); 2627 if (*nextaddr != paddr) 2628 nsegs++; 2629 *nextaddr = paddr + seglen; 2630 } 2631 2632 return (nsegs); 2633 } 2634 2635 2636 /* 2637 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2638 * must have at least one mbuf that's not empty. It is possible for this 2639 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 2640 */ 2641 static inline int 2642 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) 2643 { 2644 vm_paddr_t nextaddr, paddr; 2645 vm_offset_t va; 2646 int len, nsegs; 2647 2648 M_ASSERTPKTHDR(m); 2649 MPASS(m->m_pkthdr.len > 0); 2650 MPASS(m->m_pkthdr.len >= skip); 2651 2652 nsegs = 0; 2653 nextaddr = 0; 2654 for (; m; m = m->m_next) { 2655 len = m->m_len; 2656 if (__predict_false(len == 0)) 2657 continue; 2658 if (skip >= len) { 2659 skip -= len; 2660 continue; 2661 } 2662 if ((m->m_flags & M_EXTPG) != 0) { 2663 *cflags |= MC_NOMAP; 2664 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); 2665 skip = 0; 2666 continue; 2667 } 2668 va = mtod(m, vm_offset_t) + skip; 2669 len -= skip; 2670 skip = 0; 2671 paddr = pmap_kextract(va); 2672 nsegs += sglist_count((void *)(uintptr_t)va, len); 2673 if (paddr == nextaddr) 2674 nsegs--; 2675 nextaddr = pmap_kextract(va + len - 1) + 1; 2676 } 2677 2678 return (nsegs); 2679 } 2680 2681 /* 2682 * The maximum number of segments that can fit in a WR. 
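 * The limit depends on the type of work request: VM WRs and TSO (plain or
 * VXLAN) each have their own SGL budget.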
2683 */ 2684 static int 2685 max_nsegs_allowed(struct mbuf *m, bool vm_wr) 2686 { 2687 2688 if (vm_wr) { 2689 if (needs_tso(m)) 2690 return (TX_SGL_SEGS_VM_TSO); 2691 return (TX_SGL_SEGS_VM); 2692 } 2693 2694 if (needs_tso(m)) { 2695 if (needs_vxlan_tso(m)) 2696 return (TX_SGL_SEGS_VXLAN_TSO); 2697 else 2698 return (TX_SGL_SEGS_TSO); 2699 } 2700 2701 return (TX_SGL_SEGS); 2702 } 2703 2704 static struct timeval txerr_ratecheck = {0}; 2705 static const struct timeval txerr_interval = {3, 0}; 2706 2707 /* 2708 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2709 * a) caller can assume it's been freed if this function returns with an error. 2710 * b) it may get defragged up if the gather list is too long for the hardware. 2711 */ 2712 int 2713 parse_pkt(struct mbuf **mp, bool vm_wr) 2714 { 2715 struct mbuf *m0 = *mp, *m; 2716 int rc, nsegs, defragged = 0; 2717 struct ether_header *eh; 2718 #ifdef INET 2719 void *l3hdr; 2720 #endif 2721 #if defined(INET) || defined(INET6) 2722 int offset; 2723 struct tcphdr *tcp; 2724 #endif 2725 #if defined(KERN_TLS) || defined(RATELIMIT) 2726 struct m_snd_tag *mst; 2727 #endif 2728 uint16_t eh_type; 2729 uint8_t cflags; 2730 2731 cflags = 0; 2732 M_ASSERTPKTHDR(m0); 2733 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2734 rc = EINVAL; 2735 fail: 2736 m_freem(m0); 2737 *mp = NULL; 2738 return (rc); 2739 } 2740 restart: 2741 /* 2742 * First count the number of gather list segments in the payload. 2743 * Defrag the mbuf if nsegs exceeds the hardware limit. 2744 */ 2745 M_ASSERTPKTHDR(m0); 2746 MPASS(m0->m_pkthdr.len > 0); 2747 nsegs = count_mbuf_nsegs(m0, 0, &cflags); 2748 #if defined(KERN_TLS) || defined(RATELIMIT) 2749 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) 2750 mst = m0->m_pkthdr.snd_tag; 2751 else 2752 mst = NULL; 2753 #endif 2754 #ifdef KERN_TLS 2755 if (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_TLS) { 2756 int len16; 2757 2758 cflags |= MC_TLS; 2759 set_mbuf_cflags(m0, cflags); 2760 rc = t6_ktls_parse_pkt(m0, &nsegs, &len16); 2761 if (rc != 0) 2762 goto fail; 2763 set_mbuf_nsegs(m0, nsegs); 2764 set_mbuf_len16(m0, len16); 2765 return (0); 2766 } 2767 #endif 2768 if (nsegs > max_nsegs_allowed(m0, vm_wr)) { 2769 if (defragged++ > 0) { 2770 rc = EFBIG; 2771 goto fail; 2772 } 2773 counter_u64_add(defrags, 1); 2774 if ((m = m_defrag(m0, M_NOWAIT)) == NULL) { 2775 rc = ENOMEM; 2776 goto fail; 2777 } 2778 *mp = m0 = m; /* update caller's copy after defrag */ 2779 goto restart; 2780 } 2781 2782 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && 2783 !(cflags & MC_NOMAP))) { 2784 counter_u64_add(pullups, 1); 2785 m0 = m_pullup(m0, m0->m_pkthdr.len); 2786 if (m0 == NULL) { 2787 /* Should have left well enough alone. */ 2788 rc = EFBIG; 2789 goto fail; 2790 } 2791 *mp = m0; /* update caller's copy after pullup */ 2792 goto restart; 2793 } 2794 set_mbuf_nsegs(m0, nsegs); 2795 set_mbuf_cflags(m0, cflags); 2796 calculate_mbuf_len16(m0, vm_wr); 2797 2798 #ifdef RATELIMIT 2799 /* 2800 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2801 * checksumming is enabled. needs_outer_l4_csum happens to check for 2802 * all the right things. 
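 * If the packet does not qualify, the send tag is released here and the
 * packet falls back to the regular tx path.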
2803 */ 2804 if (__predict_false(needs_eo(mst) && !needs_outer_l4_csum(m0))) { 2805 m_snd_tag_rele(m0->m_pkthdr.snd_tag); 2806 m0->m_pkthdr.snd_tag = NULL; 2807 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 2808 mst = NULL; 2809 } 2810 #endif 2811 2812 if (!needs_hwcsum(m0) 2813 #ifdef RATELIMIT 2814 && !needs_eo(mst) 2815 #endif 2816 ) 2817 return (0); 2818 2819 m = m0; 2820 eh = mtod(m, struct ether_header *); 2821 eh_type = ntohs(eh->ether_type); 2822 if (eh_type == ETHERTYPE_VLAN) { 2823 struct ether_vlan_header *evh = (void *)eh; 2824 2825 eh_type = ntohs(evh->evl_proto); 2826 m0->m_pkthdr.l2hlen = sizeof(*evh); 2827 } else 2828 m0->m_pkthdr.l2hlen = sizeof(*eh); 2829 2830 #if defined(INET) || defined(INET6) 2831 offset = 0; 2832 #ifdef INET 2833 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2834 #else 2835 m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2836 #endif 2837 #endif 2838 2839 switch (eh_type) { 2840 #ifdef INET6 2841 case ETHERTYPE_IPV6: 2842 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); 2843 break; 2844 #endif 2845 #ifdef INET 2846 case ETHERTYPE_IP: 2847 { 2848 struct ip *ip = l3hdr; 2849 2850 if (needs_vxlan_csum(m0)) { 2851 /* Driver will do the outer IP hdr checksum. */ 2852 ip->ip_sum = 0; 2853 if (needs_vxlan_tso(m0)) { 2854 const uint16_t ipl = ip->ip_len; 2855 2856 ip->ip_len = 0; 2857 ip->ip_sum = ~in_cksum_hdr(ip); 2858 ip->ip_len = ipl; 2859 } else 2860 ip->ip_sum = in_cksum_hdr(ip); 2861 } 2862 m0->m_pkthdr.l3hlen = ip->ip_hl << 2; 2863 break; 2864 } 2865 #endif 2866 default: 2867 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2868 log(LOG_ERR, "%s: ethertype 0x%04x unknown. " 2869 "if_cxgbe must be compiled with the same " 2870 "INET/INET6 options as the kernel.\n", __func__, 2871 eh_type); 2872 } 2873 rc = EINVAL; 2874 goto fail; 2875 } 2876 2877 #if defined(INET) || defined(INET6) 2878 if (needs_vxlan_csum(m0)) { 2879 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2880 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header); 2881 2882 /* Inner headers. */ 2883 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen + 2884 sizeof(struct udphdr) + sizeof(struct vxlan_header)); 2885 eh_type = ntohs(eh->ether_type); 2886 if (eh_type == ETHERTYPE_VLAN) { 2887 struct ether_vlan_header *evh = (void *)eh; 2888 2889 eh_type = ntohs(evh->evl_proto); 2890 m0->m_pkthdr.inner_l2hlen = sizeof(*evh); 2891 } else 2892 m0->m_pkthdr.inner_l2hlen = sizeof(*eh); 2893 #ifdef INET 2894 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2895 #else 2896 m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2897 #endif 2898 2899 switch (eh_type) { 2900 #ifdef INET6 2901 case ETHERTYPE_IPV6: 2902 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr); 2903 break; 2904 #endif 2905 #ifdef INET 2906 case ETHERTYPE_IP: 2907 { 2908 struct ip *ip = l3hdr; 2909 2910 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2; 2911 break; 2912 } 2913 #endif 2914 default: 2915 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2916 log(LOG_ERR, "%s: VXLAN hw offload requested" 2917 "with unknown ethertype 0x%04x. 
if_cxgbe " 2918 "must be compiled with the same INET/INET6 " 2919 "options as the kernel.\n", __func__, 2920 eh_type); 2921 } 2922 rc = EINVAL; 2923 goto fail; 2924 } 2925 if (needs_inner_tcp_csum(m0)) { 2926 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen); 2927 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4; 2928 } 2929 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 2930 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP | 2931 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP | 2932 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | 2933 CSUM_ENCAP_VXLAN; 2934 } 2935 2936 if (needs_outer_tcp_csum(m0)) { 2937 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2938 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2939 #ifdef RATELIMIT 2940 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2941 set_mbuf_eo_tsclk_tsoff(m0, 2942 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2943 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2944 } else 2945 set_mbuf_eo_tsclk_tsoff(m0, 0); 2946 } else if (needs_outer_udp_csum(m0)) { 2947 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2948 #endif 2949 } 2950 #ifdef RATELIMIT 2951 if (needs_eo(mst)) { 2952 u_int immhdrs; 2953 2954 /* EO WRs have the headers in the WR and not the GL. */ 2955 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2956 m0->m_pkthdr.l4hlen; 2957 cflags = 0; 2958 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); 2959 MPASS(cflags == mbuf_cflags(m0)); 2960 set_mbuf_eo_nsegs(m0, nsegs); 2961 set_mbuf_eo_len16(m0, 2962 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2963 } 2964 #endif 2965 #endif 2966 MPASS(m0 == *mp); 2967 return (0); 2968 } 2969 2970 void * 2971 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2972 { 2973 struct sge_eq *eq = &wrq->eq; 2974 struct adapter *sc = wrq->adapter; 2975 int ndesc, available; 2976 struct wrqe *wr; 2977 void *w; 2978 2979 MPASS(len16 > 0); 2980 ndesc = tx_len16_to_desc(len16); 2981 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2982 2983 EQ_LOCK(eq); 2984 2985 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2986 drain_wrq_wr_list(sc, wrq); 2987 2988 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2989 slowpath: 2990 EQ_UNLOCK(eq); 2991 wr = alloc_wrqe(len16 * 16, wrq); 2992 if (__predict_false(wr == NULL)) 2993 return (NULL); 2994 cookie->pidx = -1; 2995 cookie->ndesc = ndesc; 2996 return (&wr->wr); 2997 } 2998 2999 eq->cidx = read_hw_cidx(eq); 3000 if (eq->pidx == eq->cidx) 3001 available = eq->sidx - 1; 3002 else 3003 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3004 if (available < ndesc) 3005 goto slowpath; 3006 3007 cookie->pidx = eq->pidx; 3008 cookie->ndesc = ndesc; 3009 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 3010 3011 w = &eq->desc[eq->pidx]; 3012 IDXINCR(eq->pidx, ndesc, eq->sidx); 3013 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 3014 w = &wrq->ss[0]; 3015 wrq->ss_pidx = cookie->pidx; 3016 wrq->ss_len = len16 * 16; 3017 } 3018 3019 EQ_UNLOCK(eq); 3020 3021 return (w); 3022 } 3023 3024 void 3025 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 3026 { 3027 struct sge_eq *eq = &wrq->eq; 3028 struct adapter *sc = wrq->adapter; 3029 int ndesc, pidx; 3030 struct wrq_cookie *prev, *next; 3031 3032 if (cookie->pidx == -1) { 3033 struct wrqe *wr = __containerof(w, struct wrqe, wr); 3034 3035 t4_wrq_tx(sc, wr); 3036 return; 3037 } 3038 3039 if (__predict_false(w == &wrq->ss[0])) { 3040 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 3041 3042 MPASS(wrq->ss_len > n); /* WR had better wrap 
around. */ 3043 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 3044 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 3045 wrq->tx_wrs_ss++; 3046 } else 3047 wrq->tx_wrs_direct++; 3048 3049 EQ_LOCK(eq); 3050 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 3051 pidx = cookie->pidx; 3052 MPASS(pidx >= 0 && pidx < eq->sidx); 3053 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 3054 next = TAILQ_NEXT(cookie, link); 3055 if (prev == NULL) { 3056 MPASS(pidx == eq->dbidx); 3057 if (next == NULL || ndesc >= 16) { 3058 int available; 3059 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 3060 3061 /* 3062 * Note that the WR via which we'll request tx updates 3063 * is at pidx and not eq->pidx, which has moved on 3064 * already. 3065 */ 3066 dst = (void *)&eq->desc[pidx]; 3067 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3068 if (available < eq->sidx / 4 && 3069 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3070 /* 3071 * XXX: This is not 100% reliable with some 3072 * types of WRs. But this is a very unusual 3073 * situation for an ofld/ctrl queue anyway. 3074 */ 3075 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 3076 F_FW_WR_EQUEQ); 3077 } 3078 3079 ring_eq_db(wrq->adapter, eq, ndesc); 3080 } else { 3081 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 3082 next->pidx = pidx; 3083 next->ndesc += ndesc; 3084 } 3085 } else { 3086 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 3087 prev->ndesc += ndesc; 3088 } 3089 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 3090 3091 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 3092 drain_wrq_wr_list(sc, wrq); 3093 3094 #ifdef INVARIANTS 3095 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 3096 /* Doorbell must have caught up to the pidx. */ 3097 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 3098 } 3099 #endif 3100 EQ_UNLOCK(eq); 3101 } 3102 3103 static u_int 3104 can_resume_eth_tx(struct mp_ring *r) 3105 { 3106 struct sge_eq *eq = r->cookie; 3107 3108 return (total_available_tx_desc(eq) > eq->sidx / 8); 3109 } 3110 3111 static inline bool 3112 cannot_use_txpkts(struct mbuf *m) 3113 { 3114 /* maybe put a GL limit too, to avoid silliness? 
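 * For now only TSO packets, raw work requests, and TLS mbufs are excluded
 * from txpkts coalescing.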
*/ 3115 3116 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); 3117 } 3118 3119 static inline int 3120 discard_tx(struct sge_eq *eq) 3121 { 3122 3123 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 3124 } 3125 3126 static inline int 3127 wr_can_update_eq(void *p) 3128 { 3129 struct fw_eth_tx_pkts_wr *wr = p; 3130 3131 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 3132 case FW_ULPTX_WR: 3133 case FW_ETH_TX_PKT_WR: 3134 case FW_ETH_TX_PKTS_WR: 3135 case FW_ETH_TX_PKTS2_WR: 3136 case FW_ETH_TX_PKT_VM_WR: 3137 case FW_ETH_TX_PKTS_VM_WR: 3138 return (1); 3139 default: 3140 return (0); 3141 } 3142 } 3143 3144 static inline void 3145 set_txupdate_flags(struct sge_txq *txq, u_int avail, 3146 struct fw_eth_tx_pkt_wr *wr) 3147 { 3148 struct sge_eq *eq = &txq->eq; 3149 struct txpkts *txp = &txq->txp; 3150 3151 if ((txp->npkt > 0 || avail < eq->sidx / 2) && 3152 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3153 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3154 eq->equeqidx = eq->pidx; 3155 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 3156 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 3157 eq->equeqidx = eq->pidx; 3158 } 3159 } 3160 3161 #if defined(__i386__) || defined(__amd64__) 3162 extern uint64_t tsc_freq; 3163 #endif 3164 3165 static inline bool 3166 record_eth_tx_time(struct sge_txq *txq) 3167 { 3168 const uint64_t cycles = get_cyclecount(); 3169 const uint64_t last_tx = txq->last_tx; 3170 #if defined(__i386__) || defined(__amd64__) 3171 const uint64_t itg = tsc_freq * t4_tx_coalesce_gap / 1000000; 3172 #else 3173 const uint64_t itg = 0; 3174 #endif 3175 3176 MPASS(cycles >= last_tx); 3177 txq->last_tx = cycles; 3178 return (cycles - last_tx < itg); 3179 } 3180 3181 /* 3182 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 3183 * be consumed. Return the actual number consumed. 0 indicates a stall. 3184 */ 3185 static u_int 3186 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing) 3187 { 3188 struct sge_txq *txq = r->cookie; 3189 struct ifnet *ifp = txq->ifp; 3190 struct sge_eq *eq = &txq->eq; 3191 struct txpkts *txp = &txq->txp; 3192 struct vi_info *vi = ifp->if_softc; 3193 struct adapter *sc = vi->adapter; 3194 u_int total, remaining; /* # of packets */ 3195 u_int n, avail, dbdiff; /* # of hardware descriptors */ 3196 int i, rc; 3197 struct mbuf *m0; 3198 bool snd, recent_tx; 3199 void *wr; /* start of the last WR written to the ring */ 3200 3201 TXQ_LOCK_ASSERT_OWNED(txq); 3202 recent_tx = record_eth_tx_time(txq); 3203 3204 remaining = IDXDIFF(pidx, cidx, r->size); 3205 if (__predict_false(discard_tx(eq))) { 3206 for (i = 0; i < txp->npkt; i++) 3207 m_freem(txp->mb[i]); 3208 txp->npkt = 0; 3209 while (cidx != pidx) { 3210 m0 = r->items[cidx]; 3211 m_freem(m0); 3212 if (++cidx == r->size) 3213 cidx = 0; 3214 } 3215 reclaim_tx_descs(txq, eq->sidx); 3216 *coalescing = false; 3217 return (remaining); /* emptied */ 3218 } 3219 3220 /* How many hardware descriptors do we have readily available. 
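 * This is only a snapshot; reclaim_tx_descs is called in the loop below when
 * more room is needed.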
*/ 3221 if (eq->pidx == eq->cidx) 3222 avail = eq->sidx - 1; 3223 else 3224 avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3225 3226 total = 0; 3227 if (remaining == 0) { 3228 txp->score = 0; 3229 txq->txpkts_flush++; 3230 goto send_txpkts; 3231 } 3232 3233 dbdiff = 0; 3234 MPASS(remaining > 0); 3235 while (remaining > 0) { 3236 m0 = r->items[cidx]; 3237 M_ASSERTPKTHDR(m0); 3238 MPASS(m0->m_nextpkt == NULL); 3239 3240 if (avail < 2 * SGE_MAX_WR_NDESC) 3241 avail += reclaim_tx_descs(txq, 64); 3242 3243 if (t4_tx_coalesce == 0 && txp->npkt == 0) 3244 goto skip_coalescing; 3245 if (cannot_use_txpkts(m0)) 3246 txp->score = 0; 3247 else if (recent_tx) { 3248 if (++txp->score == 0) 3249 txp->score = UINT8_MAX; 3250 } else 3251 txp->score = 1; 3252 if (txp->npkt > 0 || remaining > 1 || 3253 txp->score >= t4_tx_coalesce_pkts || 3254 atomic_load_int(&txq->eq.equiq) != 0) { 3255 if (vi->flags & TX_USES_VM_WR) 3256 rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd); 3257 else 3258 rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd); 3259 } else { 3260 snd = false; 3261 rc = EINVAL; 3262 } 3263 if (snd) { 3264 MPASS(txp->npkt > 0); 3265 for (i = 0; i < txp->npkt; i++) 3266 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3267 if (txp->npkt > 1) { 3268 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3269 if (vi->flags & TX_USES_VM_WR) 3270 n = write_txpkts_vm_wr(sc, txq); 3271 else 3272 n = write_txpkts_wr(sc, txq); 3273 } else { 3274 MPASS(avail >= 3275 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3276 if (vi->flags & TX_USES_VM_WR) 3277 n = write_txpkt_vm_wr(sc, txq, 3278 txp->mb[0]); 3279 else 3280 n = write_txpkt_wr(sc, txq, txp->mb[0], 3281 avail); 3282 } 3283 MPASS(n <= SGE_MAX_WR_NDESC); 3284 avail -= n; 3285 dbdiff += n; 3286 wr = &eq->desc[eq->pidx]; 3287 IDXINCR(eq->pidx, n, eq->sidx); 3288 txp->npkt = 0; /* emptied */ 3289 } 3290 if (rc == 0) { 3291 /* m0 was coalesced into txq->txpkts. */ 3292 goto next_mbuf; 3293 } 3294 if (rc == EAGAIN) { 3295 /* 3296 * m0 is suitable for tx coalescing but could not be 3297 * combined with the existing txq->txpkts, which has now 3298 * been transmitted. Start a new txpkts with m0. 
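 * Note that cidx is not advanced in this case, so the same mbuf is examined
 * again on the next iteration.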
3299 */ 3300 MPASS(snd); 3301 MPASS(txp->npkt == 0); 3302 continue; 3303 } 3304 3305 MPASS(rc != 0 && rc != EAGAIN); 3306 MPASS(txp->npkt == 0); 3307 skip_coalescing: 3308 n = tx_len16_to_desc(mbuf_len16(m0)); 3309 if (__predict_false(avail < n)) { 3310 avail += reclaim_tx_descs(txq, min(n, 32)); 3311 if (avail < n) 3312 break; /* out of descriptors */ 3313 } 3314 3315 wr = &eq->desc[eq->pidx]; 3316 if (mbuf_cflags(m0) & MC_RAW_WR) { 3317 n = write_raw_wr(txq, wr, m0, avail); 3318 #ifdef KERN_TLS 3319 } else if (mbuf_cflags(m0) & MC_TLS) { 3320 ETHER_BPF_MTAP(ifp, m0); 3321 n = t6_ktls_write_wr(txq, wr, m0, mbuf_nsegs(m0), 3322 avail); 3323 #endif 3324 } else { 3325 ETHER_BPF_MTAP(ifp, m0); 3326 if (vi->flags & TX_USES_VM_WR) 3327 n = write_txpkt_vm_wr(sc, txq, m0); 3328 else 3329 n = write_txpkt_wr(sc, txq, m0, avail); 3330 } 3331 MPASS(n >= 1 && n <= avail); 3332 if (!(mbuf_cflags(m0) & MC_TLS)) 3333 MPASS(n <= SGE_MAX_WR_NDESC); 3334 3335 avail -= n; 3336 dbdiff += n; 3337 IDXINCR(eq->pidx, n, eq->sidx); 3338 3339 if (dbdiff >= 512 / EQ_ESIZE) { /* X_FETCHBURSTMAX_512B */ 3340 if (wr_can_update_eq(wr)) 3341 set_txupdate_flags(txq, avail, wr); 3342 ring_eq_db(sc, eq, dbdiff); 3343 avail += reclaim_tx_descs(txq, 32); 3344 dbdiff = 0; 3345 } 3346 next_mbuf: 3347 total++; 3348 remaining--; 3349 if (__predict_false(++cidx == r->size)) 3350 cidx = 0; 3351 } 3352 if (dbdiff != 0) { 3353 if (wr_can_update_eq(wr)) 3354 set_txupdate_flags(txq, avail, wr); 3355 ring_eq_db(sc, eq, dbdiff); 3356 reclaim_tx_descs(txq, 32); 3357 } else if (eq->pidx == eq->cidx && txp->npkt > 0 && 3358 atomic_load_int(&txq->eq.equiq) == 0) { 3359 /* 3360 * If nothing was submitted to the chip for tx (it was coalesced 3361 * into txpkts instead) and there is no tx update outstanding 3362 * then we need to send txpkts now. 
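 * Otherwise nothing would trigger their transmission and they could sit in
 * the driver indefinitely.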
3363 */ 3364 send_txpkts: 3365 MPASS(txp->npkt > 0); 3366 for (i = 0; i < txp->npkt; i++) 3367 ETHER_BPF_MTAP(ifp, txp->mb[i]); 3368 if (txp->npkt > 1) { 3369 MPASS(avail >= tx_len16_to_desc(txp->len16)); 3370 if (vi->flags & TX_USES_VM_WR) 3371 n = write_txpkts_vm_wr(sc, txq); 3372 else 3373 n = write_txpkts_wr(sc, txq); 3374 } else { 3375 MPASS(avail >= 3376 tx_len16_to_desc(mbuf_len16(txp->mb[0]))); 3377 if (vi->flags & TX_USES_VM_WR) 3378 n = write_txpkt_vm_wr(sc, txq, txp->mb[0]); 3379 else 3380 n = write_txpkt_wr(sc, txq, txp->mb[0], avail); 3381 } 3382 MPASS(n <= SGE_MAX_WR_NDESC); 3383 wr = &eq->desc[eq->pidx]; 3384 IDXINCR(eq->pidx, n, eq->sidx); 3385 txp->npkt = 0; /* emptied */ 3386 3387 MPASS(wr_can_update_eq(wr)); 3388 set_txupdate_flags(txq, avail - n, wr); 3389 ring_eq_db(sc, eq, n); 3390 reclaim_tx_descs(txq, 32); 3391 } 3392 *coalescing = txp->npkt > 0; 3393 3394 return (total); 3395 } 3396 3397 static inline void 3398 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 3399 int qsize, int intr_idx, int cong, int qtype) 3400 { 3401 3402 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 3403 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 3404 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 3405 ("%s: bad pktc_idx %d", __func__, pktc_idx)); 3406 KASSERT(intr_idx >= -1 && intr_idx < sc->intr_count, 3407 ("%s: bad intr_idx %d", __func__, intr_idx)); 3408 KASSERT(qtype == FW_IQ_IQTYPE_OTHER || qtype == FW_IQ_IQTYPE_NIC || 3409 qtype == FW_IQ_IQTYPE_OFLD, ("%s: bad qtype %d", __func__, qtype)); 3410 3411 iq->flags = 0; 3412 iq->state = IQS_DISABLED; 3413 iq->adapter = sc; 3414 iq->qtype = qtype; 3415 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 3416 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 3417 if (pktc_idx >= 0) { 3418 iq->intr_params |= F_QINTR_CNT_EN; 3419 iq->intr_pktc_idx = pktc_idx; 3420 } 3421 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 3422 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 3423 iq->intr_idx = intr_idx; 3424 iq->cong_drop = cong; 3425 } 3426 3427 static inline void 3428 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 3429 { 3430 struct sge_params *sp = &sc->params.sge; 3431 3432 fl->qsize = qsize; 3433 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3434 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 3435 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3436 if (sc->flags & BUF_PACKING_OK && 3437 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 3438 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 3439 fl->flags |= FL_BUF_PACKING; 3440 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); 3441 fl->safe_zidx = sc->sge.safe_zidx; 3442 if (fl->flags & FL_BUF_PACKING) { 3443 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3444 fl->buf_boundary = sp->pack_boundary; 3445 } else { 3446 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3447 fl->buf_boundary = 16; 3448 } 3449 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3450 fl->buf_boundary = sp->pad_boundary; 3451 } 3452 3453 static inline void 3454 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 3455 uint8_t tx_chan, struct sge_iq *iq, char *name) 3456 { 3457 KASSERT(eqtype >= EQ_CTRL && eqtype <= EQ_OFLD, 3458 ("%s: bad qtype %d", __func__, eqtype)); 3459 3460 eq->type = eqtype; 3461 eq->tx_chan = tx_chan; 3462 eq->iq = iq; 3463 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3464 strlcpy(eq->lockname, name, 
sizeof(eq->lockname)); 3465 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3466 } 3467 3468 int 3469 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 3470 bus_dmamap_t *map, bus_addr_t *pa, void **va) 3471 { 3472 int rc; 3473 3474 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 3475 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 3476 if (rc != 0) { 3477 CH_ERR(sc, "cannot allocate DMA tag: %d\n", rc); 3478 goto done; 3479 } 3480 3481 rc = bus_dmamem_alloc(*tag, va, 3482 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 3483 if (rc != 0) { 3484 CH_ERR(sc, "cannot allocate DMA memory: %d\n", rc); 3485 goto done; 3486 } 3487 3488 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 3489 if (rc != 0) { 3490 CH_ERR(sc, "cannot load DMA map: %d\n", rc); 3491 goto done; 3492 } 3493 done: 3494 if (rc) 3495 free_ring(sc, *tag, *map, *pa, *va); 3496 3497 return (rc); 3498 } 3499 3500 int 3501 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3502 bus_addr_t pa, void *va) 3503 { 3504 if (pa) 3505 bus_dmamap_unload(tag, map); 3506 if (va) 3507 bus_dmamem_free(tag, va, map); 3508 if (tag) 3509 bus_dma_tag_destroy(tag); 3510 3511 return (0); 3512 } 3513 3514 /* 3515 * Allocates the software resources (mainly memory and sysctl nodes) for an 3516 * ingress queue and an optional freelist. 3517 * 3518 * Sets IQ_SW_ALLOCATED and returns 0 on success. 3519 */ 3520 static int 3521 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3522 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid) 3523 { 3524 int rc; 3525 size_t len; 3526 struct adapter *sc = vi->adapter; 3527 3528 MPASS(!(iq->flags & IQ_SW_ALLOCATED)); 3529 3530 len = iq->qsize * IQ_ESIZE; 3531 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3532 (void **)&iq->desc); 3533 if (rc != 0) 3534 return (rc); 3535 3536 if (fl) { 3537 len = fl->qsize * EQ_ESIZE; 3538 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3539 &fl->ba, (void **)&fl->desc); 3540 if (rc) { 3541 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, 3542 iq->desc); 3543 return (rc); 3544 } 3545 3546 /* Allocate space for one software descriptor per buffer. */ 3547 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), 3548 M_CXGBE, M_ZERO | M_WAITOK); 3549 3550 add_fl_sysctls(sc, ctx, oid, fl); 3551 iq->flags |= IQ_HAS_FL; 3552 } 3553 add_iq_sysctls(ctx, oid, iq); 3554 iq->flags |= IQ_SW_ALLOCATED; 3555 3556 return (0); 3557 } 3558 3559 /* 3560 * Frees all software resources (memory and locks) associated with an ingress 3561 * queue and an optional freelist. 3562 */ 3563 static void 3564 free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3565 { 3566 MPASS(iq->flags & IQ_SW_ALLOCATED); 3567 3568 if (fl) { 3569 MPASS(iq->flags & IQ_HAS_FL); 3570 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); 3571 free_fl_buffers(sc, fl); 3572 free(fl->sdesc, M_CXGBE); 3573 mtx_destroy(&fl->fl_lock); 3574 bzero(fl, sizeof(*fl)); 3575 } 3576 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3577 bzero(iq, sizeof(*iq)); 3578 } 3579 3580 /* 3581 * Allocates a hardware ingress queue and an optional freelist that will be 3582 * associated with it. 3583 * 3584 * Returns errno on failure. Resources allocated up to that point may still be 3585 * allocated. Caller is responsible for cleanup in case this function fails. 
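 * The software resources for the queue (see alloc_iq_fl) must already have
 * been allocated by the caller.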
3586 */ 3587 static int 3588 alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3589 { 3590 int rc, cntxt_id, cong_map; 3591 struct fw_iq_cmd c; 3592 struct adapter *sc = vi->adapter; 3593 struct port_info *pi = vi->pi; 3594 __be32 v = 0; 3595 3596 MPASS (!(iq->flags & IQ_HW_ALLOCATED)); 3597 3598 bzero(&c, sizeof(c)); 3599 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3600 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3601 V_FW_IQ_CMD_VFN(0)); 3602 3603 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3604 FW_LEN16(c)); 3605 3606 /* Special handling for firmware event queue */ 3607 if (iq == &sc->sge.fwq) 3608 v |= F_FW_IQ_CMD_IQASYNCH; 3609 3610 if (iq->intr_idx < 0) { 3611 /* Forwarded interrupts, all headed to fwq */ 3612 v |= F_FW_IQ_CMD_IQANDST; 3613 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3614 } else { 3615 KASSERT(iq->intr_idx < sc->intr_count, 3616 ("%s: invalid direct intr_idx %d", __func__, iq->intr_idx)); 3617 v |= V_FW_IQ_CMD_IQANDSTINDEX(iq->intr_idx); 3618 } 3619 3620 bzero(iq->desc, iq->qsize * IQ_ESIZE); 3621 c.type_to_iqandstindex = htobe32(v | 3622 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3623 V_FW_IQ_CMD_VIID(vi->viid) | 3624 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3625 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 3626 F_FW_IQ_CMD_IQGTSMODE | 3627 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3628 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3629 c.iqsize = htobe16(iq->qsize); 3630 c.iqaddr = htobe64(iq->ba); 3631 c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_IQTYPE(iq->qtype)); 3632 if (iq->cong_drop != -1) { 3633 cong_map = iq->qtype == IQ_ETH ? pi->rx_e_chan_map : 0; 3634 c.iqns_to_fl0congen |= htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3635 } 3636 3637 if (fl) { 3638 bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len); 3639 c.iqns_to_fl0congen |= 3640 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3641 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3642 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3643 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3644 0)); 3645 if (iq->cong_drop != -1) { 3646 c.iqns_to_fl0congen |= 3647 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | 3648 F_FW_IQ_CMD_FL0CONGCIF | 3649 F_FW_IQ_CMD_FL0CONGEN); 3650 } 3651 c.fl0dcaen_to_fl0cidxfthresh = 3652 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3653 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 3654 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
3655 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3656 c.fl0size = htobe16(fl->qsize); 3657 c.fl0addr = htobe64(fl->ba); 3658 } 3659 3660 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3661 if (rc != 0) { 3662 CH_ERR(sc, "failed to create hw ingress queue: %d\n", rc); 3663 return (rc); 3664 } 3665 3666 iq->cidx = 0; 3667 iq->gen = F_RSPD_GEN; 3668 iq->cntxt_id = be16toh(c.iqid); 3669 iq->abs_id = be16toh(c.physiqid); 3670 3671 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3672 if (cntxt_id >= sc->sge.iqmap_sz) { 3673 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3674 cntxt_id, sc->sge.iqmap_sz - 1); 3675 } 3676 sc->sge.iqmap[cntxt_id] = iq; 3677 3678 if (fl) { 3679 u_int qid; 3680 #ifdef INVARIANTS 3681 int i; 3682 3683 MPASS(!(fl->flags & FL_BUF_RESUME)); 3684 for (i = 0; i < fl->sidx * 8; i++) 3685 MPASS(fl->sdesc[i].cl == NULL); 3686 #endif 3687 fl->cntxt_id = be16toh(c.fl0id); 3688 fl->pidx = fl->cidx = fl->hw_cidx = fl->dbidx = 0; 3689 fl->rx_offset = 0; 3690 fl->flags &= ~(FL_STARVING | FL_DOOMED); 3691 3692 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3693 if (cntxt_id >= sc->sge.eqmap_sz) { 3694 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 3695 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 3696 } 3697 sc->sge.eqmap[cntxt_id] = (void *)fl; 3698 3699 qid = fl->cntxt_id; 3700 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3701 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3702 uint32_t mask = (1 << s_qpp) - 1; 3703 volatile uint8_t *udb; 3704 3705 udb = sc->udbs_base + UDBS_DB_OFFSET; 3706 udb += (qid >> s_qpp) << PAGE_SHIFT; 3707 qid &= mask; 3708 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3709 udb += qid << UDBS_SEG_SHIFT; 3710 qid = 0; 3711 } 3712 fl->udb = (volatile void *)udb; 3713 } 3714 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3715 3716 FL_LOCK(fl); 3717 /* Enough to make sure the SGE doesn't think it's starved */ 3718 refill_fl(sc, fl, fl->lowat); 3719 FL_UNLOCK(fl); 3720 } 3721 3722 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && 3723 iq->cong_drop != -1) { 3724 t4_sge_set_conm_context(sc, iq->cntxt_id, iq->cong_drop, 3725 cong_map); 3726 } 3727 3728 /* Enable IQ interrupts */ 3729 atomic_store_rel_int(&iq->state, IQS_IDLE); 3730 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3731 V_INGRESSQID(iq->cntxt_id)); 3732 3733 iq->flags |= IQ_HW_ALLOCATED; 3734 3735 return (0); 3736 } 3737 3738 static int 3739 free_iq_fl_hwq(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3740 { 3741 int rc; 3742 3743 MPASS(iq->flags & IQ_HW_ALLOCATED); 3744 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, 3745 iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3746 if (rc != 0) { 3747 CH_ERR(sc, "failed to free iq %p: %d\n", iq, rc); 3748 return (rc); 3749 } 3750 iq->flags &= ~IQ_HW_ALLOCATED; 3751 3752 return (0); 3753 } 3754 3755 static void 3756 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3757 struct sge_iq *iq) 3758 { 3759 struct sysctl_oid_list *children; 3760 3761 if (ctx == NULL || oid == NULL) 3762 return; 3763 3764 children = SYSCTL_CHILDREN(oid); 3765 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3766 "bus address of descriptor ring"); 3767 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3768 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3769 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3770 &iq->abs_id, 0, "absolute id of the queue"); 3771 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3772 &iq->cntxt_id, 0, "SGE context id of the queue"); 3773 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &iq->cidx, 3774 0, "consumer index"); 3775 } 3776 3777 static void 3778 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3779 struct sysctl_oid *oid, struct sge_fl *fl) 3780 { 3781 struct sysctl_oid_list *children; 3782 3783 if (ctx == NULL || oid == NULL) 3784 return; 3785 3786 children = SYSCTL_CHILDREN(oid); 3787 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3788 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3789 children = SYSCTL_CHILDREN(oid); 3790 3791 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3792 &fl->ba, "bus address of descriptor ring"); 3793 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3794 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3795 "desc ring size in bytes"); 3796 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3797 &fl->cntxt_id, 0, "SGE context id of the freelist"); 3798 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3799 fl_pad ? 1 : 0, "padding enabled"); 3800 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3801 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 3802 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3803 0, "consumer index"); 3804 if (fl->flags & FL_BUF_PACKING) { 3805 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3806 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3807 } 3808 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3809 0, "producer index"); 3810 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3811 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3812 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3813 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3814 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3815 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3816 } 3817 3818 /* 3819 * Idempotent. 3820 */ 3821 static int 3822 alloc_fwq(struct adapter *sc) 3823 { 3824 int rc, intr_idx; 3825 struct sge_iq *fwq = &sc->sge.fwq; 3826 struct vi_info *vi = &sc->port[0]->vi[0]; 3827 3828 if (!(fwq->flags & IQ_SW_ALLOCATED)) { 3829 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3830 3831 if (sc->flags & IS_VF) 3832 intr_idx = 0; 3833 else 3834 intr_idx = sc->intr_count > 1 ? 
1 : 0; 3835 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, intr_idx, -1, IQ_OTHER); 3836 rc = alloc_iq_fl(vi, fwq, NULL, &sc->ctx, sc->fwq_oid); 3837 if (rc != 0) { 3838 CH_ERR(sc, "failed to allocate fwq: %d\n", rc); 3839 return (rc); 3840 } 3841 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3842 } 3843 3844 if (!(fwq->flags & IQ_HW_ALLOCATED)) { 3845 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3846 3847 rc = alloc_iq_fl_hwq(vi, fwq, NULL); 3848 if (rc != 0) { 3849 CH_ERR(sc, "failed to create hw fwq: %d\n", rc); 3850 return (rc); 3851 } 3852 MPASS(fwq->flags & IQ_HW_ALLOCATED); 3853 } 3854 3855 return (0); 3856 } 3857 3858 /* 3859 * Idempotent. 3860 */ 3861 static void 3862 free_fwq(struct adapter *sc) 3863 { 3864 struct sge_iq *fwq = &sc->sge.fwq; 3865 3866 if (fwq->flags & IQ_HW_ALLOCATED) { 3867 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3868 free_iq_fl_hwq(sc, fwq, NULL); 3869 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3870 } 3871 3872 if (fwq->flags & IQ_SW_ALLOCATED) { 3873 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3874 free_iq_fl(sc, fwq, NULL); 3875 MPASS(!(fwq->flags & IQ_SW_ALLOCATED)); 3876 } 3877 } 3878 3879 /* 3880 * Idempotent. 3881 */ 3882 static int 3883 alloc_ctrlq(struct adapter *sc, int idx) 3884 { 3885 int rc; 3886 char name[16]; 3887 struct sysctl_oid *oid; 3888 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3889 3890 MPASS(idx < sc->params.nports); 3891 3892 if (!(ctrlq->eq.flags & EQ_SW_ALLOCATED)) { 3893 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3894 3895 snprintf(name, sizeof(name), "%d", idx); 3896 oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->ctrlq_oid), 3897 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3898 "ctrl queue"); 3899 3900 snprintf(name, sizeof(name), "%s ctrlq%d", 3901 device_get_nameunit(sc->dev), idx); 3902 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, 3903 sc->port[idx]->tx_chan, &sc->sge.fwq, name); 3904 rc = alloc_wrq(sc, NULL, ctrlq, &sc->ctx, oid); 3905 if (rc != 0) { 3906 CH_ERR(sc, "failed to allocate ctrlq%d: %d\n", idx, rc); 3907 sysctl_remove_oid(oid, 1, 1); 3908 return (rc); 3909 } 3910 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3911 } 3912 3913 if (!(ctrlq->eq.flags & EQ_HW_ALLOCATED)) { 3914 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3915 3916 rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq); 3917 if (rc != 0) { 3918 CH_ERR(sc, "failed to create hw ctrlq%d: %d\n", idx, rc); 3919 return (rc); 3920 } 3921 MPASS(ctrlq->eq.flags & EQ_HW_ALLOCATED); 3922 } 3923 3924 return (0); 3925 } 3926 3927 /* 3928 * Idempotent. 3929 */ 3930 static void 3931 free_ctrlq(struct adapter *sc, int idx) 3932 { 3933 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3934 3935 if (ctrlq->eq.flags & EQ_HW_ALLOCATED) { 3936 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3937 free_eq_hwq(sc, NULL, &ctrlq->eq); 3938 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3939 } 3940 3941 if (ctrlq->eq.flags & EQ_SW_ALLOCATED) { 3942 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3943 free_wrq(sc, ctrlq); 3944 MPASS(!(ctrlq->eq.flags & EQ_SW_ALLOCATED)); 3945 } 3946 } 3947 3948 int 3949 t4_sge_set_conm_context(struct adapter *sc, int cntxt_id, int cong_drop, 3950 int cong_map) 3951 { 3952 const int cng_ch_bits_log = sc->chip_params->cng_ch_bits_log; 3953 uint32_t param, val; 3954 uint16_t ch_map; 3955 int cong_mode, rc, i; 3956 3957 if (chip_id(sc) < CHELSIO_T5) 3958 return (ENOTSUP); 3959 3960 /* Convert the driver knob to the mode understood by the firmware. 
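 * -1 disables congestion feedback, 0 requests channel backpressure, 1
 * per-queue drops, and 2 both.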
*/ 3961 switch (cong_drop) { 3962 case -1: 3963 cong_mode = X_CONMCTXT_CNGTPMODE_DISABLE; 3964 break; 3965 case 0: 3966 cong_mode = X_CONMCTXT_CNGTPMODE_CHANNEL; 3967 break; 3968 case 1: 3969 cong_mode = X_CONMCTXT_CNGTPMODE_QUEUE; 3970 break; 3971 case 2: 3972 cong_mode = X_CONMCTXT_CNGTPMODE_BOTH; 3973 break; 3974 default: 3975 MPASS(0); 3976 CH_ERR(sc, "cong_drop = %d is invalid (ingress queue %d).\n", 3977 cong_drop, cntxt_id); 3978 return (EINVAL); 3979 } 3980 3981 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3982 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3983 V_FW_PARAMS_PARAM_YZ(cntxt_id); 3984 val = V_CONMCTXT_CNGTPMODE(cong_mode); 3985 if (cong_mode == X_CONMCTXT_CNGTPMODE_CHANNEL || 3986 cong_mode == X_CONMCTXT_CNGTPMODE_BOTH) { 3987 for (i = 0, ch_map = 0; i < 4; i++) { 3988 if (cong_map & (1 << i)) 3989 ch_map |= 1 << (i << cng_ch_bits_log); 3990 } 3991 val |= V_CONMCTXT_CNGCHMAP(ch_map); 3992 } 3993 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); 3994 if (rc != 0) { 3995 CH_ERR(sc, "failed to set congestion manager context " 3996 "for ingress queue %d: %d\n", cntxt_id, rc); 3997 } 3998 3999 return (rc); 4000 } 4001 4002 /* 4003 * Idempotent. 4004 */ 4005 static int 4006 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx, 4007 int maxp) 4008 { 4009 int rc; 4010 struct adapter *sc = vi->adapter; 4011 struct ifnet *ifp = vi->ifp; 4012 struct sysctl_oid *oid; 4013 char name[16]; 4014 4015 if (!(rxq->iq.flags & IQ_SW_ALLOCATED)) { 4016 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4017 #if defined(INET) || defined(INET6) 4018 rc = tcp_lro_init_args(&rxq->lro, ifp, lro_entries, lro_mbufs); 4019 if (rc != 0) 4020 return (rc); 4021 MPASS(rxq->lro.ifp == ifp); /* also indicates LRO init'ed */ 4022 #endif 4023 rxq->ifp = ifp; 4024 4025 snprintf(name, sizeof(name), "%d", idx); 4026 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->rxq_oid), 4027 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 4028 "rx queue"); 4029 4030 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq, 4031 intr_idx, cong_drop, IQ_ETH); 4032 #if defined(INET) || defined(INET6) 4033 if (ifp->if_capenable & IFCAP_LRO) 4034 rxq->iq.flags |= IQ_LRO_ENABLED; 4035 #endif 4036 if (ifp->if_capenable & IFCAP_HWRXTSTMP) 4037 rxq->iq.flags |= IQ_RX_TIMESTAMP; 4038 snprintf(name, sizeof(name), "%s rxq%d-fl", 4039 device_get_nameunit(vi->dev), idx); 4040 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 4041 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, &vi->ctx, oid); 4042 if (rc != 0) { 4043 CH_ERR(vi, "failed to allocate rxq%d: %d\n", idx, rc); 4044 sysctl_remove_oid(oid, 1, 1); 4045 #if defined(INET) || defined(INET6) 4046 tcp_lro_free(&rxq->lro); 4047 rxq->lro.ifp = NULL; 4048 #endif 4049 return (rc); 4050 } 4051 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 4052 add_rxq_sysctls(&vi->ctx, oid, rxq); 4053 } 4054 4055 if (!(rxq->iq.flags & IQ_HW_ALLOCATED)) { 4056 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 4057 rc = alloc_iq_fl_hwq(vi, &rxq->iq, &rxq->fl); 4058 if (rc != 0) { 4059 CH_ERR(vi, "failed to create hw rxq%d: %d\n", idx, rc); 4060 return (rc); 4061 } 4062 MPASS(rxq->iq.flags & IQ_HW_ALLOCATED); 4063 4064 if (idx == 0) 4065 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 4066 else 4067 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 4068 ("iq_base mismatch")); 4069 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 4070 ("PF with non-zero iq_base")); 4071 4072 /* 4073 * The freelist is just barely above the starvation threshold 4074 * right now, fill
it up a bit more. 4075 */ 4076 FL_LOCK(&rxq->fl); 4077 refill_fl(sc, &rxq->fl, 128); 4078 FL_UNLOCK(&rxq->fl); 4079 } 4080 4081 return (0); 4082 } 4083 4084 /* 4085 * Idempotent. 4086 */ 4087 static void 4088 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 4089 { 4090 if (rxq->iq.flags & IQ_HW_ALLOCATED) { 4091 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 4092 free_iq_fl_hwq(vi->adapter, &rxq->iq, &rxq->fl); 4093 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4094 } 4095 4096 if (rxq->iq.flags & IQ_SW_ALLOCATED) { 4097 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4098 #if defined(INET) || defined(INET6) 4099 tcp_lro_free(&rxq->lro); 4100 #endif 4101 free_iq_fl(vi->adapter, &rxq->iq, &rxq->fl); 4102 MPASS(!(rxq->iq.flags & IQ_SW_ALLOCATED)); 4103 bzero(rxq, sizeof(*rxq)); 4104 } 4105 } 4106 4107 static void 4108 add_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4109 struct sge_rxq *rxq) 4110 { 4111 struct sysctl_oid_list *children; 4112 4113 if (ctx == NULL || oid == NULL) 4114 return; 4115 4116 children = SYSCTL_CHILDREN(oid); 4117 #if defined(INET) || defined(INET6) 4118 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 4119 &rxq->lro.lro_queued, 0, NULL); 4120 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 4121 &rxq->lro.lro_flushed, 0, NULL); 4122 #endif 4123 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 4124 &rxq->rxcsum, "# of times hardware assisted with checksum"); 4125 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, 4126 &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); 4127 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_rxcsum", CTLFLAG_RD, 4128 &rxq->vxlan_rxcsum, 4129 "# of times hardware assisted with inner checksum (VXLAN)"); 4130 } 4131 4132 #ifdef TCP_OFFLOAD 4133 /* 4134 * Idempotent. 
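* The IQ_SW_ALLOCATED and IQ_HW_ALLOCATED flags are checked independently below, so a call that failed partway (for example while creating the hardware queue) can simply be repeated without redoing the software setup.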
4135 */ 4136 static int 4137 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int idx, 4138 int intr_idx, int maxp) 4139 { 4140 int rc; 4141 struct adapter *sc = vi->adapter; 4142 struct sysctl_oid *oid; 4143 char name[16]; 4144 4145 if (!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)) { 4146 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4147 4148 snprintf(name, sizeof(name), "%d", idx); 4149 oid = SYSCTL_ADD_NODE(&vi->ctx, 4150 SYSCTL_CHILDREN(vi->ofld_rxq_oid), OID_AUTO, name, 4151 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload rx queue"); 4152 4153 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 4154 vi->qsize_rxq, intr_idx, ofld_cong_drop, IQ_OFLD); 4155 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 4156 device_get_nameunit(vi->dev), idx); 4157 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 4158 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, &vi->ctx, 4159 oid); 4160 if (rc != 0) { 4161 CH_ERR(vi, "failed to allocate ofld_rxq%d: %d\n", idx, 4162 rc); 4163 sysctl_remove_oid(oid, 1, 1); 4164 return (rc); 4165 } 4166 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4167 ofld_rxq->rx_iscsi_ddp_setup_ok = counter_u64_alloc(M_WAITOK); 4168 ofld_rxq->rx_iscsi_ddp_setup_error = 4169 counter_u64_alloc(M_WAITOK); 4170 add_ofld_rxq_sysctls(&vi->ctx, oid, ofld_rxq); 4171 } 4172 4173 if (!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)) { 4174 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4175 rc = alloc_iq_fl_hwq(vi, &ofld_rxq->iq, &ofld_rxq->fl); 4176 if (rc != 0) { 4177 CH_ERR(vi, "failed to create hw ofld_rxq%d: %d\n", idx, 4178 rc); 4179 return (rc); 4180 } 4181 MPASS(ofld_rxq->iq.flags & IQ_HW_ALLOCATED); 4182 } 4183 return (rc); 4184 } 4185 4186 /* 4187 * Idempotent. 4188 */ 4189 static void 4190 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 4191 { 4192 if (ofld_rxq->iq.flags & IQ_HW_ALLOCATED) { 4193 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4194 free_iq_fl_hwq(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4195 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4196 } 4197 4198 if (ofld_rxq->iq.flags & IQ_SW_ALLOCATED) { 4199 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4200 free_iq_fl(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4201 MPASS(!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)); 4202 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_ok); 4203 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_error); 4204 bzero(ofld_rxq, sizeof(*ofld_rxq)); 4205 } 4206 } 4207 4208 static void 4209 add_ofld_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4210 struct sge_ofld_rxq *ofld_rxq) 4211 { 4212 struct sysctl_oid_list *children; 4213 4214 if (ctx == NULL || oid == NULL) 4215 return; 4216 4217 children = SYSCTL_CHILDREN(oid); 4218 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4219 "rx_toe_tls_records", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_records, 4220 "# of TOE TLS records received"); 4221 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4222 "rx_toe_tls_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_octets, 4223 "# of payload octets in received TOE TLS records"); 4224 4225 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "iscsi", 4226 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE iSCSI statistics"); 4227 children = SYSCTL_CHILDREN(oid); 4228 4229 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_ok", 4230 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_ok, 4231 "# of times DDP buffer was setup successfully."); 4232 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_error", 4233 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_error, 4234 "# of times DDP buffer 
setup failed."); 4235 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_octets", 4236 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_octets, 0, 4237 "# of octets placed directly"); 4238 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_pdus", 4239 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_pdus, 0, 4240 "# of PDUs with data placed directly."); 4241 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_octets", 4242 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_octets, 0, 4243 "# of data octets delivered in freelist"); 4244 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_pdus", 4245 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_pdus, 0, 4246 "# of PDUs with data delivered in freelist"); 4247 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "padding_errors", 4248 CTLFLAG_RD, &ofld_rxq->rx_iscsi_padding_errors, 0, 4249 "# of PDUs with invalid padding"); 4250 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "header_digest_errors", 4251 CTLFLAG_RD, &ofld_rxq->rx_iscsi_header_digest_errors, 0, 4252 "# of PDUs with invalid header digests"); 4253 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "data_digest_errors", 4254 CTLFLAG_RD, &ofld_rxq->rx_iscsi_data_digest_errors, 0, 4255 "# of PDUs with invalid data digests"); 4256 } 4257 #endif 4258 4259 /* 4260 * Returns a reasonable automatic cidx flush threshold for a given queue size. 4261 */ 4262 static u_int 4263 qsize_to_fthresh(int qsize) 4264 { 4265 u_int fthresh; 4266 4267 while (!powerof2(qsize)) 4268 qsize++; 4269 fthresh = ilog2(qsize); 4270 if (fthresh > X_CIDXFLUSHTHRESH_128) 4271 fthresh = X_CIDXFLUSHTHRESH_128; 4272 4273 return (fthresh); 4274 } 4275 4276 static int 4277 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 4278 { 4279 int rc, cntxt_id; 4280 struct fw_eq_ctrl_cmd c; 4281 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4282 4283 bzero(&c, sizeof(c)); 4284 4285 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 4286 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 4287 V_FW_EQ_CTRL_CMD_VFN(0)); 4288 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 4289 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 4290 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 4291 c.physeqid_pkd = htobe32(0); 4292 c.fetchszm_to_iqid = 4293 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4294 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 4295 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 4296 c.dcaen_to_eqsize = 4297 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4298 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4299 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4300 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4301 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 4302 c.eqaddr = htobe64(eq->ba); 4303 4304 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4305 if (rc != 0) { 4306 CH_ERR(sc, "failed to create hw ctrlq for tx_chan %d: %d\n", 4307 eq->tx_chan, rc); 4308 return (rc); 4309 } 4310 4311 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 4312 eq->abs_id = G_FW_EQ_CTRL_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4313 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4314 if (cntxt_id >= sc->sge.eqmap_sz) 4315 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4316 cntxt_id, sc->sge.eqmap_sz - 1); 4317 sc->sge.eqmap[cntxt_id] = eq; 4318 4319 return (rc); 4320 } 4321 4322 static int 4323 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4324 { 4325 int rc, cntxt_id; 4326 struct fw_eq_eth_cmd c; 4327 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4328 4329 bzero(&c, sizeof(c)); 4330 4331 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 4332 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 4333 V_FW_EQ_ETH_CMD_VFN(0)); 4334 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 4335 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 4336 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 4337 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 4338 c.fetchszm_to_iqid = 4339 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 4340 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 4341 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 4342 c.dcaen_to_eqsize = 4343 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 4344 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4345 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4346 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 4347 c.eqaddr = htobe64(eq->ba); 4348 4349 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4350 if (rc != 0) { 4351 device_printf(vi->dev, 4352 "failed to create Ethernet egress queue: %d\n", rc); 4353 return (rc); 4354 } 4355 4356 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 4357 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4358 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4359 if (cntxt_id >= sc->sge.eqmap_sz) 4360 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4361 cntxt_id, sc->sge.eqmap_sz - 1); 4362 sc->sge.eqmap[cntxt_id] = eq; 4363 4364 return (rc); 4365 } 4366 4367 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4368 static int 4369 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4370 { 4371 int rc, cntxt_id; 4372 struct fw_eq_ofld_cmd c; 4373 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4374 4375 bzero(&c, sizeof(c)); 4376 4377 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 4378 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 4379 V_FW_EQ_OFLD_CMD_VFN(0)); 4380 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 4381 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 4382 c.fetchszm_to_iqid = 4383 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4384 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 4385 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 4386 c.dcaen_to_eqsize = 4387 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4388 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4389 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4390 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4391 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 4392 c.eqaddr = htobe64(eq->ba); 4393 4394 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4395 if (rc != 0) { 4396 device_printf(vi->dev, 4397 "failed to create egress queue for TCP offload: %d\n", rc); 4398 return (rc); 4399 } 4400 4401 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 4402 eq->abs_id = G_FW_EQ_OFLD_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4403 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4404 if (cntxt_id >= sc->sge.eqmap_sz) 4405 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4406 cntxt_id, sc->sge.eqmap_sz - 1); 4407 sc->sge.eqmap[cntxt_id] = eq; 4408 4409 return (rc); 4410 } 4411 #endif 4412 4413 /* SW only */ 4414 static int 4415 alloc_eq(struct adapter *sc, struct sge_eq *eq, struct sysctl_ctx_list *ctx, 4416 struct sysctl_oid *oid) 4417 { 4418 int rc, qsize; 4419 size_t len; 4420 4421 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4422 4423 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4424 len = qsize * EQ_ESIZE; 4425 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, 4426 (void **)&eq->desc); 4427 if (rc) 4428 return (rc); 4429 if (ctx != NULL && oid != NULL) 4430 add_eq_sysctls(sc, ctx, oid, eq); 4431 eq->flags |= EQ_SW_ALLOCATED; 4432 4433 return (0); 4434 } 4435 4436 /* SW only */ 4437 static void 4438 free_eq(struct adapter *sc, struct sge_eq *eq) 4439 { 4440 MPASS(eq->flags & EQ_SW_ALLOCATED); 4441 if (eq->type == EQ_ETH) 4442 MPASS(eq->pidx == eq->cidx); 4443 4444 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 4445 mtx_destroy(&eq->eq_lock); 4446 bzero(eq, sizeof(*eq)); 4447 } 4448 4449 static void 4450 add_eq_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 4451 struct sysctl_oid *oid, struct sge_eq *eq) 4452 { 4453 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4454 4455 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, 4456 "bus address of descriptor ring"); 4457 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4458 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4459 "desc ring size in bytes"); 4460 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4461 &eq->abs_id, 0, "absolute id of the queue"); 4462 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4463 &eq->cntxt_id, 0, "SGE context id of the queue"); 4464 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &eq->cidx, 4465 0, "consumer index"); 4466 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &eq->pidx, 4467 0, "producer index"); 4468 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4469 eq->sidx, "status page index"); 4470 } 4471 4472 static int 4473 alloc_eq_hwq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4474 { 4475 int rc; 4476 4477 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4478 4479 eq->iqid = eq->iq->cntxt_id; 4480 eq->pidx = eq->cidx = eq->dbidx = 0; 4481 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. 
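* It is reset here regardless of queue type, and the descriptor ring (including the status page area at its end) is zeroed before the firmware command is issued so the new hardware queue starts from a known-clean state.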
*/ 4482 eq->equeqidx = 0; 4483 eq->doorbells = sc->doorbells; 4484 bzero(eq->desc, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len); 4485 4486 switch (eq->type) { 4487 case EQ_CTRL: 4488 rc = ctrl_eq_alloc(sc, eq); 4489 break; 4490 4491 case EQ_ETH: 4492 rc = eth_eq_alloc(sc, vi, eq); 4493 break; 4494 4495 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4496 case EQ_OFLD: 4497 rc = ofld_eq_alloc(sc, vi, eq); 4498 break; 4499 #endif 4500 4501 default: 4502 panic("%s: invalid eq type %d.", __func__, eq->type); 4503 } 4504 if (rc != 0) { 4505 CH_ERR(sc, "failed to allocate egress queue(%d): %d\n", 4506 eq->type, rc); 4507 return (rc); 4508 } 4509 4510 if (isset(&eq->doorbells, DOORBELL_UDB) || 4511 isset(&eq->doorbells, DOORBELL_UDBWC) || 4512 isset(&eq->doorbells, DOORBELL_WCWR)) { 4513 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 4514 uint32_t mask = (1 << s_qpp) - 1; 4515 volatile uint8_t *udb; 4516 4517 udb = sc->udbs_base + UDBS_DB_OFFSET; 4518 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 4519 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 4520 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 4521 clrbit(&eq->doorbells, DOORBELL_WCWR); 4522 else { 4523 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 4524 eq->udb_qid = 0; 4525 } 4526 eq->udb = (volatile void *)udb; 4527 } 4528 4529 eq->flags |= EQ_HW_ALLOCATED; 4530 return (0); 4531 } 4532 4533 static int 4534 free_eq_hwq(struct adapter *sc, struct vi_info *vi __unused, struct sge_eq *eq) 4535 { 4536 int rc; 4537 4538 MPASS(eq->flags & EQ_HW_ALLOCATED); 4539 4540 switch (eq->type) { 4541 case EQ_CTRL: 4542 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4543 break; 4544 case EQ_ETH: 4545 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4546 break; 4547 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4548 case EQ_OFLD: 4549 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4550 break; 4551 #endif 4552 default: 4553 panic("%s: invalid eq type %d.", __func__, eq->type); 4554 } 4555 if (rc != 0) { 4556 CH_ERR(sc, "failed to free eq (type %d): %d\n", eq->type, rc); 4557 return (rc); 4558 } 4559 eq->flags &= ~EQ_HW_ALLOCATED; 4560 4561 return (0); 4562 } 4563 4564 static int 4565 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 4566 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid) 4567 { 4568 struct sge_eq *eq = &wrq->eq; 4569 int rc; 4570 4571 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4572 4573 rc = alloc_eq(sc, eq, ctx, oid); 4574 if (rc) 4575 return (rc); 4576 MPASS(eq->flags & EQ_SW_ALLOCATED); 4577 /* Can't fail after this. 
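* Everything below is plain software initialization of the wrq (drain task, work request lists, counters, sysctls) with no failure paths, so no unwind code is needed.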
*/ 4578 4579 wrq->adapter = sc; 4580 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 4581 TAILQ_INIT(&wrq->incomplete_wrs); 4582 STAILQ_INIT(&wrq->wr_list); 4583 wrq->nwr_pending = 0; 4584 wrq->ndesc_needed = 0; 4585 add_wrq_sysctls(ctx, oid, wrq); 4586 4587 return (0); 4588 } 4589 4590 static void 4591 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 4592 { 4593 free_eq(sc, &wrq->eq); 4594 MPASS(wrq->nwr_pending == 0); 4595 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 4596 MPASS(STAILQ_EMPTY(&wrq->wr_list)); 4597 bzero(wrq, sizeof(*wrq)); 4598 } 4599 4600 static void 4601 add_wrq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4602 struct sge_wrq *wrq) 4603 { 4604 struct sysctl_oid_list *children; 4605 4606 if (ctx == NULL || oid == NULL) 4607 return; 4608 4609 children = SYSCTL_CHILDREN(oid); 4610 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 4611 &wrq->tx_wrs_direct, "# of work requests (direct)"); 4612 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 4613 &wrq->tx_wrs_copied, "# of work requests (copied)"); 4614 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 4615 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 4616 } 4617 4618 /* 4619 * Idempotent. 4620 */ 4621 static int 4622 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx) 4623 { 4624 int rc, iqidx; 4625 struct port_info *pi = vi->pi; 4626 struct adapter *sc = vi->adapter; 4627 struct sge_eq *eq = &txq->eq; 4628 struct txpkts *txp; 4629 char name[16]; 4630 struct sysctl_oid *oid; 4631 4632 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4633 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4634 4635 snprintf(name, sizeof(name), "%d", idx); 4636 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->txq_oid), 4637 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 4638 "tx queue"); 4639 4640 iqidx = vi->first_rxq + (idx % vi->nrxq); 4641 snprintf(name, sizeof(name), "%s txq%d", 4642 device_get_nameunit(vi->dev), idx); 4643 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 4644 &sc->sge.rxq[iqidx].iq, name); 4645 4646 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, 4647 can_resume_eth_tx, M_CXGBE, &eq->eq_lock, M_WAITOK); 4648 if (rc != 0) { 4649 CH_ERR(vi, "failed to allocate mp_ring for txq%d: %d\n", 4650 idx, rc); 4651 failed: 4652 sysctl_remove_oid(oid, 1, 1); 4653 return (rc); 4654 } 4655 4656 rc = alloc_eq(sc, eq, &vi->ctx, oid); 4657 if (rc) { 4658 CH_ERR(vi, "failed to allocate txq%d: %d\n", idx, rc); 4659 mp_ring_free(txq->r); 4660 goto failed; 4661 } 4662 MPASS(eq->flags & EQ_SW_ALLOCATED); 4663 /* Can't fail after this point. */ 4664 4665 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4666 txq->ifp = vi->ifp; 4667 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4668 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4669 M_ZERO | M_WAITOK); 4670 4671 add_txq_sysctls(vi, &vi->ctx, oid, txq); 4672 } 4673 4674 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4675 MPASS(eq->flags & EQ_SW_ALLOCATED); 4676 rc = alloc_eq_hwq(sc, vi, eq); 4677 if (rc != 0) { 4678 CH_ERR(vi, "failed to create hw txq%d: %d\n", idx, rc); 4679 return (rc); 4680 } 4681 MPASS(eq->flags & EQ_HW_ALLOCATED); 4682 /* Can't fail after this point. 
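* The remaining steps only derive cached values from state that already exists: the eq_base offset, the txpkts coalescing limit (max_npkt), and the precomputed cpl_ctrl0 word.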
*/ 4683 4684 if (idx == 0) 4685 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4686 else 4687 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4688 ("eq_base mismatch")); 4689 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4690 ("PF with non-zero eq_base")); 4691 4692 txp = &txq->txp; 4693 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr); 4694 txq->txp.max_npkt = min(nitems(txp->mb), 4695 sc->params.max_pkts_per_eth_tx_pkts_wr); 4696 if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF)) 4697 txq->txp.max_npkt--; 4698 4699 if (vi->flags & TX_USES_VM_WR) 4700 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4701 V_TXPKT_INTF(pi->tx_chan)); 4702 else 4703 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4704 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4705 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4706 4707 txq->tc_idx = -1; 4708 } 4709 4710 return (0); 4711 } 4712 4713 /* 4714 * Idempotent. 4715 */ 4716 static void 4717 free_txq(struct vi_info *vi, struct sge_txq *txq) 4718 { 4719 struct adapter *sc = vi->adapter; 4720 struct sge_eq *eq = &txq->eq; 4721 4722 if (eq->flags & EQ_HW_ALLOCATED) { 4723 MPASS(eq->flags & EQ_SW_ALLOCATED); 4724 free_eq_hwq(sc, NULL, eq); 4725 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4726 } 4727 4728 if (eq->flags & EQ_SW_ALLOCATED) { 4729 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4730 sglist_free(txq->gl); 4731 free(txq->sdesc, M_CXGBE); 4732 mp_ring_free(txq->r); 4733 free_eq(sc, eq); 4734 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4735 bzero(txq, sizeof(*txq)); 4736 } 4737 } 4738 4739 static void 4740 add_txq_sysctls(struct vi_info *vi, struct sysctl_ctx_list *ctx, 4741 struct sysctl_oid *oid, struct sge_txq *txq) 4742 { 4743 struct adapter *sc; 4744 struct sysctl_oid_list *children; 4745 4746 if (ctx == NULL || oid == NULL) 4747 return; 4748 4749 sc = vi->adapter; 4750 children = SYSCTL_CHILDREN(oid); 4751 4752 mp_ring_sysctls(txq->r, ctx, children); 4753 4754 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tc", 4755 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, txq - sc->sge.txq, 4756 sysctl_tc, "I", "traffic class (-1 means none)"); 4757 4758 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4759 &txq->txcsum, "# of times hardware assisted with checksum"); 4760 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, 4761 &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); 4762 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4763 &txq->tso_wrs, "# of TSO work requests"); 4764 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4765 &txq->imm_wrs, "# of work requests with immediate data"); 4766 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4767 &txq->sgl_wrs, "# of work requests with direct SGL"); 4768 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4769 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4770 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, 4771 &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); 4772 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, 4773 &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); 4774 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, 4775 &txq->txpkts0_pkts, 4776 "# of frames tx'd using type0 txpkts work requests"); 4777 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, 4778 &txq->txpkts1_pkts, 4779 "# of frames tx'd using type1 txpkts work requests"); 4780 
SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts_flush", CTLFLAG_RD, 4781 &txq->txpkts_flush, 4782 "# of times txpkts had to be flushed out by an egress-update"); 4783 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, 4784 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4785 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_tso_wrs", CTLFLAG_RD, 4786 &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests"); 4787 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_txcsum", CTLFLAG_RD, 4788 &txq->vxlan_txcsum, 4789 "# of times hardware assisted with inner checksums (VXLAN)"); 4790 4791 #ifdef KERN_TLS 4792 if (is_ktls(sc)) { 4793 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_records", 4794 CTLFLAG_RD, &txq->kern_tls_records, 4795 "# of NIC TLS records transmitted"); 4796 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_short", 4797 CTLFLAG_RD, &txq->kern_tls_short, 4798 "# of short NIC TLS records transmitted"); 4799 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_partial", 4800 CTLFLAG_RD, &txq->kern_tls_partial, 4801 "# of partial NIC TLS records transmitted"); 4802 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_full", 4803 CTLFLAG_RD, &txq->kern_tls_full, 4804 "# of full NIC TLS records transmitted"); 4805 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_octets", 4806 CTLFLAG_RD, &txq->kern_tls_octets, 4807 "# of payload octets in transmitted NIC TLS records"); 4808 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_waste", 4809 CTLFLAG_RD, &txq->kern_tls_waste, 4810 "# of octets DMAd but not transmitted in NIC TLS records"); 4811 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_options", 4812 CTLFLAG_RD, &txq->kern_tls_options, 4813 "# of NIC TLS options-only packets transmitted"); 4814 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_header", 4815 CTLFLAG_RD, &txq->kern_tls_header, 4816 "# of NIC TLS header-only packets transmitted"); 4817 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin", 4818 CTLFLAG_RD, &txq->kern_tls_fin, 4819 "# of NIC TLS FIN-only packets transmitted"); 4820 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin_short", 4821 CTLFLAG_RD, &txq->kern_tls_fin_short, 4822 "# of NIC TLS padded FIN packets on short TLS records"); 4823 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_cbc", 4824 CTLFLAG_RD, &txq->kern_tls_cbc, 4825 "# of NIC TLS sessions using AES-CBC"); 4826 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_gcm", 4827 CTLFLAG_RD, &txq->kern_tls_gcm, 4828 "# of NIC TLS sessions using AES-GCM"); 4829 } 4830 #endif 4831 } 4832 4833 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4834 /* 4835 * Idempotent. 
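* The iSCSI and TOE TLS counters below are allocated together with the software state and are released only when the software state itself is freed in free_ofld_txq.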
4836 */ 4837 static int 4838 alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx) 4839 { 4840 struct sysctl_oid *oid; 4841 struct port_info *pi = vi->pi; 4842 struct adapter *sc = vi->adapter; 4843 struct sge_eq *eq = &ofld_txq->wrq.eq; 4844 int rc, iqidx; 4845 char name[16]; 4846 4847 MPASS(idx >= 0); 4848 MPASS(idx < vi->nofldtxq); 4849 4850 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4851 snprintf(name, sizeof(name), "%d", idx); 4852 oid = SYSCTL_ADD_NODE(&vi->ctx, 4853 SYSCTL_CHILDREN(vi->ofld_txq_oid), OID_AUTO, name, 4854 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue"); 4855 4856 snprintf(name, sizeof(name), "%s ofld_txq%d", 4857 device_get_nameunit(vi->dev), idx); 4858 if (vi->nofldrxq > 0) { 4859 iqidx = vi->first_ofld_rxq + (idx % vi->nofldrxq); 4860 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4861 &sc->sge.ofld_rxq[iqidx].iq, name); 4862 } else { 4863 iqidx = vi->first_rxq + (idx % vi->nrxq); 4864 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4865 &sc->sge.rxq[iqidx].iq, name); 4866 } 4867 4868 rc = alloc_wrq(sc, vi, &ofld_txq->wrq, &vi->ctx, oid); 4869 if (rc != 0) { 4870 CH_ERR(vi, "failed to allocate ofld_txq%d: %d\n", idx, 4871 rc); 4872 sysctl_remove_oid(oid, 1, 1); 4873 return (rc); 4874 } 4875 MPASS(eq->flags & EQ_SW_ALLOCATED); 4876 /* Can't fail after this point. */ 4877 4878 ofld_txq->tx_iscsi_pdus = counter_u64_alloc(M_WAITOK); 4879 ofld_txq->tx_iscsi_octets = counter_u64_alloc(M_WAITOK); 4880 ofld_txq->tx_iscsi_iso_wrs = counter_u64_alloc(M_WAITOK); 4881 ofld_txq->tx_toe_tls_records = counter_u64_alloc(M_WAITOK); 4882 ofld_txq->tx_toe_tls_octets = counter_u64_alloc(M_WAITOK); 4883 add_ofld_txq_sysctls(&vi->ctx, oid, ofld_txq); 4884 } 4885 4886 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4887 rc = alloc_eq_hwq(sc, vi, eq); 4888 if (rc != 0) { 4889 CH_ERR(vi, "failed to create hw ofld_txq%d: %d\n", idx, 4890 rc); 4891 return (rc); 4892 } 4893 MPASS(eq->flags & EQ_HW_ALLOCATED); 4894 } 4895 4896 return (0); 4897 } 4898 4899 /* 4900 * Idempotent. 
4901 */ 4902 static void 4903 free_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq) 4904 { 4905 struct adapter *sc = vi->adapter; 4906 struct sge_eq *eq = &ofld_txq->wrq.eq; 4907 4908 if (eq->flags & EQ_HW_ALLOCATED) { 4909 MPASS(eq->flags & EQ_SW_ALLOCATED); 4910 free_eq_hwq(sc, NULL, eq); 4911 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4912 } 4913 4914 if (eq->flags & EQ_SW_ALLOCATED) { 4915 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4916 counter_u64_free(ofld_txq->tx_iscsi_pdus); 4917 counter_u64_free(ofld_txq->tx_iscsi_octets); 4918 counter_u64_free(ofld_txq->tx_iscsi_iso_wrs); 4919 counter_u64_free(ofld_txq->tx_toe_tls_records); 4920 counter_u64_free(ofld_txq->tx_toe_tls_octets); 4921 free_wrq(sc, &ofld_txq->wrq); 4922 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4923 bzero(ofld_txq, sizeof(*ofld_txq)); 4924 } 4925 } 4926 4927 static void 4928 add_ofld_txq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4929 struct sge_ofld_txq *ofld_txq) 4930 { 4931 struct sysctl_oid_list *children; 4932 4933 if (ctx == NULL || oid == NULL) 4934 return; 4935 4936 children = SYSCTL_CHILDREN(oid); 4937 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_pdus", 4938 CTLFLAG_RD, &ofld_txq->tx_iscsi_pdus, 4939 "# of iSCSI PDUs transmitted"); 4940 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_octets", 4941 CTLFLAG_RD, &ofld_txq->tx_iscsi_octets, 4942 "# of payload octets in transmitted iSCSI PDUs"); 4943 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_iso_wrs", 4944 CTLFLAG_RD, &ofld_txq->tx_iscsi_iso_wrs, 4945 "# of iSCSI segmentation offload work requests"); 4946 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_records", 4947 CTLFLAG_RD, &ofld_txq->tx_toe_tls_records, 4948 "# of TOE TLS records transmitted"); 4949 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_octets", 4950 CTLFLAG_RD, &ofld_txq->tx_toe_tls_octets, 4951 "# of payload octets in transmitted TOE TLS records"); 4952 } 4953 #endif 4954 4955 static void 4956 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4957 { 4958 bus_addr_t *ba = arg; 4959 4960 KASSERT(nseg == 1, 4961 ("%s meant for single segment mappings only.", __func__)); 4962 4963 *ba = error ? 0 : segs->ds_addr; 4964 } 4965 4966 static inline void 4967 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4968 { 4969 uint32_t n, v; 4970 4971 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); 4972 MPASS(n > 0); 4973 4974 wmb(); 4975 v = fl->dbval | V_PIDX(n); 4976 if (fl->udb) 4977 *fl->udb = htole32(v); 4978 else 4979 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4980 IDXINCR(fl->dbidx, n, fl->sidx); 4981 } 4982 4983 /* 4984 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4985 * recycled do not count towards this allocation budget. 4986 * 4987 * Returns non-zero to indicate that this freelist should be added to the list 4988 * of starving freelists. 4989 */ 4990 static int 4991 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 4992 { 4993 __be64 *d; 4994 struct fl_sdesc *sd; 4995 uintptr_t pa; 4996 caddr_t cl; 4997 struct rx_buf_info *rxb; 4998 struct cluster_metadata *clm; 4999 uint16_t max_pidx, zidx = fl->zidx; 5000 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 5001 5002 FL_LOCK_ASSERT_OWNED(fl); 5003 5004 /* 5005 * We always stop at the beginning of the hardware descriptor that's just 5006 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 5007 * which would mean an empty freelist to the chip. 
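* Note that fl->pidx counts individual 8-byte buffer descriptors while hw_cidx, max_pidx, and dbidx count groups of eight, which is why the comparison below multiplies by 8 and the loop only recomputes the hardware pidx when (++fl->pidx & 7) wraps to zero.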
5008 */ 5009 max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; 5010 if (fl->pidx == max_pidx * 8) 5011 return (0); 5012 5013 d = &fl->desc[fl->pidx]; 5014 sd = &fl->sdesc[fl->pidx]; 5015 rxb = &sc->sge.rx_buf_info[zidx]; 5016 5017 while (n > 0) { 5018 5019 if (sd->cl != NULL) { 5020 5021 if (sd->nmbuf == 0) { 5022 /* 5023 * Fast recycle without involving any atomics on 5024 * the cluster's metadata (if the cluster has 5025 * metadata). This happens when all frames 5026 * received in the cluster were small enough to 5027 * fit within a single mbuf each. 5028 */ 5029 fl->cl_fast_recycled++; 5030 goto recycled; 5031 } 5032 5033 /* 5034 * Cluster is guaranteed to have metadata. Clusters 5035 * without metadata always take the fast recycle path 5036 * when they're recycled. 5037 */ 5038 clm = cl_metadata(sd); 5039 MPASS(clm != NULL); 5040 5041 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 5042 fl->cl_recycled++; 5043 counter_u64_add(extfree_rels, 1); 5044 goto recycled; 5045 } 5046 sd->cl = NULL; /* gave up my reference */ 5047 } 5048 MPASS(sd->cl == NULL); 5049 cl = uma_zalloc(rxb->zone, M_NOWAIT); 5050 if (__predict_false(cl == NULL)) { 5051 if (zidx != fl->safe_zidx) { 5052 zidx = fl->safe_zidx; 5053 rxb = &sc->sge.rx_buf_info[zidx]; 5054 cl = uma_zalloc(rxb->zone, M_NOWAIT); 5055 } 5056 if (cl == NULL) 5057 break; 5058 } 5059 fl->cl_allocated++; 5060 n--; 5061 5062 pa = pmap_kextract((vm_offset_t)cl); 5063 sd->cl = cl; 5064 sd->zidx = zidx; 5065 5066 if (fl->flags & FL_BUF_PACKING) { 5067 *d = htobe64(pa | rxb->hwidx2); 5068 sd->moff = rxb->size2; 5069 } else { 5070 *d = htobe64(pa | rxb->hwidx1); 5071 sd->moff = 0; 5072 } 5073 recycled: 5074 sd->nmbuf = 0; 5075 d++; 5076 sd++; 5077 if (__predict_false((++fl->pidx & 7) == 0)) { 5078 uint16_t pidx = fl->pidx >> 3; 5079 5080 if (__predict_false(pidx == fl->sidx)) { 5081 fl->pidx = 0; 5082 pidx = 0; 5083 sd = fl->sdesc; 5084 d = fl->desc; 5085 } 5086 if (n < 8 || pidx == max_pidx) 5087 break; 5088 5089 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 5090 ring_fl_db(sc, fl); 5091 } 5092 } 5093 5094 if ((fl->pidx >> 3) != fl->dbidx) 5095 ring_fl_db(sc, fl); 5096 5097 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 5098 } 5099 5100 /* 5101 * Attempt to refill all starving freelists. 5102 */ 5103 static void 5104 refill_sfl(void *arg) 5105 { 5106 struct adapter *sc = arg; 5107 struct sge_fl *fl, *fl_temp; 5108 5109 mtx_assert(&sc->sfl_lock, MA_OWNED); 5110 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 5111 FL_LOCK(fl); 5112 refill_fl(sc, fl, 64); 5113 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 5114 TAILQ_REMOVE(&sc->sfl, fl, link); 5115 fl->flags &= ~FL_STARVING; 5116 } 5117 FL_UNLOCK(fl); 5118 } 5119 5120 if (!TAILQ_EMPTY(&sc->sfl)) 5121 callout_schedule(&sc->sfl_callout, hz / 5); 5122 } 5123 5124 /* 5125 * Release the driver's reference on all buffers in the given freelist. Buffers 5126 * with kernel references cannot be freed and will prevent the driver from being 5127 * unloaded safely. 
5128 */ 5129 void 5130 free_fl_buffers(struct adapter *sc, struct sge_fl *fl) 5131 { 5132 struct fl_sdesc *sd; 5133 struct cluster_metadata *clm; 5134 int i; 5135 5136 sd = fl->sdesc; 5137 for (i = 0; i < fl->sidx * 8; i++, sd++) { 5138 if (sd->cl == NULL) 5139 continue; 5140 5141 if (sd->nmbuf == 0) 5142 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); 5143 else if (fl->flags & FL_BUF_PACKING) { 5144 clm = cl_metadata(sd); 5145 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 5146 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, 5147 sd->cl); 5148 counter_u64_add(extfree_rels, 1); 5149 } 5150 } 5151 sd->cl = NULL; 5152 } 5153 5154 if (fl->flags & FL_BUF_RESUME) { 5155 m_freem(fl->m0); 5156 fl->flags &= ~FL_BUF_RESUME; 5157 } 5158 } 5159 5160 static inline void 5161 get_pkt_gl(struct mbuf *m, struct sglist *gl) 5162 { 5163 int rc; 5164 5165 M_ASSERTPKTHDR(m); 5166 5167 sglist_reset(gl); 5168 rc = sglist_append_mbuf(gl, m); 5169 if (__predict_false(rc != 0)) { 5170 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 5171 "with %d.", __func__, m, mbuf_nsegs(m), rc); 5172 } 5173 5174 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 5175 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 5176 mbuf_nsegs(m), gl->sg_nseg)); 5177 #if 0 /* vm_wr not readily available here. */ 5178 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr), 5179 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 5180 gl->sg_nseg, max_nsegs_allowed(m, vm_wr))); 5181 #endif 5182 } 5183 5184 /* 5185 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 5186 */ 5187 static inline u_int 5188 txpkt_len16(u_int nsegs, const u_int extra) 5189 { 5190 u_int n; 5191 5192 MPASS(nsegs > 0); 5193 5194 nsegs--; /* first segment is part of ulptx_sgl */ 5195 n = extra + sizeof(struct fw_eth_tx_pkt_wr) + 5196 sizeof(struct cpl_tx_pkt_core) + 5197 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5198 5199 return (howmany(n, 16)); 5200 } 5201 5202 /* 5203 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 5204 * request header. 5205 */ 5206 static inline u_int 5207 txpkt_vm_len16(u_int nsegs, const u_int extra) 5208 { 5209 u_int n; 5210 5211 MPASS(nsegs > 0); 5212 5213 nsegs--; /* first segment is part of ulptx_sgl */ 5214 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) + 5215 sizeof(struct cpl_tx_pkt_core) + 5216 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5217 5218 return (howmany(n, 16)); 5219 } 5220 5221 static inline void 5222 calculate_mbuf_len16(struct mbuf *m, bool vm_wr) 5223 { 5224 const int lso = sizeof(struct cpl_tx_pkt_lso_core); 5225 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso); 5226 5227 if (vm_wr) { 5228 if (needs_tso(m)) 5229 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso)); 5230 else 5231 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0)); 5232 return; 5233 } 5234 5235 if (needs_tso(m)) { 5236 if (needs_vxlan_tso(m)) 5237 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso)); 5238 else 5239 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso)); 5240 } else 5241 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0)); 5242 } 5243 5244 /* 5245 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 5246 * request header. 
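* The 8 * ((3 * nsegs) / 2 + (nsegs & 1)) term accounts for the address/length flits of the segments that do not fit in the ulptx_sgl header itself: each pair of extra segments takes three 8-byte flits and an odd leftover segment takes two. As a rough worked example (assuming the usual sizes: 8B ulp_txpkt, 8B ulptx_idata, 16B cpl_tx_pkt_core, 16B ulptx_sgl), a 2-segment GL comes to 8 + 8 + 16 + 16 + 16 = 64 bytes, i.e. a len16 of 4.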
5247 */ 5248 static inline u_int 5249 txpkts0_len16(u_int nsegs) 5250 { 5251 u_int n; 5252 5253 MPASS(nsegs > 0); 5254 5255 nsegs--; /* first segment is part of ulptx_sgl */ 5256 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 5257 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 5258 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5259 5260 return (howmany(n, 16)); 5261 } 5262 5263 /* 5264 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 5265 * request header. 5266 */ 5267 static inline u_int 5268 txpkts1_len16(void) 5269 { 5270 u_int n; 5271 5272 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 5273 5274 return (howmany(n, 16)); 5275 } 5276 5277 static inline u_int 5278 imm_payload(u_int ndesc) 5279 { 5280 u_int n; 5281 5282 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 5283 sizeof(struct cpl_tx_pkt_core); 5284 5285 return (n); 5286 } 5287 5288 static inline uint64_t 5289 csum_to_ctrl(struct adapter *sc, struct mbuf *m) 5290 { 5291 uint64_t ctrl; 5292 int csum_type, l2hlen, l3hlen; 5293 int x, y; 5294 static const int csum_types[3][2] = { 5295 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6}, 5296 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6}, 5297 {TX_CSUM_IP, 0} 5298 }; 5299 5300 M_ASSERTPKTHDR(m); 5301 5302 if (!needs_hwcsum(m)) 5303 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); 5304 5305 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN); 5306 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip)); 5307 5308 if (needs_vxlan_csum(m)) { 5309 MPASS(m->m_pkthdr.l4hlen > 0); 5310 MPASS(m->m_pkthdr.l5hlen > 0); 5311 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN); 5312 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip)); 5313 5314 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + 5315 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen + 5316 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN; 5317 l3hlen = m->m_pkthdr.inner_l3hlen; 5318 } else { 5319 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN; 5320 l3hlen = m->m_pkthdr.l3hlen; 5321 } 5322 5323 ctrl = 0; 5324 if (!needs_l3_csum(m)) 5325 ctrl |= F_TXPKT_IPCSUM_DIS; 5326 5327 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP | 5328 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP)) 5329 x = 0; /* TCP */ 5330 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP | 5331 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP)) 5332 x = 1; /* UDP */ 5333 else 5334 x = 2; 5335 5336 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | 5337 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) 5338 y = 0; /* IPv4 */ 5339 else { 5340 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | 5341 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)); 5342 y = 1; /* IPv6 */ 5343 } 5344 /* 5345 * needs_hwcsum returned true earlier so there must be some kind of 5346 * checksum to calculate. 
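* The (x, y) pair computed above indexes csum_types: x picks the row by L4 protocol (0 = TCP, 1 = UDP, 2 = neither) and y the column by IP version (0 = IPv4, 1 = IPv6); an IPv6 TCP packet, for example, resolves to TX_CSUM_TCPIP6. The IP-only row has no IPv6 entry because IPv6 has no header checksum.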
5347 */ 5348 csum_type = csum_types[x][y]; 5349 MPASS(csum_type != 0); 5350 if (csum_type == TX_CSUM_IP) 5351 ctrl |= F_TXPKT_L4CSUM_DIS; 5352 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen); 5353 if (chip_id(sc) <= CHELSIO_T5) 5354 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen); 5355 else 5356 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen); 5357 5358 return (ctrl); 5359 } 5360 5361 static inline void * 5362 write_lso_cpl(void *cpl, struct mbuf *m0) 5363 { 5364 struct cpl_tx_pkt_lso_core *lso; 5365 uint32_t ctrl; 5366 5367 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5368 m0->m_pkthdr.l4hlen > 0, 5369 ("%s: mbuf %p needs TSO but missing header lengths", 5370 __func__, m0)); 5371 5372 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 5373 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 5374 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5375 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 5376 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 5377 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5378 ctrl |= F_LSO_IPV6; 5379 5380 lso = cpl; 5381 lso->lso_ctrl = htobe32(ctrl); 5382 lso->ipid_ofst = htobe16(0); 5383 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 5384 lso->seqno_offset = htobe32(0); 5385 lso->len = htobe32(m0->m_pkthdr.len); 5386 5387 return (lso + 1); 5388 } 5389 5390 static void * 5391 write_tnl_lso_cpl(void *cpl, struct mbuf *m0) 5392 { 5393 struct cpl_tx_tnl_lso *tnl_lso = cpl; 5394 uint32_t ctrl; 5395 5396 KASSERT(m0->m_pkthdr.inner_l2hlen > 0 && 5397 m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 && 5398 m0->m_pkthdr.inner_l5hlen > 0, 5399 ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths", 5400 __func__, m0)); 5401 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5402 m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0, 5403 ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths", 5404 __func__, m0)); 5405 5406 /* Outer headers. */ 5407 ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) | 5408 F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST | 5409 V_CPL_TX_TNL_LSO_ETHHDRLENOUT( 5410 (m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5411 V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) | 5412 F_CPL_TX_TNL_LSO_IPLENSETOUT; 5413 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5414 ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT; 5415 else { 5416 ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT | 5417 F_CPL_TX_TNL_LSO_IPIDINCOUT; 5418 } 5419 tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl); 5420 tnl_lso->IpIdOffsetOut = 0; 5421 tnl_lso->UdpLenSetOut_to_TnlHdrLen = 5422 htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT | 5423 F_CPL_TX_TNL_LSO_UDPLENSETOUT | 5424 V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen + 5425 m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen + 5426 m0->m_pkthdr.l5hlen) | 5427 V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN)); 5428 tnl_lso->r1 = 0; 5429 5430 /* Inner headers. 
*/ 5431 ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN( 5432 (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) | 5433 V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) | 5434 V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2); 5435 if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr)) 5436 ctrl |= F_CPL_TX_TNL_LSO_IPV6; 5437 tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl); 5438 tnl_lso->IpIdOffset = 0; 5439 tnl_lso->IpIdSplit_to_Mss = 5440 htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz)); 5441 tnl_lso->TCPSeqOffset = 0; 5442 tnl_lso->EthLenOffset_Size = 5443 htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len)); 5444 5445 return (tnl_lso + 1); 5446 } 5447 5448 #define VM_TX_L2HDR_LEN 16 /* ethmacdst to vlantci */ 5449 5450 /* 5451 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 5452 * software descriptor, and advance the pidx. It is guaranteed that enough 5453 * descriptors are available. 5454 * 5455 * The return value is the # of hardware descriptors used. 5456 */ 5457 static u_int 5458 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0) 5459 { 5460 struct sge_eq *eq; 5461 struct fw_eth_tx_pkt_vm_wr *wr; 5462 struct tx_sdesc *txsd; 5463 struct cpl_tx_pkt_core *cpl; 5464 uint32_t ctrl; /* used in many unrelated places */ 5465 uint64_t ctrl1; 5466 int len16, ndesc, pktlen; 5467 caddr_t dst; 5468 5469 TXQ_LOCK_ASSERT_OWNED(txq); 5470 M_ASSERTPKTHDR(m0); 5471 5472 len16 = mbuf_len16(m0); 5473 pktlen = m0->m_pkthdr.len; 5474 ctrl = sizeof(struct cpl_tx_pkt_core); 5475 if (needs_tso(m0)) 5476 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5477 ndesc = tx_len16_to_desc(len16); 5478 5479 /* Firmware work request header */ 5480 eq = &txq->eq; 5481 wr = (void *)&eq->desc[eq->pidx]; 5482 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 5483 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5484 5485 ctrl = V_FW_WR_LEN16(len16); 5486 wr->equiq_to_len16 = htobe32(ctrl); 5487 wr->r3[0] = 0; 5488 wr->r3[1] = 0; 5489 5490 /* 5491 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 5492 * vlantci is ignored unless the ethtype is 0x8100, so it's 5493 * simpler to always copy it rather than making it 5494 * conditional. Also, it seems that we do not have to set 5495 * vlantci or fake the ethtype when doing VLAN tag insertion. 5496 */ 5497 m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst); 5498 5499 if (needs_tso(m0)) { 5500 cpl = write_lso_cpl(wr + 1, m0); 5501 txq->tso_wrs++; 5502 } else 5503 cpl = (void *)(wr + 1); 5504 5505 /* Checksum offload */ 5506 ctrl1 = csum_to_ctrl(sc, m0); 5507 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5508 txq->txcsum++; /* some hardware assistance provided */ 5509 5510 /* VLAN tag insertion */ 5511 if (needs_vlan_insertion(m0)) { 5512 ctrl1 |= F_TXPKT_VLAN_VLD | 5513 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5514 txq->vlan_insertion++; 5515 } 5516 5517 /* CPL header */ 5518 cpl->ctrl0 = txq->cpl_ctrl0; 5519 cpl->pack = 0; 5520 cpl->len = htobe16(pktlen); 5521 cpl->ctrl1 = htobe64(ctrl1); 5522 5523 /* SGL */ 5524 dst = (void *)(cpl + 1); 5525 5526 /* 5527 * A packet using TSO will use up an entire descriptor for the 5528 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 5529 * If this descriptor is the last descriptor in the ring, wrap 5530 * around to the front of the ring explicitly for the start of 5531 * the sgl. 
5532 */ 5533 if (dst == (void *)&eq->desc[eq->sidx]) { 5534 dst = (void *)&eq->desc[0]; 5535 write_gl_to_txd(txq, m0, &dst, 0); 5536 } else 5537 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5538 txq->sgl_wrs++; 5539 txq->txpkt_wrs++; 5540 5541 txsd = &txq->sdesc[eq->pidx]; 5542 txsd->m = m0; 5543 txsd->desc_used = ndesc; 5544 5545 return (ndesc); 5546 } 5547 5548 /* 5549 * Write a raw WR to the hardware descriptors, update the software 5550 * descriptor, and advance the pidx. It is guaranteed that enough 5551 * descriptors are available. 5552 * 5553 * The return value is the # of hardware descriptors used. 5554 */ 5555 static u_int 5556 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 5557 { 5558 struct sge_eq *eq = &txq->eq; 5559 struct tx_sdesc *txsd; 5560 struct mbuf *m; 5561 caddr_t dst; 5562 int len16, ndesc; 5563 5564 len16 = mbuf_len16(m0); 5565 ndesc = tx_len16_to_desc(len16); 5566 MPASS(ndesc <= available); 5567 5568 dst = wr; 5569 for (m = m0; m != NULL; m = m->m_next) 5570 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5571 5572 txq->raw_wrs++; 5573 5574 txsd = &txq->sdesc[eq->pidx]; 5575 txsd->m = m0; 5576 txsd->desc_used = ndesc; 5577 5578 return (ndesc); 5579 } 5580 5581 /* 5582 * Write a txpkt WR for this packet to the hardware descriptors, update the 5583 * software descriptor, and advance the pidx. It is guaranteed that enough 5584 * descriptors are available. 5585 * 5586 * The return value is the # of hardware descriptors used. 5587 */ 5588 static u_int 5589 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0, 5590 u_int available) 5591 { 5592 struct sge_eq *eq; 5593 struct fw_eth_tx_pkt_wr *wr; 5594 struct tx_sdesc *txsd; 5595 struct cpl_tx_pkt_core *cpl; 5596 uint32_t ctrl; /* used in many unrelated places */ 5597 uint64_t ctrl1; 5598 int len16, ndesc, pktlen, nsegs; 5599 caddr_t dst; 5600 5601 TXQ_LOCK_ASSERT_OWNED(txq); 5602 M_ASSERTPKTHDR(m0); 5603 5604 len16 = mbuf_len16(m0); 5605 nsegs = mbuf_nsegs(m0); 5606 pktlen = m0->m_pkthdr.len; 5607 ctrl = sizeof(struct cpl_tx_pkt_core); 5608 if (needs_tso(m0)) { 5609 if (needs_vxlan_tso(m0)) 5610 ctrl += sizeof(struct cpl_tx_tnl_lso); 5611 else 5612 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5613 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && 5614 available >= 2) { 5615 /* Immediate data. Recalculate len16 and set nsegs to 0. 
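* In that case the payload is copied straight into the work request by the copy_to_txd() loop further down instead of being described by an SGL; imm_payload(2) is the most payload that fits in two descriptors alongside the WR and CPL headers.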
*/ 5616 ctrl += pktlen; 5617 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 5618 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 5619 nsegs = 0; 5620 } 5621 ndesc = tx_len16_to_desc(len16); 5622 MPASS(ndesc <= available); 5623 5624 /* Firmware work request header */ 5625 eq = &txq->eq; 5626 wr = (void *)&eq->desc[eq->pidx]; 5627 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 5628 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5629 5630 ctrl = V_FW_WR_LEN16(len16); 5631 wr->equiq_to_len16 = htobe32(ctrl); 5632 wr->r3 = 0; 5633 5634 if (needs_tso(m0)) { 5635 if (needs_vxlan_tso(m0)) { 5636 cpl = write_tnl_lso_cpl(wr + 1, m0); 5637 txq->vxlan_tso_wrs++; 5638 } else { 5639 cpl = write_lso_cpl(wr + 1, m0); 5640 txq->tso_wrs++; 5641 } 5642 } else 5643 cpl = (void *)(wr + 1); 5644 5645 /* Checksum offload */ 5646 ctrl1 = csum_to_ctrl(sc, m0); 5647 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5648 /* some hardware assistance provided */ 5649 if (needs_vxlan_csum(m0)) 5650 txq->vxlan_txcsum++; 5651 else 5652 txq->txcsum++; 5653 } 5654 5655 /* VLAN tag insertion */ 5656 if (needs_vlan_insertion(m0)) { 5657 ctrl1 |= F_TXPKT_VLAN_VLD | 5658 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5659 txq->vlan_insertion++; 5660 } 5661 5662 /* CPL header */ 5663 cpl->ctrl0 = txq->cpl_ctrl0; 5664 cpl->pack = 0; 5665 cpl->len = htobe16(pktlen); 5666 cpl->ctrl1 = htobe64(ctrl1); 5667 5668 /* SGL */ 5669 dst = (void *)(cpl + 1); 5670 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx])) 5671 dst = (caddr_t)&eq->desc[0]; 5672 if (nsegs > 0) { 5673 5674 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5675 txq->sgl_wrs++; 5676 } else { 5677 struct mbuf *m; 5678 5679 for (m = m0; m != NULL; m = m->m_next) { 5680 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5681 #ifdef INVARIANTS 5682 pktlen -= m->m_len; 5683 #endif 5684 } 5685 #ifdef INVARIANTS 5686 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 5687 #endif 5688 txq->imm_wrs++; 5689 } 5690 5691 txq->txpkt_wrs++; 5692 5693 txsd = &txq->sdesc[eq->pidx]; 5694 txsd->m = m0; 5695 txsd->desc_used = ndesc; 5696 5697 return (ndesc); 5698 } 5699 5700 static inline bool 5701 cmp_l2hdr(struct txpkts *txp, struct mbuf *m) 5702 { 5703 int len; 5704 5705 MPASS(txp->npkt > 0); 5706 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5707 5708 if (txp->ethtype == be16toh(ETHERTYPE_VLAN)) 5709 len = VM_TX_L2HDR_LEN; 5710 else 5711 len = sizeof(struct ether_header); 5712 5713 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0); 5714 } 5715 5716 static inline void 5717 save_l2hdr(struct txpkts *txp, struct mbuf *m) 5718 { 5719 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5720 5721 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN); 5722 } 5723 5724 static int 5725 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5726 int avail, bool *send) 5727 { 5728 struct txpkts *txp = &txq->txp; 5729 5730 /* Cannot have TSO and coalesce at the same time. 
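* Such a packet needs a work request of its own (with an LSO CPL), so the caller is told to flush whatever has been gathered so far and then transmit this mbuf by itself.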
*/ 5731 if (cannot_use_txpkts(m)) { 5732 cannot_coalesce: 5733 *send = txp->npkt > 0; 5734 return (EINVAL); 5735 } 5736 5737 /* VF allows coalescing of type 1 (1 GL) only */ 5738 if (mbuf_nsegs(m) > 1) 5739 goto cannot_coalesce; 5740 5741 *send = false; 5742 if (txp->npkt > 0) { 5743 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5744 MPASS(txp->npkt < txp->max_npkt); 5745 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5746 5747 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) { 5748 retry_after_send: 5749 *send = true; 5750 return (EAGAIN); 5751 } 5752 if (m->m_pkthdr.len + txp->plen > 65535) 5753 goto retry_after_send; 5754 if (cmp_l2hdr(txp, m)) 5755 goto retry_after_send; 5756 5757 txp->len16 += txpkts1_len16(); 5758 txp->plen += m->m_pkthdr.len; 5759 txp->mb[txp->npkt++] = m; 5760 if (txp->npkt == txp->max_npkt) 5761 *send = true; 5762 } else { 5763 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) + 5764 txpkts1_len16(); 5765 if (tx_len16_to_desc(txp->len16) > avail) 5766 goto cannot_coalesce; 5767 txp->npkt = 1; 5768 txp->wr_type = 1; 5769 txp->plen = m->m_pkthdr.len; 5770 txp->mb[0] = m; 5771 save_l2hdr(txp, m); 5772 } 5773 return (0); 5774 } 5775 5776 static int 5777 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5778 int avail, bool *send) 5779 { 5780 struct txpkts *txp = &txq->txp; 5781 int nsegs; 5782 5783 MPASS(!(sc->flags & IS_VF)); 5784 5785 /* Cannot have TSO and coalesce at the same time. */ 5786 if (cannot_use_txpkts(m)) { 5787 cannot_coalesce: 5788 *send = txp->npkt > 0; 5789 return (EINVAL); 5790 } 5791 5792 *send = false; 5793 nsegs = mbuf_nsegs(m); 5794 if (txp->npkt == 0) { 5795 if (m->m_pkthdr.len > 65535) 5796 goto cannot_coalesce; 5797 if (nsegs > 1) { 5798 txp->wr_type = 0; 5799 txp->len16 = 5800 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5801 txpkts0_len16(nsegs); 5802 } else { 5803 txp->wr_type = 1; 5804 txp->len16 = 5805 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5806 txpkts1_len16(); 5807 } 5808 if (tx_len16_to_desc(txp->len16) > avail) 5809 goto cannot_coalesce; 5810 txp->npkt = 1; 5811 txp->plen = m->m_pkthdr.len; 5812 txp->mb[0] = m; 5813 } else { 5814 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5815 MPASS(txp->npkt < txp->max_npkt); 5816 5817 if (m->m_pkthdr.len + txp->plen > 65535) { 5818 retry_after_send: 5819 *send = true; 5820 return (EAGAIN); 5821 } 5822 5823 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 5824 if (txp->wr_type == 0) { 5825 if (tx_len16_to_desc(txp->len16 + 5826 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC)) 5827 goto retry_after_send; 5828 txp->len16 += txpkts0_len16(nsegs); 5829 } else { 5830 if (nsegs != 1) 5831 goto retry_after_send; 5832 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > 5833 avail) 5834 goto retry_after_send; 5835 txp->len16 += txpkts1_len16(); 5836 } 5837 5838 txp->plen += m->m_pkthdr.len; 5839 txp->mb[txp->npkt++] = m; 5840 if (txp->npkt == txp->max_npkt) 5841 *send = true; 5842 } 5843 return (0); 5844 } 5845 5846 /* 5847 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 5848 * the software descriptor, and advance the pidx. It is guaranteed that enough 5849 * descriptors are available. 5850 * 5851 * The return value is the # of hardware descriptors used. 
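* Type 0 entries are each wrapped in a ULP_TX_PKT / ULP_TX_SC_IMM pair and may carry multi-segment GLs; type 1 entries are bare cpl_tx_pkt_core headers and are used only for single-segment packets (see add_to_txpkts_pf above).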
5852 */ 5853 static u_int 5854 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq) 5855 { 5856 const struct txpkts *txp = &txq->txp; 5857 struct sge_eq *eq = &txq->eq; 5858 struct fw_eth_tx_pkts_wr *wr; 5859 struct tx_sdesc *txsd; 5860 struct cpl_tx_pkt_core *cpl; 5861 uint64_t ctrl1; 5862 int ndesc, i, checkwrap; 5863 struct mbuf *m, *last; 5864 void *flitp; 5865 5866 TXQ_LOCK_ASSERT_OWNED(txq); 5867 MPASS(txp->npkt > 0); 5868 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5869 5870 wr = (void *)&eq->desc[eq->pidx]; 5871 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 5872 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5873 wr->plen = htobe16(txp->plen); 5874 wr->npkt = txp->npkt; 5875 wr->r3 = 0; 5876 wr->type = txp->wr_type; 5877 flitp = wr + 1; 5878 5879 /* 5880 * At this point we are 16B into a hardware descriptor. If checkwrap is 5881 * set then we know the WR is going to wrap around somewhere. We'll 5882 * check for that at appropriate points. 5883 */ 5884 ndesc = tx_len16_to_desc(txp->len16); 5885 last = NULL; 5886 checkwrap = eq->sidx - ndesc < eq->pidx; 5887 for (i = 0; i < txp->npkt; i++) { 5888 m = txp->mb[i]; 5889 if (txp->wr_type == 0) { 5890 struct ulp_txpkt *ulpmc; 5891 struct ulptx_idata *ulpsc; 5892 5893 /* ULP master command */ 5894 ulpmc = flitp; 5895 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 5896 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 5897 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m))); 5898 5899 /* ULP subcommand */ 5900 ulpsc = (void *)(ulpmc + 1); 5901 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 5902 F_ULP_TX_SC_MORE); 5903 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 5904 5905 cpl = (void *)(ulpsc + 1); 5906 if (checkwrap && 5907 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 5908 cpl = (void *)&eq->desc[0]; 5909 } else { 5910 cpl = flitp; 5911 } 5912 5913 /* Checksum offload */ 5914 ctrl1 = csum_to_ctrl(sc, m); 5915 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5916 /* some hardware assistance provided */ 5917 if (needs_vxlan_csum(m)) 5918 txq->vxlan_txcsum++; 5919 else 5920 txq->txcsum++; 5921 } 5922 5923 /* VLAN tag insertion */ 5924 if (needs_vlan_insertion(m)) { 5925 ctrl1 |= F_TXPKT_VLAN_VLD | 5926 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5927 txq->vlan_insertion++; 5928 } 5929 5930 /* CPL header */ 5931 cpl->ctrl0 = txq->cpl_ctrl0; 5932 cpl->pack = 0; 5933 cpl->len = htobe16(m->m_pkthdr.len); 5934 cpl->ctrl1 = htobe64(ctrl1); 5935 5936 flitp = cpl + 1; 5937 if (checkwrap && 5938 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5939 flitp = (void *)&eq->desc[0]; 5940 5941 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 5942 5943 if (last != NULL) 5944 last->m_nextpkt = m; 5945 last = m; 5946 } 5947 5948 txq->sgl_wrs++; 5949 if (txp->wr_type == 0) { 5950 txq->txpkts0_pkts += txp->npkt; 5951 txq->txpkts0_wrs++; 5952 } else { 5953 txq->txpkts1_pkts += txp->npkt; 5954 txq->txpkts1_wrs++; 5955 } 5956 5957 txsd = &txq->sdesc[eq->pidx]; 5958 txsd->m = txp->mb[0]; 5959 txsd->desc_used = ndesc; 5960 5961 return (ndesc); 5962 } 5963 5964 static u_int 5965 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq) 5966 { 5967 const struct txpkts *txp = &txq->txp; 5968 struct sge_eq *eq = &txq->eq; 5969 struct fw_eth_tx_pkts_vm_wr *wr; 5970 struct tx_sdesc *txsd; 5971 struct cpl_tx_pkt_core *cpl; 5972 uint64_t ctrl1; 5973 int ndesc, i; 5974 struct mbuf *m, *last; 5975 void *flitp; 5976 5977 TXQ_LOCK_ASSERT_OWNED(txq); 5978 MPASS(txp->npkt > 0); 5979 MPASS(txp->wr_type == 1); /* 
VF supports type 1 only */ 5980 MPASS(txp->mb[0] != NULL); 5981 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5982 5983 wr = (void *)&eq->desc[eq->pidx]; 5984 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); 5985 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5986 wr->r3 = 0; 5987 wr->plen = htobe16(txp->plen); 5988 wr->npkt = txp->npkt; 5989 wr->r4 = 0; 5990 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16); 5991 flitp = wr + 1; 5992 5993 /* 5994 * At this point we are 32B into a hardware descriptor. Each mbuf in 5995 * the WR will take 32B so we check for the end of the descriptor ring 5996 * before writing odd mbufs (mb[1], 3, 5, ..) 5997 */ 5998 ndesc = tx_len16_to_desc(txp->len16); 5999 last = NULL; 6000 for (i = 0; i < txp->npkt; i++) { 6001 m = txp->mb[i]; 6002 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 6003 flitp = &eq->desc[0]; 6004 cpl = flitp; 6005 6006 /* Checksum offload */ 6007 ctrl1 = csum_to_ctrl(sc, m); 6008 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 6009 txq->txcsum++; /* some hardware assistance provided */ 6010 6011 /* VLAN tag insertion */ 6012 if (needs_vlan_insertion(m)) { 6013 ctrl1 |= F_TXPKT_VLAN_VLD | 6014 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 6015 txq->vlan_insertion++; 6016 } 6017 6018 /* CPL header */ 6019 cpl->ctrl0 = txq->cpl_ctrl0; 6020 cpl->pack = 0; 6021 cpl->len = htobe16(m->m_pkthdr.len); 6022 cpl->ctrl1 = htobe64(ctrl1); 6023 6024 flitp = cpl + 1; 6025 MPASS(mbuf_nsegs(m) == 1); 6026 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0); 6027 6028 if (last != NULL) 6029 last->m_nextpkt = m; 6030 last = m; 6031 } 6032 6033 txq->sgl_wrs++; 6034 txq->txpkts1_pkts += txp->npkt; 6035 txq->txpkts1_wrs++; 6036 6037 txsd = &txq->sdesc[eq->pidx]; 6038 txsd->m = txp->mb[0]; 6039 txsd->desc_used = ndesc; 6040 6041 return (ndesc); 6042 } 6043 6044 /* 6045 * If the SGL ends on an address that is not 16 byte aligned, this function will 6046 * add a 0 filled flit at the end. 6047 */ 6048 static void 6049 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 6050 { 6051 struct sge_eq *eq = &txq->eq; 6052 struct sglist *gl = txq->gl; 6053 struct sglist_seg *seg; 6054 __be64 *flitp, *wrap; 6055 struct ulptx_sgl *usgl; 6056 int i, nflits, nsegs; 6057 6058 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 6059 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 6060 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 6061 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 6062 6063 get_pkt_gl(m, gl); 6064 nsegs = gl->sg_nseg; 6065 MPASS(nsegs > 0); 6066 6067 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 6068 flitp = (__be64 *)(*to); 6069 wrap = (__be64 *)(&eq->desc[eq->sidx]); 6070 seg = &gl->sg_segs[0]; 6071 usgl = (void *)flitp; 6072 6073 /* 6074 * We start at a 16 byte boundary somewhere inside the tx descriptor 6075 * ring, so we're at least 16 bytes away from the status page. There is 6076 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
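 * The flit count computed above works out to 2 flits for the ulptx_sgl
 * header plus the first segment, 3 flits for every additional pair of
 * segments, and 2 flits for a final unpaired segment.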
6077 */ 6078 6079 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6080 V_ULPTX_NSGE(nsegs)); 6081 usgl->len0 = htobe32(seg->ss_len); 6082 usgl->addr0 = htobe64(seg->ss_paddr); 6083 seg++; 6084 6085 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 6086 6087 /* Won't wrap around at all */ 6088 6089 for (i = 0; i < nsegs - 1; i++, seg++) { 6090 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 6091 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 6092 } 6093 if (i & 1) 6094 usgl->sge[i / 2].len[1] = htobe32(0); 6095 flitp += nflits; 6096 } else { 6097 6098 /* Will wrap somewhere in the rest of the SGL */ 6099 6100 /* 2 flits already written, write the rest flit by flit */ 6101 flitp = (void *)(usgl + 1); 6102 for (i = 0; i < nflits - 2; i++) { 6103 if (flitp == wrap) 6104 flitp = (void *)eq->desc; 6105 *flitp++ = get_flit(seg, nsegs - 1, i); 6106 } 6107 } 6108 6109 if (nflits & 1) { 6110 MPASS(((uintptr_t)flitp) & 0xf); 6111 *flitp++ = 0; 6112 } 6113 6114 MPASS((((uintptr_t)flitp) & 0xf) == 0); 6115 if (__predict_false(flitp == wrap)) 6116 *to = (void *)eq->desc; 6117 else 6118 *to = (void *)flitp; 6119 } 6120 6121 static inline void 6122 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 6123 { 6124 6125 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 6126 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 6127 6128 if (__predict_true((uintptr_t)(*to) + len <= 6129 (uintptr_t)&eq->desc[eq->sidx])) { 6130 bcopy(from, *to, len); 6131 (*to) += len; 6132 } else { 6133 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 6134 6135 bcopy(from, *to, portion); 6136 from += portion; 6137 portion = len - portion; /* remaining */ 6138 bcopy(from, (void *)eq->desc, portion); 6139 (*to) = (caddr_t)eq->desc + portion; 6140 } 6141 } 6142 6143 static inline void 6144 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 6145 { 6146 u_int db; 6147 6148 MPASS(n > 0); 6149 6150 db = eq->doorbells; 6151 if (n > 1) 6152 clrbit(&db, DOORBELL_WCWR); 6153 wmb(); 6154 6155 switch (ffs(db) - 1) { 6156 case DOORBELL_UDB: 6157 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6158 break; 6159 6160 case DOORBELL_WCWR: { 6161 volatile uint64_t *dst, *src; 6162 int i; 6163 6164 /* 6165 * Queues whose 128B doorbell segment fits in the page do not 6166 * use relative qid (udb_qid is always 0). Only queues with 6167 * doorbell segments can do WCWR. 
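 * A WCWR doorbell copies the whole hardware descriptor into the doorbell
 * segment (the copy loop below), so it is only used when a single
 * descriptor is being committed; for n > 1 the WCWR bit is cleared above
 * and one of the other doorbell mechanisms is used.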
6168 */ 6169 KASSERT(eq->udb_qid == 0 && n == 1, 6170 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 6171 __func__, eq->doorbells, n, eq->dbidx, eq)); 6172 6173 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 6174 UDBS_DB_OFFSET); 6175 i = eq->dbidx; 6176 src = (void *)&eq->desc[i]; 6177 while (src != (void *)&eq->desc[i + 1]) 6178 *dst++ = *src++; 6179 wmb(); 6180 break; 6181 } 6182 6183 case DOORBELL_UDBWC: 6184 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6185 wmb(); 6186 break; 6187 6188 case DOORBELL_KDB: 6189 t4_write_reg(sc, sc->sge_kdoorbell_reg, 6190 V_QID(eq->cntxt_id) | V_PIDX(n)); 6191 break; 6192 } 6193 6194 IDXINCR(eq->dbidx, n, eq->sidx); 6195 } 6196 6197 static inline u_int 6198 reclaimable_tx_desc(struct sge_eq *eq) 6199 { 6200 uint16_t hw_cidx; 6201 6202 hw_cidx = read_hw_cidx(eq); 6203 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 6204 } 6205 6206 static inline u_int 6207 total_available_tx_desc(struct sge_eq *eq) 6208 { 6209 uint16_t hw_cidx, pidx; 6210 6211 hw_cidx = read_hw_cidx(eq); 6212 pidx = eq->pidx; 6213 6214 if (pidx == hw_cidx) 6215 return (eq->sidx - 1); 6216 else 6217 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 6218 } 6219 6220 static inline uint16_t 6221 read_hw_cidx(struct sge_eq *eq) 6222 { 6223 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 6224 uint16_t cidx = spg->cidx; /* stable snapshot */ 6225 6226 return (be16toh(cidx)); 6227 } 6228 6229 /* 6230 * Reclaim 'n' descriptors approximately. 6231 */ 6232 static u_int 6233 reclaim_tx_descs(struct sge_txq *txq, u_int n) 6234 { 6235 struct tx_sdesc *txsd; 6236 struct sge_eq *eq = &txq->eq; 6237 u_int can_reclaim, reclaimed; 6238 6239 TXQ_LOCK_ASSERT_OWNED(txq); 6240 MPASS(n > 0); 6241 6242 reclaimed = 0; 6243 can_reclaim = reclaimable_tx_desc(eq); 6244 while (can_reclaim && reclaimed < n) { 6245 int ndesc; 6246 struct mbuf *m, *nextpkt; 6247 6248 txsd = &txq->sdesc[eq->cidx]; 6249 ndesc = txsd->desc_used; 6250 6251 /* Firmware doesn't return "partial" credits. 
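 * A completed work request returns all of the descriptors it consumed in
 * one go, so can_reclaim always covers whole tx_sdesc entries; the
 * KASSERTs below rely on this.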
*/ 6252 KASSERT(can_reclaim >= ndesc, 6253 ("%s: unexpected number of credits: %d, %d", 6254 __func__, can_reclaim, ndesc)); 6255 KASSERT(ndesc != 0, 6256 ("%s: descriptor with no credits: cidx %d", 6257 __func__, eq->cidx)); 6258 6259 for (m = txsd->m; m != NULL; m = nextpkt) { 6260 nextpkt = m->m_nextpkt; 6261 m->m_nextpkt = NULL; 6262 m_freem(m); 6263 } 6264 reclaimed += ndesc; 6265 can_reclaim -= ndesc; 6266 IDXINCR(eq->cidx, ndesc, eq->sidx); 6267 } 6268 6269 return (reclaimed); 6270 } 6271 6272 static void 6273 tx_reclaim(void *arg, int n) 6274 { 6275 struct sge_txq *txq = arg; 6276 struct sge_eq *eq = &txq->eq; 6277 6278 do { 6279 if (TXQ_TRYLOCK(txq) == 0) 6280 break; 6281 n = reclaim_tx_descs(txq, 32); 6282 if (eq->cidx == eq->pidx) 6283 eq->equeqidx = eq->pidx; 6284 TXQ_UNLOCK(txq); 6285 } while (n > 0); 6286 } 6287 6288 static __be64 6289 get_flit(struct sglist_seg *segs, int nsegs, int idx) 6290 { 6291 int i = (idx / 3) * 2; 6292 6293 switch (idx % 3) { 6294 case 0: { 6295 uint64_t rc; 6296 6297 rc = (uint64_t)segs[i].ss_len << 32; 6298 if (i + 1 < nsegs) 6299 rc |= (uint64_t)(segs[i + 1].ss_len); 6300 6301 return (htobe64(rc)); 6302 } 6303 case 1: 6304 return (htobe64(segs[i].ss_paddr)); 6305 case 2: 6306 return (htobe64(segs[i + 1].ss_paddr)); 6307 } 6308 6309 return (0); 6310 } 6311 6312 static int 6313 find_refill_source(struct adapter *sc, int maxp, bool packing) 6314 { 6315 int i, zidx = -1; 6316 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6317 6318 if (packing) { 6319 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6320 if (rxb->hwidx2 == -1) 6321 continue; 6322 if (rxb->size1 < PAGE_SIZE && 6323 rxb->size1 < largest_rx_cluster) 6324 continue; 6325 if (rxb->size1 > largest_rx_cluster) 6326 break; 6327 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE); 6328 if (rxb->size2 >= maxp) 6329 return (i); 6330 zidx = i; 6331 } 6332 } else { 6333 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6334 if (rxb->hwidx1 == -1) 6335 continue; 6336 if (rxb->size1 > largest_rx_cluster) 6337 break; 6338 if (rxb->size1 >= maxp) 6339 return (i); 6340 zidx = i; 6341 } 6342 } 6343 6344 return (zidx); 6345 } 6346 6347 static void 6348 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 6349 { 6350 mtx_lock(&sc->sfl_lock); 6351 FL_LOCK(fl); 6352 if ((fl->flags & FL_DOOMED) == 0) { 6353 fl->flags |= FL_STARVING; 6354 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 6355 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 6356 } 6357 FL_UNLOCK(fl); 6358 mtx_unlock(&sc->sfl_lock); 6359 } 6360 6361 static void 6362 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 6363 { 6364 struct sge_wrq *wrq = (void *)eq; 6365 6366 atomic_readandclear_int(&eq->equiq); 6367 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 6368 } 6369 6370 static void 6371 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 6372 { 6373 struct sge_txq *txq = (void *)eq; 6374 6375 MPASS(eq->type == EQ_ETH); 6376 6377 atomic_readandclear_int(&eq->equiq); 6378 if (mp_ring_is_idle(txq->r)) 6379 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 6380 else 6381 mp_ring_check_drainage(txq->r, 64); 6382 } 6383 6384 static int 6385 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 6386 struct mbuf *m) 6387 { 6388 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 6389 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 6390 struct adapter *sc = iq->adapter; 6391 struct sge *s = &sc->sge; 6392 struct sge_eq *eq; 6393 static void (*h[])(struct adapter *, struct sge_eq 
*) = {NULL, 6394 &handle_wrq_egr_update, &handle_eth_egr_update, 6395 &handle_wrq_egr_update}; 6396 6397 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6398 rss->opcode)); 6399 6400 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 6401 (*h[eq->type])(sc, eq); 6402 6403 return (0); 6404 } 6405 6406 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 6407 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 6408 offsetof(struct cpl_fw6_msg, data)); 6409 6410 static int 6411 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 6412 { 6413 struct adapter *sc = iq->adapter; 6414 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 6415 6416 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6417 rss->opcode)); 6418 6419 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 6420 const struct rss_header *rss2; 6421 6422 rss2 = (const struct rss_header *)&cpl->data[0]; 6423 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 6424 } 6425 6426 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 6427 } 6428 6429 /** 6430 * t4_handle_wrerr_rpl - process a FW work request error message 6431 * @adap: the adapter 6432 * @rpl: start of the FW message 6433 */ 6434 static int 6435 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 6436 { 6437 u8 opcode = *(const u8 *)rpl; 6438 const struct fw_error_cmd *e = (const void *)rpl; 6439 unsigned int i; 6440 6441 if (opcode != FW_ERROR_CMD) { 6442 log(LOG_ERR, 6443 "%s: Received WRERR_RPL message with opcode %#x\n", 6444 device_get_nameunit(adap->dev), opcode); 6445 return (EINVAL); 6446 } 6447 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 6448 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 6449 "non-fatal"); 6450 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 6451 case FW_ERROR_TYPE_EXCEPTION: 6452 log(LOG_ERR, "exception info:\n"); 6453 for (i = 0; i < nitems(e->u.exception.info); i++) 6454 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 6455 be32toh(e->u.exception.info[i])); 6456 log(LOG_ERR, "\n"); 6457 break; 6458 case FW_ERROR_TYPE_HWMODULE: 6459 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 6460 be32toh(e->u.hwmodule.regaddr), 6461 be32toh(e->u.hwmodule.regval)); 6462 break; 6463 case FW_ERROR_TYPE_WR: 6464 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 6465 be16toh(e->u.wr.cidx), 6466 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 6467 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 6468 be32toh(e->u.wr.eqid)); 6469 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 6470 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 6471 e->u.wr.wrhdr[i]); 6472 log(LOG_ERR, "\n"); 6473 break; 6474 case FW_ERROR_TYPE_ACL: 6475 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 6476 be16toh(e->u.acl.cidx), 6477 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 6478 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 6479 be32toh(e->u.acl.eqid), 6480 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 6481 "MAC"); 6482 for (i = 0; i < nitems(e->u.acl.val); i++) 6483 log(LOG_ERR, " %02x", e->u.acl.val[i]); 6484 log(LOG_ERR, "\n"); 6485 break; 6486 default: 6487 log(LOG_ERR, "type %#x\n", 6488 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 6489 return (EINVAL); 6490 } 6491 return (0); 6492 } 6493 6494 static inline bool 6495 bufidx_used(struct adapter *sc, int idx) 6496 { 6497 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6498 int i; 6499 6500 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6501 if (rxb->size1 > largest_rx_cluster) 6502 continue; 6503 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx) 6504 return (true); 6505 } 6506 6507 return (false); 6508 } 6509 6510 static int 6511 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 6512 { 6513 struct adapter *sc = arg1; 6514 struct sge_params *sp = &sc->params.sge; 6515 int i, rc; 6516 struct sbuf sb; 6517 char c; 6518 6519 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); 6520 for (i = 0; i < SGE_FLBUF_SIZES; i++) { 6521 if (bufidx_used(sc, i)) 6522 c = '*'; 6523 else 6524 c = '\0'; 6525 6526 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c); 6527 } 6528 sbuf_trim(&sb); 6529 sbuf_finish(&sb); 6530 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 6531 sbuf_delete(&sb); 6532 return (rc); 6533 } 6534 6535 #ifdef RATELIMIT 6536 #if defined(INET) || defined(INET6) 6537 /* 6538 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 6539 */ 6540 static inline u_int 6541 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 6542 { 6543 u_int n; 6544 6545 MPASS(immhdrs > 0); 6546 6547 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 6548 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 6549 if (__predict_false(nsegs == 0)) 6550 goto done; 6551 6552 nsegs--; /* first segment is part of ulptx_sgl */ 6553 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 6554 if (tso) 6555 n += sizeof(struct cpl_tx_pkt_lso_core); 6556 6557 done: 6558 return (howmany(n, 16)); 6559 } 6560 #endif 6561 6562 #define ETID_FLOWC_NPARAMS 6 6563 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 6564 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 6565 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 6566 6567 static int 6568 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, 6569 struct vi_info *vi) 6570 { 6571 struct wrq_cookie cookie; 6572 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN; 6573 struct fw_flowc_wr *flowc; 6574 6575 mtx_assert(&cst->lock, MA_OWNED); 6576 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 6577 EO_FLOWC_PENDING); 6578 6579 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLOWC_LEN16, &cookie); 6580 if (__predict_false(flowc == NULL)) 6581 return (ENOMEM); 6582 6583 bzero(flowc, ETID_FLOWC_LEN); 6584 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6585 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 6586 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 6587 V_FW_WR_FLOWID(cst->etid)); 6588 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 6589 flowc->mnemval[0].val = htobe32(pfvf); 6590 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 6591 flowc->mnemval[1].val = htobe32(pi->tx_chan); 6592 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 6593 flowc->mnemval[2].val = htobe32(pi->tx_chan); 6594 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 6595 flowc->mnemval[3].val = htobe32(cst->iqid); 6596 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 6597 flowc->mnemval[4].val = 
htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 6598 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 6599 flowc->mnemval[5].val = htobe32(cst->schedcl); 6600 6601 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6602 6603 cst->flags &= ~EO_FLOWC_PENDING; 6604 cst->flags |= EO_FLOWC_RPL_PENDING; 6605 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 6606 cst->tx_credits -= ETID_FLOWC_LEN16; 6607 6608 return (0); 6609 } 6610 6611 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 6612 6613 void 6614 send_etid_flush_wr(struct cxgbe_rate_tag *cst) 6615 { 6616 struct fw_flowc_wr *flowc; 6617 struct wrq_cookie cookie; 6618 6619 mtx_assert(&cst->lock, MA_OWNED); 6620 6621 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLUSH_LEN16, &cookie); 6622 if (__predict_false(flowc == NULL)) 6623 CXGBE_UNIMPLEMENTED(__func__); 6624 6625 bzero(flowc, ETID_FLUSH_LEN16 * 16); 6626 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6627 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 6628 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 6629 V_FW_WR_FLOWID(cst->etid)); 6630 6631 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6632 6633 cst->flags |= EO_FLUSH_RPL_PENDING; 6634 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 6635 cst->tx_credits -= ETID_FLUSH_LEN16; 6636 cst->ncompl++; 6637 } 6638 6639 static void 6640 write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, 6641 struct mbuf *m0, int compl) 6642 { 6643 struct cpl_tx_pkt_core *cpl; 6644 uint64_t ctrl1; 6645 uint32_t ctrl; /* used in many unrelated places */ 6646 int len16, pktlen, nsegs, immhdrs; 6647 uintptr_t p; 6648 struct ulptx_sgl *usgl; 6649 struct sglist sg; 6650 struct sglist_seg segs[38]; /* XXX: find real limit. XXX: get off the stack */ 6651 6652 mtx_assert(&cst->lock, MA_OWNED); 6653 M_ASSERTPKTHDR(m0); 6654 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 6655 m0->m_pkthdr.l4hlen > 0, 6656 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 6657 6658 len16 = mbuf_eo_len16(m0); 6659 nsegs = mbuf_eo_nsegs(m0); 6660 pktlen = m0->m_pkthdr.len; 6661 ctrl = sizeof(struct cpl_tx_pkt_core); 6662 if (needs_tso(m0)) 6663 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 6664 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 6665 ctrl += immhdrs; 6666 6667 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 6668 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 6669 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 6670 V_FW_WR_FLOWID(cst->etid)); 6671 wr->r3 = 0; 6672 if (needs_outer_udp_csum(m0)) { 6673 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 6674 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 6675 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6676 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 6677 wr->u.udpseg.rtplen = 0; 6678 wr->u.udpseg.r4 = 0; 6679 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 6680 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 6681 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 6682 cpl = (void *)(wr + 1); 6683 } else { 6684 MPASS(needs_outer_tcp_csum(m0)); 6685 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 6686 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 6687 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6688 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 6689 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 6690 wr->u.tcpseg.r4 = 0; 6691 wr->u.tcpseg.r5 = 0; 6692 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 6693 6694 if (needs_tso(m0)) { 6695 struct cpl_tx_pkt_lso_core *lso = (void 
*)(wr + 1); 6696 6697 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 6698 6699 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 6700 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 6701 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - 6702 ETHER_HDR_LEN) >> 2) | 6703 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 6704 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 6705 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 6706 ctrl |= F_LSO_IPV6; 6707 lso->lso_ctrl = htobe32(ctrl); 6708 lso->ipid_ofst = htobe16(0); 6709 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 6710 lso->seqno_offset = htobe32(0); 6711 lso->len = htobe32(pktlen); 6712 6713 cpl = (void *)(lso + 1); 6714 } else { 6715 wr->u.tcpseg.mss = htobe16(0xffff); 6716 cpl = (void *)(wr + 1); 6717 } 6718 } 6719 6720 /* Checksum offload must be requested for ethofld. */ 6721 MPASS(needs_outer_l4_csum(m0)); 6722 ctrl1 = csum_to_ctrl(cst->adapter, m0); 6723 6724 /* VLAN tag insertion */ 6725 if (needs_vlan_insertion(m0)) { 6726 ctrl1 |= F_TXPKT_VLAN_VLD | 6727 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 6728 } 6729 6730 /* CPL header */ 6731 cpl->ctrl0 = cst->ctrl0; 6732 cpl->pack = 0; 6733 cpl->len = htobe16(pktlen); 6734 cpl->ctrl1 = htobe64(ctrl1); 6735 6736 /* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */ 6737 p = (uintptr_t)(cpl + 1); 6738 m_copydata(m0, 0, immhdrs, (void *)p); 6739 6740 /* SGL */ 6741 if (nsegs > 0) { 6742 int i, pad; 6743 6744 /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ 6745 p += immhdrs; 6746 pad = 16 - (immhdrs & 0xf); 6747 bzero((void *)p, pad); 6748 6749 usgl = (void *)(p + pad); 6750 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6751 V_ULPTX_NSGE(nsegs)); 6752 6753 sglist_init(&sg, nitems(segs), segs); 6754 for (; m0 != NULL; m0 = m0->m_next) { 6755 if (__predict_false(m0->m_len == 0)) 6756 continue; 6757 if (immhdrs >= m0->m_len) { 6758 immhdrs -= m0->m_len; 6759 continue; 6760 } 6761 if (m0->m_flags & M_EXTPG) 6762 sglist_append_mbuf_epg(&sg, m0, 6763 mtod(m0, vm_offset_t), m0->m_len); 6764 else 6765 sglist_append(&sg, mtod(m0, char *) + immhdrs, 6766 m0->m_len - immhdrs); 6767 immhdrs = 0; 6768 } 6769 MPASS(sg.sg_nseg == nsegs); 6770 6771 /* 6772 * Zero pad last 8B in case the WR doesn't end on a 16B 6773 * boundary. 6774 */ 6775 *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; 6776 6777 usgl->len0 = htobe32(segs[0].ss_len); 6778 usgl->addr0 = htobe64(segs[0].ss_paddr); 6779 for (i = 0; i < nsegs - 1; i++) { 6780 usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); 6781 usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); 6782 } 6783 if (i & 1) 6784 usgl->sge[i / 2].len[1] = htobe32(0); 6785 } 6786 6787 } 6788 6789 static void 6790 ethofld_tx(struct cxgbe_rate_tag *cst) 6791 { 6792 struct mbuf *m; 6793 struct wrq_cookie cookie; 6794 int next_credits, compl; 6795 struct fw_eth_tx_eo_wr *wr; 6796 6797 mtx_assert(&cst->lock, MA_OWNED); 6798 6799 while ((m = mbufq_first(&cst->pending_tx)) != NULL) { 6800 M_ASSERTPKTHDR(m); 6801 6802 /* How many len16 credits do we need to send this mbuf. */ 6803 next_credits = mbuf_eo_len16(m); 6804 MPASS(next_credits > 0); 6805 if (next_credits > cst->tx_credits) { 6806 /* 6807 * Tx will make progress eventually because there is at 6808 * least one outstanding fw4_ack that will return 6809 * credits and kick the tx. 6810 */ 6811 MPASS(cst->ncompl > 0); 6812 return; 6813 } 6814 wr = start_wrq_wr(&cst->eo_txq->wrq, next_credits, &cookie); 6815 if (__predict_false(wr == NULL)) { 6816 /* XXX: wishful thinking, not a real assertion. 
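 * start_wrq_wr returning NULL means there was no room in the work request
 * queue; the mbuf stays on pending_tx and we count on an outstanding
 * fw4_ack (if there is one) to kick the tx again later.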
*/ 6817 MPASS(cst->ncompl > 0); 6818 return; 6819 } 6820 cst->tx_credits -= next_credits; 6821 cst->tx_nocompl += next_credits; 6822 compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; 6823 ETHER_BPF_MTAP(cst->com.ifp, m); 6824 write_ethofld_wr(cst, wr, m, compl); 6825 commit_wrq_wr(&cst->eo_txq->wrq, wr, &cookie); 6826 if (compl) { 6827 cst->ncompl++; 6828 cst->tx_nocompl = 0; 6829 } 6830 (void) mbufq_dequeue(&cst->pending_tx); 6831 6832 /* 6833 * Drop the mbuf's reference on the tag now rather 6834 * than waiting until m_freem(). This ensures that 6835 * cxgbe_rate_tag_free gets called when the inp drops 6836 * its reference on the tag and there are no more 6837 * mbufs in the pending_tx queue and can flush any 6838 * pending requests. Otherwise if the last mbuf 6839 * doesn't request a completion the etid will never be 6840 * released. 6841 */ 6842 m->m_pkthdr.snd_tag = NULL; 6843 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 6844 m_snd_tag_rele(&cst->com); 6845 6846 mbufq_enqueue(&cst->pending_fwack, m); 6847 } 6848 } 6849 6850 int 6851 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 6852 { 6853 struct cxgbe_rate_tag *cst; 6854 int rc; 6855 6856 MPASS(m0->m_nextpkt == NULL); 6857 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); 6858 MPASS(m0->m_pkthdr.snd_tag != NULL); 6859 cst = mst_to_crt(m0->m_pkthdr.snd_tag); 6860 6861 mtx_lock(&cst->lock); 6862 MPASS(cst->flags & EO_SND_TAG_REF); 6863 6864 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 6865 struct vi_info *vi = ifp->if_softc; 6866 struct port_info *pi = vi->pi; 6867 struct adapter *sc = pi->adapter; 6868 const uint32_t rss_mask = vi->rss_size - 1; 6869 uint32_t rss_hash; 6870 6871 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 6872 if (M_HASHTYPE_ISHASH(m0)) 6873 rss_hash = m0->m_pkthdr.flowid; 6874 else 6875 rss_hash = arc4random(); 6876 /* We assume RSS hashing */ 6877 cst->iqid = vi->rss[rss_hash & rss_mask]; 6878 cst->eo_txq += rss_hash % vi->nofldtxq; 6879 rc = send_etid_flowc_wr(cst, pi, vi); 6880 if (rc != 0) 6881 goto done; 6882 } 6883 6884 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 6885 rc = ENOBUFS; 6886 goto done; 6887 } 6888 6889 mbufq_enqueue(&cst->pending_tx, m0); 6890 cst->plen += m0->m_pkthdr.len; 6891 6892 /* 6893 * Hold an extra reference on the tag while generating work 6894 * requests to ensure that we don't try to free the tag during 6895 * ethofld_tx() in case we are sending the final mbuf after 6896 * the inp was freed. 
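 * The matching m_snd_tag_rele() below is issued only after cst->lock has
 * been dropped, presumably so that a final free of the tag never runs
 * with the lock still held.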
6897 */ 6898 m_snd_tag_ref(&cst->com); 6899 ethofld_tx(cst); 6900 mtx_unlock(&cst->lock); 6901 m_snd_tag_rele(&cst->com); 6902 return (0); 6903 6904 done: 6905 mtx_unlock(&cst->lock); 6906 if (__predict_false(rc != 0)) 6907 m_freem(m0); 6908 return (rc); 6909 } 6910 6911 static int 6912 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 6913 { 6914 struct adapter *sc = iq->adapter; 6915 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 6916 struct mbuf *m; 6917 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 6918 struct cxgbe_rate_tag *cst; 6919 uint8_t credits = cpl->credits; 6920 6921 cst = lookup_etid(sc, etid); 6922 mtx_lock(&cst->lock); 6923 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 6924 MPASS(credits >= ETID_FLOWC_LEN16); 6925 credits -= ETID_FLOWC_LEN16; 6926 cst->flags &= ~EO_FLOWC_RPL_PENDING; 6927 } 6928 6929 KASSERT(cst->ncompl > 0, 6930 ("%s: etid %u (%p) wasn't expecting completion.", 6931 __func__, etid, cst)); 6932 cst->ncompl--; 6933 6934 while (credits > 0) { 6935 m = mbufq_dequeue(&cst->pending_fwack); 6936 if (__predict_false(m == NULL)) { 6937 /* 6938 * The remaining credits are for the final flush that 6939 * was issued when the tag was freed by the kernel. 6940 */ 6941 MPASS((cst->flags & 6942 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 6943 EO_FLUSH_RPL_PENDING); 6944 MPASS(credits == ETID_FLUSH_LEN16); 6945 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 6946 MPASS(cst->ncompl == 0); 6947 6948 cst->flags &= ~EO_FLUSH_RPL_PENDING; 6949 cst->tx_credits += cpl->credits; 6950 cxgbe_rate_tag_free_locked(cst); 6951 return (0); /* cst is gone. */ 6952 } 6953 KASSERT(m != NULL, 6954 ("%s: too many credits (%u, %u)", __func__, cpl->credits, 6955 credits)); 6956 KASSERT(credits >= mbuf_eo_len16(m), 6957 ("%s: too few credits (%u, %u, %u)", __func__, 6958 cpl->credits, credits, mbuf_eo_len16(m))); 6959 credits -= mbuf_eo_len16(m); 6960 cst->plen -= m->m_pkthdr.len; 6961 m_freem(m); 6962 } 6963 6964 cst->tx_credits += cpl->credits; 6965 MPASS(cst->tx_credits <= cst->tx_total); 6966 6967 if (cst->flags & EO_SND_TAG_REF) { 6968 /* 6969 * As with ethofld_transmit(), hold an extra reference 6970 * so that the tag is stable across ethold_tx(). 6971 */ 6972 m_snd_tag_ref(&cst->com); 6973 m = mbufq_first(&cst->pending_tx); 6974 if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) 6975 ethofld_tx(cst); 6976 mtx_unlock(&cst->lock); 6977 m_snd_tag_rele(&cst->com); 6978 } else { 6979 /* 6980 * There shouldn't be any pending packets if the tag 6981 * was freed by the kernel since any pending packet 6982 * should hold a reference to the tag. 6983 */ 6984 MPASS(mbufq_first(&cst->pending_tx) == NULL); 6985 mtx_unlock(&cst->lock); 6986 } 6987 6988 return (0); 6989 } 6990 #endif 6991