1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_kern_tls.h" 36 #include "opt_ratelimit.h" 37 38 #include <sys/types.h> 39 #include <sys/eventhandler.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/ktls.h> 44 #include <sys/malloc.h> 45 #include <sys/msan.h> 46 #include <sys/queue.h> 47 #include <sys/sbuf.h> 48 #include <sys/taskqueue.h> 49 #include <sys/time.h> 50 #include <sys/sglist.h> 51 #include <sys/sysctl.h> 52 #include <sys/smp.h> 53 #include <sys/socketvar.h> 54 #include <sys/counter.h> 55 #include <net/bpf.h> 56 #include <net/ethernet.h> 57 #include <net/if.h> 58 #include <net/if_vlan_var.h> 59 #include <net/if_vxlan.h> 60 #include <netinet/in.h> 61 #include <netinet/ip.h> 62 #include <netinet/ip6.h> 63 #include <netinet/tcp.h> 64 #include <netinet/udp.h> 65 #include <machine/in_cksum.h> 66 #include <machine/md_var.h> 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #ifdef DEV_NETMAP 70 #include <machine/bus.h> 71 #include <sys/selinfo.h> 72 #include <net/if_var.h> 73 #include <net/netmap.h> 74 #include <dev/netmap/netmap_kern.h> 75 #endif 76 77 #include "common/common.h" 78 #include "common/t4_regs.h" 79 #include "common/t4_regs_values.h" 80 #include "common/t4_msg.h" 81 #include "t4_l2t.h" 82 #include "t4_mp_ring.h" 83 84 #ifdef T4_PKT_TIMESTAMP 85 #define RX_COPY_THRESHOLD (MINCLSIZE - 8) 86 #else 87 #define RX_COPY_THRESHOLD MINCLSIZE 88 #endif 89 90 /* 91 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 92 * 0-7 are valid values. 93 */ 94 static int fl_pktshift = 0; 95 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0, 96 "payload DMA offset in rx buffer (bytes)"); 97 98 /* 99 * Pad ethernet payload up to this boundary. 100 * -1: driver should figure out a good value. 101 * 0: disable padding. 102 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. 
103 */ 104 int fl_pad = -1; 105 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0, 106 "payload pad boundary (bytes)"); 107 108 /* 109 * Status page length. 110 * -1: driver should figure out a good value. 111 * 64 or 128 are the only other valid values. 112 */ 113 static int spg_len = -1; 114 SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0, 115 "status page size (bytes)"); 116 117 /* 118 * Congestion drops. 119 * -1: no congestion feedback (not recommended). 120 * 0: backpressure the channel instead of dropping packets right away. 121 * 1: no backpressure, drop packets for the congested queue immediately. 122 * 2: both backpressure and drop. 123 */ 124 static int cong_drop = 0; 125 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0, 126 "Congestion control for NIC RX queues (0 = backpressure, 1 = drop, 2 = both"); 127 #ifdef TCP_OFFLOAD 128 static int ofld_cong_drop = 0; 129 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ofld_cong_drop, CTLFLAG_RDTUN, &ofld_cong_drop, 0, 130 "Congestion control for TOE RX queues (0 = backpressure, 1 = drop, 2 = both"); 131 #endif 132 133 /* 134 * Deliver multiple frames in the same free list buffer if they fit. 135 * -1: let the driver decide whether to enable buffer packing or not. 136 * 0: disable buffer packing. 137 * 1: enable buffer packing. 138 */ 139 static int buffer_packing = -1; 140 SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing, 141 0, "Enable buffer packing"); 142 143 /* 144 * Start next frame in a packed buffer at this boundary. 145 * -1: driver should figure out a good value. 146 * T4: driver will ignore this and use the same value as fl_pad above. 147 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. 
148 */ 149 static int fl_pack = -1; 150 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0, 151 "payload pack boundary (bytes)"); 152 153 /* 154 * Largest rx cluster size that the driver is allowed to allocate. 155 */ 156 static int largest_rx_cluster = MJUM16BYTES; 157 SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN, 158 &largest_rx_cluster, 0, "Largest rx cluster (bytes)"); 159 160 /* 161 * Size of cluster allocation that's most likely to succeed. The driver will 162 * fall back to this size if it fails to allocate clusters larger than this. 163 */ 164 static int safest_rx_cluster = PAGE_SIZE; 165 SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN, 166 &safest_rx_cluster, 0, "Safe rx cluster (bytes)"); 167 168 #ifdef RATELIMIT 169 /* 170 * Knob to control TCP timestamp rewriting, and the granularity of the tick used 171 * for rewriting. -1 and 0-3 are all valid values. 172 * -1: hardware should leave the TCP timestamps alone. 173 * 0: 1ms 174 * 1: 100us 175 * 2: 10us 176 * 3: 1us 177 */ 178 static int tsclk = -1; 179 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0, 180 "Control TCP timestamp rewriting when using pacing"); 181 182 static int eo_max_backlog = 1024 * 1024; 183 SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog, 184 0, "Maximum backlog of ratelimited data per flow"); 185 #endif 186 187 /* 188 * The interrupt holdoff timers are multiplied by this value on T6+. 189 * 1 and 3-17 (both inclusive) are legal values. 190 */ 191 static int tscale = 1; 192 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0, 193 "Interrupt holdoff timer scale on T6+"); 194 195 /* 196 * Number of LRO entries in the lro_ctrl structure per rx queue. 
197 */ 198 static int lro_entries = TCP_LRO_ENTRIES; 199 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0, 200 "Number of LRO entries per RX queue"); 201 202 /* 203 * This enables presorting of frames before they're fed into tcp_lro_rx. 204 */ 205 static int lro_mbufs = 0; 206 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0, 207 "Enable presorting of LRO frames"); 208 209 static counter_u64_t pullups; 210 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, pullups, CTLFLAG_RD, &pullups, 211 "Number of mbuf pullups performed"); 212 213 static counter_u64_t defrags; 214 SYSCTL_COUNTER_U64(_hw_cxgbe, OID_AUTO, defrags, CTLFLAG_RD, &defrags, 215 "Number of mbuf defrags performed"); 216 217 static int t4_tx_coalesce = 1; 218 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce, CTLFLAG_RWTUN, &t4_tx_coalesce, 0, 219 "tx coalescing allowed"); 220 221 /* 222 * The driver will make aggressive attempts at tx coalescing if it sees these 223 * many packets eligible for coalescing in quick succession, with no more than 224 * the specified gap in between the eth_tx calls that delivered the packets. 
225 */ 226 static int t4_tx_coalesce_pkts = 32; 227 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_pkts, CTLFLAG_RWTUN, 228 &t4_tx_coalesce_pkts, 0, 229 "# of consecutive packets (1 - 255) that will trigger tx coalescing"); 230 static int t4_tx_coalesce_gap = 5; 231 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_coalesce_gap, CTLFLAG_RWTUN, 232 &t4_tx_coalesce_gap, 0, "tx gap (in microseconds)"); 233 234 static int service_iq(struct sge_iq *, int); 235 static int service_iq_fl(struct sge_iq *, int); 236 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 237 static int eth_rx(struct adapter *, struct sge_rxq *, const struct iq_desc *, 238 u_int); 239 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, 240 int, int, int); 241 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 242 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 243 struct sge_iq *, char *); 244 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 245 struct sysctl_ctx_list *, struct sysctl_oid *); 246 static void free_iq_fl(struct adapter *, struct sge_iq *, struct sge_fl *); 247 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 248 struct sge_iq *); 249 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 250 struct sysctl_oid *, struct sge_fl *); 251 static int alloc_iq_fl_hwq(struct vi_info *, struct sge_iq *, struct sge_fl *); 252 static int free_iq_fl_hwq(struct adapter *, struct sge_iq *, struct sge_fl *); 253 static int alloc_fwq(struct adapter *); 254 static void free_fwq(struct adapter *); 255 static int alloc_ctrlq(struct adapter *, int); 256 static void free_ctrlq(struct adapter *, int); 257 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, int); 258 static void free_rxq(struct vi_info *, struct sge_rxq *); 259 static void add_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 260 struct sge_rxq *); 
261 #ifdef TCP_OFFLOAD 262 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 263 int); 264 static void free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 265 static void add_ofld_rxq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 266 struct sge_ofld_rxq *); 267 #endif 268 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 269 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 270 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 271 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 272 #endif 273 static int alloc_eq(struct adapter *, struct sge_eq *, struct sysctl_ctx_list *, 274 struct sysctl_oid *); 275 static void free_eq(struct adapter *, struct sge_eq *); 276 static void add_eq_sysctls(struct adapter *, struct sysctl_ctx_list *, 277 struct sysctl_oid *, struct sge_eq *); 278 static int alloc_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *); 279 static int free_eq_hwq(struct adapter *, struct vi_info *, struct sge_eq *); 280 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 281 struct sysctl_ctx_list *, struct sysctl_oid *); 282 static void free_wrq(struct adapter *, struct sge_wrq *); 283 static void add_wrq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 284 struct sge_wrq *); 285 static int alloc_txq(struct vi_info *, struct sge_txq *, int); 286 static void free_txq(struct vi_info *, struct sge_txq *); 287 static void add_txq_sysctls(struct vi_info *, struct sysctl_ctx_list *, 288 struct sysctl_oid *, struct sge_txq *); 289 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 290 static int alloc_ofld_txq(struct vi_info *, struct sge_ofld_txq *, int); 291 static void free_ofld_txq(struct vi_info *, struct sge_ofld_txq *); 292 static void add_ofld_txq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 293 struct sge_ofld_txq *); 294 #endif 295 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 296 
static inline void ring_fl_db(struct adapter *, struct sge_fl *); 297 static int refill_fl(struct adapter *, struct sge_fl *, int); 298 static void refill_sfl(void *); 299 static int find_refill_source(struct adapter *, int, bool); 300 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 301 302 static inline void get_pkt_gl(struct mbuf *, struct sglist *); 303 static inline u_int txpkt_len16(u_int, const u_int); 304 static inline u_int txpkt_vm_len16(u_int, const u_int); 305 static inline void calculate_mbuf_len16(struct mbuf *, bool); 306 static inline u_int txpkts0_len16(u_int); 307 static inline u_int txpkts1_len16(void); 308 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); 309 static u_int write_txpkt_wr(struct adapter *, struct sge_txq *, struct mbuf *, 310 u_int); 311 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 312 struct mbuf *); 313 static int add_to_txpkts_vf(struct adapter *, struct sge_txq *, struct mbuf *, 314 int, bool *); 315 static int add_to_txpkts_pf(struct adapter *, struct sge_txq *, struct mbuf *, 316 int, bool *); 317 static u_int write_txpkts_wr(struct adapter *, struct sge_txq *); 318 static u_int write_txpkts_vm_wr(struct adapter *, struct sge_txq *); 319 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 320 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 321 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 322 static inline uint16_t read_hw_cidx(struct sge_eq *); 323 static inline u_int reclaimable_tx_desc(struct sge_eq *); 324 static inline u_int total_available_tx_desc(struct sge_eq *); 325 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 326 static void tx_reclaim(void *, int); 327 static __be64 get_flit(struct sglist_seg *, int, int); 328 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 329 struct mbuf *); 330 static int handle_fw_msg(struct sge_iq *, const struct 
rss_header *, 331 struct mbuf *); 332 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 333 static void wrq_tx_drain(void *, int); 334 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 335 336 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 337 #ifdef RATELIMIT 338 #if defined(INET) || defined(INET6) 339 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 340 #endif 341 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 342 struct mbuf *); 343 static int ethofld_transmit(struct ifnet *, struct mbuf *); 344 #endif 345 346 static counter_u64_t extfree_refs; 347 static counter_u64_t extfree_rels; 348 349 an_handler_t t4_an_handler; 350 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 351 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 352 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 353 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 354 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 355 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 356 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 357 358 void 359 t4_register_an_handler(an_handler_t h) 360 { 361 uintptr_t *loc; 362 363 MPASS(h == NULL || t4_an_handler == NULL); 364 365 loc = (uintptr_t *)&t4_an_handler; 366 atomic_store_rel_ptr(loc, (uintptr_t)h); 367 } 368 369 void 370 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 371 { 372 uintptr_t *loc; 373 374 MPASS(type < nitems(t4_fw_msg_handler)); 375 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 376 /* 377 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 378 * handler dispatch table. Reject any attempt to install a handler for 379 * this subtype. 
380 */ 381 MPASS(type != FW_TYPE_RSSCPL); 382 MPASS(type != FW6_TYPE_RSSCPL); 383 384 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 385 atomic_store_rel_ptr(loc, (uintptr_t)h); 386 } 387 388 void 389 t4_register_cpl_handler(int opcode, cpl_handler_t h) 390 { 391 uintptr_t *loc; 392 393 MPASS(opcode < nitems(t4_cpl_handler)); 394 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 395 396 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 397 atomic_store_rel_ptr(loc, (uintptr_t)h); 398 } 399 400 static int 401 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 402 struct mbuf *m) 403 { 404 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 405 u_int tid; 406 int cookie; 407 408 MPASS(m == NULL); 409 410 tid = GET_TID(cpl); 411 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { 412 /* 413 * The return code for filter-write is put in the CPL cookie so 414 * we have to rely on the hardware tid (is_ftid) to determine 415 * that this is a response to a filter. 416 */ 417 cookie = CPL_COOKIE_FILTER; 418 } else { 419 cookie = G_COOKIE(cpl->cookie); 420 } 421 MPASS(cookie > CPL_COOKIE_RESERVED); 422 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 423 424 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 425 } 426 427 static int 428 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 429 struct mbuf *m) 430 { 431 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 432 unsigned int cookie; 433 434 MPASS(m == NULL); 435 436 cookie = GET_TID(rpl) & F_SYNC_WR ? 
CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 437 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 438 } 439 440 static int 441 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 442 struct mbuf *m) 443 { 444 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 445 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 446 447 MPASS(m == NULL); 448 MPASS(cookie != CPL_COOKIE_RESERVED); 449 450 return (act_open_rpl_handlers[cookie](iq, rss, m)); 451 } 452 453 static int 454 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 455 struct mbuf *m) 456 { 457 struct adapter *sc = iq->adapter; 458 u_int cookie; 459 460 MPASS(m == NULL); 461 if (is_hashfilter(sc)) 462 cookie = CPL_COOKIE_HASHFILTER; 463 else 464 cookie = CPL_COOKIE_TOM; 465 466 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 467 } 468 469 static int 470 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 471 { 472 struct adapter *sc = iq->adapter; 473 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 474 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 475 u_int cookie; 476 477 MPASS(m == NULL); 478 if (is_etid(sc, tid)) 479 cookie = CPL_COOKIE_ETHOFLD; 480 else 481 cookie = CPL_COOKIE_TOM; 482 483 return (fw4_ack_handlers[cookie](iq, rss, m)); 484 } 485 486 static void 487 t4_init_shared_cpl_handlers(void) 488 { 489 490 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 491 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 492 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 493 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 494 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 495 } 496 497 void 498 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 499 { 500 uintptr_t *loc; 501 502 MPASS(opcode < nitems(t4_cpl_handler)); 503 MPASS(cookie > CPL_COOKIE_RESERVED); 504 MPASS(cookie < NUM_CPL_COOKIES); 505 
MPASS(t4_cpl_handler[opcode] != NULL); 506 507 switch (opcode) { 508 case CPL_SET_TCB_RPL: 509 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 510 break; 511 case CPL_L2T_WRITE_RPL: 512 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 513 break; 514 case CPL_ACT_OPEN_RPL: 515 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 516 break; 517 case CPL_ABORT_RPL_RSS: 518 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 519 break; 520 case CPL_FW4_ACK: 521 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 522 break; 523 default: 524 MPASS(0); 525 return; 526 } 527 MPASS(h == NULL || *loc == (uintptr_t)NULL); 528 atomic_store_rel_ptr(loc, (uintptr_t)h); 529 } 530 531 /* 532 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 533 */ 534 void 535 t4_sge_modload(void) 536 { 537 538 if (fl_pktshift < 0 || fl_pktshift > 7) { 539 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 540 " using 0 instead.\n", fl_pktshift); 541 fl_pktshift = 0; 542 } 543 544 if (spg_len != 64 && spg_len != 128) { 545 int len; 546 547 #if defined(__i386__) || defined(__amd64__) 548 len = cpu_clflush_line_size > 64 ? 
128 : 64; 549 #else 550 len = 64; 551 #endif 552 if (spg_len != -1) { 553 printf("Invalid hw.cxgbe.spg_len value (%d)," 554 " using %d instead.\n", spg_len, len); 555 } 556 spg_len = len; 557 } 558 559 if (cong_drop < -1 || cong_drop > 2) { 560 printf("Invalid hw.cxgbe.cong_drop value (%d)," 561 " using 0 instead.\n", cong_drop); 562 cong_drop = 0; 563 } 564 #ifdef TCP_OFFLOAD 565 if (ofld_cong_drop < -1 || ofld_cong_drop > 2) { 566 printf("Invalid hw.cxgbe.ofld_cong_drop value (%d)," 567 " using 0 instead.\n", ofld_cong_drop); 568 ofld_cong_drop = 0; 569 } 570 #endif 571 572 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 573 printf("Invalid hw.cxgbe.tscale value (%d)," 574 " using 1 instead.\n", tscale); 575 tscale = 1; 576 } 577 578 if (largest_rx_cluster != MCLBYTES && 579 largest_rx_cluster != MJUMPAGESIZE && 580 largest_rx_cluster != MJUM9BYTES && 581 largest_rx_cluster != MJUM16BYTES) { 582 printf("Invalid hw.cxgbe.largest_rx_cluster value (%d)," 583 " using %d instead.\n", largest_rx_cluster, MJUM16BYTES); 584 largest_rx_cluster = MJUM16BYTES; 585 } 586 587 if (safest_rx_cluster != MCLBYTES && 588 safest_rx_cluster != MJUMPAGESIZE && 589 safest_rx_cluster != MJUM9BYTES && 590 safest_rx_cluster != MJUM16BYTES) { 591 printf("Invalid hw.cxgbe.safest_rx_cluster value (%d)," 592 " using %d instead.\n", safest_rx_cluster, MJUMPAGESIZE); 593 safest_rx_cluster = MJUMPAGESIZE; 594 } 595 596 extfree_refs = counter_u64_alloc(M_WAITOK); 597 extfree_rels = counter_u64_alloc(M_WAITOK); 598 pullups = counter_u64_alloc(M_WAITOK); 599 defrags = counter_u64_alloc(M_WAITOK); 600 counter_u64_zero(extfree_refs); 601 counter_u64_zero(extfree_rels); 602 counter_u64_zero(pullups); 603 counter_u64_zero(defrags); 604 605 t4_init_shared_cpl_handlers(); 606 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 607 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 608 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 609 #ifdef RATELIMIT 610 
t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 611 CPL_COOKIE_ETHOFLD); 612 #endif 613 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 614 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 615 } 616 617 void 618 t4_sge_modunload(void) 619 { 620 621 counter_u64_free(extfree_refs); 622 counter_u64_free(extfree_rels); 623 counter_u64_free(pullups); 624 counter_u64_free(defrags); 625 } 626 627 uint64_t 628 t4_sge_extfree_refs(void) 629 { 630 uint64_t refs, rels; 631 632 rels = counter_u64_fetch(extfree_rels); 633 refs = counter_u64_fetch(extfree_refs); 634 635 return (refs - rels); 636 } 637 638 /* max 4096 */ 639 #define MAX_PACK_BOUNDARY 512 640 641 static inline void 642 setup_pad_and_pack_boundaries(struct adapter *sc) 643 { 644 uint32_t v, m; 645 int pad, pack, pad_shift; 646 647 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 648 X_INGPADBOUNDARY_SHIFT; 649 pad = fl_pad; 650 if (fl_pad < (1 << pad_shift) || 651 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 652 !powerof2(fl_pad)) { 653 /* 654 * If there is any chance that we might use buffer packing and 655 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 656 * it to the minimum allowed in all other cases. 657 */ 658 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 659 660 /* 661 * For fl_pad = 0 we'll still write a reasonable value to the 662 * register but all the freelists will opt out of padding. 663 * We'll complain here only if the user tried to set it to a 664 * value greater than 0 that was invalid. 665 */ 666 if (fl_pad > 0) { 667 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 668 " (%d), using %d instead.\n", fl_pad, pad); 669 } 670 } 671 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 672 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 673 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 674 675 if (is_t4(sc)) { 676 if (fl_pack != -1 && fl_pack != pad) { 677 /* Complain but carry on. 
*/ 678 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 679 " using %d instead.\n", fl_pack, pad); 680 } 681 return; 682 } 683 684 pack = fl_pack; 685 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 686 !powerof2(fl_pack)) { 687 if (sc->params.pci.mps > MAX_PACK_BOUNDARY) 688 pack = MAX_PACK_BOUNDARY; 689 else 690 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 691 MPASS(powerof2(pack)); 692 if (pack < 16) 693 pack = 16; 694 if (pack == 32) 695 pack = 64; 696 if (pack > 4096) 697 pack = 4096; 698 if (fl_pack != -1) { 699 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 700 " (%d), using %d instead.\n", fl_pack, pack); 701 } 702 } 703 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 704 if (pack == 16) 705 v = V_INGPACKBOUNDARY(0); 706 else 707 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 708 709 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 710 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 711 } 712 713 /* 714 * adap->params.vpd.cclk must be set up before this is called. 
715 */ 716 void 717 t4_tweak_chip_settings(struct adapter *sc) 718 { 719 int i, reg; 720 uint32_t v, m; 721 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 722 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 723 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 724 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 725 static int sw_buf_sizes[] = { 726 MCLBYTES, 727 MJUMPAGESIZE, 728 MJUM9BYTES, 729 MJUM16BYTES 730 }; 731 732 KASSERT(sc->flags & MASTER_PF, 733 ("%s: trying to change chip settings when not master.", __func__)); 734 735 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 736 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 737 V_EGRSTATUSPAGESIZE(spg_len == 128); 738 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 739 740 setup_pad_and_pack_boundaries(sc); 741 742 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 743 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 744 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 745 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 746 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 747 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 748 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 749 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 750 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 751 752 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0, 4096); 753 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE1, 65536); 754 reg = A_SGE_FL_BUFFER_SIZE2; 755 for (i = 0; i < nitems(sw_buf_sizes); i++) { 756 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 757 t4_write_reg(sc, reg, sw_buf_sizes[i]); 758 reg += 4; 759 MPASS(reg <= A_SGE_FL_BUFFER_SIZE15); 760 t4_write_reg(sc, reg, sw_buf_sizes[i] - CL_METADATA_SIZE); 761 reg += 4; 762 } 763 764 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 765 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 766 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 767 768 KASSERT(intr_timer[0] <= timer_max, 769 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 770 timer_max)); 771 for (i = 1; i < 
nitems(intr_timer); i++) { 772 KASSERT(intr_timer[i] >= intr_timer[i - 1], 773 ("%s: timers not listed in increasing order (%d)", 774 __func__, i)); 775 776 while (intr_timer[i] > timer_max) { 777 if (i == nitems(intr_timer) - 1) { 778 intr_timer[i] = timer_max; 779 break; 780 } 781 intr_timer[i] += intr_timer[i - 1]; 782 intr_timer[i] /= 2; 783 } 784 } 785 786 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | 787 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); 788 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); 789 v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | 790 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); 791 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); 792 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | 793 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); 794 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); 795 796 if (chip_id(sc) >= CHELSIO_T6) { 797 m = V_TSCALE(M_TSCALE); 798 if (tscale == 1) 799 v = 0; 800 else 801 v = V_TSCALE(tscale - 2); 802 t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); 803 804 if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { 805 m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | 806 V_WRTHRTHRESH(M_WRTHRTHRESH); 807 t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); 808 v &= ~m; 809 v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | 810 V_WRTHRTHRESH(16); 811 t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); 812 } 813 } 814 815 /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ 816 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 817 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); 818 819 /* 820 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been 821 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we 822 * may have to deal with is MAXPHYS + 1 page. 823 */ 824 v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); 825 t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); 826 827 /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. 
*/ 828 m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; 829 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); 830 831 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 832 F_RESETDDPOFFSET; 833 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 834 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); 835 } 836 837 /* 838 * SGE wants the buffer to be at least 64B and then a multiple of 16. Its 839 * address mut be 16B aligned. If padding is in use the buffer's start and end 840 * need to be aligned to the pad boundary as well. We'll just make sure that 841 * the size is a multiple of the pad boundary here, it is up to the buffer 842 * allocation code to make sure the start of the buffer is aligned. 843 */ 844 static inline int 845 hwsz_ok(struct adapter *sc, int hwsz) 846 { 847 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; 848 849 return (hwsz >= 64 && (hwsz & mask) == 0); 850 } 851 852 /* 853 * Initialize the rx buffer sizes and figure out which zones the buffers will 854 * be allocated from. 
855 */ 856 void 857 t4_init_rx_buf_info(struct adapter *sc) 858 { 859 struct sge *s = &sc->sge; 860 struct sge_params *sp = &sc->params.sge; 861 int i, j, n; 862 static int sw_buf_sizes[] = { /* Sorted by size */ 863 MCLBYTES, 864 MJUMPAGESIZE, 865 MJUM9BYTES, 866 MJUM16BYTES 867 }; 868 struct rx_buf_info *rxb; 869 870 s->safe_zidx = -1; 871 rxb = &s->rx_buf_info[0]; 872 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 873 rxb->size1 = sw_buf_sizes[i]; 874 rxb->zone = m_getzone(rxb->size1); 875 rxb->type = m_gettype(rxb->size1); 876 rxb->size2 = 0; 877 rxb->hwidx1 = -1; 878 rxb->hwidx2 = -1; 879 for (j = 0; j < SGE_FLBUF_SIZES; j++) { 880 int hwsize = sp->sge_fl_buffer_size[j]; 881 882 if (!hwsz_ok(sc, hwsize)) 883 continue; 884 885 /* hwidx for size1 */ 886 if (rxb->hwidx1 == -1 && rxb->size1 == hwsize) 887 rxb->hwidx1 = j; 888 889 /* hwidx for size2 (buffer packing) */ 890 if (rxb->size1 - CL_METADATA_SIZE < hwsize) 891 continue; 892 n = rxb->size1 - hwsize - CL_METADATA_SIZE; 893 if (n == 0) { 894 rxb->hwidx2 = j; 895 rxb->size2 = hwsize; 896 break; /* stop looking */ 897 } 898 if (rxb->hwidx2 != -1) { 899 if (n < sp->sge_fl_buffer_size[rxb->hwidx2] - 900 hwsize - CL_METADATA_SIZE) { 901 rxb->hwidx2 = j; 902 rxb->size2 = hwsize; 903 } 904 } else if (n <= 2 * CL_METADATA_SIZE) { 905 rxb->hwidx2 = j; 906 rxb->size2 = hwsize; 907 } 908 } 909 if (rxb->hwidx2 != -1) 910 sc->flags |= BUF_PACKING_OK; 911 if (s->safe_zidx == -1 && rxb->size1 == safest_rx_cluster) 912 s->safe_zidx = i; 913 } 914 } 915 916 /* 917 * Verify some basic SGE settings for the PF and VF driver, and other 918 * miscellaneous settings for the PF driver. 
919 */ 920 int 921 t4_verify_chip_settings(struct adapter *sc) 922 { 923 struct sge_params *sp = &sc->params.sge; 924 uint32_t m, v, r; 925 int rc = 0; 926 const uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 927 928 m = F_RXPKTCPLMODE; 929 v = F_RXPKTCPLMODE; 930 r = sp->sge_control; 931 if ((r & m) != v) { 932 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 933 rc = EINVAL; 934 } 935 936 /* 937 * If this changes then every single use of PAGE_SHIFT in the driver 938 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 939 */ 940 if (sp->page_shift != PAGE_SHIFT) { 941 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 942 rc = EINVAL; 943 } 944 945 if (sc->flags & IS_VF) 946 return (0); 947 948 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 949 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 950 if (r != v) { 951 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 952 if (sc->vres.ddp.size != 0) 953 rc = EINVAL; 954 } 955 956 m = v = F_TDDPTAGTCB; 957 r = t4_read_reg(sc, A_ULP_RX_CTL); 958 if ((r & m) != v) { 959 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 960 if (sc->vres.ddp.size != 0) 961 rc = EINVAL; 962 } 963 964 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 965 F_RESETDDPOFFSET; 966 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 967 r = t4_read_reg(sc, A_TP_PARA_REG5); 968 if ((r & m) != v) { 969 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 970 if (sc->vres.ddp.size != 0) 971 rc = EINVAL; 972 } 973 974 return (rc); 975 } 976 977 int 978 t4_create_dma_tag(struct adapter *sc) 979 { 980 int rc; 981 982 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 983 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 984 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 985 NULL, &sc->dmat); 986 if (rc != 0) { 987 device_printf(sc->dev, 988 "failed to create main DMA tag: %d\n", rc); 989 } 990 991 return (rc); 992 } 993 
994 void 995 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 996 struct sysctl_oid_list *children) 997 { 998 struct sge_params *sp = &sc->params.sge; 999 1000 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 1001 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 1002 sysctl_bufsizes, "A", "freelist buffer sizes"); 1003 1004 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 1005 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 1006 1007 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 1008 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 1009 1010 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 1011 NULL, sp->spg_len, "status page size (bytes)"); 1012 1013 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 1014 NULL, cong_drop, "congestion drop setting"); 1015 #ifdef TCP_OFFLOAD 1016 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ofld_cong_drop", CTLFLAG_RD, 1017 NULL, ofld_cong_drop, "congestion drop setting"); 1018 #endif 1019 1020 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 1021 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 1022 } 1023 1024 int 1025 t4_destroy_dma_tag(struct adapter *sc) 1026 { 1027 if (sc->dmat) 1028 bus_dma_tag_destroy(sc->dmat); 1029 1030 return (0); 1031 } 1032 1033 /* 1034 * Allocate and initialize the firmware event queue, control queues, and special 1035 * purpose rx queues owned by the adapter. 1036 * 1037 * Returns errno on failure. Resources allocated up to that point may still be 1038 * allocated. Caller is responsible for cleanup in case this function fails. 1039 */ 1040 int 1041 t4_setup_adapter_queues(struct adapter *sc) 1042 { 1043 int rc, i; 1044 1045 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1046 1047 /* 1048 * Firmware event queue 1049 */ 1050 rc = alloc_fwq(sc); 1051 if (rc != 0) 1052 return (rc); 1053 1054 /* 1055 * That's all for the VF driver. 
1056 */ 1057 if (sc->flags & IS_VF) 1058 return (rc); 1059 1060 /* 1061 * XXX: General purpose rx queues, one per port. 1062 */ 1063 1064 /* 1065 * Control queues, one per port. 1066 */ 1067 for_each_port(sc, i) { 1068 rc = alloc_ctrlq(sc, i); 1069 if (rc != 0) 1070 return (rc); 1071 } 1072 1073 return (rc); 1074 } 1075 1076 /* 1077 * Idempotent 1078 */ 1079 int 1080 t4_teardown_adapter_queues(struct adapter *sc) 1081 { 1082 int i; 1083 1084 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1085 1086 if (sc->sge.ctrlq != NULL) { 1087 MPASS(!(sc->flags & IS_VF)); /* VFs don't allocate ctrlq. */ 1088 for_each_port(sc, i) 1089 free_ctrlq(sc, i); 1090 } 1091 free_fwq(sc); 1092 1093 return (0); 1094 } 1095 1096 /* Maximum payload that could arrive with a single iq descriptor. */ 1097 static inline int 1098 max_rx_payload(struct adapter *sc, struct ifnet *ifp, const bool ofld) 1099 { 1100 int maxp; 1101 1102 /* large enough even when hw VLAN extraction is disabled */ 1103 maxp = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 1104 ETHER_VLAN_ENCAP_LEN + ifp->if_mtu; 1105 if (ofld && sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS && 1106 maxp < sc->params.tp.max_rx_pdu) 1107 maxp = sc->params.tp.max_rx_pdu; 1108 return (maxp); 1109 } 1110 1111 int 1112 t4_setup_vi_queues(struct vi_info *vi) 1113 { 1114 int rc = 0, i, intr_idx; 1115 struct sge_rxq *rxq; 1116 struct sge_txq *txq; 1117 #ifdef TCP_OFFLOAD 1118 struct sge_ofld_rxq *ofld_rxq; 1119 #endif 1120 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1121 struct sge_ofld_txq *ofld_txq; 1122 #endif 1123 #ifdef DEV_NETMAP 1124 int saved_idx, iqidx; 1125 struct sge_nm_rxq *nm_rxq; 1126 struct sge_nm_txq *nm_txq; 1127 #endif 1128 struct adapter *sc = vi->adapter; 1129 struct ifnet *ifp = vi->ifp; 1130 int maxp; 1131 1132 /* Interrupt vector to start from (when using multiple vectors) */ 1133 intr_idx = vi->first_intr; 1134 1135 #ifdef DEV_NETMAP 1136 saved_idx = intr_idx; 1137 if (ifp->if_capabilities & IFCAP_NETMAP) { 1138 1139 /* netmap 
is supported with direct interrupts only. */ 1140 MPASS(!forwarding_intr_to_fwq(sc)); 1141 MPASS(vi->first_intr >= 0); 1142 1143 /* 1144 * We don't have buffers to back the netmap rx queues 1145 * right now so we create the queues in a way that 1146 * doesn't set off any congestion signal in the chip. 1147 */ 1148 for_each_nm_rxq(vi, i, nm_rxq) { 1149 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i); 1150 if (rc != 0) 1151 goto done; 1152 intr_idx++; 1153 } 1154 1155 for_each_nm_txq(vi, i, nm_txq) { 1156 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); 1157 rc = alloc_nm_txq(vi, nm_txq, iqidx, i); 1158 if (rc != 0) 1159 goto done; 1160 } 1161 } 1162 1163 /* Normal rx queues and netmap rx queues share the same interrupts. */ 1164 intr_idx = saved_idx; 1165 #endif 1166 1167 /* 1168 * Allocate rx queues first because a default iqid is required when 1169 * creating a tx queue. 1170 */ 1171 maxp = max_rx_payload(sc, ifp, false); 1172 for_each_rxq(vi, i, rxq) { 1173 rc = alloc_rxq(vi, rxq, i, intr_idx, maxp); 1174 if (rc != 0) 1175 goto done; 1176 if (!forwarding_intr_to_fwq(sc)) 1177 intr_idx++; 1178 } 1179 #ifdef DEV_NETMAP 1180 if (ifp->if_capabilities & IFCAP_NETMAP) 1181 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1182 #endif 1183 #ifdef TCP_OFFLOAD 1184 maxp = max_rx_payload(sc, ifp, true); 1185 for_each_ofld_rxq(vi, i, ofld_rxq) { 1186 rc = alloc_ofld_rxq(vi, ofld_rxq, i, intr_idx, maxp); 1187 if (rc != 0) 1188 goto done; 1189 if (!forwarding_intr_to_fwq(sc)) 1190 intr_idx++; 1191 } 1192 #endif 1193 1194 /* 1195 * Now the tx queues. 
1196 */ 1197 for_each_txq(vi, i, txq) { 1198 rc = alloc_txq(vi, txq, i); 1199 if (rc != 0) 1200 goto done; 1201 } 1202 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1203 for_each_ofld_txq(vi, i, ofld_txq) { 1204 rc = alloc_ofld_txq(vi, ofld_txq, i); 1205 if (rc != 0) 1206 goto done; 1207 } 1208 #endif 1209 done: 1210 if (rc) 1211 t4_teardown_vi_queues(vi); 1212 1213 return (rc); 1214 } 1215 1216 /* 1217 * Idempotent 1218 */ 1219 int 1220 t4_teardown_vi_queues(struct vi_info *vi) 1221 { 1222 int i; 1223 struct sge_rxq *rxq; 1224 struct sge_txq *txq; 1225 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1226 struct sge_ofld_txq *ofld_txq; 1227 #endif 1228 #ifdef TCP_OFFLOAD 1229 struct sge_ofld_rxq *ofld_rxq; 1230 #endif 1231 #ifdef DEV_NETMAP 1232 struct sge_nm_rxq *nm_rxq; 1233 struct sge_nm_txq *nm_txq; 1234 #endif 1235 1236 #ifdef DEV_NETMAP 1237 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1238 for_each_nm_txq(vi, i, nm_txq) { 1239 free_nm_txq(vi, nm_txq); 1240 } 1241 1242 for_each_nm_rxq(vi, i, nm_rxq) { 1243 free_nm_rxq(vi, nm_rxq); 1244 } 1245 } 1246 #endif 1247 1248 /* 1249 * Take down all the tx queues first, as they reference the rx queues 1250 * (for egress updates, etc.). 1251 */ 1252 1253 for_each_txq(vi, i, txq) { 1254 free_txq(vi, txq); 1255 } 1256 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1257 for_each_ofld_txq(vi, i, ofld_txq) { 1258 free_ofld_txq(vi, ofld_txq); 1259 } 1260 #endif 1261 1262 /* 1263 * Then take down the rx queues. 1264 */ 1265 1266 for_each_rxq(vi, i, rxq) { 1267 free_rxq(vi, rxq); 1268 } 1269 #ifdef TCP_OFFLOAD 1270 for_each_ofld_rxq(vi, i, ofld_rxq) { 1271 free_ofld_rxq(vi, ofld_rxq); 1272 } 1273 #endif 1274 1275 return (0); 1276 } 1277 1278 /* 1279 * Interrupt handler when the driver is using only 1 interrupt. This is a very 1280 * unusual scenario. 1281 * 1282 * a) Deals with errors, if any. 1283 * b) Services firmware event queue, which is taking interrupts for all other 1284 * queues. 
1285 */ 1286 void 1287 t4_intr_all(void *arg) 1288 { 1289 struct adapter *sc = arg; 1290 struct sge_iq *fwq = &sc->sge.fwq; 1291 1292 MPASS(sc->intr_count == 1); 1293 1294 if (sc->intr_type == INTR_INTX) 1295 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1296 1297 t4_intr_err(arg); 1298 t4_intr_evt(fwq); 1299 } 1300 1301 /* 1302 * Interrupt handler for errors (installed directly when multiple interrupts are 1303 * being used, or called by t4_intr_all). 1304 */ 1305 void 1306 t4_intr_err(void *arg) 1307 { 1308 struct adapter *sc = arg; 1309 uint32_t v; 1310 const bool verbose = (sc->debug_flags & DF_VERBOSE_SLOWINTR) != 0; 1311 1312 if (atomic_load_int(&sc->error_flags) & ADAP_FATAL_ERR) 1313 return; 1314 1315 v = t4_read_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE)); 1316 if (v & F_PFSW) { 1317 sc->swintr++; 1318 t4_write_reg(sc, MYPF_REG(A_PL_PF_INT_CAUSE), v); 1319 } 1320 1321 if (t4_slow_intr_handler(sc, verbose)) 1322 t4_fatal_err(sc, false); 1323 } 1324 1325 /* 1326 * Interrupt handler for iq-only queues. The firmware event queue is the only 1327 * such queue right now. 1328 */ 1329 void 1330 t4_intr_evt(void *arg) 1331 { 1332 struct sge_iq *iq = arg; 1333 1334 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1335 service_iq(iq, 0); 1336 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1337 } 1338 } 1339 1340 /* 1341 * Interrupt handler for iq+fl queues. 1342 */ 1343 void 1344 t4_intr(void *arg) 1345 { 1346 struct sge_iq *iq = arg; 1347 1348 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1349 service_iq_fl(iq, 0); 1350 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1351 } 1352 } 1353 1354 #ifdef DEV_NETMAP 1355 /* 1356 * Interrupt handler for netmap rx queues. 
1357 */ 1358 void 1359 t4_nm_intr(void *arg) 1360 { 1361 struct sge_nm_rxq *nm_rxq = arg; 1362 1363 if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) { 1364 service_nm_rxq(nm_rxq); 1365 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON); 1366 } 1367 } 1368 1369 /* 1370 * Interrupt handler for vectors shared between NIC and netmap rx queues. 1371 */ 1372 void 1373 t4_vi_intr(void *arg) 1374 { 1375 struct irq *irq = arg; 1376 1377 MPASS(irq->nm_rxq != NULL); 1378 t4_nm_intr(irq->nm_rxq); 1379 1380 MPASS(irq->rxq != NULL); 1381 t4_intr(irq->rxq); 1382 } 1383 #endif 1384 1385 /* 1386 * Deals with interrupts on an iq-only (no freelist) queue. 1387 */ 1388 static int 1389 service_iq(struct sge_iq *iq, int budget) 1390 { 1391 struct sge_iq *q; 1392 struct adapter *sc = iq->adapter; 1393 struct iq_desc *d = &iq->desc[iq->cidx]; 1394 int ndescs = 0, limit; 1395 int rsp_type; 1396 uint32_t lq; 1397 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1398 1399 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1400 KASSERT((iq->flags & IQ_HAS_FL) == 0, 1401 ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq, 1402 iq->flags)); 1403 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1404 MPASS((iq->flags & IQ_LRO_ENABLED) == 0); 1405 1406 limit = budget ? budget : iq->qsize / 16; 1407 1408 /* 1409 * We always come back and check the descriptor ring for new indirect 1410 * interrupts and other responses after running a single handler. 
1411 */ 1412 for (;;) { 1413 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1414 1415 rmb(); 1416 1417 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1418 lq = be32toh(d->rsp.pldbuflen_qid); 1419 1420 switch (rsp_type) { 1421 case X_RSPD_TYPE_FLBUF: 1422 panic("%s: data for an iq (%p) with no freelist", 1423 __func__, iq); 1424 1425 /* NOTREACHED */ 1426 1427 case X_RSPD_TYPE_CPL: 1428 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1429 ("%s: bad opcode %02x.", __func__, 1430 d->rss.opcode)); 1431 t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL); 1432 break; 1433 1434 case X_RSPD_TYPE_INTR: 1435 /* 1436 * There are 1K interrupt-capable queues (qids 0 1437 * through 1023). A response type indicating a 1438 * forwarded interrupt with a qid >= 1K is an 1439 * iWARP async notification. 1440 */ 1441 if (__predict_true(lq >= 1024)) { 1442 t4_an_handler(iq, &d->rsp); 1443 break; 1444 } 1445 1446 q = sc->sge.iqmap[lq - sc->sge.iq_start - 1447 sc->sge.iq_base]; 1448 if (atomic_cmpset_int(&q->state, IQS_IDLE, 1449 IQS_BUSY)) { 1450 if (service_iq_fl(q, q->qsize / 16) == 0) { 1451 (void) atomic_cmpset_int(&q->state, 1452 IQS_BUSY, IQS_IDLE); 1453 } else { 1454 STAILQ_INSERT_TAIL(&iql, q, 1455 link); 1456 } 1457 } 1458 break; 1459 1460 default: 1461 KASSERT(0, 1462 ("%s: illegal response type %d on iq %p", 1463 __func__, rsp_type, iq)); 1464 log(LOG_ERR, 1465 "%s: illegal response type %d on iq %p", 1466 device_get_nameunit(sc->dev), rsp_type, iq); 1467 break; 1468 } 1469 1470 d++; 1471 if (__predict_false(++iq->cidx == iq->sidx)) { 1472 iq->cidx = 0; 1473 iq->gen ^= F_RSPD_GEN; 1474 d = &iq->desc[0]; 1475 } 1476 if (__predict_false(++ndescs == limit)) { 1477 t4_write_reg(sc, sc->sge_gts_reg, 1478 V_CIDXINC(ndescs) | 1479 V_INGRESSQID(iq->cntxt_id) | 1480 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1481 ndescs = 0; 1482 1483 if (budget) { 1484 return (EINPROGRESS); 1485 } 1486 } 1487 } 1488 1489 if (STAILQ_EMPTY(&iql)) 1490 break; 1491 1492 /* 1493 * Process the head 
only, and send it to the back of the list if 1494 * it's still not done. 1495 */ 1496 q = STAILQ_FIRST(&iql); 1497 STAILQ_REMOVE_HEAD(&iql, link); 1498 if (service_iq_fl(q, q->qsize / 8) == 0) 1499 (void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 1500 else 1501 STAILQ_INSERT_TAIL(&iql, q, link); 1502 } 1503 1504 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1505 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1506 1507 return (0); 1508 } 1509 1510 #if defined(INET) || defined(INET6) 1511 static inline int 1512 sort_before_lro(struct lro_ctrl *lro) 1513 { 1514 1515 return (lro->lro_mbuf_max != 0); 1516 } 1517 #endif 1518 1519 #define CGBE_SHIFT_SCALE 10 1520 1521 static inline uint64_t 1522 t4_tstmp_to_ns(struct adapter *sc, uint64_t lf) 1523 { 1524 struct clock_sync *cur, dcur; 1525 uint64_t hw_clocks; 1526 uint64_t hw_clk_div; 1527 sbintime_t sbt_cur_to_prev, sbt; 1528 uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b. */ 1529 seqc_t gen; 1530 1531 for (;;) { 1532 cur = &sc->cal_info[sc->cal_current]; 1533 gen = seqc_read(&cur->gen); 1534 if (gen == 0) 1535 return (0); 1536 dcur = *cur; 1537 if (seqc_consistent(&cur->gen, gen)) 1538 break; 1539 } 1540 1541 /* 1542 * Our goal here is to have a result that is: 1543 * 1544 * ( (cur_time - prev_time) ) 1545 * ((hw_tstmp - hw_prev) * ----------------------------- ) + prev_time 1546 * ( (hw_cur - hw_prev) ) 1547 * 1548 * With the constraints that we cannot use float and we 1549 * don't want to overflow the uint64_t numbers we are using. 
1550 */ 1551 hw_clocks = hw_tstmp - dcur.hw_prev; 1552 sbt_cur_to_prev = (dcur.sbt_cur - dcur.sbt_prev); 1553 hw_clk_div = dcur.hw_cur - dcur.hw_prev; 1554 sbt = hw_clocks * sbt_cur_to_prev / hw_clk_div + dcur.sbt_prev; 1555 return (sbttons(sbt)); 1556 } 1557 1558 static inline void 1559 move_to_next_rxbuf(struct sge_fl *fl) 1560 { 1561 1562 fl->rx_offset = 0; 1563 if (__predict_false((++fl->cidx & 7) == 0)) { 1564 uint16_t cidx = fl->cidx >> 3; 1565 1566 if (__predict_false(cidx == fl->sidx)) 1567 fl->cidx = cidx = 0; 1568 fl->hw_cidx = cidx; 1569 } 1570 } 1571 1572 /* 1573 * Deals with interrupts on an iq+fl queue. 1574 */ 1575 static int 1576 service_iq_fl(struct sge_iq *iq, int budget) 1577 { 1578 struct sge_rxq *rxq = iq_to_rxq(iq); 1579 struct sge_fl *fl; 1580 struct adapter *sc = iq->adapter; 1581 struct iq_desc *d = &iq->desc[iq->cidx]; 1582 int ndescs, limit; 1583 int rsp_type, starved; 1584 uint32_t lq; 1585 uint16_t fl_hw_cidx; 1586 struct mbuf *m0; 1587 #if defined(INET) || defined(INET6) 1588 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1589 struct lro_ctrl *lro = &rxq->lro; 1590 #endif 1591 1592 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1593 MPASS(iq->flags & IQ_HAS_FL); 1594 1595 ndescs = 0; 1596 #if defined(INET) || defined(INET6) 1597 if (iq->flags & IQ_ADJ_CREDIT) { 1598 MPASS(sort_before_lro(lro)); 1599 iq->flags &= ~IQ_ADJ_CREDIT; 1600 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { 1601 tcp_lro_flush_all(lro); 1602 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | 1603 V_INGRESSQID((u32)iq->cntxt_id) | 1604 V_SEINTARM(iq->intr_params)); 1605 return (0); 1606 } 1607 ndescs = 1; 1608 } 1609 #else 1610 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1611 #endif 1612 1613 limit = budget ? 
budget : iq->qsize / 16; 1614 fl = &rxq->fl; 1615 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1616 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1617 1618 rmb(); 1619 1620 m0 = NULL; 1621 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1622 lq = be32toh(d->rsp.pldbuflen_qid); 1623 1624 switch (rsp_type) { 1625 case X_RSPD_TYPE_FLBUF: 1626 if (lq & F_RSPD_NEWBUF) { 1627 if (fl->rx_offset > 0) 1628 move_to_next_rxbuf(fl); 1629 lq = G_RSPD_LEN(lq); 1630 } 1631 if (IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 4) { 1632 FL_LOCK(fl); 1633 refill_fl(sc, fl, 64); 1634 FL_UNLOCK(fl); 1635 fl_hw_cidx = fl->hw_cidx; 1636 } 1637 1638 if (d->rss.opcode == CPL_RX_PKT) { 1639 if (__predict_true(eth_rx(sc, rxq, d, lq) == 0)) 1640 break; 1641 goto out; 1642 } 1643 m0 = get_fl_payload(sc, fl, lq); 1644 if (__predict_false(m0 == NULL)) 1645 goto out; 1646 1647 /* fall through */ 1648 1649 case X_RSPD_TYPE_CPL: 1650 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1651 ("%s: bad opcode %02x.", __func__, d->rss.opcode)); 1652 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1653 break; 1654 1655 case X_RSPD_TYPE_INTR: 1656 1657 /* 1658 * There are 1K interrupt-capable queues (qids 0 1659 * through 1023). A response type indicating a 1660 * forwarded interrupt with a qid >= 1K is an 1661 * iWARP async notification. That is the only 1662 * acceptable indirect interrupt on this queue. 
1663 */ 1664 if (__predict_false(lq < 1024)) { 1665 panic("%s: indirect interrupt on iq_fl %p " 1666 "with qid %u", __func__, iq, lq); 1667 } 1668 1669 t4_an_handler(iq, &d->rsp); 1670 break; 1671 1672 default: 1673 KASSERT(0, ("%s: illegal response type %d on iq %p", 1674 __func__, rsp_type, iq)); 1675 log(LOG_ERR, "%s: illegal response type %d on iq %p", 1676 device_get_nameunit(sc->dev), rsp_type, iq); 1677 break; 1678 } 1679 1680 d++; 1681 if (__predict_false(++iq->cidx == iq->sidx)) { 1682 iq->cidx = 0; 1683 iq->gen ^= F_RSPD_GEN; 1684 d = &iq->desc[0]; 1685 } 1686 if (__predict_false(++ndescs == limit)) { 1687 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1688 V_INGRESSQID(iq->cntxt_id) | 1689 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1690 1691 #if defined(INET) || defined(INET6) 1692 if (iq->flags & IQ_LRO_ENABLED && 1693 !sort_before_lro(lro) && 1694 sc->lro_timeout != 0) { 1695 tcp_lro_flush_inactive(lro, &lro_timeout); 1696 } 1697 #endif 1698 if (budget) 1699 return (EINPROGRESS); 1700 ndescs = 0; 1701 } 1702 } 1703 out: 1704 #if defined(INET) || defined(INET6) 1705 if (iq->flags & IQ_LRO_ENABLED) { 1706 if (ndescs > 0 && lro->lro_mbuf_count > 8) { 1707 MPASS(sort_before_lro(lro)); 1708 /* hold back one credit and don't flush LRO state */ 1709 iq->flags |= IQ_ADJ_CREDIT; 1710 ndescs--; 1711 } else { 1712 tcp_lro_flush_all(lro); 1713 } 1714 } 1715 #endif 1716 1717 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1718 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1719 1720 FL_LOCK(fl); 1721 starved = refill_fl(sc, fl, 64); 1722 FL_UNLOCK(fl); 1723 if (__predict_false(starved != 0)) 1724 add_fl_to_sfl(sc, fl); 1725 1726 return (0); 1727 } 1728 1729 static inline struct cluster_metadata * 1730 cl_metadata(struct fl_sdesc *sd) 1731 { 1732 1733 return ((void *)(sd->cl + sd->moff)); 1734 } 1735 1736 static void 1737 rxb_free(struct mbuf *m) 1738 { 1739 struct cluster_metadata *clm = m->m_ext.ext_arg1; 1740 1741 
uma_zfree(clm->zone, clm->cl); 1742 counter_u64_add(extfree_rels, 1); 1743 } 1744 1745 /* 1746 * The mbuf returned comes from zone_muf and carries the payload in one of these 1747 * ways 1748 * a) complete frame inside the mbuf 1749 * b) m_cljset (for clusters without metadata) 1750 * d) m_extaddref (cluster with metadata) 1751 */ 1752 static struct mbuf * 1753 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1754 int remaining) 1755 { 1756 struct mbuf *m; 1757 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1758 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1759 struct cluster_metadata *clm; 1760 int len, blen; 1761 caddr_t payload; 1762 1763 if (fl->flags & FL_BUF_PACKING) { 1764 u_int l, pad; 1765 1766 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ 1767 len = min(remaining, blen); 1768 payload = sd->cl + fl->rx_offset; 1769 1770 l = fr_offset + len; 1771 pad = roundup2(l, fl->buf_boundary) - l; 1772 if (fl->rx_offset + len + pad < rxb->size2) 1773 blen = len + pad; 1774 MPASS(fl->rx_offset + blen <= rxb->size2); 1775 } else { 1776 MPASS(fl->rx_offset == 0); /* not packing */ 1777 blen = rxb->size1; 1778 len = min(remaining, blen); 1779 payload = sd->cl; 1780 } 1781 1782 if (fr_offset == 0) { 1783 m = m_gethdr(M_NOWAIT, MT_DATA); 1784 if (__predict_false(m == NULL)) 1785 return (NULL); 1786 m->m_pkthdr.len = remaining; 1787 } else { 1788 m = m_get(M_NOWAIT, MT_DATA); 1789 if (__predict_false(m == NULL)) 1790 return (NULL); 1791 } 1792 m->m_len = len; 1793 kmsan_mark(payload, len, KMSAN_STATE_INITED); 1794 1795 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { 1796 /* copy data to mbuf */ 1797 bcopy(payload, mtod(m, caddr_t), len); 1798 if (fl->flags & FL_BUF_PACKING) { 1799 fl->rx_offset += blen; 1800 MPASS(fl->rx_offset <= rxb->size2); 1801 if (fl->rx_offset < rxb->size2) 1802 return (m); /* without advancing the cidx */ 1803 } 1804 } else if (fl->flags & FL_BUF_PACKING) { 1805 clm = cl_metadata(sd); 1806 if 
(sd->nmbuf++ == 0) { 1807 clm->refcount = 1; 1808 clm->zone = rxb->zone; 1809 clm->cl = sd->cl; 1810 counter_u64_add(extfree_refs, 1); 1811 } 1812 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, clm, 1813 NULL); 1814 1815 fl->rx_offset += blen; 1816 MPASS(fl->rx_offset <= rxb->size2); 1817 if (fl->rx_offset < rxb->size2) 1818 return (m); /* without advancing the cidx */ 1819 } else { 1820 m_cljset(m, sd->cl, rxb->type); 1821 sd->cl = NULL; /* consumed, not a recycle candidate */ 1822 } 1823 1824 move_to_next_rxbuf(fl); 1825 1826 return (m); 1827 } 1828 1829 static struct mbuf * 1830 get_fl_payload(struct adapter *sc, struct sge_fl *fl, const u_int plen) 1831 { 1832 struct mbuf *m0, *m, **pnext; 1833 u_int remaining; 1834 1835 if (__predict_false(fl->flags & FL_BUF_RESUME)) { 1836 M_ASSERTPKTHDR(fl->m0); 1837 MPASS(fl->m0->m_pkthdr.len == plen); 1838 MPASS(fl->remaining < plen); 1839 1840 m0 = fl->m0; 1841 pnext = fl->pnext; 1842 remaining = fl->remaining; 1843 fl->flags &= ~FL_BUF_RESUME; 1844 goto get_segment; 1845 } 1846 1847 /* 1848 * Payload starts at rx_offset in the current hw buffer. Its length is 1849 * 'len' and it may span multiple hw buffers. 
1850 */ 1851 1852 m0 = get_scatter_segment(sc, fl, 0, plen); 1853 if (m0 == NULL) 1854 return (NULL); 1855 remaining = plen - m0->m_len; 1856 pnext = &m0->m_next; 1857 while (remaining > 0) { 1858 get_segment: 1859 MPASS(fl->rx_offset == 0); 1860 m = get_scatter_segment(sc, fl, plen - remaining, remaining); 1861 if (__predict_false(m == NULL)) { 1862 fl->m0 = m0; 1863 fl->pnext = pnext; 1864 fl->remaining = remaining; 1865 fl->flags |= FL_BUF_RESUME; 1866 return (NULL); 1867 } 1868 *pnext = m; 1869 pnext = &m->m_next; 1870 remaining -= m->m_len; 1871 } 1872 *pnext = NULL; 1873 1874 M_ASSERTPKTHDR(m0); 1875 return (m0); 1876 } 1877 1878 static int 1879 skip_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1880 int remaining) 1881 { 1882 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1883 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1884 int len, blen; 1885 1886 if (fl->flags & FL_BUF_PACKING) { 1887 u_int l, pad; 1888 1889 blen = rxb->size2 - fl->rx_offset; /* max possible in this buf */ 1890 len = min(remaining, blen); 1891 1892 l = fr_offset + len; 1893 pad = roundup2(l, fl->buf_boundary) - l; 1894 if (fl->rx_offset + len + pad < rxb->size2) 1895 blen = len + pad; 1896 fl->rx_offset += blen; 1897 MPASS(fl->rx_offset <= rxb->size2); 1898 if (fl->rx_offset < rxb->size2) 1899 return (len); /* without advancing the cidx */ 1900 } else { 1901 MPASS(fl->rx_offset == 0); /* not packing */ 1902 blen = rxb->size1; 1903 len = min(remaining, blen); 1904 } 1905 move_to_next_rxbuf(fl); 1906 return (len); 1907 } 1908 1909 static inline void 1910 skip_fl_payload(struct adapter *sc, struct sge_fl *fl, int plen) 1911 { 1912 int remaining, fr_offset, len; 1913 1914 fr_offset = 0; 1915 remaining = plen; 1916 while (remaining > 0) { 1917 len = skip_scatter_segment(sc, fl, fr_offset, remaining); 1918 fr_offset += len; 1919 remaining -= len; 1920 } 1921 } 1922 1923 static inline int 1924 get_segment_len(struct adapter *sc, struct sge_fl *fl, int plen) 
1925 { 1926 int len; 1927 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1928 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[sd->zidx]; 1929 1930 if (fl->flags & FL_BUF_PACKING) 1931 len = rxb->size2 - fl->rx_offset; 1932 else 1933 len = rxb->size1; 1934 1935 return (min(plen, len)); 1936 } 1937 1938 static int 1939 eth_rx(struct adapter *sc, struct sge_rxq *rxq, const struct iq_desc *d, 1940 u_int plen) 1941 { 1942 struct mbuf *m0; 1943 struct ifnet *ifp = rxq->ifp; 1944 struct sge_fl *fl = &rxq->fl; 1945 struct vi_info *vi = ifp->if_softc; 1946 const struct cpl_rx_pkt *cpl; 1947 #if defined(INET) || defined(INET6) 1948 struct lro_ctrl *lro = &rxq->lro; 1949 #endif 1950 uint16_t err_vec, tnl_type, tnlhdr_len; 1951 static const int sw_hashtype[4][2] = { 1952 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1953 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1954 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1955 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1956 }; 1957 static const int sw_csum_flags[2][2] = { 1958 { 1959 /* IP, inner IP */ 1960 CSUM_ENCAP_VXLAN | 1961 CSUM_L3_CALC | CSUM_L3_VALID | 1962 CSUM_L4_CALC | CSUM_L4_VALID | 1963 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1964 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1965 1966 /* IP, inner IP6 */ 1967 CSUM_ENCAP_VXLAN | 1968 CSUM_L3_CALC | CSUM_L3_VALID | 1969 CSUM_L4_CALC | CSUM_L4_VALID | 1970 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1971 }, 1972 { 1973 /* IP6, inner IP */ 1974 CSUM_ENCAP_VXLAN | 1975 CSUM_L4_CALC | CSUM_L4_VALID | 1976 CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | 1977 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1978 1979 /* IP6, inner IP6 */ 1980 CSUM_ENCAP_VXLAN | 1981 CSUM_L4_CALC | CSUM_L4_VALID | 1982 CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID, 1983 }, 1984 }; 1985 1986 MPASS(plen > sc->params.sge.fl_pktshift); 1987 if (vi->pfil != NULL && PFIL_HOOKED_IN(vi->pfil) && 1988 __predict_true((fl->flags & FL_BUF_RESUME) == 0)) { 1989 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1990 caddr_t frame; 
1991 int rc, slen; 1992 1993 slen = get_segment_len(sc, fl, plen) - 1994 sc->params.sge.fl_pktshift; 1995 frame = sd->cl + fl->rx_offset + sc->params.sge.fl_pktshift; 1996 CURVNET_SET_QUIET(ifp->if_vnet); 1997 rc = pfil_mem_in(vi->pfil, frame, slen, ifp, &m0); 1998 CURVNET_RESTORE(); 1999 if (rc == PFIL_DROPPED || rc == PFIL_CONSUMED) { 2000 skip_fl_payload(sc, fl, plen); 2001 return (0); 2002 } 2003 if (rc == PFIL_REALLOCED) { 2004 skip_fl_payload(sc, fl, plen); 2005 goto have_mbuf; 2006 } 2007 } 2008 2009 m0 = get_fl_payload(sc, fl, plen); 2010 if (__predict_false(m0 == NULL)) 2011 return (ENOMEM); 2012 2013 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 2014 m0->m_len -= sc->params.sge.fl_pktshift; 2015 m0->m_data += sc->params.sge.fl_pktshift; 2016 2017 have_mbuf: 2018 m0->m_pkthdr.rcvif = ifp; 2019 M_HASHTYPE_SET(m0, sw_hashtype[d->rss.hash_type][d->rss.ipv6]); 2020 m0->m_pkthdr.flowid = be32toh(d->rss.hash_val); 2021 2022 cpl = (const void *)(&d->rss + 1); 2023 if (sc->params.tp.rx_pkt_encap) { 2024 const uint16_t ev = be16toh(cpl->err_vec); 2025 2026 err_vec = G_T6_COMPR_RXERR_VEC(ev); 2027 tnl_type = G_T6_RX_TNL_TYPE(ev); 2028 tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev); 2029 } else { 2030 err_vec = be16toh(cpl->err_vec); 2031 tnl_type = 0; 2032 tnlhdr_len = 0; 2033 } 2034 if (cpl->csum_calc && err_vec == 0) { 2035 int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6)); 2036 2037 /* checksum(s) calculated and found to be correct. 
*/ 2038 2039 MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^ 2040 (cpl->l2info & htobe32(F_RXF_IP6))); 2041 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 2042 if (tnl_type == 0) { 2043 if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) { 2044 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2045 CSUM_L3_VALID | CSUM_L4_CALC | 2046 CSUM_L4_VALID; 2047 } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) { 2048 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2049 CSUM_L4_VALID; 2050 } 2051 rxq->rxcsum++; 2052 } else { 2053 MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN); 2054 2055 M_HASHTYPE_SETINNER(m0); 2056 if (__predict_false(cpl->ip_frag)) { 2057 /* 2058 * csum_data is for the inner frame (which is an 2059 * IP fragment) and is not 0xffff. There is no 2060 * way to pass the inner csum_data to the stack. 2061 * We don't want the stack to use the inner 2062 * csum_data to validate the outer frame or it 2063 * will get rejected. So we fix csum_data here 2064 * and let sw do the checksum of inner IP 2065 * fragments. 2066 * 2067 * XXX: Need 32b for csum_data2 in an rx mbuf. 2068 * Maybe stuff it into rcv_tstmp? 2069 */ 2070 m0->m_pkthdr.csum_data = 0xffff; 2071 if (ipv6) { 2072 m0->m_pkthdr.csum_flags = CSUM_L4_CALC | 2073 CSUM_L4_VALID; 2074 } else { 2075 m0->m_pkthdr.csum_flags = CSUM_L3_CALC | 2076 CSUM_L3_VALID | CSUM_L4_CALC | 2077 CSUM_L4_VALID; 2078 } 2079 } else { 2080 int outer_ipv6; 2081 2082 MPASS(m0->m_pkthdr.csum_data == 0xffff); 2083 2084 outer_ipv6 = tnlhdr_len >= 2085 sizeof(struct ether_header) + 2086 sizeof(struct ip6_hdr); 2087 m0->m_pkthdr.csum_flags = 2088 sw_csum_flags[outer_ipv6][ipv6]; 2089 } 2090 rxq->vxlan_rxcsum++; 2091 } 2092 } 2093 2094 if (cpl->vlan_ex) { 2095 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 2096 m0->m_flags |= M_VLANTAG; 2097 rxq->vlan_extraction++; 2098 } 2099 2100 if (rxq->iq.flags & IQ_RX_TIMESTAMP) { 2101 /* 2102 * Fill up rcv_tstmp but do not set M_TSTMP as 2103 * long as we get a non-zero back from t4_tstmp_to_ns(). 
/*
 * Copies work requests queued on wrq->wr_list into the hardware descriptor
 * ring, in order, and rings the doorbell for them.  Stops as soon as the ring
 * does not have room for the WR at the head of the list; WRs that were not
 * copied stay on wr_list.
 *
 * Preconditions (all asserted below): the EQ lock is held, there are no
 * incomplete WRs, wr_list is not empty, and the doorbell has caught up to
 * eq->pidx.
 */
static void
drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
{
	struct sge_eq *eq = &wrq->eq;
	u_int available, dbdiff;	/* # of hardware descriptors */
	u_int n;
	struct wrqe *wr;
	struct fw_eth_tx_pkt_wr *dst;	/* any fw WR struct will do */

	EQ_LOCK_ASSERT_OWNED(eq);
	MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
	wr = STAILQ_FIRST(&wrq->wr_list);
	MPASS(wr != NULL);	/* Must be called with something useful to do */
	MPASS(eq->pidx == eq->dbidx);
	dbdiff = 0;	/* descriptors written but not yet announced via doorbell */

	do {
		/* Refresh the consumer index for every WR. */
		eq->cidx = read_hw_cidx(eq);
		if (eq->pidx == eq->cidx)
			available = eq->sidx - 1;
		else
			available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;

		MPASS(wr->wrq == wrq);
		n = howmany(wr->wr_len, EQ_ESIZE);	/* descriptors this WR needs */
		if (available < n)
			break;

		dst = (void *)&eq->desc[eq->pidx];
		if (__predict_true(eq->sidx - eq->pidx > n)) {
			/* Won't wrap, won't end exactly at the status page. */
			bcopy(&wr->wr[0], dst, wr->wr_len);
			eq->pidx += n;
		} else {
			/* Bytes that fit between pidx and the end of the ring. */
			int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;

			bcopy(&wr->wr[0], dst, first_portion);
			if (wr->wr_len > first_portion) {
				/* The remainder goes at the start of the ring. */
				bcopy(&wr->wr[first_portion], &eq->desc[0],
				    wr->wr_len - first_portion);
			}
			eq->pidx = n - (eq->sidx - eq->pidx);
		}
		wrq->tx_wrs_copied++;

		/*
		 * Running low (under a quarter of the ring) and no update
		 * request outstanding: flag this WR.  The cmpset makes sure
		 * only one such request is pending at a time.
		 */
		if (available < eq->sidx / 4 &&
		    atomic_cmpset_int(&eq->equiq, 0, 1)) {
				/*
				 * XXX: This is not 100% reliable with some
				 * types of WRs.  But this is a very unusual
				 * situation for an ofld/ctrl queue anyway.
				 */
			dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
			    F_FW_WR_EQUEQ);
		}

		/* Batch doorbells: ring once 16 or more descriptors are pending. */
		dbdiff += n;
		if (dbdiff >= 16) {
			ring_eq_db(sc, eq, dbdiff);
			dbdiff = 0;
		}

		/* The WR has been copied; drop it from the list and the counters. */
		STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
		free_wrqe(wr);
		MPASS(wrq->nwr_pending > 0);
		wrq->nwr_pending--;
		MPASS(wrq->ndesc_needed >= n);
		wrq->ndesc_needed -= n;
	} while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);

	/* Announce whatever is left over from the last batch. */
	if (dbdiff)
		ring_eq_db(sc, eq, dbdiff);
}
*/ 2241 2242 drain_wrq_wr_list(sc, wrq); 2243 2244 /* Doorbell must have caught up to the pidx. */ 2245 MPASS(eq->pidx == eq->dbidx); 2246 } 2247 2248 void 2249 t4_update_fl_bufsize(struct ifnet *ifp) 2250 { 2251 struct vi_info *vi = ifp->if_softc; 2252 struct adapter *sc = vi->adapter; 2253 struct sge_rxq *rxq; 2254 #ifdef TCP_OFFLOAD 2255 struct sge_ofld_rxq *ofld_rxq; 2256 #endif 2257 struct sge_fl *fl; 2258 int i, maxp; 2259 2260 maxp = max_rx_payload(sc, ifp, false); 2261 for_each_rxq(vi, i, rxq) { 2262 fl = &rxq->fl; 2263 2264 FL_LOCK(fl); 2265 fl->zidx = find_refill_source(sc, maxp, 2266 fl->flags & FL_BUF_PACKING); 2267 FL_UNLOCK(fl); 2268 } 2269 #ifdef TCP_OFFLOAD 2270 maxp = max_rx_payload(sc, ifp, true); 2271 for_each_ofld_rxq(vi, i, ofld_rxq) { 2272 fl = &ofld_rxq->fl; 2273 2274 FL_LOCK(fl); 2275 fl->zidx = find_refill_source(sc, maxp, 2276 fl->flags & FL_BUF_PACKING); 2277 FL_UNLOCK(fl); 2278 } 2279 #endif 2280 } 2281 2282 #ifdef RATELIMIT 2283 static inline int 2284 mbuf_eo_nsegs(struct mbuf *m) 2285 { 2286 2287 M_ASSERTPKTHDR(m); 2288 return (m->m_pkthdr.PH_loc.eight[1]); 2289 } 2290 2291 #if defined(INET) || defined(INET6) 2292 static inline void 2293 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2294 { 2295 2296 M_ASSERTPKTHDR(m); 2297 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2298 } 2299 #endif 2300 2301 static inline int 2302 mbuf_eo_len16(struct mbuf *m) 2303 { 2304 int n; 2305 2306 M_ASSERTPKTHDR(m); 2307 n = m->m_pkthdr.PH_loc.eight[2]; 2308 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2309 2310 return (n); 2311 } 2312 2313 #if defined(INET) || defined(INET6) 2314 static inline void 2315 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2316 { 2317 2318 M_ASSERTPKTHDR(m); 2319 m->m_pkthdr.PH_loc.eight[2] = len16; 2320 } 2321 #endif 2322 2323 static inline int 2324 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2325 { 2326 2327 M_ASSERTPKTHDR(m); 2328 return (m->m_pkthdr.PH_loc.eight[3]); 2329 } 2330 2331 #if defined(INET) || defined(INET6) 2332 static inline 
void 2333 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2334 { 2335 2336 M_ASSERTPKTHDR(m); 2337 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2338 } 2339 #endif 2340 2341 static inline int 2342 needs_eo(struct m_snd_tag *mst) 2343 { 2344 2345 return (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_RATE_LIMIT); 2346 } 2347 #endif 2348 2349 /* 2350 * Try to allocate an mbuf to contain a raw work request. To make it 2351 * easy to construct the work request, don't allocate a chain but a 2352 * single mbuf. 2353 */ 2354 struct mbuf * 2355 alloc_wr_mbuf(int len, int how) 2356 { 2357 struct mbuf *m; 2358 2359 if (len <= MHLEN) 2360 m = m_gethdr(how, MT_DATA); 2361 else if (len <= MCLBYTES) 2362 m = m_getcl(how, MT_DATA, M_PKTHDR); 2363 else 2364 m = NULL; 2365 if (m == NULL) 2366 return (NULL); 2367 m->m_pkthdr.len = len; 2368 m->m_len = len; 2369 set_mbuf_cflags(m, MC_RAW_WR); 2370 set_mbuf_len16(m, howmany(len, 16)); 2371 return (m); 2372 } 2373 2374 static inline bool 2375 needs_hwcsum(struct mbuf *m) 2376 { 2377 const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | 2378 CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP | 2379 CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP | 2380 CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP | 2381 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO; 2382 2383 M_ASSERTPKTHDR(m); 2384 2385 return (m->m_pkthdr.csum_flags & csum_flags); 2386 } 2387 2388 static inline bool 2389 needs_tso(struct mbuf *m) 2390 { 2391 const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO | 2392 CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2393 2394 M_ASSERTPKTHDR(m); 2395 2396 return (m->m_pkthdr.csum_flags & csum_flags); 2397 } 2398 2399 static inline bool 2400 needs_vxlan_csum(struct mbuf *m) 2401 { 2402 2403 M_ASSERTPKTHDR(m); 2404 2405 return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN); 2406 } 2407 2408 static inline bool 2409 needs_vxlan_tso(struct mbuf *m) 2410 { 2411 const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO | 
2412 CSUM_INNER_IP6_TSO; 2413 2414 M_ASSERTPKTHDR(m); 2415 2416 return ((m->m_pkthdr.csum_flags & csum_flags) != 0 && 2417 (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN); 2418 } 2419 2420 #if defined(INET) || defined(INET6) 2421 static inline bool 2422 needs_inner_tcp_csum(struct mbuf *m) 2423 { 2424 const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO; 2425 2426 M_ASSERTPKTHDR(m); 2427 2428 return (m->m_pkthdr.csum_flags & csum_flags); 2429 } 2430 #endif 2431 2432 static inline bool 2433 needs_l3_csum(struct mbuf *m) 2434 { 2435 const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP | 2436 CSUM_INNER_IP_TSO; 2437 2438 M_ASSERTPKTHDR(m); 2439 2440 return (m->m_pkthdr.csum_flags & csum_flags); 2441 } 2442 2443 static inline bool 2444 needs_outer_tcp_csum(struct mbuf *m) 2445 { 2446 const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP | 2447 CSUM_IP6_TSO; 2448 2449 M_ASSERTPKTHDR(m); 2450 2451 return (m->m_pkthdr.csum_flags & csum_flags); 2452 } 2453 2454 #ifdef RATELIMIT 2455 static inline bool 2456 needs_outer_l4_csum(struct mbuf *m) 2457 { 2458 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO | 2459 CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO; 2460 2461 M_ASSERTPKTHDR(m); 2462 2463 return (m->m_pkthdr.csum_flags & csum_flags); 2464 } 2465 2466 static inline bool 2467 needs_outer_udp_csum(struct mbuf *m) 2468 { 2469 const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP; 2470 2471 M_ASSERTPKTHDR(m); 2472 2473 return (m->m_pkthdr.csum_flags & csum_flags); 2474 } 2475 #endif 2476 2477 static inline bool 2478 needs_vlan_insertion(struct mbuf *m) 2479 { 2480 2481 M_ASSERTPKTHDR(m); 2482 2483 return (m->m_flags & M_VLANTAG); 2484 } 2485 2486 #if defined(INET) || defined(INET6) 2487 static void * 2488 m_advance(struct mbuf **pm, int *poffset, int len) 2489 { 2490 struct mbuf *m = *pm; 2491 int offset = *poffset; 2492 uintptr_t p = 0; 2493 2494 MPASS(len > 0); 2495 2496 for (;;) { 2497 if (offset + len 
/*
 * Counts the gather-list segments needed for the data of an M_EXTPG mbuf,
 * ignoring the first 'skip' bytes of its data.
 *
 * The data is walked in three parts: the header area (m_epg_hdr), the pages
 * (m_epg_pa[]), and the trailer (m_epg_trail).  A piece that starts at the
 * physical address in *nextaddr is not counted as a new segment.  *nextaddr
 * is updated to the address just past the last piece visited so that the
 * caller can carry it over to the next mbuf.
 */
static inline int
count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr)
{
	vm_paddr_t paddr;
	int i, len, off, pglen, pgoff, seglen, segoff;
	int nsegs = 0;

	M_ASSERTEXTPG(m);
	/* off: bytes still to be skipped; len: bytes still to be counted. */
	off = mtod(m, vm_offset_t);
	len = m->m_len;
	off += skip;
	len -= skip;

	/* Part 1: the header area, if there is one. */
	if (m->m_epg_hdrlen != 0) {
		if (off >= m->m_epg_hdrlen) {
			/* The header is skipped entirely. */
			off -= m->m_epg_hdrlen;
		} else {
			seglen = m->m_epg_hdrlen - off;
			segoff = off;
			seglen = min(seglen, len);
			off = 0;
			len -= seglen;
			paddr = pmap_kextract(
			    (vm_offset_t)&m->m_epg_hdr[segoff]);
			if (*nextaddr != paddr)
				nsegs++;
			*nextaddr = paddr + seglen;
		}
	}
	/* Part 2: the pages.  Only the first page has a starting offset. */
	pgoff = m->m_epg_1st_off;
	for (i = 0; i < m->m_epg_npgs && len > 0; i++) {
		pglen = m_epg_pagelen(m, i, pgoff);
		if (off >= pglen) {
			/* This page is skipped entirely. */
			off -= pglen;
			pgoff = 0;
			continue;
		}
		seglen = pglen - off;
		segoff = pgoff + off;
		off = 0;
		seglen = min(seglen, len);
		len -= seglen;
		paddr = m->m_epg_pa[i] + segoff;
		if (*nextaddr != paddr)
			nsegs++;
		*nextaddr = paddr + seglen;
		pgoff = 0;
	};
	/* Part 3: whatever is left is in the trailer. */
	if (len != 0) {
		seglen = min(len, m->m_epg_trllen - off);
		len -= seglen;
		paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]);
		if (*nextaddr != paddr)
			nsegs++;
		*nextaddr = paddr + seglen;
	}

	return (nsegs);
}
2578 */ 2579 static inline int 2580 count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) 2581 { 2582 vm_paddr_t nextaddr, paddr; 2583 vm_offset_t va; 2584 int len, nsegs; 2585 2586 M_ASSERTPKTHDR(m); 2587 MPASS(m->m_pkthdr.len > 0); 2588 MPASS(m->m_pkthdr.len >= skip); 2589 2590 nsegs = 0; 2591 nextaddr = 0; 2592 for (; m; m = m->m_next) { 2593 len = m->m_len; 2594 if (__predict_false(len == 0)) 2595 continue; 2596 if (skip >= len) { 2597 skip -= len; 2598 continue; 2599 } 2600 if ((m->m_flags & M_EXTPG) != 0) { 2601 *cflags |= MC_NOMAP; 2602 nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); 2603 skip = 0; 2604 continue; 2605 } 2606 va = mtod(m, vm_offset_t) + skip; 2607 len -= skip; 2608 skip = 0; 2609 paddr = pmap_kextract(va); 2610 nsegs += sglist_count((void *)(uintptr_t)va, len); 2611 if (paddr == nextaddr) 2612 nsegs--; 2613 nextaddr = pmap_kextract(va + len - 1) + 1; 2614 } 2615 2616 return (nsegs); 2617 } 2618 2619 /* 2620 * The maximum number of segments that can fit in a WR. 2621 */ 2622 static int 2623 max_nsegs_allowed(struct mbuf *m, bool vm_wr) 2624 { 2625 2626 if (vm_wr) { 2627 if (needs_tso(m)) 2628 return (TX_SGL_SEGS_VM_TSO); 2629 return (TX_SGL_SEGS_VM); 2630 } 2631 2632 if (needs_tso(m)) { 2633 if (needs_vxlan_tso(m)) 2634 return (TX_SGL_SEGS_VXLAN_TSO); 2635 else 2636 return (TX_SGL_SEGS_TSO); 2637 } 2638 2639 return (TX_SGL_SEGS); 2640 } 2641 2642 static struct timeval txerr_ratecheck = {0}; 2643 static const struct timeval txerr_interval = {3, 0}; 2644 2645 /* 2646 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2647 * a) caller can assume it's been freed if this function returns with an error. 2648 * b) it may get defragged up if the gather list is too long for the hardware. 
2649 */ 2650 int 2651 parse_pkt(struct mbuf **mp, bool vm_wr) 2652 { 2653 struct mbuf *m0 = *mp, *m; 2654 int rc, nsegs, defragged = 0; 2655 struct ether_header *eh; 2656 #ifdef INET 2657 void *l3hdr; 2658 #endif 2659 #if defined(INET) || defined(INET6) 2660 int offset; 2661 struct tcphdr *tcp; 2662 #endif 2663 #if defined(KERN_TLS) || defined(RATELIMIT) 2664 struct m_snd_tag *mst; 2665 #endif 2666 uint16_t eh_type; 2667 uint8_t cflags; 2668 2669 cflags = 0; 2670 M_ASSERTPKTHDR(m0); 2671 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2672 rc = EINVAL; 2673 fail: 2674 m_freem(m0); 2675 *mp = NULL; 2676 return (rc); 2677 } 2678 restart: 2679 /* 2680 * First count the number of gather list segments in the payload. 2681 * Defrag the mbuf if nsegs exceeds the hardware limit. 2682 */ 2683 M_ASSERTPKTHDR(m0); 2684 MPASS(m0->m_pkthdr.len > 0); 2685 nsegs = count_mbuf_nsegs(m0, 0, &cflags); 2686 #if defined(KERN_TLS) || defined(RATELIMIT) 2687 if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) 2688 mst = m0->m_pkthdr.snd_tag; 2689 else 2690 mst = NULL; 2691 #endif 2692 #ifdef KERN_TLS 2693 if (mst != NULL && mst->sw->type == IF_SND_TAG_TYPE_TLS) { 2694 cflags |= MC_TLS; 2695 set_mbuf_cflags(m0, cflags); 2696 rc = t6_ktls_parse_pkt(m0); 2697 if (rc != 0) 2698 goto fail; 2699 return (EINPROGRESS); 2700 } 2701 #endif 2702 if (nsegs > max_nsegs_allowed(m0, vm_wr)) { 2703 if (defragged++ > 0) { 2704 rc = EFBIG; 2705 goto fail; 2706 } 2707 counter_u64_add(defrags, 1); 2708 if ((m = m_defrag(m0, M_NOWAIT)) == NULL) { 2709 rc = ENOMEM; 2710 goto fail; 2711 } 2712 *mp = m0 = m; /* update caller's copy after defrag */ 2713 goto restart; 2714 } 2715 2716 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && 2717 !(cflags & MC_NOMAP))) { 2718 counter_u64_add(pullups, 1); 2719 m0 = m_pullup(m0, m0->m_pkthdr.len); 2720 if (m0 == NULL) { 2721 /* Should have left well enough alone. 
*/ 2722 rc = EFBIG; 2723 goto fail; 2724 } 2725 *mp = m0; /* update caller's copy after pullup */ 2726 goto restart; 2727 } 2728 set_mbuf_nsegs(m0, nsegs); 2729 set_mbuf_cflags(m0, cflags); 2730 calculate_mbuf_len16(m0, vm_wr); 2731 2732 #ifdef RATELIMIT 2733 /* 2734 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2735 * checksumming is enabled. needs_outer_l4_csum happens to check for 2736 * all the right things. 2737 */ 2738 if (__predict_false(needs_eo(mst) && !needs_outer_l4_csum(m0))) { 2739 m_snd_tag_rele(m0->m_pkthdr.snd_tag); 2740 m0->m_pkthdr.snd_tag = NULL; 2741 m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 2742 mst = NULL; 2743 } 2744 #endif 2745 2746 if (!needs_hwcsum(m0) 2747 #ifdef RATELIMIT 2748 && !needs_eo(mst) 2749 #endif 2750 ) 2751 return (0); 2752 2753 m = m0; 2754 eh = mtod(m, struct ether_header *); 2755 eh_type = ntohs(eh->ether_type); 2756 if (eh_type == ETHERTYPE_VLAN) { 2757 struct ether_vlan_header *evh = (void *)eh; 2758 2759 eh_type = ntohs(evh->evl_proto); 2760 m0->m_pkthdr.l2hlen = sizeof(*evh); 2761 } else 2762 m0->m_pkthdr.l2hlen = sizeof(*eh); 2763 2764 #if defined(INET) || defined(INET6) 2765 offset = 0; 2766 #ifdef INET 2767 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2768 #else 2769 m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2770 #endif 2771 #endif 2772 2773 switch (eh_type) { 2774 #ifdef INET6 2775 case ETHERTYPE_IPV6: 2776 m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); 2777 break; 2778 #endif 2779 #ifdef INET 2780 case ETHERTYPE_IP: 2781 { 2782 struct ip *ip = l3hdr; 2783 2784 if (needs_vxlan_csum(m0)) { 2785 /* Driver will do the outer IP hdr checksum. 
*/ 2786 ip->ip_sum = 0; 2787 if (needs_vxlan_tso(m0)) { 2788 const uint16_t ipl = ip->ip_len; 2789 2790 ip->ip_len = 0; 2791 ip->ip_sum = ~in_cksum_hdr(ip); 2792 ip->ip_len = ipl; 2793 } else 2794 ip->ip_sum = in_cksum_hdr(ip); 2795 } 2796 m0->m_pkthdr.l3hlen = ip->ip_hl << 2; 2797 break; 2798 } 2799 #endif 2800 default: 2801 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2802 log(LOG_ERR, "%s: ethertype 0x%04x unknown. " 2803 "if_cxgbe must be compiled with the same " 2804 "INET/INET6 options as the kernel.\n", __func__, 2805 eh_type); 2806 } 2807 rc = EINVAL; 2808 goto fail; 2809 } 2810 2811 #if defined(INET) || defined(INET6) 2812 if (needs_vxlan_csum(m0)) { 2813 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2814 m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header); 2815 2816 /* Inner headers. */ 2817 eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen + 2818 sizeof(struct udphdr) + sizeof(struct vxlan_header)); 2819 eh_type = ntohs(eh->ether_type); 2820 if (eh_type == ETHERTYPE_VLAN) { 2821 struct ether_vlan_header *evh = (void *)eh; 2822 2823 eh_type = ntohs(evh->evl_proto); 2824 m0->m_pkthdr.inner_l2hlen = sizeof(*evh); 2825 } else 2826 m0->m_pkthdr.inner_l2hlen = sizeof(*eh); 2827 #ifdef INET 2828 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2829 #else 2830 m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen); 2831 #endif 2832 2833 switch (eh_type) { 2834 #ifdef INET6 2835 case ETHERTYPE_IPV6: 2836 m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr); 2837 break; 2838 #endif 2839 #ifdef INET 2840 case ETHERTYPE_IP: 2841 { 2842 struct ip *ip = l3hdr; 2843 2844 m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2; 2845 break; 2846 } 2847 #endif 2848 default: 2849 if (ratecheck(&txerr_ratecheck, &txerr_interval)) { 2850 log(LOG_ERR, "%s: VXLAN hw offload requested" 2851 "with unknown ethertype 0x%04x. 
if_cxgbe " 2852 "must be compiled with the same INET/INET6 " 2853 "options as the kernel.\n", __func__, 2854 eh_type); 2855 } 2856 rc = EINVAL; 2857 goto fail; 2858 } 2859 if (needs_inner_tcp_csum(m0)) { 2860 tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen); 2861 m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4; 2862 } 2863 MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); 2864 m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP | 2865 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP | 2866 CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | 2867 CSUM_ENCAP_VXLAN; 2868 } 2869 2870 if (needs_outer_tcp_csum(m0)) { 2871 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2872 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2873 #ifdef RATELIMIT 2874 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2875 set_mbuf_eo_tsclk_tsoff(m0, 2876 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2877 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2878 } else 2879 set_mbuf_eo_tsclk_tsoff(m0, 0); 2880 } else if (needs_outer_udp_csum(m0)) { 2881 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2882 #endif 2883 } 2884 #ifdef RATELIMIT 2885 if (needs_eo(mst)) { 2886 u_int immhdrs; 2887 2888 /* EO WRs have the headers in the WR and not the GL. 
*/ 2889 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2890 m0->m_pkthdr.l4hlen; 2891 cflags = 0; 2892 nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); 2893 MPASS(cflags == mbuf_cflags(m0)); 2894 set_mbuf_eo_nsegs(m0, nsegs); 2895 set_mbuf_eo_len16(m0, 2896 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2897 rc = ethofld_transmit(mst->ifp, m0); 2898 if (rc != 0) 2899 goto fail; 2900 return (EINPROGRESS); 2901 } 2902 #endif 2903 #endif 2904 MPASS(m0 == *mp); 2905 return (0); 2906 } 2907 2908 void * 2909 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2910 { 2911 struct sge_eq *eq = &wrq->eq; 2912 struct adapter *sc = wrq->adapter; 2913 int ndesc, available; 2914 struct wrqe *wr; 2915 void *w; 2916 2917 MPASS(len16 > 0); 2918 ndesc = tx_len16_to_desc(len16); 2919 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2920 2921 EQ_LOCK(eq); 2922 2923 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2924 drain_wrq_wr_list(sc, wrq); 2925 2926 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2927 slowpath: 2928 EQ_UNLOCK(eq); 2929 wr = alloc_wrqe(len16 * 16, wrq); 2930 if (__predict_false(wr == NULL)) 2931 return (NULL); 2932 cookie->pidx = -1; 2933 cookie->ndesc = ndesc; 2934 return (&wr->wr); 2935 } 2936 2937 eq->cidx = read_hw_cidx(eq); 2938 if (eq->pidx == eq->cidx) 2939 available = eq->sidx - 1; 2940 else 2941 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2942 if (available < ndesc) 2943 goto slowpath; 2944 2945 cookie->pidx = eq->pidx; 2946 cookie->ndesc = ndesc; 2947 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2948 2949 w = &eq->desc[eq->pidx]; 2950 IDXINCR(eq->pidx, ndesc, eq->sidx); 2951 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2952 w = &wrq->ss[0]; 2953 wrq->ss_pidx = cookie->pidx; 2954 wrq->ss_len = len16 * 16; 2955 } 2956 2957 EQ_UNLOCK(eq); 2958 2959 return (w); 2960 } 2961 2962 void 2963 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2964 { 2965 struct sge_eq *eq = 
&wrq->eq; 2966 struct adapter *sc = wrq->adapter; 2967 int ndesc, pidx; 2968 struct wrq_cookie *prev, *next; 2969 2970 if (cookie->pidx == -1) { 2971 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2972 2973 t4_wrq_tx(sc, wr); 2974 return; 2975 } 2976 2977 if (__predict_false(w == &wrq->ss[0])) { 2978 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2979 2980 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2981 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2982 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2983 wrq->tx_wrs_ss++; 2984 } else 2985 wrq->tx_wrs_direct++; 2986 2987 EQ_LOCK(eq); 2988 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ 2989 pidx = cookie->pidx; 2990 MPASS(pidx >= 0 && pidx < eq->sidx); 2991 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2992 next = TAILQ_NEXT(cookie, link); 2993 if (prev == NULL) { 2994 MPASS(pidx == eq->dbidx); 2995 if (next == NULL || ndesc >= 16) { 2996 int available; 2997 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2998 2999 /* 3000 * Note that the WR via which we'll request tx updates 3001 * is at pidx and not eq->pidx, which has moved on 3002 * already. 3003 */ 3004 dst = (void *)&eq->desc[pidx]; 3005 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 3006 if (available < eq->sidx / 4 && 3007 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3008 /* 3009 * XXX: This is not 100% reliable with some 3010 * types of WRs. But this is a very unusual 3011 * situation for an ofld/ctrl queue anyway. 
3012 */ 3013 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 3014 F_FW_WR_EQUEQ); 3015 } 3016 3017 ring_eq_db(wrq->adapter, eq, ndesc); 3018 } else { 3019 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 3020 next->pidx = pidx; 3021 next->ndesc += ndesc; 3022 } 3023 } else { 3024 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 3025 prev->ndesc += ndesc; 3026 } 3027 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 3028 3029 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 3030 drain_wrq_wr_list(sc, wrq); 3031 3032 #ifdef INVARIANTS 3033 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 3034 /* Doorbell must have caught up to the pidx. */ 3035 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 3036 } 3037 #endif 3038 EQ_UNLOCK(eq); 3039 } 3040 3041 static u_int 3042 can_resume_eth_tx(struct mp_ring *r) 3043 { 3044 struct sge_eq *eq = r->cookie; 3045 3046 return (total_available_tx_desc(eq) > eq->sidx / 8); 3047 } 3048 3049 static inline bool 3050 cannot_use_txpkts(struct mbuf *m) 3051 { 3052 /* maybe put a GL limit too, to avoid silliness? 
*/ 3053 3054 return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); 3055 } 3056 3057 static inline int 3058 discard_tx(struct sge_eq *eq) 3059 { 3060 3061 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 3062 } 3063 3064 static inline int 3065 wr_can_update_eq(void *p) 3066 { 3067 struct fw_eth_tx_pkts_wr *wr = p; 3068 3069 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 3070 case FW_ULPTX_WR: 3071 case FW_ETH_TX_PKT_WR: 3072 case FW_ETH_TX_PKTS_WR: 3073 case FW_ETH_TX_PKTS2_WR: 3074 case FW_ETH_TX_PKT_VM_WR: 3075 case FW_ETH_TX_PKTS_VM_WR: 3076 return (1); 3077 default: 3078 return (0); 3079 } 3080 } 3081 3082 static inline void 3083 set_txupdate_flags(struct sge_txq *txq, u_int avail, 3084 struct fw_eth_tx_pkt_wr *wr) 3085 { 3086 struct sge_eq *eq = &txq->eq; 3087 struct txpkts *txp = &txq->txp; 3088 3089 if ((txp->npkt > 0 || avail < eq->sidx / 2) && 3090 atomic_cmpset_int(&eq->equiq, 0, 1)) { 3091 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); 3092 eq->equeqidx = eq->pidx; 3093 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { 3094 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 3095 eq->equeqidx = eq->pidx; 3096 } 3097 } 3098 3099 #if defined(__i386__) || defined(__amd64__) 3100 extern uint64_t tsc_freq; 3101 #endif 3102 3103 static inline bool 3104 record_eth_tx_time(struct sge_txq *txq) 3105 { 3106 const uint64_t cycles = get_cyclecount(); 3107 const uint64_t last_tx = txq->last_tx; 3108 #if defined(__i386__) || defined(__amd64__) 3109 const uint64_t itg = tsc_freq * t4_tx_coalesce_gap / 1000000; 3110 #else 3111 const uint64_t itg = 0; 3112 #endif 3113 3114 MPASS(cycles >= last_tx); 3115 txq->last_tx = cycles; 3116 return (cycles - last_tx < itg); 3117 } 3118 3119 /* 3120 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 3121 * be consumed. Return the actual number consumed. 0 indicates a stall. 
/*
 * mp_ring drain routine for an Ethernet tx queue.
 *
 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
 * be consumed.  Returns the actual number consumed.  0 indicates a stall.
 *
 * Packets are either coalesced into txq->txp (and sent together later) or
 * written to the descriptor ring as individual work requests.  *coalescing is
 * set to true on return if txq->txp still holds packets that have not been
 * sent.  The tx queue lock must be held (asserted).
 */
static u_int
eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
{
	struct sge_txq *txq = r->cookie;
	struct ifnet *ifp = txq->ifp;
	struct sge_eq *eq = &txq->eq;
	struct txpkts *txp = &txq->txp;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;
	u_int total, remaining;		/* # of packets */
	u_int n, avail, dbdiff;		/* # of hardware descriptors */
	int i, rc;
	struct mbuf *m0;
	bool snd, recent_tx;
	void *wr;	/* start of the last WR written to the ring */

	TXQ_LOCK_ASSERT_OWNED(txq);
	recent_tx = record_eth_tx_time(txq);

	remaining = IDXDIFF(pidx, cidx, r->size);
	if (__predict_false(discard_tx(eq))) {
		/*
		 * The queue is not accepting tx.  Free everything: the packets
		 * held for coalescing as well as all the items in the ring,
		 * and report them all as consumed.
		 */
		for (i = 0; i < txp->npkt; i++)
			m_freem(txp->mb[i]);
		txp->npkt = 0;
		while (cidx != pidx) {
			m0 = r->items[cidx];
			m_freem(m0);
			if (++cidx == r->size)
				cidx = 0;
		}
		reclaim_tx_descs(txq, eq->sidx);
		*coalescing = false;
		return (remaining);	/* emptied */
	}

	/* How many hardware descriptors do we have readily available. */
	if (eq->pidx == eq->cidx)
		avail = eq->sidx - 1;
	else
		avail = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;

	total = 0;
	if (remaining == 0) {
		/*
		 * Nothing new in the ring: flush what was coalesced earlier.
		 * dbdiff is not initialized on this path; the code at
		 * send_txpkts does not use it.
		 */
		txp->score = 0;
		txq->txpkts_flush++;
		goto send_txpkts;
	}

	dbdiff = 0;	/* descriptors written since the last doorbell */
	MPASS(remaining > 0);
	while (remaining > 0) {
		m0 = r->items[cidx];
		M_ASSERTPKTHDR(m0);
		MPASS(m0->m_nextpkt == NULL);

		/* Top up the descriptor count when running low. */
		if (avail < 2 * SGE_MAX_WR_NDESC)
			avail += reclaim_tx_descs(txq, 64);

		/* Coalescing disabled and nothing held: go straight to the WR. */
		if (t4_tx_coalesce == 0 && txp->npkt == 0)
			goto skip_coalescing;
		/*
		 * Update the coalescing score: 0 if this packet can't be
		 * coalesced, incremented (saturating at UINT8_MAX) if the
		 * previous tx was recent, 1 otherwise.
		 */
		if (cannot_use_txpkts(m0))
			txp->score = 0;
		else if (recent_tx) {
			if (++txp->score == 0)
				txp->score = UINT8_MAX;
		} else
			txp->score = 1;
		if (txp->npkt > 0 || remaining > 1 ||
		    txp->score >= t4_tx_coalesce_pkts ||
		    atomic_load_int(&txq->eq.equiq) != 0) {
			if (vi->flags & TX_USES_VM_WR)
				rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
			else
				rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd);
		} else {
			/* Don't attempt to coalesce this packet. */
			snd = false;
			rc = EINVAL;
		}
		if (snd) {
			/* The held packets must be written to the ring now. */
			MPASS(txp->npkt > 0);
			for (i = 0; i < txp->npkt; i++)
				ETHER_BPF_MTAP(ifp, txp->mb[i]);
			if (txp->npkt > 1) {
				MPASS(avail >= tx_len16_to_desc(txp->len16));
				if (vi->flags & TX_USES_VM_WR)
					n = write_txpkts_vm_wr(sc, txq);
				else
					n = write_txpkts_wr(sc, txq);
			} else {
				/* Only one packet held: use a single-packet WR. */
				MPASS(avail >=
				    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
				if (vi->flags & TX_USES_VM_WR)
					n = write_txpkt_vm_wr(sc, txq,
					    txp->mb[0]);
				else
					n = write_txpkt_wr(sc, txq, txp->mb[0],
					    avail);
			}
			MPASS(n <= SGE_MAX_WR_NDESC);
			avail -= n;
			dbdiff += n;
			wr = &eq->desc[eq->pidx];
			IDXINCR(eq->pidx, n, eq->sidx);
			txp->npkt = 0;	/* emptied */
		}
		if (rc == 0) {
			/* m0 was coalesced into txq->txpkts. */
			goto next_mbuf;
		}
		if (rc == EAGAIN) {
			/*
			 * m0 is suitable for tx coalescing but could not be
			 * combined with the existing txq->txpkts, which has now
			 * been transmitted.  Start a new txpkts with m0.
			 */
			MPASS(snd);
			MPASS(txp->npkt == 0);
			continue;
		}

		MPASS(rc != 0 && rc != EAGAIN);
		MPASS(txp->npkt == 0);
skip_coalescing:
		/* m0 goes out in a work request of its own. */
		n = tx_len16_to_desc(mbuf_len16(m0));
		if (__predict_false(avail < n)) {
			avail += reclaim_tx_descs(txq, min(n, 32));
			if (avail < n)
				break;	/* out of descriptors */
		}

		wr = &eq->desc[eq->pidx];
		if (mbuf_cflags(m0) & MC_RAW_WR) {
			/* Raw WRs are not tapped for BPF. */
			n = write_raw_wr(txq, wr, m0, avail);
#ifdef KERN_TLS
		} else if (mbuf_cflags(m0) & MC_TLS) {
			ETHER_BPF_MTAP(ifp, m0);
			n = t6_ktls_write_wr(txq, wr, m0, avail);
#endif
		} else {
			ETHER_BPF_MTAP(ifp, m0);
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkt_vm_wr(sc, txq, m0);
			else
				n = write_txpkt_wr(sc, txq, m0, avail);
		}
		MPASS(n >= 1 && n <= avail);
		if (!(mbuf_cflags(m0) & MC_TLS))
			MPASS(n <= SGE_MAX_WR_NDESC);

		avail -= n;
		dbdiff += n;
		IDXINCR(eq->pidx, n, eq->sidx);

		/* Ring the doorbell once enough descriptors have piled up. */
		if (dbdiff >= 512 / EQ_ESIZE) {	/* X_FETCHBURSTMAX_512B */
			if (wr_can_update_eq(wr))
				set_txupdate_flags(txq, avail, wr);
			ring_eq_db(sc, eq, dbdiff);
			avail += reclaim_tx_descs(txq, 32);
			dbdiff = 0;
		}
next_mbuf:
		/* m0 has been consumed (coalesced or written to the ring). */
		total++;
		remaining--;
		if (__predict_false(++cidx == r->size))
			cidx = 0;
	}
	if (dbdiff != 0) {
		/* Descriptors were written but not yet announced. */
		if (wr_can_update_eq(wr))
			set_txupdate_flags(txq, avail, wr);
		ring_eq_db(sc, eq, dbdiff);
		reclaim_tx_descs(txq, 32);
	} else if (eq->pidx == eq->cidx && txp->npkt > 0 &&
	    atomic_load_int(&txq->eq.equiq) == 0) {
		/*
		 * If nothing was submitted to the chip for tx (it was coalesced
		 * into txpkts instead) and there is no tx update outstanding
		 * then we need to send txpkts now.
		 */
send_txpkts:
		/* Also reached directly from the remaining == 0 case above. */
		MPASS(txp->npkt > 0);
		for (i = 0; i < txp->npkt; i++)
			ETHER_BPF_MTAP(ifp, txp->mb[i]);
		if (txp->npkt > 1) {
			MPASS(avail >= tx_len16_to_desc(txp->len16));
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkts_vm_wr(sc, txq);
			else
				n = write_txpkts_wr(sc, txq);
		} else {
			MPASS(avail >=
			    tx_len16_to_desc(mbuf_len16(txp->mb[0])));
			if (vi->flags & TX_USES_VM_WR)
				n = write_txpkt_vm_wr(sc, txq, txp->mb[0]);
			else
				n = write_txpkt_wr(sc, txq, txp->mb[0], avail);
		}
		MPASS(n <= SGE_MAX_WR_NDESC);
		wr = &eq->desc[eq->pidx];
		IDXINCR(eq->pidx, n, eq->sidx);
		txp->npkt = 0;	/* emptied */

		MPASS(wr_can_update_eq(wr));
		set_txupdate_flags(txq, avail - n, wr);
		ring_eq_db(sc, eq, n);
		reclaim_tx_descs(txq, 32);
	}
	*coalescing = txp->npkt > 0;

	return (total);
}
iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 3360 iq->intr_idx = intr_idx; 3361 iq->cong_drop = cong; 3362 } 3363 3364 static inline void 3365 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 3366 { 3367 struct sge_params *sp = &sc->params.sge; 3368 3369 fl->qsize = qsize; 3370 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3371 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 3372 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3373 if (sc->flags & BUF_PACKING_OK && 3374 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 3375 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 3376 fl->flags |= FL_BUF_PACKING; 3377 fl->zidx = find_refill_source(sc, maxp, fl->flags & FL_BUF_PACKING); 3378 fl->safe_zidx = sc->sge.safe_zidx; 3379 if (fl->flags & FL_BUF_PACKING) { 3380 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3381 fl->buf_boundary = sp->pack_boundary; 3382 } else { 3383 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3384 fl->buf_boundary = 16; 3385 } 3386 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3387 fl->buf_boundary = sp->pad_boundary; 3388 } 3389 3390 static inline void 3391 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 3392 uint8_t tx_chan, struct sge_iq *iq, char *name) 3393 { 3394 KASSERT(eqtype >= EQ_CTRL && eqtype <= EQ_OFLD, 3395 ("%s: bad qtype %d", __func__, eqtype)); 3396 3397 eq->type = eqtype; 3398 eq->tx_chan = tx_chan; 3399 eq->iq = iq; 3400 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 3401 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 3402 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3403 } 3404 3405 int 3406 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 3407 bus_dmamap_t *map, bus_addr_t *pa, void **va) 3408 { 3409 int rc; 3410 3411 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 3412 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 3413 if (rc != 0) { 3414 CH_ERR(sc, 
"cannot allocate DMA tag: %d\n", rc); 3415 goto done; 3416 } 3417 3418 rc = bus_dmamem_alloc(*tag, va, 3419 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 3420 if (rc != 0) { 3421 CH_ERR(sc, "cannot allocate DMA memory: %d\n", rc); 3422 goto done; 3423 } 3424 3425 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 3426 if (rc != 0) { 3427 CH_ERR(sc, "cannot load DMA map: %d\n", rc); 3428 goto done; 3429 } 3430 done: 3431 if (rc) 3432 free_ring(sc, *tag, *map, *pa, *va); 3433 3434 return (rc); 3435 } 3436 3437 int 3438 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3439 bus_addr_t pa, void *va) 3440 { 3441 if (pa) 3442 bus_dmamap_unload(tag, map); 3443 if (va) 3444 bus_dmamem_free(tag, va, map); 3445 if (tag) 3446 bus_dma_tag_destroy(tag); 3447 3448 return (0); 3449 } 3450 3451 /* 3452 * Allocates the software resources (mainly memory and sysctl nodes) for an 3453 * ingress queue and an optional freelist. 3454 * 3455 * Sets IQ_SW_ALLOCATED and returns 0 on success. 3456 */ 3457 static int 3458 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3459 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid) 3460 { 3461 int rc; 3462 size_t len; 3463 struct adapter *sc = vi->adapter; 3464 3465 MPASS(!(iq->flags & IQ_SW_ALLOCATED)); 3466 3467 len = iq->qsize * IQ_ESIZE; 3468 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3469 (void **)&iq->desc); 3470 if (rc != 0) 3471 return (rc); 3472 3473 if (fl) { 3474 len = fl->qsize * EQ_ESIZE; 3475 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3476 &fl->ba, (void **)&fl->desc); 3477 if (rc) { 3478 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, 3479 iq->desc); 3480 return (rc); 3481 } 3482 3483 /* Allocate space for one software descriptor per buffer. 
*/ 3484 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), 3485 M_CXGBE, M_ZERO | M_WAITOK); 3486 3487 add_fl_sysctls(sc, ctx, oid, fl); 3488 iq->flags |= IQ_HAS_FL; 3489 } 3490 add_iq_sysctls(ctx, oid, iq); 3491 iq->flags |= IQ_SW_ALLOCATED; 3492 3493 return (0); 3494 } 3495 3496 /* 3497 * Frees all software resources (memory and locks) associated with an ingress 3498 * queue and an optional freelist. 3499 */ 3500 static void 3501 free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3502 { 3503 MPASS(iq->flags & IQ_SW_ALLOCATED); 3504 3505 if (fl) { 3506 MPASS(iq->flags & IQ_HAS_FL); 3507 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); 3508 free_fl_buffers(sc, fl); 3509 free(fl->sdesc, M_CXGBE); 3510 mtx_destroy(&fl->fl_lock); 3511 bzero(fl, sizeof(*fl)); 3512 } 3513 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3514 bzero(iq, sizeof(*iq)); 3515 } 3516 3517 /* 3518 * Allocates a hardware ingress queue and an optional freelist that will be 3519 * associated with it. 3520 * 3521 * Returns errno on failure. Resources allocated up to that point may still be 3522 * allocated. Caller is responsible for cleanup in case this function fails. 
3523 */ 3524 static int 3525 alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3526 { 3527 int rc, cntxt_id, cong_map; 3528 struct fw_iq_cmd c; 3529 struct adapter *sc = vi->adapter; 3530 struct port_info *pi = vi->pi; 3531 __be32 v = 0; 3532 3533 MPASS (!(iq->flags & IQ_HW_ALLOCATED)); 3534 3535 bzero(&c, sizeof(c)); 3536 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3537 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3538 V_FW_IQ_CMD_VFN(0)); 3539 3540 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3541 FW_LEN16(c)); 3542 3543 /* Special handling for firmware event queue */ 3544 if (iq == &sc->sge.fwq) 3545 v |= F_FW_IQ_CMD_IQASYNCH; 3546 3547 if (iq->intr_idx < 0) { 3548 /* Forwarded interrupts, all headed to fwq */ 3549 v |= F_FW_IQ_CMD_IQANDST; 3550 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3551 } else { 3552 KASSERT(iq->intr_idx < sc->intr_count, 3553 ("%s: invalid direct intr_idx %d", __func__, iq->intr_idx)); 3554 v |= V_FW_IQ_CMD_IQANDSTINDEX(iq->intr_idx); 3555 } 3556 3557 bzero(iq->desc, iq->qsize * IQ_ESIZE); 3558 c.type_to_iqandstindex = htobe32(v | 3559 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3560 V_FW_IQ_CMD_VIID(vi->viid) | 3561 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3562 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 3563 F_FW_IQ_CMD_IQGTSMODE | 3564 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3565 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3566 c.iqsize = htobe16(iq->qsize); 3567 c.iqaddr = htobe64(iq->ba); 3568 c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_IQTYPE(iq->qtype)); 3569 if (iq->cong_drop != -1) { 3570 cong_map = iq->qtype == IQ_ETH ? 
pi->rx_e_chan_map : 0; 3571 c.iqns_to_fl0congen |= htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3572 } 3573 3574 if (fl) { 3575 bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len); 3576 c.iqns_to_fl0congen |= 3577 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3578 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3579 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3580 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3581 0)); 3582 if (iq->cong_drop != -1) { 3583 c.iqns_to_fl0congen |= 3584 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | 3585 F_FW_IQ_CMD_FL0CONGCIF | 3586 F_FW_IQ_CMD_FL0CONGEN); 3587 } 3588 c.fl0dcaen_to_fl0cidxfthresh = 3589 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3590 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) | 3591 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 3592 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3593 c.fl0size = htobe16(fl->qsize); 3594 c.fl0addr = htobe64(fl->ba); 3595 } 3596 3597 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3598 if (rc != 0) { 3599 CH_ERR(sc, "failed to create hw ingress queue: %d\n", rc); 3600 return (rc); 3601 } 3602 3603 iq->cidx = 0; 3604 iq->gen = F_RSPD_GEN; 3605 iq->cntxt_id = be16toh(c.iqid); 3606 iq->abs_id = be16toh(c.physiqid); 3607 3608 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3609 if (cntxt_id >= sc->sge.iqmap_sz) { 3610 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3611 cntxt_id, sc->sge.iqmap_sz - 1); 3612 } 3613 sc->sge.iqmap[cntxt_id] = iq; 3614 3615 if (fl) { 3616 u_int qid; 3617 #ifdef INVARIANTS 3618 int i; 3619 3620 MPASS(!(fl->flags & FL_BUF_RESUME)); 3621 for (i = 0; i < fl->sidx * 8; i++) 3622 MPASS(fl->sdesc[i].cl == NULL); 3623 #endif 3624 fl->cntxt_id = be16toh(c.fl0id); 3625 fl->pidx = fl->cidx = fl->hw_cidx = fl->dbidx = 0; 3626 fl->rx_offset = 0; 3627 fl->flags &= ~(FL_STARVING | FL_DOOMED); 3628 3629 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3630 if (cntxt_id >= sc->sge.eqmap_sz) { 3631 panic("%s: fl->cntxt_id (%d) more than 
the max (%d)", 3632 __func__, cntxt_id, sc->sge.eqmap_sz - 1); 3633 } 3634 sc->sge.eqmap[cntxt_id] = (void *)fl; 3635 3636 qid = fl->cntxt_id; 3637 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3638 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3639 uint32_t mask = (1 << s_qpp) - 1; 3640 volatile uint8_t *udb; 3641 3642 udb = sc->udbs_base + UDBS_DB_OFFSET; 3643 udb += (qid >> s_qpp) << PAGE_SHIFT; 3644 qid &= mask; 3645 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3646 udb += qid << UDBS_SEG_SHIFT; 3647 qid = 0; 3648 } 3649 fl->udb = (volatile void *)udb; 3650 } 3651 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3652 3653 FL_LOCK(fl); 3654 /* Enough to make sure the SGE doesn't think it's starved */ 3655 refill_fl(sc, fl, fl->lowat); 3656 FL_UNLOCK(fl); 3657 } 3658 3659 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && 3660 iq->cong_drop != -1) { 3661 t4_sge_set_conm_context(sc, iq->cntxt_id, iq->cong_drop, 3662 cong_map); 3663 } 3664 3665 /* Enable IQ interrupts */ 3666 atomic_store_rel_int(&iq->state, IQS_IDLE); 3667 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3668 V_INGRESSQID(iq->cntxt_id)); 3669 3670 iq->flags |= IQ_HW_ALLOCATED; 3671 3672 return (0); 3673 } 3674 3675 static int 3676 free_iq_fl_hwq(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl) 3677 { 3678 int rc; 3679 3680 MPASS(iq->flags & IQ_HW_ALLOCATED); 3681 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, 3682 iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3683 if (rc != 0) { 3684 CH_ERR(sc, "failed to free iq %p: %d\n", iq, rc); 3685 return (rc); 3686 } 3687 iq->flags &= ~IQ_HW_ALLOCATED; 3688 3689 return (0); 3690 } 3691 3692 static void 3693 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3694 struct sge_iq *iq) 3695 { 3696 struct sysctl_oid_list *children; 3697 3698 if (ctx == NULL || oid == NULL) 3699 return; 3700 3701 children = SYSCTL_CHILDREN(oid); 3702 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3703 "bus address of descriptor ring"); 3704 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3705 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3706 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 3707 &iq->abs_id, 0, "absolute id of the queue"); 3708 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3709 &iq->cntxt_id, 0, "SGE context id of the queue"); 3710 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &iq->cidx, 3711 0, "consumer index"); 3712 } 3713 3714 static void 3715 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3716 struct sysctl_oid *oid, struct sge_fl *fl) 3717 { 3718 struct sysctl_oid_list *children; 3719 3720 if (ctx == NULL || oid == NULL) 3721 return; 3722 3723 children = SYSCTL_CHILDREN(oid); 3724 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", 3725 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist"); 3726 children = SYSCTL_CHILDREN(oid); 3727 3728 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3729 &fl->ba, "bus address of descriptor ring"); 3730 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3731 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3732 "desc ring size in bytes"); 3733 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3734 &fl->cntxt_id, 0, "SGE context id of the freelist"); 3735 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3736 fl_pad ? 
1 : 0, "padding enabled"); 3737 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3738 fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); 3739 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3740 0, "consumer index"); 3741 if (fl->flags & FL_BUF_PACKING) { 3742 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3743 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3744 } 3745 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3746 0, "producer index"); 3747 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3748 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3749 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3750 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3751 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3752 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3753 } 3754 3755 /* 3756 * Idempotent. 3757 */ 3758 static int 3759 alloc_fwq(struct adapter *sc) 3760 { 3761 int rc, intr_idx; 3762 struct sge_iq *fwq = &sc->sge.fwq; 3763 struct vi_info *vi = &sc->port[0]->vi[0]; 3764 3765 if (!(fwq->flags & IQ_SW_ALLOCATED)) { 3766 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3767 3768 if (sc->flags & IS_VF) 3769 intr_idx = 0; 3770 else 3771 intr_idx = sc->intr_count > 1 ? 1 : 0; 3772 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, intr_idx, -1, IQ_OTHER); 3773 rc = alloc_iq_fl(vi, fwq, NULL, &sc->ctx, sc->fwq_oid); 3774 if (rc != 0) { 3775 CH_ERR(sc, "failed to allocate fwq: %d\n", rc); 3776 return (rc); 3777 } 3778 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3779 } 3780 3781 if (!(fwq->flags & IQ_HW_ALLOCATED)) { 3782 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3783 3784 rc = alloc_iq_fl_hwq(vi, fwq, NULL); 3785 if (rc != 0) { 3786 CH_ERR(sc, "failed to create hw fwq: %d\n", rc); 3787 return (rc); 3788 } 3789 MPASS(fwq->flags & IQ_HW_ALLOCATED); 3790 } 3791 3792 return (0); 3793 } 3794 3795 /* 3796 * Idempotent. 
3797 */ 3798 static void 3799 free_fwq(struct adapter *sc) 3800 { 3801 struct sge_iq *fwq = &sc->sge.fwq; 3802 3803 if (fwq->flags & IQ_HW_ALLOCATED) { 3804 MPASS(fwq->flags & IQ_SW_ALLOCATED); 3805 free_iq_fl_hwq(sc, fwq, NULL); 3806 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3807 } 3808 3809 if (fwq->flags & IQ_SW_ALLOCATED) { 3810 MPASS(!(fwq->flags & IQ_HW_ALLOCATED)); 3811 free_iq_fl(sc, fwq, NULL); 3812 MPASS(!(fwq->flags & IQ_SW_ALLOCATED)); 3813 } 3814 } 3815 3816 /* 3817 * Idempotent. 3818 */ 3819 static int 3820 alloc_ctrlq(struct adapter *sc, int idx) 3821 { 3822 int rc; 3823 char name[16]; 3824 struct sysctl_oid *oid; 3825 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3826 3827 MPASS(idx < sc->params.nports); 3828 3829 if (!(ctrlq->eq.flags & EQ_SW_ALLOCATED)) { 3830 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3831 3832 snprintf(name, sizeof(name), "%d", idx); 3833 oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->ctrlq_oid), 3834 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3835 "ctrl queue"); 3836 3837 snprintf(name, sizeof(name), "%s ctrlq%d", 3838 device_get_nameunit(sc->dev), idx); 3839 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, 3840 sc->port[idx]->tx_chan, &sc->sge.fwq, name); 3841 rc = alloc_wrq(sc, NULL, ctrlq, &sc->ctx, oid); 3842 if (rc != 0) { 3843 CH_ERR(sc, "failed to allocate ctrlq%d: %d\n", idx, rc); 3844 sysctl_remove_oid(oid, 1, 1); 3845 return (rc); 3846 } 3847 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3848 } 3849 3850 if (!(ctrlq->eq.flags & EQ_HW_ALLOCATED)) { 3851 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3852 3853 rc = alloc_eq_hwq(sc, NULL, &ctrlq->eq); 3854 if (rc != 0) { 3855 CH_ERR(sc, "failed to create hw ctrlq%d: %d\n", idx, rc); 3856 return (rc); 3857 } 3858 MPASS(ctrlq->eq.flags & EQ_HW_ALLOCATED); 3859 } 3860 3861 return (0); 3862 } 3863 3864 /* 3865 * Idempotent. 
3866 */ 3867 static void 3868 free_ctrlq(struct adapter *sc, int idx) 3869 { 3870 struct sge_wrq *ctrlq = &sc->sge.ctrlq[idx]; 3871 3872 if (ctrlq->eq.flags & EQ_HW_ALLOCATED) { 3873 MPASS(ctrlq->eq.flags & EQ_SW_ALLOCATED); 3874 free_eq_hwq(sc, NULL, &ctrlq->eq); 3875 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3876 } 3877 3878 if (ctrlq->eq.flags & EQ_SW_ALLOCATED) { 3879 MPASS(!(ctrlq->eq.flags & EQ_HW_ALLOCATED)); 3880 free_wrq(sc, ctrlq); 3881 MPASS(!(ctrlq->eq.flags & EQ_SW_ALLOCATED)); 3882 } 3883 } 3884 3885 int 3886 t4_sge_set_conm_context(struct adapter *sc, int cntxt_id, int cong_drop, 3887 int cong_map) 3888 { 3889 const int cng_ch_bits_log = sc->chip_params->cng_ch_bits_log; 3890 uint32_t param, val; 3891 uint16_t ch_map; 3892 int cong_mode, rc, i; 3893 3894 if (chip_id(sc) < CHELSIO_T5) 3895 return (ENOTSUP); 3896 3897 /* Convert the driver knob to the mode understood by the firmware. */ 3898 switch (cong_drop) { 3899 case -1: 3900 cong_mode = X_CONMCTXT_CNGTPMODE_DISABLE; 3901 break; 3902 case 0: 3903 cong_mode = X_CONMCTXT_CNGTPMODE_CHANNEL; 3904 break; 3905 case 1: 3906 cong_mode = X_CONMCTXT_CNGTPMODE_QUEUE; 3907 break; 3908 case 2: 3909 cong_mode = X_CONMCTXT_CNGTPMODE_BOTH; 3910 break; 3911 default: 3912 MPASS(0); 3913 CH_ERR(sc, "cong_drop = %d is invalid (ingress queue %d).\n", 3914 cong_drop, cntxt_id); 3915 return (EINVAL); 3916 } 3917 3918 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3919 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3920 V_FW_PARAMS_PARAM_YZ(cntxt_id); 3921 val = V_CONMCTXT_CNGTPMODE(cong_mode); 3922 if (cong_mode == X_CONMCTXT_CNGTPMODE_CHANNEL || 3923 cong_mode == X_CONMCTXT_CNGTPMODE_BOTH) { 3924 for (i = 0, ch_map = 0; i < 4; i++) { 3925 if (cong_map & (1 << i)) 3926 ch_map |= 1 << (i << cng_ch_bits_log); 3927 } 3928 val |= V_CONMCTXT_CNGCHMAP(ch_map); 3929 } 3930 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3931 if (rc != 0) { 3932 CH_ERR(sc, "failed to set congestion manager context " 3933 
"for ingress queue %d: %d\n", cntxt_id, rc); 3934 } 3935 3936 return (rc); 3937 } 3938 3939 /* 3940 * Idempotent. 3941 */ 3942 static int 3943 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx, 3944 int maxp) 3945 { 3946 int rc; 3947 struct adapter *sc = vi->adapter; 3948 struct ifnet *ifp = vi->ifp; 3949 struct sysctl_oid *oid; 3950 char name[16]; 3951 3952 if (!(rxq->iq.flags & IQ_SW_ALLOCATED)) { 3953 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 3954 #if defined(INET) || defined(INET6) 3955 rc = tcp_lro_init_args(&rxq->lro, ifp, lro_entries, lro_mbufs); 3956 if (rc != 0) 3957 return (rc); 3958 MPASS(rxq->lro.ifp == ifp); /* also indicates LRO init'ed */ 3959 #endif 3960 rxq->ifp = ifp; 3961 3962 snprintf(name, sizeof(name), "%d", idx); 3963 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->rxq_oid), 3964 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 3965 "rx queue"); 3966 3967 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq, 3968 intr_idx, cong_drop, IQ_ETH); 3969 #if defined(INET) || defined(INET6) 3970 if (ifp->if_capenable & IFCAP_LRO) 3971 rxq->iq.flags |= IQ_LRO_ENABLED; 3972 #endif 3973 if (ifp->if_capenable & IFCAP_HWRXTSTMP) 3974 rxq->iq.flags |= IQ_RX_TIMESTAMP; 3975 snprintf(name, sizeof(name), "%s rxq%d-fl", 3976 device_get_nameunit(vi->dev), idx); 3977 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 3978 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, &vi->ctx, oid); 3979 if (rc != 0) { 3980 CH_ERR(vi, "failed to allocate rxq%d: %d\n", idx, rc); 3981 sysctl_remove_oid(oid, 1, 1); 3982 #if defined(INET) || defined(INET6) 3983 tcp_lro_free(&rxq->lro); 3984 rxq->lro.ifp = NULL; 3985 #endif 3986 return (rc); 3987 } 3988 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 3989 add_rxq_sysctls(&vi->ctx, oid, rxq); 3990 } 3991 3992 if (!(rxq->iq.flags & IQ_HW_ALLOCATED)) { 3993 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 3994 rc = alloc_iq_fl_hwq(vi, &rxq->iq, &rxq->fl); 3995 if (rc != 0) { 3996 CH_ERR(vi, "failed to create hw 
rxq%d: %d\n", idx, rc); 3997 return (rc); 3998 } 3999 MPASS(rxq->iq.flags & IQ_HW_ALLOCATED); 4000 4001 if (idx == 0) 4002 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 4003 else 4004 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 4005 ("iq_base mismatch")); 4006 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 4007 ("PF with non-zero iq_base")); 4008 4009 /* 4010 * The freelist is just barely above the starvation threshold 4011 * right now, fill it up a bit more. 4012 */ 4013 FL_LOCK(&rxq->fl); 4014 refill_fl(sc, &rxq->fl, 128); 4015 FL_UNLOCK(&rxq->fl); 4016 } 4017 4018 return (0); 4019 } 4020 4021 /* 4022 * Idempotent. 4023 */ 4024 static void 4025 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 4026 { 4027 if (rxq->iq.flags & IQ_HW_ALLOCATED) { 4028 MPASS(rxq->iq.flags & IQ_SW_ALLOCATED); 4029 free_iq_fl_hwq(vi->adapter, &rxq->iq, &rxq->fl); 4030 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4031 } 4032 4033 if (rxq->iq.flags & IQ_SW_ALLOCATED) { 4034 MPASS(!(rxq->iq.flags & IQ_HW_ALLOCATED)); 4035 #if defined(INET) || defined(INET6) 4036 tcp_lro_free(&rxq->lro); 4037 #endif 4038 free_iq_fl(vi->adapter, &rxq->iq, &rxq->fl); 4039 MPASS(!(rxq->iq.flags & IQ_SW_ALLOCATED)); 4040 bzero(rxq, sizeof(*rxq)); 4041 } 4042 } 4043 4044 static void 4045 add_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4046 struct sge_rxq *rxq) 4047 { 4048 struct sysctl_oid_list *children; 4049 4050 if (ctx == NULL || oid == NULL) 4051 return; 4052 4053 children = SYSCTL_CHILDREN(oid); 4054 #if defined(INET) || defined(INET6) 4055 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 4056 &rxq->lro.lro_queued, 0, NULL); 4057 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 4058 &rxq->lro.lro_flushed, 0, NULL); 4059 #endif 4060 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 4061 &rxq->rxcsum, "# of times hardware assisted with checksum"); 4062 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_extraction", 
CTLFLAG_RD, 4063 &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); 4064 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_rxcsum", CTLFLAG_RD, 4065 &rxq->vxlan_rxcsum, 4066 "# of times hardware assisted with inner checksum (VXLAN)"); 4067 } 4068 4069 #ifdef TCP_OFFLOAD 4070 /* 4071 * Idempotent. 4072 */ 4073 static int 4074 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int idx, 4075 int intr_idx, int maxp) 4076 { 4077 int rc; 4078 struct adapter *sc = vi->adapter; 4079 struct sysctl_oid *oid; 4080 char name[16]; 4081 4082 if (!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)) { 4083 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4084 4085 snprintf(name, sizeof(name), "%d", idx); 4086 oid = SYSCTL_ADD_NODE(&vi->ctx, 4087 SYSCTL_CHILDREN(vi->ofld_rxq_oid), OID_AUTO, name, 4088 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload rx queue"); 4089 4090 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 4091 vi->qsize_rxq, intr_idx, ofld_cong_drop, IQ_OFLD); 4092 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 4093 device_get_nameunit(vi->dev), idx); 4094 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 4095 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, &vi->ctx, 4096 oid); 4097 if (rc != 0) { 4098 CH_ERR(vi, "failed to allocate ofld_rxq%d: %d\n", idx, 4099 rc); 4100 sysctl_remove_oid(oid, 1, 1); 4101 return (rc); 4102 } 4103 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4104 ofld_rxq->rx_iscsi_ddp_setup_ok = counter_u64_alloc(M_WAITOK); 4105 ofld_rxq->rx_iscsi_ddp_setup_error = 4106 counter_u64_alloc(M_WAITOK); 4107 add_ofld_rxq_sysctls(&vi->ctx, oid, ofld_rxq); 4108 } 4109 4110 if (!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)) { 4111 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4112 rc = alloc_iq_fl_hwq(vi, &ofld_rxq->iq, &ofld_rxq->fl); 4113 if (rc != 0) { 4114 CH_ERR(vi, "failed to create hw ofld_rxq%d: %d\n", idx, 4115 rc); 4116 return (rc); 4117 } 4118 MPASS(ofld_rxq->iq.flags & IQ_HW_ALLOCATED); 4119 } 4120 return (rc); 
4121 } 4122 4123 /* 4124 * Idempotent. 4125 */ 4126 static void 4127 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 4128 { 4129 if (ofld_rxq->iq.flags & IQ_HW_ALLOCATED) { 4130 MPASS(ofld_rxq->iq.flags & IQ_SW_ALLOCATED); 4131 free_iq_fl_hwq(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4132 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4133 } 4134 4135 if (ofld_rxq->iq.flags & IQ_SW_ALLOCATED) { 4136 MPASS(!(ofld_rxq->iq.flags & IQ_HW_ALLOCATED)); 4137 free_iq_fl(vi->adapter, &ofld_rxq->iq, &ofld_rxq->fl); 4138 MPASS(!(ofld_rxq->iq.flags & IQ_SW_ALLOCATED)); 4139 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_ok); 4140 counter_u64_free(ofld_rxq->rx_iscsi_ddp_setup_error); 4141 bzero(ofld_rxq, sizeof(*ofld_rxq)); 4142 } 4143 } 4144 4145 static void 4146 add_ofld_rxq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4147 struct sge_ofld_rxq *ofld_rxq) 4148 { 4149 struct sysctl_oid_list *children; 4150 4151 if (ctx == NULL || oid == NULL) 4152 return; 4153 4154 children = SYSCTL_CHILDREN(oid); 4155 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4156 "rx_toe_tls_records", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_records, 4157 "# of TOE TLS records received"); 4158 SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, 4159 "rx_toe_tls_octets", CTLFLAG_RD, &ofld_rxq->rx_toe_tls_octets, 4160 "# of payload octets in received TOE TLS records"); 4161 4162 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "iscsi", 4163 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE iSCSI statistics"); 4164 children = SYSCTL_CHILDREN(oid); 4165 4166 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_ok", 4167 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_ok, 4168 "# of times DDP buffer was setup successfully."); 4169 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "ddp_setup_error", 4170 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_setup_error, 4171 "# of times DDP buffer setup failed."); 4172 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_octets", 4173 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_octets, 0, 4174 
"# of octets placed directly"); 4175 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "ddp_pdus", 4176 CTLFLAG_RD, &ofld_rxq->rx_iscsi_ddp_pdus, 0, 4177 "# of PDUs with data placed directly."); 4178 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_octets", 4179 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_octets, 0, 4180 "# of data octets delivered in freelist"); 4181 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "fl_pdus", 4182 CTLFLAG_RD, &ofld_rxq->rx_iscsi_fl_pdus, 0, 4183 "# of PDUs with data delivered in freelist"); 4184 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "padding_errors", 4185 CTLFLAG_RD, &ofld_rxq->rx_iscsi_padding_errors, 0, 4186 "# of PDUs with invalid padding"); 4187 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "header_digest_errors", 4188 CTLFLAG_RD, &ofld_rxq->rx_iscsi_header_digest_errors, 0, 4189 "# of PDUs with invalid header digests"); 4190 SYSCTL_ADD_U64(ctx, children, OID_AUTO, "data_digest_errors", 4191 CTLFLAG_RD, &ofld_rxq->rx_iscsi_data_digest_errors, 0, 4192 "# of PDUs with invalid data digests"); 4193 } 4194 #endif 4195 4196 /* 4197 * Returns a reasonable automatic cidx flush threshold for a given queue size. 
4198 */ 4199 static u_int 4200 qsize_to_fthresh(int qsize) 4201 { 4202 u_int fthresh; 4203 4204 while (!powerof2(qsize)) 4205 qsize++; 4206 fthresh = ilog2(qsize); 4207 if (fthresh > X_CIDXFLUSHTHRESH_128) 4208 fthresh = X_CIDXFLUSHTHRESH_128; 4209 4210 return (fthresh); 4211 } 4212 4213 static int 4214 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 4215 { 4216 int rc, cntxt_id; 4217 struct fw_eq_ctrl_cmd c; 4218 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4219 4220 bzero(&c, sizeof(c)); 4221 4222 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 4223 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 4224 V_FW_EQ_CTRL_CMD_VFN(0)); 4225 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 4226 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 4227 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 4228 c.physeqid_pkd = htobe32(0); 4229 c.fetchszm_to_iqid = 4230 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4231 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 4232 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 4233 c.dcaen_to_eqsize = 4234 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4235 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4236 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4237 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4238 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 4239 c.eqaddr = htobe64(eq->ba); 4240 4241 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4242 if (rc != 0) { 4243 CH_ERR(sc, "failed to create hw ctrlq for tx_chan %d: %d\n", 4244 eq->tx_chan, rc); 4245 return (rc); 4246 } 4247 4248 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 4249 eq->abs_id = G_FW_EQ_CTRL_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4250 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4251 if (cntxt_id >= sc->sge.eqmap_sz) 4252 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4253 cntxt_id, sc->sge.eqmap_sz - 1); 4254 sc->sge.eqmap[cntxt_id] = eq; 4255 4256 return (rc); 4257 } 4258 4259 static int 4260 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4261 { 4262 int rc, cntxt_id; 4263 struct fw_eq_eth_cmd c; 4264 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4265 4266 bzero(&c, sizeof(c)); 4267 4268 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 4269 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 4270 V_FW_EQ_ETH_CMD_VFN(0)); 4271 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 4272 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 4273 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 4274 F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); 4275 c.fetchszm_to_iqid = 4276 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 4277 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 4278 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 4279 c.dcaen_to_eqsize = 4280 htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4281 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4282 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4283 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 4284 c.eqaddr = htobe64(eq->ba); 4285 4286 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4287 if (rc != 0) { 4288 device_printf(vi->dev, 4289 "failed to create Ethernet egress queue: %d\n", rc); 4290 return (rc); 4291 } 4292 4293 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 4294 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4295 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4296 if (cntxt_id >= sc->sge.eqmap_sz) 4297 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4298 cntxt_id, sc->sge.eqmap_sz - 1); 4299 sc->sge.eqmap[cntxt_id] = eq; 4300 4301 return (rc); 4302 } 4303 4304 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4305 static int 4306 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4307 { 4308 int rc, cntxt_id; 4309 struct fw_eq_ofld_cmd c; 4310 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4311 4312 bzero(&c, sizeof(c)); 4313 4314 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 4315 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 4316 V_FW_EQ_OFLD_CMD_VFN(0)); 4317 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 4318 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 4319 c.fetchszm_to_iqid = 4320 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 4321 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 4322 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 4323 c.dcaen_to_eqsize = 4324 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
4325 X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) | 4326 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 4327 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 4328 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 4329 c.eqaddr = htobe64(eq->ba); 4330 4331 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 4332 if (rc != 0) { 4333 device_printf(vi->dev, 4334 "failed to create egress queue for TCP offload: %d\n", rc); 4335 return (rc); 4336 } 4337 4338 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 4339 eq->abs_id = G_FW_EQ_OFLD_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 4340 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 4341 if (cntxt_id >= sc->sge.eqmap_sz) 4342 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 4343 cntxt_id, sc->sge.eqmap_sz - 1); 4344 sc->sge.eqmap[cntxt_id] = eq; 4345 4346 return (rc); 4347 } 4348 #endif 4349 4350 /* SW only */ 4351 static int 4352 alloc_eq(struct adapter *sc, struct sge_eq *eq, struct sysctl_ctx_list *ctx, 4353 struct sysctl_oid *oid) 4354 { 4355 int rc, qsize; 4356 size_t len; 4357 4358 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4359 4360 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 4361 len = qsize * EQ_ESIZE; 4362 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, 4363 (void **)&eq->desc); 4364 if (rc) 4365 return (rc); 4366 if (ctx != NULL && oid != NULL) 4367 add_eq_sysctls(sc, ctx, oid, eq); 4368 eq->flags |= EQ_SW_ALLOCATED; 4369 4370 return (0); 4371 } 4372 4373 /* SW only */ 4374 static void 4375 free_eq(struct adapter *sc, struct sge_eq *eq) 4376 { 4377 MPASS(eq->flags & EQ_SW_ALLOCATED); 4378 if (eq->type == EQ_ETH) 4379 MPASS(eq->pidx == eq->cidx); 4380 4381 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 4382 mtx_destroy(&eq->eq_lock); 4383 bzero(eq, sizeof(*eq)); 4384 } 4385 4386 static void 4387 add_eq_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 4388 struct sysctl_oid *oid, struct sge_eq *eq) 4389 { 4390 struct sysctl_oid_list *children = 
SYSCTL_CHILDREN(oid); 4391 4392 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, 4393 "bus address of descriptor ring"); 4394 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4395 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4396 "desc ring size in bytes"); 4397 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4398 &eq->abs_id, 0, "absolute id of the queue"); 4399 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4400 &eq->cntxt_id, 0, "SGE context id of the queue"); 4401 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &eq->cidx, 4402 0, "consumer index"); 4403 SYSCTL_ADD_U16(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &eq->pidx, 4404 0, "producer index"); 4405 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4406 eq->sidx, "status page index"); 4407 } 4408 4409 static int 4410 alloc_eq_hwq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 4411 { 4412 int rc; 4413 4414 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4415 4416 eq->iqid = eq->iq->cntxt_id; 4417 eq->pidx = eq->cidx = eq->dbidx = 0; 4418 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. 
*/ 4419 eq->equeqidx = 0; 4420 eq->doorbells = sc->doorbells; 4421 bzero(eq->desc, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len); 4422 4423 switch (eq->type) { 4424 case EQ_CTRL: 4425 rc = ctrl_eq_alloc(sc, eq); 4426 break; 4427 4428 case EQ_ETH: 4429 rc = eth_eq_alloc(sc, vi, eq); 4430 break; 4431 4432 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4433 case EQ_OFLD: 4434 rc = ofld_eq_alloc(sc, vi, eq); 4435 break; 4436 #endif 4437 4438 default: 4439 panic("%s: invalid eq type %d.", __func__, eq->type); 4440 } 4441 if (rc != 0) { 4442 CH_ERR(sc, "failed to allocate egress queue(%d): %d\n", 4443 eq->type, rc); 4444 return (rc); 4445 } 4446 4447 if (isset(&eq->doorbells, DOORBELL_UDB) || 4448 isset(&eq->doorbells, DOORBELL_UDBWC) || 4449 isset(&eq->doorbells, DOORBELL_WCWR)) { 4450 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 4451 uint32_t mask = (1 << s_qpp) - 1; 4452 volatile uint8_t *udb; 4453 4454 udb = sc->udbs_base + UDBS_DB_OFFSET; 4455 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 4456 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 4457 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 4458 clrbit(&eq->doorbells, DOORBELL_WCWR); 4459 else { 4460 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 4461 eq->udb_qid = 0; 4462 } 4463 eq->udb = (volatile void *)udb; 4464 } 4465 4466 eq->flags |= EQ_HW_ALLOCATED; 4467 return (0); 4468 } 4469 4470 static int 4471 free_eq_hwq(struct adapter *sc, struct vi_info *vi __unused, struct sge_eq *eq) 4472 { 4473 int rc; 4474 4475 MPASS(eq->flags & EQ_HW_ALLOCATED); 4476 4477 switch (eq->type) { 4478 case EQ_CTRL: 4479 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4480 break; 4481 case EQ_ETH: 4482 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4483 break; 4484 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4485 case EQ_OFLD: 4486 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); 4487 break; 4488 #endif 4489 default: 4490 panic("%s: invalid eq type %d.", __func__, 
eq->type); 4491 } 4492 if (rc != 0) { 4493 CH_ERR(sc, "failed to free eq (type %d): %d\n", eq->type, rc); 4494 return (rc); 4495 } 4496 eq->flags &= ~EQ_HW_ALLOCATED; 4497 4498 return (0); 4499 } 4500 4501 static int 4502 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 4503 struct sysctl_ctx_list *ctx, struct sysctl_oid *oid) 4504 { 4505 struct sge_eq *eq = &wrq->eq; 4506 int rc; 4507 4508 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4509 4510 rc = alloc_eq(sc, eq, ctx, oid); 4511 if (rc) 4512 return (rc); 4513 MPASS(eq->flags & EQ_SW_ALLOCATED); 4514 /* Can't fail after this. */ 4515 4516 wrq->adapter = sc; 4517 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 4518 TAILQ_INIT(&wrq->incomplete_wrs); 4519 STAILQ_INIT(&wrq->wr_list); 4520 wrq->nwr_pending = 0; 4521 wrq->ndesc_needed = 0; 4522 add_wrq_sysctls(ctx, oid, wrq); 4523 4524 return (0); 4525 } 4526 4527 static void 4528 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 4529 { 4530 free_eq(sc, &wrq->eq); 4531 MPASS(wrq->nwr_pending == 0); 4532 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 4533 MPASS(STAILQ_EMPTY(&wrq->wr_list)); 4534 bzero(wrq, sizeof(*wrq)); 4535 } 4536 4537 static void 4538 add_wrq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4539 struct sge_wrq *wrq) 4540 { 4541 struct sysctl_oid_list *children; 4542 4543 if (ctx == NULL || oid == NULL) 4544 return; 4545 4546 children = SYSCTL_CHILDREN(oid); 4547 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 4548 &wrq->tx_wrs_direct, "# of work requests (direct)"); 4549 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 4550 &wrq->tx_wrs_copied, "# of work requests (copied)"); 4551 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 4552 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 4553 } 4554 4555 /* 4556 * Idempotent. 
4557 */ 4558 static int 4559 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx) 4560 { 4561 int rc, iqidx; 4562 struct port_info *pi = vi->pi; 4563 struct adapter *sc = vi->adapter; 4564 struct sge_eq *eq = &txq->eq; 4565 struct txpkts *txp; 4566 char name[16]; 4567 struct sysctl_oid *oid; 4568 4569 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4570 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4571 4572 snprintf(name, sizeof(name), "%d", idx); 4573 oid = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(vi->txq_oid), 4574 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 4575 "tx queue"); 4576 4577 iqidx = vi->first_rxq + (idx % vi->nrxq); 4578 snprintf(name, sizeof(name), "%s txq%d", 4579 device_get_nameunit(vi->dev), idx); 4580 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 4581 &sc->sge.rxq[iqidx].iq, name); 4582 4583 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, 4584 can_resume_eth_tx, M_CXGBE, &eq->eq_lock, M_WAITOK); 4585 if (rc != 0) { 4586 CH_ERR(vi, "failed to allocate mp_ring for txq%d: %d\n", 4587 idx, rc); 4588 failed: 4589 sysctl_remove_oid(oid, 1, 1); 4590 return (rc); 4591 } 4592 4593 rc = alloc_eq(sc, eq, &vi->ctx, oid); 4594 if (rc) { 4595 CH_ERR(vi, "failed to allocate txq%d: %d\n", idx, rc); 4596 mp_ring_free(txq->r); 4597 goto failed; 4598 } 4599 MPASS(eq->flags & EQ_SW_ALLOCATED); 4600 /* Can't fail after this point. */ 4601 4602 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4603 txq->ifp = vi->ifp; 4604 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4605 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4606 M_ZERO | M_WAITOK); 4607 4608 add_txq_sysctls(vi, &vi->ctx, oid, txq); 4609 } 4610 4611 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4612 MPASS(eq->flags & EQ_SW_ALLOCATED); 4613 rc = alloc_eq_hwq(sc, vi, eq); 4614 if (rc != 0) { 4615 CH_ERR(vi, "failed to create hw txq%d: %d\n", idx, rc); 4616 return (rc); 4617 } 4618 MPASS(eq->flags & EQ_HW_ALLOCATED); 4619 /* Can't fail after this point. 
*/ 4620 4621 if (idx == 0) 4622 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4623 else 4624 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4625 ("eq_base mismatch")); 4626 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4627 ("PF with non-zero eq_base")); 4628 4629 txp = &txq->txp; 4630 MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr); 4631 txq->txp.max_npkt = min(nitems(txp->mb), 4632 sc->params.max_pkts_per_eth_tx_pkts_wr); 4633 if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF)) 4634 txq->txp.max_npkt--; 4635 4636 if (vi->flags & TX_USES_VM_WR) 4637 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4638 V_TXPKT_INTF(pi->tx_chan)); 4639 else 4640 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4641 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 4642 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 4643 4644 txq->tc_idx = -1; 4645 } 4646 4647 return (0); 4648 } 4649 4650 /* 4651 * Idempotent. 4652 */ 4653 static void 4654 free_txq(struct vi_info *vi, struct sge_txq *txq) 4655 { 4656 struct adapter *sc = vi->adapter; 4657 struct sge_eq *eq = &txq->eq; 4658 4659 if (eq->flags & EQ_HW_ALLOCATED) { 4660 MPASS(eq->flags & EQ_SW_ALLOCATED); 4661 free_eq_hwq(sc, NULL, eq); 4662 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4663 } 4664 4665 if (eq->flags & EQ_SW_ALLOCATED) { 4666 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4667 sglist_free(txq->gl); 4668 free(txq->sdesc, M_CXGBE); 4669 mp_ring_free(txq->r); 4670 free_eq(sc, eq); 4671 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4672 bzero(txq, sizeof(*txq)); 4673 } 4674 } 4675 4676 static void 4677 add_txq_sysctls(struct vi_info *vi, struct sysctl_ctx_list *ctx, 4678 struct sysctl_oid *oid, struct sge_txq *txq) 4679 { 4680 struct adapter *sc; 4681 struct sysctl_oid_list *children; 4682 4683 if (ctx == NULL || oid == NULL) 4684 return; 4685 4686 sc = vi->adapter; 4687 children = SYSCTL_CHILDREN(oid); 4688 4689 mp_ring_sysctls(txq->r, ctx, children); 4690 4691 SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, "tc", 4692 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, txq - sc->sge.txq, 4693 sysctl_tc, "I", "traffic class (-1 means none)"); 4694 4695 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4696 &txq->txcsum, "# of times hardware assisted with checksum"); 4697 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, 4698 &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); 4699 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4700 &txq->tso_wrs, "# of TSO work requests"); 4701 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4702 &txq->imm_wrs, "# of work requests with immediate data"); 4703 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4704 &txq->sgl_wrs, "# of work requests with direct SGL"); 4705 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4706 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4707 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, 4708 &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); 4709 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, 4710 &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); 4711 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, 4712 &txq->txpkts0_pkts, 4713 "# of frames tx'd using type0 txpkts work requests"); 4714 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, 4715 &txq->txpkts1_pkts, 4716 "# of frames tx'd using type1 txpkts work requests"); 4717 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "txpkts_flush", CTLFLAG_RD, 4718 &txq->txpkts_flush, 4719 "# of times txpkts had to be flushed out by an egress-update"); 4720 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, 4721 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4722 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "vxlan_tso_wrs", CTLFLAG_RD, 4723 &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests"); 4724 SYSCTL_ADD_UQUAD(ctx, 
children, OID_AUTO, "vxlan_txcsum", CTLFLAG_RD, 4725 &txq->vxlan_txcsum, 4726 "# of times hardware assisted with inner checksums (VXLAN)"); 4727 4728 #ifdef KERN_TLS 4729 if (is_ktls(sc)) { 4730 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_records", 4731 CTLFLAG_RD, &txq->kern_tls_records, 4732 "# of NIC TLS records transmitted"); 4733 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_short", 4734 CTLFLAG_RD, &txq->kern_tls_short, 4735 "# of short NIC TLS records transmitted"); 4736 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_partial", 4737 CTLFLAG_RD, &txq->kern_tls_partial, 4738 "# of partial NIC TLS records transmitted"); 4739 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_full", 4740 CTLFLAG_RD, &txq->kern_tls_full, 4741 "# of full NIC TLS records transmitted"); 4742 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_octets", 4743 CTLFLAG_RD, &txq->kern_tls_octets, 4744 "# of payload octets in transmitted NIC TLS records"); 4745 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_waste", 4746 CTLFLAG_RD, &txq->kern_tls_waste, 4747 "# of octets DMAd but not transmitted in NIC TLS records"); 4748 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_options", 4749 CTLFLAG_RD, &txq->kern_tls_options, 4750 "# of NIC TLS options-only packets transmitted"); 4751 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_header", 4752 CTLFLAG_RD, &txq->kern_tls_header, 4753 "# of NIC TLS header-only packets transmitted"); 4754 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin", 4755 CTLFLAG_RD, &txq->kern_tls_fin, 4756 "# of NIC TLS FIN-only packets transmitted"); 4757 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_fin_short", 4758 CTLFLAG_RD, &txq->kern_tls_fin_short, 4759 "# of NIC TLS padded FIN packets on short TLS records"); 4760 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_cbc", 4761 CTLFLAG_RD, &txq->kern_tls_cbc, 4762 "# of NIC TLS sessions using AES-CBC"); 4763 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "kern_tls_gcm", 4764 
CTLFLAG_RD, &txq->kern_tls_gcm, 4765 "# of NIC TLS sessions using AES-GCM"); 4766 } 4767 #endif 4768 } 4769 4770 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 4771 /* 4772 * Idempotent. 4773 */ 4774 static int 4775 alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx) 4776 { 4777 struct sysctl_oid *oid; 4778 struct port_info *pi = vi->pi; 4779 struct adapter *sc = vi->adapter; 4780 struct sge_eq *eq = &ofld_txq->wrq.eq; 4781 int rc, iqidx; 4782 char name[16]; 4783 4784 MPASS(idx >= 0); 4785 MPASS(idx < vi->nofldtxq); 4786 4787 if (!(eq->flags & EQ_SW_ALLOCATED)) { 4788 snprintf(name, sizeof(name), "%d", idx); 4789 oid = SYSCTL_ADD_NODE(&vi->ctx, 4790 SYSCTL_CHILDREN(vi->ofld_txq_oid), OID_AUTO, name, 4791 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "offload tx queue"); 4792 4793 snprintf(name, sizeof(name), "%s ofld_txq%d", 4794 device_get_nameunit(vi->dev), idx); 4795 if (vi->nofldrxq > 0) { 4796 iqidx = vi->first_ofld_rxq + (idx % vi->nofldrxq); 4797 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4798 &sc->sge.ofld_rxq[iqidx].iq, name); 4799 } else { 4800 iqidx = vi->first_rxq + (idx % vi->nrxq); 4801 init_eq(sc, eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, 4802 &sc->sge.rxq[iqidx].iq, name); 4803 } 4804 4805 rc = alloc_wrq(sc, vi, &ofld_txq->wrq, &vi->ctx, oid); 4806 if (rc != 0) { 4807 CH_ERR(vi, "failed to allocate ofld_txq%d: %d\n", idx, 4808 rc); 4809 sysctl_remove_oid(oid, 1, 1); 4810 return (rc); 4811 } 4812 MPASS(eq->flags & EQ_SW_ALLOCATED); 4813 /* Can't fail after this point. 
*/ 4814 4815 ofld_txq->tx_iscsi_pdus = counter_u64_alloc(M_WAITOK); 4816 ofld_txq->tx_iscsi_octets = counter_u64_alloc(M_WAITOK); 4817 ofld_txq->tx_iscsi_iso_wrs = counter_u64_alloc(M_WAITOK); 4818 ofld_txq->tx_toe_tls_records = counter_u64_alloc(M_WAITOK); 4819 ofld_txq->tx_toe_tls_octets = counter_u64_alloc(M_WAITOK); 4820 add_ofld_txq_sysctls(&vi->ctx, oid, ofld_txq); 4821 } 4822 4823 if (!(eq->flags & EQ_HW_ALLOCATED)) { 4824 rc = alloc_eq_hwq(sc, vi, eq); 4825 if (rc != 0) { 4826 CH_ERR(vi, "failed to create hw ofld_txq%d: %d\n", idx, 4827 rc); 4828 return (rc); 4829 } 4830 MPASS(eq->flags & EQ_HW_ALLOCATED); 4831 } 4832 4833 return (0); 4834 } 4835 4836 /* 4837 * Idempotent. 4838 */ 4839 static void 4840 free_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq) 4841 { 4842 struct adapter *sc = vi->adapter; 4843 struct sge_eq *eq = &ofld_txq->wrq.eq; 4844 4845 if (eq->flags & EQ_HW_ALLOCATED) { 4846 MPASS(eq->flags & EQ_SW_ALLOCATED); 4847 free_eq_hwq(sc, NULL, eq); 4848 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4849 } 4850 4851 if (eq->flags & EQ_SW_ALLOCATED) { 4852 MPASS(!(eq->flags & EQ_HW_ALLOCATED)); 4853 counter_u64_free(ofld_txq->tx_iscsi_pdus); 4854 counter_u64_free(ofld_txq->tx_iscsi_octets); 4855 counter_u64_free(ofld_txq->tx_iscsi_iso_wrs); 4856 counter_u64_free(ofld_txq->tx_toe_tls_records); 4857 counter_u64_free(ofld_txq->tx_toe_tls_octets); 4858 free_wrq(sc, &ofld_txq->wrq); 4859 MPASS(!(eq->flags & EQ_SW_ALLOCATED)); 4860 bzero(ofld_txq, sizeof(*ofld_txq)); 4861 } 4862 } 4863 4864 static void 4865 add_ofld_txq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 4866 struct sge_ofld_txq *ofld_txq) 4867 { 4868 struct sysctl_oid_list *children; 4869 4870 if (ctx == NULL || oid == NULL) 4871 return; 4872 4873 children = SYSCTL_CHILDREN(oid); 4874 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_pdus", 4875 CTLFLAG_RD, &ofld_txq->tx_iscsi_pdus, 4876 "# of iSCSI PDUs transmitted"); 4877 SYSCTL_ADD_COUNTER_U64(ctx, children, 
OID_AUTO, "tx_iscsi_octets", 4878 CTLFLAG_RD, &ofld_txq->tx_iscsi_octets, 4879 "# of payload octets in transmitted iSCSI PDUs"); 4880 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_iso_wrs", 4881 CTLFLAG_RD, &ofld_txq->tx_iscsi_iso_wrs, 4882 "# of iSCSI segmentation offload work requests"); 4883 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_records", 4884 CTLFLAG_RD, &ofld_txq->tx_toe_tls_records, 4885 "# of TOE TLS records transmitted"); 4886 SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_octets", 4887 CTLFLAG_RD, &ofld_txq->tx_toe_tls_octets, 4888 "# of payload octets in transmitted TOE TLS records"); 4889 } 4890 #endif 4891 4892 static void 4893 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4894 { 4895 bus_addr_t *ba = arg; 4896 4897 KASSERT(nseg == 1, 4898 ("%s meant for single segment mappings only.", __func__)); 4899 4900 *ba = error ? 0 : segs->ds_addr; 4901 } 4902 4903 static inline void 4904 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4905 { 4906 uint32_t n, v; 4907 4908 n = IDXDIFF(fl->pidx >> 3, fl->dbidx, fl->sidx); 4909 MPASS(n > 0); 4910 4911 wmb(); 4912 v = fl->dbval | V_PIDX(n); 4913 if (fl->udb) 4914 *fl->udb = htole32(v); 4915 else 4916 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4917 IDXINCR(fl->dbidx, n, fl->sidx); 4918 } 4919 4920 /* 4921 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4922 * recycled do not count towards this allocation budget. 4923 * 4924 * Returns non-zero to indicate that this freelist should be added to the list 4925 * of starving freelists. 
/*
 * Fills up the freelist by allocating up to 'n' buffers.  Buffers that are
 * recycled do not count towards this allocation budget.
 *
 * Returns non-zero to indicate that this freelist should be added to the list
 * of starving freelists.
 *
 * The freelist lock must be held by the caller (asserted below).  fl->pidx
 * counts individual buffers whereas fl->dbidx, fl->sidx and the hardware cidx
 * are compared against (fl->pidx >> 3), i.e. one unit per group of 8 buffers.
 */
static int
refill_fl(struct adapter *sc, struct sge_fl *fl, int n)
{
	__be64 *d;
	struct fl_sdesc *sd;
	uintptr_t pa;
	caddr_t cl;
	struct rx_buf_info *rxb;
	struct cluster_metadata *clm;
	uint16_t max_pidx, zidx = fl->zidx;
	uint16_t hw_cidx = fl->hw_cidx;		/* stable snapshot */

	FL_LOCK_ASSERT_OWNED(fl);

	/*
	 * We always stop at the beginning of the hardware descriptor that's just
	 * before the one with the hw cidx.  This is to avoid hw pidx = hw cidx,
	 * which would mean an empty freelist to the chip.
	 */
	max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1;
	if (fl->pidx == max_pidx * 8)
		return (0);	/* already as full as it is allowed to get */

	d = &fl->desc[fl->pidx];
	sd = &fl->sdesc[fl->pidx];
	rxb = &sc->sge.rx_buf_info[zidx];

	while (n > 0) {

		if (sd->cl != NULL) {

			if (sd->nmbuf == 0) {
				/*
				 * Fast recycle without involving any atomics on
				 * the cluster's metadata (if the cluster has
				 * metadata).  This happens when all frames
				 * received in the cluster were small enough to
				 * fit within a single mbuf each.
				 */
				fl->cl_fast_recycled++;
				goto recycled;
			}

			/*
			 * Cluster is guaranteed to have metadata.  Clusters
			 * without metadata always take the fast recycle path
			 * when they're recycled.
			 */
			clm = cl_metadata(sd);
			MPASS(clm != NULL);

			/*
			 * Drop this slot's reference.  If it was the last one
			 * the cluster is reused in place, otherwise the slot
			 * lets go of it and a new cluster is allocated below.
			 */
			if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
				fl->cl_recycled++;
				counter_u64_add(extfree_rels, 1);
				goto recycled;
			}
			sd->cl = NULL;	/* gave up my reference */
		}
		MPASS(sd->cl == NULL);
		cl = uma_zalloc(rxb->zone, M_NOWAIT);
		if (__predict_false(cl == NULL)) {
			/*
			 * Retry once from the freelist's "safe" zone.  Note
			 * that zidx/rxb stay switched to the safe zone for the
			 * remainder of this call.
			 */
			if (zidx != fl->safe_zidx) {
				zidx = fl->safe_zidx;
				rxb = &sc->sge.rx_buf_info[zidx];
				cl = uma_zalloc(rxb->zone, M_NOWAIT);
			}
			if (cl == NULL)
				break;
		}
		fl->cl_allocated++;
		n--;	/* only fresh allocations consume the budget */

		pa = pmap_kextract((vm_offset_t)cl);
		sd->cl = cl;
		sd->zidx = zidx;

		/*
		 * The descriptor is the cluster's physical address OR'ed with
		 * the hardware index for the buffer size; which index (and
		 * which metadata offset) applies depends on buffer packing.
		 */
		if (fl->flags & FL_BUF_PACKING) {
			*d = htobe64(pa | rxb->hwidx2);
			sd->moff = rxb->size2;
		} else {
			*d = htobe64(pa | rxb->hwidx1);
			sd->moff = 0;
		}
recycled:
		sd->nmbuf = 0;
		d++;
		sd++;
		/* A group of 8 buffers has just been completed. */
		if (__predict_false((++fl->pidx & 7) == 0)) {
			uint16_t pidx = fl->pidx >> 3;

			/* Wrap around to the start of the ring. */
			if (__predict_false(pidx == fl->sidx)) {
				fl->pidx = 0;
				pidx = 0;
				sd = fl->sdesc;
				d = fl->desc;
			}
			/*
			 * Stop if the remaining budget cannot complete another
			 * group of 8 or if the fill limit has been reached.
			 */
			if (n < 8 || pidx == max_pidx)
				break;

			/* Ring the doorbell once 4 or more groups are pending. */
			if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4)
				ring_fl_db(sc, fl);
		}
	}

	/* Tell the hardware about any completed groups not yet announced. */
	if ((fl->pidx >> 3) != fl->dbidx)
		ring_fl_db(sc, fl);

	return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
}
5039 */ 5040 static void 5041 refill_sfl(void *arg) 5042 { 5043 struct adapter *sc = arg; 5044 struct sge_fl *fl, *fl_temp; 5045 5046 mtx_assert(&sc->sfl_lock, MA_OWNED); 5047 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 5048 FL_LOCK(fl); 5049 refill_fl(sc, fl, 64); 5050 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 5051 TAILQ_REMOVE(&sc->sfl, fl, link); 5052 fl->flags &= ~FL_STARVING; 5053 } 5054 FL_UNLOCK(fl); 5055 } 5056 5057 if (!TAILQ_EMPTY(&sc->sfl)) 5058 callout_schedule(&sc->sfl_callout, hz / 5); 5059 } 5060 5061 /* 5062 * Release the driver's reference on all buffers in the given freelist. Buffers 5063 * with kernel references cannot be freed and will prevent the driver from being 5064 * unloaded safely. 5065 */ 5066 void 5067 free_fl_buffers(struct adapter *sc, struct sge_fl *fl) 5068 { 5069 struct fl_sdesc *sd; 5070 struct cluster_metadata *clm; 5071 int i; 5072 5073 sd = fl->sdesc; 5074 for (i = 0; i < fl->sidx * 8; i++, sd++) { 5075 if (sd->cl == NULL) 5076 continue; 5077 5078 if (sd->nmbuf == 0) 5079 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, sd->cl); 5080 else if (fl->flags & FL_BUF_PACKING) { 5081 clm = cl_metadata(sd); 5082 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 5083 uma_zfree(sc->sge.rx_buf_info[sd->zidx].zone, 5084 sd->cl); 5085 counter_u64_add(extfree_rels, 1); 5086 } 5087 } 5088 sd->cl = NULL; 5089 } 5090 5091 if (fl->flags & FL_BUF_RESUME) { 5092 m_freem(fl->m0); 5093 fl->flags &= ~FL_BUF_RESUME; 5094 } 5095 } 5096 5097 static inline void 5098 get_pkt_gl(struct mbuf *m, struct sglist *gl) 5099 { 5100 int rc; 5101 5102 M_ASSERTPKTHDR(m); 5103 5104 sglist_reset(gl); 5105 rc = sglist_append_mbuf(gl, m); 5106 if (__predict_false(rc != 0)) { 5107 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 5108 "with %d.", __func__, m, mbuf_nsegs(m), rc); 5109 } 5110 5111 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 5112 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 5113 mbuf_nsegs(m), gl->sg_nseg)); 5114 
#if 0 /* vm_wr not readily available here. */ 5115 KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr), 5116 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 5117 gl->sg_nseg, max_nsegs_allowed(m, vm_wr))); 5118 #endif 5119 } 5120 5121 /* 5122 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 5123 */ 5124 static inline u_int 5125 txpkt_len16(u_int nsegs, const u_int extra) 5126 { 5127 u_int n; 5128 5129 MPASS(nsegs > 0); 5130 5131 nsegs--; /* first segment is part of ulptx_sgl */ 5132 n = extra + sizeof(struct fw_eth_tx_pkt_wr) + 5133 sizeof(struct cpl_tx_pkt_core) + 5134 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5135 5136 return (howmany(n, 16)); 5137 } 5138 5139 /* 5140 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 5141 * request header. 5142 */ 5143 static inline u_int 5144 txpkt_vm_len16(u_int nsegs, const u_int extra) 5145 { 5146 u_int n; 5147 5148 MPASS(nsegs > 0); 5149 5150 nsegs--; /* first segment is part of ulptx_sgl */ 5151 n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) + 5152 sizeof(struct cpl_tx_pkt_core) + 5153 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5154 5155 return (howmany(n, 16)); 5156 } 5157 5158 static inline void 5159 calculate_mbuf_len16(struct mbuf *m, bool vm_wr) 5160 { 5161 const int lso = sizeof(struct cpl_tx_pkt_lso_core); 5162 const int tnl_lso = sizeof(struct cpl_tx_tnl_lso); 5163 5164 if (vm_wr) { 5165 if (needs_tso(m)) 5166 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso)); 5167 else 5168 set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), 0)); 5169 return; 5170 } 5171 5172 if (needs_tso(m)) { 5173 if (needs_vxlan_tso(m)) 5174 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), tnl_lso)); 5175 else 5176 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), lso)); 5177 } else 5178 set_mbuf_len16(m, txpkt_len16(mbuf_nsegs(m), 0)); 5179 } 5180 5181 /* 5182 * len16 for a txpkts type 0 WR with a GL. 
Does not include the firmware work 5183 * request header. 5184 */ 5185 static inline u_int 5186 txpkts0_len16(u_int nsegs) 5187 { 5188 u_int n; 5189 5190 MPASS(nsegs > 0); 5191 5192 nsegs--; /* first segment is part of ulptx_sgl */ 5193 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 5194 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 5195 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5196 5197 return (howmany(n, 16)); 5198 } 5199 5200 /* 5201 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 5202 * request header. 5203 */ 5204 static inline u_int 5205 txpkts1_len16(void) 5206 { 5207 u_int n; 5208 5209 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 5210 5211 return (howmany(n, 16)); 5212 } 5213 5214 static inline u_int 5215 imm_payload(u_int ndesc) 5216 { 5217 u_int n; 5218 5219 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 5220 sizeof(struct cpl_tx_pkt_core); 5221 5222 return (n); 5223 } 5224 5225 static inline uint64_t 5226 csum_to_ctrl(struct adapter *sc, struct mbuf *m) 5227 { 5228 uint64_t ctrl; 5229 int csum_type, l2hlen, l3hlen; 5230 int x, y; 5231 static const int csum_types[3][2] = { 5232 {TX_CSUM_TCPIP, TX_CSUM_TCPIP6}, 5233 {TX_CSUM_UDPIP, TX_CSUM_UDPIP6}, 5234 {TX_CSUM_IP, 0} 5235 }; 5236 5237 M_ASSERTPKTHDR(m); 5238 5239 if (!needs_hwcsum(m)) 5240 return (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); 5241 5242 MPASS(m->m_pkthdr.l2hlen >= ETHER_HDR_LEN); 5243 MPASS(m->m_pkthdr.l3hlen >= sizeof(struct ip)); 5244 5245 if (needs_vxlan_csum(m)) { 5246 MPASS(m->m_pkthdr.l4hlen > 0); 5247 MPASS(m->m_pkthdr.l5hlen > 0); 5248 MPASS(m->m_pkthdr.inner_l2hlen >= ETHER_HDR_LEN); 5249 MPASS(m->m_pkthdr.inner_l3hlen >= sizeof(struct ip)); 5250 5251 l2hlen = m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + 5252 m->m_pkthdr.l4hlen + m->m_pkthdr.l5hlen + 5253 m->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN; 5254 l3hlen = m->m_pkthdr.inner_l3hlen; 5255 } else { 5256 l2hlen = m->m_pkthdr.l2hlen - ETHER_HDR_LEN; 5257 
l3hlen = m->m_pkthdr.l3hlen; 5258 } 5259 5260 ctrl = 0; 5261 if (!needs_l3_csum(m)) 5262 ctrl |= F_TXPKT_IPCSUM_DIS; 5263 5264 if (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_INNER_IP_TCP | 5265 CSUM_IP6_TCP | CSUM_INNER_IP6_TCP)) 5266 x = 0; /* TCP */ 5267 else if (m->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_INNER_IP_UDP | 5268 CSUM_IP6_UDP | CSUM_INNER_IP6_UDP)) 5269 x = 1; /* UDP */ 5270 else 5271 x = 2; 5272 5273 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP | 5274 CSUM_INNER_IP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) 5275 y = 0; /* IPv4 */ 5276 else { 5277 MPASS(m->m_pkthdr.csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | 5278 CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)); 5279 y = 1; /* IPv6 */ 5280 } 5281 /* 5282 * needs_hwcsum returned true earlier so there must be some kind of 5283 * checksum to calculate. 5284 */ 5285 csum_type = csum_types[x][y]; 5286 MPASS(csum_type != 0); 5287 if (csum_type == TX_CSUM_IP) 5288 ctrl |= F_TXPKT_L4CSUM_DIS; 5289 ctrl |= V_TXPKT_CSUM_TYPE(csum_type) | V_TXPKT_IPHDR_LEN(l3hlen); 5290 if (chip_id(sc) <= CHELSIO_T5) 5291 ctrl |= V_TXPKT_ETHHDR_LEN(l2hlen); 5292 else 5293 ctrl |= V_T6_TXPKT_ETHHDR_LEN(l2hlen); 5294 5295 return (ctrl); 5296 } 5297 5298 static inline void * 5299 write_lso_cpl(void *cpl, struct mbuf *m0) 5300 { 5301 struct cpl_tx_pkt_lso_core *lso; 5302 uint32_t ctrl; 5303 5304 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5305 m0->m_pkthdr.l4hlen > 0, 5306 ("%s: mbuf %p needs TSO but missing header lengths", 5307 __func__, m0)); 5308 5309 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 5310 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 5311 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) | 5312 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 5313 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 5314 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 5315 ctrl |= F_LSO_IPV6; 5316 5317 lso = cpl; 5318 lso->lso_ctrl = htobe32(ctrl); 5319 lso->ipid_ofst = htobe16(0); 5320 lso->mss = 
/*
 * Fills in a tunnel LSO CPL (VXLAN TSO) at 'cpl' using the outer and inner
 * header lengths, the TSO segment size, and the total length recorded in the
 * packet header of m0.
 *
 * Returns a pointer to the location immediately after the CPL.
 */
static void *
write_tnl_lso_cpl(void *cpl, struct mbuf *m0)
{
	struct cpl_tx_tnl_lso *tnl_lso = cpl;
	uint32_t ctrl;

	KASSERT(m0->m_pkthdr.inner_l2hlen > 0 &&
	    m0->m_pkthdr.inner_l3hlen > 0 && m0->m_pkthdr.inner_l4hlen > 0 &&
	    m0->m_pkthdr.inner_l5hlen > 0,
	    ("%s: mbuf %p needs VXLAN_TSO but missing inner header lengths",
	    __func__, m0));
	KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
	    m0->m_pkthdr.l4hlen > 0 && m0->m_pkthdr.l5hlen > 0,
	    ("%s: mbuf %p needs VXLAN_TSO but missing outer header lengths",
	    __func__, m0));

	/*
	 * Outer headers.  Lengths are written as 4-byte counts (>> 2) and the
	 * L2 length excludes the basic Ethernet header.
	 */
	ctrl = V_CPL_TX_TNL_LSO_OPCODE(CPL_TX_TNL_LSO) |
	    F_CPL_TX_TNL_LSO_FIRST | F_CPL_TX_TNL_LSO_LAST |
	    V_CPL_TX_TNL_LSO_ETHHDRLENOUT(
		(m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
	    V_CPL_TX_TNL_LSO_IPHDRLENOUT(m0->m_pkthdr.l3hlen >> 2) |
	    F_CPL_TX_TNL_LSO_IPLENSETOUT;
	/* The outer IP version is inferred from the outer L3 header length. */
	if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
		ctrl |= F_CPL_TX_TNL_LSO_IPV6OUT;
	else {
		ctrl |= F_CPL_TX_TNL_LSO_IPHDRCHKOUT |
		    F_CPL_TX_TNL_LSO_IPIDINCOUT;
	}
	tnl_lso->op_to_IpIdSplitOut = htobe32(ctrl);
	tnl_lso->IpIdOffsetOut = 0;
	/* The tunnel header length is the sum of all outer header lengths. */
	tnl_lso->UdpLenSetOut_to_TnlHdrLen =
		htobe16(F_CPL_TX_TNL_LSO_UDPCHKCLROUT |
		    F_CPL_TX_TNL_LSO_UDPLENSETOUT |
		    V_CPL_TX_TNL_LSO_TNLHDRLEN(m0->m_pkthdr.l2hlen +
			m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen +
			m0->m_pkthdr.l5hlen) |
		    V_CPL_TX_TNL_LSO_TNLTYPE(TX_TNL_TYPE_VXLAN));
	tnl_lso->r1 = 0;

	/* Inner headers. */
	ctrl = V_CPL_TX_TNL_LSO_ETHHDRLEN(
	    (m0->m_pkthdr.inner_l2hlen - ETHER_HDR_LEN) >> 2) |
	    V_CPL_TX_TNL_LSO_IPHDRLEN(m0->m_pkthdr.inner_l3hlen >> 2) |
	    V_CPL_TX_TNL_LSO_TCPHDRLEN(m0->m_pkthdr.inner_l4hlen >> 2);
	/* The inner IP version is inferred from the inner L3 header length. */
	if (m0->m_pkthdr.inner_l3hlen == sizeof(struct ip6_hdr))
		ctrl |= F_CPL_TX_TNL_LSO_IPV6;
	tnl_lso->Flow_to_TcpHdrLen = htobe32(ctrl);
	tnl_lso->IpIdOffset = 0;
	tnl_lso->IpIdSplit_to_Mss =
	    htobe16(V_CPL_TX_TNL_LSO_MSS(m0->m_pkthdr.tso_segsz));
	tnl_lso->TCPSeqOffset = 0;
	tnl_lso->EthLenOffset_Size =
	    htobe32(V_CPL_TX_TNL_LSO_SIZE(m0->m_pkthdr.len));

	return (tnl_lso + 1);
}
/*
 * Write a VM txpkt WR for this packet to the hardware descriptors, update the
 * software descriptor, and advance the pidx.  It is guaranteed that enough
 * descriptors are available.
 *
 * The return value is the # of hardware descriptors used.
 *
 * NOTE(review): eq->pidx is not modified directly in this function; the
 * advance mentioned above presumably happens in the caller or a helper --
 * confirm.
 */
static u_int
write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0)
{
	struct sge_eq *eq;
	struct fw_eth_tx_pkt_vm_wr *wr;
	struct tx_sdesc *txsd;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;	/* used in many unrelated places */
	uint64_t ctrl1;
	int len16, ndesc, pktlen;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);
	M_ASSERTPKTHDR(m0);

	len16 = mbuf_len16(m0);
	pktlen = m0->m_pkthdr.len;
	/* Immediate data length: the tx_pkt CPL plus the LSO CPL for TSO. */
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (needs_tso(m0))
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	ndesc = tx_len16_to_desc(len16);

	/* Firmware work request header */
	eq = &txq->eq;
	wr = (void *)&eq->desc[eq->pidx];
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));

	ctrl = V_FW_WR_LEN16(len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3[0] = 0;
	wr->r3[1] = 0;

	/*
	 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci.
	 * vlantci is ignored unless the ethtype is 0x8100, so it's
	 * simpler to always copy it rather than making it
	 * conditional.  Also, it seems that we do not have to set
	 * vlantci or fake the ethtype when doing VLAN tag insertion.
	 */
	m_copydata(m0, 0, VM_TX_L2HDR_LEN, wr->ethmacdst);

	/* The LSO CPL, when present, sits between the WR header and the CPL. */
	if (needs_tso(m0)) {
		cpl = write_lso_cpl(wr + 1, m0);
		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	ctrl1 = csum_to_ctrl(sc, m0);
	if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* SGL */
	dst = (void *)(cpl + 1);

	/*
	 * A packet using TSO will use up an entire descriptor for the
	 * firmware work request header, LSO CPL, and TX_PKT_XT CPL.
	 * If this descriptor is the last descriptor in the ring, wrap
	 * around to the front of the ring explicitly for the start of
	 * the sgl.
	 */
	if (dst == (void *)&eq->desc[eq->sidx]) {
		dst = (void *)&eq->desc[0];
		write_gl_to_txd(txq, m0, &dst, 0);
	} else {
		/*
		 * The last argument is non-zero when pidx + ndesc goes past
		 * sidx, i.e. this WR runs past the end of the ring.
		 */
		write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
	}
	txq->sgl_wrs++;
	txq->txpkt_wrs++;

	/* Remember the mbuf and descriptor count in the software descriptor. */
	txsd = &txq->sdesc[eq->pidx];
	txsd->m = m0;
	txsd->desc_used = ndesc;

	return (ndesc);
}
5491 */ 5492 static u_int 5493 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 5494 { 5495 struct sge_eq *eq = &txq->eq; 5496 struct tx_sdesc *txsd; 5497 struct mbuf *m; 5498 caddr_t dst; 5499 int len16, ndesc; 5500 5501 len16 = mbuf_len16(m0); 5502 ndesc = tx_len16_to_desc(len16); 5503 MPASS(ndesc <= available); 5504 5505 dst = wr; 5506 for (m = m0; m != NULL; m = m->m_next) 5507 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5508 5509 txq->raw_wrs++; 5510 5511 txsd = &txq->sdesc[eq->pidx]; 5512 txsd->m = m0; 5513 txsd->desc_used = ndesc; 5514 5515 return (ndesc); 5516 } 5517 5518 /* 5519 * Write a txpkt WR for this packet to the hardware descriptors, update the 5520 * software descriptor, and advance the pidx. It is guaranteed that enough 5521 * descriptors are available. 5522 * 5523 * The return value is the # of hardware descriptors used. 5524 */ 5525 static u_int 5526 write_txpkt_wr(struct adapter *sc, struct sge_txq *txq, struct mbuf *m0, 5527 u_int available) 5528 { 5529 struct sge_eq *eq; 5530 struct fw_eth_tx_pkt_wr *wr; 5531 struct tx_sdesc *txsd; 5532 struct cpl_tx_pkt_core *cpl; 5533 uint32_t ctrl; /* used in many unrelated places */ 5534 uint64_t ctrl1; 5535 int len16, ndesc, pktlen, nsegs; 5536 caddr_t dst; 5537 5538 TXQ_LOCK_ASSERT_OWNED(txq); 5539 M_ASSERTPKTHDR(m0); 5540 5541 len16 = mbuf_len16(m0); 5542 nsegs = mbuf_nsegs(m0); 5543 pktlen = m0->m_pkthdr.len; 5544 ctrl = sizeof(struct cpl_tx_pkt_core); 5545 if (needs_tso(m0)) { 5546 if (needs_vxlan_tso(m0)) 5547 ctrl += sizeof(struct cpl_tx_tnl_lso); 5548 else 5549 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5550 } else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && 5551 available >= 2) { 5552 /* Immediate data. Recalculate len16 and set nsegs to 0. 
*/ 5553 ctrl += pktlen; 5554 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 5555 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 5556 nsegs = 0; 5557 } 5558 ndesc = tx_len16_to_desc(len16); 5559 MPASS(ndesc <= available); 5560 5561 /* Firmware work request header */ 5562 eq = &txq->eq; 5563 wr = (void *)&eq->desc[eq->pidx]; 5564 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 5565 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 5566 5567 ctrl = V_FW_WR_LEN16(len16); 5568 wr->equiq_to_len16 = htobe32(ctrl); 5569 wr->r3 = 0; 5570 5571 if (needs_tso(m0)) { 5572 if (needs_vxlan_tso(m0)) { 5573 cpl = write_tnl_lso_cpl(wr + 1, m0); 5574 txq->vxlan_tso_wrs++; 5575 } else { 5576 cpl = write_lso_cpl(wr + 1, m0); 5577 txq->tso_wrs++; 5578 } 5579 } else 5580 cpl = (void *)(wr + 1); 5581 5582 /* Checksum offload */ 5583 ctrl1 = csum_to_ctrl(sc, m0); 5584 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5585 /* some hardware assistance provided */ 5586 if (needs_vxlan_csum(m0)) 5587 txq->vxlan_txcsum++; 5588 else 5589 txq->txcsum++; 5590 } 5591 5592 /* VLAN tag insertion */ 5593 if (needs_vlan_insertion(m0)) { 5594 ctrl1 |= F_TXPKT_VLAN_VLD | 5595 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 5596 txq->vlan_insertion++; 5597 } 5598 5599 /* CPL header */ 5600 cpl->ctrl0 = txq->cpl_ctrl0; 5601 cpl->pack = 0; 5602 cpl->len = htobe16(pktlen); 5603 cpl->ctrl1 = htobe64(ctrl1); 5604 5605 /* SGL */ 5606 dst = (void *)(cpl + 1); 5607 if (__predict_false((uintptr_t)dst == (uintptr_t)&eq->desc[eq->sidx])) 5608 dst = (caddr_t)&eq->desc[0]; 5609 if (nsegs > 0) { 5610 5611 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 5612 txq->sgl_wrs++; 5613 } else { 5614 struct mbuf *m; 5615 5616 for (m = m0; m != NULL; m = m->m_next) { 5617 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 5618 #ifdef INVARIANTS 5619 pktlen -= m->m_len; 5620 #endif 5621 } 5622 #ifdef INVARIANTS 5623 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 5624 #endif 5625 txq->imm_wrs++; 5626 } 5627 
5628 txq->txpkt_wrs++; 5629 5630 txsd = &txq->sdesc[eq->pidx]; 5631 txsd->m = m0; 5632 txsd->desc_used = ndesc; 5633 5634 return (ndesc); 5635 } 5636 5637 static inline bool 5638 cmp_l2hdr(struct txpkts *txp, struct mbuf *m) 5639 { 5640 int len; 5641 5642 MPASS(txp->npkt > 0); 5643 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5644 5645 if (txp->ethtype == be16toh(ETHERTYPE_VLAN)) 5646 len = VM_TX_L2HDR_LEN; 5647 else 5648 len = sizeof(struct ether_header); 5649 5650 return (memcmp(m->m_data, &txp->ethmacdst[0], len) != 0); 5651 } 5652 5653 static inline void 5654 save_l2hdr(struct txpkts *txp, struct mbuf *m) 5655 { 5656 MPASS(m->m_len >= VM_TX_L2HDR_LEN); 5657 5658 memcpy(&txp->ethmacdst[0], mtod(m, const void *), VM_TX_L2HDR_LEN); 5659 } 5660 5661 static int 5662 add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5663 int avail, bool *send) 5664 { 5665 struct txpkts *txp = &txq->txp; 5666 5667 /* Cannot have TSO and coalesce at the same time. */ 5668 if (cannot_use_txpkts(m)) { 5669 cannot_coalesce: 5670 *send = txp->npkt > 0; 5671 return (EINVAL); 5672 } 5673 5674 /* VF allows coalescing of type 1 (1 GL) only */ 5675 if (mbuf_nsegs(m) > 1) 5676 goto cannot_coalesce; 5677 5678 *send = false; 5679 if (txp->npkt > 0) { 5680 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5681 MPASS(txp->npkt < txp->max_npkt); 5682 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5683 5684 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > avail) { 5685 retry_after_send: 5686 *send = true; 5687 return (EAGAIN); 5688 } 5689 if (m->m_pkthdr.len + txp->plen > 65535) 5690 goto retry_after_send; 5691 if (cmp_l2hdr(txp, m)) 5692 goto retry_after_send; 5693 5694 txp->len16 += txpkts1_len16(); 5695 txp->plen += m->m_pkthdr.len; 5696 txp->mb[txp->npkt++] = m; 5697 if (txp->npkt == txp->max_npkt) 5698 *send = true; 5699 } else { 5700 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_vm_wr), 16) + 5701 txpkts1_len16(); 5702 if (tx_len16_to_desc(txp->len16) > avail) 5703 
goto cannot_coalesce; 5704 txp->npkt = 1; 5705 txp->wr_type = 1; 5706 txp->plen = m->m_pkthdr.len; 5707 txp->mb[0] = m; 5708 save_l2hdr(txp, m); 5709 } 5710 return (0); 5711 } 5712 5713 static int 5714 add_to_txpkts_pf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m, 5715 int avail, bool *send) 5716 { 5717 struct txpkts *txp = &txq->txp; 5718 int nsegs; 5719 5720 MPASS(!(sc->flags & IS_VF)); 5721 5722 /* Cannot have TSO and coalesce at the same time. */ 5723 if (cannot_use_txpkts(m)) { 5724 cannot_coalesce: 5725 *send = txp->npkt > 0; 5726 return (EINVAL); 5727 } 5728 5729 *send = false; 5730 nsegs = mbuf_nsegs(m); 5731 if (txp->npkt == 0) { 5732 if (m->m_pkthdr.len > 65535) 5733 goto cannot_coalesce; 5734 if (nsegs > 1) { 5735 txp->wr_type = 0; 5736 txp->len16 = 5737 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5738 txpkts0_len16(nsegs); 5739 } else { 5740 txp->wr_type = 1; 5741 txp->len16 = 5742 howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + 5743 txpkts1_len16(); 5744 } 5745 if (tx_len16_to_desc(txp->len16) > avail) 5746 goto cannot_coalesce; 5747 txp->npkt = 1; 5748 txp->plen = m->m_pkthdr.len; 5749 txp->mb[0] = m; 5750 } else { 5751 MPASS(tx_len16_to_desc(txp->len16) <= avail); 5752 MPASS(txp->npkt < txp->max_npkt); 5753 5754 if (m->m_pkthdr.len + txp->plen > 65535) { 5755 retry_after_send: 5756 *send = true; 5757 return (EAGAIN); 5758 } 5759 5760 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 5761 if (txp->wr_type == 0) { 5762 if (tx_len16_to_desc(txp->len16 + 5763 txpkts0_len16(nsegs)) > min(avail, SGE_MAX_WR_NDESC)) 5764 goto retry_after_send; 5765 txp->len16 += txpkts0_len16(nsegs); 5766 } else { 5767 if (nsegs != 1) 5768 goto retry_after_send; 5769 if (tx_len16_to_desc(txp->len16 + txpkts1_len16()) > 5770 avail) 5771 goto retry_after_send; 5772 txp->len16 += txpkts1_len16(); 5773 } 5774 5775 txp->plen += m->m_pkthdr.len; 5776 txp->mb[txp->npkt++] = m; 5777 if (txp->npkt == txp->max_npkt) 5778 *send = true; 5779 } 5780 return (0); 5781 } 5782 
5783 /* 5784 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 5785 * the software descriptor, and advance the pidx. It is guaranteed that enough 5786 * descriptors are available. 5787 * 5788 * The return value is the # of hardware descriptors used. 5789 */ 5790 static u_int 5791 write_txpkts_wr(struct adapter *sc, struct sge_txq *txq) 5792 { 5793 const struct txpkts *txp = &txq->txp; 5794 struct sge_eq *eq = &txq->eq; 5795 struct fw_eth_tx_pkts_wr *wr; 5796 struct tx_sdesc *txsd; 5797 struct cpl_tx_pkt_core *cpl; 5798 uint64_t ctrl1; 5799 int ndesc, i, checkwrap; 5800 struct mbuf *m, *last; 5801 void *flitp; 5802 5803 TXQ_LOCK_ASSERT_OWNED(txq); 5804 MPASS(txp->npkt > 0); 5805 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5806 5807 wr = (void *)&eq->desc[eq->pidx]; 5808 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 5809 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5810 wr->plen = htobe16(txp->plen); 5811 wr->npkt = txp->npkt; 5812 wr->r3 = 0; 5813 wr->type = txp->wr_type; 5814 flitp = wr + 1; 5815 5816 /* 5817 * At this point we are 16B into a hardware descriptor. If checkwrap is 5818 * set then we know the WR is going to wrap around somewhere. We'll 5819 * check for that at appropriate points. 
5820 */ 5821 ndesc = tx_len16_to_desc(txp->len16); 5822 last = NULL; 5823 checkwrap = eq->sidx - ndesc < eq->pidx; 5824 for (i = 0; i < txp->npkt; i++) { 5825 m = txp->mb[i]; 5826 if (txp->wr_type == 0) { 5827 struct ulp_txpkt *ulpmc; 5828 struct ulptx_idata *ulpsc; 5829 5830 /* ULP master command */ 5831 ulpmc = flitp; 5832 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 5833 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 5834 ulpmc->len = htobe32(txpkts0_len16(mbuf_nsegs(m))); 5835 5836 /* ULP subcommand */ 5837 ulpsc = (void *)(ulpmc + 1); 5838 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 5839 F_ULP_TX_SC_MORE); 5840 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 5841 5842 cpl = (void *)(ulpsc + 1); 5843 if (checkwrap && 5844 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 5845 cpl = (void *)&eq->desc[0]; 5846 } else { 5847 cpl = flitp; 5848 } 5849 5850 /* Checksum offload */ 5851 ctrl1 = csum_to_ctrl(sc, m); 5852 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) { 5853 /* some hardware assistance provided */ 5854 if (needs_vxlan_csum(m)) 5855 txq->vxlan_txcsum++; 5856 else 5857 txq->txcsum++; 5858 } 5859 5860 /* VLAN tag insertion */ 5861 if (needs_vlan_insertion(m)) { 5862 ctrl1 |= F_TXPKT_VLAN_VLD | 5863 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5864 txq->vlan_insertion++; 5865 } 5866 5867 /* CPL header */ 5868 cpl->ctrl0 = txq->cpl_ctrl0; 5869 cpl->pack = 0; 5870 cpl->len = htobe16(m->m_pkthdr.len); 5871 cpl->ctrl1 = htobe64(ctrl1); 5872 5873 flitp = cpl + 1; 5874 if (checkwrap && 5875 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5876 flitp = (void *)&eq->desc[0]; 5877 5878 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 5879 5880 if (last != NULL) 5881 last->m_nextpkt = m; 5882 last = m; 5883 } 5884 5885 txq->sgl_wrs++; 5886 if (txp->wr_type == 0) { 5887 txq->txpkts0_pkts += txp->npkt; 5888 txq->txpkts0_wrs++; 5889 } else { 5890 txq->txpkts1_pkts += txp->npkt; 5891 txq->txpkts1_wrs++; 5892 } 5893 5894 txsd = 
&txq->sdesc[eq->pidx]; 5895 txsd->m = txp->mb[0]; 5896 txsd->desc_used = ndesc; 5897 5898 return (ndesc); 5899 } 5900 5901 static u_int 5902 write_txpkts_vm_wr(struct adapter *sc, struct sge_txq *txq) 5903 { 5904 const struct txpkts *txp = &txq->txp; 5905 struct sge_eq *eq = &txq->eq; 5906 struct fw_eth_tx_pkts_vm_wr *wr; 5907 struct tx_sdesc *txsd; 5908 struct cpl_tx_pkt_core *cpl; 5909 uint64_t ctrl1; 5910 int ndesc, i; 5911 struct mbuf *m, *last; 5912 void *flitp; 5913 5914 TXQ_LOCK_ASSERT_OWNED(txq); 5915 MPASS(txp->npkt > 0); 5916 MPASS(txp->wr_type == 1); /* VF supports type 1 only */ 5917 MPASS(txp->mb[0] != NULL); 5918 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 5919 5920 wr = (void *)&eq->desc[eq->pidx]; 5921 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_VM_WR)); 5922 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(txp->len16)); 5923 wr->r3 = 0; 5924 wr->plen = htobe16(txp->plen); 5925 wr->npkt = txp->npkt; 5926 wr->r4 = 0; 5927 memcpy(&wr->ethmacdst[0], &txp->ethmacdst[0], 16); 5928 flitp = wr + 1; 5929 5930 /* 5931 * At this point we are 32B into a hardware descriptor. Each mbuf in 5932 * the WR will take 32B so we check for the end of the descriptor ring 5933 * before writing odd mbufs (mb[1], 3, 5, ..) 
5934 */ 5935 ndesc = tx_len16_to_desc(txp->len16); 5936 last = NULL; 5937 for (i = 0; i < txp->npkt; i++) { 5938 m = txp->mb[i]; 5939 if (i & 1 && (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 5940 flitp = &eq->desc[0]; 5941 cpl = flitp; 5942 5943 /* Checksum offload */ 5944 ctrl1 = csum_to_ctrl(sc, m); 5945 if (ctrl1 != (F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS)) 5946 txq->txcsum++; /* some hardware assistance provided */ 5947 5948 /* VLAN tag insertion */ 5949 if (needs_vlan_insertion(m)) { 5950 ctrl1 |= F_TXPKT_VLAN_VLD | 5951 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 5952 txq->vlan_insertion++; 5953 } 5954 5955 /* CPL header */ 5956 cpl->ctrl0 = txq->cpl_ctrl0; 5957 cpl->pack = 0; 5958 cpl->len = htobe16(m->m_pkthdr.len); 5959 cpl->ctrl1 = htobe64(ctrl1); 5960 5961 flitp = cpl + 1; 5962 MPASS(mbuf_nsegs(m) == 1); 5963 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), 0); 5964 5965 if (last != NULL) 5966 last->m_nextpkt = m; 5967 last = m; 5968 } 5969 5970 txq->sgl_wrs++; 5971 txq->txpkts1_pkts += txp->npkt; 5972 txq->txpkts1_wrs++; 5973 5974 txsd = &txq->sdesc[eq->pidx]; 5975 txsd->m = txp->mb[0]; 5976 txsd->desc_used = ndesc; 5977 5978 return (ndesc); 5979 } 5980 5981 /* 5982 * If the SGL ends on an address that is not 16 byte aligned, this function will 5983 * add a 0 filled flit at the end. 
5984 */ 5985 static void 5986 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 5987 { 5988 struct sge_eq *eq = &txq->eq; 5989 struct sglist *gl = txq->gl; 5990 struct sglist_seg *seg; 5991 __be64 *flitp, *wrap; 5992 struct ulptx_sgl *usgl; 5993 int i, nflits, nsegs; 5994 5995 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 5996 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 5997 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5998 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5999 6000 get_pkt_gl(m, gl); 6001 nsegs = gl->sg_nseg; 6002 MPASS(nsegs > 0); 6003 6004 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 6005 flitp = (__be64 *)(*to); 6006 wrap = (__be64 *)(&eq->desc[eq->sidx]); 6007 seg = &gl->sg_segs[0]; 6008 usgl = (void *)flitp; 6009 6010 /* 6011 * We start at a 16 byte boundary somewhere inside the tx descriptor 6012 * ring, so we're at least 16 bytes away from the status page. There is 6013 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
6014 */ 6015 6016 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6017 V_ULPTX_NSGE(nsegs)); 6018 usgl->len0 = htobe32(seg->ss_len); 6019 usgl->addr0 = htobe64(seg->ss_paddr); 6020 seg++; 6021 6022 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 6023 6024 /* Won't wrap around at all */ 6025 6026 for (i = 0; i < nsegs - 1; i++, seg++) { 6027 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 6028 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 6029 } 6030 if (i & 1) 6031 usgl->sge[i / 2].len[1] = htobe32(0); 6032 flitp += nflits; 6033 } else { 6034 6035 /* Will wrap somewhere in the rest of the SGL */ 6036 6037 /* 2 flits already written, write the rest flit by flit */ 6038 flitp = (void *)(usgl + 1); 6039 for (i = 0; i < nflits - 2; i++) { 6040 if (flitp == wrap) 6041 flitp = (void *)eq->desc; 6042 *flitp++ = get_flit(seg, nsegs - 1, i); 6043 } 6044 } 6045 6046 if (nflits & 1) { 6047 MPASS(((uintptr_t)flitp) & 0xf); 6048 *flitp++ = 0; 6049 } 6050 6051 MPASS((((uintptr_t)flitp) & 0xf) == 0); 6052 if (__predict_false(flitp == wrap)) 6053 *to = (void *)eq->desc; 6054 else 6055 *to = (void *)flitp; 6056 } 6057 6058 static inline void 6059 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 6060 { 6061 6062 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 6063 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 6064 6065 if (__predict_true((uintptr_t)(*to) + len <= 6066 (uintptr_t)&eq->desc[eq->sidx])) { 6067 bcopy(from, *to, len); 6068 (*to) += len; 6069 } else { 6070 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 6071 6072 bcopy(from, *to, portion); 6073 from += portion; 6074 portion = len - portion; /* remaining */ 6075 bcopy(from, (void *)eq->desc, portion); 6076 (*to) = (caddr_t)eq->desc + portion; 6077 } 6078 } 6079 6080 static inline void 6081 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 6082 { 6083 u_int db; 6084 6085 MPASS(n > 0); 6086 6087 db = eq->doorbells; 6088 if 
(n > 1) 6089 clrbit(&db, DOORBELL_WCWR); 6090 wmb(); 6091 6092 switch (ffs(db) - 1) { 6093 case DOORBELL_UDB: 6094 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6095 break; 6096 6097 case DOORBELL_WCWR: { 6098 volatile uint64_t *dst, *src; 6099 int i; 6100 6101 /* 6102 * Queues whose 128B doorbell segment fits in the page do not 6103 * use relative qid (udb_qid is always 0). Only queues with 6104 * doorbell segments can do WCWR. 6105 */ 6106 KASSERT(eq->udb_qid == 0 && n == 1, 6107 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 6108 __func__, eq->doorbells, n, eq->dbidx, eq)); 6109 6110 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 6111 UDBS_DB_OFFSET); 6112 i = eq->dbidx; 6113 src = (void *)&eq->desc[i]; 6114 while (src != (void *)&eq->desc[i + 1]) 6115 *dst++ = *src++; 6116 wmb(); 6117 break; 6118 } 6119 6120 case DOORBELL_UDBWC: 6121 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 6122 wmb(); 6123 break; 6124 6125 case DOORBELL_KDB: 6126 t4_write_reg(sc, sc->sge_kdoorbell_reg, 6127 V_QID(eq->cntxt_id) | V_PIDX(n)); 6128 break; 6129 } 6130 6131 IDXINCR(eq->dbidx, n, eq->sidx); 6132 } 6133 6134 static inline u_int 6135 reclaimable_tx_desc(struct sge_eq *eq) 6136 { 6137 uint16_t hw_cidx; 6138 6139 hw_cidx = read_hw_cidx(eq); 6140 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 6141 } 6142 6143 static inline u_int 6144 total_available_tx_desc(struct sge_eq *eq) 6145 { 6146 uint16_t hw_cidx, pidx; 6147 6148 hw_cidx = read_hw_cidx(eq); 6149 pidx = eq->pidx; 6150 6151 if (pidx == hw_cidx) 6152 return (eq->sidx - 1); 6153 else 6154 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 6155 } 6156 6157 static inline uint16_t 6158 read_hw_cidx(struct sge_eq *eq) 6159 { 6160 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 6161 uint16_t cidx = spg->cidx; /* stable snapshot */ 6162 6163 return (be16toh(cidx)); 6164 } 6165 6166 /* 6167 * Reclaim 'n' descriptors approximately. 
6168 */ 6169 static u_int 6170 reclaim_tx_descs(struct sge_txq *txq, u_int n) 6171 { 6172 struct tx_sdesc *txsd; 6173 struct sge_eq *eq = &txq->eq; 6174 u_int can_reclaim, reclaimed; 6175 6176 TXQ_LOCK_ASSERT_OWNED(txq); 6177 MPASS(n > 0); 6178 6179 reclaimed = 0; 6180 can_reclaim = reclaimable_tx_desc(eq); 6181 while (can_reclaim && reclaimed < n) { 6182 int ndesc; 6183 struct mbuf *m, *nextpkt; 6184 6185 txsd = &txq->sdesc[eq->cidx]; 6186 ndesc = txsd->desc_used; 6187 6188 /* Firmware doesn't return "partial" credits. */ 6189 KASSERT(can_reclaim >= ndesc, 6190 ("%s: unexpected number of credits: %d, %d", 6191 __func__, can_reclaim, ndesc)); 6192 KASSERT(ndesc != 0, 6193 ("%s: descriptor with no credits: cidx %d", 6194 __func__, eq->cidx)); 6195 6196 for (m = txsd->m; m != NULL; m = nextpkt) { 6197 nextpkt = m->m_nextpkt; 6198 m->m_nextpkt = NULL; 6199 m_freem(m); 6200 } 6201 reclaimed += ndesc; 6202 can_reclaim -= ndesc; 6203 IDXINCR(eq->cidx, ndesc, eq->sidx); 6204 } 6205 6206 return (reclaimed); 6207 } 6208 6209 static void 6210 tx_reclaim(void *arg, int n) 6211 { 6212 struct sge_txq *txq = arg; 6213 struct sge_eq *eq = &txq->eq; 6214 6215 do { 6216 if (TXQ_TRYLOCK(txq) == 0) 6217 break; 6218 n = reclaim_tx_descs(txq, 32); 6219 if (eq->cidx == eq->pidx) 6220 eq->equeqidx = eq->pidx; 6221 TXQ_UNLOCK(txq); 6222 } while (n > 0); 6223 } 6224 6225 static __be64 6226 get_flit(struct sglist_seg *segs, int nsegs, int idx) 6227 { 6228 int i = (idx / 3) * 2; 6229 6230 switch (idx % 3) { 6231 case 0: { 6232 uint64_t rc; 6233 6234 rc = (uint64_t)segs[i].ss_len << 32; 6235 if (i + 1 < nsegs) 6236 rc |= (uint64_t)(segs[i + 1].ss_len); 6237 6238 return (htobe64(rc)); 6239 } 6240 case 1: 6241 return (htobe64(segs[i].ss_paddr)); 6242 case 2: 6243 return (htobe64(segs[i + 1].ss_paddr)); 6244 } 6245 6246 return (0); 6247 } 6248 6249 static int 6250 find_refill_source(struct adapter *sc, int maxp, bool packing) 6251 { 6252 int i, zidx = -1; 6253 struct rx_buf_info *rxb = 
&sc->sge.rx_buf_info[0]; 6254 6255 if (packing) { 6256 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6257 if (rxb->hwidx2 == -1) 6258 continue; 6259 if (rxb->size1 < PAGE_SIZE && 6260 rxb->size1 < largest_rx_cluster) 6261 continue; 6262 if (rxb->size1 > largest_rx_cluster) 6263 break; 6264 MPASS(rxb->size1 - rxb->size2 >= CL_METADATA_SIZE); 6265 if (rxb->size2 >= maxp) 6266 return (i); 6267 zidx = i; 6268 } 6269 } else { 6270 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6271 if (rxb->hwidx1 == -1) 6272 continue; 6273 if (rxb->size1 > largest_rx_cluster) 6274 break; 6275 if (rxb->size1 >= maxp) 6276 return (i); 6277 zidx = i; 6278 } 6279 } 6280 6281 return (zidx); 6282 } 6283 6284 static void 6285 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 6286 { 6287 mtx_lock(&sc->sfl_lock); 6288 FL_LOCK(fl); 6289 if ((fl->flags & FL_DOOMED) == 0) { 6290 fl->flags |= FL_STARVING; 6291 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 6292 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 6293 } 6294 FL_UNLOCK(fl); 6295 mtx_unlock(&sc->sfl_lock); 6296 } 6297 6298 static void 6299 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 6300 { 6301 struct sge_wrq *wrq = (void *)eq; 6302 6303 atomic_readandclear_int(&eq->equiq); 6304 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 6305 } 6306 6307 static void 6308 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 6309 { 6310 struct sge_txq *txq = (void *)eq; 6311 6312 MPASS(eq->type == EQ_ETH); 6313 6314 atomic_readandclear_int(&eq->equiq); 6315 if (mp_ring_is_idle(txq->r)) 6316 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 6317 else 6318 mp_ring_check_drainage(txq->r, 64); 6319 } 6320 6321 static int 6322 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 6323 struct mbuf *m) 6324 { 6325 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 6326 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 6327 struct adapter *sc = iq->adapter; 6328 struct sge *s = 
&sc->sge; 6329 struct sge_eq *eq; 6330 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 6331 &handle_wrq_egr_update, &handle_eth_egr_update, 6332 &handle_wrq_egr_update}; 6333 6334 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6335 rss->opcode)); 6336 6337 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 6338 (*h[eq->type])(sc, eq); 6339 6340 return (0); 6341 } 6342 6343 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 6344 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 6345 offsetof(struct cpl_fw6_msg, data)); 6346 6347 static int 6348 handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 6349 { 6350 struct adapter *sc = iq->adapter; 6351 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 6352 6353 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 6354 rss->opcode)); 6355 6356 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 6357 const struct rss_header *rss2; 6358 6359 rss2 = (const struct rss_header *)&cpl->data[0]; 6360 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 6361 } 6362 6363 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 6364 } 6365 6366 /** 6367 * t4_handle_wrerr_rpl - process a FW work request error message 6368 * @adap: the adapter 6369 * @rpl: start of the FW message 6370 */ 6371 static int 6372 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 6373 { 6374 u8 opcode = *(const u8 *)rpl; 6375 const struct fw_error_cmd *e = (const void *)rpl; 6376 unsigned int i; 6377 6378 if (opcode != FW_ERROR_CMD) { 6379 log(LOG_ERR, 6380 "%s: Received WRERR_RPL message with opcode %#x\n", 6381 device_get_nameunit(adap->dev), opcode); 6382 return (EINVAL); 6383 } 6384 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 6385 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? 
"fatal" : 6386 "non-fatal"); 6387 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 6388 case FW_ERROR_TYPE_EXCEPTION: 6389 log(LOG_ERR, "exception info:\n"); 6390 for (i = 0; i < nitems(e->u.exception.info); i++) 6391 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 6392 be32toh(e->u.exception.info[i])); 6393 log(LOG_ERR, "\n"); 6394 break; 6395 case FW_ERROR_TYPE_HWMODULE: 6396 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 6397 be32toh(e->u.hwmodule.regaddr), 6398 be32toh(e->u.hwmodule.regval)); 6399 break; 6400 case FW_ERROR_TYPE_WR: 6401 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 6402 be16toh(e->u.wr.cidx), 6403 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 6404 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 6405 be32toh(e->u.wr.eqid)); 6406 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 6407 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 6408 e->u.wr.wrhdr[i]); 6409 log(LOG_ERR, "\n"); 6410 break; 6411 case FW_ERROR_TYPE_ACL: 6412 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 6413 be16toh(e->u.acl.cidx), 6414 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 6415 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 6416 be32toh(e->u.acl.eqid), 6417 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 6418 "MAC"); 6419 for (i = 0; i < nitems(e->u.acl.val); i++) 6420 log(LOG_ERR, " %02x", e->u.acl.val[i]); 6421 log(LOG_ERR, "\n"); 6422 break; 6423 default: 6424 log(LOG_ERR, "type %#x\n", 6425 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 6426 return (EINVAL); 6427 } 6428 return (0); 6429 } 6430 6431 static inline bool 6432 bufidx_used(struct adapter *sc, int idx) 6433 { 6434 struct rx_buf_info *rxb = &sc->sge.rx_buf_info[0]; 6435 int i; 6436 6437 for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) { 6438 if (rxb->size1 > largest_rx_cluster) 6439 continue; 6440 if (rxb->hwidx1 == idx || rxb->hwidx2 == idx) 6441 return (true); 6442 } 6443 6444 return (false); 6445 } 6446 6447 static int 6448 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 6449 { 6450 struct adapter *sc = arg1; 6451 struct sge_params *sp = &sc->params.sge; 6452 int i, rc; 6453 struct sbuf sb; 6454 char c; 6455 6456 sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND); 6457 for (i = 0; i < SGE_FLBUF_SIZES; i++) { 6458 if (bufidx_used(sc, i)) 6459 c = '*'; 6460 else 6461 c = '\0'; 6462 6463 sbuf_printf(&sb, "%u%c ", sp->sge_fl_buffer_size[i], c); 6464 } 6465 sbuf_trim(&sb); 6466 sbuf_finish(&sb); 6467 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 6468 sbuf_delete(&sb); 6469 return (rc); 6470 } 6471 6472 #ifdef RATELIMIT 6473 #if defined(INET) || defined(INET6) 6474 /* 6475 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 
6476 */ 6477 static inline u_int 6478 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 6479 { 6480 u_int n; 6481 6482 MPASS(immhdrs > 0); 6483 6484 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 6485 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 6486 if (__predict_false(nsegs == 0)) 6487 goto done; 6488 6489 nsegs--; /* first segment is part of ulptx_sgl */ 6490 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 6491 if (tso) 6492 n += sizeof(struct cpl_tx_pkt_lso_core); 6493 6494 done: 6495 return (howmany(n, 16)); 6496 } 6497 #endif 6498 6499 #define ETID_FLOWC_NPARAMS 6 6500 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 6501 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 6502 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 6503 6504 static int 6505 send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, 6506 struct vi_info *vi) 6507 { 6508 struct wrq_cookie cookie; 6509 u_int pfvf = pi->adapter->pf << S_FW_VIID_PFN; 6510 struct fw_flowc_wr *flowc; 6511 6512 mtx_assert(&cst->lock, MA_OWNED); 6513 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 6514 EO_FLOWC_PENDING); 6515 6516 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLOWC_LEN16, &cookie); 6517 if (__predict_false(flowc == NULL)) 6518 return (ENOMEM); 6519 6520 bzero(flowc, ETID_FLOWC_LEN); 6521 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6522 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 6523 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 6524 V_FW_WR_FLOWID(cst->etid)); 6525 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 6526 flowc->mnemval[0].val = htobe32(pfvf); 6527 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 6528 flowc->mnemval[1].val = htobe32(pi->tx_chan); 6529 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 6530 flowc->mnemval[2].val = htobe32(pi->tx_chan); 6531 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 6532 flowc->mnemval[3].val = 
htobe32(cst->iqid); 6533 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 6534 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 6535 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 6536 flowc->mnemval[5].val = htobe32(cst->schedcl); 6537 6538 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6539 6540 cst->flags &= ~EO_FLOWC_PENDING; 6541 cst->flags |= EO_FLOWC_RPL_PENDING; 6542 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 6543 cst->tx_credits -= ETID_FLOWC_LEN16; 6544 6545 return (0); 6546 } 6547 6548 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 6549 6550 void 6551 send_etid_flush_wr(struct cxgbe_rate_tag *cst) 6552 { 6553 struct fw_flowc_wr *flowc; 6554 struct wrq_cookie cookie; 6555 6556 mtx_assert(&cst->lock, MA_OWNED); 6557 6558 flowc = start_wrq_wr(&cst->eo_txq->wrq, ETID_FLUSH_LEN16, &cookie); 6559 if (__predict_false(flowc == NULL)) 6560 CXGBE_UNIMPLEMENTED(__func__); 6561 6562 bzero(flowc, ETID_FLUSH_LEN16 * 16); 6563 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 6564 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 6565 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 6566 V_FW_WR_FLOWID(cst->etid)); 6567 6568 commit_wrq_wr(&cst->eo_txq->wrq, flowc, &cookie); 6569 6570 cst->flags |= EO_FLUSH_RPL_PENDING; 6571 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 6572 cst->tx_credits -= ETID_FLUSH_LEN16; 6573 cst->ncompl++; 6574 } 6575 6576 static void 6577 write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, 6578 struct mbuf *m0, int compl) 6579 { 6580 struct cpl_tx_pkt_core *cpl; 6581 uint64_t ctrl1; 6582 uint32_t ctrl; /* used in many unrelated places */ 6583 int len16, pktlen, nsegs, immhdrs; 6584 uintptr_t p; 6585 struct ulptx_sgl *usgl; 6586 struct sglist sg; 6587 struct sglist_seg segs[38]; /* XXX: find real limit. 
XXX: get off the stack */ 6588 6589 mtx_assert(&cst->lock, MA_OWNED); 6590 M_ASSERTPKTHDR(m0); 6591 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 6592 m0->m_pkthdr.l4hlen > 0, 6593 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 6594 6595 len16 = mbuf_eo_len16(m0); 6596 nsegs = mbuf_eo_nsegs(m0); 6597 pktlen = m0->m_pkthdr.len; 6598 ctrl = sizeof(struct cpl_tx_pkt_core); 6599 if (needs_tso(m0)) 6600 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 6601 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 6602 ctrl += immhdrs; 6603 6604 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 6605 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 6606 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 6607 V_FW_WR_FLOWID(cst->etid)); 6608 wr->r3 = 0; 6609 if (needs_outer_udp_csum(m0)) { 6610 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 6611 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 6612 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6613 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 6614 wr->u.udpseg.rtplen = 0; 6615 wr->u.udpseg.r4 = 0; 6616 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 6617 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 6618 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 6619 cpl = (void *)(wr + 1); 6620 } else { 6621 MPASS(needs_outer_tcp_csum(m0)); 6622 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 6623 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 6624 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 6625 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 6626 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 6627 wr->u.tcpseg.r4 = 0; 6628 wr->u.tcpseg.r5 = 0; 6629 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 6630 6631 if (needs_tso(m0)) { 6632 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 6633 6634 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 6635 6636 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 6637 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 6638 V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen 
- 6639 ETHER_HDR_LEN) >> 2) | 6640 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) | 6641 V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 6642 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 6643 ctrl |= F_LSO_IPV6; 6644 lso->lso_ctrl = htobe32(ctrl); 6645 lso->ipid_ofst = htobe16(0); 6646 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 6647 lso->seqno_offset = htobe32(0); 6648 lso->len = htobe32(pktlen); 6649 6650 cpl = (void *)(lso + 1); 6651 } else { 6652 wr->u.tcpseg.mss = htobe16(0xffff); 6653 cpl = (void *)(wr + 1); 6654 } 6655 } 6656 6657 /* Checksum offload must be requested for ethofld. */ 6658 MPASS(needs_outer_l4_csum(m0)); 6659 ctrl1 = csum_to_ctrl(cst->adapter, m0); 6660 6661 /* VLAN tag insertion */ 6662 if (needs_vlan_insertion(m0)) { 6663 ctrl1 |= F_TXPKT_VLAN_VLD | 6664 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 6665 } 6666 6667 /* CPL header */ 6668 cpl->ctrl0 = cst->ctrl0; 6669 cpl->pack = 0; 6670 cpl->len = htobe16(pktlen); 6671 cpl->ctrl1 = htobe64(ctrl1); 6672 6673 /* Copy Ethernet, IP & TCP/UDP hdrs as immediate data */ 6674 p = (uintptr_t)(cpl + 1); 6675 m_copydata(m0, 0, immhdrs, (void *)p); 6676 6677 /* SGL */ 6678 if (nsegs > 0) { 6679 int i, pad; 6680 6681 /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ 6682 p += immhdrs; 6683 pad = 16 - (immhdrs & 0xf); 6684 bzero((void *)p, pad); 6685 6686 usgl = (void *)(p + pad); 6687 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 6688 V_ULPTX_NSGE(nsegs)); 6689 6690 sglist_init(&sg, nitems(segs), segs); 6691 for (; m0 != NULL; m0 = m0->m_next) { 6692 if (__predict_false(m0->m_len == 0)) 6693 continue; 6694 if (immhdrs >= m0->m_len) { 6695 immhdrs -= m0->m_len; 6696 continue; 6697 } 6698 if (m0->m_flags & M_EXTPG) 6699 sglist_append_mbuf_epg(&sg, m0, 6700 mtod(m0, vm_offset_t), m0->m_len); 6701 else 6702 sglist_append(&sg, mtod(m0, char *) + immhdrs, 6703 m0->m_len - immhdrs); 6704 immhdrs = 0; 6705 } 6706 MPASS(sg.sg_nseg == nsegs); 6707 6708 /* 6709 * Zero pad last 8B in case the WR 
doesn't end on a 16B 6710 * boundary. 6711 */ 6712 *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; 6713 6714 usgl->len0 = htobe32(segs[0].ss_len); 6715 usgl->addr0 = htobe64(segs[0].ss_paddr); 6716 for (i = 0; i < nsegs - 1; i++) { 6717 usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); 6718 usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); 6719 } 6720 if (i & 1) 6721 usgl->sge[i / 2].len[1] = htobe32(0); 6722 } 6723 6724 } 6725 6726 static void 6727 ethofld_tx(struct cxgbe_rate_tag *cst) 6728 { 6729 struct mbuf *m; 6730 struct wrq_cookie cookie; 6731 int next_credits, compl; 6732 struct fw_eth_tx_eo_wr *wr; 6733 6734 mtx_assert(&cst->lock, MA_OWNED); 6735 6736 while ((m = mbufq_first(&cst->pending_tx)) != NULL) { 6737 M_ASSERTPKTHDR(m); 6738 6739 /* How many len16 credits do we need to send this mbuf. */ 6740 next_credits = mbuf_eo_len16(m); 6741 MPASS(next_credits > 0); 6742 if (next_credits > cst->tx_credits) { 6743 /* 6744 * Tx will make progress eventually because there is at 6745 * least one outstanding fw4_ack that will return 6746 * credits and kick the tx. 6747 */ 6748 MPASS(cst->ncompl > 0); 6749 return; 6750 } 6751 wr = start_wrq_wr(&cst->eo_txq->wrq, next_credits, &cookie); 6752 if (__predict_false(wr == NULL)) { 6753 /* XXX: wishful thinking, not a real assertion. */ 6754 MPASS(cst->ncompl > 0); 6755 return; 6756 } 6757 cst->tx_credits -= next_credits; 6758 cst->tx_nocompl += next_credits; 6759 compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; 6760 ETHER_BPF_MTAP(cst->com.ifp, m); 6761 write_ethofld_wr(cst, wr, m, compl); 6762 commit_wrq_wr(&cst->eo_txq->wrq, wr, &cookie); 6763 if (compl) { 6764 cst->ncompl++; 6765 cst->tx_nocompl = 0; 6766 } 6767 (void) mbufq_dequeue(&cst->pending_tx); 6768 6769 /* 6770 * Drop the mbuf's reference on the tag now rather 6771 * than waiting until m_freem(). 
This ensures that 6772 * cxgbe_rate_tag_free gets called when the inp drops 6773 * its reference on the tag and there are no more 6774 * mbufs in the pending_tx queue and can flush any 6775 * pending requests. Otherwise if the last mbuf 6776 * doesn't request a completion the etid will never be 6777 * released. 6778 */ 6779 m->m_pkthdr.snd_tag = NULL; 6780 m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; 6781 m_snd_tag_rele(&cst->com); 6782 6783 mbufq_enqueue(&cst->pending_fwack, m); 6784 } 6785 } 6786 6787 static int 6788 ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) 6789 { 6790 struct cxgbe_rate_tag *cst; 6791 int rc; 6792 6793 MPASS(m0->m_nextpkt == NULL); 6794 MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); 6795 MPASS(m0->m_pkthdr.snd_tag != NULL); 6796 cst = mst_to_crt(m0->m_pkthdr.snd_tag); 6797 6798 mtx_lock(&cst->lock); 6799 MPASS(cst->flags & EO_SND_TAG_REF); 6800 6801 if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { 6802 struct vi_info *vi = ifp->if_softc; 6803 struct port_info *pi = vi->pi; 6804 struct adapter *sc = pi->adapter; 6805 const uint32_t rss_mask = vi->rss_size - 1; 6806 uint32_t rss_hash; 6807 6808 cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; 6809 if (M_HASHTYPE_ISHASH(m0)) 6810 rss_hash = m0->m_pkthdr.flowid; 6811 else 6812 rss_hash = arc4random(); 6813 /* We assume RSS hashing */ 6814 cst->iqid = vi->rss[rss_hash & rss_mask]; 6815 cst->eo_txq += rss_hash % vi->nofldtxq; 6816 rc = send_etid_flowc_wr(cst, pi, vi); 6817 if (rc != 0) 6818 goto done; 6819 } 6820 6821 if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { 6822 rc = ENOBUFS; 6823 goto done; 6824 } 6825 6826 mbufq_enqueue(&cst->pending_tx, m0); 6827 cst->plen += m0->m_pkthdr.len; 6828 6829 /* 6830 * Hold an extra reference on the tag while generating work 6831 * requests to ensure that we don't try to free the tag during 6832 * ethofld_tx() in case we are sending the final mbuf after 6833 * the inp was freed. 
6834 */ 6835 m_snd_tag_ref(&cst->com); 6836 ethofld_tx(cst); 6837 mtx_unlock(&cst->lock); 6838 m_snd_tag_rele(&cst->com); 6839 return (0); 6840 6841 done: 6842 mtx_unlock(&cst->lock); 6843 return (rc); 6844 } 6845 6846 static int 6847 ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 6848 { 6849 struct adapter *sc = iq->adapter; 6850 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 6851 struct mbuf *m; 6852 u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 6853 struct cxgbe_rate_tag *cst; 6854 uint8_t credits = cpl->credits; 6855 6856 cst = lookup_etid(sc, etid); 6857 mtx_lock(&cst->lock); 6858 if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { 6859 MPASS(credits >= ETID_FLOWC_LEN16); 6860 credits -= ETID_FLOWC_LEN16; 6861 cst->flags &= ~EO_FLOWC_RPL_PENDING; 6862 } 6863 6864 KASSERT(cst->ncompl > 0, 6865 ("%s: etid %u (%p) wasn't expecting completion.", 6866 __func__, etid, cst)); 6867 cst->ncompl--; 6868 6869 while (credits > 0) { 6870 m = mbufq_dequeue(&cst->pending_fwack); 6871 if (__predict_false(m == NULL)) { 6872 /* 6873 * The remaining credits are for the final flush that 6874 * was issued when the tag was freed by the kernel. 6875 */ 6876 MPASS((cst->flags & 6877 (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == 6878 EO_FLUSH_RPL_PENDING); 6879 MPASS(credits == ETID_FLUSH_LEN16); 6880 MPASS(cst->tx_credits + cpl->credits == cst->tx_total); 6881 MPASS(cst->ncompl == 0); 6882 6883 cst->flags &= ~EO_FLUSH_RPL_PENDING; 6884 cst->tx_credits += cpl->credits; 6885 cxgbe_rate_tag_free_locked(cst); 6886 return (0); /* cst is gone. 
*/ 6887 } 6888 KASSERT(m != NULL, 6889 ("%s: too many credits (%u, %u)", __func__, cpl->credits, 6890 credits)); 6891 KASSERT(credits >= mbuf_eo_len16(m), 6892 ("%s: too few credits (%u, %u, %u)", __func__, 6893 cpl->credits, credits, mbuf_eo_len16(m))); 6894 credits -= mbuf_eo_len16(m); 6895 cst->plen -= m->m_pkthdr.len; 6896 m_freem(m); 6897 } 6898 6899 cst->tx_credits += cpl->credits; 6900 MPASS(cst->tx_credits <= cst->tx_total); 6901 6902 if (cst->flags & EO_SND_TAG_REF) { 6903 /* 6904 * As with ethofld_transmit(), hold an extra reference 6905 * so that the tag is stable across ethold_tx(). 6906 */ 6907 m_snd_tag_ref(&cst->com); 6908 m = mbufq_first(&cst->pending_tx); 6909 if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) 6910 ethofld_tx(cst); 6911 mtx_unlock(&cst->lock); 6912 m_snd_tag_rele(&cst->com); 6913 } else { 6914 /* 6915 * There shouldn't be any pending packets if the tag 6916 * was freed by the kernel since any pending packet 6917 * should hold a reference to the tag. 6918 */ 6919 MPASS(mbufq_first(&cst->pending_tx) == NULL); 6920 mtx_unlock(&cst->lock); 6921 } 6922 6923 return (0); 6924 } 6925 #endif 6926