1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_ratelimit.h" 36 37 #include <sys/types.h> 38 #include <sys/eventhandler.h> 39 #include <sys/mbuf.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/taskqueue.h> 46 #include <sys/time.h> 47 #include <sys/sglist.h> 48 #include <sys/sysctl.h> 49 #include <sys/smp.h> 50 #include <sys/counter.h> 51 #include <net/bpf.h> 52 #include <net/ethernet.h> 53 #include <net/if.h> 54 #include <net/if_vlan_var.h> 55 #include <netinet/in.h> 56 #include <netinet/ip.h> 57 #include <netinet/ip6.h> 58 #include <netinet/tcp.h> 59 #include <netinet/udp.h> 60 #include <machine/in_cksum.h> 61 #include <machine/md_var.h> 62 #include <vm/vm.h> 63 #include <vm/pmap.h> 64 #ifdef DEV_NETMAP 65 #include <machine/bus.h> 66 #include <sys/selinfo.h> 67 #include <net/if_var.h> 68 #include <net/netmap.h> 69 #include <dev/netmap/netmap_kern.h> 70 #endif 71 72 #include "common/common.h" 73 #include "common/t4_regs.h" 74 #include "common/t4_regs_values.h" 75 #include "common/t4_msg.h" 76 #include "t4_l2t.h" 77 #include "t4_mp_ring.h" 78 79 #ifdef T4_PKT_TIMESTAMP 80 #define RX_COPY_THRESHOLD (MINCLSIZE - 8) 81 #else 82 #define RX_COPY_THRESHOLD MINCLSIZE 83 #endif 84 85 /* Internal mbuf flags stored in PH_loc.eight[1]. */ 86 #define MC_RAW_WR 0x02 87 88 /* 89 * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 90 * 0-7 are valid values. 91 */ 92 static int fl_pktshift = 0; 93 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pktshift, CTLFLAG_RDTUN, &fl_pktshift, 0, 94 "payload DMA offset in rx buffer (bytes)"); 95 96 /* 97 * Pad ethernet payload up to this boundary. 98 * -1: driver should figure out a good value. 99 * 0: disable padding. 100 * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. 
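 *
 * Like the other hw.cxgbe.* knobs in this file this is a loader tunable
 * (CTLFLAG_RDTUN), so it is normally set from loader.conf(5) before the
 * driver attaches.  A minimal sketch, using 64 purely as an example of a
 * valid power of 2 in the 32-4096 range:
 *
 *	hw.cxgbe.fl_pad="64"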
 */
int fl_pad = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pad, CTLFLAG_RDTUN, &fl_pad, 0,
    "payload pad boundary (bytes)");

/*
 * Status page length.
 * -1: driver should figure out a good value.
 * 64 or 128 are the only other valid values.
 */
static int spg_len = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
    "status page size (bytes)");

/*
 * Congestion drops.
 * -1: no congestion feedback (not recommended).
 * 0: backpressure the channel instead of dropping packets right away.
 * 1: no backpressure, drop packets for the congested queue immediately.
 */
static int cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
    "Congestion control for RX queues (0 = backpressure, 1 = drop)");

/*
 * Deliver multiple frames in the same free list buffer if they fit.
 * -1: let the driver decide whether to enable buffer packing or not.
 * 0: disable buffer packing.
 * 1: enable buffer packing.
 */
static int buffer_packing = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, buffer_packing, CTLFLAG_RDTUN, &buffer_packing,
    0, "Enable buffer packing");

/*
 * Start next frame in a packed buffer at this boundary.
 * -1: driver should figure out a good value.
 * T4: driver will ignore this and use the same value as fl_pad above.
 * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
 */
static int fl_pack = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fl_pack, CTLFLAG_RDTUN, &fl_pack, 0,
    "payload pack boundary (bytes)");

/*
 * Allow the driver to create mbuf(s) in a cluster allocated for rx.
 * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
 * 1: ok to create mbuf(s) within a cluster if there is room.
 */
static int allow_mbufs_in_cluster = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, allow_mbufs_in_cluster, CTLFLAG_RDTUN,
    &allow_mbufs_in_cluster, 0,
    "Allow driver to create mbufs within a rx cluster");

/*
 * Largest rx cluster size that the driver is allowed to allocate.
 */
static int largest_rx_cluster = MJUM16BYTES;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, largest_rx_cluster, CTLFLAG_RDTUN,
    &largest_rx_cluster, 0, "Largest rx cluster (bytes)");

/*
 * Size of cluster allocation that's most likely to succeed.  The driver will
 * fall back to this size if it fails to allocate clusters larger than this.
 */
static int safest_rx_cluster = PAGE_SIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, safest_rx_cluster, CTLFLAG_RDTUN,
    &safest_rx_cluster, 0, "Safe rx cluster (bytes)");

#ifdef RATELIMIT
/*
 * Knob to control TCP timestamp rewriting, and the granularity of the tick
 * used for rewriting.  -1 and 0-3 are all valid values.
 * -1: hardware should leave the TCP timestamps alone.
 * 0: 1ms
 * 1: 100us
 * 2: 10us
 * 3: 1us
 */
static int tsclk = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tsclk, CTLFLAG_RDTUN, &tsclk, 0,
    "Control TCP timestamp rewriting when using pacing");

static int eo_max_backlog = 1024 * 1024;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, eo_max_backlog, CTLFLAG_RDTUN, &eo_max_backlog,
    0, "Maximum backlog of ratelimited data per flow");
#endif

/*
 * The interrupt holdoff timers are multiplied by this value on T6+.
 * 1 and 3-17 (both inclusive) are legal values.
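 * A value of 1 writes 0 to the TSCALE field (no scaling); any other legal
 * value v is programmed as V_TSCALE(v - 2) in A_SGE_ITP_CONTROL by
 * t4_tweak_chip_settings().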
192 */ 193 static int tscale = 1; 194 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tscale, CTLFLAG_RDTUN, &tscale, 0, 195 "Interrupt holdoff timer scale on T6+"); 196 197 /* 198 * Number of LRO entries in the lro_ctrl structure per rx queue. 199 */ 200 static int lro_entries = TCP_LRO_ENTRIES; 201 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_entries, CTLFLAG_RDTUN, &lro_entries, 0, 202 "Number of LRO entries per RX queue"); 203 204 /* 205 * This enables presorting of frames before they're fed into tcp_lro_rx. 206 */ 207 static int lro_mbufs = 0; 208 SYSCTL_INT(_hw_cxgbe, OID_AUTO, lro_mbufs, CTLFLAG_RDTUN, &lro_mbufs, 0, 209 "Enable presorting of LRO frames"); 210 211 struct txpkts { 212 u_int wr_type; /* type 0 or type 1 */ 213 u_int npkt; /* # of packets in this work request */ 214 u_int plen; /* total payload (sum of all packets) */ 215 u_int len16; /* # of 16B pieces used by this work request */ 216 }; 217 218 /* A packet's SGL. This + m_pkthdr has all info needed for tx */ 219 struct sgl { 220 struct sglist sg; 221 struct sglist_seg seg[TX_SGL_SEGS]; 222 }; 223 224 static int service_iq(struct sge_iq *, int); 225 static int service_iq_fl(struct sge_iq *, int); 226 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); 227 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); 228 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); 229 static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); 230 static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, 231 uint16_t, char *); 232 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, 233 bus_addr_t *, void **); 234 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, 235 void *); 236 static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, 237 int, int); 238 static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); 239 static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, 240 struct sge_iq *); 241 static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, 242 struct sysctl_oid *, struct sge_fl *); 243 static int alloc_fwq(struct adapter *); 244 static int free_fwq(struct adapter *); 245 static int alloc_ctrlq(struct adapter *, struct sge_wrq *, int, 246 struct sysctl_oid *); 247 static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, 248 struct sysctl_oid *); 249 static int free_rxq(struct vi_info *, struct sge_rxq *); 250 #ifdef TCP_OFFLOAD 251 static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, 252 struct sysctl_oid *); 253 static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); 254 #endif 255 #ifdef DEV_NETMAP 256 static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, 257 struct sysctl_oid *); 258 static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); 259 static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, 260 struct sysctl_oid *); 261 static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); 262 #endif 263 static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); 264 static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 265 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 266 static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); 267 #endif 268 static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); 269 static int free_eq(struct adapter *, struct 
sge_eq *); 270 static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, 271 struct sysctl_oid *); 272 static int free_wrq(struct adapter *, struct sge_wrq *); 273 static int alloc_txq(struct vi_info *, struct sge_txq *, int, 274 struct sysctl_oid *); 275 static int free_txq(struct vi_info *, struct sge_txq *); 276 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); 277 static inline void ring_fl_db(struct adapter *, struct sge_fl *); 278 static int refill_fl(struct adapter *, struct sge_fl *, int); 279 static void refill_sfl(void *); 280 static int alloc_fl_sdesc(struct sge_fl *); 281 static void free_fl_sdesc(struct adapter *, struct sge_fl *); 282 static void find_best_refill_source(struct adapter *, struct sge_fl *, int); 283 static void find_safe_refill_source(struct adapter *, struct sge_fl *); 284 static void add_fl_to_sfl(struct adapter *, struct sge_fl *); 285 286 static inline void get_pkt_gl(struct mbuf *, struct sglist *); 287 static inline u_int txpkt_len16(u_int, u_int); 288 static inline u_int txpkt_vm_len16(u_int, u_int); 289 static inline u_int txpkts0_len16(u_int); 290 static inline u_int txpkts1_len16(void); 291 static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int); 292 static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, 293 struct mbuf *, u_int); 294 static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, 295 struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); 296 static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); 297 static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); 298 static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, 299 struct mbuf *, const struct txpkts *, u_int); 300 static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); 301 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); 302 static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); 303 static inline uint16_t read_hw_cidx(struct sge_eq *); 304 static inline u_int reclaimable_tx_desc(struct sge_eq *); 305 static inline u_int total_available_tx_desc(struct sge_eq *); 306 static u_int reclaim_tx_descs(struct sge_txq *, u_int); 307 static void tx_reclaim(void *, int); 308 static __be64 get_flit(struct sglist_seg *, int, int); 309 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, 310 struct mbuf *); 311 static int handle_fw_msg(struct sge_iq *, const struct rss_header *, 312 struct mbuf *); 313 static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); 314 static void wrq_tx_drain(void *, int); 315 static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); 316 317 static int sysctl_uint16(SYSCTL_HANDLER_ARGS); 318 static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); 319 #ifdef RATELIMIT 320 static inline u_int txpkt_eo_len16(u_int, u_int, u_int); 321 static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, 322 struct mbuf *); 323 #endif 324 325 static counter_u64_t extfree_refs; 326 static counter_u64_t extfree_rels; 327 328 an_handler_t t4_an_handler; 329 fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; 330 cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; 331 cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; 332 cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; 333 cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; 334 cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; 335 cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; 336 337 void 338 
t4_register_an_handler(an_handler_t h) 339 { 340 uintptr_t *loc; 341 342 MPASS(h == NULL || t4_an_handler == NULL); 343 344 loc = (uintptr_t *)&t4_an_handler; 345 atomic_store_rel_ptr(loc, (uintptr_t)h); 346 } 347 348 void 349 t4_register_fw_msg_handler(int type, fw_msg_handler_t h) 350 { 351 uintptr_t *loc; 352 353 MPASS(type < nitems(t4_fw_msg_handler)); 354 MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); 355 /* 356 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL 357 * handler dispatch table. Reject any attempt to install a handler for 358 * this subtype. 359 */ 360 MPASS(type != FW_TYPE_RSSCPL); 361 MPASS(type != FW6_TYPE_RSSCPL); 362 363 loc = (uintptr_t *)&t4_fw_msg_handler[type]; 364 atomic_store_rel_ptr(loc, (uintptr_t)h); 365 } 366 367 void 368 t4_register_cpl_handler(int opcode, cpl_handler_t h) 369 { 370 uintptr_t *loc; 371 372 MPASS(opcode < nitems(t4_cpl_handler)); 373 MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); 374 375 loc = (uintptr_t *)&t4_cpl_handler[opcode]; 376 atomic_store_rel_ptr(loc, (uintptr_t)h); 377 } 378 379 static int 380 set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 381 struct mbuf *m) 382 { 383 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 384 u_int tid; 385 int cookie; 386 387 MPASS(m == NULL); 388 389 tid = GET_TID(cpl); 390 if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { 391 /* 392 * The return code for filter-write is put in the CPL cookie so 393 * we have to rely on the hardware tid (is_ftid) to determine 394 * that this is a response to a filter. 395 */ 396 cookie = CPL_COOKIE_FILTER; 397 } else { 398 cookie = G_COOKIE(cpl->cookie); 399 } 400 MPASS(cookie > CPL_COOKIE_RESERVED); 401 MPASS(cookie < nitems(set_tcb_rpl_handlers)); 402 403 return (set_tcb_rpl_handlers[cookie](iq, rss, m)); 404 } 405 406 static int 407 l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 408 struct mbuf *m) 409 { 410 const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); 411 unsigned int cookie; 412 413 MPASS(m == NULL); 414 415 cookie = GET_TID(rpl) & F_SYNC_WR ? 
CPL_COOKIE_TOM : CPL_COOKIE_FILTER; 416 return (l2t_write_rpl_handlers[cookie](iq, rss, m)); 417 } 418 419 static int 420 act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, 421 struct mbuf *m) 422 { 423 const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); 424 u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); 425 426 MPASS(m == NULL); 427 MPASS(cookie != CPL_COOKIE_RESERVED); 428 429 return (act_open_rpl_handlers[cookie](iq, rss, m)); 430 } 431 432 static int 433 abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, 434 struct mbuf *m) 435 { 436 struct adapter *sc = iq->adapter; 437 u_int cookie; 438 439 MPASS(m == NULL); 440 if (is_hashfilter(sc)) 441 cookie = CPL_COOKIE_HASHFILTER; 442 else 443 cookie = CPL_COOKIE_TOM; 444 445 return (abort_rpl_rss_handlers[cookie](iq, rss, m)); 446 } 447 448 static int 449 fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 450 { 451 struct adapter *sc = iq->adapter; 452 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 453 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 454 u_int cookie; 455 456 MPASS(m == NULL); 457 if (is_etid(sc, tid)) 458 cookie = CPL_COOKIE_ETHOFLD; 459 else 460 cookie = CPL_COOKIE_TOM; 461 462 return (fw4_ack_handlers[cookie](iq, rss, m)); 463 } 464 465 static void 466 t4_init_shared_cpl_handlers(void) 467 { 468 469 t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); 470 t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); 471 t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); 472 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); 473 t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); 474 } 475 476 void 477 t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) 478 { 479 uintptr_t *loc; 480 481 MPASS(opcode < nitems(t4_cpl_handler)); 482 MPASS(cookie > CPL_COOKIE_RESERVED); 483 MPASS(cookie < NUM_CPL_COOKIES); 484 MPASS(t4_cpl_handler[opcode] != NULL); 485 486 switch (opcode) { 487 case CPL_SET_TCB_RPL: 488 loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; 489 break; 490 case CPL_L2T_WRITE_RPL: 491 loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; 492 break; 493 case CPL_ACT_OPEN_RPL: 494 loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; 495 break; 496 case CPL_ABORT_RPL_RSS: 497 loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; 498 break; 499 case CPL_FW4_ACK: 500 loc = (uintptr_t *)&fw4_ack_handlers[cookie]; 501 break; 502 default: 503 MPASS(0); 504 return; 505 } 506 MPASS(h == NULL || *loc == (uintptr_t)NULL); 507 atomic_store_rel_ptr(loc, (uintptr_t)h); 508 } 509 510 /* 511 * Called on MOD_LOAD. Validates and calculates the SGE tunables. 512 */ 513 void 514 t4_sge_modload(void) 515 { 516 517 if (fl_pktshift < 0 || fl_pktshift > 7) { 518 printf("Invalid hw.cxgbe.fl_pktshift value (%d)," 519 " using 0 instead.\n", fl_pktshift); 520 fl_pktshift = 0; 521 } 522 523 if (spg_len != 64 && spg_len != 128) { 524 int len; 525 526 #if defined(__i386__) || defined(__amd64__) 527 len = cpu_clflush_line_size > 64 ? 
128 : 64; 528 #else 529 len = 64; 530 #endif 531 if (spg_len != -1) { 532 printf("Invalid hw.cxgbe.spg_len value (%d)," 533 " using %d instead.\n", spg_len, len); 534 } 535 spg_len = len; 536 } 537 538 if (cong_drop < -1 || cong_drop > 1) { 539 printf("Invalid hw.cxgbe.cong_drop value (%d)," 540 " using 0 instead.\n", cong_drop); 541 cong_drop = 0; 542 } 543 544 if (tscale != 1 && (tscale < 3 || tscale > 17)) { 545 printf("Invalid hw.cxgbe.tscale value (%d)," 546 " using 1 instead.\n", tscale); 547 tscale = 1; 548 } 549 550 extfree_refs = counter_u64_alloc(M_WAITOK); 551 extfree_rels = counter_u64_alloc(M_WAITOK); 552 counter_u64_zero(extfree_refs); 553 counter_u64_zero(extfree_rels); 554 555 t4_init_shared_cpl_handlers(); 556 t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); 557 t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); 558 t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); 559 t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); 560 #ifdef RATELIMIT 561 t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, 562 CPL_COOKIE_ETHOFLD); 563 #endif 564 t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); 565 t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); 566 } 567 568 void 569 t4_sge_modunload(void) 570 { 571 572 counter_u64_free(extfree_refs); 573 counter_u64_free(extfree_rels); 574 } 575 576 uint64_t 577 t4_sge_extfree_refs(void) 578 { 579 uint64_t refs, rels; 580 581 rels = counter_u64_fetch(extfree_rels); 582 refs = counter_u64_fetch(extfree_refs); 583 584 return (refs - rels); 585 } 586 587 static inline void 588 setup_pad_and_pack_boundaries(struct adapter *sc) 589 { 590 uint32_t v, m; 591 int pad, pack, pad_shift; 592 593 pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : 594 X_INGPADBOUNDARY_SHIFT; 595 pad = fl_pad; 596 if (fl_pad < (1 << pad_shift) || 597 fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || 598 !powerof2(fl_pad)) { 599 /* 600 * If there is any chance that we might use buffer packing and 601 * the chip is a T4, then pick 64 as the pad/pack boundary. Set 602 * it to the minimum allowed in all other cases. 603 */ 604 pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; 605 606 /* 607 * For fl_pad = 0 we'll still write a reasonable value to the 608 * register but all the freelists will opt out of padding. 609 * We'll complain here only if the user tried to set it to a 610 * value greater than 0 that was invalid. 611 */ 612 if (fl_pad > 0) { 613 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" 614 " (%d), using %d instead.\n", fl_pad, pad); 615 } 616 } 617 m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); 618 v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); 619 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 620 621 if (is_t4(sc)) { 622 if (fl_pack != -1 && fl_pack != pad) { 623 /* Complain but carry on. 
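T4 has no separate pack boundary register (SGE_CONTROL2 exists on T5 and later), so the pad boundary selected above doubles as the pack boundary.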
*/ 624 device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," 625 " using %d instead.\n", fl_pack, pad); 626 } 627 return; 628 } 629 630 pack = fl_pack; 631 if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || 632 !powerof2(fl_pack)) { 633 pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); 634 MPASS(powerof2(pack)); 635 if (pack < 16) 636 pack = 16; 637 if (pack == 32) 638 pack = 64; 639 if (pack > 4096) 640 pack = 4096; 641 if (fl_pack != -1) { 642 device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" 643 " (%d), using %d instead.\n", fl_pack, pack); 644 } 645 } 646 m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); 647 if (pack == 16) 648 v = V_INGPACKBOUNDARY(0); 649 else 650 v = V_INGPACKBOUNDARY(ilog2(pack) - 5); 651 652 MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ 653 t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); 654 } 655 656 /* 657 * adap->params.vpd.cclk must be set up before this is called. 658 */ 659 void 660 t4_tweak_chip_settings(struct adapter *sc) 661 { 662 int i; 663 uint32_t v, m; 664 int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; 665 int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; 666 int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ 667 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 668 static int sge_flbuf_sizes[] = { 669 MCLBYTES, 670 #if MJUMPAGESIZE != MCLBYTES 671 MJUMPAGESIZE, 672 MJUMPAGESIZE - CL_METADATA_SIZE, 673 MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, 674 #endif 675 MJUM9BYTES, 676 MJUM16BYTES, 677 MCLBYTES - MSIZE - CL_METADATA_SIZE, 678 MJUM9BYTES - CL_METADATA_SIZE, 679 MJUM16BYTES - CL_METADATA_SIZE, 680 }; 681 682 KASSERT(sc->flags & MASTER_PF, 683 ("%s: trying to change chip settings when not master.", __func__)); 684 685 m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; 686 v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | 687 V_EGRSTATUSPAGESIZE(spg_len == 128); 688 t4_set_reg_field(sc, A_SGE_CONTROL, m, v); 689 690 setup_pad_and_pack_boundaries(sc); 691 692 v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | 693 V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | 694 V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | 695 V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | 696 V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | 697 V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | 698 V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | 699 V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); 700 t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); 701 702 KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, 703 ("%s: hw buffer size table too big", __func__)); 704 for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { 705 t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), 706 sge_flbuf_sizes[i]); 707 } 708 709 v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | 710 V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); 711 t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); 712 713 KASSERT(intr_timer[0] <= timer_max, 714 ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], 715 timer_max)); 716 for (i = 1; i < nitems(intr_timer); i++) { 717 KASSERT(intr_timer[i] >= intr_timer[i - 1], 718 ("%s: timers not listed in increasing order (%d)", 719 __func__, i)); 720 721 while (intr_timer[i] > timer_max) { 722 if (i == nitems(intr_timer) - 1) { 723 intr_timer[i] = timer_max; 724 break; 725 } 726 intr_timer[i] += intr_timer[i - 1]; 727 intr_timer[i] /= 2; 728 } 729 } 730 731 v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | 732 V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); 733 t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); 734 v = 
V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | 735 V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); 736 t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); 737 v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | 738 V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); 739 t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); 740 741 if (chip_id(sc) >= CHELSIO_T6) { 742 m = V_TSCALE(M_TSCALE); 743 if (tscale == 1) 744 v = 0; 745 else 746 v = V_TSCALE(tscale - 2); 747 t4_set_reg_field(sc, A_SGE_ITP_CONTROL, m, v); 748 749 if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { 750 m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | 751 V_WRTHRTHRESH(M_WRTHRTHRESH); 752 t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); 753 v &= ~m; 754 v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | 755 V_WRTHRTHRESH(16); 756 t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); 757 } 758 } 759 760 /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ 761 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 762 t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); 763 764 /* 765 * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been 766 * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we 767 * may have to deal with is MAXPHYS + 1 page. 768 */ 769 v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); 770 t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); 771 772 /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */ 773 m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; 774 t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); 775 776 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 777 F_RESETDDPOFFSET; 778 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 779 t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); 780 } 781 782 /* 783 * SGE wants the buffer to be at least 64B and then a multiple of 16. If 784 * padding is in use, the buffer's start and end need to be aligned to the pad 785 * boundary as well. We'll just make sure that the size is a multiple of the 786 * boundary here, it is up to the buffer allocation code to make sure the start 787 * of the buffer is aligned as well. 788 */ 789 static inline int 790 hwsz_ok(struct adapter *sc, int hwsz) 791 { 792 int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; 793 794 return (hwsz >= 64 && (hwsz & mask) == 0); 795 } 796 797 /* 798 * XXX: driver really should be able to deal with unexpected settings. 799 */ 800 int 801 t4_read_chip_settings(struct adapter *sc) 802 { 803 struct sge *s = &sc->sge; 804 struct sge_params *sp = &sc->params.sge; 805 int i, j, n, rc = 0; 806 uint32_t m, v, r; 807 uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); 808 static int sw_buf_sizes[] = { /* Sorted by size */ 809 MCLBYTES, 810 #if MJUMPAGESIZE != MCLBYTES 811 MJUMPAGESIZE, 812 #endif 813 MJUM9BYTES, 814 MJUM16BYTES 815 }; 816 struct sw_zone_info *swz, *safe_swz; 817 struct hw_buf_info *hwb; 818 819 m = F_RXPKTCPLMODE; 820 v = F_RXPKTCPLMODE; 821 r = sc->params.sge.sge_control; 822 if ((r & m) != v) { 823 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); 824 rc = EINVAL; 825 } 826 827 /* 828 * If this changes then every single use of PAGE_SHIFT in the driver 829 * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. 830 */ 831 if (sp->page_shift != PAGE_SHIFT) { 832 device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); 833 rc = EINVAL; 834 } 835 836 /* Filter out unusable hw buffer sizes entirely (mark with -2). 
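A zidx of -1 marks a size that is usable but not yet claimed by a software zone; the loop further down fills in the real zone index.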
*/ 837 hwb = &s->hw_buf_info[0]; 838 for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { 839 r = sc->params.sge.sge_fl_buffer_size[i]; 840 hwb->size = r; 841 hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; 842 hwb->next = -1; 843 } 844 845 /* 846 * Create a sorted list in decreasing order of hw buffer sizes (and so 847 * increasing order of spare area) for each software zone. 848 * 849 * If padding is enabled then the start and end of the buffer must align 850 * to the pad boundary; if packing is enabled then they must align with 851 * the pack boundary as well. Allocations from the cluster zones are 852 * aligned to min(size, 4K), so the buffer starts at that alignment and 853 * ends at hwb->size alignment. If mbuf inlining is allowed the 854 * starting alignment will be reduced to MSIZE and the driver will 855 * exercise appropriate caution when deciding on the best buffer layout 856 * to use. 857 */ 858 n = 0; /* no usable buffer size to begin with */ 859 swz = &s->sw_zone_info[0]; 860 safe_swz = NULL; 861 for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { 862 int8_t head = -1, tail = -1; 863 864 swz->size = sw_buf_sizes[i]; 865 swz->zone = m_getzone(swz->size); 866 swz->type = m_gettype(swz->size); 867 868 if (swz->size < PAGE_SIZE) { 869 MPASS(powerof2(swz->size)); 870 if (fl_pad && (swz->size % sp->pad_boundary != 0)) 871 continue; 872 } 873 874 if (swz->size == safest_rx_cluster) 875 safe_swz = swz; 876 877 hwb = &s->hw_buf_info[0]; 878 for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { 879 if (hwb->zidx != -1 || hwb->size > swz->size) 880 continue; 881 #ifdef INVARIANTS 882 if (fl_pad) 883 MPASS(hwb->size % sp->pad_boundary == 0); 884 #endif 885 hwb->zidx = i; 886 if (head == -1) 887 head = tail = j; 888 else if (hwb->size < s->hw_buf_info[tail].size) { 889 s->hw_buf_info[tail].next = j; 890 tail = j; 891 } else { 892 int8_t *cur; 893 struct hw_buf_info *t; 894 895 for (cur = &head; *cur != -1; cur = &t->next) { 896 t = &s->hw_buf_info[*cur]; 897 if (hwb->size == t->size) { 898 hwb->zidx = -2; 899 break; 900 } 901 if (hwb->size > t->size) { 902 hwb->next = *cur; 903 *cur = j; 904 break; 905 } 906 } 907 } 908 } 909 swz->head_hwidx = head; 910 swz->tail_hwidx = tail; 911 912 if (tail != -1) { 913 n++; 914 if (swz->size - s->hw_buf_info[tail].size >= 915 CL_METADATA_SIZE) 916 sc->flags |= BUF_PACKING_OK; 917 } 918 } 919 if (n == 0) { 920 device_printf(sc->dev, "no usable SGE FL buffer size.\n"); 921 rc = EINVAL; 922 } 923 924 s->safe_hwidx1 = -1; 925 s->safe_hwidx2 = -1; 926 if (safe_swz != NULL) { 927 s->safe_hwidx1 = safe_swz->head_hwidx; 928 for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { 929 int spare; 930 931 hwb = &s->hw_buf_info[i]; 932 #ifdef INVARIANTS 933 if (fl_pad) 934 MPASS(hwb->size % sp->pad_boundary == 0); 935 #endif 936 spare = safe_swz->size - hwb->size; 937 if (spare >= CL_METADATA_SIZE) { 938 s->safe_hwidx2 = i; 939 break; 940 } 941 } 942 } 943 944 if (sc->flags & IS_VF) 945 return (0); 946 947 v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); 948 r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); 949 if (r != v) { 950 device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); 951 rc = EINVAL; 952 } 953 954 m = v = F_TDDPTAGTCB; 955 r = t4_read_reg(sc, A_ULP_RX_CTL); 956 if ((r & m) != v) { 957 device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); 958 rc = EINVAL; 959 } 960 961 m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | 962 F_RESETDDPOFFSET; 963 v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; 964 r = t4_read_reg(sc, A_TP_PARA_REG5); 965 if ((r & m) 
!= v) { 966 device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); 967 rc = EINVAL; 968 } 969 970 t4_init_tp_params(sc, 1); 971 972 t4_read_mtu_tbl(sc, sc->params.mtus, NULL); 973 t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); 974 975 return (rc); 976 } 977 978 int 979 t4_create_dma_tag(struct adapter *sc) 980 { 981 int rc; 982 983 rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, 984 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, 985 BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, 986 NULL, &sc->dmat); 987 if (rc != 0) { 988 device_printf(sc->dev, 989 "failed to create main DMA tag: %d\n", rc); 990 } 991 992 return (rc); 993 } 994 995 void 996 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 997 struct sysctl_oid_list *children) 998 { 999 struct sge_params *sp = &sc->params.sge; 1000 1001 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", 1002 CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", 1003 "freelist buffer sizes"); 1004 1005 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, 1006 NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); 1007 1008 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, 1009 NULL, sp->pad_boundary, "payload pad boundary (bytes)"); 1010 1011 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, 1012 NULL, sp->spg_len, "status page size (bytes)"); 1013 1014 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, 1015 NULL, cong_drop, "congestion drop setting"); 1016 1017 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, 1018 NULL, sp->pack_boundary, "payload pack boundary (bytes)"); 1019 } 1020 1021 int 1022 t4_destroy_dma_tag(struct adapter *sc) 1023 { 1024 if (sc->dmat) 1025 bus_dma_tag_destroy(sc->dmat); 1026 1027 return (0); 1028 } 1029 1030 /* 1031 * Allocate and initialize the firmware event queue, control queues, and special 1032 * purpose rx queues owned by the adapter. 1033 * 1034 * Returns errno on failure. Resources allocated up to that point may still be 1035 * allocated. Caller is responsible for cleanup in case this function fails. 1036 */ 1037 int 1038 t4_setup_adapter_queues(struct adapter *sc) 1039 { 1040 struct sysctl_oid *oid; 1041 struct sysctl_oid_list *children; 1042 int rc, i; 1043 1044 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1045 1046 sysctl_ctx_init(&sc->ctx); 1047 sc->flags |= ADAP_SYSCTL_CTX; 1048 1049 /* 1050 * Firmware event queue 1051 */ 1052 rc = alloc_fwq(sc); 1053 if (rc != 0) 1054 return (rc); 1055 1056 /* 1057 * That's all for the VF driver. 1058 */ 1059 if (sc->flags & IS_VF) 1060 return (rc); 1061 1062 oid = device_get_sysctl_tree(sc->dev); 1063 children = SYSCTL_CHILDREN(oid); 1064 1065 /* 1066 * XXX: General purpose rx queues, one per port. 1067 */ 1068 1069 /* 1070 * Control queues, one per port. 
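 * These are owned by the adapter (not any VI) and are freed in
 * t4_teardown_adapter_queues().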
1071 */ 1072 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "ctrlq", 1073 CTLFLAG_RD, NULL, "control queues"); 1074 for_each_port(sc, i) { 1075 struct sge_wrq *ctrlq = &sc->sge.ctrlq[i]; 1076 1077 rc = alloc_ctrlq(sc, ctrlq, i, oid); 1078 if (rc != 0) 1079 return (rc); 1080 } 1081 1082 return (rc); 1083 } 1084 1085 /* 1086 * Idempotent 1087 */ 1088 int 1089 t4_teardown_adapter_queues(struct adapter *sc) 1090 { 1091 int i; 1092 1093 ADAPTER_LOCK_ASSERT_NOTOWNED(sc); 1094 1095 /* Do this before freeing the queue */ 1096 if (sc->flags & ADAP_SYSCTL_CTX) { 1097 sysctl_ctx_free(&sc->ctx); 1098 sc->flags &= ~ADAP_SYSCTL_CTX; 1099 } 1100 1101 if (!(sc->flags & IS_VF)) { 1102 for_each_port(sc, i) 1103 free_wrq(sc, &sc->sge.ctrlq[i]); 1104 } 1105 free_fwq(sc); 1106 1107 return (0); 1108 } 1109 1110 /* Maximum payload that can be delivered with a single iq descriptor */ 1111 static inline int 1112 mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) 1113 { 1114 int payload; 1115 1116 #ifdef TCP_OFFLOAD 1117 if (toe) { 1118 int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); 1119 1120 /* Note that COP can set rx_coalesce on/off per connection. */ 1121 payload = max(mtu, rxcs); 1122 } else { 1123 #endif 1124 /* large enough even when hw VLAN extraction is disabled */ 1125 payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + 1126 ETHER_VLAN_ENCAP_LEN + mtu; 1127 #ifdef TCP_OFFLOAD 1128 } 1129 #endif 1130 1131 return (payload); 1132 } 1133 1134 int 1135 t4_setup_vi_queues(struct vi_info *vi) 1136 { 1137 int rc = 0, i, intr_idx, iqidx; 1138 struct sge_rxq *rxq; 1139 struct sge_txq *txq; 1140 #ifdef TCP_OFFLOAD 1141 struct sge_ofld_rxq *ofld_rxq; 1142 #endif 1143 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1144 struct sge_wrq *ofld_txq; 1145 #endif 1146 #ifdef DEV_NETMAP 1147 int saved_idx; 1148 struct sge_nm_rxq *nm_rxq; 1149 struct sge_nm_txq *nm_txq; 1150 #endif 1151 char name[16]; 1152 struct port_info *pi = vi->pi; 1153 struct adapter *sc = pi->adapter; 1154 struct ifnet *ifp = vi->ifp; 1155 struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); 1156 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 1157 int maxp, mtu = ifp->if_mtu; 1158 1159 /* Interrupt vector to start from (when using multiple vectors) */ 1160 intr_idx = vi->first_intr; 1161 1162 #ifdef DEV_NETMAP 1163 saved_idx = intr_idx; 1164 if (ifp->if_capabilities & IFCAP_NETMAP) { 1165 1166 /* netmap is supported with direct interrupts only. */ 1167 MPASS(!forwarding_intr_to_fwq(sc)); 1168 1169 /* 1170 * We don't have buffers to back the netmap rx queues 1171 * right now so we create the queues in a way that 1172 * doesn't set off any congestion signal in the chip. 1173 */ 1174 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", 1175 CTLFLAG_RD, NULL, "rx queues"); 1176 for_each_nm_rxq(vi, i, nm_rxq) { 1177 rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); 1178 if (rc != 0) 1179 goto done; 1180 intr_idx++; 1181 } 1182 1183 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", 1184 CTLFLAG_RD, NULL, "tx queues"); 1185 for_each_nm_txq(vi, i, nm_txq) { 1186 iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); 1187 rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); 1188 if (rc != 0) 1189 goto done; 1190 } 1191 } 1192 1193 /* Normal rx queues and netmap rx queues share the same interrupts. */ 1194 intr_idx = saved_idx; 1195 #endif 1196 1197 /* 1198 * Allocate rx queues first because a default iqid is required when 1199 * creating a tx queue. 
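 * (Each tx queue's init_eq() call below is passed the cntxt_id of one of
 * these rx queues' iqs as that default iqid.)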
1200 */ 1201 maxp = mtu_to_max_payload(sc, mtu, 0); 1202 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", 1203 CTLFLAG_RD, NULL, "rx queues"); 1204 for_each_rxq(vi, i, rxq) { 1205 1206 init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); 1207 1208 snprintf(name, sizeof(name), "%s rxq%d-fl", 1209 device_get_nameunit(vi->dev), i); 1210 init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); 1211 1212 rc = alloc_rxq(vi, rxq, 1213 forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); 1214 if (rc != 0) 1215 goto done; 1216 intr_idx++; 1217 } 1218 #ifdef DEV_NETMAP 1219 if (ifp->if_capabilities & IFCAP_NETMAP) 1220 intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); 1221 #endif 1222 #ifdef TCP_OFFLOAD 1223 maxp = mtu_to_max_payload(sc, mtu, 1); 1224 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", 1225 CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); 1226 for_each_ofld_rxq(vi, i, ofld_rxq) { 1227 1228 init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, 1229 vi->qsize_rxq); 1230 1231 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", 1232 device_get_nameunit(vi->dev), i); 1233 init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); 1234 1235 rc = alloc_ofld_rxq(vi, ofld_rxq, 1236 forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); 1237 if (rc != 0) 1238 goto done; 1239 intr_idx++; 1240 } 1241 #endif 1242 1243 /* 1244 * Now the tx queues. 1245 */ 1246 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, 1247 NULL, "tx queues"); 1248 for_each_txq(vi, i, txq) { 1249 iqidx = vi->first_rxq + (i % vi->nrxq); 1250 snprintf(name, sizeof(name), "%s txq%d", 1251 device_get_nameunit(vi->dev), i); 1252 init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, 1253 sc->sge.rxq[iqidx].iq.cntxt_id, name); 1254 1255 rc = alloc_txq(vi, txq, i, oid); 1256 if (rc != 0) 1257 goto done; 1258 } 1259 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1260 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", 1261 CTLFLAG_RD, NULL, "tx queues for TOE/ETHOFLD"); 1262 for_each_ofld_txq(vi, i, ofld_txq) { 1263 struct sysctl_oid *oid2; 1264 1265 snprintf(name, sizeof(name), "%s ofld_txq%d", 1266 device_get_nameunit(vi->dev), i); 1267 if (vi->nofldrxq > 0) { 1268 iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); 1269 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, 1270 pi->tx_chan, sc->sge.ofld_rxq[iqidx].iq.cntxt_id, 1271 name); 1272 } else { 1273 iqidx = vi->first_rxq + (i % vi->nrxq); 1274 init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, 1275 pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name); 1276 } 1277 1278 snprintf(name, sizeof(name), "%d", i); 1279 oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 1280 name, CTLFLAG_RD, NULL, "offload tx queue"); 1281 1282 rc = alloc_wrq(sc, vi, ofld_txq, oid2); 1283 if (rc != 0) 1284 goto done; 1285 } 1286 #endif 1287 done: 1288 if (rc) 1289 t4_teardown_vi_queues(vi); 1290 1291 return (rc); 1292 } 1293 1294 /* 1295 * Idempotent 1296 */ 1297 int 1298 t4_teardown_vi_queues(struct vi_info *vi) 1299 { 1300 int i; 1301 struct sge_rxq *rxq; 1302 struct sge_txq *txq; 1303 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1304 struct port_info *pi = vi->pi; 1305 struct adapter *sc = pi->adapter; 1306 struct sge_wrq *ofld_txq; 1307 #endif 1308 #ifdef TCP_OFFLOAD 1309 struct sge_ofld_rxq *ofld_rxq; 1310 #endif 1311 #ifdef DEV_NETMAP 1312 struct sge_nm_rxq *nm_rxq; 1313 struct sge_nm_txq *nm_txq; 1314 #endif 1315 1316 /* Do this before freeing the queues */ 1317 if (vi->flags & VI_SYSCTL_CTX) { 
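/* Freeing the context also removes every sysctl node created under vi->ctx. */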
1318 sysctl_ctx_free(&vi->ctx); 1319 vi->flags &= ~VI_SYSCTL_CTX; 1320 } 1321 1322 #ifdef DEV_NETMAP 1323 if (vi->ifp->if_capabilities & IFCAP_NETMAP) { 1324 for_each_nm_txq(vi, i, nm_txq) { 1325 free_nm_txq(vi, nm_txq); 1326 } 1327 1328 for_each_nm_rxq(vi, i, nm_rxq) { 1329 free_nm_rxq(vi, nm_rxq); 1330 } 1331 } 1332 #endif 1333 1334 /* 1335 * Take down all the tx queues first, as they reference the rx queues 1336 * (for egress updates, etc.). 1337 */ 1338 1339 for_each_txq(vi, i, txq) { 1340 free_txq(vi, txq); 1341 } 1342 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 1343 for_each_ofld_txq(vi, i, ofld_txq) { 1344 free_wrq(sc, ofld_txq); 1345 } 1346 #endif 1347 1348 /* 1349 * Then take down the rx queues. 1350 */ 1351 1352 for_each_rxq(vi, i, rxq) { 1353 free_rxq(vi, rxq); 1354 } 1355 #ifdef TCP_OFFLOAD 1356 for_each_ofld_rxq(vi, i, ofld_rxq) { 1357 free_ofld_rxq(vi, ofld_rxq); 1358 } 1359 #endif 1360 1361 return (0); 1362 } 1363 1364 /* 1365 * Interrupt handler when the driver is using only 1 interrupt. This is a very 1366 * unusual scenario. 1367 * 1368 * a) Deals with errors, if any. 1369 * b) Services firmware event queue, which is taking interrupts for all other 1370 * queues. 1371 */ 1372 void 1373 t4_intr_all(void *arg) 1374 { 1375 struct adapter *sc = arg; 1376 struct sge_iq *fwq = &sc->sge.fwq; 1377 1378 MPASS(sc->intr_count == 1); 1379 1380 t4_intr_err(arg); 1381 t4_intr_evt(fwq); 1382 } 1383 1384 /* 1385 * Interrupt handler for errors (installed directly when multiple interrupts are 1386 * being used, or called by t4_intr_all). 1387 */ 1388 void 1389 t4_intr_err(void *arg) 1390 { 1391 struct adapter *sc = arg; 1392 1393 t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); 1394 t4_slow_intr_handler(sc); 1395 } 1396 1397 /* 1398 * Interrupt handler for iq-only queues. The firmware event queue is the only 1399 * such queue right now. 1400 */ 1401 void 1402 t4_intr_evt(void *arg) 1403 { 1404 struct sge_iq *iq = arg; 1405 1406 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1407 service_iq(iq, 0); 1408 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1409 } 1410 } 1411 1412 /* 1413 * Interrupt handler for iq+fl queues. 1414 */ 1415 void 1416 t4_intr(void *arg) 1417 { 1418 struct sge_iq *iq = arg; 1419 1420 if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { 1421 service_iq_fl(iq, 0); 1422 (void) atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); 1423 } 1424 } 1425 1426 #ifdef DEV_NETMAP 1427 /* 1428 * Interrupt handler for netmap rx queues. 1429 */ 1430 void 1431 t4_nm_intr(void *arg) 1432 { 1433 struct sge_nm_rxq *nm_rxq = arg; 1434 1435 if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) { 1436 service_nm_rxq(nm_rxq); 1437 (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON); 1438 } 1439 } 1440 1441 /* 1442 * Interrupt handler for vectors shared between NIC and netmap rx queues. 1443 */ 1444 void 1445 t4_vi_intr(void *arg) 1446 { 1447 struct irq *irq = arg; 1448 1449 MPASS(irq->nm_rxq != NULL); 1450 t4_nm_intr(irq->nm_rxq); 1451 1452 MPASS(irq->rxq != NULL); 1453 t4_intr(irq->rxq); 1454 } 1455 #endif 1456 1457 /* 1458 * Deals with interrupts on an iq-only (no freelist) queue. 
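 * Data responses (X_RSPD_TYPE_FLBUF) are never expected here; service_iq()
 * panics if it sees one.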
1459 */ 1460 static int 1461 service_iq(struct sge_iq *iq, int budget) 1462 { 1463 struct sge_iq *q; 1464 struct adapter *sc = iq->adapter; 1465 struct iq_desc *d = &iq->desc[iq->cidx]; 1466 int ndescs = 0, limit; 1467 int rsp_type; 1468 uint32_t lq; 1469 STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); 1470 1471 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1472 KASSERT((iq->flags & IQ_HAS_FL) == 0, 1473 ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq, 1474 iq->flags)); 1475 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1476 MPASS((iq->flags & IQ_LRO_ENABLED) == 0); 1477 1478 limit = budget ? budget : iq->qsize / 16; 1479 1480 /* 1481 * We always come back and check the descriptor ring for new indirect 1482 * interrupts and other responses after running a single handler. 1483 */ 1484 for (;;) { 1485 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1486 1487 rmb(); 1488 1489 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1490 lq = be32toh(d->rsp.pldbuflen_qid); 1491 1492 switch (rsp_type) { 1493 case X_RSPD_TYPE_FLBUF: 1494 panic("%s: data for an iq (%p) with no freelist", 1495 __func__, iq); 1496 1497 /* NOTREACHED */ 1498 1499 case X_RSPD_TYPE_CPL: 1500 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1501 ("%s: bad opcode %02x.", __func__, 1502 d->rss.opcode)); 1503 t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL); 1504 break; 1505 1506 case X_RSPD_TYPE_INTR: 1507 /* 1508 * There are 1K interrupt-capable queues (qids 0 1509 * through 1023). A response type indicating a 1510 * forwarded interrupt with a qid >= 1K is an 1511 * iWARP async notification. 1512 */ 1513 if (__predict_true(lq >= 1024)) { 1514 t4_an_handler(iq, &d->rsp); 1515 break; 1516 } 1517 1518 q = sc->sge.iqmap[lq - sc->sge.iq_start - 1519 sc->sge.iq_base]; 1520 if (atomic_cmpset_int(&q->state, IQS_IDLE, 1521 IQS_BUSY)) { 1522 if (service_iq_fl(q, q->qsize / 16) == 0) { 1523 (void) atomic_cmpset_int(&q->state, 1524 IQS_BUSY, IQS_IDLE); 1525 } else { 1526 STAILQ_INSERT_TAIL(&iql, q, 1527 link); 1528 } 1529 } 1530 break; 1531 1532 default: 1533 KASSERT(0, 1534 ("%s: illegal response type %d on iq %p", 1535 __func__, rsp_type, iq)); 1536 log(LOG_ERR, 1537 "%s: illegal response type %d on iq %p", 1538 device_get_nameunit(sc->dev), rsp_type, iq); 1539 break; 1540 } 1541 1542 d++; 1543 if (__predict_false(++iq->cidx == iq->sidx)) { 1544 iq->cidx = 0; 1545 iq->gen ^= F_RSPD_GEN; 1546 d = &iq->desc[0]; 1547 } 1548 if (__predict_false(++ndescs == limit)) { 1549 t4_write_reg(sc, sc->sge_gts_reg, 1550 V_CIDXINC(ndescs) | 1551 V_INGRESSQID(iq->cntxt_id) | 1552 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1553 ndescs = 0; 1554 1555 if (budget) { 1556 return (EINPROGRESS); 1557 } 1558 } 1559 } 1560 1561 if (STAILQ_EMPTY(&iql)) 1562 break; 1563 1564 /* 1565 * Process the head only, and send it to the back of the list if 1566 * it's still not done. 
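 * (Queues end up on iql only when an earlier service_iq_fl() call returned
 * non-zero because it used up its budget.)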
1567 */ 1568 q = STAILQ_FIRST(&iql); 1569 STAILQ_REMOVE_HEAD(&iql, link); 1570 if (service_iq_fl(q, q->qsize / 8) == 0) 1571 (void) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); 1572 else 1573 STAILQ_INSERT_TAIL(&iql, q, link); 1574 } 1575 1576 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1577 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1578 1579 return (0); 1580 } 1581 1582 static inline int 1583 sort_before_lro(struct lro_ctrl *lro) 1584 { 1585 1586 return (lro->lro_mbuf_max != 0); 1587 } 1588 1589 static inline uint64_t 1590 last_flit_to_ns(struct adapter *sc, uint64_t lf) 1591 { 1592 uint64_t n = be64toh(lf) & 0xfffffffffffffff; /* 60b, not 64b. */ 1593 1594 if (n > UINT64_MAX / 1000000) 1595 return (n / sc->params.vpd.cclk * 1000000); 1596 else 1597 return (n * 1000000 / sc->params.vpd.cclk); 1598 } 1599 1600 /* 1601 * Deals with interrupts on an iq+fl queue. 1602 */ 1603 static int 1604 service_iq_fl(struct sge_iq *iq, int budget) 1605 { 1606 struct sge_rxq *rxq = iq_to_rxq(iq); 1607 struct sge_fl *fl; 1608 struct adapter *sc = iq->adapter; 1609 struct iq_desc *d = &iq->desc[iq->cidx]; 1610 int ndescs = 0, limit; 1611 int rsp_type, refill, starved; 1612 uint32_t lq; 1613 uint16_t fl_hw_cidx; 1614 struct mbuf *m0; 1615 #if defined(INET) || defined(INET6) 1616 const struct timeval lro_timeout = {0, sc->lro_timeout}; 1617 struct lro_ctrl *lro = &rxq->lro; 1618 #endif 1619 1620 KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); 1621 MPASS(iq->flags & IQ_HAS_FL); 1622 1623 limit = budget ? budget : iq->qsize / 16; 1624 fl = &rxq->fl; 1625 fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ 1626 1627 #if defined(INET) || defined(INET6) 1628 if (iq->flags & IQ_ADJ_CREDIT) { 1629 MPASS(sort_before_lro(lro)); 1630 iq->flags &= ~IQ_ADJ_CREDIT; 1631 if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { 1632 tcp_lro_flush_all(lro); 1633 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | 1634 V_INGRESSQID((u32)iq->cntxt_id) | 1635 V_SEINTARM(iq->intr_params)); 1636 return (0); 1637 } 1638 ndescs = 1; 1639 } 1640 #else 1641 MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); 1642 #endif 1643 1644 while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { 1645 1646 rmb(); 1647 1648 refill = 0; 1649 m0 = NULL; 1650 rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); 1651 lq = be32toh(d->rsp.pldbuflen_qid); 1652 1653 switch (rsp_type) { 1654 case X_RSPD_TYPE_FLBUF: 1655 1656 m0 = get_fl_payload(sc, fl, lq); 1657 if (__predict_false(m0 == NULL)) 1658 goto out; 1659 refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; 1660 1661 if (iq->flags & IQ_RX_TIMESTAMP) { 1662 /* 1663 * Fill up rcv_tstmp but do not set M_TSTMP. 1664 * rcv_tstmp is not in the format that the 1665 * kernel expects and we don't want to mislead 1666 * it. For now this is only for custom code 1667 * that knows how to interpret cxgbe's stamp. 1668 */ 1669 m0->m_pkthdr.rcv_tstmp = 1670 last_flit_to_ns(sc, d->rsp.u.last_flit); 1671 #ifdef notyet 1672 m0->m_flags |= M_TSTMP; 1673 #endif 1674 } 1675 1676 /* fall through */ 1677 1678 case X_RSPD_TYPE_CPL: 1679 KASSERT(d->rss.opcode < NUM_CPL_CMDS, 1680 ("%s: bad opcode %02x.", __func__, d->rss.opcode)); 1681 t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); 1682 break; 1683 1684 case X_RSPD_TYPE_INTR: 1685 1686 /* 1687 * There are 1K interrupt-capable queues (qids 0 1688 * through 1023). A response type indicating a 1689 * forwarded interrupt with a qid >= 1K is an 1690 * iWARP async notification. That is the only 1691 * acceptable indirect interrupt on this queue. 
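 * A forwarded interrupt (qid < 1024) is never expected here and is
 * treated as fatal (see the panic below).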
1692 */ 1693 if (__predict_false(lq < 1024)) { 1694 panic("%s: indirect interrupt on iq_fl %p " 1695 "with qid %u", __func__, iq, lq); 1696 } 1697 1698 t4_an_handler(iq, &d->rsp); 1699 break; 1700 1701 default: 1702 KASSERT(0, ("%s: illegal response type %d on iq %p", 1703 __func__, rsp_type, iq)); 1704 log(LOG_ERR, "%s: illegal response type %d on iq %p", 1705 device_get_nameunit(sc->dev), rsp_type, iq); 1706 break; 1707 } 1708 1709 d++; 1710 if (__predict_false(++iq->cidx == iq->sidx)) { 1711 iq->cidx = 0; 1712 iq->gen ^= F_RSPD_GEN; 1713 d = &iq->desc[0]; 1714 } 1715 if (__predict_false(++ndescs == limit)) { 1716 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1717 V_INGRESSQID(iq->cntxt_id) | 1718 V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); 1719 ndescs = 0; 1720 1721 #if defined(INET) || defined(INET6) 1722 if (iq->flags & IQ_LRO_ENABLED && 1723 !sort_before_lro(lro) && 1724 sc->lro_timeout != 0) { 1725 tcp_lro_flush_inactive(lro, &lro_timeout); 1726 } 1727 #endif 1728 if (budget) { 1729 FL_LOCK(fl); 1730 refill_fl(sc, fl, 32); 1731 FL_UNLOCK(fl); 1732 1733 return (EINPROGRESS); 1734 } 1735 } 1736 if (refill) { 1737 FL_LOCK(fl); 1738 refill_fl(sc, fl, 32); 1739 FL_UNLOCK(fl); 1740 fl_hw_cidx = fl->hw_cidx; 1741 } 1742 } 1743 out: 1744 #if defined(INET) || defined(INET6) 1745 if (iq->flags & IQ_LRO_ENABLED) { 1746 if (ndescs > 0 && lro->lro_mbuf_count > 8) { 1747 MPASS(sort_before_lro(lro)); 1748 /* hold back one credit and don't flush LRO state */ 1749 iq->flags |= IQ_ADJ_CREDIT; 1750 ndescs--; 1751 } else { 1752 tcp_lro_flush_all(lro); 1753 } 1754 } 1755 #endif 1756 1757 t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | 1758 V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); 1759 1760 FL_LOCK(fl); 1761 starved = refill_fl(sc, fl, 64); 1762 FL_UNLOCK(fl); 1763 if (__predict_false(starved != 0)) 1764 add_fl_to_sfl(sc, fl); 1765 1766 return (0); 1767 } 1768 1769 static inline int 1770 cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) 1771 { 1772 int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; 1773 1774 if (rc) 1775 MPASS(cll->region3 >= CL_METADATA_SIZE); 1776 1777 return (rc); 1778 } 1779 1780 static inline struct cluster_metadata * 1781 cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, 1782 caddr_t cl) 1783 { 1784 1785 if (cl_has_metadata(fl, cll)) { 1786 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1787 1788 return ((struct cluster_metadata *)(cl + swz->size) - 1); 1789 } 1790 return (NULL); 1791 } 1792 1793 static void 1794 rxb_free(struct mbuf *m) 1795 { 1796 uma_zone_t zone = m->m_ext.ext_arg1; 1797 void *cl = m->m_ext.ext_arg2; 1798 1799 uma_zfree(zone, cl); 1800 counter_u64_add(extfree_rels, 1); 1801 } 1802 1803 /* 1804 * The mbuf returned by this function could be allocated from zone_mbuf or 1805 * constructed in spare room in the cluster. 
1806 * 1807 * The mbuf carries the payload in one of these ways 1808 * a) frame inside the mbuf (mbuf from zone_mbuf) 1809 * b) m_cljset (for clusters without metadata) zone_mbuf 1810 * c) m_extaddref (cluster with metadata) inline mbuf 1811 * d) m_extaddref (cluster with metadata) zone_mbuf 1812 */ 1813 static struct mbuf * 1814 get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, 1815 int remaining) 1816 { 1817 struct mbuf *m; 1818 struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; 1819 struct cluster_layout *cll = &sd->cll; 1820 struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; 1821 struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; 1822 struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); 1823 int len, blen; 1824 caddr_t payload; 1825 1826 blen = hwb->size - fl->rx_offset; /* max possible in this buf */ 1827 len = min(remaining, blen); 1828 payload = sd->cl + cll->region1 + fl->rx_offset; 1829 if (fl->flags & FL_BUF_PACKING) { 1830 const u_int l = fr_offset + len; 1831 const u_int pad = roundup2(l, fl->buf_boundary) - l; 1832 1833 if (fl->rx_offset + len + pad < hwb->size) 1834 blen = len + pad; 1835 MPASS(fl->rx_offset + blen <= hwb->size); 1836 } else { 1837 MPASS(fl->rx_offset == 0); /* not packing */ 1838 } 1839 1840 1841 if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { 1842 1843 /* 1844 * Copy payload into a freshly allocated mbuf. 1845 */ 1846 1847 m = fr_offset == 0 ? 1848 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1849 if (m == NULL) 1850 return (NULL); 1851 fl->mbuf_allocated++; 1852 1853 /* copy data to mbuf */ 1854 bcopy(payload, mtod(m, caddr_t), len); 1855 1856 } else if (sd->nmbuf * MSIZE < cll->region1) { 1857 1858 /* 1859 * There's spare room in the cluster for an mbuf. Create one 1860 * and associate it with the payload that's in the cluster. 1861 */ 1862 1863 MPASS(clm != NULL); 1864 m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); 1865 /* No bzero required */ 1866 if (m_init(m, M_NOWAIT, MT_DATA, 1867 fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) 1868 return (NULL); 1869 fl->mbuf_inlined++; 1870 m_extaddref(m, payload, blen, &clm->refcount, rxb_free, 1871 swz->zone, sd->cl); 1872 if (sd->nmbuf++ == 0) 1873 counter_u64_add(extfree_refs, 1); 1874 1875 } else { 1876 1877 /* 1878 * Grab an mbuf from zone_mbuf and associate it with the 1879 * payload in the cluster. 1880 */ 1881 1882 m = fr_offset == 0 ? 
1883 m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); 1884 if (m == NULL) 1885 return (NULL); 1886 fl->mbuf_allocated++; 1887 if (clm != NULL) { 1888 m_extaddref(m, payload, blen, &clm->refcount, 1889 rxb_free, swz->zone, sd->cl); 1890 if (sd->nmbuf++ == 0) 1891 counter_u64_add(extfree_refs, 1); 1892 } else { 1893 m_cljset(m, sd->cl, swz->type); 1894 sd->cl = NULL; /* consumed, not a recycle candidate */ 1895 } 1896 } 1897 if (fr_offset == 0) 1898 m->m_pkthdr.len = remaining; 1899 m->m_len = len; 1900 1901 if (fl->flags & FL_BUF_PACKING) { 1902 fl->rx_offset += blen; 1903 MPASS(fl->rx_offset <= hwb->size); 1904 if (fl->rx_offset < hwb->size) 1905 return (m); /* without advancing the cidx */ 1906 } 1907 1908 if (__predict_false(++fl->cidx % 8 == 0)) { 1909 uint16_t cidx = fl->cidx / 8; 1910 1911 if (__predict_false(cidx == fl->sidx)) 1912 fl->cidx = cidx = 0; 1913 fl->hw_cidx = cidx; 1914 } 1915 fl->rx_offset = 0; 1916 1917 return (m); 1918 } 1919 1920 static struct mbuf * 1921 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) 1922 { 1923 struct mbuf *m0, *m, **pnext; 1924 u_int remaining; 1925 const u_int total = G_RSPD_LEN(len_newbuf); 1926 1927 if (__predict_false(fl->flags & FL_BUF_RESUME)) { 1928 M_ASSERTPKTHDR(fl->m0); 1929 MPASS(fl->m0->m_pkthdr.len == total); 1930 MPASS(fl->remaining < total); 1931 1932 m0 = fl->m0; 1933 pnext = fl->pnext; 1934 remaining = fl->remaining; 1935 fl->flags &= ~FL_BUF_RESUME; 1936 goto get_segment; 1937 } 1938 1939 if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { 1940 fl->rx_offset = 0; 1941 if (__predict_false(++fl->cidx % 8 == 0)) { 1942 uint16_t cidx = fl->cidx / 8; 1943 1944 if (__predict_false(cidx == fl->sidx)) 1945 fl->cidx = cidx = 0; 1946 fl->hw_cidx = cidx; 1947 } 1948 } 1949 1950 /* 1951 * Payload starts at rx_offset in the current hw buffer. Its length is 1952 * 'len' and it may span multiple hw buffers. 
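 * The total length of the frame comes from G_RSPD_LEN(len_newbuf).  If an mbuf
 * allocation fails partway through, the partial chain and the number of bytes
 * still outstanding are saved in the fl (FL_BUF_RESUME) so that the next call
 * can pick up where this one left off.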
1953 */ 1954 1955 m0 = get_scatter_segment(sc, fl, 0, total); 1956 if (m0 == NULL) 1957 return (NULL); 1958 remaining = total - m0->m_len; 1959 pnext = &m0->m_next; 1960 while (remaining > 0) { 1961 get_segment: 1962 MPASS(fl->rx_offset == 0); 1963 m = get_scatter_segment(sc, fl, total - remaining, remaining); 1964 if (__predict_false(m == NULL)) { 1965 fl->m0 = m0; 1966 fl->pnext = pnext; 1967 fl->remaining = remaining; 1968 fl->flags |= FL_BUF_RESUME; 1969 return (NULL); 1970 } 1971 *pnext = m; 1972 pnext = &m->m_next; 1973 remaining -= m->m_len; 1974 } 1975 *pnext = NULL; 1976 1977 M_ASSERTPKTHDR(m0); 1978 return (m0); 1979 } 1980 1981 static int 1982 t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) 1983 { 1984 struct sge_rxq *rxq = iq_to_rxq(iq); 1985 struct ifnet *ifp = rxq->ifp; 1986 struct adapter *sc = iq->adapter; 1987 const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); 1988 #if defined(INET) || defined(INET6) 1989 struct lro_ctrl *lro = &rxq->lro; 1990 #endif 1991 static const int sw_hashtype[4][2] = { 1992 {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, 1993 {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, 1994 {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, 1995 {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, 1996 }; 1997 1998 KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, 1999 rss->opcode)); 2000 2001 m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; 2002 m0->m_len -= sc->params.sge.fl_pktshift; 2003 m0->m_data += sc->params.sge.fl_pktshift; 2004 2005 m0->m_pkthdr.rcvif = ifp; 2006 M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); 2007 m0->m_pkthdr.flowid = be32toh(rss->hash_val); 2008 2009 if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { 2010 if (ifp->if_capenable & IFCAP_RXCSUM && 2011 cpl->l2info & htobe32(F_RXF_IP)) { 2012 m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | 2013 CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 2014 rxq->rxcsum++; 2015 } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && 2016 cpl->l2info & htobe32(F_RXF_IP6)) { 2017 m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | 2018 CSUM_PSEUDO_HDR); 2019 rxq->rxcsum++; 2020 } 2021 2022 if (__predict_false(cpl->ip_frag)) 2023 m0->m_pkthdr.csum_data = be16toh(cpl->csum); 2024 else 2025 m0->m_pkthdr.csum_data = 0xffff; 2026 } 2027 2028 if (cpl->vlan_ex) { 2029 m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); 2030 m0->m_flags |= M_VLANTAG; 2031 rxq->vlan_extraction++; 2032 } 2033 2034 #if defined(INET) || defined(INET6) 2035 if (iq->flags & IQ_LRO_ENABLED) { 2036 if (sort_before_lro(lro)) { 2037 tcp_lro_queue_mbuf(lro, m0); 2038 return (0); /* queued for sort, then LRO */ 2039 } 2040 if (tcp_lro_rx(lro, m0, 0) == 0) 2041 return (0); /* queued for LRO */ 2042 } 2043 #endif 2044 ifp->if_input(ifp, m0); 2045 2046 return (0); 2047 } 2048 2049 /* 2050 * Must drain the wrq or make sure that someone else will. 
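 * This is the handler for the wrq's deferred-tx task.  It drains wr_list only
 * when there are no incomplete WRs outstanding; otherwise commit_wrq_wr will
 * drain the list once the last incomplete WR is committed.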
2051 */ 2052 static void 2053 wrq_tx_drain(void *arg, int n) 2054 { 2055 struct sge_wrq *wrq = arg; 2056 struct sge_eq *eq = &wrq->eq; 2057 2058 EQ_LOCK(eq); 2059 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2060 drain_wrq_wr_list(wrq->adapter, wrq); 2061 EQ_UNLOCK(eq); 2062 } 2063 2064 static void 2065 drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) 2066 { 2067 struct sge_eq *eq = &wrq->eq; 2068 u_int available, dbdiff; /* # of hardware descriptors */ 2069 u_int n; 2070 struct wrqe *wr; 2071 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2072 2073 EQ_LOCK_ASSERT_OWNED(eq); 2074 MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); 2075 wr = STAILQ_FIRST(&wrq->wr_list); 2076 MPASS(wr != NULL); /* Must be called with something useful to do */ 2077 MPASS(eq->pidx == eq->dbidx); 2078 dbdiff = 0; 2079 2080 do { 2081 eq->cidx = read_hw_cidx(eq); 2082 if (eq->pidx == eq->cidx) 2083 available = eq->sidx - 1; 2084 else 2085 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2086 2087 MPASS(wr->wrq == wrq); 2088 n = howmany(wr->wr_len, EQ_ESIZE); 2089 if (available < n) 2090 break; 2091 2092 dst = (void *)&eq->desc[eq->pidx]; 2093 if (__predict_true(eq->sidx - eq->pidx > n)) { 2094 /* Won't wrap, won't end exactly at the status page. */ 2095 bcopy(&wr->wr[0], dst, wr->wr_len); 2096 eq->pidx += n; 2097 } else { 2098 int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; 2099 2100 bcopy(&wr->wr[0], dst, first_portion); 2101 if (wr->wr_len > first_portion) { 2102 bcopy(&wr->wr[first_portion], &eq->desc[0], 2103 wr->wr_len - first_portion); 2104 } 2105 eq->pidx = n - (eq->sidx - eq->pidx); 2106 } 2107 wrq->tx_wrs_copied++; 2108 2109 if (available < eq->sidx / 4 && 2110 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2111 /* 2112 * XXX: This is not 100% reliable with some 2113 * types of WRs. But this is a very unusual 2114 * situation for an ofld/ctrl queue anyway. 2115 */ 2116 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2117 F_FW_WR_EQUEQ); 2118 } 2119 2120 dbdiff += n; 2121 if (dbdiff >= 16) { 2122 ring_eq_db(sc, eq, dbdiff); 2123 dbdiff = 0; 2124 } 2125 2126 STAILQ_REMOVE_HEAD(&wrq->wr_list, link); 2127 free_wrqe(wr); 2128 MPASS(wrq->nwr_pending > 0); 2129 wrq->nwr_pending--; 2130 MPASS(wrq->ndesc_needed >= n); 2131 wrq->ndesc_needed -= n; 2132 } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); 2133 2134 if (dbdiff) 2135 ring_eq_db(sc, eq, dbdiff); 2136 } 2137 2138 /* 2139 * Doesn't fail. Holds on to work requests it can't send right away. 2140 */ 2141 void 2142 t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) 2143 { 2144 #ifdef INVARIANTS 2145 struct sge_eq *eq = &wrq->eq; 2146 #endif 2147 2148 EQ_LOCK_ASSERT_OWNED(eq); 2149 MPASS(wr != NULL); 2150 MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); 2151 MPASS((wr->wr_len & 0x7) == 0); 2152 2153 STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); 2154 wrq->nwr_pending++; 2155 wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); 2156 2157 if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) 2158 return; /* commit_wrq_wr will drain wr_list as well. */ 2159 2160 drain_wrq_wr_list(sc, wrq); 2161 2162 /* Doorbell must have caught up to the pidx. 
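 * (drain_wrq_wr_list rings the doorbell for everything it writes out, so
 * nothing should be left between dbidx and pidx at this point)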
*/ 2163 MPASS(eq->pidx == eq->dbidx); 2164 } 2165 2166 void 2167 t4_update_fl_bufsize(struct ifnet *ifp) 2168 { 2169 struct vi_info *vi = ifp->if_softc; 2170 struct adapter *sc = vi->pi->adapter; 2171 struct sge_rxq *rxq; 2172 #ifdef TCP_OFFLOAD 2173 struct sge_ofld_rxq *ofld_rxq; 2174 #endif 2175 struct sge_fl *fl; 2176 int i, maxp, mtu = ifp->if_mtu; 2177 2178 maxp = mtu_to_max_payload(sc, mtu, 0); 2179 for_each_rxq(vi, i, rxq) { 2180 fl = &rxq->fl; 2181 2182 FL_LOCK(fl); 2183 find_best_refill_source(sc, fl, maxp); 2184 FL_UNLOCK(fl); 2185 } 2186 #ifdef TCP_OFFLOAD 2187 maxp = mtu_to_max_payload(sc, mtu, 1); 2188 for_each_ofld_rxq(vi, i, ofld_rxq) { 2189 fl = &ofld_rxq->fl; 2190 2191 FL_LOCK(fl); 2192 find_best_refill_source(sc, fl, maxp); 2193 FL_UNLOCK(fl); 2194 } 2195 #endif 2196 } 2197 2198 static inline int 2199 mbuf_nsegs(struct mbuf *m) 2200 { 2201 2202 M_ASSERTPKTHDR(m); 2203 KASSERT(m->m_pkthdr.l5hlen > 0, 2204 ("%s: mbuf %p missing information on # of segments.", __func__, m)); 2205 2206 return (m->m_pkthdr.l5hlen); 2207 } 2208 2209 static inline void 2210 set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) 2211 { 2212 2213 M_ASSERTPKTHDR(m); 2214 m->m_pkthdr.l5hlen = nsegs; 2215 } 2216 2217 static inline int 2218 mbuf_cflags(struct mbuf *m) 2219 { 2220 2221 M_ASSERTPKTHDR(m); 2222 return (m->m_pkthdr.PH_loc.eight[4]); 2223 } 2224 2225 static inline void 2226 set_mbuf_cflags(struct mbuf *m, uint8_t flags) 2227 { 2228 2229 M_ASSERTPKTHDR(m); 2230 m->m_pkthdr.PH_loc.eight[4] = flags; 2231 } 2232 2233 static inline int 2234 mbuf_len16(struct mbuf *m) 2235 { 2236 int n; 2237 2238 M_ASSERTPKTHDR(m); 2239 n = m->m_pkthdr.PH_loc.eight[0]; 2240 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2241 2242 return (n); 2243 } 2244 2245 static inline void 2246 set_mbuf_len16(struct mbuf *m, uint8_t len16) 2247 { 2248 2249 M_ASSERTPKTHDR(m); 2250 m->m_pkthdr.PH_loc.eight[0] = len16; 2251 } 2252 2253 #ifdef RATELIMIT 2254 static inline int 2255 mbuf_eo_nsegs(struct mbuf *m) 2256 { 2257 2258 M_ASSERTPKTHDR(m); 2259 return (m->m_pkthdr.PH_loc.eight[1]); 2260 } 2261 2262 static inline void 2263 set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) 2264 { 2265 2266 M_ASSERTPKTHDR(m); 2267 m->m_pkthdr.PH_loc.eight[1] = nsegs; 2268 } 2269 2270 static inline int 2271 mbuf_eo_len16(struct mbuf *m) 2272 { 2273 int n; 2274 2275 M_ASSERTPKTHDR(m); 2276 n = m->m_pkthdr.PH_loc.eight[2]; 2277 MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); 2278 2279 return (n); 2280 } 2281 2282 static inline void 2283 set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) 2284 { 2285 2286 M_ASSERTPKTHDR(m); 2287 m->m_pkthdr.PH_loc.eight[2] = len16; 2288 } 2289 2290 static inline int 2291 mbuf_eo_tsclk_tsoff(struct mbuf *m) 2292 { 2293 2294 M_ASSERTPKTHDR(m); 2295 return (m->m_pkthdr.PH_loc.eight[3]); 2296 } 2297 2298 static inline void 2299 set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) 2300 { 2301 2302 M_ASSERTPKTHDR(m); 2303 m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; 2304 } 2305 2306 static inline int 2307 needs_eo(struct mbuf *m) 2308 { 2309 2310 return (m->m_pkthdr.snd_tag != NULL); 2311 } 2312 #endif 2313 2314 /* 2315 * Try to allocate an mbuf to contain a raw work request. To make it 2316 * easy to construct the work request, don't allocate a chain but a 2317 * single mbuf. 
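 * Lengths up to MCLBYTES are supported; anything larger fails and NULL is
 * returned.  A minimal usage sketch (len and the WR type are illustrative;
 * the caller builds the raw work request directly in the mbuf's data area):
 *
 *	m = alloc_wr_mbuf(len, M_NOWAIT);
 *	if (m != NULL)
 *		wr = mtod(m, struct fw_eth_tx_pkt_wr *);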
2318 */ 2319 struct mbuf * 2320 alloc_wr_mbuf(int len, int how) 2321 { 2322 struct mbuf *m; 2323 2324 if (len <= MHLEN) 2325 m = m_gethdr(how, MT_DATA); 2326 else if (len <= MCLBYTES) 2327 m = m_getcl(how, MT_DATA, M_PKTHDR); 2328 else 2329 m = NULL; 2330 if (m == NULL) 2331 return (NULL); 2332 m->m_pkthdr.len = len; 2333 m->m_len = len; 2334 set_mbuf_cflags(m, MC_RAW_WR); 2335 set_mbuf_len16(m, howmany(len, 16)); 2336 return (m); 2337 } 2338 2339 static inline int 2340 needs_tso(struct mbuf *m) 2341 { 2342 2343 M_ASSERTPKTHDR(m); 2344 2345 return (m->m_pkthdr.csum_flags & CSUM_TSO); 2346 } 2347 2348 static inline int 2349 needs_l3_csum(struct mbuf *m) 2350 { 2351 2352 M_ASSERTPKTHDR(m); 2353 2354 return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)); 2355 } 2356 2357 static inline int 2358 needs_l4_csum(struct mbuf *m) 2359 { 2360 2361 M_ASSERTPKTHDR(m); 2362 2363 return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 2364 CSUM_TCP_IPV6 | CSUM_TSO)); 2365 } 2366 2367 static inline int 2368 needs_tcp_csum(struct mbuf *m) 2369 { 2370 2371 M_ASSERTPKTHDR(m); 2372 return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO)); 2373 } 2374 2375 #ifdef RATELIMIT 2376 static inline int 2377 needs_udp_csum(struct mbuf *m) 2378 { 2379 2380 M_ASSERTPKTHDR(m); 2381 return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)); 2382 } 2383 #endif 2384 2385 static inline int 2386 needs_vlan_insertion(struct mbuf *m) 2387 { 2388 2389 M_ASSERTPKTHDR(m); 2390 2391 return (m->m_flags & M_VLANTAG); 2392 } 2393 2394 static void * 2395 m_advance(struct mbuf **pm, int *poffset, int len) 2396 { 2397 struct mbuf *m = *pm; 2398 int offset = *poffset; 2399 uintptr_t p = 0; 2400 2401 MPASS(len > 0); 2402 2403 for (;;) { 2404 if (offset + len < m->m_len) { 2405 offset += len; 2406 p = mtod(m, uintptr_t) + offset; 2407 break; 2408 } 2409 len -= m->m_len - offset; 2410 m = m->m_next; 2411 offset = 0; 2412 MPASS(m != NULL); 2413 } 2414 *poffset = offset; 2415 *pm = m; 2416 return ((void *)p); 2417 } 2418 2419 /* 2420 * Can deal with empty mbufs in the chain that have m_len = 0, but the chain 2421 * must have at least one mbuf that's not empty. It is possible for this 2422 * routine to return 0 if skip accounts for all the contents of the mbuf chain. 2423 */ 2424 static inline int 2425 count_mbuf_nsegs(struct mbuf *m, int skip) 2426 { 2427 vm_paddr_t lastb, next; 2428 vm_offset_t va; 2429 int len, nsegs; 2430 2431 M_ASSERTPKTHDR(m); 2432 MPASS(m->m_pkthdr.len > 0); 2433 MPASS(m->m_pkthdr.len >= skip); 2434 2435 nsegs = 0; 2436 lastb = 0; 2437 for (; m; m = m->m_next) { 2438 2439 len = m->m_len; 2440 if (__predict_false(len == 0)) 2441 continue; 2442 if (skip >= len) { 2443 skip -= len; 2444 continue; 2445 } 2446 va = mtod(m, vm_offset_t) + skip; 2447 len -= skip; 2448 skip = 0; 2449 next = pmap_kextract(va); 2450 nsegs += sglist_count((void *)(uintptr_t)va, len); 2451 if (lastb + 1 == next) 2452 nsegs--; 2453 lastb = pmap_kextract(va + len - 1); 2454 } 2455 2456 return (nsegs); 2457 } 2458 2459 /* 2460 * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: 2461 * a) caller can assume it's been freed if this function returns with an error. 2462 * b) it may get defragged up if the gather list is too long for the hardware. 
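 * c) metadata needed by the tx path (nsegs, len16, tx flags, and the L2/L3/L4
 *    header lengths where applicable) is stored in the mbuf's packet header.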
2463 */ 2464 int 2465 parse_pkt(struct adapter *sc, struct mbuf **mp) 2466 { 2467 struct mbuf *m0 = *mp, *m; 2468 int rc, nsegs, defragged = 0, offset; 2469 struct ether_header *eh; 2470 void *l3hdr; 2471 #if defined(INET) || defined(INET6) 2472 struct tcphdr *tcp; 2473 #endif 2474 uint16_t eh_type; 2475 2476 M_ASSERTPKTHDR(m0); 2477 if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { 2478 rc = EINVAL; 2479 fail: 2480 m_freem(m0); 2481 *mp = NULL; 2482 return (rc); 2483 } 2484 restart: 2485 /* 2486 * First count the number of gather list segments in the payload. 2487 * Defrag the mbuf if nsegs exceeds the hardware limit. 2488 */ 2489 M_ASSERTPKTHDR(m0); 2490 MPASS(m0->m_pkthdr.len > 0); 2491 nsegs = count_mbuf_nsegs(m0, 0); 2492 if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { 2493 if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { 2494 rc = EFBIG; 2495 goto fail; 2496 } 2497 *mp = m0 = m; /* update caller's copy after defrag */ 2498 goto restart; 2499 } 2500 2501 if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { 2502 m0 = m_pullup(m0, m0->m_pkthdr.len); 2503 if (m0 == NULL) { 2504 /* Should have left well enough alone. */ 2505 rc = EFBIG; 2506 goto fail; 2507 } 2508 *mp = m0; /* update caller's copy after pullup */ 2509 goto restart; 2510 } 2511 set_mbuf_nsegs(m0, nsegs); 2512 set_mbuf_cflags(m0, 0); 2513 if (sc->flags & IS_VF) 2514 set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); 2515 else 2516 set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); 2517 2518 #ifdef RATELIMIT 2519 /* 2520 * Ethofld is limited to TCP and UDP for now, and only when L4 hw 2521 * checksumming is enabled. needs_l4_csum happens to check for all the 2522 * right things. 2523 */ 2524 if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) 2525 m0->m_pkthdr.snd_tag = NULL; 2526 #endif 2527 2528 if (!needs_tso(m0) && 2529 #ifdef RATELIMIT 2530 !needs_eo(m0) && 2531 #endif 2532 !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) 2533 return (0); 2534 2535 m = m0; 2536 eh = mtod(m, struct ether_header *); 2537 eh_type = ntohs(eh->ether_type); 2538 if (eh_type == ETHERTYPE_VLAN) { 2539 struct ether_vlan_header *evh = (void *)eh; 2540 2541 eh_type = ntohs(evh->evl_proto); 2542 m0->m_pkthdr.l2hlen = sizeof(*evh); 2543 } else 2544 m0->m_pkthdr.l2hlen = sizeof(*eh); 2545 2546 offset = 0; 2547 l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); 2548 2549 switch (eh_type) { 2550 #ifdef INET6 2551 case ETHERTYPE_IPV6: 2552 { 2553 struct ip6_hdr *ip6 = l3hdr; 2554 2555 MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); 2556 2557 m0->m_pkthdr.l3hlen = sizeof(*ip6); 2558 break; 2559 } 2560 #endif 2561 #ifdef INET 2562 case ETHERTYPE_IP: 2563 { 2564 struct ip *ip = l3hdr; 2565 2566 m0->m_pkthdr.l3hlen = ip->ip_hl * 4; 2567 break; 2568 } 2569 #endif 2570 default: 2571 panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" 2572 " with the same INET/INET6 options as the kernel.", 2573 __func__, eh_type); 2574 } 2575 2576 #if defined(INET) || defined(INET6) 2577 if (needs_tcp_csum(m0)) { 2578 tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); 2579 m0->m_pkthdr.l4hlen = tcp->th_off * 4; 2580 #ifdef RATELIMIT 2581 if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { 2582 set_mbuf_eo_tsclk_tsoff(m0, 2583 V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | 2584 V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); 2585 } else 2586 set_mbuf_eo_tsclk_tsoff(m0, 0); 2587 } else if (needs_udp_csum(m)) { 2588 m0->m_pkthdr.l4hlen = sizeof(struct udphdr); 2589 #endif 2590 } 2591 #ifdef RATELIMIT 2592 if (needs_eo(m0)) { 2593 u_int immhdrs; 2594 2595 /* EO WRs have the headers in the WR and not the GL. */ 2596 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + 2597 m0->m_pkthdr.l4hlen; 2598 nsegs = count_mbuf_nsegs(m0, immhdrs); 2599 set_mbuf_eo_nsegs(m0, nsegs); 2600 set_mbuf_eo_len16(m0, 2601 txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); 2602 } 2603 #endif 2604 #endif 2605 MPASS(m0 == *mp); 2606 return (0); 2607 } 2608 2609 void * 2610 start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) 2611 { 2612 struct sge_eq *eq = &wrq->eq; 2613 struct adapter *sc = wrq->adapter; 2614 int ndesc, available; 2615 struct wrqe *wr; 2616 void *w; 2617 2618 MPASS(len16 > 0); 2619 ndesc = howmany(len16, EQ_ESIZE / 16); 2620 MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); 2621 2622 EQ_LOCK(eq); 2623 2624 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2625 drain_wrq_wr_list(sc, wrq); 2626 2627 if (!STAILQ_EMPTY(&wrq->wr_list)) { 2628 slowpath: 2629 EQ_UNLOCK(eq); 2630 wr = alloc_wrqe(len16 * 16, wrq); 2631 if (__predict_false(wr == NULL)) 2632 return (NULL); 2633 cookie->pidx = -1; 2634 cookie->ndesc = ndesc; 2635 return (&wr->wr); 2636 } 2637 2638 eq->cidx = read_hw_cidx(eq); 2639 if (eq->pidx == eq->cidx) 2640 available = eq->sidx - 1; 2641 else 2642 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2643 if (available < ndesc) 2644 goto slowpath; 2645 2646 cookie->pidx = eq->pidx; 2647 cookie->ndesc = ndesc; 2648 TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); 2649 2650 w = &eq->desc[eq->pidx]; 2651 IDXINCR(eq->pidx, ndesc, eq->sidx); 2652 if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { 2653 w = &wrq->ss[0]; 2654 wrq->ss_pidx = cookie->pidx; 2655 wrq->ss_len = len16 * 16; 2656 } 2657 2658 EQ_UNLOCK(eq); 2659 2660 return (w); 2661 } 2662 2663 void 2664 commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) 2665 { 2666 struct sge_eq *eq = &wrq->eq; 2667 struct adapter *sc = wrq->adapter; 2668 int ndesc, pidx; 2669 struct wrq_cookie *prev, *next; 2670 2671 if (cookie->pidx == -1) { 2672 struct wrqe *wr = __containerof(w, struct wrqe, wr); 2673 2674 t4_wrq_tx(sc, wr); 2675 return; 2676 } 2677 2678 if (__predict_false(w == &wrq->ss[0])) { 2679 int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; 2680 2681 MPASS(wrq->ss_len > n); /* WR had better wrap around. */ 2682 bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); 2683 bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); 2684 wrq->tx_wrs_ss++; 2685 } else 2686 wrq->tx_wrs_direct++; 2687 2688 EQ_LOCK(eq); 2689 ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. 
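 * A cookie absorbs the descriptor counts of earlier WRs that were committed
 * but whose doorbell was deferred to it, which is how ndesc can exceed the
 * size of a single work request.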
*/ 2690 pidx = cookie->pidx; 2691 MPASS(pidx >= 0 && pidx < eq->sidx); 2692 prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); 2693 next = TAILQ_NEXT(cookie, link); 2694 if (prev == NULL) { 2695 MPASS(pidx == eq->dbidx); 2696 if (next == NULL || ndesc >= 16) { 2697 int available; 2698 struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ 2699 2700 /* 2701 * Note that the WR via which we'll request tx updates 2702 * is at pidx and not eq->pidx, which has moved on 2703 * already. 2704 */ 2705 dst = (void *)&eq->desc[pidx]; 2706 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2707 if (available < eq->sidx / 4 && 2708 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2709 /* 2710 * XXX: This is not 100% reliable with some 2711 * types of WRs. But this is a very unusual 2712 * situation for an ofld/ctrl queue anyway. 2713 */ 2714 dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2715 F_FW_WR_EQUEQ); 2716 } 2717 2718 ring_eq_db(wrq->adapter, eq, ndesc); 2719 } else { 2720 MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); 2721 next->pidx = pidx; 2722 next->ndesc += ndesc; 2723 } 2724 } else { 2725 MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); 2726 prev->ndesc += ndesc; 2727 } 2728 TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); 2729 2730 if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) 2731 drain_wrq_wr_list(sc, wrq); 2732 2733 #ifdef INVARIANTS 2734 if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { 2735 /* Doorbell must have caught up to the pidx. */ 2736 MPASS(wrq->eq.pidx == wrq->eq.dbidx); 2737 } 2738 #endif 2739 EQ_UNLOCK(eq); 2740 } 2741 2742 static u_int 2743 can_resume_eth_tx(struct mp_ring *r) 2744 { 2745 struct sge_eq *eq = r->cookie; 2746 2747 return (total_available_tx_desc(eq) > eq->sidx / 8); 2748 } 2749 2750 static inline int 2751 cannot_use_txpkts(struct mbuf *m) 2752 { 2753 /* maybe put a GL limit too, to avoid silliness? */ 2754 2755 return (needs_tso(m) || (mbuf_cflags(m) & MC_RAW_WR) != 0); 2756 } 2757 2758 static inline int 2759 discard_tx(struct sge_eq *eq) 2760 { 2761 2762 return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); 2763 } 2764 2765 static inline int 2766 wr_can_update_eq(struct fw_eth_tx_pkts_wr *wr) 2767 { 2768 2769 switch (G_FW_WR_OP(be32toh(wr->op_pkd))) { 2770 case FW_ULPTX_WR: 2771 case FW_ETH_TX_PKT_WR: 2772 case FW_ETH_TX_PKTS_WR: 2773 case FW_ETH_TX_PKT_VM_WR: 2774 return (1); 2775 default: 2776 return (0); 2777 } 2778 } 2779 2780 /* 2781 * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to 2782 * be consumed. Return the actual number consumed. 0 indicates a stall. 2783 */ 2784 static u_int 2785 eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) 2786 { 2787 struct sge_txq *txq = r->cookie; 2788 struct sge_eq *eq = &txq->eq; 2789 struct ifnet *ifp = txq->ifp; 2790 struct vi_info *vi = ifp->if_softc; 2791 struct port_info *pi = vi->pi; 2792 struct adapter *sc = pi->adapter; 2793 u_int total, remaining; /* # of packets */ 2794 u_int available, dbdiff; /* # of hardware descriptors */ 2795 u_int n, next_cidx; 2796 struct mbuf *m0, *tail; 2797 struct txpkts txp; 2798 struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ 2799 2800 remaining = IDXDIFF(pidx, cidx, r->size); 2801 MPASS(remaining > 0); /* Must not be called without work to do. 
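 * (eth_tx is the mp_ring drain callback and is expected to be called only
 * when there is at least one item between cidx and pidx)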
*/ 2802 total = 0; 2803 2804 TXQ_LOCK(txq); 2805 if (__predict_false(discard_tx(eq))) { 2806 while (cidx != pidx) { 2807 m0 = r->items[cidx]; 2808 m_freem(m0); 2809 if (++cidx == r->size) 2810 cidx = 0; 2811 } 2812 reclaim_tx_descs(txq, 2048); 2813 total = remaining; 2814 goto done; 2815 } 2816 2817 /* How many hardware descriptors do we have readily available. */ 2818 if (eq->pidx == eq->cidx) 2819 available = eq->sidx - 1; 2820 else 2821 available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; 2822 dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); 2823 2824 while (remaining > 0) { 2825 2826 m0 = r->items[cidx]; 2827 M_ASSERTPKTHDR(m0); 2828 MPASS(m0->m_nextpkt == NULL); 2829 2830 if (available < SGE_MAX_WR_NDESC) { 2831 available += reclaim_tx_descs(txq, 64); 2832 if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) 2833 break; /* out of descriptors */ 2834 } 2835 2836 next_cidx = cidx + 1; 2837 if (__predict_false(next_cidx == r->size)) 2838 next_cidx = 0; 2839 2840 wr = (void *)&eq->desc[eq->pidx]; 2841 if (sc->flags & IS_VF) { 2842 total++; 2843 remaining--; 2844 ETHER_BPF_MTAP(ifp, m0); 2845 n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0, 2846 available); 2847 } else if (remaining > 1 && 2848 try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { 2849 2850 /* pkts at cidx, next_cidx should both be in txp. */ 2851 MPASS(txp.npkt == 2); 2852 tail = r->items[next_cidx]; 2853 MPASS(tail->m_nextpkt == NULL); 2854 ETHER_BPF_MTAP(ifp, m0); 2855 ETHER_BPF_MTAP(ifp, tail); 2856 m0->m_nextpkt = tail; 2857 2858 if (__predict_false(++next_cidx == r->size)) 2859 next_cidx = 0; 2860 2861 while (next_cidx != pidx) { 2862 if (add_to_txpkts(r->items[next_cidx], &txp, 2863 available) != 0) 2864 break; 2865 tail->m_nextpkt = r->items[next_cidx]; 2866 tail = tail->m_nextpkt; 2867 ETHER_BPF_MTAP(ifp, tail); 2868 if (__predict_false(++next_cidx == r->size)) 2869 next_cidx = 0; 2870 } 2871 2872 n = write_txpkts_wr(txq, wr, m0, &txp, available); 2873 total += txp.npkt; 2874 remaining -= txp.npkt; 2875 } else if (mbuf_cflags(m0) & MC_RAW_WR) { 2876 total++; 2877 remaining--; 2878 n = write_raw_wr(txq, (void *)wr, m0, available); 2879 } else { 2880 total++; 2881 remaining--; 2882 ETHER_BPF_MTAP(ifp, m0); 2883 n = write_txpkt_wr(txq, (void *)wr, m0, available); 2884 } 2885 MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); 2886 2887 available -= n; 2888 dbdiff += n; 2889 IDXINCR(eq->pidx, n, eq->sidx); 2890 2891 if (wr_can_update_eq(wr)) { 2892 if (total_available_tx_desc(eq) < eq->sidx / 4 && 2893 atomic_cmpset_int(&eq->equiq, 0, 1)) { 2894 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | 2895 F_FW_WR_EQUEQ); 2896 eq->equeqidx = eq->pidx; 2897 } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 2898 32) { 2899 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); 2900 eq->equeqidx = eq->pidx; 2901 } 2902 } 2903 2904 if (dbdiff >= 16 && remaining >= 4) { 2905 ring_eq_db(sc, eq, dbdiff); 2906 available += reclaim_tx_descs(txq, 4 * dbdiff); 2907 dbdiff = 0; 2908 } 2909 2910 cidx = next_cidx; 2911 } 2912 if (dbdiff != 0) { 2913 ring_eq_db(sc, eq, dbdiff); 2914 reclaim_tx_descs(txq, 32); 2915 } 2916 done: 2917 TXQ_UNLOCK(txq); 2918 2919 return (total); 2920 } 2921 2922 static inline void 2923 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, 2924 int qsize) 2925 { 2926 2927 KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, 2928 ("%s: bad tmr_idx %d", __func__, tmr_idx)); 2929 KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ 2930 ("%s: bad pktc_idx %d", __func__, 
pktc_idx)); 2931 2932 iq->flags = 0; 2933 iq->adapter = sc; 2934 iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); 2935 iq->intr_pktc_idx = SGE_NCOUNTERS - 1; 2936 if (pktc_idx >= 0) { 2937 iq->intr_params |= F_QINTR_CNT_EN; 2938 iq->intr_pktc_idx = pktc_idx; 2939 } 2940 iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ 2941 iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; 2942 } 2943 2944 static inline void 2945 init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) 2946 { 2947 2948 fl->qsize = qsize; 2949 fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2950 strlcpy(fl->lockname, name, sizeof(fl->lockname)); 2951 if (sc->flags & BUF_PACKING_OK && 2952 ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ 2953 (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ 2954 fl->flags |= FL_BUF_PACKING; 2955 find_best_refill_source(sc, fl, maxp); 2956 find_safe_refill_source(sc, fl); 2957 } 2958 2959 static inline void 2960 init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, 2961 uint8_t tx_chan, uint16_t iqid, char *name) 2962 { 2963 KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); 2964 2965 eq->flags = eqtype & EQ_TYPEMASK; 2966 eq->tx_chan = tx_chan; 2967 eq->iqid = iqid; 2968 eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; 2969 strlcpy(eq->lockname, name, sizeof(eq->lockname)); 2970 } 2971 2972 static int 2973 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, 2974 bus_dmamap_t *map, bus_addr_t *pa, void **va) 2975 { 2976 int rc; 2977 2978 rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, 2979 BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); 2980 if (rc != 0) { 2981 device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); 2982 goto done; 2983 } 2984 2985 rc = bus_dmamem_alloc(*tag, va, 2986 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); 2987 if (rc != 0) { 2988 device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); 2989 goto done; 2990 } 2991 2992 rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); 2993 if (rc != 0) { 2994 device_printf(sc->dev, "cannot load DMA map: %d\n", rc); 2995 goto done; 2996 } 2997 done: 2998 if (rc) 2999 free_ring(sc, *tag, *map, *pa, *va); 3000 3001 return (rc); 3002 } 3003 3004 static int 3005 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, 3006 bus_addr_t pa, void *va) 3007 { 3008 if (pa) 3009 bus_dmamap_unload(tag, map); 3010 if (va) 3011 bus_dmamem_free(tag, va, map); 3012 if (tag) 3013 bus_dma_tag_destroy(tag); 3014 3015 return (0); 3016 } 3017 3018 /* 3019 * Allocates the ring for an ingress queue and an optional freelist. If the 3020 * freelist is specified it will be allocated and then associated with the 3021 * ingress queue. 3022 * 3023 * Returns errno on failure. Resources allocated up to that point may still be 3024 * allocated. Caller is responsible for cleanup in case this function fails. 3025 * 3026 * If the ingress queue will take interrupts directly then the intr_idx 3027 * specifies the vector, starting from 0. -1 means the interrupts for this 3028 * queue should be forwarded to the fwq. 
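 *
 * 'cong' controls congestion feedback for the queue: -1 disables it entirely;
 * otherwise the value is used as a channel map when the congestion manager
 * context is programmed (see tnl_cong() and the FW_PARAMS_PARAM_DMAQ_CONM_CTXT
 * setup below).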
3029 */ 3030 static int 3031 alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, 3032 int intr_idx, int cong) 3033 { 3034 int rc, i, cntxt_id; 3035 size_t len; 3036 struct fw_iq_cmd c; 3037 struct port_info *pi = vi->pi; 3038 struct adapter *sc = iq->adapter; 3039 struct sge_params *sp = &sc->params.sge; 3040 __be32 v = 0; 3041 3042 len = iq->qsize * IQ_ESIZE; 3043 rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, 3044 (void **)&iq->desc); 3045 if (rc != 0) 3046 return (rc); 3047 3048 bzero(&c, sizeof(c)); 3049 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | 3050 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | 3051 V_FW_IQ_CMD_VFN(0)); 3052 3053 c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | 3054 FW_LEN16(c)); 3055 3056 /* Special handling for firmware event queue */ 3057 if (iq == &sc->sge.fwq) 3058 v |= F_FW_IQ_CMD_IQASYNCH; 3059 3060 if (intr_idx < 0) { 3061 /* Forwarded interrupts, all headed to fwq */ 3062 v |= F_FW_IQ_CMD_IQANDST; 3063 v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); 3064 } else { 3065 KASSERT(intr_idx < sc->intr_count, 3066 ("%s: invalid direct intr_idx %d", __func__, intr_idx)); 3067 v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); 3068 } 3069 3070 c.type_to_iqandstindex = htobe32(v | 3071 V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | 3072 V_FW_IQ_CMD_VIID(vi->viid) | 3073 V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); 3074 c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | 3075 F_FW_IQ_CMD_IQGTSMODE | 3076 V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | 3077 V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); 3078 c.iqsize = htobe16(iq->qsize); 3079 c.iqaddr = htobe64(iq->ba); 3080 if (cong >= 0) 3081 c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); 3082 3083 if (fl) { 3084 mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); 3085 3086 len = fl->qsize * EQ_ESIZE; 3087 rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, 3088 &fl->ba, (void **)&fl->desc); 3089 if (rc) 3090 return (rc); 3091 3092 /* Allocate space for one software descriptor per buffer. */ 3093 rc = alloc_fl_sdesc(fl); 3094 if (rc != 0) { 3095 device_printf(sc->dev, 3096 "failed to setup fl software descriptors: %d\n", 3097 rc); 3098 return (rc); 3099 } 3100 3101 if (fl->flags & FL_BUF_PACKING) { 3102 fl->lowat = roundup2(sp->fl_starve_threshold2, 8); 3103 fl->buf_boundary = sp->pack_boundary; 3104 } else { 3105 fl->lowat = roundup2(sp->fl_starve_threshold, 8); 3106 fl->buf_boundary = 16; 3107 } 3108 if (fl_pad && fl->buf_boundary < sp->pad_boundary) 3109 fl->buf_boundary = sp->pad_boundary; 3110 3111 c.iqns_to_fl0congen |= 3112 htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | 3113 F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | 3114 (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | 3115 (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 3116 0)); 3117 if (cong >= 0) { 3118 c.iqns_to_fl0congen |= 3119 htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | 3120 F_FW_IQ_CMD_FL0CONGCIF | 3121 F_FW_IQ_CMD_FL0CONGEN); 3122 } 3123 c.fl0dcaen_to_fl0cidxfthresh = 3124 htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 3125 X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | 3126 V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
3127 X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); 3128 c.fl0size = htobe16(fl->qsize); 3129 c.fl0addr = htobe64(fl->ba); 3130 } 3131 3132 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3133 if (rc != 0) { 3134 device_printf(sc->dev, 3135 "failed to create ingress queue: %d\n", rc); 3136 return (rc); 3137 } 3138 3139 iq->cidx = 0; 3140 iq->gen = F_RSPD_GEN; 3141 iq->intr_next = iq->intr_params; 3142 iq->cntxt_id = be16toh(c.iqid); 3143 iq->abs_id = be16toh(c.physiqid); 3144 iq->flags |= IQ_ALLOCATED; 3145 3146 cntxt_id = iq->cntxt_id - sc->sge.iq_start; 3147 if (cntxt_id >= sc->sge.niq) { 3148 panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, 3149 cntxt_id, sc->sge.niq - 1); 3150 } 3151 sc->sge.iqmap[cntxt_id] = iq; 3152 3153 if (fl) { 3154 u_int qid; 3155 3156 iq->flags |= IQ_HAS_FL; 3157 fl->cntxt_id = be16toh(c.fl0id); 3158 fl->pidx = fl->cidx = 0; 3159 3160 cntxt_id = fl->cntxt_id - sc->sge.eq_start; 3161 if (cntxt_id >= sc->sge.neq) { 3162 panic("%s: fl->cntxt_id (%d) more than the max (%d)", 3163 __func__, cntxt_id, sc->sge.neq - 1); 3164 } 3165 sc->sge.eqmap[cntxt_id] = (void *)fl; 3166 3167 qid = fl->cntxt_id; 3168 if (isset(&sc->doorbells, DOORBELL_UDB)) { 3169 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3170 uint32_t mask = (1 << s_qpp) - 1; 3171 volatile uint8_t *udb; 3172 3173 udb = sc->udbs_base + UDBS_DB_OFFSET; 3174 udb += (qid >> s_qpp) << PAGE_SHIFT; 3175 qid &= mask; 3176 if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { 3177 udb += qid << UDBS_SEG_SHIFT; 3178 qid = 0; 3179 } 3180 fl->udb = (volatile void *)udb; 3181 } 3182 fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; 3183 3184 FL_LOCK(fl); 3185 /* Enough to make sure the SGE doesn't think it's starved */ 3186 refill_fl(sc, fl, fl->lowat); 3187 FL_UNLOCK(fl); 3188 } 3189 3190 if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { 3191 uint32_t param, val; 3192 3193 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 3194 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | 3195 V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); 3196 if (cong == 0) 3197 val = 1 << 19; 3198 else { 3199 val = 2 << 19; 3200 for (i = 0; i < 4; i++) { 3201 if (cong & (1 << i)) 3202 val |= 1 << (i << 2); 3203 } 3204 } 3205 3206 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); 3207 if (rc != 0) { 3208 /* report error but carry on */ 3209 device_printf(sc->dev, 3210 "failed to set congestion manager context for " 3211 "ingress queue %d: %d\n", iq->cntxt_id, rc); 3212 } 3213 } 3214 3215 /* Enable IQ interrupts */ 3216 atomic_store_rel_int(&iq->state, IQS_IDLE); 3217 t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | 3218 V_INGRESSQID(iq->cntxt_id)); 3219 3220 return (0); 3221 } 3222 3223 static int 3224 free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) 3225 { 3226 int rc; 3227 struct adapter *sc = iq->adapter; 3228 device_t dev; 3229 3230 if (sc == NULL) 3231 return (0); /* nothing to do */ 3232 3233 dev = vi ? vi->dev : sc->dev; 3234 3235 if (iq->flags & IQ_ALLOCATED) { 3236 rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, 3237 FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, 3238 fl ? 
fl->cntxt_id : 0xffff, 0xffff); 3239 if (rc != 0) { 3240 device_printf(dev, 3241 "failed to free queue %p: %d\n", iq, rc); 3242 return (rc); 3243 } 3244 iq->flags &= ~IQ_ALLOCATED; 3245 } 3246 3247 free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); 3248 3249 bzero(iq, sizeof(*iq)); 3250 3251 if (fl) { 3252 free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, 3253 fl->desc); 3254 3255 if (fl->sdesc) 3256 free_fl_sdesc(sc, fl); 3257 3258 if (mtx_initialized(&fl->fl_lock)) 3259 mtx_destroy(&fl->fl_lock); 3260 3261 bzero(fl, sizeof(*fl)); 3262 } 3263 3264 return (0); 3265 } 3266 3267 static void 3268 add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, 3269 struct sge_iq *iq) 3270 { 3271 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3272 3273 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, 3274 "bus address of descriptor ring"); 3275 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3276 iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); 3277 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3278 CTLTYPE_INT | CTLFLAG_RD, &iq->abs_id, 0, sysctl_uint16, "I", 3279 "absolute id of the queue"); 3280 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3281 CTLTYPE_INT | CTLFLAG_RD, &iq->cntxt_id, 0, sysctl_uint16, "I", 3282 "SGE context id of the queue"); 3283 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3284 CTLTYPE_INT | CTLFLAG_RD, &iq->cidx, 0, sysctl_uint16, "I", 3285 "consumer index"); 3286 } 3287 3288 static void 3289 add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, 3290 struct sysctl_oid *oid, struct sge_fl *fl) 3291 { 3292 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3293 3294 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3295 "freelist"); 3296 children = SYSCTL_CHILDREN(oid); 3297 3298 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3299 &fl->ba, "bus address of descriptor ring"); 3300 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3301 fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, 3302 "desc ring size in bytes"); 3303 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3304 CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", 3305 "SGE context id of the freelist"); 3306 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, 3307 fl_pad ? 1 : 0, "padding enabled"); 3308 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, 3309 fl->flags & FL_BUF_PACKING ? 
1 : 0, "packing enabled"); 3310 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 3311 0, "consumer index"); 3312 if (fl->flags & FL_BUF_PACKING) { 3313 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", 3314 CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); 3315 } 3316 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 3317 0, "producer index"); 3318 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", 3319 CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); 3320 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", 3321 CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); 3322 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", 3323 CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); 3324 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", 3325 CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); 3326 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", 3327 CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); 3328 } 3329 3330 static int 3331 alloc_fwq(struct adapter *sc) 3332 { 3333 int rc, intr_idx; 3334 struct sge_iq *fwq = &sc->sge.fwq; 3335 struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); 3336 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3337 3338 init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); 3339 if (sc->flags & IS_VF) 3340 intr_idx = 0; 3341 else 3342 intr_idx = sc->intr_count > 1 ? 1 : 0; 3343 rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); 3344 if (rc != 0) { 3345 device_printf(sc->dev, 3346 "failed to create firmware event queue: %d\n", rc); 3347 return (rc); 3348 } 3349 3350 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, 3351 NULL, "firmware event queue"); 3352 add_iq_sysctls(&sc->ctx, oid, fwq); 3353 3354 return (0); 3355 } 3356 3357 static int 3358 free_fwq(struct adapter *sc) 3359 { 3360 return free_iq_fl(NULL, &sc->sge.fwq, NULL); 3361 } 3362 3363 static int 3364 alloc_ctrlq(struct adapter *sc, struct sge_wrq *ctrlq, int idx, 3365 struct sysctl_oid *oid) 3366 { 3367 int rc; 3368 char name[16]; 3369 struct sysctl_oid_list *children; 3370 3371 snprintf(name, sizeof(name), "%s ctrlq%d", device_get_nameunit(sc->dev), 3372 idx); 3373 init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[idx]->tx_chan, 3374 sc->sge.fwq.cntxt_id, name); 3375 3376 children = SYSCTL_CHILDREN(oid); 3377 snprintf(name, sizeof(name), "%d", idx); 3378 oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3379 NULL, "ctrl queue"); 3380 rc = alloc_wrq(sc, NULL, ctrlq, oid); 3381 3382 return (rc); 3383 } 3384 3385 int 3386 tnl_cong(struct port_info *pi, int drop) 3387 { 3388 3389 if (drop == -1) 3390 return (-1); 3391 else if (drop == 1) 3392 return (0); 3393 else 3394 return (pi->rx_e_chan_map); 3395 } 3396 3397 static int 3398 alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, 3399 struct sysctl_oid *oid) 3400 { 3401 int rc; 3402 struct adapter *sc = vi->pi->adapter; 3403 struct sysctl_oid_list *children; 3404 char name[16]; 3405 3406 rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, 3407 tnl_cong(vi->pi, cong_drop)); 3408 if (rc != 0) 3409 return (rc); 3410 3411 if (idx == 0) 3412 sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; 3413 else 3414 KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, 3415 ("iq_base mismatch")); 3416 KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, 3417 ("PF with non-zero iq_base")); 3418 3419 /* 3420 * The freelist is just barely 
above the starvation threshold right now, 3421 * fill it up a bit more. 3422 */ 3423 FL_LOCK(&rxq->fl); 3424 refill_fl(sc, &rxq->fl, 128); 3425 FL_UNLOCK(&rxq->fl); 3426 3427 #if defined(INET) || defined(INET6) 3428 rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); 3429 if (rc != 0) 3430 return (rc); 3431 MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ 3432 3433 if (vi->ifp->if_capenable & IFCAP_LRO) 3434 rxq->iq.flags |= IQ_LRO_ENABLED; 3435 #endif 3436 rxq->ifp = vi->ifp; 3437 3438 children = SYSCTL_CHILDREN(oid); 3439 3440 snprintf(name, sizeof(name), "%d", idx); 3441 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3442 NULL, "rx queue"); 3443 children = SYSCTL_CHILDREN(oid); 3444 3445 add_iq_sysctls(&vi->ctx, oid, &rxq->iq); 3446 #if defined(INET) || defined(INET6) 3447 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, 3448 &rxq->lro.lro_queued, 0, NULL); 3449 SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, 3450 &rxq->lro.lro_flushed, 0, NULL); 3451 #endif 3452 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, 3453 &rxq->rxcsum, "# of times hardware assisted with checksum"); 3454 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", 3455 CTLFLAG_RD, &rxq->vlan_extraction, 3456 "# of times hardware extracted 802.1Q tag"); 3457 3458 add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); 3459 3460 return (rc); 3461 } 3462 3463 static int 3464 free_rxq(struct vi_info *vi, struct sge_rxq *rxq) 3465 { 3466 int rc; 3467 3468 #if defined(INET) || defined(INET6) 3469 if (rxq->lro.ifp) { 3470 tcp_lro_free(&rxq->lro); 3471 rxq->lro.ifp = NULL; 3472 } 3473 #endif 3474 3475 rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); 3476 if (rc == 0) 3477 bzero(rxq, sizeof(*rxq)); 3478 3479 return (rc); 3480 } 3481 3482 #ifdef TCP_OFFLOAD 3483 static int 3484 alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, 3485 int intr_idx, int idx, struct sysctl_oid *oid) 3486 { 3487 struct port_info *pi = vi->pi; 3488 int rc; 3489 struct sysctl_oid_list *children; 3490 char name[16]; 3491 3492 rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); 3493 if (rc != 0) 3494 return (rc); 3495 3496 children = SYSCTL_CHILDREN(oid); 3497 3498 snprintf(name, sizeof(name), "%d", idx); 3499 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3500 NULL, "rx queue"); 3501 add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); 3502 add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); 3503 3504 return (rc); 3505 } 3506 3507 static int 3508 free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) 3509 { 3510 int rc; 3511 3512 rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); 3513 if (rc == 0) 3514 bzero(ofld_rxq, sizeof(*ofld_rxq)); 3515 3516 return (rc); 3517 } 3518 #endif 3519 3520 #ifdef DEV_NETMAP 3521 static int 3522 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, 3523 int idx, struct sysctl_oid *oid) 3524 { 3525 int rc; 3526 struct sysctl_oid_list *children; 3527 struct sysctl_ctx_list *ctx; 3528 char name[16]; 3529 size_t len; 3530 struct adapter *sc = vi->pi->adapter; 3531 struct netmap_adapter *na = NA(vi->ifp); 3532 3533 MPASS(na != NULL); 3534 3535 len = vi->qsize_rxq * IQ_ESIZE; 3536 rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, 3537 &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); 3538 if (rc != 0) 3539 return (rc); 3540 3541 len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3542 rc = alloc_ring(sc, len, 
&nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, 3543 &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); 3544 if (rc != 0) 3545 return (rc); 3546 3547 nm_rxq->vi = vi; 3548 nm_rxq->nid = idx; 3549 nm_rxq->iq_cidx = 0; 3550 nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; 3551 nm_rxq->iq_gen = F_RSPD_GEN; 3552 nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; 3553 nm_rxq->fl_sidx = na->num_rx_desc; 3554 nm_rxq->intr_idx = intr_idx; 3555 nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; 3556 3557 ctx = &vi->ctx; 3558 children = SYSCTL_CHILDREN(oid); 3559 3560 snprintf(name, sizeof(name), "%d", idx); 3561 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, 3562 "rx queue"); 3563 children = SYSCTL_CHILDREN(oid); 3564 3565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 3566 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16, 3567 "I", "absolute id of the queue"); 3568 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3569 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16, 3570 "I", "SGE context id of the queue"); 3571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3572 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I", 3573 "consumer index"); 3574 3575 children = SYSCTL_CHILDREN(oid); 3576 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, 3577 "freelist"); 3578 children = SYSCTL_CHILDREN(oid); 3579 3580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", 3581 CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16, 3582 "I", "SGE context id of the freelist"); 3583 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, 3584 &nm_rxq->fl_cidx, 0, "consumer index"); 3585 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, 3586 &nm_rxq->fl_pidx, 0, "producer index"); 3587 3588 return (rc); 3589 } 3590 3591 3592 static int 3593 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) 3594 { 3595 struct adapter *sc = vi->pi->adapter; 3596 3597 if (vi->flags & VI_INIT_DONE) 3598 MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID); 3599 else 3600 MPASS(nm_rxq->iq_cntxt_id == 0); 3601 3602 free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba, 3603 nm_rxq->iq_desc); 3604 free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba, 3605 nm_rxq->fl_desc); 3606 3607 return (0); 3608 } 3609 3610 static int 3611 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx, 3612 struct sysctl_oid *oid) 3613 { 3614 int rc; 3615 size_t len; 3616 struct port_info *pi = vi->pi; 3617 struct adapter *sc = pi->adapter; 3618 struct netmap_adapter *na = NA(vi->ifp); 3619 char name[16]; 3620 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3621 3622 len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; 3623 rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map, 3624 &nm_txq->ba, (void **)&nm_txq->desc); 3625 if (rc) 3626 return (rc); 3627 3628 nm_txq->pidx = nm_txq->cidx = 0; 3629 nm_txq->sidx = na->num_tx_desc; 3630 nm_txq->nid = idx; 3631 nm_txq->iqidx = iqidx; 3632 nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 3633 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | 3634 V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | 3635 V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); 3636 nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; 3637 3638 snprintf(name, sizeof(name), "%d", idx); 3639 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 3640 NULL, "netmap tx queue"); 3641 children = SYSCTL_CHILDREN(oid); 3642 3643 
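/* Export the netmap txq's SGE state (context id, cidx, pidx) via sysctl. */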
SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3644 &nm_txq->cntxt_id, 0, "SGE context id of the queue"); 3645 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 3646 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I", 3647 "consumer index"); 3648 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 3649 CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I", 3650 "producer index"); 3651 3652 return (rc); 3653 } 3654 3655 static int 3656 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq) 3657 { 3658 struct adapter *sc = vi->pi->adapter; 3659 3660 if (vi->flags & VI_INIT_DONE) 3661 MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID); 3662 else 3663 MPASS(nm_txq->cntxt_id == 0); 3664 3665 free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba, 3666 nm_txq->desc); 3667 3668 return (0); 3669 } 3670 #endif 3671 3672 /* 3673 * Returns a reasonable automatic cidx flush threshold for a given queue size. 3674 */ 3675 static u_int 3676 qsize_to_fthresh(int qsize) 3677 { 3678 u_int fthresh; 3679 3680 while (!powerof2(qsize)) 3681 qsize++; 3682 fthresh = ilog2(qsize); 3683 if (fthresh > X_CIDXFLUSHTHRESH_128) 3684 fthresh = X_CIDXFLUSHTHRESH_128; 3685 3686 return (fthresh); 3687 } 3688 3689 static int 3690 ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) 3691 { 3692 int rc, cntxt_id; 3693 struct fw_eq_ctrl_cmd c; 3694 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3695 3696 bzero(&c, sizeof(c)); 3697 3698 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST | 3699 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) | 3700 V_FW_EQ_CTRL_CMD_VFN(0)); 3701 c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC | 3702 F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c)); 3703 c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); 3704 c.physeqid_pkd = htobe32(0); 3705 c.fetchszm_to_iqid = 3706 htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 3707 V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | 3708 F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); 3709 c.dcaen_to_eqsize = 3710 htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3711 V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3712 V_FW_EQ_CTRL_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 3713 V_FW_EQ_CTRL_CMD_EQSIZE(qsize)); 3714 c.eqaddr = htobe64(eq->ba); 3715 3716 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3717 if (rc != 0) { 3718 device_printf(sc->dev, 3719 "failed to create control queue %d: %d\n", eq->tx_chan, rc); 3720 return (rc); 3721 } 3722 eq->flags |= EQ_ALLOCATED; 3723 3724 eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); 3725 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3726 if (cntxt_id >= sc->sge.neq) 3727 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3728 cntxt_id, sc->sge.neq - 1); 3729 sc->sge.eqmap[cntxt_id] = eq; 3730 3731 return (rc); 3732 } 3733 3734 static int 3735 eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3736 { 3737 int rc, cntxt_id; 3738 struct fw_eq_eth_cmd c; 3739 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3740 3741 bzero(&c, sizeof(c)); 3742 3743 c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | 3744 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | 3745 V_FW_EQ_ETH_CMD_VFN(0)); 3746 c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | 3747 F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); 3748 c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | 3749 F_FW_EQ_ETH_CMD_AUTOEQUEQE | 
V_FW_EQ_ETH_CMD_VIID(vi->viid)); 3750 c.fetchszm_to_iqid = 3751 htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | 3752 V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | 3753 V_FW_EQ_ETH_CMD_IQID(eq->iqid)); 3754 c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3755 V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3756 V_FW_EQ_ETH_CMD_EQSIZE(qsize)); 3757 c.eqaddr = htobe64(eq->ba); 3758 3759 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3760 if (rc != 0) { 3761 device_printf(vi->dev, 3762 "failed to create Ethernet egress queue: %d\n", rc); 3763 return (rc); 3764 } 3765 eq->flags |= EQ_ALLOCATED; 3766 3767 eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); 3768 eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd)); 3769 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3770 if (cntxt_id >= sc->sge.neq) 3771 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3772 cntxt_id, sc->sge.neq - 1); 3773 sc->sge.eqmap[cntxt_id] = eq; 3774 3775 return (rc); 3776 } 3777 3778 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3779 static int 3780 ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3781 { 3782 int rc, cntxt_id; 3783 struct fw_eq_ofld_cmd c; 3784 int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3785 3786 bzero(&c, sizeof(c)); 3787 3788 c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | 3789 F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | 3790 V_FW_EQ_OFLD_CMD_VFN(0)); 3791 c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | 3792 F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); 3793 c.fetchszm_to_iqid = 3794 htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | 3795 V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | 3796 F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); 3797 c.dcaen_to_eqsize = 3798 htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | 3799 V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | 3800 V_FW_EQ_OFLD_CMD_CIDXFTHRESH(qsize_to_fthresh(qsize)) | 3801 V_FW_EQ_OFLD_CMD_EQSIZE(qsize)); 3802 c.eqaddr = htobe64(eq->ba); 3803 3804 rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); 3805 if (rc != 0) { 3806 device_printf(vi->dev, 3807 "failed to create egress queue for TCP offload: %d\n", rc); 3808 return (rc); 3809 } 3810 eq->flags |= EQ_ALLOCATED; 3811 3812 eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); 3813 cntxt_id = eq->cntxt_id - sc->sge.eq_start; 3814 if (cntxt_id >= sc->sge.neq) 3815 panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, 3816 cntxt_id, sc->sge.neq - 1); 3817 sc->sge.eqmap[cntxt_id] = eq; 3818 3819 return (rc); 3820 } 3821 #endif 3822 3823 static int 3824 alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) 3825 { 3826 int rc, qsize; 3827 size_t len; 3828 3829 mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); 3830 3831 qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; 3832 len = qsize * EQ_ESIZE; 3833 rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, 3834 &eq->ba, (void **)&eq->desc); 3835 if (rc) 3836 return (rc); 3837 3838 eq->pidx = eq->cidx = eq->dbidx = 0; 3839 /* Note that equeqidx is not used with sge_wrq (OFLD/CTRL) queues. 
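 * Only the ethernet tx path (eth_tx) uses equeqidx, to decide when to request
 * an egress queue update from the hardware.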
*/ 3840 eq->equeqidx = 0; 3841 eq->doorbells = sc->doorbells; 3842 3843 switch (eq->flags & EQ_TYPEMASK) { 3844 case EQ_CTRL: 3845 rc = ctrl_eq_alloc(sc, eq); 3846 break; 3847 3848 case EQ_ETH: 3849 rc = eth_eq_alloc(sc, vi, eq); 3850 break; 3851 3852 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3853 case EQ_OFLD: 3854 rc = ofld_eq_alloc(sc, vi, eq); 3855 break; 3856 #endif 3857 3858 default: 3859 panic("%s: invalid eq type %d.", __func__, 3860 eq->flags & EQ_TYPEMASK); 3861 } 3862 if (rc != 0) { 3863 device_printf(sc->dev, 3864 "failed to allocate egress queue(%d): %d\n", 3865 eq->flags & EQ_TYPEMASK, rc); 3866 } 3867 3868 if (isset(&eq->doorbells, DOORBELL_UDB) || 3869 isset(&eq->doorbells, DOORBELL_UDBWC) || 3870 isset(&eq->doorbells, DOORBELL_WCWR)) { 3871 uint32_t s_qpp = sc->params.sge.eq_s_qpp; 3872 uint32_t mask = (1 << s_qpp) - 1; 3873 volatile uint8_t *udb; 3874 3875 udb = sc->udbs_base + UDBS_DB_OFFSET; 3876 udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ 3877 eq->udb_qid = eq->cntxt_id & mask; /* id in page */ 3878 if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) 3879 clrbit(&eq->doorbells, DOORBELL_WCWR); 3880 else { 3881 udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ 3882 eq->udb_qid = 0; 3883 } 3884 eq->udb = (volatile void *)udb; 3885 } 3886 3887 return (rc); 3888 } 3889 3890 static int 3891 free_eq(struct adapter *sc, struct sge_eq *eq) 3892 { 3893 int rc; 3894 3895 if (eq->flags & EQ_ALLOCATED) { 3896 switch (eq->flags & EQ_TYPEMASK) { 3897 case EQ_CTRL: 3898 rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, 3899 eq->cntxt_id); 3900 break; 3901 3902 case EQ_ETH: 3903 rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, 3904 eq->cntxt_id); 3905 break; 3906 3907 #if defined(TCP_OFFLOAD) || defined(RATELIMIT) 3908 case EQ_OFLD: 3909 rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, 3910 eq->cntxt_id); 3911 break; 3912 #endif 3913 3914 default: 3915 panic("%s: invalid eq type %d.", __func__, 3916 eq->flags & EQ_TYPEMASK); 3917 } 3918 if (rc != 0) { 3919 device_printf(sc->dev, 3920 "failed to free egress queue (%d): %d\n", 3921 eq->flags & EQ_TYPEMASK, rc); 3922 return (rc); 3923 } 3924 eq->flags &= ~EQ_ALLOCATED; 3925 } 3926 3927 free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); 3928 3929 if (mtx_initialized(&eq->eq_lock)) 3930 mtx_destroy(&eq->eq_lock); 3931 3932 bzero(eq, sizeof(*eq)); 3933 return (0); 3934 } 3935 3936 static int 3937 alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, 3938 struct sysctl_oid *oid) 3939 { 3940 int rc; 3941 struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; 3942 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 3943 3944 rc = alloc_eq(sc, vi, &wrq->eq); 3945 if (rc) 3946 return (rc); 3947 3948 wrq->adapter = sc; 3949 TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); 3950 TAILQ_INIT(&wrq->incomplete_wrs); 3951 STAILQ_INIT(&wrq->wr_list); 3952 wrq->nwr_pending = 0; 3953 wrq->ndesc_needed = 0; 3954 3955 SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 3956 &wrq->eq.ba, "bus address of descriptor ring"); 3957 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 3958 wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, 3959 "desc ring size in bytes"); 3960 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 3961 &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); 3962 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", 3963 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", 3964 "consumer index"); 3965 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", 3966 CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", 3967 "producer index"); 3968 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 3969 wrq->eq.sidx, "status page index"); 3970 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, 3971 &wrq->tx_wrs_direct, "# of work requests (direct)"); 3972 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, 3973 &wrq->tx_wrs_copied, "# of work requests (copied)"); 3974 SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, 3975 &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); 3976 3977 return (rc); 3978 } 3979 3980 static int 3981 free_wrq(struct adapter *sc, struct sge_wrq *wrq) 3982 { 3983 int rc; 3984 3985 rc = free_eq(sc, &wrq->eq); 3986 if (rc) 3987 return (rc); 3988 3989 bzero(wrq, sizeof(*wrq)); 3990 return (0); 3991 } 3992 3993 static int 3994 alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, 3995 struct sysctl_oid *oid) 3996 { 3997 int rc; 3998 struct port_info *pi = vi->pi; 3999 struct adapter *sc = pi->adapter; 4000 struct sge_eq *eq = &txq->eq; 4001 char name[16]; 4002 struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); 4003 4004 rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, 4005 M_CXGBE, M_WAITOK); 4006 if (rc != 0) { 4007 device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); 4008 return (rc); 4009 } 4010 4011 rc = alloc_eq(sc, vi, eq); 4012 if (rc != 0) { 4013 mp_ring_free(txq->r); 4014 txq->r = NULL; 4015 return (rc); 4016 } 4017 4018 /* Can't fail after this point. 
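* The remaining setup uses only M_WAITOK allocations, sysctl registration, and
* plain bookkeeping; mp_ring_alloc and alloc_eq above were the only steps with
* an error path to unwind.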
*/ 4019 4020 if (idx == 0) 4021 sc->sge.eq_base = eq->abs_id - eq->cntxt_id; 4022 else 4023 KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, 4024 ("eq_base mismatch")); 4025 KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, 4026 ("PF with non-zero eq_base")); 4027 4028 TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); 4029 txq->ifp = vi->ifp; 4030 txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); 4031 if (sc->flags & IS_VF) 4032 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 4033 V_TXPKT_INTF(pi->tx_chan)); 4034 else 4035 txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | 4036 V_TXPKT_INTF(pi->tx_chan) | 4037 V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | 4038 V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | 4039 V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); 4040 txq->tc_idx = -1; 4041 txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, 4042 M_ZERO | M_WAITOK); 4043 4044 snprintf(name, sizeof(name), "%d", idx); 4045 oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, 4046 NULL, "tx queue"); 4047 children = SYSCTL_CHILDREN(oid); 4048 4049 SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, 4050 &eq->ba, "bus address of descriptor ring"); 4051 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, 4052 eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, 4053 "desc ring size in bytes"); 4054 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, 4055 &eq->abs_id, 0, "absolute id of the queue"); 4056 SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, 4057 &eq->cntxt_id, 0, "SGE context id of the queue"); 4058 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", 4059 CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", 4060 "consumer index"); 4061 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", 4062 CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", 4063 "producer index"); 4064 SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, 4065 eq->sidx, "status page index"); 4066 4067 SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", 4068 CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", 4069 "traffic class (-1 means none)"); 4070 4071 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, 4072 &txq->txcsum, "# of times hardware assisted with checksum"); 4073 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", 4074 CTLFLAG_RD, &txq->vlan_insertion, 4075 "# of times hardware inserted 802.1Q tag"); 4076 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, 4077 &txq->tso_wrs, "# of TSO work requests"); 4078 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, 4079 &txq->imm_wrs, "# of work requests with immediate data"); 4080 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, 4081 &txq->sgl_wrs, "# of work requests with direct SGL"); 4082 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, 4083 &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); 4084 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", 4085 CTLFLAG_RD, &txq->txpkts0_wrs, 4086 "# of txpkts (type 0) work requests"); 4087 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", 4088 CTLFLAG_RD, &txq->txpkts1_wrs, 4089 "# of txpkts (type 1) work requests"); 4090 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", 4091 CTLFLAG_RD, &txq->txpkts0_pkts, 4092 "# of frames tx'd using type0 txpkts work requests"); 4093 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", 4094 
CTLFLAG_RD, &txq->txpkts1_pkts, 4095 "# of frames tx'd using type1 txpkts work requests"); 4096 SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, 4097 &txq->raw_wrs, "# of raw work requests (non-packets)"); 4098 4099 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", 4100 CTLFLAG_RD, &txq->r->enqueues, 4101 "# of enqueues to the mp_ring for this queue"); 4102 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", 4103 CTLFLAG_RD, &txq->r->drops, 4104 "# of drops in the mp_ring for this queue"); 4105 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", 4106 CTLFLAG_RD, &txq->r->starts, 4107 "# of normal consumer starts in the mp_ring for this queue"); 4108 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", 4109 CTLFLAG_RD, &txq->r->stalls, 4110 "# of consumer stalls in the mp_ring for this queue"); 4111 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", 4112 CTLFLAG_RD, &txq->r->restarts, 4113 "# of consumer restarts in the mp_ring for this queue"); 4114 SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", 4115 CTLFLAG_RD, &txq->r->abdications, 4116 "# of consumer abdications in the mp_ring for this queue"); 4117 4118 return (0); 4119 } 4120 4121 static int 4122 free_txq(struct vi_info *vi, struct sge_txq *txq) 4123 { 4124 int rc; 4125 struct adapter *sc = vi->pi->adapter; 4126 struct sge_eq *eq = &txq->eq; 4127 4128 rc = free_eq(sc, eq); 4129 if (rc) 4130 return (rc); 4131 4132 sglist_free(txq->gl); 4133 free(txq->sdesc, M_CXGBE); 4134 mp_ring_free(txq->r); 4135 4136 bzero(txq, sizeof(*txq)); 4137 return (0); 4138 } 4139 4140 static void 4141 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) 4142 { 4143 bus_addr_t *ba = arg; 4144 4145 KASSERT(nseg == 1, 4146 ("%s meant for single segment mappings only.", __func__)); 4147 4148 *ba = error ? 0 : segs->ds_addr; 4149 } 4150 4151 static inline void 4152 ring_fl_db(struct adapter *sc, struct sge_fl *fl) 4153 { 4154 uint32_t n, v; 4155 4156 n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); 4157 MPASS(n > 0); 4158 4159 wmb(); 4160 v = fl->dbval | V_PIDX(n); 4161 if (fl->udb) 4162 *fl->udb = htole32(v); 4163 else 4164 t4_write_reg(sc, sc->sge_kdoorbell_reg, v); 4165 IDXINCR(fl->dbidx, n, fl->sidx); 4166 } 4167 4168 /* 4169 * Fills up the freelist by allocating up to 'n' buffers. Buffers that are 4170 * recycled do not count towards this allocation budget. 4171 * 4172 * Returns non-zero to indicate that this freelist should be added to the list 4173 * of starving freelists. 4174 */ 4175 static int 4176 refill_fl(struct adapter *sc, struct sge_fl *fl, int n) 4177 { 4178 __be64 *d; 4179 struct fl_sdesc *sd; 4180 uintptr_t pa; 4181 caddr_t cl; 4182 struct cluster_layout *cll; 4183 struct sw_zone_info *swz; 4184 struct cluster_metadata *clm; 4185 uint16_t max_pidx; 4186 uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ 4187 4188 FL_LOCK_ASSERT_OWNED(fl); 4189 4190 /* 4191 * We always stop at the beginning of the hardware descriptor that's just 4192 * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, 4193 * which would mean an empty freelist to the chip. 4194 */ 4195 max_pidx = __predict_false(hw_cidx == 0) ? 
fl->sidx - 1 : hw_cidx - 1; 4196 if (fl->pidx == max_pidx * 8) 4197 return (0); 4198 4199 d = &fl->desc[fl->pidx]; 4200 sd = &fl->sdesc[fl->pidx]; 4201 cll = &fl->cll_def; /* default layout */ 4202 swz = &sc->sge.sw_zone_info[cll->zidx]; 4203 4204 while (n > 0) { 4205 4206 if (sd->cl != NULL) { 4207 4208 if (sd->nmbuf == 0) { 4209 /* 4210 * Fast recycle without involving any atomics on 4211 * the cluster's metadata (if the cluster has 4212 * metadata). This happens when all frames 4213 * received in the cluster were small enough to 4214 * fit within a single mbuf each. 4215 */ 4216 fl->cl_fast_recycled++; 4217 #ifdef INVARIANTS 4218 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 4219 if (clm != NULL) 4220 MPASS(clm->refcount == 1); 4221 #endif 4222 goto recycled_fast; 4223 } 4224 4225 /* 4226 * Cluster is guaranteed to have metadata. Clusters 4227 * without metadata always take the fast recycle path 4228 * when they're recycled. 4229 */ 4230 clm = cl_metadata(sc, fl, &sd->cll, sd->cl); 4231 MPASS(clm != NULL); 4232 4233 if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4234 fl->cl_recycled++; 4235 counter_u64_add(extfree_rels, 1); 4236 goto recycled; 4237 } 4238 sd->cl = NULL; /* gave up my reference */ 4239 } 4240 MPASS(sd->cl == NULL); 4241 alloc: 4242 cl = uma_zalloc(swz->zone, M_NOWAIT); 4243 if (__predict_false(cl == NULL)) { 4244 if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || 4245 fl->cll_def.zidx == fl->cll_alt.zidx) 4246 break; 4247 4248 /* fall back to the safe zone */ 4249 cll = &fl->cll_alt; 4250 swz = &sc->sge.sw_zone_info[cll->zidx]; 4251 goto alloc; 4252 } 4253 fl->cl_allocated++; 4254 n--; 4255 4256 pa = pmap_kextract((vm_offset_t)cl); 4257 pa += cll->region1; 4258 sd->cl = cl; 4259 sd->cll = *cll; 4260 *d = htobe64(pa | cll->hwidx); 4261 clm = cl_metadata(sc, fl, cll, cl); 4262 if (clm != NULL) { 4263 recycled: 4264 #ifdef INVARIANTS 4265 clm->sd = sd; 4266 #endif 4267 clm->refcount = 1; 4268 } 4269 sd->nmbuf = 0; 4270 recycled_fast: 4271 d++; 4272 sd++; 4273 if (__predict_false(++fl->pidx % 8 == 0)) { 4274 uint16_t pidx = fl->pidx / 8; 4275 4276 if (__predict_false(pidx == fl->sidx)) { 4277 fl->pidx = 0; 4278 pidx = 0; 4279 sd = fl->sdesc; 4280 d = fl->desc; 4281 } 4282 if (pidx == max_pidx) 4283 break; 4284 4285 if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) 4286 ring_fl_db(sc, fl); 4287 } 4288 } 4289 4290 if (fl->pidx / 8 != fl->dbidx) 4291 ring_fl_db(sc, fl); 4292 4293 return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); 4294 } 4295 4296 /* 4297 * Attempt to refill all starving freelists. 
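* Runs from the sfl callout with sc->sfl_lock held (see the assertion below).
* Each pass offers every starving freelist up to 64 fresh buffers; freelists
* that are no longer running low, or that are being destroyed (FL_DOOMED), are
* taken off sc->sfl, and the callout reschedules itself every hz/5 ticks while
* any starving freelists remain.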
4298 */ 4299 static void 4300 refill_sfl(void *arg) 4301 { 4302 struct adapter *sc = arg; 4303 struct sge_fl *fl, *fl_temp; 4304 4305 mtx_assert(&sc->sfl_lock, MA_OWNED); 4306 TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { 4307 FL_LOCK(fl); 4308 refill_fl(sc, fl, 64); 4309 if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { 4310 TAILQ_REMOVE(&sc->sfl, fl, link); 4311 fl->flags &= ~FL_STARVING; 4312 } 4313 FL_UNLOCK(fl); 4314 } 4315 4316 if (!TAILQ_EMPTY(&sc->sfl)) 4317 callout_schedule(&sc->sfl_callout, hz / 5); 4318 } 4319 4320 static int 4321 alloc_fl_sdesc(struct sge_fl *fl) 4322 { 4323 4324 fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, 4325 M_ZERO | M_WAITOK); 4326 4327 return (0); 4328 } 4329 4330 static void 4331 free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) 4332 { 4333 struct fl_sdesc *sd; 4334 struct cluster_metadata *clm; 4335 struct cluster_layout *cll; 4336 int i; 4337 4338 sd = fl->sdesc; 4339 for (i = 0; i < fl->sidx * 8; i++, sd++) { 4340 if (sd->cl == NULL) 4341 continue; 4342 4343 cll = &sd->cll; 4344 clm = cl_metadata(sc, fl, cll, sd->cl); 4345 if (sd->nmbuf == 0) 4346 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 4347 else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { 4348 uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); 4349 counter_u64_add(extfree_rels, 1); 4350 } 4351 sd->cl = NULL; 4352 } 4353 4354 free(fl->sdesc, M_CXGBE); 4355 fl->sdesc = NULL; 4356 } 4357 4358 static inline void 4359 get_pkt_gl(struct mbuf *m, struct sglist *gl) 4360 { 4361 int rc; 4362 4363 M_ASSERTPKTHDR(m); 4364 4365 sglist_reset(gl); 4366 rc = sglist_append_mbuf(gl, m); 4367 if (__predict_false(rc != 0)) { 4368 panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " 4369 "with %d.", __func__, m, mbuf_nsegs(m), rc); 4370 } 4371 4372 KASSERT(gl->sg_nseg == mbuf_nsegs(m), 4373 ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, 4374 mbuf_nsegs(m), gl->sg_nseg)); 4375 KASSERT(gl->sg_nseg > 0 && 4376 gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), 4377 ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, 4378 gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); 4379 } 4380 4381 /* 4382 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 4383 */ 4384 static inline u_int 4385 txpkt_len16(u_int nsegs, u_int tso) 4386 { 4387 u_int n; 4388 4389 MPASS(nsegs > 0); 4390 4391 nsegs--; /* first segment is part of ulptx_sgl */ 4392 n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + 4393 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4394 if (tso) 4395 n += sizeof(struct cpl_tx_pkt_lso_core); 4396 4397 return (howmany(n, 16)); 4398 } 4399 4400 /* 4401 * len16 for a txpkt_vm WR with a GL. Includes the firmware work 4402 * request header. 4403 */ 4404 static inline u_int 4405 txpkt_vm_len16(u_int nsegs, u_int tso) 4406 { 4407 u_int n; 4408 4409 MPASS(nsegs > 0); 4410 4411 nsegs--; /* first segment is part of ulptx_sgl */ 4412 n = sizeof(struct fw_eth_tx_pkt_vm_wr) + 4413 sizeof(struct cpl_tx_pkt_core) + 4414 sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4415 if (tso) 4416 n += sizeof(struct cpl_tx_pkt_lso_core); 4417 4418 return (howmany(n, 16)); 4419 } 4420 4421 /* 4422 * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work 4423 * request header. 
4424 */ 4425 static inline u_int 4426 txpkts0_len16(u_int nsegs) 4427 { 4428 u_int n; 4429 4430 MPASS(nsegs > 0); 4431 4432 nsegs--; /* first segment is part of ulptx_sgl */ 4433 n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + 4434 sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 4435 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 4436 4437 return (howmany(n, 16)); 4438 } 4439 4440 /* 4441 * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work 4442 * request header. 4443 */ 4444 static inline u_int 4445 txpkts1_len16(void) 4446 { 4447 u_int n; 4448 4449 n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); 4450 4451 return (howmany(n, 16)); 4452 } 4453 4454 static inline u_int 4455 imm_payload(u_int ndesc) 4456 { 4457 u_int n; 4458 4459 n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - 4460 sizeof(struct cpl_tx_pkt_core); 4461 4462 return (n); 4463 } 4464 4465 /* 4466 * Write a VM txpkt WR for this packet to the hardware descriptors, update the 4467 * software descriptor, and advance the pidx. It is guaranteed that enough 4468 * descriptors are available. 4469 * 4470 * The return value is the # of hardware descriptors used. 4471 */ 4472 static u_int 4473 write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, 4474 struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available) 4475 { 4476 struct sge_eq *eq = &txq->eq; 4477 struct tx_sdesc *txsd; 4478 struct cpl_tx_pkt_core *cpl; 4479 uint32_t ctrl; /* used in many unrelated places */ 4480 uint64_t ctrl1; 4481 int csum_type, len16, ndesc, pktlen, nsegs; 4482 caddr_t dst; 4483 4484 TXQ_LOCK_ASSERT_OWNED(txq); 4485 M_ASSERTPKTHDR(m0); 4486 MPASS(available > 0 && available < eq->sidx); 4487 4488 len16 = mbuf_len16(m0); 4489 nsegs = mbuf_nsegs(m0); 4490 pktlen = m0->m_pkthdr.len; 4491 ctrl = sizeof(struct cpl_tx_pkt_core); 4492 if (needs_tso(m0)) 4493 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4494 ndesc = howmany(len16, EQ_ESIZE / 16); 4495 MPASS(ndesc <= available); 4496 4497 /* Firmware work request header */ 4498 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4499 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | 4500 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4501 4502 ctrl = V_FW_WR_LEN16(len16); 4503 wr->equiq_to_len16 = htobe32(ctrl); 4504 wr->r3[0] = 0; 4505 wr->r3[1] = 0; 4506 4507 /* 4508 * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. 4509 * vlantci is ignored unless the ethtype is 0x8100, so it's 4510 * simpler to always copy it rather than making it 4511 * conditional. Also, it seems that we do not have to set 4512 * vlantci or fake the ethtype when doing VLAN tag insertion. 
4513 */ 4514 m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); 4515 4516 csum_type = -1; 4517 if (needs_tso(m0)) { 4518 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4519 4520 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4521 m0->m_pkthdr.l4hlen > 0, 4522 ("%s: mbuf %p needs TSO but missing header lengths", 4523 __func__, m0)); 4524 4525 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4526 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4527 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4528 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4529 ctrl |= V_LSO_ETHHDR_LEN(1); 4530 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4531 ctrl |= F_LSO_IPV6; 4532 4533 lso->lso_ctrl = htobe32(ctrl); 4534 lso->ipid_ofst = htobe16(0); 4535 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4536 lso->seqno_offset = htobe32(0); 4537 lso->len = htobe32(pktlen); 4538 4539 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4540 csum_type = TX_CSUM_TCPIP6; 4541 else 4542 csum_type = TX_CSUM_TCPIP; 4543 4544 cpl = (void *)(lso + 1); 4545 4546 txq->tso_wrs++; 4547 } else { 4548 if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) 4549 csum_type = TX_CSUM_TCPIP; 4550 else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) 4551 csum_type = TX_CSUM_UDPIP; 4552 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) 4553 csum_type = TX_CSUM_TCPIP6; 4554 else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) 4555 csum_type = TX_CSUM_UDPIP6; 4556 #if defined(INET) 4557 else if (m0->m_pkthdr.csum_flags & CSUM_IP) { 4558 /* 4559 * XXX: The firmware appears to stomp on the 4560 * fragment/flags field of the IP header when 4561 * using TX_CSUM_IP. Fall back to doing 4562 * software checksums. 4563 */ 4564 u_short *sump; 4565 struct mbuf *m; 4566 int offset; 4567 4568 m = m0; 4569 offset = 0; 4570 sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + 4571 offsetof(struct ip, ip_sum)); 4572 *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + 4573 m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); 4574 m0->m_pkthdr.csum_flags &= ~CSUM_IP; 4575 } 4576 #endif 4577 4578 cpl = (void *)(wr + 1); 4579 } 4580 4581 /* Checksum offload */ 4582 ctrl1 = 0; 4583 if (needs_l3_csum(m0) == 0) 4584 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4585 if (csum_type >= 0) { 4586 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, 4587 ("%s: mbuf %p needs checksum offload but missing header lengths", 4588 __func__, m0)); 4589 4590 if (chip_id(sc) <= CHELSIO_T5) { 4591 ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - 4592 ETHER_HDR_LEN); 4593 } else { 4594 ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - 4595 ETHER_HDR_LEN); 4596 } 4597 ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); 4598 ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); 4599 } else 4600 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4601 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4602 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4603 txq->txcsum++; /* some hardware assistance provided */ 4604 4605 /* VLAN tag insertion */ 4606 if (needs_vlan_insertion(m0)) { 4607 ctrl1 |= F_TXPKT_VLAN_VLD | 4608 V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4609 txq->vlan_insertion++; 4610 } 4611 4612 /* CPL header */ 4613 cpl->ctrl0 = txq->cpl_ctrl0; 4614 cpl->pack = 0; 4615 cpl->len = htobe16(pktlen); 4616 cpl->ctrl1 = htobe64(ctrl1); 4617 4618 /* SGL */ 4619 dst = (void *)(cpl + 1); 4620 4621 /* 4622 * A packet using TSO will use up an entire descriptor for the 4623 * firmware work request header, LSO CPL, and TX_PKT_XT CPL. 
4624 * If this descriptor is the last descriptor in the ring, wrap 4625 * around to the front of the ring explicitly for the start of 4626 * the sgl. 4627 */ 4628 if (dst == (void *)&eq->desc[eq->sidx]) { 4629 dst = (void *)&eq->desc[0]; 4630 write_gl_to_txd(txq, m0, &dst, 0); 4631 } else 4632 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4633 txq->sgl_wrs++; 4634 4635 txq->txpkt_wrs++; 4636 4637 txsd = &txq->sdesc[eq->pidx]; 4638 txsd->m = m0; 4639 txsd->desc_used = ndesc; 4640 4641 return (ndesc); 4642 } 4643 4644 /* 4645 * Write a raw WR to the hardware descriptors, update the software 4646 * descriptor, and advance the pidx. It is guaranteed that enough 4647 * descriptors are available. 4648 * 4649 * The return value is the # of hardware descriptors used. 4650 */ 4651 static u_int 4652 write_raw_wr(struct sge_txq *txq, void *wr, struct mbuf *m0, u_int available) 4653 { 4654 struct sge_eq *eq = &txq->eq; 4655 struct tx_sdesc *txsd; 4656 struct mbuf *m; 4657 caddr_t dst; 4658 int len16, ndesc; 4659 4660 len16 = mbuf_len16(m0); 4661 ndesc = howmany(len16, EQ_ESIZE / 16); 4662 MPASS(ndesc <= available); 4663 4664 dst = wr; 4665 for (m = m0; m != NULL; m = m->m_next) 4666 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 4667 4668 txq->raw_wrs++; 4669 4670 txsd = &txq->sdesc[eq->pidx]; 4671 txsd->m = m0; 4672 txsd->desc_used = ndesc; 4673 4674 return (ndesc); 4675 } 4676 4677 /* 4678 * Write a txpkt WR for this packet to the hardware descriptors, update the 4679 * software descriptor, and advance the pidx. It is guaranteed that enough 4680 * descriptors are available. 4681 * 4682 * The return value is the # of hardware descriptors used. 4683 */ 4684 static u_int 4685 write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr, 4686 struct mbuf *m0, u_int available) 4687 { 4688 struct sge_eq *eq = &txq->eq; 4689 struct tx_sdesc *txsd; 4690 struct cpl_tx_pkt_core *cpl; 4691 uint32_t ctrl; /* used in many unrelated places */ 4692 uint64_t ctrl1; 4693 int len16, ndesc, pktlen, nsegs; 4694 caddr_t dst; 4695 4696 TXQ_LOCK_ASSERT_OWNED(txq); 4697 M_ASSERTPKTHDR(m0); 4698 MPASS(available > 0 && available < eq->sidx); 4699 4700 len16 = mbuf_len16(m0); 4701 nsegs = mbuf_nsegs(m0); 4702 pktlen = m0->m_pkthdr.len; 4703 ctrl = sizeof(struct cpl_tx_pkt_core); 4704 if (needs_tso(m0)) 4705 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 4706 else if (pktlen <= imm_payload(2) && available >= 2) { 4707 /* Immediate data. Recalculate len16 and set nsegs to 0. 
*/ 4708 ctrl += pktlen; 4709 len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + 4710 sizeof(struct cpl_tx_pkt_core) + pktlen, 16); 4711 nsegs = 0; 4712 } 4713 ndesc = howmany(len16, EQ_ESIZE / 16); 4714 MPASS(ndesc <= available); 4715 4716 /* Firmware work request header */ 4717 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4718 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | 4719 V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); 4720 4721 ctrl = V_FW_WR_LEN16(len16); 4722 wr->equiq_to_len16 = htobe32(ctrl); 4723 wr->r3 = 0; 4724 4725 if (needs_tso(m0)) { 4726 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 4727 4728 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 4729 m0->m_pkthdr.l4hlen > 0, 4730 ("%s: mbuf %p needs TSO but missing header lengths", 4731 __func__, m0)); 4732 4733 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | 4734 F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) 4735 | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); 4736 if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) 4737 ctrl |= V_LSO_ETHHDR_LEN(1); 4738 if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) 4739 ctrl |= F_LSO_IPV6; 4740 4741 lso->lso_ctrl = htobe32(ctrl); 4742 lso->ipid_ofst = htobe16(0); 4743 lso->mss = htobe16(m0->m_pkthdr.tso_segsz); 4744 lso->seqno_offset = htobe32(0); 4745 lso->len = htobe32(pktlen); 4746 4747 cpl = (void *)(lso + 1); 4748 4749 txq->tso_wrs++; 4750 } else 4751 cpl = (void *)(wr + 1); 4752 4753 /* Checksum offload */ 4754 ctrl1 = 0; 4755 if (needs_l3_csum(m0) == 0) 4756 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4757 if (needs_l4_csum(m0) == 0) 4758 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4759 if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4760 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4761 txq->txcsum++; /* some hardware assistance provided */ 4762 4763 /* VLAN tag insertion */ 4764 if (needs_vlan_insertion(m0)) { 4765 ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); 4766 txq->vlan_insertion++; 4767 } 4768 4769 /* CPL header */ 4770 cpl->ctrl0 = txq->cpl_ctrl0; 4771 cpl->pack = 0; 4772 cpl->len = htobe16(pktlen); 4773 cpl->ctrl1 = htobe64(ctrl1); 4774 4775 /* SGL */ 4776 dst = (void *)(cpl + 1); 4777 if (nsegs > 0) { 4778 4779 write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); 4780 txq->sgl_wrs++; 4781 } else { 4782 struct mbuf *m; 4783 4784 for (m = m0; m != NULL; m = m->m_next) { 4785 copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); 4786 #ifdef INVARIANTS 4787 pktlen -= m->m_len; 4788 #endif 4789 } 4790 #ifdef INVARIANTS 4791 KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen)); 4792 #endif 4793 txq->imm_wrs++; 4794 } 4795 4796 txq->txpkt_wrs++; 4797 4798 txsd = &txq->sdesc[eq->pidx]; 4799 txsd->m = m0; 4800 txsd->desc_used = ndesc; 4801 4802 return (ndesc); 4803 } 4804 4805 static int 4806 try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available) 4807 { 4808 u_int needed, nsegs1, nsegs2, l1, l2; 4809 4810 if (cannot_use_txpkts(m) || cannot_use_txpkts(n)) 4811 return (1); 4812 4813 nsegs1 = mbuf_nsegs(m); 4814 nsegs2 = mbuf_nsegs(n); 4815 if (nsegs1 + nsegs2 == 2) { 4816 txp->wr_type = 1; 4817 l1 = l2 = txpkts1_len16(); 4818 } else { 4819 txp->wr_type = 0; 4820 l1 = txpkts0_len16(nsegs1); 4821 l2 = txpkts0_len16(nsegs2); 4822 } 4823 txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2; 4824 needed = howmany(txp->len16, EQ_ESIZE / 16); 4825 if (needed > SGE_MAX_WR_NDESC || needed > available) 4826 return (1); 4827 4828 txp->plen = m->m_pkthdr.len + n->m_pkthdr.len; 4829 if 
(txp->plen > 65535) 4830 return (1); 4831 4832 txp->npkt = 2; 4833 set_mbuf_len16(m, l1); 4834 set_mbuf_len16(n, l2); 4835 4836 return (0); 4837 } 4838 4839 static int 4840 add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available) 4841 { 4842 u_int plen, len16, needed, nsegs; 4843 4844 MPASS(txp->wr_type == 0 || txp->wr_type == 1); 4845 4846 if (cannot_use_txpkts(m)) 4847 return (1); 4848 4849 nsegs = mbuf_nsegs(m); 4850 if (txp->wr_type == 1 && nsegs != 1) 4851 return (1); 4852 4853 plen = txp->plen + m->m_pkthdr.len; 4854 if (plen > 65535) 4855 return (1); 4856 4857 if (txp->wr_type == 0) 4858 len16 = txpkts0_len16(nsegs); 4859 else 4860 len16 = txpkts1_len16(); 4861 needed = howmany(txp->len16 + len16, EQ_ESIZE / 16); 4862 if (needed > SGE_MAX_WR_NDESC || needed > available) 4863 return (1); 4864 4865 txp->npkt++; 4866 txp->plen = plen; 4867 txp->len16 += len16; 4868 set_mbuf_len16(m, len16); 4869 4870 return (0); 4871 } 4872 4873 /* 4874 * Write a txpkts WR for the packets in txp to the hardware descriptors, update 4875 * the software descriptor, and advance the pidx. It is guaranteed that enough 4876 * descriptors are available. 4877 * 4878 * The return value is the # of hardware descriptors used. 4879 */ 4880 static u_int 4881 write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr, 4882 struct mbuf *m0, const struct txpkts *txp, u_int available) 4883 { 4884 struct sge_eq *eq = &txq->eq; 4885 struct tx_sdesc *txsd; 4886 struct cpl_tx_pkt_core *cpl; 4887 uint32_t ctrl; 4888 uint64_t ctrl1; 4889 int ndesc, checkwrap; 4890 struct mbuf *m; 4891 void *flitp; 4892 4893 TXQ_LOCK_ASSERT_OWNED(txq); 4894 MPASS(txp->npkt > 0); 4895 MPASS(txp->plen < 65536); 4896 MPASS(m0 != NULL); 4897 MPASS(m0->m_nextpkt != NULL); 4898 MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16)); 4899 MPASS(available > 0 && available < eq->sidx); 4900 4901 ndesc = howmany(txp->len16, EQ_ESIZE / 16); 4902 MPASS(ndesc <= available); 4903 4904 MPASS(wr == (void *)&eq->desc[eq->pidx]); 4905 wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); 4906 ctrl = V_FW_WR_LEN16(txp->len16); 4907 wr->equiq_to_len16 = htobe32(ctrl); 4908 wr->plen = htobe16(txp->plen); 4909 wr->npkt = txp->npkt; 4910 wr->r3 = 0; 4911 wr->type = txp->wr_type; 4912 flitp = wr + 1; 4913 4914 /* 4915 * At this point we are 16B into a hardware descriptor. If checkwrap is 4916 * set then we know the WR is going to wrap around somewhere. We'll 4917 * check for that at appropriate points. 
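* checkwrap, computed below, is true exactly when the ndesc descriptors of
* this WR run past the end of the ring (i.e. eq->pidx + ndesc > eq->sidx).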
4918 */ 4919 checkwrap = eq->sidx - ndesc < eq->pidx; 4920 for (m = m0; m != NULL; m = m->m_nextpkt) { 4921 if (txp->wr_type == 0) { 4922 struct ulp_txpkt *ulpmc; 4923 struct ulptx_idata *ulpsc; 4924 4925 /* ULP master command */ 4926 ulpmc = flitp; 4927 ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | 4928 V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid)); 4929 ulpmc->len = htobe32(mbuf_len16(m)); 4930 4931 /* ULP subcommand */ 4932 ulpsc = (void *)(ulpmc + 1); 4933 ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | 4934 F_ULP_TX_SC_MORE); 4935 ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core)); 4936 4937 cpl = (void *)(ulpsc + 1); 4938 if (checkwrap && 4939 (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx]) 4940 cpl = (void *)&eq->desc[0]; 4941 } else { 4942 cpl = flitp; 4943 } 4944 4945 /* Checksum offload */ 4946 ctrl1 = 0; 4947 if (needs_l3_csum(m) == 0) 4948 ctrl1 |= F_TXPKT_IPCSUM_DIS; 4949 if (needs_l4_csum(m) == 0) 4950 ctrl1 |= F_TXPKT_L4CSUM_DIS; 4951 if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | 4952 CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) 4953 txq->txcsum++; /* some hardware assistance provided */ 4954 4955 /* VLAN tag insertion */ 4956 if (needs_vlan_insertion(m)) { 4957 ctrl1 |= F_TXPKT_VLAN_VLD | 4958 V_TXPKT_VLAN(m->m_pkthdr.ether_vtag); 4959 txq->vlan_insertion++; 4960 } 4961 4962 /* CPL header */ 4963 cpl->ctrl0 = txq->cpl_ctrl0; 4964 cpl->pack = 0; 4965 cpl->len = htobe16(m->m_pkthdr.len); 4966 cpl->ctrl1 = htobe64(ctrl1); 4967 4968 flitp = cpl + 1; 4969 if (checkwrap && 4970 (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx]) 4971 flitp = (void *)&eq->desc[0]; 4972 4973 write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap); 4974 4975 } 4976 4977 if (txp->wr_type == 0) { 4978 txq->txpkts0_pkts += txp->npkt; 4979 txq->txpkts0_wrs++; 4980 } else { 4981 txq->txpkts1_pkts += txp->npkt; 4982 txq->txpkts1_wrs++; 4983 } 4984 4985 txsd = &txq->sdesc[eq->pidx]; 4986 txsd->m = m0; 4987 txsd->desc_used = ndesc; 4988 4989 return (ndesc); 4990 } 4991 4992 /* 4993 * If the SGL ends on an address that is not 16 byte aligned, this function will 4994 * add a 0 filled flit at the end. 4995 */ 4996 static void 4997 write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap) 4998 { 4999 struct sge_eq *eq = &txq->eq; 5000 struct sglist *gl = txq->gl; 5001 struct sglist_seg *seg; 5002 __be64 *flitp, *wrap; 5003 struct ulptx_sgl *usgl; 5004 int i, nflits, nsegs; 5005 5006 KASSERT(((uintptr_t)(*to) & 0xf) == 0, 5007 ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to)); 5008 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5009 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5010 5011 get_pkt_gl(m, gl); 5012 nsegs = gl->sg_nseg; 5013 MPASS(nsegs > 0); 5014 5015 nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; 5016 flitp = (__be64 *)(*to); 5017 wrap = (__be64 *)(&eq->desc[eq->sidx]); 5018 seg = &gl->sg_segs[0]; 5019 usgl = (void *)flitp; 5020 5021 /* 5022 * We start at a 16 byte boundary somewhere inside the tx descriptor 5023 * ring, so we're at least 16 bytes away from the status page. There is 5024 * no chance of a wrap around in the middle of usgl (which is 16 bytes). 
5025 */ 5026 5027 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 5028 V_ULPTX_NSGE(nsegs)); 5029 usgl->len0 = htobe32(seg->ss_len); 5030 usgl->addr0 = htobe64(seg->ss_paddr); 5031 seg++; 5032 5033 if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) { 5034 5035 /* Won't wrap around at all */ 5036 5037 for (i = 0; i < nsegs - 1; i++, seg++) { 5038 usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); 5039 usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); 5040 } 5041 if (i & 1) 5042 usgl->sge[i / 2].len[1] = htobe32(0); 5043 flitp += nflits; 5044 } else { 5045 5046 /* Will wrap somewhere in the rest of the SGL */ 5047 5048 /* 2 flits already written, write the rest flit by flit */ 5049 flitp = (void *)(usgl + 1); 5050 for (i = 0; i < nflits - 2; i++) { 5051 if (flitp == wrap) 5052 flitp = (void *)eq->desc; 5053 *flitp++ = get_flit(seg, nsegs - 1, i); 5054 } 5055 } 5056 5057 if (nflits & 1) { 5058 MPASS(((uintptr_t)flitp) & 0xf); 5059 *flitp++ = 0; 5060 } 5061 5062 MPASS((((uintptr_t)flitp) & 0xf) == 0); 5063 if (__predict_false(flitp == wrap)) 5064 *to = (void *)eq->desc; 5065 else 5066 *to = (void *)flitp; 5067 } 5068 5069 static inline void 5070 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) 5071 { 5072 5073 MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); 5074 MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); 5075 5076 if (__predict_true((uintptr_t)(*to) + len <= 5077 (uintptr_t)&eq->desc[eq->sidx])) { 5078 bcopy(from, *to, len); 5079 (*to) += len; 5080 } else { 5081 int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); 5082 5083 bcopy(from, *to, portion); 5084 from += portion; 5085 portion = len - portion; /* remaining */ 5086 bcopy(from, (void *)eq->desc, portion); 5087 (*to) = (caddr_t)eq->desc + portion; 5088 } 5089 } 5090 5091 static inline void 5092 ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n) 5093 { 5094 u_int db; 5095 5096 MPASS(n > 0); 5097 5098 db = eq->doorbells; 5099 if (n > 1) 5100 clrbit(&db, DOORBELL_WCWR); 5101 wmb(); 5102 5103 switch (ffs(db) - 1) { 5104 case DOORBELL_UDB: 5105 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5106 break; 5107 5108 case DOORBELL_WCWR: { 5109 volatile uint64_t *dst, *src; 5110 int i; 5111 5112 /* 5113 * Queues whose 128B doorbell segment fits in the page do not 5114 * use relative qid (udb_qid is always 0). Only queues with 5115 * doorbell segments can do WCWR. 
5116 */ 5117 KASSERT(eq->udb_qid == 0 && n == 1, 5118 ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p", 5119 __func__, eq->doorbells, n, eq->dbidx, eq)); 5120 5121 dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET - 5122 UDBS_DB_OFFSET); 5123 i = eq->dbidx; 5124 src = (void *)&eq->desc[i]; 5125 while (src != (void *)&eq->desc[i + 1]) 5126 *dst++ = *src++; 5127 wmb(); 5128 break; 5129 } 5130 5131 case DOORBELL_UDBWC: 5132 *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n)); 5133 wmb(); 5134 break; 5135 5136 case DOORBELL_KDB: 5137 t4_write_reg(sc, sc->sge_kdoorbell_reg, 5138 V_QID(eq->cntxt_id) | V_PIDX(n)); 5139 break; 5140 } 5141 5142 IDXINCR(eq->dbidx, n, eq->sidx); 5143 } 5144 5145 static inline u_int 5146 reclaimable_tx_desc(struct sge_eq *eq) 5147 { 5148 uint16_t hw_cidx; 5149 5150 hw_cidx = read_hw_cidx(eq); 5151 return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx)); 5152 } 5153 5154 static inline u_int 5155 total_available_tx_desc(struct sge_eq *eq) 5156 { 5157 uint16_t hw_cidx, pidx; 5158 5159 hw_cidx = read_hw_cidx(eq); 5160 pidx = eq->pidx; 5161 5162 if (pidx == hw_cidx) 5163 return (eq->sidx - 1); 5164 else 5165 return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1); 5166 } 5167 5168 static inline uint16_t 5169 read_hw_cidx(struct sge_eq *eq) 5170 { 5171 struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; 5172 uint16_t cidx = spg->cidx; /* stable snapshot */ 5173 5174 return (be16toh(cidx)); 5175 } 5176 5177 /* 5178 * Reclaim 'n' descriptors approximately. 5179 */ 5180 static u_int 5181 reclaim_tx_descs(struct sge_txq *txq, u_int n) 5182 { 5183 struct tx_sdesc *txsd; 5184 struct sge_eq *eq = &txq->eq; 5185 u_int can_reclaim, reclaimed; 5186 5187 TXQ_LOCK_ASSERT_OWNED(txq); 5188 MPASS(n > 0); 5189 5190 reclaimed = 0; 5191 can_reclaim = reclaimable_tx_desc(eq); 5192 while (can_reclaim && reclaimed < n) { 5193 int ndesc; 5194 struct mbuf *m, *nextpkt; 5195 5196 txsd = &txq->sdesc[eq->cidx]; 5197 ndesc = txsd->desc_used; 5198 5199 /* Firmware doesn't return "partial" credits. 
*/ 5200 KASSERT(can_reclaim >= ndesc, 5201 ("%s: unexpected number of credits: %d, %d", 5202 __func__, can_reclaim, ndesc)); 5203 KASSERT(ndesc != 0, 5204 ("%s: descriptor with no credits: cidx %d", 5205 __func__, eq->cidx)); 5206 5207 for (m = txsd->m; m != NULL; m = nextpkt) { 5208 nextpkt = m->m_nextpkt; 5209 m->m_nextpkt = NULL; 5210 m_freem(m); 5211 } 5212 reclaimed += ndesc; 5213 can_reclaim -= ndesc; 5214 IDXINCR(eq->cidx, ndesc, eq->sidx); 5215 } 5216 5217 return (reclaimed); 5218 } 5219 5220 static void 5221 tx_reclaim(void *arg, int n) 5222 { 5223 struct sge_txq *txq = arg; 5224 struct sge_eq *eq = &txq->eq; 5225 5226 do { 5227 if (TXQ_TRYLOCK(txq) == 0) 5228 break; 5229 n = reclaim_tx_descs(txq, 32); 5230 if (eq->cidx == eq->pidx) 5231 eq->equeqidx = eq->pidx; 5232 TXQ_UNLOCK(txq); 5233 } while (n > 0); 5234 } 5235 5236 static __be64 5237 get_flit(struct sglist_seg *segs, int nsegs, int idx) 5238 { 5239 int i = (idx / 3) * 2; 5240 5241 switch (idx % 3) { 5242 case 0: { 5243 uint64_t rc; 5244 5245 rc = (uint64_t)segs[i].ss_len << 32; 5246 if (i + 1 < nsegs) 5247 rc |= (uint64_t)(segs[i + 1].ss_len); 5248 5249 return (htobe64(rc)); 5250 } 5251 case 1: 5252 return (htobe64(segs[i].ss_paddr)); 5253 case 2: 5254 return (htobe64(segs[i + 1].ss_paddr)); 5255 } 5256 5257 return (0); 5258 } 5259 5260 static void 5261 find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) 5262 { 5263 int8_t zidx, hwidx, idx; 5264 uint16_t region1, region3; 5265 int spare, spare_needed, n; 5266 struct sw_zone_info *swz; 5267 struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; 5268 5269 /* 5270 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize 5271 * large enough for the max payload and cluster metadata. Otherwise 5272 * settle for the largest bufsize that leaves enough room in the cluster 5273 * for metadata. 5274 * 5275 * Without buffer packing: Look for the smallest zone which has a 5276 * bufsize large enough for the max payload. Settle for the largest 5277 * bufsize available if there's nothing big enough for max payload. 5278 */ 5279 spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; 5280 swz = &sc->sge.sw_zone_info[0]; 5281 hwidx = -1; 5282 for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { 5283 if (swz->size > largest_rx_cluster) { 5284 if (__predict_true(hwidx != -1)) 5285 break; 5286 5287 /* 5288 * This is a misconfiguration. largest_rx_cluster is 5289 * preventing us from finding a refill source. See 5290 * dev.t5nex.<n>.buffer_sizes to figure out why. 5291 */ 5292 device_printf(sc->dev, "largest_rx_cluster=%u leaves no" 5293 " refill source for fl %p (dma %u). Ignored.\n", 5294 largest_rx_cluster, fl, maxp); 5295 } 5296 for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { 5297 hwb = &hwb_list[idx]; 5298 spare = swz->size - hwb->size; 5299 if (spare < spare_needed) 5300 continue; 5301 5302 hwidx = idx; /* best option so far */ 5303 if (hwb->size >= maxp) { 5304 5305 if ((fl->flags & FL_BUF_PACKING) == 0) 5306 goto done; /* stop looking (not packing) */ 5307 5308 if (swz->size >= safest_rx_cluster) 5309 goto done; /* stop looking (packing) */ 5310 } 5311 break; /* keep looking, next zone */ 5312 } 5313 } 5314 done: 5315 /* A usable hwidx has been located. 
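* Start with the default layout for that buffer: no mbufs inlined up front
* (region1 = 0) and whatever the zone leaves beyond the hardware buffer as
* spare space at the end (region3). The loop below may carve part of that
* spare into region1 if inlining mbufs is allowed.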
*/ 5316 MPASS(hwidx != -1); 5317 hwb = &hwb_list[hwidx]; 5318 zidx = hwb->zidx; 5319 swz = &sc->sge.sw_zone_info[zidx]; 5320 region1 = 0; 5321 region3 = swz->size - hwb->size; 5322 5323 /* 5324 * Stay within this zone and see if there is a better match when mbuf 5325 * inlining is allowed. Remember that the hwidx's are sorted in 5326 * decreasing order of size (so in increasing order of spare area). 5327 */ 5328 for (idx = hwidx; idx != -1; idx = hwb->next) { 5329 hwb = &hwb_list[idx]; 5330 spare = swz->size - hwb->size; 5331 5332 if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) 5333 break; 5334 5335 /* 5336 * Do not inline mbufs if doing so would violate the pad/pack 5337 * boundary alignment requirement. 5338 */ 5339 if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0) 5340 continue; 5341 if (fl->flags & FL_BUF_PACKING && 5342 (MSIZE % sc->params.sge.pack_boundary) != 0) 5343 continue; 5344 5345 if (spare < CL_METADATA_SIZE + MSIZE) 5346 continue; 5347 n = (spare - CL_METADATA_SIZE) / MSIZE; 5348 if (n > howmany(hwb->size, maxp)) 5349 break; 5350 5351 hwidx = idx; 5352 if (fl->flags & FL_BUF_PACKING) { 5353 region1 = n * MSIZE; 5354 region3 = spare - region1; 5355 } else { 5356 region1 = MSIZE; 5357 region3 = spare - region1; 5358 break; 5359 } 5360 } 5361 5362 KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, 5363 ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); 5364 KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, 5365 ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); 5366 KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == 5367 sc->sge.sw_zone_info[zidx].size, 5368 ("%s: bad buffer layout for fl %p, maxp %d. " 5369 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5370 sc->sge.sw_zone_info[zidx].size, region1, 5371 sc->sge.hw_buf_info[hwidx].size, region3)); 5372 if (fl->flags & FL_BUF_PACKING || region1 > 0) { 5373 KASSERT(region3 >= CL_METADATA_SIZE, 5374 ("%s: no room for metadata. fl %p, maxp %d; " 5375 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5376 sc->sge.sw_zone_info[zidx].size, region1, 5377 sc->sge.hw_buf_info[hwidx].size, region3)); 5378 KASSERT(region1 % MSIZE == 0, 5379 ("%s: bad mbuf region for fl %p, maxp %d. 
" 5380 "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, 5381 sc->sge.sw_zone_info[zidx].size, region1, 5382 sc->sge.hw_buf_info[hwidx].size, region3)); 5383 } 5384 5385 fl->cll_def.zidx = zidx; 5386 fl->cll_def.hwidx = hwidx; 5387 fl->cll_def.region1 = region1; 5388 fl->cll_def.region3 = region3; 5389 } 5390 5391 static void 5392 find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) 5393 { 5394 struct sge *s = &sc->sge; 5395 struct hw_buf_info *hwb; 5396 struct sw_zone_info *swz; 5397 int spare; 5398 int8_t hwidx; 5399 5400 if (fl->flags & FL_BUF_PACKING) 5401 hwidx = s->safe_hwidx2; /* with room for metadata */ 5402 else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { 5403 hwidx = s->safe_hwidx2; 5404 hwb = &s->hw_buf_info[hwidx]; 5405 swz = &s->sw_zone_info[hwb->zidx]; 5406 spare = swz->size - hwb->size; 5407 5408 /* no good if there isn't room for an mbuf as well */ 5409 if (spare < CL_METADATA_SIZE + MSIZE) 5410 hwidx = s->safe_hwidx1; 5411 } else 5412 hwidx = s->safe_hwidx1; 5413 5414 if (hwidx == -1) { 5415 /* No fallback source */ 5416 fl->cll_alt.hwidx = -1; 5417 fl->cll_alt.zidx = -1; 5418 5419 return; 5420 } 5421 5422 hwb = &s->hw_buf_info[hwidx]; 5423 swz = &s->sw_zone_info[hwb->zidx]; 5424 spare = swz->size - hwb->size; 5425 fl->cll_alt.hwidx = hwidx; 5426 fl->cll_alt.zidx = hwb->zidx; 5427 if (allow_mbufs_in_cluster && 5428 (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0)) 5429 fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; 5430 else 5431 fl->cll_alt.region1 = 0; 5432 fl->cll_alt.region3 = spare - fl->cll_alt.region1; 5433 } 5434 5435 static void 5436 add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) 5437 { 5438 mtx_lock(&sc->sfl_lock); 5439 FL_LOCK(fl); 5440 if ((fl->flags & FL_DOOMED) == 0) { 5441 fl->flags |= FL_STARVING; 5442 TAILQ_INSERT_TAIL(&sc->sfl, fl, link); 5443 callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); 5444 } 5445 FL_UNLOCK(fl); 5446 mtx_unlock(&sc->sfl_lock); 5447 } 5448 5449 static void 5450 handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq) 5451 { 5452 struct sge_wrq *wrq = (void *)eq; 5453 5454 atomic_readandclear_int(&eq->equiq); 5455 taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task); 5456 } 5457 5458 static void 5459 handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq) 5460 { 5461 struct sge_txq *txq = (void *)eq; 5462 5463 MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH); 5464 5465 atomic_readandclear_int(&eq->equiq); 5466 mp_ring_check_drainage(txq->r, 0); 5467 taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task); 5468 } 5469 5470 static int 5471 handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss, 5472 struct mbuf *m) 5473 { 5474 const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); 5475 unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); 5476 struct adapter *sc = iq->adapter; 5477 struct sge *s = &sc->sge; 5478 struct sge_eq *eq; 5479 static void (*h[])(struct adapter *, struct sge_eq *) = {NULL, 5480 &handle_wrq_egr_update, &handle_eth_egr_update, 5481 &handle_wrq_egr_update}; 5482 5483 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 5484 rss->opcode)); 5485 5486 eq = s->eqmap[qid - s->eq_start - s->eq_base]; 5487 (*h[eq->flags & EQ_TYPEMASK])(sc, eq); 5488 5489 return (0); 5490 } 5491 5492 /* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */ 5493 CTASSERT(offsetof(struct cpl_fw4_msg, data) == \ 5494 offsetof(struct cpl_fw6_msg, data)); 5495 5496 static int 5497 handle_fw_msg(struct sge_iq 
*iq, const struct rss_header *rss, struct mbuf *m) 5498 { 5499 struct adapter *sc = iq->adapter; 5500 const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); 5501 5502 KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, 5503 rss->opcode)); 5504 5505 if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) { 5506 const struct rss_header *rss2; 5507 5508 rss2 = (const struct rss_header *)&cpl->data[0]; 5509 return (t4_cpl_handler[rss2->opcode](iq, rss2, m)); 5510 } 5511 5512 return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0])); 5513 } 5514 5515 /** 5516 * t4_handle_wrerr_rpl - process a FW work request error message 5517 * @adap: the adapter 5518 * @rpl: start of the FW message 5519 */ 5520 static int 5521 t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl) 5522 { 5523 u8 opcode = *(const u8 *)rpl; 5524 const struct fw_error_cmd *e = (const void *)rpl; 5525 unsigned int i; 5526 5527 if (opcode != FW_ERROR_CMD) { 5528 log(LOG_ERR, 5529 "%s: Received WRERR_RPL message with opcode %#x\n", 5530 device_get_nameunit(adap->dev), opcode); 5531 return (EINVAL); 5532 } 5533 log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev), 5534 G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" : 5535 "non-fatal"); 5536 switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) { 5537 case FW_ERROR_TYPE_EXCEPTION: 5538 log(LOG_ERR, "exception info:\n"); 5539 for (i = 0; i < nitems(e->u.exception.info); i++) 5540 log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ", 5541 be32toh(e->u.exception.info[i])); 5542 log(LOG_ERR, "\n"); 5543 break; 5544 case FW_ERROR_TYPE_HWMODULE: 5545 log(LOG_ERR, "HW module regaddr %08x regval %08x\n", 5546 be32toh(e->u.hwmodule.regaddr), 5547 be32toh(e->u.hwmodule.regval)); 5548 break; 5549 case FW_ERROR_TYPE_WR: 5550 log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n", 5551 be16toh(e->u.wr.cidx), 5552 G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)), 5553 G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)), 5554 be32toh(e->u.wr.eqid)); 5555 for (i = 0; i < nitems(e->u.wr.wrhdr); i++) 5556 log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ", 5557 e->u.wr.wrhdr[i]); 5558 log(LOG_ERR, "\n"); 5559 break; 5560 case FW_ERROR_TYPE_ACL: 5561 log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s", 5562 be16toh(e->u.acl.cidx), 5563 G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)), 5564 G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)), 5565 be32toh(e->u.acl.eqid), 5566 G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? 
"vlanid" : 5567 "MAC"); 5568 for (i = 0; i < nitems(e->u.acl.val); i++) 5569 log(LOG_ERR, " %02x", e->u.acl.val[i]); 5570 log(LOG_ERR, "\n"); 5571 break; 5572 default: 5573 log(LOG_ERR, "type %#x\n", 5574 G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))); 5575 return (EINVAL); 5576 } 5577 return (0); 5578 } 5579 5580 static int 5581 sysctl_uint16(SYSCTL_HANDLER_ARGS) 5582 { 5583 uint16_t *id = arg1; 5584 int i = *id; 5585 5586 return sysctl_handle_int(oidp, &i, 0, req); 5587 } 5588 5589 static int 5590 sysctl_bufsizes(SYSCTL_HANDLER_ARGS) 5591 { 5592 struct sge *s = arg1; 5593 struct hw_buf_info *hwb = &s->hw_buf_info[0]; 5594 struct sw_zone_info *swz = &s->sw_zone_info[0]; 5595 int i, rc; 5596 struct sbuf sb; 5597 char c; 5598 5599 sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); 5600 for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { 5601 if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) 5602 c = '*'; 5603 else 5604 c = '\0'; 5605 5606 sbuf_printf(&sb, "%u%c ", hwb->size, c); 5607 } 5608 sbuf_trim(&sb); 5609 sbuf_finish(&sb); 5610 rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 5611 sbuf_delete(&sb); 5612 return (rc); 5613 } 5614 5615 #ifdef RATELIMIT 5616 /* 5617 * len16 for a txpkt WR with a GL. Includes the firmware work request header. 5618 */ 5619 static inline u_int 5620 txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) 5621 { 5622 u_int n; 5623 5624 MPASS(immhdrs > 0); 5625 5626 n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + 5627 sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); 5628 if (__predict_false(nsegs == 0)) 5629 goto done; 5630 5631 nsegs--; /* first segment is part of ulptx_sgl */ 5632 n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); 5633 if (tso) 5634 n += sizeof(struct cpl_tx_pkt_lso_core); 5635 5636 done: 5637 return (howmany(n, 16)); 5638 } 5639 5640 #define ETID_FLOWC_NPARAMS 6 5641 #define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ 5642 ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) 5643 #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) 5644 5645 static int 5646 send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi, 5647 struct vi_info *vi) 5648 { 5649 struct wrq_cookie cookie; 5650 u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; 5651 struct fw_flowc_wr *flowc; 5652 5653 mtx_assert(&cst->lock, MA_OWNED); 5654 MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == 5655 EO_FLOWC_PENDING); 5656 5657 flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie); 5658 if (__predict_false(flowc == NULL)) 5659 return (ENOMEM); 5660 5661 bzero(flowc, ETID_FLOWC_LEN); 5662 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 5663 V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); 5664 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | 5665 V_FW_WR_FLOWID(cst->etid)); 5666 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 5667 flowc->mnemval[0].val = htobe32(pfvf); 5668 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 5669 flowc->mnemval[1].val = htobe32(pi->tx_chan); 5670 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 5671 flowc->mnemval[2].val = htobe32(pi->tx_chan); 5672 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 5673 flowc->mnemval[3].val = htobe32(cst->iqid); 5674 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; 5675 flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); 5676 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 5677 flowc->mnemval[5].val = htobe32(cst->schedcl); 5678 5679 commit_wrq_wr(cst->eo_txq, 
flowc, &cookie); 5680 5681 cst->flags &= ~EO_FLOWC_PENDING; 5682 cst->flags |= EO_FLOWC_RPL_PENDING; 5683 MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ 5684 cst->tx_credits -= ETID_FLOWC_LEN16; 5685 5686 return (0); 5687 } 5688 5689 #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) 5690 5691 void 5692 send_etid_flush_wr(struct cxgbe_snd_tag *cst) 5693 { 5694 struct fw_flowc_wr *flowc; 5695 struct wrq_cookie cookie; 5696 5697 mtx_assert(&cst->lock, MA_OWNED); 5698 5699 flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie); 5700 if (__predict_false(flowc == NULL)) 5701 CXGBE_UNIMPLEMENTED(__func__); 5702 5703 bzero(flowc, ETID_FLUSH_LEN16 * 16); 5704 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 5705 V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); 5706 flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | 5707 V_FW_WR_FLOWID(cst->etid)); 5708 5709 commit_wrq_wr(cst->eo_txq, flowc, &cookie); 5710 5711 cst->flags |= EO_FLUSH_RPL_PENDING; 5712 MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); 5713 cst->tx_credits -= ETID_FLUSH_LEN16; 5714 cst->ncompl++; 5715 } 5716 5717 static void 5718 write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr, 5719 struct mbuf *m0, int compl) 5720 { 5721 struct cpl_tx_pkt_core *cpl; 5722 uint64_t ctrl1; 5723 uint32_t ctrl; /* used in many unrelated places */ 5724 int len16, pktlen, nsegs, immhdrs; 5725 caddr_t dst; 5726 uintptr_t p; 5727 struct ulptx_sgl *usgl; 5728 struct sglist sg; 5729 struct sglist_seg segs[38]; /* XXX: find real limit. XXX: get off the stack */ 5730 5731 mtx_assert(&cst->lock, MA_OWNED); 5732 M_ASSERTPKTHDR(m0); 5733 KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && 5734 m0->m_pkthdr.l4hlen > 0, 5735 ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); 5736 5737 len16 = mbuf_eo_len16(m0); 5738 nsegs = mbuf_eo_nsegs(m0); 5739 pktlen = m0->m_pkthdr.len; 5740 ctrl = sizeof(struct cpl_tx_pkt_core); 5741 if (needs_tso(m0)) 5742 ctrl += sizeof(struct cpl_tx_pkt_lso_core); 5743 immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; 5744 ctrl += immhdrs; 5745 5746 wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | 5747 V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); 5748 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | 5749 V_FW_WR_FLOWID(cst->etid)); 5750 wr->r3 = 0; 5751 if (needs_udp_csum(m0)) { 5752 wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG; 5753 wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen; 5754 wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 5755 wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen; 5756 wr->u.udpseg.rtplen = 0; 5757 wr->u.udpseg.r4 = 0; 5758 wr->u.udpseg.mss = htobe16(pktlen - immhdrs); 5759 wr->u.udpseg.schedpktsize = wr->u.udpseg.mss; 5760 wr->u.udpseg.plen = htobe32(pktlen - immhdrs); 5761 cpl = (void *)(wr + 1); 5762 } else { 5763 MPASS(needs_tcp_csum(m0)); 5764 wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; 5765 wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; 5766 wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); 5767 wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; 5768 wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); 5769 wr->u.tcpseg.r4 = 0; 5770 wr->u.tcpseg.r5 = 0; 5771 wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); 5772 5773 if (needs_tso(m0)) { 5774 struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); 5775 5776 wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); 5777 5778 ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | 5779 F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE | 5780 V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 
static void
write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr,
    struct mbuf *m0, int compl)
{
        struct cpl_tx_pkt_core *cpl;
        uint64_t ctrl1;
        uint32_t ctrl;          /* used in many unrelated places */
        int len16, pktlen, nsegs, immhdrs;
        caddr_t dst;
        uintptr_t p;
        struct ulptx_sgl *usgl;
        struct sglist sg;
        struct sglist_seg segs[38];  /* XXX: find real limit.  XXX: get off the stack */

        mtx_assert(&cst->lock, MA_OWNED);
        M_ASSERTPKTHDR(m0);
        KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
            m0->m_pkthdr.l4hlen > 0,
            ("%s: ethofld mbuf %p is missing header lengths", __func__, m0));

        len16 = mbuf_eo_len16(m0);
        nsegs = mbuf_eo_nsegs(m0);
        pktlen = m0->m_pkthdr.len;
        ctrl = sizeof(struct cpl_tx_pkt_core);
        if (needs_tso(m0))
                ctrl += sizeof(struct cpl_tx_pkt_lso_core);
        immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen +
            m0->m_pkthdr.l4hlen;
        ctrl += immhdrs;

        wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) |
            V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl));
        wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
            V_FW_WR_FLOWID(cst->etid));
        wr->r3 = 0;
        if (needs_udp_csum(m0)) {
                wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
                wr->u.udpseg.ethlen = m0->m_pkthdr.l2hlen;
                wr->u.udpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
                wr->u.udpseg.udplen = m0->m_pkthdr.l4hlen;
                wr->u.udpseg.rtplen = 0;
                wr->u.udpseg.r4 = 0;
                wr->u.udpseg.mss = htobe16(pktlen - immhdrs);
                wr->u.udpseg.schedpktsize = wr->u.udpseg.mss;
                wr->u.udpseg.plen = htobe32(pktlen - immhdrs);
                cpl = (void *)(wr + 1);
        } else {
                MPASS(needs_tcp_csum(m0));
                wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
                wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
                wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
                wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen;
                wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0);
                wr->u.tcpseg.r4 = 0;
                wr->u.tcpseg.r5 = 0;
                wr->u.tcpseg.plen = htobe32(pktlen - immhdrs);

                if (needs_tso(m0)) {
                        struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);

                        wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz);

                        ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
                            F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
                            V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
                            V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
                        if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
                                ctrl |= V_LSO_ETHHDR_LEN(1);
                        if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
                                ctrl |= F_LSO_IPV6;
                        lso->lso_ctrl = htobe32(ctrl);
                        lso->ipid_ofst = htobe16(0);
                        lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
                        lso->seqno_offset = htobe32(0);
                        lso->len = htobe32(pktlen);

                        cpl = (void *)(lso + 1);
                } else {
                        wr->u.tcpseg.mss = htobe16(0xffff);
                        cpl = (void *)(wr + 1);
                }
        }

        /* Checksum offload must be requested for ethofld. */
        ctrl1 = 0;
        MPASS(needs_l4_csum(m0));

        /* VLAN tag insertion */
        if (needs_vlan_insertion(m0)) {
                ctrl1 |= F_TXPKT_VLAN_VLD |
                    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
        }

        /* CPL header */
        cpl->ctrl0 = cst->ctrl0;
        cpl->pack = 0;
        cpl->len = htobe16(pktlen);
        cpl->ctrl1 = htobe64(ctrl1);

        /* Copy the Ethernet, IP & TCP/UDP headers in as immediate data. */
        p = (uintptr_t)(cpl + 1);
        m_copydata(m0, 0, immhdrs, (void *)p);

        /* SGL */
        dst = (void *)(cpl + 1);
        if (nsegs > 0) {
                int i, pad;

                /* Zero-pad up to the next 16-byte boundary, if not 16-byte aligned. */
                p += immhdrs;
                pad = 16 - (immhdrs & 0xf);
                bzero((void *)p, pad);

                usgl = (void *)(p + pad);
                usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
                    V_ULPTX_NSGE(nsegs));

                /* Build the SGL from the payload that follows the headers. */
                sglist_init(&sg, nitems(segs), segs);
                for (; m0 != NULL; m0 = m0->m_next) {
                        if (__predict_false(m0->m_len == 0))
                                continue;
                        if (immhdrs >= m0->m_len) {
                                immhdrs -= m0->m_len;
                                continue;
                        }

                        sglist_append(&sg, mtod(m0, char *) + immhdrs,
                            m0->m_len - immhdrs);
                        immhdrs = 0;
                }
                MPASS(sg.sg_nseg == nsegs);

                /*
                 * Zero-pad the last 8B in case the WR doesn't end on a 16B
                 * boundary.
                 */
                *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0;

                usgl->len0 = htobe32(segs[0].ss_len);
                usgl->addr0 = htobe64(segs[0].ss_paddr);
                for (i = 0; i < nsegs - 1; i++) {
                        usgl->sge[i / 2].len[i & 1] =
                            htobe32(segs[i + 1].ss_len);
                        usgl->sge[i / 2].addr[i & 1] =
                            htobe64(segs[i + 1].ss_paddr);
                }
                if (i & 1)
                        usgl->sge[i / 2].len[1] = htobe32(0);
        }
}

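/*
 * Send as many of the frames queued on the tag as the available tx credits
 * allow.  Frames handed to the hardware move from pending_tx to
 * pending_fwack, where they wait for the firmware to return their credits.
 * A completion is requested when none is outstanding, or once at least half
 * of the total credits have been used since the last one, so that credits
 * keep flowing back to the driver.
 */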
static void
ethofld_tx(struct cxgbe_snd_tag *cst)
{
        struct mbuf *m;
        struct wrq_cookie cookie;
        int next_credits, compl;
        struct fw_eth_tx_eo_wr *wr;

        mtx_assert(&cst->lock, MA_OWNED);

        while ((m = mbufq_first(&cst->pending_tx)) != NULL) {
                M_ASSERTPKTHDR(m);

                /* Number of len16 credits needed to send this mbuf. */
                next_credits = mbuf_eo_len16(m);
                MPASS(next_credits > 0);
                if (next_credits > cst->tx_credits) {
                        /*
                         * Tx will make progress eventually because there is at
                         * least one outstanding fw4_ack that will return
                         * credits and kick the tx.
                         */
                        MPASS(cst->ncompl > 0);
                        return;
                }
                wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie);
                if (__predict_false(wr == NULL)) {
                        /* XXX: wishful thinking, not a real assertion. */
                        MPASS(cst->ncompl > 0);
                        return;
                }
                cst->tx_credits -= next_credits;
                cst->tx_nocompl += next_credits;
                compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2;
                ETHER_BPF_MTAP(cst->com.ifp, m);
                write_ethofld_wr(cst, wr, m, compl);
                commit_wrq_wr(cst->eo_txq, wr, &cookie);
                if (compl) {
                        cst->ncompl++;
                        cst->tx_nocompl = 0;
                }
                (void) mbufq_dequeue(&cst->pending_tx);
                mbufq_enqueue(&cst->pending_fwack, m);
        }
}

/*
 * Queue an mbuf on its ethofld (rate limited) send tag and attempt to
 * transmit it.  The first frame on a tag also picks the offload tx queue and
 * ingress queue for the flow and sends the FLOWC work request that programs
 * the flow into the firmware.
 */
int
ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
{
        struct cxgbe_snd_tag *cst;
        int rc;

        MPASS(m0->m_nextpkt == NULL);
        MPASS(m0->m_pkthdr.snd_tag != NULL);
        cst = mst_to_cst(m0->m_pkthdr.snd_tag);

        mtx_lock(&cst->lock);
        MPASS(cst->flags & EO_SND_TAG_REF);

        if (__predict_false(cst->flags & EO_FLOWC_PENDING)) {
                struct vi_info *vi = ifp->if_softc;
                struct port_info *pi = vi->pi;
                struct adapter *sc = pi->adapter;
                const uint32_t rss_mask = vi->rss_size - 1;
                uint32_t rss_hash;

                cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq];
                if (M_HASHTYPE_ISHASH(m0))
                        rss_hash = m0->m_pkthdr.flowid;
                else
                        rss_hash = arc4random();
                /* We assume RSS hashing. */
                cst->iqid = vi->rss[rss_hash & rss_mask];
                cst->eo_txq += rss_hash % vi->nofldtxq;
                rc = send_etid_flowc_wr(cst, pi, vi);
                if (rc != 0)
                        goto done;
        }

        if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) {
                rc = ENOBUFS;
                goto done;
        }

        mbufq_enqueue(&cst->pending_tx, m0);
        cst->plen += m0->m_pkthdr.len;

        ethofld_tx(cst);
        rc = 0;
done:
        mtx_unlock(&cst->lock);
        if (__predict_false(rc != 0))
                m_freem(m0);
        return (rc);
}

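/*
 * Handle a CPL_FW4_ACK for an ethofld flow: reclaim the credits returned by
 * the firmware, free the mbufs those credits correspond to, and restart tx
 * if more frames are waiting.  Once the kernel has dropped its reference to
 * the tag and every credit is back, the tag itself is freed (possibly after
 * one final flush WR to collect the outstanding credits).
 */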
static int
ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m0)
{
        struct adapter *sc = iq->adapter;
        const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
        struct mbuf *m;
        u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
        struct cxgbe_snd_tag *cst;
        uint8_t credits = cpl->credits;

        cst = lookup_etid(sc, etid);
        mtx_lock(&cst->lock);
        if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) {
                MPASS(credits >= ETID_FLOWC_LEN16);
                credits -= ETID_FLOWC_LEN16;
                cst->flags &= ~EO_FLOWC_RPL_PENDING;
        }

        KASSERT(cst->ncompl > 0,
            ("%s: etid %u (%p) wasn't expecting completion.",
            __func__, etid, cst));
        cst->ncompl--;

        while (credits > 0) {
                m = mbufq_dequeue(&cst->pending_fwack);
                if (__predict_false(m == NULL)) {
                        /*
                         * The remaining credits are for the final flush that
                         * was issued when the tag was freed by the kernel.
                         */
                        MPASS((cst->flags &
                            (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) ==
                            EO_FLUSH_RPL_PENDING);
                        MPASS(credits == ETID_FLUSH_LEN16);
                        MPASS(cst->tx_credits + cpl->credits == cst->tx_total);
                        MPASS(cst->ncompl == 0);

                        cst->flags &= ~EO_FLUSH_RPL_PENDING;
                        cst->tx_credits += cpl->credits;
freetag:
                        cxgbe_snd_tag_free_locked(cst);
                        return (0);     /* cst is gone. */
                }
                KASSERT(m != NULL,
                    ("%s: too many credits (%u, %u)", __func__, cpl->credits,
                    credits));
                KASSERT(credits >= mbuf_eo_len16(m),
                    ("%s: too few credits (%u, %u, %u)", __func__,
                    cpl->credits, credits, mbuf_eo_len16(m)));
                credits -= mbuf_eo_len16(m);
                cst->plen -= m->m_pkthdr.len;
                m_freem(m);
        }

        cst->tx_credits += cpl->credits;
        MPASS(cst->tx_credits <= cst->tx_total);

        m = mbufq_first(&cst->pending_tx);
        if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
                ethofld_tx(cst);

        if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
            cst->ncompl == 0) {
                if (cst->tx_credits == cst->tx_total)
                        goto freetag;
                else {
                        MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
                        send_etid_flush_wr(cst);
                }
        }

        mtx_unlock(&cst->lock);

        return (0);
}
#endif