127f190a3SBjoern A. Zeeb /*- 2*4d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3fe267a55SPedro F. Giffuni * 427f190a3SBjoern A. Zeeb * Copyright (c) 2006, Myricom Inc. 527f190a3SBjoern A. Zeeb * Copyright (c) 2008, Intel Corporation. 69ca874cfSHans Petter Selasky * Copyright (c) 2016-2021 Mellanox Technologies. 727f190a3SBjoern A. Zeeb * All rights reserved. 827f190a3SBjoern A. Zeeb * 927f190a3SBjoern A. Zeeb * Redistribution and use in source and binary forms, with or without 1027f190a3SBjoern A. Zeeb * modification, are permitted provided that the following conditions 1127f190a3SBjoern A. Zeeb * are met: 1227f190a3SBjoern A. Zeeb * 1. Redistributions of source code must retain the above copyright 1327f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer. 1427f190a3SBjoern A. Zeeb * 2. Redistributions in binary form must reproduce the above copyright 1527f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer in the 1627f190a3SBjoern A. Zeeb * documentation and/or other materials provided with the distribution. 1727f190a3SBjoern A. Zeeb * 1827f190a3SBjoern A. Zeeb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1927f190a3SBjoern A. Zeeb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2027f190a3SBjoern A. Zeeb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2127f190a3SBjoern A. Zeeb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2227f190a3SBjoern A. Zeeb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2327f190a3SBjoern A. Zeeb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2427f190a3SBjoern A. Zeeb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2527f190a3SBjoern A. Zeeb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2627f190a3SBjoern A. Zeeb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2727f190a3SBjoern A. Zeeb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2827f190a3SBjoern A. Zeeb * SUCH DAMAGE. 2927f190a3SBjoern A. Zeeb * 3027f190a3SBjoern A. Zeeb * $FreeBSD$ 3127f190a3SBjoern A. Zeeb */ 326c5087a8SJack F Vogel 336c5087a8SJack F Vogel #ifndef _TCP_LRO_H_ 346c5087a8SJack F Vogel #define _TCP_LRO_H_ 356c5087a8SJack F Vogel 367127e6acSNavdeep Parhar #include <sys/time.h> 37e0d8add4SHans Petter Selasky #include <sys/param.h> 38e0d8add4SHans Petter Selasky 39b4f60fabSMark Johnston #include <netinet/in.h> 407127e6acSNavdeep Parhar 41e936121dSHans Petter Selasky #ifndef TCP_LRO_ENTRIES 42e936121dSHans Petter Selasky /* Define default number of LRO entries per RX queue */ 43e936121dSHans Petter Selasky #define TCP_LRO_ENTRIES 8 44e936121dSHans Petter Selasky #endif 45e936121dSHans Petter Selasky 4669a34e8dSRandall Stewart /* 4769a34e8dSRandall Stewart * Flags for ACK entry for compression 4893e28d6eSRichard Scheffenegger * the bottom 12 bits has the th_x2|th_flags. 4969a34e8dSRandall Stewart * LRO itself adds only the TSTMP flags 5069a34e8dSRandall Stewart * to indicate if either of the types 5169a34e8dSRandall Stewart * of timestamps are filled and the 5269a34e8dSRandall Stewart * HAS_TSTMP option to indicate if the 5369a34e8dSRandall Stewart * TCP timestamp option is valid. 5469a34e8dSRandall Stewart * 5593e28d6eSRichard Scheffenegger * The other 1 flag bits are for processing 5669a34e8dSRandall Stewart * by a stack. 5769a34e8dSRandall Stewart * 5869a34e8dSRandall Stewart */ 5993e28d6eSRichard Scheffenegger #define TSTMP_LRO 0x1000 6093e28d6eSRichard Scheffenegger #define TSTMP_HDWR 0x2000 6193e28d6eSRichard Scheffenegger #define HAS_TSTMP 0x4000 62d7955cc0SRandall Stewart /* 63d7955cc0SRandall Stewart * Default number of interrupts on the same cpu in a row 64d7955cc0SRandall Stewart * that will cause us to declare a "affinity cpu". 65d7955cc0SRandall Stewart */ 66d7955cc0SRandall Stewart #define TCP_LRO_CPU_DECLARATION_THRESH 50 6769a34e8dSRandall Stewart 6869a34e8dSRandall Stewart struct inpcb; 6969a34e8dSRandall Stewart 70e0d8add4SHans Petter Selasky /* Precompute the LRO_RAW_ADDRESS_MAX value: */ 71e0d8add4SHans Petter Selasky #define LRO_RAW_ADDRESS_MAX \ 72e0d8add4SHans Petter Selasky howmany(12 + 2 * sizeof(struct in6_addr), sizeof(u_long)) 73e0d8add4SHans Petter Selasky 749ca874cfSHans Petter Selasky union lro_address { 75e0d8add4SHans Petter Selasky u_long raw[LRO_RAW_ADDRESS_MAX]; 769ca874cfSHans Petter Selasky struct { 77bb5cd80eSHans Petter Selasky uint8_t lro_type; /* internal */ 789ca874cfSHans Petter Selasky #define LRO_TYPE_NONE 0 799ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_TCP 1 809ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_TCP 2 819ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_UDP 3 829ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_UDP 4 83bb5cd80eSHans Petter Selasky uint8_t lro_flags; 84bb5cd80eSHans Petter Selasky #define LRO_FLAG_DECRYPTED 1 859ca874cfSHans Petter Selasky uint16_t vlan_id; /* VLAN identifier */ 869ca874cfSHans Petter Selasky uint16_t s_port; /* source TCP/UDP port */ 879ca874cfSHans Petter Selasky uint16_t d_port; /* destination TCP/UDP port */ 889ca874cfSHans Petter Selasky uint32_t vxlan_vni; /* VXLAN virtual network identifier */ 899ca874cfSHans Petter Selasky union { 909ca874cfSHans Petter Selasky struct in_addr v4; 919ca874cfSHans Petter Selasky struct in6_addr v6; 929ca874cfSHans Petter Selasky } s_addr; /* source IPv4/IPv6 address */ 939ca874cfSHans Petter Selasky union { 949ca874cfSHans Petter Selasky struct in_addr v4; 959ca874cfSHans Petter Selasky struct in6_addr v6; 969ca874cfSHans Petter Selasky } d_addr; /* destination IPv4/IPv6 address */ 979ca874cfSHans Petter Selasky }; 98e0d8add4SHans Petter Selasky }; 999ca874cfSHans Petter Selasky 100e0d8add4SHans Petter Selasky _Static_assert(sizeof(union lro_address) == sizeof(u_long) * LRO_RAW_ADDRESS_MAX, 101e0d8add4SHans Petter Selasky "The raw field in the lro_address union does not cover the whole structure."); 1029ca874cfSHans Petter Selasky 1039ca874cfSHans Petter Selasky /* Optimize address comparison by comparing one unsigned long at a time: */ 1049ca874cfSHans Petter Selasky 1059ca874cfSHans Petter Selasky static inline bool 1069ca874cfSHans Petter Selasky lro_address_compare(const union lro_address *pa, const union lro_address *pb) 1079ca874cfSHans Petter Selasky { 1089ca874cfSHans Petter Selasky if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) { 1099ca874cfSHans Petter Selasky return (true); 1109ca874cfSHans Petter Selasky } else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) { 1119ca874cfSHans Petter Selasky if (pa->raw[i] != pb->raw[i]) 1129ca874cfSHans Petter Selasky return (false); 1139ca874cfSHans Petter Selasky } 1149ca874cfSHans Petter Selasky return (true); 1159ca874cfSHans Petter Selasky } 1169ca874cfSHans Petter Selasky 1179ca874cfSHans Petter Selasky struct lro_parser { 1189ca874cfSHans Petter Selasky union lro_address data; 1199ca874cfSHans Petter Selasky union { 1209ca874cfSHans Petter Selasky uint8_t *l3; 1219ca874cfSHans Petter Selasky struct ip *ip4; 1229ca874cfSHans Petter Selasky struct ip6_hdr *ip6; 1239ca874cfSHans Petter Selasky }; 1249ca874cfSHans Petter Selasky union { 1259ca874cfSHans Petter Selasky uint8_t *l4; 1269ca874cfSHans Petter Selasky struct tcphdr *tcp; 1279ca874cfSHans Petter Selasky struct udphdr *udp; 1289ca874cfSHans Petter Selasky }; 1299ca874cfSHans Petter Selasky uint16_t total_hdr_len; 1309ca874cfSHans Petter Selasky }; 1319ca874cfSHans Petter Selasky 1329ca874cfSHans Petter Selasky /* This structure is zeroed frequently, try to keep it small. */ 1331ea44822SSepherosa Ziehau struct lro_entry { 1341ea44822SSepherosa Ziehau LIST_ENTRY(lro_entry) next; 13505cde7efSSepherosa Ziehau LIST_ENTRY(lro_entry) hash_next; 1366c5087a8SJack F Vogel struct mbuf *m_head; 1376c5087a8SJack F Vogel struct mbuf *m_tail; 138e57b2d0eSRandall Stewart struct mbuf *m_last_mbuf; 1399ca874cfSHans Petter Selasky struct lro_parser outer; 1409ca874cfSHans Petter Selasky struct lro_parser inner; 14162b5b6ecSBjoern A. Zeeb uint32_t next_seq; /* tcp_seq */ 14262b5b6ecSBjoern A. Zeeb uint32_t ack_seq; /* tcp_seq */ 14362b5b6ecSBjoern A. Zeeb uint32_t tsval; 14462b5b6ecSBjoern A. Zeeb uint32_t tsecr; 1459ca874cfSHans Petter Selasky uint16_t compressed; 1469ca874cfSHans Petter Selasky uint16_t uncompressed; 14762b5b6ecSBjoern A. Zeeb uint16_t window; 14893e28d6eSRichard Scheffenegger uint16_t flags : 12, /* 12 TCP header bits */ 14993e28d6eSRichard Scheffenegger timestamp : 1, 15093e28d6eSRichard Scheffenegger needs_merge : 1, 15193e28d6eSRichard Scheffenegger reserved : 2; /* unused */ 152b45daaeaSRandall Stewart struct bintime alloc_time; /* time when entry was allocated */ 1536c5087a8SJack F Vogel }; 1546c5087a8SJack F Vogel 1559ca874cfSHans Petter Selasky LIST_HEAD(lro_head, lro_entry); 15662b5b6ecSBjoern A. Zeeb 157fc271df3SHans Petter Selasky struct lro_mbuf_sort { 158fc271df3SHans Petter Selasky uint64_t seq; 159fc271df3SHans Petter Selasky struct mbuf *mb; 160fc271df3SHans Petter Selasky }; 161fc271df3SHans Petter Selasky 16262b5b6ecSBjoern A. Zeeb /* NB: This is part of driver structs. */ 1636c5087a8SJack F Vogel struct lro_ctrl { 1646c5087a8SJack F Vogel struct ifnet *ifp; 165fc271df3SHans Petter Selasky struct lro_mbuf_sort *lro_mbuf_data; 166b45daaeaSRandall Stewart struct bintime lro_last_queue_time; /* last time data was queued */ 167e936121dSHans Petter Selasky uint64_t lro_queued; 168e936121dSHans Petter Selasky uint64_t lro_flushed; 169e936121dSHans Petter Selasky uint64_t lro_bad_csum; 170e936121dSHans Petter Selasky unsigned lro_cnt; 171e936121dSHans Petter Selasky unsigned lro_mbuf_count; 172e936121dSHans Petter Selasky unsigned lro_mbuf_max; 1737ae3d4bfSSepherosa Ziehau unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */ 174d7955cc0SRandall Stewart unsigned short lro_cpu; /* Guess at the cpu we have affinity too */ 1757ae3d4bfSSepherosa Ziehau unsigned lro_length_lim; /* max len of aggregated data */ 17605cde7efSSepherosa Ziehau u_long lro_hashsz; 177d7955cc0SRandall Stewart uint32_t lro_last_cpu; 178d7955cc0SRandall Stewart uint32_t lro_cnt_of_same_cpu; 17905cde7efSSepherosa Ziehau struct lro_head *lro_hash; 1806c5087a8SJack F Vogel struct lro_head lro_active; 1816c5087a8SJack F Vogel struct lro_head lro_free; 182d7955cc0SRandall Stewart uint8_t lro_cpu_is_set; /* Flag to say its ok to set the CPU on the inp */ 1836c5087a8SJack F Vogel }; 1846c5087a8SJack F Vogel 18569a34e8dSRandall Stewart struct tcp_ackent { 18669a34e8dSRandall Stewart uint64_t timestamp; /* hardware or sofware timestamp, valid if TSTMP_LRO or TSTMP_HDRW set */ 18769a34e8dSRandall Stewart uint32_t seq; /* th_seq value */ 18869a34e8dSRandall Stewart uint32_t ack; /* th_ack value */ 18969a34e8dSRandall Stewart uint32_t ts_value; /* If ts option value, valid if HAS_TSTMP is set */ 19069a34e8dSRandall Stewart uint32_t ts_echo; /* If ts option echo, valid if HAS_TSTMP is set */ 19169a34e8dSRandall Stewart uint16_t win; /* TCP window */ 19269a34e8dSRandall Stewart uint16_t flags; /* Flags to say if TS is present and type of timestamp and th_flags */ 19369a34e8dSRandall Stewart uint8_t codepoint; /* IP level codepoint including ECN bits */ 19469a34e8dSRandall Stewart uint8_t ack_val_set; /* Classification of ack used by the stack */ 19569a34e8dSRandall Stewart uint8_t pad[2]; /* To 32 byte boundary */ 19669a34e8dSRandall Stewart }; 19769a34e8dSRandall Stewart 19869a34e8dSRandall Stewart /* We use two M_PROTO on the mbuf */ 19969a34e8dSRandall Stewart #define M_ACKCMP M_PROTO4 /* Indicates LRO is sending in a Ack-compression mbuf */ 20069a34e8dSRandall Stewart #define M_LRO_EHDRSTRP M_PROTO6 /* Indicates that LRO has stripped the etherenet header */ 20169a34e8dSRandall Stewart 2029ca874cfSHans Petter Selasky #define TCP_LRO_LENGTH_MAX (65535 - 255) /* safe value with room for outer headers */ 2037ae3d4bfSSepherosa Ziehau #define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */ 2047ae3d4bfSSepherosa Ziehau 2056c5087a8SJack F Vogel int tcp_lro_init(struct lro_ctrl *); 206e936121dSHans Petter Selasky int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); 2076c5087a8SJack F Vogel void tcp_lro_free(struct lro_ctrl *); 2087127e6acSNavdeep Parhar void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); 2096c5087a8SJack F Vogel void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); 210e936121dSHans Petter Selasky void tcp_lro_flush_all(struct lro_ctrl *); 2116c5087a8SJack F Vogel int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); 212e936121dSHans Petter Selasky void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); 213e57b2d0eSRandall Stewart void tcp_lro_reg_mbufq(void); 214e57b2d0eSRandall Stewart void tcp_lro_dereg_mbufq(void); 2156c5087a8SJack F Vogel 216489f0c3cSSepherosa Ziehau #define TCP_LRO_NO_ENTRIES -2 21762b5b6ecSBjoern A. Zeeb #define TCP_LRO_CANNOT -1 21862b5b6ecSBjoern A. Zeeb #define TCP_LRO_NOT_SUPPORTED 1 2196c5087a8SJack F Vogel 2206c5087a8SJack F Vogel #endif /* _TCP_LRO_H_ */ 221