127f190a3SBjoern A. Zeeb /*- 2fe267a55SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3fe267a55SPedro F. Giffuni * 427f190a3SBjoern A. Zeeb * Copyright (c) 2006, Myricom Inc. 527f190a3SBjoern A. Zeeb * Copyright (c) 2008, Intel Corporation. 69ca874cfSHans Petter Selasky * Copyright (c) 2016-2021 Mellanox Technologies. 727f190a3SBjoern A. Zeeb * All rights reserved. 827f190a3SBjoern A. Zeeb * 927f190a3SBjoern A. Zeeb * Redistribution and use in source and binary forms, with or without 1027f190a3SBjoern A. Zeeb * modification, are permitted provided that the following conditions 1127f190a3SBjoern A. Zeeb * are met: 1227f190a3SBjoern A. Zeeb * 1. Redistributions of source code must retain the above copyright 1327f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer. 1427f190a3SBjoern A. Zeeb * 2. Redistributions in binary form must reproduce the above copyright 1527f190a3SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer in the 1627f190a3SBjoern A. Zeeb * documentation and/or other materials provided with the distribution. 1727f190a3SBjoern A. Zeeb * 1827f190a3SBjoern A. Zeeb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1927f190a3SBjoern A. Zeeb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2027f190a3SBjoern A. Zeeb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2127f190a3SBjoern A. Zeeb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2227f190a3SBjoern A. Zeeb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2327f190a3SBjoern A. Zeeb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2427f190a3SBjoern A. Zeeb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2527f190a3SBjoern A. Zeeb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2627f190a3SBjoern A. Zeeb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2727f190a3SBjoern A. Zeeb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2827f190a3SBjoern A. Zeeb * SUCH DAMAGE. 2927f190a3SBjoern A. Zeeb * 3027f190a3SBjoern A. Zeeb * $FreeBSD$ 3127f190a3SBjoern A. Zeeb */ 326c5087a8SJack F Vogel 336c5087a8SJack F Vogel #ifndef _TCP_LRO_H_ 346c5087a8SJack F Vogel #define _TCP_LRO_H_ 356c5087a8SJack F Vogel 367127e6acSNavdeep Parhar #include <sys/time.h> 377127e6acSNavdeep Parhar 38e936121dSHans Petter Selasky #ifndef TCP_LRO_ENTRIES 39e936121dSHans Petter Selasky /* Define default number of LRO entries per RX queue */ 40e936121dSHans Petter Selasky #define TCP_LRO_ENTRIES 8 41e936121dSHans Petter Selasky #endif 42e936121dSHans Petter Selasky 4369a34e8dSRandall Stewart /* 4469a34e8dSRandall Stewart * Flags for ACK entry for compression 4569a34e8dSRandall Stewart * the bottom 8 bits has the th_flags. 4669a34e8dSRandall Stewart * LRO itself adds only the TSTMP flags 4769a34e8dSRandall Stewart * to indicate if either of the types 4869a34e8dSRandall Stewart * of timestamps are filled and the 4969a34e8dSRandall Stewart * HAS_TSTMP option to indicate if the 5069a34e8dSRandall Stewart * TCP timestamp option is valid. 5169a34e8dSRandall Stewart * 5269a34e8dSRandall Stewart * The other 5 flag bits are for processing 5369a34e8dSRandall Stewart * by a stack. 5469a34e8dSRandall Stewart * 5569a34e8dSRandall Stewart */ 5669a34e8dSRandall Stewart #define TSTMP_LRO 0x0100 5769a34e8dSRandall Stewart #define TSTMP_HDWR 0x0200 5869a34e8dSRandall Stewart #define HAS_TSTMP 0x0400 5969a34e8dSRandall Stewart 6069a34e8dSRandall Stewart struct inpcb; 6169a34e8dSRandall Stewart 629ca874cfSHans Petter Selasky union lro_address { 639ca874cfSHans Petter Selasky u_long raw[1]; 649ca874cfSHans Petter Selasky struct { 659ca874cfSHans Petter Selasky uint16_t lro_type; /* internal */ 669ca874cfSHans Petter Selasky #define LRO_TYPE_NONE 0 679ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_TCP 1 689ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_TCP 2 699ca874cfSHans Petter Selasky #define LRO_TYPE_IPV4_UDP 3 709ca874cfSHans Petter Selasky #define LRO_TYPE_IPV6_UDP 4 719ca874cfSHans Petter Selasky uint16_t vlan_id; /* VLAN identifier */ 729ca874cfSHans Petter Selasky uint16_t s_port; /* source TCP/UDP port */ 739ca874cfSHans Petter Selasky uint16_t d_port; /* destination TCP/UDP port */ 749ca874cfSHans Petter Selasky uint32_t vxlan_vni; /* VXLAN virtual network identifier */ 759ca874cfSHans Petter Selasky union { 769ca874cfSHans Petter Selasky #ifdef INET 779ca874cfSHans Petter Selasky struct in_addr v4; 789ca874cfSHans Petter Selasky #endif 799ca874cfSHans Petter Selasky #ifdef INET6 809ca874cfSHans Petter Selasky struct in6_addr v6; 819ca874cfSHans Petter Selasky #endif 829ca874cfSHans Petter Selasky } s_addr; /* source IPv4/IPv6 address */ 839ca874cfSHans Petter Selasky union { 849ca874cfSHans Petter Selasky #ifdef INET 859ca874cfSHans Petter Selasky struct in_addr v4; 869ca874cfSHans Petter Selasky #endif 879ca874cfSHans Petter Selasky #ifdef INET6 889ca874cfSHans Petter Selasky struct in6_addr v6; 899ca874cfSHans Petter Selasky #endif 909ca874cfSHans Petter Selasky } d_addr; /* destination IPv4/IPv6 address */ 919ca874cfSHans Petter Selasky }; 929ca874cfSHans Petter Selasky } __aligned(sizeof(u_long)); 939ca874cfSHans Petter Selasky 949ca874cfSHans Petter Selasky #define LRO_RAW_ADDRESS_MAX \ 959ca874cfSHans Petter Selasky (sizeof(union lro_address) / sizeof(u_long)) 969ca874cfSHans Petter Selasky 979ca874cfSHans Petter Selasky /* Optimize address comparison by comparing one unsigned long at a time: */ 989ca874cfSHans Petter Selasky 999ca874cfSHans Petter Selasky static inline bool 1009ca874cfSHans Petter Selasky lro_address_compare(const union lro_address *pa, const union lro_address *pb) 1019ca874cfSHans Petter Selasky { 1029ca874cfSHans Petter Selasky if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) { 1039ca874cfSHans Petter Selasky return (true); 1049ca874cfSHans Petter Selasky } else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) { 1059ca874cfSHans Petter Selasky if (pa->raw[i] != pb->raw[i]) 1069ca874cfSHans Petter Selasky return (false); 1079ca874cfSHans Petter Selasky } 1089ca874cfSHans Petter Selasky return (true); 1099ca874cfSHans Petter Selasky } 1109ca874cfSHans Petter Selasky 1119ca874cfSHans Petter Selasky struct lro_parser { 1129ca874cfSHans Petter Selasky union lro_address data; 1139ca874cfSHans Petter Selasky union { 1149ca874cfSHans Petter Selasky uint8_t *l3; 1159ca874cfSHans Petter Selasky struct ip *ip4; 1169ca874cfSHans Petter Selasky struct ip6_hdr *ip6; 1179ca874cfSHans Petter Selasky }; 1189ca874cfSHans Petter Selasky union { 1199ca874cfSHans Petter Selasky uint8_t *l4; 1209ca874cfSHans Petter Selasky struct tcphdr *tcp; 1219ca874cfSHans Petter Selasky struct udphdr *udp; 1229ca874cfSHans Petter Selasky }; 1239ca874cfSHans Petter Selasky uint16_t total_hdr_len; 1249ca874cfSHans Petter Selasky }; 1259ca874cfSHans Petter Selasky 1269ca874cfSHans Petter Selasky /* This structure is zeroed frequently, try to keep it small. */ 1271ea44822SSepherosa Ziehau struct lro_entry { 1281ea44822SSepherosa Ziehau LIST_ENTRY(lro_entry) next; 12905cde7efSSepherosa Ziehau LIST_ENTRY(lro_entry) hash_next; 1306c5087a8SJack F Vogel struct mbuf *m_head; 1316c5087a8SJack F Vogel struct mbuf *m_tail; 132e57b2d0eSRandall Stewart struct mbuf *m_last_mbuf; 1339ca874cfSHans Petter Selasky struct lro_parser outer; 1349ca874cfSHans Petter Selasky struct lro_parser inner; 13562b5b6ecSBjoern A. Zeeb uint32_t next_seq; /* tcp_seq */ 13662b5b6ecSBjoern A. Zeeb uint32_t ack_seq; /* tcp_seq */ 13762b5b6ecSBjoern A. Zeeb uint32_t tsval; 13862b5b6ecSBjoern A. Zeeb uint32_t tsecr; 1399ca874cfSHans Petter Selasky uint16_t compressed; 1409ca874cfSHans Petter Selasky uint16_t uncompressed; 14162b5b6ecSBjoern A. Zeeb uint16_t window; 14262b5b6ecSBjoern A. Zeeb uint16_t timestamp; /* flag, not a TCP hdr field. */ 143*b45daaeaSRandall Stewart struct bintime alloc_time; /* time when entry was allocated */ 1446c5087a8SJack F Vogel }; 1456c5087a8SJack F Vogel 1469ca874cfSHans Petter Selasky LIST_HEAD(lro_head, lro_entry); 14762b5b6ecSBjoern A. Zeeb 148fc271df3SHans Petter Selasky struct lro_mbuf_sort { 149fc271df3SHans Petter Selasky uint64_t seq; 150fc271df3SHans Petter Selasky struct mbuf *mb; 151fc271df3SHans Petter Selasky }; 152fc271df3SHans Petter Selasky 15362b5b6ecSBjoern A. Zeeb /* NB: This is part of driver structs. */ 1546c5087a8SJack F Vogel struct lro_ctrl { 1556c5087a8SJack F Vogel struct ifnet *ifp; 156fc271df3SHans Petter Selasky struct lro_mbuf_sort *lro_mbuf_data; 157*b45daaeaSRandall Stewart struct bintime lro_last_queue_time; /* last time data was queued */ 158e936121dSHans Petter Selasky uint64_t lro_queued; 159e936121dSHans Petter Selasky uint64_t lro_flushed; 160e936121dSHans Petter Selasky uint64_t lro_bad_csum; 161e936121dSHans Petter Selasky unsigned lro_cnt; 162e936121dSHans Petter Selasky unsigned lro_mbuf_count; 163e936121dSHans Petter Selasky unsigned lro_mbuf_max; 1647ae3d4bfSSepherosa Ziehau unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */ 1657ae3d4bfSSepherosa Ziehau unsigned lro_length_lim; /* max len of aggregated data */ 1666c5087a8SJack F Vogel 16705cde7efSSepherosa Ziehau u_long lro_hashsz; 16805cde7efSSepherosa Ziehau struct lro_head *lro_hash; 1696c5087a8SJack F Vogel struct lro_head lro_active; 1706c5087a8SJack F Vogel struct lro_head lro_free; 1716c5087a8SJack F Vogel }; 1726c5087a8SJack F Vogel 17369a34e8dSRandall Stewart struct tcp_ackent { 17469a34e8dSRandall Stewart uint64_t timestamp; /* hardware or sofware timestamp, valid if TSTMP_LRO or TSTMP_HDRW set */ 17569a34e8dSRandall Stewart uint32_t seq; /* th_seq value */ 17669a34e8dSRandall Stewart uint32_t ack; /* th_ack value */ 17769a34e8dSRandall Stewart uint32_t ts_value; /* If ts option value, valid if HAS_TSTMP is set */ 17869a34e8dSRandall Stewart uint32_t ts_echo; /* If ts option echo, valid if HAS_TSTMP is set */ 17969a34e8dSRandall Stewart uint16_t win; /* TCP window */ 18069a34e8dSRandall Stewart uint16_t flags; /* Flags to say if TS is present and type of timestamp and th_flags */ 18169a34e8dSRandall Stewart uint8_t codepoint; /* IP level codepoint including ECN bits */ 18269a34e8dSRandall Stewart uint8_t ack_val_set; /* Classification of ack used by the stack */ 18369a34e8dSRandall Stewart uint8_t pad[2]; /* To 32 byte boundary */ 18469a34e8dSRandall Stewart }; 18569a34e8dSRandall Stewart 18669a34e8dSRandall Stewart /* We use two M_PROTO on the mbuf */ 18769a34e8dSRandall Stewart #define M_ACKCMP M_PROTO4 /* Indicates LRO is sending in a Ack-compression mbuf */ 18869a34e8dSRandall Stewart #define M_LRO_EHDRSTRP M_PROTO6 /* Indicates that LRO has stripped the etherenet header */ 18969a34e8dSRandall Stewart 1909ca874cfSHans Petter Selasky #define TCP_LRO_LENGTH_MAX (65535 - 255) /* safe value with room for outer headers */ 1917ae3d4bfSSepherosa Ziehau #define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */ 1927ae3d4bfSSepherosa Ziehau 1936c5087a8SJack F Vogel int tcp_lro_init(struct lro_ctrl *); 194e936121dSHans Petter Selasky int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); 1956c5087a8SJack F Vogel void tcp_lro_free(struct lro_ctrl *); 1967127e6acSNavdeep Parhar void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); 1976c5087a8SJack F Vogel void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); 198e936121dSHans Petter Selasky void tcp_lro_flush_all(struct lro_ctrl *); 1996c5087a8SJack F Vogel int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); 200e936121dSHans Petter Selasky void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); 201e57b2d0eSRandall Stewart void tcp_lro_reg_mbufq(void); 202e57b2d0eSRandall Stewart void tcp_lro_dereg_mbufq(void); 2036c5087a8SJack F Vogel 204489f0c3cSSepherosa Ziehau #define TCP_LRO_NO_ENTRIES -2 20562b5b6ecSBjoern A. Zeeb #define TCP_LRO_CANNOT -1 20662b5b6ecSBjoern A. Zeeb #define TCP_LRO_NOT_SUPPORTED 1 2076c5087a8SJack F Vogel 2086c5087a8SJack F Vogel #endif /* _TCP_LRO_H_ */ 209