1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2006, Myricom Inc. 5 * Copyright (c) 2008, Intel Corporation. 6 * Copyright (c) 2016-2021 Mellanox Technologies. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #ifndef _TCP_LRO_H_ 32 #define _TCP_LRO_H_ 33 34 #include <sys/time.h> 35 #include <sys/param.h> 36 #include <sys/mbuf.h> 37 #include <netinet/in.h> 38 39 #ifndef TCP_LRO_ENTRIES 40 /* Define default number of LRO entries per RX queue */ 41 #define TCP_LRO_ENTRIES 8 42 #endif 43 44 /* 45 * Flags for ACK entry for compression 46 * the bottom 12 bits has the th_x2|th_flags. 47 * LRO itself adds only the TSTMP flags 48 * to indicate if either of the types 49 * of timestamps are filled and the 50 * HAS_TSTMP option to indicate if the 51 * TCP timestamp option is valid. 52 * 53 * The other 1 flag bits are for processing 54 * by a stack. 55 * 56 */ 57 #define TSTMP_LRO 0x1000 58 #define TSTMP_HDWR 0x2000 59 #define HAS_TSTMP 0x4000 60 /* 61 * Default number of interrupts on the same cpu in a row 62 * that will cause us to declare a "affinity cpu". 63 */ 64 #define TCP_LRO_CPU_DECLARATION_THRESH 50 65 66 struct inpcb; 67 68 /* Precompute the LRO_RAW_ADDRESS_MAX value: */ 69 #define LRO_RAW_ADDRESS_MAX \ 70 howmany(12 + 2 * sizeof(struct in6_addr), sizeof(u_long)) 71 72 union lro_address { 73 u_long raw[LRO_RAW_ADDRESS_MAX]; 74 struct { 75 uint8_t lro_type; /* internal */ 76 #define LRO_TYPE_NONE 0 77 #define LRO_TYPE_IPV4_TCP 1 78 #define LRO_TYPE_IPV6_TCP 2 79 #define LRO_TYPE_IPV4_UDP 3 80 #define LRO_TYPE_IPV6_UDP 4 81 uint8_t lro_flags; 82 #define LRO_FLAG_DECRYPTED 1 83 uint16_t vlan_id; /* VLAN identifier */ 84 uint16_t s_port; /* source TCP/UDP port */ 85 uint16_t d_port; /* destination TCP/UDP port */ 86 uint32_t vxlan_vni; /* VXLAN virtual network identifier */ 87 union { 88 struct in_addr v4; 89 struct in6_addr v6; 90 } s_addr; /* source IPv4/IPv6 address */ 91 union { 92 struct in_addr v4; 93 struct in6_addr v6; 94 } d_addr; /* destination IPv4/IPv6 address */ 95 }; 96 }; 97 98 _Static_assert(sizeof(union lro_address) == sizeof(u_long) * LRO_RAW_ADDRESS_MAX, 99 "The raw field in the lro_address union does not cover the whole structure."); 100 101 /* Optimize address comparison by comparing one unsigned long at a time: */ 102 103 static inline bool 104 lro_address_compare(const union lro_address *pa, const union lro_address *pb) 105 { 106 if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) { 107 return (true); 108 } else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) { 109 if (pa->raw[i] != pb->raw[i]) 110 return (false); 111 } 112 return (true); 113 } 114 115 struct lro_parser { 116 union lro_address data; 117 union { 118 uint8_t *l3; 119 struct ip *ip4; 120 struct ip6_hdr *ip6; 121 }; 122 union { 123 uint8_t *l4; 124 struct tcphdr *tcp; 125 struct udphdr *udp; 126 }; 127 uint16_t total_hdr_len; 128 }; 129 130 /* This structure is zeroed frequently, try to keep it small. */ 131 struct lro_entry { 132 LIST_ENTRY(lro_entry) next; 133 LIST_ENTRY(lro_entry) hash_next; 134 struct mbuf *m_head; 135 struct mbuf *m_tail; 136 struct mbuf *m_last_mbuf; 137 struct lro_parser outer; 138 struct lro_parser inner; 139 uint32_t next_seq; /* tcp_seq */ 140 uint32_t ack_seq; /* tcp_seq */ 141 uint32_t tsval; 142 uint32_t tsecr; 143 uint16_t compressed; 144 uint16_t uncompressed; 145 uint16_t window; 146 uint16_t flags : 12, /* 12 TCP header bits */ 147 timestamp : 1, 148 needs_merge : 1, 149 reserved : 2; /* unused */ 150 struct bintime alloc_time; /* time when entry was allocated */ 151 }; 152 153 LIST_HEAD(lro_head, lro_entry); 154 155 struct lro_mbuf_sort { 156 uint64_t seq; 157 struct mbuf *mb; 158 }; 159 160 /* NB: This is part of driver structs. */ 161 struct lro_ctrl { 162 struct ifnet *ifp; 163 struct lro_mbuf_sort *lro_mbuf_data; 164 struct bintime lro_last_queue_time; /* last time data was queued */ 165 uint64_t lro_queued; 166 uint64_t lro_flushed; 167 uint64_t lro_bad_csum; 168 unsigned lro_cnt; 169 unsigned lro_mbuf_count; 170 unsigned lro_mbuf_max; 171 unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */ 172 unsigned short lro_cpu; /* Guess at the cpu we have affinity too */ 173 unsigned lro_length_lim; /* max len of aggregated data */ 174 u_long lro_hashsz; 175 uint32_t lro_last_cpu; 176 uint32_t lro_cnt_of_same_cpu; 177 struct lro_head *lro_hash; 178 struct lro_head lro_active; 179 struct lro_head lro_free; 180 uint8_t lro_cpu_is_set; /* Flag to say its ok to set the CPU on the inp */ 181 }; 182 183 struct tcp_ackent { 184 uint64_t timestamp; /* hardware or sofware timestamp, valid if TSTMP_LRO or TSTMP_HDRW set */ 185 uint32_t seq; /* th_seq value */ 186 uint32_t ack; /* th_ack value */ 187 uint32_t ts_value; /* If ts option value, valid if HAS_TSTMP is set */ 188 uint32_t ts_echo; /* If ts option echo, valid if HAS_TSTMP is set */ 189 uint16_t win; /* TCP window */ 190 uint16_t flags; /* Flags to say if TS is present and type of timestamp and th_flags */ 191 uint8_t codepoint; /* IP level codepoint including ECN bits */ 192 uint8_t ack_val_set; /* Classification of ack used by the stack */ 193 uint8_t pad[2]; /* To 32 byte boundary */ 194 }; 195 196 /* We use two M_PROTO on the mbuf */ 197 #define M_ACKCMP M_PROTO4 /* Indicates LRO is sending in a Ack-compression mbuf */ 198 #define M_LRO_EHDRSTRP M_PROTO6 /* Indicates that LRO has stripped the etherenet header */ 199 200 #define TCP_LRO_LENGTH_MAX (65535 - 255) /* safe value with room for outer headers */ 201 #define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */ 202 203 #define TCP_LRO_TS_OPTION ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |\ 204 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP) 205 206 static inline struct tcphdr * 207 tcp_lro_get_th(struct mbuf *m) 208 { 209 return ((struct tcphdr *)((char *)m->m_data + 210 m->m_pkthdr.lro_tcp_h_off)); 211 } 212 213 extern long tcplro_stacks_wanting_mbufq; 214 215 int tcp_lro_init(struct lro_ctrl *); 216 int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); 217 void tcp_lro_free(struct lro_ctrl *); 218 void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); 219 void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); 220 void tcp_lro_flush_all(struct lro_ctrl *); 221 extern int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *, struct lro_entry *); 222 int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); 223 void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); 224 void tcp_lro_reg_mbufq(void); 225 void tcp_lro_dereg_mbufq(void); 226 227 #define TCP_LRO_NO_ENTRIES -2 228 #define TCP_LRO_CANNOT -1 229 #define TCP_LRO_NOT_SUPPORTED 1 230 231 #endif /* _TCP_LRO_H_ */ 232