1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 2007-2008,2010 7 * Swinburne University of Technology, Melbourne, Australia. 8 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 9 * Copyright (c) 2010 The FreeBSD Foundation 10 * Copyright (c) 2010-2011 Juniper Networks, Inc. 11 * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com> 12 * All rights reserved. 13 * 14 * Portions of this software were developed at the Centre for Advanced Internet 15 * Architectures, Swinburne University of Technology, by Lawrence Stewart, 16 * James Healy and David Hayes, made possible in part by a grant from the Cisco 17 * University Research Program Fund at Community Foundation Silicon Valley. 18 * 19 * Portions of this software were developed at the Centre for Advanced 20 * Internet Architectures, Swinburne University of Technology, Melbourne, 21 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 22 * 23 * Portions of this software were developed by Robert N. M. Watson under 24 * contract to Juniper Networks, Inc. 25 * 26 * Redistribution and use in source and binary forms, with or without 27 * modification, are permitted provided that the following conditions 28 * are met: 29 * 1. Redistributions of source code must retain the above copyright 30 * notice, this list of conditions and the following disclaimer. 31 * 2. Redistributions in binary form must reproduce the above copyright 32 * notice, this list of conditions and the following disclaimer in the 33 * documentation and/or other materials provided with the distribution. 34 * 3. Neither the name of the University nor the names of its contributors 35 * may be used to endorse or promote products derived from this software 36 * without specific prior written permission. 
37 * 38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48 * SUCH DAMAGE. 49 * 50 * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 51 */ 52 53 /* 54 * Utility functions to deal with Explicit Congestion Notification in TCP 55 * implementing the essential parts of the Accurate ECN extension 56 * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 57 */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include "opt_inet.h" 63 #include "opt_inet6.h" 64 #include "opt_tcpdebug.h" 65 66 #include <sys/param.h> 67 #include <sys/systm.h> 68 #include <sys/kernel.h> 69 #include <sys/sysctl.h> 70 #include <sys/malloc.h> 71 #include <sys/mbuf.h> 72 #include <sys/socket.h> 73 #include <sys/socketvar.h> 74 75 #include <machine/cpu.h> 76 77 #include <vm/uma.h> 78 79 #include <net/if.h> 80 #include <net/if_var.h> 81 #include <net/route.h> 82 #include <net/vnet.h> 83 84 #include <netinet/in.h> 85 #include <netinet/in_systm.h> 86 #include <netinet/ip.h> 87 #include <netinet/in_var.h> 88 #include <netinet/in_pcb.h> 89 #include <netinet/ip_var.h> 90 #include <netinet/ip6.h> 91 #include <netinet/icmp6.h> 92 #include <netinet6/nd6.h> 93 #include <netinet6/ip6_var.h> 94 #include <netinet6/in6_pcb.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_timer.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_ecn.h>


/*
 * Process the ECN bits of an incoming <SYN,ACK>.
 *
 * When the segment has ECE set and CWR clear, and V_tcp_do_ecn is
 * non-zero, TF2_ECN_PERMIT is set on the connection and the
 * tcps_ecn_shs statistic is incremented.  The iptos argument is not
 * consulted here.
 */
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	const uint16_t ecnbits = thflags & (TH_CWR | TH_ECE);

	/* Anything other than "ECE without CWR" leaves the tcpcb alone. */
	if (ecnbits != TH_ECE)
		return;
	if (!V_tcp_do_ecn)
		return;

	tp->t_flags2 |= TF2_ECN_PERMIT;
	TCPSTAT_INC(tcps_ecn_shs);
}

/*
 * Process the ECN bits of a parallel (simultaneous) <SYN>.
 *
 * Segments carrying ACK are ignored.  With V_tcp_do_ecn set to 1 or 2,
 * a segment that has both CWR and ECE set turns on TF2_ECN_PERMIT and
 * TF2_ECN_SND_ECE and increments tcps_ecn_shs.  Any other value of
 * V_tcp_do_ecn results in no action.  The iptos argument is not
 * consulted here.
 */
void
tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	if ((thflags & TH_ACK) != 0)
		return;

	switch (V_tcp_do_ecn) {
	case 1:
	case 2:
		/* RFC3168 ECN handling */
		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
			tp->t_flags2 |= (TF2_ECN_PERMIT | TF2_ECN_SND_ECE);
			TCPSTAT_INC(tcps_ecn_shs);
		}
		break;
	default:
		/* ECN disabled, or a setting not handled here. */
		break;
	}
}

/*
 * ECN processing for a received segment.
 *
 * Nothing is done unless TF2_ECN_PERMIT is set on the connection.
 * Otherwise the per-codepoint statistic matching the IP ECN field is
 * incremented, the RFC3168 flag handling below is applied, and the
 * segment is passed to cc_ecnpkt_handler_flags().
 *
 * Returns 1 when ECN is permitted and the segment has ECE set,
 * 0 in every other case.
 */
int
tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	const int codepoint = iptos & IPTOS_ECN_MASK;
	int delta_ace;

	if ((tp->t_flags2 & TF2_ECN_PERMIT) == 0)
		return (0);

	/* Account for the received IP ECN codepoint. */
	if (codepoint == IPTOS_ECN_CE) {
		TCPSTAT_INC(tcps_ecn_ce);
	} else if (codepoint == IPTOS_ECN_ECT0) {
		TCPSTAT_INC(tcps_ecn_ect0);
	} else if (codepoint == IPTOS_ECN_ECT1) {
		TCPSTAT_INC(tcps_ecn_ect1);
	}

	/* RFC3168 ECN handling */
	delta_ace = (thflags & TH_ECE) ? 1 : 0;
	if (thflags & TH_CWR) {
		/* CWR received: stop sending ECE and ACK right away. */
		tp->t_flags |= TF_ACKNOW;
		tp->t_flags2 &= ~TF2_ECN_SND_ECE;
	}
	/*
	 * The CE check comes after the CWR handling, so a segment with
	 * both CWR and CE leaves TF2_ECN_SND_ECE set.
	 */
	if (codepoint == IPTOS_ECN_CE)
		tp->t_flags2 |= TF2_ECN_SND_ECE;

	/* Process a packet differently from RFC3168. */
	cc_ecnpkt_handler_flags(tp, thflags, iptos);

	return (delta_ace);
}

/*
 * Compute the ECN header flags for an outgoing ECN-setup <SYN>.
 *
 * Returns TH_ECE|TH_CWR when V_tcp_do_ecn is 1 and the retransmit
 * shift is either zero or no larger than V_tcp_ecn_maxretries;
 * returns 0 otherwise.
 */
uint16_t
tcp_ecn_output_syn_sent(struct tcpcb *tp)
{
	if (V_tcp_do_ecn != 1)
		return (0);

	/* Retransmitted SYN past the retry limit: omit the ECN flags. */
	if (tp->t_rxtshift >= 1 && tp->t_rxtshift > V_tcp_ecn_maxretries)
		return (0);

	/* Send a RFC3168 ECN setup <SYN> packet */
	return (TH_ECE | TH_CWR);
}

/*
 * Output-side ECN processing.
 *
 * Updates *thflags with CWR and/or ECE as required by the connection
 * state and returns the IP ECN codepoint for the packet:
 * IPTOS_ECN_ECT0 for new data, IPTOS_ECN_NOTECT otherwise.
 */
int
tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
{
	/* A forced single-byte send is treated as a window probe. */
	const bool forced_probe = (tp->t_flags & TF_FORCEDATA) && len == 1;
	/*
	 * Only segments carrying new data are marked ECN capable (ECT);
	 * pure control packets, retransmissions and window probes are
	 * left unmarked.
	 */
	const bool newdata = len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
	    !forced_probe;
	int ipecn = IPTOS_ECN_NOTECT;

	if (newdata) {
		ipecn = IPTOS_ECN_ECT0;
		TCPSTAT_INC(tcps_ecn_ect0);

		/* A pending CWR is sent once, on a new data segment. */
		if (tp->t_flags2 & TF2_ECN_SND_CWR) {
			*thflags |= TH_CWR;
			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
		}
	}

	/* ECE is added whenever TF2_ECN_SND_ECE is set. */
	if (tp->t_flags2 & TF2_ECN_SND_ECE)
		*thflags |= TH_ECE;

	return (ipecn);
}

/*
 * Initialize the ECN state of a tcpcb from a syncache entry:
 * TF2_ECN_PERMIT is set when the entry's ECN bits equal SCF_ECN.
 */
void
tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
{
	/* No ECN bits recorded in the entry. */
	if ((sc->sc_flags & SCF_ECN_MASK) == 0)
		return;

	/* Any codepoint other than SCF_ECN is undefined and ignored. */
	if ((sc->sc_flags & SCF_ECN_MASK) == SCF_ECN)
		tp->t_flags2 |= TF2_ECN_PERMIT;
}

/*
 * Process a <SYN> packet's ECN information, and provide the
 * syncache with the relevant information.
 *
 * Returns SCF_ECN when both CWR and ECE are set in thflags (legacy
 * ECN setup), 0 for every other combination.  The iptos argument is
 * not consulted here.
 */
int
tcp_ecn_syncache_add(uint16_t thflags, int iptos)
{
	/* legacy ECN */
	if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE))
		return (SCF_ECN);

	/* no ECN, or a flag combination that is not handled */
	return (0);
}

/*
 * Set up the ECN information for the <SYN,ACK> from
 * syncache information.
 *
 * When thflags has SYN set and the entry's ECN bits equal SCF_ECN,
 * ECE is added to the flags and tcps_ecn_shs is incremented.
 * Returns the possibly updated flags.
 */
uint16_t
tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
{
	/* The syncache entry is only examined for segments with SYN. */
	if ((thflags & TH_SYN) == 0)
		return (thflags);
	if ((sc->sc_flags & SCF_ECN_MASK) == 0)
		return (thflags);

	/* Any codepoint other than SCF_ECN is undefined and ignored. */
	if ((sc->sc_flags & SCF_ECN_MASK) == SCF_ECN) {
		thflags |= TH_ECE;
		TCPSTAT_INC(tcps_ecn_shs);
	}

	return (thflags);
}