/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 * Copyright (c) 2007-2008,2010
 *	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
 * All rights reserved.
 *
 * Portions of this software were developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart,
 * James Healy and David Hayes, made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Robert N. M. Watson under
 * contract to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
 */

/*
 * Utility functions to deal with Explicit Congestion Notification in TCP
 * implementing the essential parts of the Accurate ECN extension
 * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <machine/cpu.h>

#include <vm/uma.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_timer.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_ecn.h>


/*
 * Inspect the ECN bits of an incoming <SYN,ACK>.
 *
 * When ECE is set, CWR is clear and V_tcp_do_ecn is non-zero, the
 * connection is flagged TF2_ECN_PERMIT and the tcps_ecn_shs counter is
 * bumped.  iptos is accepted but not examined here.
 */
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	const uint16_t ecn_bits = thflags & (TH_CWR | TH_ECE);

	/* Only the "ECE without CWR" combination is acted upon. */
	if (ecn_bits != TH_ECE)
		return;
	if (!V_tcp_do_ecn)
		return;

	tp->t_flags2 |= TF2_ECN_PERMIT;
	TCPSTAT_INC(tcps_ecn_shs);
}

/*
 * Inspect the ECN bits of a <SYN> that crossed our own (no ACK set).
 *
 * Segments carrying TH_ACK are ignored, as is everything unless
 * V_tcp_do_ecn is 1 or 2.  If both CWR and ECE are set, the connection
 * is flagged TF2_ECN_PERMIT and TF2_ECN_SND_ECE and the tcps_ecn_shs
 * counter is bumped.  iptos is accepted but not examined here.
 */
void
tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	const uint16_t ecn_setup = TH_CWR | TH_ECE;

	if (thflags & TH_ACK)
		return;
	/* Anything other than modes 1 and 2 (including 0) is a no-op. */
	if (V_tcp_do_ecn != 1 && V_tcp_do_ecn != 2)
		return;

	/* RFC3168 ECN handling */
	if ((thflags & ecn_setup) == ecn_setup) {
		tp->t_flags2 |= TF2_ECN_PERMIT | TF2_ECN_SND_ECE;
		TCPSTAT_INC(tcps_ecn_shs);
	}
}

/*
 * Per-segment ECN input processing.
 *
 * Does nothing and returns 0 unless TF2_ECN_PERMIT is set on tp.
 * Otherwise it accounts the IP ECN codepoint in the TCP statistics,
 * updates the ECE-echo state from the CWR flag and the CE codepoint, and
 * hands the segment to cc_ecnpkt_handler_flags().
 *
 * Returns 1 if the segment carried TH_ECE (and ECN is permitted),
 * 0 otherwise.
 */
int
tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
{
	int codepoint;
	int ece_seen;

	if ((tp->t_flags2 & TF2_ECN_PERMIT) == 0)
		return 0;

	codepoint = iptos & IPTOS_ECN_MASK;

	/* Account the received codepoint; any other value is not counted. */
	if (codepoint == IPTOS_ECN_CE)
		TCPSTAT_INC(tcps_ecn_ce);
	else if (codepoint == IPTOS_ECN_ECT0)
		TCPSTAT_INC(tcps_ecn_ect0);
	else if (codepoint == IPTOS_ECN_ECT1)
		TCPSTAT_INC(tcps_ecn_ect1);

	/* RFC3168 ECN handling */
	ece_seen = (thflags & TH_ECE) ? 1 : 0;

	/*
	 * The CWR handling must run before the CE handling: a segment
	 * that has both CWR and CE leaves TF2_ECN_SND_ECE set.
	 */
	if (thflags & TH_CWR) {
		tp->t_flags2 &= ~TF2_ECN_SND_ECE;
		tp->t_flags |= TF_ACKNOW;
	}
	if (codepoint == IPTOS_ECN_CE)
		tp->t_flags2 |= TF2_ECN_SND_ECE;

	/* Process a packet differently from RFC3168. */
	cc_ecnpkt_handler_flags(tp, thflags, iptos);

	return ece_seen;
}

/*
 * Compute the ECN TCP header flags for an outgoing <SYN>.
 *
 * Returns TH_ECE|TH_CWR when V_tcp_do_ecn is 1 and the segment is either
 * not a retransmission (t_rxtshift < 1) or t_rxtshift has not exceeded
 * V_tcp_ecn_maxretries; returns 0 in every other case.
 */
uint16_t
tcp_ecn_output_syn_sent(struct tcpcb *tp)
{
	if (V_tcp_do_ecn != 1)
		return 0;

	/* Retransmitted more often than allowed: send a plain <SYN>. */
	if (tp->t_rxtshift >= 1 && tp->t_rxtshift > V_tcp_ecn_maxretries)
		return 0;

	/* Send a RFC3168 ECN setup <SYN> packet */
	return TH_ECE | TH_CWR;
}

/*
 * ECN output processing for a segment; ORs the required ECN bits into
 * *thflags and returns the IP ECN codepoint to use.
 *
 * Only a segment carrying new data is marked ECT(0): it must have
 * len > 0, snd_nxt at or beyond snd_max, rxmit false, and must not be a
 * one-byte TF_FORCEDATA segment.  Everything else gets IPTOS_ECN_NOTECT.
 *
 * NOTE(review): TF2_ECN_PERMIT is not tested here - presumably the
 * caller checks it; confirm.
 */
int
tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
{
	int codepoint = IPTOS_ECN_NOTECT;
	bool sends_new_data;

	/*
	 * Pure control packets, retransmissions and window probes are
	 * not treated as new data.
	 */
	sends_new_data = len > 0 &&
	    SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
	    !rxmit &&
	    (!(tp->t_flags & TF_FORCEDATA) || len != 1);

	if (sends_new_data) {
		codepoint = IPTOS_ECN_ECT0;
		TCPSTAT_INC(tcps_ecn_ect0);

		/* A pending CWR is sent once, on new data only. */
		if (tp->t_flags2 & TF2_ECN_SND_CWR) {
			*thflags |= TH_CWR;
			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
		}
	}

	/* ECE is set on any segment while TF2_ECN_SND_ECE is set. */
	if (tp->t_flags2 & TF2_ECN_SND_ECE)
		*thflags |= TH_ECE;

	return codepoint;
}

/*
 * Initialise the ECN state of a tcpcb from a syncache entry: when the
 * entry's ECN field (sc_flags & SCF_ECN_MASK) is SCF_ECN, set
 * TF2_ECN_PERMIT.  Any other value leaves tp untouched.
 */
void
tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
{
	const int ecn_field = sc->sc_flags & SCF_ECN_MASK;

	if (ecn_field == 0)
		return;

	/* Undefined SCF codepoints are ignored. */
	if (ecn_field == SCF_ECN)
		tp->t_flags2 |= TF2_ECN_PERMIT;
}

/*
 * Translate the ECN bits of a received <SYN> into syncache flags.
 *
 * Returns SCF_ECN when both CWR and ECE are set (legacy ECN setup) and
 * 0 for every other combination.  iptos is accepted but not examined.
 */
int
tcp_ecn_syncache_add(uint16_t thflags, int iptos)
{
	const uint16_t ecn_setup = TH_CWR | TH_ECE;

	if ((thflags & ecn_setup) == ecn_setup)
		return SCF_ECN;

	/* No ECN, or a combination that is not handled here. */
	return 0;
}

/*
 * Add the ECN bits for a <SYN,ACK> built from a syncache entry.
 *
 * If thflags contains TH_SYN and the entry's ECN field is SCF_ECN, TH_ECE
 * is added and the tcps_ecn_shs counter is bumped.  The (possibly
 * updated) flags are returned; sc is not read when TH_SYN is absent.
 */
uint16_t
tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
{
	int ecn_field;

	if ((thflags & TH_SYN) == 0)
		return thflags;

	ecn_field = sc->sc_flags & SCF_ECN_MASK;
	if (ecn_field == 0)
		return thflags;

	/* Undefined SCF codepoints leave the flags unchanged. */
	if (ecn_field == SCF_ECN) {
		thflags |= TH_ECE;
		TCPSTAT_INC(tcps_ecn_shs);
	}

	return thflags;
}