1f7220c48SRichard Scheffenegger /*- 2f7220c48SRichard Scheffenegger * SPDX-License-Identifier: BSD-3-Clause 3f7220c48SRichard Scheffenegger * 4f7220c48SRichard Scheffenegger * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 5f7220c48SRichard Scheffenegger * The Regents of the University of California. All rights reserved. 6f7220c48SRichard Scheffenegger * Copyright (c) 2007-2008,2010 7f7220c48SRichard Scheffenegger * Swinburne University of Technology, Melbourne, Australia. 8f7220c48SRichard Scheffenegger * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 9f7220c48SRichard Scheffenegger * Copyright (c) 2010 The FreeBSD Foundation 10f7220c48SRichard Scheffenegger * Copyright (c) 2010-2011 Juniper Networks, Inc. 11f7220c48SRichard Scheffenegger * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com> 12f7220c48SRichard Scheffenegger * All rights reserved. 13f7220c48SRichard Scheffenegger * 14f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced Internet 15f7220c48SRichard Scheffenegger * Architectures, Swinburne University of Technology, by Lawrence Stewart, 16f7220c48SRichard Scheffenegger * James Healy and David Hayes, made possible in part by a grant from the Cisco 17f7220c48SRichard Scheffenegger * University Research Program Fund at Community Foundation Silicon Valley. 18f7220c48SRichard Scheffenegger * 19f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced 20f7220c48SRichard Scheffenegger * Internet Architectures, Swinburne University of Technology, Melbourne, 21f7220c48SRichard Scheffenegger * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 22f7220c48SRichard Scheffenegger * 23f7220c48SRichard Scheffenegger * Portions of this software were developed by Robert N. M. Watson under 24f7220c48SRichard Scheffenegger * contract to Juniper Networks, Inc. 25f7220c48SRichard Scheffenegger * 26f7220c48SRichard Scheffenegger * Redistribution and use in source and binary forms, with or without 27f7220c48SRichard Scheffenegger * modification, are permitted provided that the following conditions 28f7220c48SRichard Scheffenegger * are met: 29f7220c48SRichard Scheffenegger * 1. Redistributions of source code must retain the above copyright 30f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer. 31f7220c48SRichard Scheffenegger * 2. Redistributions in binary form must reproduce the above copyright 32f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer in the 33f7220c48SRichard Scheffenegger * documentation and/or other materials provided with the distribution. 34f7220c48SRichard Scheffenegger * 3. Neither the name of the University nor the names of its contributors 35f7220c48SRichard Scheffenegger * may be used to endorse or promote products derived from this software 36f7220c48SRichard Scheffenegger * without specific prior written permission. 37f7220c48SRichard Scheffenegger * 38f7220c48SRichard Scheffenegger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39f7220c48SRichard Scheffenegger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40f7220c48SRichard Scheffenegger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41f7220c48SRichard Scheffenegger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42f7220c48SRichard Scheffenegger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43f7220c48SRichard Scheffenegger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44f7220c48SRichard Scheffenegger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45f7220c48SRichard Scheffenegger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46f7220c48SRichard Scheffenegger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47f7220c48SRichard Scheffenegger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48f7220c48SRichard Scheffenegger * SUCH DAMAGE. 49f7220c48SRichard Scheffenegger * 50f7220c48SRichard Scheffenegger * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 51f7220c48SRichard Scheffenegger */ 52f7220c48SRichard Scheffenegger 53f7220c48SRichard Scheffenegger /* 54f7220c48SRichard Scheffenegger * Utility functions to deal with Explicit Congestion Notification in TCP 55f7220c48SRichard Scheffenegger * implementing the essential parts of the Accurate ECN extension 56f7220c48SRichard Scheffenegger * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 57f7220c48SRichard Scheffenegger */ 58f7220c48SRichard Scheffenegger 59f7220c48SRichard Scheffenegger #include <sys/cdefs.h> 60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$"); 61f7220c48SRichard Scheffenegger 62f7220c48SRichard Scheffenegger #include "opt_inet.h" 63f7220c48SRichard Scheffenegger #include "opt_inet6.h" 64f7220c48SRichard Scheffenegger 65f7220c48SRichard Scheffenegger #include <sys/param.h> 66f7220c48SRichard Scheffenegger #include <sys/systm.h> 67f7220c48SRichard Scheffenegger #include <sys/kernel.h> 68f7220c48SRichard Scheffenegger #include <sys/sysctl.h> 69f7220c48SRichard Scheffenegger #include <sys/malloc.h> 70f7220c48SRichard Scheffenegger #include <sys/mbuf.h> 71f7220c48SRichard Scheffenegger #include <sys/socket.h> 72f7220c48SRichard Scheffenegger #include <sys/socketvar.h> 73f7220c48SRichard Scheffenegger 74f7220c48SRichard Scheffenegger #include <machine/cpu.h> 75f7220c48SRichard Scheffenegger 76f7220c48SRichard Scheffenegger #include <vm/uma.h> 77f7220c48SRichard Scheffenegger 78f7220c48SRichard Scheffenegger #include <net/if.h> 79f7220c48SRichard Scheffenegger #include <net/if_var.h> 80f7220c48SRichard Scheffenegger #include <net/route.h> 81f7220c48SRichard Scheffenegger #include <net/vnet.h> 82f7220c48SRichard Scheffenegger 83f7220c48SRichard Scheffenegger #include <netinet/in.h> 84f7220c48SRichard Scheffenegger #include <netinet/in_systm.h> 85f7220c48SRichard Scheffenegger #include <netinet/ip.h> 86f7220c48SRichard Scheffenegger #include <netinet/in_var.h> 87f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h> 88f7220c48SRichard Scheffenegger #include <netinet/ip_var.h> 89f7220c48SRichard Scheffenegger #include <netinet/ip6.h> 90f7220c48SRichard Scheffenegger #include <netinet/icmp6.h> 91f7220c48SRichard Scheffenegger #include <netinet6/nd6.h> 92f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h> 93f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h> 94f7220c48SRichard Scheffenegger #include <netinet/tcp.h> 95f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h> 96f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h> 97f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h> 98f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h> 99f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h> 100f7220c48SRichard Scheffenegger #include <netinet/tcpip.h> 101f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h> 102f7220c48SRichard Scheffenegger 103004bb636SRichard Scheffenegger static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, 104004bb636SRichard Scheffenegger CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 105004bb636SRichard Scheffenegger "TCP ECN"); 106004bb636SRichard Scheffenegger 107004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_do_ecn) = 2; 108004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, 109004bb636SRichard Scheffenegger CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0, 110004bb636SRichard Scheffenegger "TCP ECN support"); 111004bb636SRichard Scheffenegger 112004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_ecn_maxretries) = 1; 113004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, 114004bb636SRichard Scheffenegger CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0, 115004bb636SRichard Scheffenegger "Max retries before giving up on ECN"); 116f7220c48SRichard Scheffenegger 117f7220c48SRichard Scheffenegger /* 118f7220c48SRichard Scheffenegger * Process incoming SYN,ACK packet 119f7220c48SRichard Scheffenegger */ 120f7220c48SRichard Scheffenegger void 121f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) 122f7220c48SRichard Scheffenegger { 123f7220c48SRichard Scheffenegger 1244012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 0) 1254012ef77SRichard Scheffenegger return; 1264012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 1) || 1274012ef77SRichard Scheffenegger (V_tcp_do_ecn == 2)) { 1284012ef77SRichard Scheffenegger /* RFC3168 ECN handling */ 1294012ef77SRichard Scheffenegger if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) { 130f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 13183c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ACE_PERMIT; 1321790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 133f7220c48SRichard Scheffenegger } 1344012ef77SRichard Scheffenegger } else 1354012ef77SRichard Scheffenegger /* decoding Accurate ECN according to table in section 3.1.1 */ 1364012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 1374012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 1384012ef77SRichard Scheffenegger /* 1394012ef77SRichard Scheffenegger * on the SYN,ACK, process the AccECN 1404012ef77SRichard Scheffenegger * flags indicating the state the SYN 1414012ef77SRichard Scheffenegger * was delivered. 1424012ef77SRichard Scheffenegger * Reactions to Path ECN mangling can 1434012ef77SRichard Scheffenegger * come here. 1444012ef77SRichard Scheffenegger */ 1454012ef77SRichard Scheffenegger switch (thflags & (TH_AE | TH_CWR | TH_ECE)) { 1464012ef77SRichard Scheffenegger /* RFC3168 SYN */ 1474012ef77SRichard Scheffenegger case (0|0|TH_ECE): 1484012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 14983c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ACE_PERMIT; 1504012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 1514012ef77SRichard Scheffenegger break; 1524012ef77SRichard Scheffenegger /* non-ECT SYN */ 1534012ef77SRichard Scheffenegger case (0|TH_CWR|0): 1544012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 15583c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_PERMIT; 1564012ef77SRichard Scheffenegger tp->t_scep = 5; 1574012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 1584012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 1594012ef77SRichard Scheffenegger break; 1604012ef77SRichard Scheffenegger /* ECT0 SYN */ 1614012ef77SRichard Scheffenegger case (TH_AE|0|0): 1624012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 16383c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_PERMIT; 1644012ef77SRichard Scheffenegger tp->t_scep = 5; 1654012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 1664012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect0); 1674012ef77SRichard Scheffenegger break; 1684012ef77SRichard Scheffenegger /* ECT1 SYN */ 1694012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 1704012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 17183c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_PERMIT; 1724012ef77SRichard Scheffenegger tp->t_scep = 5; 1734012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 1744012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect1); 1754012ef77SRichard Scheffenegger break; 1764012ef77SRichard Scheffenegger /* CE SYN */ 1774012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|0): 1784012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 17983c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_PERMIT; 1804012ef77SRichard Scheffenegger tp->t_scep = 6; 1814012ef77SRichard Scheffenegger /* 1824012ef77SRichard Scheffenegger * reduce the IW to 2 MSS (to 1834012ef77SRichard Scheffenegger * account for delayed acks) if 1844012ef77SRichard Scheffenegger * the SYN,ACK was CE marked 1854012ef77SRichard Scheffenegger */ 1864012ef77SRichard Scheffenegger tp->snd_cwnd = 2 * tcp_maxseg(tp); 1874012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 1884012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 1894012ef77SRichard Scheffenegger break; 1904012ef77SRichard Scheffenegger default: 19183c1ec92SRichard Scheffenegger tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT); 1924012ef77SRichard Scheffenegger break; 1934012ef77SRichard Scheffenegger } 1944012ef77SRichard Scheffenegger /* 1954012ef77SRichard Scheffenegger * Set the AccECN Codepoints on 1964012ef77SRichard Scheffenegger * the outgoing <ACK> to the ECN 1974012ef77SRichard Scheffenegger * state of the <SYN,ACK> 1984012ef77SRichard Scheffenegger * according to table 3 in the 1994012ef77SRichard Scheffenegger * AccECN draft 2004012ef77SRichard Scheffenegger */ 2014012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 2024012ef77SRichard Scheffenegger case (IPTOS_ECN_NOTECT): 2034012ef77SRichard Scheffenegger tp->t_rcep = 0b010; 2044012ef77SRichard Scheffenegger break; 2054012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT0): 2064012ef77SRichard Scheffenegger tp->t_rcep = 0b100; 2074012ef77SRichard Scheffenegger break; 2084012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT1): 2094012ef77SRichard Scheffenegger tp->t_rcep = 0b011; 2104012ef77SRichard Scheffenegger break; 2114012ef77SRichard Scheffenegger case (IPTOS_ECN_CE): 2124012ef77SRichard Scheffenegger tp->t_rcep = 0b110; 2134012ef77SRichard Scheffenegger break; 2144012ef77SRichard Scheffenegger } 2154012ef77SRichard Scheffenegger } 216f7220c48SRichard Scheffenegger } 217f7220c48SRichard Scheffenegger 218f7220c48SRichard Scheffenegger /* 219f7220c48SRichard Scheffenegger * Handle parallel SYN for ECN 220f7220c48SRichard Scheffenegger */ 221f7220c48SRichard Scheffenegger void 222f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos) 223f7220c48SRichard Scheffenegger { 224f7220c48SRichard Scheffenegger if (thflags & TH_ACK) 225f7220c48SRichard Scheffenegger return; 226f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 0) 227f7220c48SRichard Scheffenegger return; 2284012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 1) || 2294012ef77SRichard Scheffenegger (V_tcp_do_ecn == 2)) { 230f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 231f7220c48SRichard Scheffenegger if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { 232f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 23383c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ACE_PERMIT; 234f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 2351790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 236f7220c48SRichard Scheffenegger } 2374012ef77SRichard Scheffenegger } else 2384012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 2394012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 2404012ef77SRichard Scheffenegger /* AccECN handling */ 2414012ef77SRichard Scheffenegger switch (thflags & (TH_AE | TH_CWR | TH_ECE)) { 2424012ef77SRichard Scheffenegger default: 2434012ef77SRichard Scheffenegger case (0|0|0): 24483c1ec92SRichard Scheffenegger tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT); 2454012ef77SRichard Scheffenegger break; 2464012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 2474012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 24883c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ACE_PERMIT; 2494012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 2504012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 2514012ef77SRichard Scheffenegger break; 2524012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|TH_ECE): 2534012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 25483c1ec92SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_PERMIT; 2554012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 2564012ef77SRichard Scheffenegger /* 2574012ef77SRichard Scheffenegger * Set the AccECN Codepoints on 2584012ef77SRichard Scheffenegger * the outgoing <ACK> to the ECN 2594012ef77SRichard Scheffenegger * state of the <SYN,ACK> 2604012ef77SRichard Scheffenegger * according to table 3 in the 2614012ef77SRichard Scheffenegger * AccECN draft 2624012ef77SRichard Scheffenegger */ 2634012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 2644012ef77SRichard Scheffenegger case (IPTOS_ECN_NOTECT): 2654012ef77SRichard Scheffenegger tp->t_rcep = 0b010; 2664012ef77SRichard Scheffenegger break; 2674012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT0): 2684012ef77SRichard Scheffenegger tp->t_rcep = 0b100; 2694012ef77SRichard Scheffenegger break; 2704012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT1): 2714012ef77SRichard Scheffenegger tp->t_rcep = 0b011; 2724012ef77SRichard Scheffenegger break; 2734012ef77SRichard Scheffenegger case (IPTOS_ECN_CE): 2744012ef77SRichard Scheffenegger tp->t_rcep = 0b110; 2754012ef77SRichard Scheffenegger break; 2764012ef77SRichard Scheffenegger } 2774012ef77SRichard Scheffenegger break; 2784012ef77SRichard Scheffenegger } 279f7220c48SRichard Scheffenegger } 280f7220c48SRichard Scheffenegger } 281f7220c48SRichard Scheffenegger 282f7220c48SRichard Scheffenegger /* 283f7220c48SRichard Scheffenegger * TCP ECN processing. 284f7220c48SRichard Scheffenegger */ 285f7220c48SRichard Scheffenegger int 286b1258b76SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int pkts, int iptos) 287f7220c48SRichard Scheffenegger { 288b1258b76SRichard Scheffenegger int delta_cep = 0; 289f7220c48SRichard Scheffenegger 290f7220c48SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 291f7220c48SRichard Scheffenegger case IPTOS_ECN_CE: 2921a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvce); 293f7220c48SRichard Scheffenegger break; 294f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT0: 2951a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvect0); 296f7220c48SRichard Scheffenegger break; 297f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT1: 2981a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvect1); 299f7220c48SRichard Scheffenegger break; 300f7220c48SRichard Scheffenegger } 301f7220c48SRichard Scheffenegger 3021a70101aSRichard Scheffenegger if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) { 3034012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ACE_PERMIT) { 3044012ef77SRichard Scheffenegger if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 3054012ef77SRichard Scheffenegger tp->t_rcep += 1; 3064012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_PERMIT) { 307b1258b76SRichard Scheffenegger delta_cep = (tcp_ecn_get_ace(thflags) + 8 - 308b1258b76SRichard Scheffenegger (tp->t_scep & 7)) & 7; 309b1258b76SRichard Scheffenegger if (delta_cep < pkts) 310b1258b76SRichard Scheffenegger delta_cep = pkts - 311b1258b76SRichard Scheffenegger ((pkts - delta_cep) & 7); 312b1258b76SRichard Scheffenegger tp->t_scep += delta_cep; 3134012ef77SRichard Scheffenegger } else { 3144012ef77SRichard Scheffenegger /* 3154012ef77SRichard Scheffenegger * process the final ACK of the 3WHS 3164012ef77SRichard Scheffenegger * see table 3 in draft-ietf-tcpm-accurate-ecn 3174012ef77SRichard Scheffenegger */ 3184012ef77SRichard Scheffenegger switch (tcp_ecn_get_ace(thflags)) { 3194012ef77SRichard Scheffenegger case 0b010: 3204012ef77SRichard Scheffenegger /* nonECT SYN or SYN,ACK */ 321*7ea8d027SRichard Scheffenegger /* FALLTHROUGH */ 3224012ef77SRichard Scheffenegger case 0b011: 3234012ef77SRichard Scheffenegger /* ECT1 SYN or SYN,ACK */ 324*7ea8d027SRichard Scheffenegger /* FALLTHROUGH */ 3254012ef77SRichard Scheffenegger case 0b100: 3264012ef77SRichard Scheffenegger /* ECT0 SYN or SYN,ACK */ 3274012ef77SRichard Scheffenegger tp->t_scep = 5; 3284012ef77SRichard Scheffenegger break; 3294012ef77SRichard Scheffenegger case 0b110: 3304012ef77SRichard Scheffenegger /* CE SYN or SYN,ACK */ 3314012ef77SRichard Scheffenegger tp->t_scep = 6; 3324012ef77SRichard Scheffenegger tp->snd_cwnd = 2 * tcp_maxseg(tp); 3334012ef77SRichard Scheffenegger break; 3344012ef77SRichard Scheffenegger default: 3354012ef77SRichard Scheffenegger /* mangled AccECN handshake */ 3364012ef77SRichard Scheffenegger tp->t_scep = 5; 3374012ef77SRichard Scheffenegger break; 3384012ef77SRichard Scheffenegger } 3394012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 3404012ef77SRichard Scheffenegger } 3414012ef77SRichard Scheffenegger } else { 342f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 34322c81cc5SRichard Scheffenegger if ((thflags & (TH_SYN | TH_ECE)) == TH_ECE) { 344b1258b76SRichard Scheffenegger delta_cep = 1; 34522c81cc5SRichard Scheffenegger tp->t_scep++; 34622c81cc5SRichard Scheffenegger } 347f7220c48SRichard Scheffenegger if (thflags & TH_CWR) { 348f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_ECE; 349f7220c48SRichard Scheffenegger tp->t_flags |= TF_ACKNOW; 350f7220c48SRichard Scheffenegger } 351f7220c48SRichard Scheffenegger if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 352f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 3534012ef77SRichard Scheffenegger } 354f7220c48SRichard Scheffenegger 355f7220c48SRichard Scheffenegger /* Process a packet differently from RFC3168. */ 356f7220c48SRichard Scheffenegger cc_ecnpkt_handler_flags(tp, thflags, iptos); 357f7220c48SRichard Scheffenegger } 358f7220c48SRichard Scheffenegger 359b1258b76SRichard Scheffenegger return delta_cep; 360f7220c48SRichard Scheffenegger } 361f7220c48SRichard Scheffenegger 362f7220c48SRichard Scheffenegger /* 363f7220c48SRichard Scheffenegger * Send ECN setup <SYN> packet header flags 364f7220c48SRichard Scheffenegger */ 365f7220c48SRichard Scheffenegger uint16_t 366f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp) 367f7220c48SRichard Scheffenegger { 368f7220c48SRichard Scheffenegger uint16_t thflags = 0; 369f7220c48SRichard Scheffenegger 3704012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 0) 3714012ef77SRichard Scheffenegger return thflags; 372f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 1) { 373f7220c48SRichard Scheffenegger /* Send a RFC3168 ECN setup <SYN> packet */ 374f7220c48SRichard Scheffenegger if (tp->t_rxtshift >= 1) { 375f7220c48SRichard Scheffenegger if (tp->t_rxtshift <= V_tcp_ecn_maxretries) 376f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 377f7220c48SRichard Scheffenegger } else 378f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 3794012ef77SRichard Scheffenegger } else 3804012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 3) { 3814012ef77SRichard Scheffenegger /* Send an Accurate ECN setup <SYN> packet */ 3824012ef77SRichard Scheffenegger if (tp->t_rxtshift >= 1) { 3834012ef77SRichard Scheffenegger if (tp->t_rxtshift <= V_tcp_ecn_maxretries) 3844012ef77SRichard Scheffenegger thflags = TH_ECE|TH_CWR|TH_AE; 3854012ef77SRichard Scheffenegger } else 3864012ef77SRichard Scheffenegger thflags = TH_ECE|TH_CWR|TH_AE; 387f7220c48SRichard Scheffenegger } 388f7220c48SRichard Scheffenegger 389f7220c48SRichard Scheffenegger return thflags; 390f7220c48SRichard Scheffenegger } 391f7220c48SRichard Scheffenegger 392f7220c48SRichard Scheffenegger /* 393f7220c48SRichard Scheffenegger * output processing of ECN feature 394f7220c48SRichard Scheffenegger * returning IP ECN header codepoint 395f7220c48SRichard Scheffenegger */ 396f7220c48SRichard Scheffenegger int 3972ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit) 398f7220c48SRichard Scheffenegger { 399f7220c48SRichard Scheffenegger int ipecn = IPTOS_ECN_NOTECT; 400f7220c48SRichard Scheffenegger bool newdata; 401f7220c48SRichard Scheffenegger 402f7220c48SRichard Scheffenegger /* 403f7220c48SRichard Scheffenegger * If the peer has ECN, mark data packets with 404f7220c48SRichard Scheffenegger * ECN capable transmission (ECT). 405f7220c48SRichard Scheffenegger * Ignore pure control packets, retransmissions 406f7220c48SRichard Scheffenegger * and window probes. 407f7220c48SRichard Scheffenegger */ 408f7220c48SRichard Scheffenegger newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && 4092ff07d92SRichard Scheffenegger !rxmit && 410f7220c48SRichard Scheffenegger !((tp->t_flags & TF_FORCEDATA) && len == 1)); 4114012ef77SRichard Scheffenegger /* RFC3168 ECN marking, only new data segments */ 412f7220c48SRichard Scheffenegger if (newdata) { 413dc9daa04SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_USE_ECT1) { 414dc9daa04SRichard Scheffenegger ipecn = IPTOS_ECN_ECT1; 4151a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_sndect1); 416dc9daa04SRichard Scheffenegger } else { 417f7220c48SRichard Scheffenegger ipecn = IPTOS_ECN_ECT0; 4181a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_sndect0); 419f7220c48SRichard Scheffenegger } 420dc9daa04SRichard Scheffenegger } 421f7220c48SRichard Scheffenegger /* 422f7220c48SRichard Scheffenegger * Reply with proper ECN notifications. 423f7220c48SRichard Scheffenegger */ 4244012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ACE_PERMIT) { 4254012ef77SRichard Scheffenegger *thflags &= ~(TH_AE|TH_CWR|TH_ECE); 4264012ef77SRichard Scheffenegger if (tp->t_rcep & 0x01) 4274012ef77SRichard Scheffenegger *thflags |= TH_ECE; 4284012ef77SRichard Scheffenegger if (tp->t_rcep & 0x02) 4294012ef77SRichard Scheffenegger *thflags |= TH_CWR; 4304012ef77SRichard Scheffenegger if (tp->t_rcep & 0x04) 4314012ef77SRichard Scheffenegger *thflags |= TH_AE; 4324012ef77SRichard Scheffenegger if (!(tp->t_flags2 & TF2_ECN_PERMIT)) { 4334012ef77SRichard Scheffenegger /* 4344012ef77SRichard Scheffenegger * here we process the final 4354012ef77SRichard Scheffenegger * ACK of the 3WHS 4364012ef77SRichard Scheffenegger */ 4374012ef77SRichard Scheffenegger if (tp->t_rcep == 0b110) { 4384012ef77SRichard Scheffenegger tp->t_rcep = 6; 4394012ef77SRichard Scheffenegger } else { 4404012ef77SRichard Scheffenegger tp->t_rcep = 5; 4414012ef77SRichard Scheffenegger } 4424012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 4434012ef77SRichard Scheffenegger } 4444012ef77SRichard Scheffenegger } else { 445f7220c48SRichard Scheffenegger if (newdata && 446f7220c48SRichard Scheffenegger (tp->t_flags2 & TF2_ECN_SND_CWR)) { 447f7220c48SRichard Scheffenegger *thflags |= TH_CWR; 448f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_CWR; 449f7220c48SRichard Scheffenegger } 450f7220c48SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_SND_ECE) 451f7220c48SRichard Scheffenegger *thflags |= TH_ECE; 4524012ef77SRichard Scheffenegger } 453f7220c48SRichard Scheffenegger 454f7220c48SRichard Scheffenegger return ipecn; 455f7220c48SRichard Scheffenegger } 456f7220c48SRichard Scheffenegger 457f7220c48SRichard Scheffenegger /* 458f7220c48SRichard Scheffenegger * Set up the ECN related tcpcb fields from 459f7220c48SRichard Scheffenegger * a syncache entry 460f7220c48SRichard Scheffenegger */ 461f7220c48SRichard Scheffenegger void 462f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc) 463f7220c48SRichard Scheffenegger { 4643f169c54SRichard Scheffenegger if (sc->sc_flags & SCF_ECN_MASK) { 4653f169c54SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN_MASK) { 466f7220c48SRichard Scheffenegger case SCF_ECN: 467f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 468f7220c48SRichard Scheffenegger break; 4694012ef77SRichard Scheffenegger case SCF_ACE_N: 470*7ea8d027SRichard Scheffenegger /* FALLTHROUGH */ 4714012ef77SRichard Scheffenegger case SCF_ACE_0: 472*7ea8d027SRichard Scheffenegger /* FALLTHROUGH */ 4734012ef77SRichard Scheffenegger case SCF_ACE_1: 4744012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 4754012ef77SRichard Scheffenegger tp->t_scep = 5; 4764012ef77SRichard Scheffenegger tp->t_rcep = 5; 4774012ef77SRichard Scheffenegger break; 4784012ef77SRichard Scheffenegger case SCF_ACE_CE: 4794012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 4804012ef77SRichard Scheffenegger tp->t_scep = 6; 4814012ef77SRichard Scheffenegger tp->t_rcep = 6; 4824012ef77SRichard Scheffenegger break; 483f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 484f7220c48SRichard Scheffenegger default: 485f7220c48SRichard Scheffenegger break; 486f7220c48SRichard Scheffenegger } 487f7220c48SRichard Scheffenegger } 488f7220c48SRichard Scheffenegger } 489f7220c48SRichard Scheffenegger 490f7220c48SRichard Scheffenegger /* 491f7220c48SRichard Scheffenegger * Process a <SYN> packets ECN information, and provide the 492f7220c48SRichard Scheffenegger * syncache with the relevant information. 493f7220c48SRichard Scheffenegger */ 494f7220c48SRichard Scheffenegger int 495f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos) 496f7220c48SRichard Scheffenegger { 497f7220c48SRichard Scheffenegger int scflags = 0; 498f7220c48SRichard Scheffenegger 4991a70101aSRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 5001a70101aSRichard Scheffenegger case IPTOS_ECN_CE: 5011a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvce); 5021a70101aSRichard Scheffenegger break; 5031a70101aSRichard Scheffenegger case IPTOS_ECN_ECT0: 5041a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvect0); 5051a70101aSRichard Scheffenegger break; 5061a70101aSRichard Scheffenegger case IPTOS_ECN_ECT1: 5071a70101aSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_rcvect1); 5081a70101aSRichard Scheffenegger break; 5091a70101aSRichard Scheffenegger } 5101a70101aSRichard Scheffenegger 5114012ef77SRichard Scheffenegger switch (thflags & (TH_AE|TH_CWR|TH_ECE)) { 512f7220c48SRichard Scheffenegger /* no ECN */ 5134012ef77SRichard Scheffenegger case (0|0|0): 514f7220c48SRichard Scheffenegger break; 515f7220c48SRichard Scheffenegger /* legacy ECN */ 5164012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 517f7220c48SRichard Scheffenegger scflags = SCF_ECN; 518f7220c48SRichard Scheffenegger break; 5194012ef77SRichard Scheffenegger /* Accurate ECN */ 5204012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|TH_ECE): 5214012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 5224012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 5234012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 5244012ef77SRichard Scheffenegger case IPTOS_ECN_CE: 5254012ef77SRichard Scheffenegger scflags = SCF_ACE_CE; 5264012ef77SRichard Scheffenegger break; 5274012ef77SRichard Scheffenegger case IPTOS_ECN_ECT0: 5284012ef77SRichard Scheffenegger scflags = SCF_ACE_0; 5294012ef77SRichard Scheffenegger break; 5304012ef77SRichard Scheffenegger case IPTOS_ECN_ECT1: 5314012ef77SRichard Scheffenegger scflags = SCF_ACE_1; 5324012ef77SRichard Scheffenegger break; 5334012ef77SRichard Scheffenegger case IPTOS_ECN_NOTECT: 5344012ef77SRichard Scheffenegger scflags = SCF_ACE_N; 5354012ef77SRichard Scheffenegger break; 5364012ef77SRichard Scheffenegger } 5374012ef77SRichard Scheffenegger } else 5384012ef77SRichard Scheffenegger scflags = SCF_ECN; 5394012ef77SRichard Scheffenegger break; 5404012ef77SRichard Scheffenegger /* Default Case (section 3.1.2) */ 541f7220c48SRichard Scheffenegger default: 5424012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 5434012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 5444012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 5454012ef77SRichard Scheffenegger case IPTOS_ECN_CE: 5464012ef77SRichard Scheffenegger scflags = SCF_ACE_CE; 5474012ef77SRichard Scheffenegger break; 5484012ef77SRichard Scheffenegger case IPTOS_ECN_ECT0: 5494012ef77SRichard Scheffenegger scflags = SCF_ACE_0; 5504012ef77SRichard Scheffenegger break; 5514012ef77SRichard Scheffenegger case IPTOS_ECN_ECT1: 5524012ef77SRichard Scheffenegger scflags = SCF_ACE_1; 5534012ef77SRichard Scheffenegger break; 5544012ef77SRichard Scheffenegger case IPTOS_ECN_NOTECT: 5554012ef77SRichard Scheffenegger scflags = SCF_ACE_N; 5564012ef77SRichard Scheffenegger break; 5574012ef77SRichard Scheffenegger } 5584012ef77SRichard Scheffenegger } 559f7220c48SRichard Scheffenegger break; 560f7220c48SRichard Scheffenegger } 561f7220c48SRichard Scheffenegger return scflags; 562f7220c48SRichard Scheffenegger } 563f7220c48SRichard Scheffenegger 564f7220c48SRichard Scheffenegger /* 565f7220c48SRichard Scheffenegger * Set up the ECN information for the <SYN,ACK> from 566f7220c48SRichard Scheffenegger * syncache information. 567f7220c48SRichard Scheffenegger */ 568f7220c48SRichard Scheffenegger uint16_t 569f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) 570f7220c48SRichard Scheffenegger { 571f7220c48SRichard Scheffenegger if ((thflags & TH_SYN) && 5723f169c54SRichard Scheffenegger (sc->sc_flags & SCF_ECN_MASK)) { 5733f169c54SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN_MASK) { 574f7220c48SRichard Scheffenegger case SCF_ECN: 5754012ef77SRichard Scheffenegger thflags |= (0 | 0 | TH_ECE); 5761790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 577f7220c48SRichard Scheffenegger break; 5784012ef77SRichard Scheffenegger case SCF_ACE_N: 5794012ef77SRichard Scheffenegger thflags |= (0 | TH_CWR | 0); 5804012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 5814012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 5824012ef77SRichard Scheffenegger break; 5834012ef77SRichard Scheffenegger case SCF_ACE_0: 5844012ef77SRichard Scheffenegger thflags |= (TH_AE | 0 | 0); 5854012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 5864012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect0); 5874012ef77SRichard Scheffenegger break; 5884012ef77SRichard Scheffenegger case SCF_ACE_1: 5894012ef77SRichard Scheffenegger thflags |= (0 | TH_ECE | TH_CWR); 5904012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 5914012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect1); 5924012ef77SRichard Scheffenegger break; 5934012ef77SRichard Scheffenegger case SCF_ACE_CE: 5944012ef77SRichard Scheffenegger thflags |= (TH_AE | TH_CWR | 0); 5954012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 5964012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ce); 5974012ef77SRichard Scheffenegger break; 598f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 599f7220c48SRichard Scheffenegger default: 600f7220c48SRichard Scheffenegger break; 601f7220c48SRichard Scheffenegger } 602f7220c48SRichard Scheffenegger } 603f7220c48SRichard Scheffenegger return thflags; 604f7220c48SRichard Scheffenegger } 6054012ef77SRichard Scheffenegger 6064012ef77SRichard Scheffenegger int 6074012ef77SRichard Scheffenegger tcp_ecn_get_ace(uint16_t thflags) 6084012ef77SRichard Scheffenegger { 6094012ef77SRichard Scheffenegger int ace = 0; 6104012ef77SRichard Scheffenegger 6114012ef77SRichard Scheffenegger if (thflags & TH_ECE) 6124012ef77SRichard Scheffenegger ace += 1; 6134012ef77SRichard Scheffenegger if (thflags & TH_CWR) 6144012ef77SRichard Scheffenegger ace += 2; 6154012ef77SRichard Scheffenegger if (thflags & TH_AE) 6164012ef77SRichard Scheffenegger ace += 4; 6174012ef77SRichard Scheffenegger return ace; 6184012ef77SRichard Scheffenegger } 619