1f7220c48SRichard Scheffenegger /*- 2f7220c48SRichard Scheffenegger * SPDX-License-Identifier: BSD-3-Clause 3f7220c48SRichard Scheffenegger * 4f7220c48SRichard Scheffenegger * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 5f7220c48SRichard Scheffenegger * The Regents of the University of California. All rights reserved. 6f7220c48SRichard Scheffenegger * Copyright (c) 2007-2008,2010 7f7220c48SRichard Scheffenegger * Swinburne University of Technology, Melbourne, Australia. 8f7220c48SRichard Scheffenegger * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 9f7220c48SRichard Scheffenegger * Copyright (c) 2010 The FreeBSD Foundation 10f7220c48SRichard Scheffenegger * Copyright (c) 2010-2011 Juniper Networks, Inc. 11f7220c48SRichard Scheffenegger * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com> 12f7220c48SRichard Scheffenegger * All rights reserved. 13f7220c48SRichard Scheffenegger * 14f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced Internet 15f7220c48SRichard Scheffenegger * Architectures, Swinburne University of Technology, by Lawrence Stewart, 16f7220c48SRichard Scheffenegger * James Healy and David Hayes, made possible in part by a grant from the Cisco 17f7220c48SRichard Scheffenegger * University Research Program Fund at Community Foundation Silicon Valley. 18f7220c48SRichard Scheffenegger * 19f7220c48SRichard Scheffenegger * Portions of this software were developed at the Centre for Advanced 20f7220c48SRichard Scheffenegger * Internet Architectures, Swinburne University of Technology, Melbourne, 21f7220c48SRichard Scheffenegger * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 22f7220c48SRichard Scheffenegger * 23f7220c48SRichard Scheffenegger * Portions of this software were developed by Robert N. M. Watson under 24f7220c48SRichard Scheffenegger * contract to Juniper Networks, Inc. 25f7220c48SRichard Scheffenegger * 26f7220c48SRichard Scheffenegger * Redistribution and use in source and binary forms, with or without 27f7220c48SRichard Scheffenegger * modification, are permitted provided that the following conditions 28f7220c48SRichard Scheffenegger * are met: 29f7220c48SRichard Scheffenegger * 1. Redistributions of source code must retain the above copyright 30f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer. 31f7220c48SRichard Scheffenegger * 2. Redistributions in binary form must reproduce the above copyright 32f7220c48SRichard Scheffenegger * notice, this list of conditions and the following disclaimer in the 33f7220c48SRichard Scheffenegger * documentation and/or other materials provided with the distribution. 34f7220c48SRichard Scheffenegger * 3. Neither the name of the University nor the names of its contributors 35f7220c48SRichard Scheffenegger * may be used to endorse or promote products derived from this software 36f7220c48SRichard Scheffenegger * without specific prior written permission. 37f7220c48SRichard Scheffenegger * 38f7220c48SRichard Scheffenegger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39f7220c48SRichard Scheffenegger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40f7220c48SRichard Scheffenegger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41f7220c48SRichard Scheffenegger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42f7220c48SRichard Scheffenegger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43f7220c48SRichard Scheffenegger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44f7220c48SRichard Scheffenegger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45f7220c48SRichard Scheffenegger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46f7220c48SRichard Scheffenegger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47f7220c48SRichard Scheffenegger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48f7220c48SRichard Scheffenegger * SUCH DAMAGE. 49f7220c48SRichard Scheffenegger * 50f7220c48SRichard Scheffenegger * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 51f7220c48SRichard Scheffenegger */ 52f7220c48SRichard Scheffenegger 53f7220c48SRichard Scheffenegger /* 54f7220c48SRichard Scheffenegger * Utility functions to deal with Explicit Congestion Notification in TCP 55f7220c48SRichard Scheffenegger * implementing the essential parts of the Accurate ECN extension 56f7220c48SRichard Scheffenegger * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 57f7220c48SRichard Scheffenegger */ 58f7220c48SRichard Scheffenegger 59f7220c48SRichard Scheffenegger #include <sys/cdefs.h> 60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$"); 61f7220c48SRichard Scheffenegger 62f7220c48SRichard Scheffenegger #include "opt_inet.h" 63f7220c48SRichard Scheffenegger #include "opt_inet6.h" 64f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h" 65f7220c48SRichard Scheffenegger 66f7220c48SRichard Scheffenegger #include <sys/param.h> 67f7220c48SRichard Scheffenegger #include <sys/systm.h> 68f7220c48SRichard Scheffenegger #include <sys/kernel.h> 69f7220c48SRichard Scheffenegger #include <sys/sysctl.h> 70f7220c48SRichard Scheffenegger #include <sys/malloc.h> 71f7220c48SRichard Scheffenegger #include <sys/mbuf.h> 72f7220c48SRichard Scheffenegger #include <sys/socket.h> 73f7220c48SRichard Scheffenegger #include <sys/socketvar.h> 74f7220c48SRichard Scheffenegger 75f7220c48SRichard Scheffenegger #include <machine/cpu.h> 76f7220c48SRichard Scheffenegger 77f7220c48SRichard Scheffenegger #include <vm/uma.h> 78f7220c48SRichard Scheffenegger 79f7220c48SRichard Scheffenegger #include <net/if.h> 80f7220c48SRichard Scheffenegger #include <net/if_var.h> 81f7220c48SRichard Scheffenegger #include <net/route.h> 82f7220c48SRichard Scheffenegger #include <net/vnet.h> 83f7220c48SRichard Scheffenegger 84f7220c48SRichard Scheffenegger #include <netinet/in.h> 85f7220c48SRichard Scheffenegger #include <netinet/in_systm.h> 86f7220c48SRichard Scheffenegger #include <netinet/ip.h> 87f7220c48SRichard Scheffenegger #include <netinet/in_var.h> 88f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h> 89f7220c48SRichard Scheffenegger #include <netinet/ip_var.h> 90f7220c48SRichard Scheffenegger #include <netinet/ip6.h> 91f7220c48SRichard Scheffenegger #include <netinet/icmp6.h> 92f7220c48SRichard Scheffenegger #include <netinet6/nd6.h> 93f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h> 94f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h> 95f7220c48SRichard Scheffenegger #include <netinet/tcp.h> 96f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h> 97f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h> 98f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h> 99f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h> 100f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h> 101f7220c48SRichard Scheffenegger #include <netinet6/tcp6_var.h> 102f7220c48SRichard Scheffenegger #include <netinet/tcpip.h> 103f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h> 104f7220c48SRichard Scheffenegger 105f7220c48SRichard Scheffenegger 106f7220c48SRichard Scheffenegger /* 107f7220c48SRichard Scheffenegger * Process incoming SYN,ACK packet 108f7220c48SRichard Scheffenegger */ 109f7220c48SRichard Scheffenegger void 110f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) 111f7220c48SRichard Scheffenegger { 112f7220c48SRichard Scheffenegger 113*4012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 0) 114*4012ef77SRichard Scheffenegger return; 115*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 1) || 116*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 2)) { 117*4012ef77SRichard Scheffenegger /* RFC3168 ECN handling */ 118*4012ef77SRichard Scheffenegger if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) { 119f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 1201790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 121f7220c48SRichard Scheffenegger } 122*4012ef77SRichard Scheffenegger } else 123*4012ef77SRichard Scheffenegger /* decoding Accurate ECN according to table in section 3.1.1 */ 124*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 125*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 126*4012ef77SRichard Scheffenegger /* 127*4012ef77SRichard Scheffenegger * on the SYN,ACK, process the AccECN 128*4012ef77SRichard Scheffenegger * flags indicating the state the SYN 129*4012ef77SRichard Scheffenegger * was delivered. 130*4012ef77SRichard Scheffenegger * Reactions to Path ECN mangling can 131*4012ef77SRichard Scheffenegger * come here. 132*4012ef77SRichard Scheffenegger */ 133*4012ef77SRichard Scheffenegger switch (thflags & (TH_AE | TH_CWR | TH_ECE)) { 134*4012ef77SRichard Scheffenegger /* RFC3168 SYN */ 135*4012ef77SRichard Scheffenegger case (0|0|TH_ECE): 136*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 137*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 138*4012ef77SRichard Scheffenegger break; 139*4012ef77SRichard Scheffenegger /* non-ECT SYN */ 140*4012ef77SRichard Scheffenegger case (0|TH_CWR|0): 141*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 142*4012ef77SRichard Scheffenegger tp->t_scep = 5; 143*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 144*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 145*4012ef77SRichard Scheffenegger break; 146*4012ef77SRichard Scheffenegger /* ECT0 SYN */ 147*4012ef77SRichard Scheffenegger case (TH_AE|0|0): 148*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 149*4012ef77SRichard Scheffenegger tp->t_scep = 5; 150*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 151*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect0); 152*4012ef77SRichard Scheffenegger break; 153*4012ef77SRichard Scheffenegger /* ECT1 SYN */ 154*4012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 155*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 156*4012ef77SRichard Scheffenegger tp->t_scep = 5; 157*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 158*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect1); 159*4012ef77SRichard Scheffenegger break; 160*4012ef77SRichard Scheffenegger /* CE SYN */ 161*4012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|0): 162*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 163*4012ef77SRichard Scheffenegger tp->t_scep = 6; 164*4012ef77SRichard Scheffenegger /* 165*4012ef77SRichard Scheffenegger * reduce the IW to 2 MSS (to 166*4012ef77SRichard Scheffenegger * account for delayed acks) if 167*4012ef77SRichard Scheffenegger * the SYN,ACK was CE marked 168*4012ef77SRichard Scheffenegger */ 169*4012ef77SRichard Scheffenegger tp->snd_cwnd = 2 * tcp_maxseg(tp); 170*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 171*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 172*4012ef77SRichard Scheffenegger break; 173*4012ef77SRichard Scheffenegger default: 174*4012ef77SRichard Scheffenegger break; 175*4012ef77SRichard Scheffenegger } 176*4012ef77SRichard Scheffenegger /* 177*4012ef77SRichard Scheffenegger * Set the AccECN Codepoints on 178*4012ef77SRichard Scheffenegger * the outgoing <ACK> to the ECN 179*4012ef77SRichard Scheffenegger * state of the <SYN,ACK> 180*4012ef77SRichard Scheffenegger * according to table 3 in the 181*4012ef77SRichard Scheffenegger * AccECN draft 182*4012ef77SRichard Scheffenegger */ 183*4012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 184*4012ef77SRichard Scheffenegger case (IPTOS_ECN_NOTECT): 185*4012ef77SRichard Scheffenegger tp->t_rcep = 0b010; 186*4012ef77SRichard Scheffenegger break; 187*4012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT0): 188*4012ef77SRichard Scheffenegger tp->t_rcep = 0b100; 189*4012ef77SRichard Scheffenegger break; 190*4012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT1): 191*4012ef77SRichard Scheffenegger tp->t_rcep = 0b011; 192*4012ef77SRichard Scheffenegger break; 193*4012ef77SRichard Scheffenegger case (IPTOS_ECN_CE): 194*4012ef77SRichard Scheffenegger tp->t_rcep = 0b110; 195*4012ef77SRichard Scheffenegger break; 196*4012ef77SRichard Scheffenegger } 197*4012ef77SRichard Scheffenegger } 198f7220c48SRichard Scheffenegger } 199f7220c48SRichard Scheffenegger 200f7220c48SRichard Scheffenegger /* 201f7220c48SRichard Scheffenegger * Handle parallel SYN for ECN 202f7220c48SRichard Scheffenegger */ 203f7220c48SRichard Scheffenegger void 204f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos) 205f7220c48SRichard Scheffenegger { 206f7220c48SRichard Scheffenegger if (thflags & TH_ACK) 207f7220c48SRichard Scheffenegger return; 208f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 0) 209f7220c48SRichard Scheffenegger return; 210*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 1) || 211*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 2)) { 212f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 213f7220c48SRichard Scheffenegger if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { 214f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 215f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 2161790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 217f7220c48SRichard Scheffenegger } 218*4012ef77SRichard Scheffenegger } else 219*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 220*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 221*4012ef77SRichard Scheffenegger /* AccECN handling */ 222*4012ef77SRichard Scheffenegger switch (thflags & (TH_AE | TH_CWR | TH_ECE)) { 223*4012ef77SRichard Scheffenegger default: 224*4012ef77SRichard Scheffenegger case (0|0|0): 225*4012ef77SRichard Scheffenegger break; 226*4012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 227*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 228*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 229*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 230*4012ef77SRichard Scheffenegger break; 231*4012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|TH_ECE): 232*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 233*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 234*4012ef77SRichard Scheffenegger /* 235*4012ef77SRichard Scheffenegger * Set the AccECN Codepoints on 236*4012ef77SRichard Scheffenegger * the outgoing <ACK> to the ECN 237*4012ef77SRichard Scheffenegger * state of the <SYN,ACK> 238*4012ef77SRichard Scheffenegger * according to table 3 in the 239*4012ef77SRichard Scheffenegger * AccECN draft 240*4012ef77SRichard Scheffenegger */ 241*4012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 242*4012ef77SRichard Scheffenegger case (IPTOS_ECN_NOTECT): 243*4012ef77SRichard Scheffenegger tp->t_rcep = 0b010; 244*4012ef77SRichard Scheffenegger break; 245*4012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT0): 246*4012ef77SRichard Scheffenegger tp->t_rcep = 0b100; 247*4012ef77SRichard Scheffenegger break; 248*4012ef77SRichard Scheffenegger case (IPTOS_ECN_ECT1): 249*4012ef77SRichard Scheffenegger tp->t_rcep = 0b011; 250*4012ef77SRichard Scheffenegger break; 251*4012ef77SRichard Scheffenegger case (IPTOS_ECN_CE): 252*4012ef77SRichard Scheffenegger tp->t_rcep = 0b110; 253*4012ef77SRichard Scheffenegger break; 254*4012ef77SRichard Scheffenegger } 255*4012ef77SRichard Scheffenegger break; 256*4012ef77SRichard Scheffenegger } 257f7220c48SRichard Scheffenegger } 258f7220c48SRichard Scheffenegger } 259f7220c48SRichard Scheffenegger 260f7220c48SRichard Scheffenegger /* 261f7220c48SRichard Scheffenegger * TCP ECN processing. 262f7220c48SRichard Scheffenegger */ 263f7220c48SRichard Scheffenegger int 264f7220c48SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos) 265f7220c48SRichard Scheffenegger { 266f7220c48SRichard Scheffenegger int delta_ace = 0; 267f7220c48SRichard Scheffenegger 268*4012ef77SRichard Scheffenegger if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) { 269f7220c48SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 270f7220c48SRichard Scheffenegger case IPTOS_ECN_CE: 2711790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_ce); 272f7220c48SRichard Scheffenegger break; 273f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT0: 2741790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_ect0); 275f7220c48SRichard Scheffenegger break; 276f7220c48SRichard Scheffenegger case IPTOS_ECN_ECT1: 2771790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_ect1); 278f7220c48SRichard Scheffenegger break; 279f7220c48SRichard Scheffenegger } 280f7220c48SRichard Scheffenegger 281*4012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ACE_PERMIT) { 282*4012ef77SRichard Scheffenegger if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 283*4012ef77SRichard Scheffenegger tp->t_rcep += 1; 284*4012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_PERMIT) { 285*4012ef77SRichard Scheffenegger delta_ace = (tcp_ecn_get_ace(thflags) + 8 - 286*4012ef77SRichard Scheffenegger (tp->t_scep & 0x07)) & 0x07; 287*4012ef77SRichard Scheffenegger tp->t_scep += delta_ace; 288*4012ef77SRichard Scheffenegger } else { 289*4012ef77SRichard Scheffenegger /* 290*4012ef77SRichard Scheffenegger * process the final ACK of the 3WHS 291*4012ef77SRichard Scheffenegger * see table 3 in draft-ietf-tcpm-accurate-ecn 292*4012ef77SRichard Scheffenegger */ 293*4012ef77SRichard Scheffenegger switch (tcp_ecn_get_ace(thflags)) { 294*4012ef77SRichard Scheffenegger case 0b010: 295*4012ef77SRichard Scheffenegger /* nonECT SYN or SYN,ACK */ 296*4012ef77SRichard Scheffenegger /* Fallthrough */ 297*4012ef77SRichard Scheffenegger case 0b011: 298*4012ef77SRichard Scheffenegger /* ECT1 SYN or SYN,ACK */ 299*4012ef77SRichard Scheffenegger /* Fallthrough */ 300*4012ef77SRichard Scheffenegger case 0b100: 301*4012ef77SRichard Scheffenegger /* ECT0 SYN or SYN,ACK */ 302*4012ef77SRichard Scheffenegger tp->t_scep = 5; 303*4012ef77SRichard Scheffenegger break; 304*4012ef77SRichard Scheffenegger case 0b110: 305*4012ef77SRichard Scheffenegger /* CE SYN or SYN,ACK */ 306*4012ef77SRichard Scheffenegger tp->t_scep = 6; 307*4012ef77SRichard Scheffenegger tp->snd_cwnd = 2 * tcp_maxseg(tp); 308*4012ef77SRichard Scheffenegger break; 309*4012ef77SRichard Scheffenegger default: 310*4012ef77SRichard Scheffenegger /* mangled AccECN handshake */ 311*4012ef77SRichard Scheffenegger tp->t_scep = 5; 312*4012ef77SRichard Scheffenegger break; 313*4012ef77SRichard Scheffenegger } 314*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 315*4012ef77SRichard Scheffenegger } 316*4012ef77SRichard Scheffenegger } else { 317f7220c48SRichard Scheffenegger /* RFC3168 ECN handling */ 318f7220c48SRichard Scheffenegger if (thflags & TH_ECE) 319f7220c48SRichard Scheffenegger delta_ace = 1; 320f7220c48SRichard Scheffenegger if (thflags & TH_CWR) { 321f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_ECE; 322f7220c48SRichard Scheffenegger tp->t_flags |= TF_ACKNOW; 323f7220c48SRichard Scheffenegger } 324f7220c48SRichard Scheffenegger if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 325f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_SND_ECE; 326*4012ef77SRichard Scheffenegger } 327f7220c48SRichard Scheffenegger 328f7220c48SRichard Scheffenegger /* Process a packet differently from RFC3168. */ 329f7220c48SRichard Scheffenegger cc_ecnpkt_handler_flags(tp, thflags, iptos); 330f7220c48SRichard Scheffenegger } 331f7220c48SRichard Scheffenegger 332f7220c48SRichard Scheffenegger return delta_ace; 333f7220c48SRichard Scheffenegger } 334f7220c48SRichard Scheffenegger 335f7220c48SRichard Scheffenegger /* 336f7220c48SRichard Scheffenegger * Send ECN setup <SYN> packet header flags 337f7220c48SRichard Scheffenegger */ 338f7220c48SRichard Scheffenegger uint16_t 339f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp) 340f7220c48SRichard Scheffenegger { 341f7220c48SRichard Scheffenegger uint16_t thflags = 0; 342f7220c48SRichard Scheffenegger 343*4012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 0) 344*4012ef77SRichard Scheffenegger return thflags; 345f7220c48SRichard Scheffenegger if (V_tcp_do_ecn == 1) { 346f7220c48SRichard Scheffenegger /* Send a RFC3168 ECN setup <SYN> packet */ 347f7220c48SRichard Scheffenegger if (tp->t_rxtshift >= 1) { 348f7220c48SRichard Scheffenegger if (tp->t_rxtshift <= V_tcp_ecn_maxretries) 349f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 350f7220c48SRichard Scheffenegger } else 351f7220c48SRichard Scheffenegger thflags = TH_ECE|TH_CWR; 352*4012ef77SRichard Scheffenegger } else 353*4012ef77SRichard Scheffenegger if (V_tcp_do_ecn == 3) { 354*4012ef77SRichard Scheffenegger /* Send an Accurate ECN setup <SYN> packet */ 355*4012ef77SRichard Scheffenegger if (tp->t_rxtshift >= 1) { 356*4012ef77SRichard Scheffenegger if (tp->t_rxtshift <= V_tcp_ecn_maxretries) 357*4012ef77SRichard Scheffenegger thflags = TH_ECE|TH_CWR|TH_AE; 358*4012ef77SRichard Scheffenegger } else 359*4012ef77SRichard Scheffenegger thflags = TH_ECE|TH_CWR|TH_AE; 360f7220c48SRichard Scheffenegger } 361f7220c48SRichard Scheffenegger 362f7220c48SRichard Scheffenegger return thflags; 363f7220c48SRichard Scheffenegger } 364f7220c48SRichard Scheffenegger 365f7220c48SRichard Scheffenegger /* 366f7220c48SRichard Scheffenegger * output processing of ECN feature 367f7220c48SRichard Scheffenegger * returning IP ECN header codepoint 368f7220c48SRichard Scheffenegger */ 369f7220c48SRichard Scheffenegger int 3702ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit) 371f7220c48SRichard Scheffenegger { 372f7220c48SRichard Scheffenegger int ipecn = IPTOS_ECN_NOTECT; 373f7220c48SRichard Scheffenegger bool newdata; 374f7220c48SRichard Scheffenegger 375f7220c48SRichard Scheffenegger /* 376f7220c48SRichard Scheffenegger * If the peer has ECN, mark data packets with 377f7220c48SRichard Scheffenegger * ECN capable transmission (ECT). 378f7220c48SRichard Scheffenegger * Ignore pure control packets, retransmissions 379f7220c48SRichard Scheffenegger * and window probes. 380f7220c48SRichard Scheffenegger */ 381f7220c48SRichard Scheffenegger newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && 3822ff07d92SRichard Scheffenegger !rxmit && 383f7220c48SRichard Scheffenegger !((tp->t_flags & TF_FORCEDATA) && len == 1)); 384*4012ef77SRichard Scheffenegger /* RFC3168 ECN marking, only new data segments */ 385f7220c48SRichard Scheffenegger if (newdata) { 386f7220c48SRichard Scheffenegger ipecn = IPTOS_ECN_ECT0; 3871790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_ect0); 388f7220c48SRichard Scheffenegger } 389f7220c48SRichard Scheffenegger /* 390f7220c48SRichard Scheffenegger * Reply with proper ECN notifications. 391f7220c48SRichard Scheffenegger */ 392*4012ef77SRichard Scheffenegger if (tp->t_flags2 & TF2_ACE_PERMIT) { 393*4012ef77SRichard Scheffenegger *thflags &= ~(TH_AE|TH_CWR|TH_ECE); 394*4012ef77SRichard Scheffenegger if (tp->t_rcep & 0x01) 395*4012ef77SRichard Scheffenegger *thflags |= TH_ECE; 396*4012ef77SRichard Scheffenegger if (tp->t_rcep & 0x02) 397*4012ef77SRichard Scheffenegger *thflags |= TH_CWR; 398*4012ef77SRichard Scheffenegger if (tp->t_rcep & 0x04) 399*4012ef77SRichard Scheffenegger *thflags |= TH_AE; 400*4012ef77SRichard Scheffenegger if (!(tp->t_flags2 & TF2_ECN_PERMIT)) { 401*4012ef77SRichard Scheffenegger /* 402*4012ef77SRichard Scheffenegger * here we process the final 403*4012ef77SRichard Scheffenegger * ACK of the 3WHS 404*4012ef77SRichard Scheffenegger */ 405*4012ef77SRichard Scheffenegger if (tp->t_rcep == 0b110) { 406*4012ef77SRichard Scheffenegger tp->t_rcep = 6; 407*4012ef77SRichard Scheffenegger } else { 408*4012ef77SRichard Scheffenegger tp->t_rcep = 5; 409*4012ef77SRichard Scheffenegger } 410*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 411*4012ef77SRichard Scheffenegger } 412*4012ef77SRichard Scheffenegger } else { 413f7220c48SRichard Scheffenegger if (newdata && 414f7220c48SRichard Scheffenegger (tp->t_flags2 & TF2_ECN_SND_CWR)) { 415f7220c48SRichard Scheffenegger *thflags |= TH_CWR; 416f7220c48SRichard Scheffenegger tp->t_flags2 &= ~TF2_ECN_SND_CWR; 417f7220c48SRichard Scheffenegger } 418f7220c48SRichard Scheffenegger if (tp->t_flags2 & TF2_ECN_SND_ECE) 419f7220c48SRichard Scheffenegger *thflags |= TH_ECE; 420*4012ef77SRichard Scheffenegger } 421f7220c48SRichard Scheffenegger 422f7220c48SRichard Scheffenegger return ipecn; 423f7220c48SRichard Scheffenegger } 424f7220c48SRichard Scheffenegger 425f7220c48SRichard Scheffenegger /* 426f7220c48SRichard Scheffenegger * Set up the ECN related tcpcb fields from 427f7220c48SRichard Scheffenegger * a syncache entry 428f7220c48SRichard Scheffenegger */ 429f7220c48SRichard Scheffenegger void 430f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc) 431f7220c48SRichard Scheffenegger { 4323f169c54SRichard Scheffenegger if (sc->sc_flags & SCF_ECN_MASK) { 4333f169c54SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN_MASK) { 434f7220c48SRichard Scheffenegger case SCF_ECN: 435f7220c48SRichard Scheffenegger tp->t_flags2 |= TF2_ECN_PERMIT; 436f7220c48SRichard Scheffenegger break; 437*4012ef77SRichard Scheffenegger case SCF_ACE_N: 438*4012ef77SRichard Scheffenegger /* Fallthrough */ 439*4012ef77SRichard Scheffenegger case SCF_ACE_0: 440*4012ef77SRichard Scheffenegger /* Fallthrough */ 441*4012ef77SRichard Scheffenegger case SCF_ACE_1: 442*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 443*4012ef77SRichard Scheffenegger tp->t_scep = 5; 444*4012ef77SRichard Scheffenegger tp->t_rcep = 5; 445*4012ef77SRichard Scheffenegger break; 446*4012ef77SRichard Scheffenegger case SCF_ACE_CE: 447*4012ef77SRichard Scheffenegger tp->t_flags2 |= TF2_ACE_PERMIT; 448*4012ef77SRichard Scheffenegger tp->t_scep = 6; 449*4012ef77SRichard Scheffenegger tp->t_rcep = 6; 450*4012ef77SRichard Scheffenegger break; 451f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 452f7220c48SRichard Scheffenegger default: 453f7220c48SRichard Scheffenegger break; 454f7220c48SRichard Scheffenegger } 455f7220c48SRichard Scheffenegger } 456f7220c48SRichard Scheffenegger } 457f7220c48SRichard Scheffenegger 458f7220c48SRichard Scheffenegger /* 459f7220c48SRichard Scheffenegger * Process a <SYN> packets ECN information, and provide the 460f7220c48SRichard Scheffenegger * syncache with the relevant information. 461f7220c48SRichard Scheffenegger */ 462f7220c48SRichard Scheffenegger int 463f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos) 464f7220c48SRichard Scheffenegger { 465f7220c48SRichard Scheffenegger int scflags = 0; 466f7220c48SRichard Scheffenegger 467*4012ef77SRichard Scheffenegger switch (thflags & (TH_AE|TH_CWR|TH_ECE)) { 468f7220c48SRichard Scheffenegger /* no ECN */ 469*4012ef77SRichard Scheffenegger case (0|0|0): 470f7220c48SRichard Scheffenegger break; 471f7220c48SRichard Scheffenegger /* legacy ECN */ 472*4012ef77SRichard Scheffenegger case (0|TH_CWR|TH_ECE): 473f7220c48SRichard Scheffenegger scflags = SCF_ECN; 474f7220c48SRichard Scheffenegger break; 475*4012ef77SRichard Scheffenegger /* Accurate ECN */ 476*4012ef77SRichard Scheffenegger case (TH_AE|TH_CWR|TH_ECE): 477*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 478*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 479*4012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 480*4012ef77SRichard Scheffenegger case IPTOS_ECN_CE: 481*4012ef77SRichard Scheffenegger scflags = SCF_ACE_CE; 482*4012ef77SRichard Scheffenegger break; 483*4012ef77SRichard Scheffenegger case IPTOS_ECN_ECT0: 484*4012ef77SRichard Scheffenegger scflags = SCF_ACE_0; 485*4012ef77SRichard Scheffenegger break; 486*4012ef77SRichard Scheffenegger case IPTOS_ECN_ECT1: 487*4012ef77SRichard Scheffenegger scflags = SCF_ACE_1; 488*4012ef77SRichard Scheffenegger break; 489*4012ef77SRichard Scheffenegger case IPTOS_ECN_NOTECT: 490*4012ef77SRichard Scheffenegger scflags = SCF_ACE_N; 491*4012ef77SRichard Scheffenegger break; 492*4012ef77SRichard Scheffenegger } 493*4012ef77SRichard Scheffenegger } else 494*4012ef77SRichard Scheffenegger scflags = SCF_ECN; 495*4012ef77SRichard Scheffenegger break; 496*4012ef77SRichard Scheffenegger /* Default Case (section 3.1.2) */ 497f7220c48SRichard Scheffenegger default: 498*4012ef77SRichard Scheffenegger if ((V_tcp_do_ecn == 3) || 499*4012ef77SRichard Scheffenegger (V_tcp_do_ecn == 4)) { 500*4012ef77SRichard Scheffenegger switch (iptos & IPTOS_ECN_MASK) { 501*4012ef77SRichard Scheffenegger case IPTOS_ECN_CE: 502*4012ef77SRichard Scheffenegger scflags = SCF_ACE_CE; 503*4012ef77SRichard Scheffenegger break; 504*4012ef77SRichard Scheffenegger case IPTOS_ECN_ECT0: 505*4012ef77SRichard Scheffenegger scflags = SCF_ACE_0; 506*4012ef77SRichard Scheffenegger break; 507*4012ef77SRichard Scheffenegger case IPTOS_ECN_ECT1: 508*4012ef77SRichard Scheffenegger scflags = SCF_ACE_1; 509*4012ef77SRichard Scheffenegger break; 510*4012ef77SRichard Scheffenegger case IPTOS_ECN_NOTECT: 511*4012ef77SRichard Scheffenegger scflags = SCF_ACE_N; 512*4012ef77SRichard Scheffenegger break; 513*4012ef77SRichard Scheffenegger } 514*4012ef77SRichard Scheffenegger } 515f7220c48SRichard Scheffenegger break; 516f7220c48SRichard Scheffenegger } 517f7220c48SRichard Scheffenegger return scflags; 518f7220c48SRichard Scheffenegger } 519f7220c48SRichard Scheffenegger 520f7220c48SRichard Scheffenegger /* 521f7220c48SRichard Scheffenegger * Set up the ECN information for the <SYN,ACK> from 522f7220c48SRichard Scheffenegger * syncache information. 523f7220c48SRichard Scheffenegger */ 524f7220c48SRichard Scheffenegger uint16_t 525f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) 526f7220c48SRichard Scheffenegger { 527f7220c48SRichard Scheffenegger if ((thflags & TH_SYN) && 5283f169c54SRichard Scheffenegger (sc->sc_flags & SCF_ECN_MASK)) { 5293f169c54SRichard Scheffenegger switch (sc->sc_flags & SCF_ECN_MASK) { 530f7220c48SRichard Scheffenegger case SCF_ECN: 531*4012ef77SRichard Scheffenegger thflags |= (0 | 0 | TH_ECE); 5321790549dSRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 533f7220c48SRichard Scheffenegger break; 534*4012ef77SRichard Scheffenegger case SCF_ACE_N: 535*4012ef77SRichard Scheffenegger thflags |= (0 | TH_CWR | 0); 536*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 537*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_nect); 538*4012ef77SRichard Scheffenegger break; 539*4012ef77SRichard Scheffenegger case SCF_ACE_0: 540*4012ef77SRichard Scheffenegger thflags |= (TH_AE | 0 | 0); 541*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 542*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect0); 543*4012ef77SRichard Scheffenegger break; 544*4012ef77SRichard Scheffenegger case SCF_ACE_1: 545*4012ef77SRichard Scheffenegger thflags |= (0 | TH_ECE | TH_CWR); 546*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 547*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ect1); 548*4012ef77SRichard Scheffenegger break; 549*4012ef77SRichard Scheffenegger case SCF_ACE_CE: 550*4012ef77SRichard Scheffenegger thflags |= (TH_AE | TH_CWR | 0); 551*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ecn_shs); 552*4012ef77SRichard Scheffenegger TCPSTAT_INC(tcps_ace_ce); 553*4012ef77SRichard Scheffenegger break; 554f7220c48SRichard Scheffenegger /* undefined SCF codepoint */ 555f7220c48SRichard Scheffenegger default: 556f7220c48SRichard Scheffenegger break; 557f7220c48SRichard Scheffenegger } 558f7220c48SRichard Scheffenegger } 559f7220c48SRichard Scheffenegger return thflags; 560f7220c48SRichard Scheffenegger } 561*4012ef77SRichard Scheffenegger 562*4012ef77SRichard Scheffenegger int 563*4012ef77SRichard Scheffenegger tcp_ecn_get_ace(uint16_t thflags) 564*4012ef77SRichard Scheffenegger { 565*4012ef77SRichard Scheffenegger int ace = 0; 566*4012ef77SRichard Scheffenegger 567*4012ef77SRichard Scheffenegger if (thflags & TH_ECE) 568*4012ef77SRichard Scheffenegger ace += 1; 569*4012ef77SRichard Scheffenegger if (thflags & TH_CWR) 570*4012ef77SRichard Scheffenegger ace += 2; 571*4012ef77SRichard Scheffenegger if (thflags & TH_AE) 572*4012ef77SRichard Scheffenegger ace += 4; 573*4012ef77SRichard Scheffenegger return ace; 574*4012ef77SRichard Scheffenegger } 575