xref: /freebsd/sys/netinet/tcp_ecn.c (revision 004bb636ca65f3239da284c20abb7f9d1d953dee)
1f7220c48SRichard Scheffenegger /*-
2f7220c48SRichard Scheffenegger  * SPDX-License-Identifier: BSD-3-Clause
3f7220c48SRichard Scheffenegger  *
4f7220c48SRichard Scheffenegger  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5f7220c48SRichard Scheffenegger  *      The Regents of the University of California.  All rights reserved.
6f7220c48SRichard Scheffenegger  * Copyright (c) 2007-2008,2010
7f7220c48SRichard Scheffenegger  *      Swinburne University of Technology, Melbourne, Australia.
8f7220c48SRichard Scheffenegger  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9f7220c48SRichard Scheffenegger  * Copyright (c) 2010 The FreeBSD Foundation
10f7220c48SRichard Scheffenegger  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11f7220c48SRichard Scheffenegger  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12f7220c48SRichard Scheffenegger  * All rights reserved.
13f7220c48SRichard Scheffenegger  *
14f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced Internet
15f7220c48SRichard Scheffenegger  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16f7220c48SRichard Scheffenegger  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17f7220c48SRichard Scheffenegger  * University Research Program Fund at Community Foundation Silicon Valley.
18f7220c48SRichard Scheffenegger  *
19f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced
20f7220c48SRichard Scheffenegger  * Internet Architectures, Swinburne University of Technology, Melbourne,
21f7220c48SRichard Scheffenegger  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22f7220c48SRichard Scheffenegger  *
23f7220c48SRichard Scheffenegger  * Portions of this software were developed by Robert N. M. Watson under
24f7220c48SRichard Scheffenegger  * contract to Juniper Networks, Inc.
25f7220c48SRichard Scheffenegger  *
26f7220c48SRichard Scheffenegger  * Redistribution and use in source and binary forms, with or without
27f7220c48SRichard Scheffenegger  * modification, are permitted provided that the following conditions
28f7220c48SRichard Scheffenegger  * are met:
29f7220c48SRichard Scheffenegger  * 1. Redistributions of source code must retain the above copyright
30f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer.
31f7220c48SRichard Scheffenegger  * 2. Redistributions in binary form must reproduce the above copyright
32f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer in the
33f7220c48SRichard Scheffenegger  *    documentation and/or other materials provided with the distribution.
34f7220c48SRichard Scheffenegger  * 3. Neither the name of the University nor the names of its contributors
35f7220c48SRichard Scheffenegger  *    may be used to endorse or promote products derived from this software
36f7220c48SRichard Scheffenegger  *    without specific prior written permission.
37f7220c48SRichard Scheffenegger  *
38f7220c48SRichard Scheffenegger  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39f7220c48SRichard Scheffenegger  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40f7220c48SRichard Scheffenegger  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41f7220c48SRichard Scheffenegger  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42f7220c48SRichard Scheffenegger  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43f7220c48SRichard Scheffenegger  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44f7220c48SRichard Scheffenegger  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45f7220c48SRichard Scheffenegger  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46f7220c48SRichard Scheffenegger  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47f7220c48SRichard Scheffenegger  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48f7220c48SRichard Scheffenegger  * SUCH DAMAGE.
49f7220c48SRichard Scheffenegger  *
50f7220c48SRichard Scheffenegger  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51f7220c48SRichard Scheffenegger  */
52f7220c48SRichard Scheffenegger 
53f7220c48SRichard Scheffenegger /*
54f7220c48SRichard Scheffenegger  * Utility functions to deal with Explicit Congestion Notification in TCP
55f7220c48SRichard Scheffenegger  * implementing the essential parts of the Accurate ECN extension
56f7220c48SRichard Scheffenegger  * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
57f7220c48SRichard Scheffenegger  */
58f7220c48SRichard Scheffenegger 
59f7220c48SRichard Scheffenegger #include <sys/cdefs.h>
60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$");
61f7220c48SRichard Scheffenegger 
62f7220c48SRichard Scheffenegger #include "opt_inet.h"
63f7220c48SRichard Scheffenegger #include "opt_inet6.h"
64f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h"
65f7220c48SRichard Scheffenegger 
66f7220c48SRichard Scheffenegger #include <sys/param.h>
67f7220c48SRichard Scheffenegger #include <sys/systm.h>
68f7220c48SRichard Scheffenegger #include <sys/kernel.h>
69f7220c48SRichard Scheffenegger #include <sys/sysctl.h>
70f7220c48SRichard Scheffenegger #include <sys/malloc.h>
71f7220c48SRichard Scheffenegger #include <sys/mbuf.h>
72f7220c48SRichard Scheffenegger #include <sys/socket.h>
73f7220c48SRichard Scheffenegger #include <sys/socketvar.h>
74f7220c48SRichard Scheffenegger 
75f7220c48SRichard Scheffenegger #include <machine/cpu.h>
76f7220c48SRichard Scheffenegger 
77f7220c48SRichard Scheffenegger #include <vm/uma.h>
78f7220c48SRichard Scheffenegger 
79f7220c48SRichard Scheffenegger #include <net/if.h>
80f7220c48SRichard Scheffenegger #include <net/if_var.h>
81f7220c48SRichard Scheffenegger #include <net/route.h>
82f7220c48SRichard Scheffenegger #include <net/vnet.h>
83f7220c48SRichard Scheffenegger 
84f7220c48SRichard Scheffenegger #include <netinet/in.h>
85f7220c48SRichard Scheffenegger #include <netinet/in_systm.h>
86f7220c48SRichard Scheffenegger #include <netinet/ip.h>
87f7220c48SRichard Scheffenegger #include <netinet/in_var.h>
88f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h>
89f7220c48SRichard Scheffenegger #include <netinet/ip_var.h>
90f7220c48SRichard Scheffenegger #include <netinet/ip6.h>
91f7220c48SRichard Scheffenegger #include <netinet/icmp6.h>
92f7220c48SRichard Scheffenegger #include <netinet6/nd6.h>
93f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h>
94f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h>
95f7220c48SRichard Scheffenegger #include <netinet/tcp.h>
96f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h>
97f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h>
98f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h>
99f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h>
100f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h>
101f7220c48SRichard Scheffenegger #include <netinet/tcpip.h>
102f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h>
103f7220c48SRichard Scheffenegger 
104*004bb636SRichard Scheffenegger static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
105*004bb636SRichard Scheffenegger     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
106*004bb636SRichard Scheffenegger     "TCP ECN");
107*004bb636SRichard Scheffenegger 
108*004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_do_ecn) = 2;
109*004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable,
110*004bb636SRichard Scheffenegger     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0,
111*004bb636SRichard Scheffenegger     "TCP ECN support");
112*004bb636SRichard Scheffenegger 
113*004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
114*004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries,
115*004bb636SRichard Scheffenegger     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
116*004bb636SRichard Scheffenegger     "Max retries before giving up on ECN");
117f7220c48SRichard Scheffenegger 
118f7220c48SRichard Scheffenegger /*
119f7220c48SRichard Scheffenegger  * Process incoming SYN,ACK packet
120f7220c48SRichard Scheffenegger  */
121f7220c48SRichard Scheffenegger void
122f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
123f7220c48SRichard Scheffenegger {
124f7220c48SRichard Scheffenegger 
1254012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
1264012ef77SRichard Scheffenegger 		return;
1274012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
1284012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
1294012ef77SRichard Scheffenegger 		/* RFC3168 ECN handling */
1304012ef77SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
131f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
13283c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1331790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
134f7220c48SRichard Scheffenegger 		}
1354012ef77SRichard Scheffenegger 	} else
1364012ef77SRichard Scheffenegger 	/* decoding Accurate ECN according to table in section 3.1.1 */
1374012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
1384012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
1394012ef77SRichard Scheffenegger 		/*
1404012ef77SRichard Scheffenegger 		 * on the SYN,ACK, process the AccECN
1414012ef77SRichard Scheffenegger 		 * flags indicating the state the SYN
1424012ef77SRichard Scheffenegger 		 * was delivered.
1434012ef77SRichard Scheffenegger 		 * Reactions to Path ECN mangling can
1444012ef77SRichard Scheffenegger 		 * come here.
1454012ef77SRichard Scheffenegger 		 */
1464012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
1474012ef77SRichard Scheffenegger 		/* RFC3168 SYN */
1484012ef77SRichard Scheffenegger 		case (0|0|TH_ECE):
1494012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
15083c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1514012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1524012ef77SRichard Scheffenegger 			break;
1534012ef77SRichard Scheffenegger 		/* non-ECT SYN */
1544012ef77SRichard Scheffenegger 		case (0|TH_CWR|0):
1554012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
15683c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1574012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1584012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1594012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1604012ef77SRichard Scheffenegger 			break;
1614012ef77SRichard Scheffenegger 		/* ECT0 SYN */
1624012ef77SRichard Scheffenegger 		case (TH_AE|0|0):
1634012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
16483c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1654012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1664012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1674012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
1684012ef77SRichard Scheffenegger 			break;
1694012ef77SRichard Scheffenegger 		/* ECT1 SYN */
1704012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
1714012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
17283c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1734012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1744012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1754012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
1764012ef77SRichard Scheffenegger 			break;
1774012ef77SRichard Scheffenegger 		/* CE SYN */
1784012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|0):
1794012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
18083c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1814012ef77SRichard Scheffenegger 			tp->t_scep = 6;
1824012ef77SRichard Scheffenegger 			/*
1834012ef77SRichard Scheffenegger 			 * reduce the IW to 2 MSS (to
1844012ef77SRichard Scheffenegger 			 * account for delayed acks) if
1854012ef77SRichard Scheffenegger 			 * the SYN,ACK was CE marked
1864012ef77SRichard Scheffenegger 			 */
1874012ef77SRichard Scheffenegger 			tp->snd_cwnd = 2 * tcp_maxseg(tp);
1884012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1894012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1904012ef77SRichard Scheffenegger 			break;
1914012ef77SRichard Scheffenegger 		default:
19283c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
1934012ef77SRichard Scheffenegger 			break;
1944012ef77SRichard Scheffenegger 		}
1954012ef77SRichard Scheffenegger 		/*
1964012ef77SRichard Scheffenegger 		 * Set the AccECN Codepoints on
1974012ef77SRichard Scheffenegger 		 * the outgoing <ACK> to the ECN
1984012ef77SRichard Scheffenegger 		 * state of the <SYN,ACK>
1994012ef77SRichard Scheffenegger 		 * according to table 3 in the
2004012ef77SRichard Scheffenegger 		 * AccECN draft
2014012ef77SRichard Scheffenegger 		 */
2024012ef77SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
2034012ef77SRichard Scheffenegger 		case (IPTOS_ECN_NOTECT):
2044012ef77SRichard Scheffenegger 			tp->t_rcep = 0b010;
2054012ef77SRichard Scheffenegger 			break;
2064012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT0):
2074012ef77SRichard Scheffenegger 			tp->t_rcep = 0b100;
2084012ef77SRichard Scheffenegger 			break;
2094012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT1):
2104012ef77SRichard Scheffenegger 			tp->t_rcep = 0b011;
2114012ef77SRichard Scheffenegger 			break;
2124012ef77SRichard Scheffenegger 		case (IPTOS_ECN_CE):
2134012ef77SRichard Scheffenegger 			tp->t_rcep = 0b110;
2144012ef77SRichard Scheffenegger 			break;
2154012ef77SRichard Scheffenegger 		}
2164012ef77SRichard Scheffenegger 	}
217f7220c48SRichard Scheffenegger }
218f7220c48SRichard Scheffenegger 
219f7220c48SRichard Scheffenegger /*
220f7220c48SRichard Scheffenegger  * Handle parallel SYN for ECN
221f7220c48SRichard Scheffenegger  */
222f7220c48SRichard Scheffenegger void
223f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
224f7220c48SRichard Scheffenegger {
225f7220c48SRichard Scheffenegger 	if (thflags & TH_ACK)
226f7220c48SRichard Scheffenegger 		return;
227f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
228f7220c48SRichard Scheffenegger 		return;
2294012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
2304012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
231f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
232f7220c48SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
233f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
23483c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
235f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2361790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
237f7220c48SRichard Scheffenegger 		}
2384012ef77SRichard Scheffenegger 	} else
2394012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
2404012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
2414012ef77SRichard Scheffenegger 		/* AccECN handling */
2424012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
2434012ef77SRichard Scheffenegger 		default:
2444012ef77SRichard Scheffenegger 		case (0|0|0):
24583c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
2464012ef77SRichard Scheffenegger 			break;
2474012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
2484012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
24983c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
2504012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2514012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2524012ef77SRichard Scheffenegger 			break;
2534012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|TH_ECE):
2544012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
25583c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
2564012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2574012ef77SRichard Scheffenegger 			/*
2584012ef77SRichard Scheffenegger 			 * Set the AccECN Codepoints on
2594012ef77SRichard Scheffenegger 			 * the outgoing <ACK> to the ECN
2604012ef77SRichard Scheffenegger 			 * state of the <SYN,ACK>
2614012ef77SRichard Scheffenegger 			 * according to table 3 in the
2624012ef77SRichard Scheffenegger 			 * AccECN draft
2634012ef77SRichard Scheffenegger 			 */
2644012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
2654012ef77SRichard Scheffenegger 			case (IPTOS_ECN_NOTECT):
2664012ef77SRichard Scheffenegger 				tp->t_rcep = 0b010;
2674012ef77SRichard Scheffenegger 				break;
2684012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT0):
2694012ef77SRichard Scheffenegger 				tp->t_rcep = 0b100;
2704012ef77SRichard Scheffenegger 				break;
2714012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT1):
2724012ef77SRichard Scheffenegger 				tp->t_rcep = 0b011;
2734012ef77SRichard Scheffenegger 				break;
2744012ef77SRichard Scheffenegger 			case (IPTOS_ECN_CE):
2754012ef77SRichard Scheffenegger 				tp->t_rcep = 0b110;
2764012ef77SRichard Scheffenegger 				break;
2774012ef77SRichard Scheffenegger 			}
2784012ef77SRichard Scheffenegger 			break;
2794012ef77SRichard Scheffenegger 		}
280f7220c48SRichard Scheffenegger 	}
281f7220c48SRichard Scheffenegger }
282f7220c48SRichard Scheffenegger 
283f7220c48SRichard Scheffenegger /*
284f7220c48SRichard Scheffenegger  * TCP ECN processing.
285f7220c48SRichard Scheffenegger  */
286f7220c48SRichard Scheffenegger int
287b1258b76SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int pkts, int iptos)
288f7220c48SRichard Scheffenegger {
289b1258b76SRichard Scheffenegger 	int delta_cep = 0;
290f7220c48SRichard Scheffenegger 
2914012ef77SRichard Scheffenegger 	if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
292f7220c48SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
293f7220c48SRichard Scheffenegger 		case IPTOS_ECN_CE:
2941790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ce);
295f7220c48SRichard Scheffenegger 			break;
296f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT0:
2971790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect0);
298f7220c48SRichard Scheffenegger 			break;
299f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT1:
3001790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect1);
301f7220c48SRichard Scheffenegger 			break;
302f7220c48SRichard Scheffenegger 		}
303f7220c48SRichard Scheffenegger 
3044012ef77SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ACE_PERMIT) {
3054012ef77SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
3064012ef77SRichard Scheffenegger 				tp->t_rcep += 1;
3074012ef77SRichard Scheffenegger 			if (tp->t_flags2 & TF2_ECN_PERMIT) {
308b1258b76SRichard Scheffenegger 				delta_cep = (tcp_ecn_get_ace(thflags) + 8 -
309b1258b76SRichard Scheffenegger 					    (tp->t_scep & 7)) & 7;
310b1258b76SRichard Scheffenegger 				if (delta_cep < pkts)
311b1258b76SRichard Scheffenegger 					delta_cep = pkts -
312b1258b76SRichard Scheffenegger 					    ((pkts - delta_cep) & 7);
313b1258b76SRichard Scheffenegger 				tp->t_scep += delta_cep;
3144012ef77SRichard Scheffenegger 			} else {
3154012ef77SRichard Scheffenegger 				/*
3164012ef77SRichard Scheffenegger 				 * process the final ACK of the 3WHS
3174012ef77SRichard Scheffenegger 				 * see table 3 in draft-ietf-tcpm-accurate-ecn
3184012ef77SRichard Scheffenegger 				 */
3194012ef77SRichard Scheffenegger 				switch (tcp_ecn_get_ace(thflags)) {
3204012ef77SRichard Scheffenegger 				case 0b010:
3214012ef77SRichard Scheffenegger 					/* nonECT SYN or SYN,ACK */
3224012ef77SRichard Scheffenegger 					/* Fallthrough */
3234012ef77SRichard Scheffenegger 				case 0b011:
3244012ef77SRichard Scheffenegger 					/* ECT1 SYN or SYN,ACK */
3254012ef77SRichard Scheffenegger 					/* Fallthrough */
3264012ef77SRichard Scheffenegger 				case 0b100:
3274012ef77SRichard Scheffenegger 					/* ECT0 SYN or SYN,ACK */
3284012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3294012ef77SRichard Scheffenegger 					break;
3304012ef77SRichard Scheffenegger 				case 0b110:
3314012ef77SRichard Scheffenegger 					/* CE SYN or SYN,ACK */
3324012ef77SRichard Scheffenegger 					tp->t_scep = 6;
3334012ef77SRichard Scheffenegger 					tp->snd_cwnd = 2 * tcp_maxseg(tp);
3344012ef77SRichard Scheffenegger 					break;
3354012ef77SRichard Scheffenegger 				default:
3364012ef77SRichard Scheffenegger 					/* mangled AccECN handshake */
3374012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3384012ef77SRichard Scheffenegger 					break;
3394012ef77SRichard Scheffenegger 				}
3404012ef77SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_PERMIT;
3414012ef77SRichard Scheffenegger 			}
3424012ef77SRichard Scheffenegger 		} else {
343f7220c48SRichard Scheffenegger 			/* RFC3168 ECN handling */
34422c81cc5SRichard Scheffenegger 			if ((thflags & (TH_SYN | TH_ECE)) == TH_ECE) {
345b1258b76SRichard Scheffenegger 				delta_cep = 1;
34622c81cc5SRichard Scheffenegger 				tp->t_scep++;
34722c81cc5SRichard Scheffenegger 			}
348f7220c48SRichard Scheffenegger 			if (thflags & TH_CWR) {
349f7220c48SRichard Scheffenegger 				tp->t_flags2 &= ~TF2_ECN_SND_ECE;
350f7220c48SRichard Scheffenegger 				tp->t_flags |= TF_ACKNOW;
351f7220c48SRichard Scheffenegger 			}
352f7220c48SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
353f7220c48SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_SND_ECE;
3544012ef77SRichard Scheffenegger 		}
355f7220c48SRichard Scheffenegger 
356f7220c48SRichard Scheffenegger 		/* Process a packet differently from RFC3168. */
357f7220c48SRichard Scheffenegger 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
358f7220c48SRichard Scheffenegger 	}
359f7220c48SRichard Scheffenegger 
360b1258b76SRichard Scheffenegger 	return delta_cep;
361f7220c48SRichard Scheffenegger }
362f7220c48SRichard Scheffenegger 
363f7220c48SRichard Scheffenegger /*
364f7220c48SRichard Scheffenegger  * Send ECN setup <SYN> packet header flags
365f7220c48SRichard Scheffenegger  */
366f7220c48SRichard Scheffenegger uint16_t
367f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp)
368f7220c48SRichard Scheffenegger {
369f7220c48SRichard Scheffenegger 	uint16_t thflags = 0;
370f7220c48SRichard Scheffenegger 
3714012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
3724012ef77SRichard Scheffenegger 		return thflags;
373f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 1) {
374f7220c48SRichard Scheffenegger 		/* Send a RFC3168 ECN setup <SYN> packet */
375f7220c48SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
376f7220c48SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
377f7220c48SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR;
378f7220c48SRichard Scheffenegger 		} else
379f7220c48SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR;
3804012ef77SRichard Scheffenegger 	} else
3814012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 3) {
3824012ef77SRichard Scheffenegger 		/* Send an Accurate ECN setup <SYN> packet */
3834012ef77SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
3844012ef77SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
3854012ef77SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR|TH_AE;
3864012ef77SRichard Scheffenegger 		} else
3874012ef77SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR|TH_AE;
388f7220c48SRichard Scheffenegger 	}
389f7220c48SRichard Scheffenegger 
390f7220c48SRichard Scheffenegger 	return thflags;
391f7220c48SRichard Scheffenegger }
392f7220c48SRichard Scheffenegger 
393f7220c48SRichard Scheffenegger /*
394f7220c48SRichard Scheffenegger  * output processing of ECN feature
395f7220c48SRichard Scheffenegger  * returning IP ECN header codepoint
396f7220c48SRichard Scheffenegger  */
397f7220c48SRichard Scheffenegger int
3982ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
399f7220c48SRichard Scheffenegger {
400f7220c48SRichard Scheffenegger 	int ipecn = IPTOS_ECN_NOTECT;
401f7220c48SRichard Scheffenegger 	bool newdata;
402f7220c48SRichard Scheffenegger 
403f7220c48SRichard Scheffenegger 	/*
404f7220c48SRichard Scheffenegger 	 * If the peer has ECN, mark data packets with
405f7220c48SRichard Scheffenegger 	 * ECN capable transmission (ECT).
406f7220c48SRichard Scheffenegger 	 * Ignore pure control packets, retransmissions
407f7220c48SRichard Scheffenegger 	 * and window probes.
408f7220c48SRichard Scheffenegger 	 */
409f7220c48SRichard Scheffenegger 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
4102ff07d92SRichard Scheffenegger 		    !rxmit &&
411f7220c48SRichard Scheffenegger 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
4124012ef77SRichard Scheffenegger 	/* RFC3168 ECN marking, only new data segments */
413f7220c48SRichard Scheffenegger 	if (newdata) {
414f7220c48SRichard Scheffenegger 		ipecn = IPTOS_ECN_ECT0;
4151790549dSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_ect0);
416f7220c48SRichard Scheffenegger 	}
417f7220c48SRichard Scheffenegger 	/*
418f7220c48SRichard Scheffenegger 	 * Reply with proper ECN notifications.
419f7220c48SRichard Scheffenegger 	 */
4204012ef77SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ACE_PERMIT) {
4214012ef77SRichard Scheffenegger 		*thflags &= ~(TH_AE|TH_CWR|TH_ECE);
4224012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x01)
4234012ef77SRichard Scheffenegger 			*thflags |= TH_ECE;
4244012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x02)
4254012ef77SRichard Scheffenegger 			*thflags |= TH_CWR;
4264012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x04)
4274012ef77SRichard Scheffenegger 			*thflags |= TH_AE;
4284012ef77SRichard Scheffenegger 		if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
4294012ef77SRichard Scheffenegger 			/*
4304012ef77SRichard Scheffenegger 			 * here we process the final
4314012ef77SRichard Scheffenegger 			 * ACK of the 3WHS
4324012ef77SRichard Scheffenegger 			 */
4334012ef77SRichard Scheffenegger 			if (tp->t_rcep == 0b110) {
4344012ef77SRichard Scheffenegger 				tp->t_rcep = 6;
4354012ef77SRichard Scheffenegger 			} else {
4364012ef77SRichard Scheffenegger 				tp->t_rcep = 5;
4374012ef77SRichard Scheffenegger 			}
4384012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
4394012ef77SRichard Scheffenegger 		}
4404012ef77SRichard Scheffenegger 	} else {
441f7220c48SRichard Scheffenegger 		if (newdata &&
442f7220c48SRichard Scheffenegger 		    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
443f7220c48SRichard Scheffenegger 			*thflags |= TH_CWR;
444f7220c48SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
445f7220c48SRichard Scheffenegger 		}
446f7220c48SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ECN_SND_ECE)
447f7220c48SRichard Scheffenegger 			*thflags |= TH_ECE;
4484012ef77SRichard Scheffenegger 	}
449f7220c48SRichard Scheffenegger 
450f7220c48SRichard Scheffenegger 	return ipecn;
451f7220c48SRichard Scheffenegger }
452f7220c48SRichard Scheffenegger 
453f7220c48SRichard Scheffenegger /*
454f7220c48SRichard Scheffenegger  * Set up the ECN related tcpcb fields from
455f7220c48SRichard Scheffenegger  * a syncache entry
456f7220c48SRichard Scheffenegger  */
457f7220c48SRichard Scheffenegger void
458f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
459f7220c48SRichard Scheffenegger {
4603f169c54SRichard Scheffenegger 	if (sc->sc_flags & SCF_ECN_MASK) {
4613f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
462f7220c48SRichard Scheffenegger 		case SCF_ECN:
463f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
464f7220c48SRichard Scheffenegger 			break;
4654012ef77SRichard Scheffenegger 		case SCF_ACE_N:
4664012ef77SRichard Scheffenegger 			/* Fallthrough */
4674012ef77SRichard Scheffenegger 		case SCF_ACE_0:
4684012ef77SRichard Scheffenegger 			/* Fallthrough */
4694012ef77SRichard Scheffenegger 		case SCF_ACE_1:
4704012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4714012ef77SRichard Scheffenegger 			tp->t_scep = 5;
4724012ef77SRichard Scheffenegger 			tp->t_rcep = 5;
4734012ef77SRichard Scheffenegger 			break;
4744012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
4754012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4764012ef77SRichard Scheffenegger 			tp->t_scep = 6;
4774012ef77SRichard Scheffenegger 			tp->t_rcep = 6;
4784012ef77SRichard Scheffenegger 			break;
479f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
480f7220c48SRichard Scheffenegger 		default:
481f7220c48SRichard Scheffenegger 			break;
482f7220c48SRichard Scheffenegger 		}
483f7220c48SRichard Scheffenegger 	}
484f7220c48SRichard Scheffenegger }
485f7220c48SRichard Scheffenegger 
486f7220c48SRichard Scheffenegger /*
487f7220c48SRichard Scheffenegger  * Process a <SYN> packets ECN information, and provide the
488f7220c48SRichard Scheffenegger  * syncache with the relevant information.
489f7220c48SRichard Scheffenegger  */
490f7220c48SRichard Scheffenegger int
491f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos)
492f7220c48SRichard Scheffenegger {
493f7220c48SRichard Scheffenegger 	int scflags = 0;
494f7220c48SRichard Scheffenegger 
4954012ef77SRichard Scheffenegger 	switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
496f7220c48SRichard Scheffenegger 	/* no ECN */
4974012ef77SRichard Scheffenegger 	case (0|0|0):
498f7220c48SRichard Scheffenegger 		break;
499f7220c48SRichard Scheffenegger 	/* legacy ECN */
5004012ef77SRichard Scheffenegger 	case (0|TH_CWR|TH_ECE):
501f7220c48SRichard Scheffenegger 		scflags = SCF_ECN;
502f7220c48SRichard Scheffenegger 		break;
5034012ef77SRichard Scheffenegger 	/* Accurate ECN */
5044012ef77SRichard Scheffenegger 	case (TH_AE|TH_CWR|TH_ECE):
5054012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
5064012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
5074012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
5084012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
5094012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
5104012ef77SRichard Scheffenegger 				break;
5114012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
5124012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
5134012ef77SRichard Scheffenegger 				break;
5144012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
5154012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
5164012ef77SRichard Scheffenegger 				break;
5174012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5184012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5194012ef77SRichard Scheffenegger 				break;
5204012ef77SRichard Scheffenegger 			}
5214012ef77SRichard Scheffenegger 		} else
5224012ef77SRichard Scheffenegger 			scflags = SCF_ECN;
5234012ef77SRichard Scheffenegger 		break;
5244012ef77SRichard Scheffenegger 	/* Default Case (section 3.1.2) */
525f7220c48SRichard Scheffenegger 	default:
5264012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
5274012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
5284012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
5294012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
5304012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
5314012ef77SRichard Scheffenegger 				break;
5324012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
5334012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
5344012ef77SRichard Scheffenegger 				break;
5354012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
5364012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
5374012ef77SRichard Scheffenegger 				break;
5384012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5394012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5404012ef77SRichard Scheffenegger 				break;
5414012ef77SRichard Scheffenegger 			}
5424012ef77SRichard Scheffenegger 		}
543f7220c48SRichard Scheffenegger 		break;
544f7220c48SRichard Scheffenegger 	}
545f7220c48SRichard Scheffenegger 	return scflags;
546f7220c48SRichard Scheffenegger }
547f7220c48SRichard Scheffenegger 
548f7220c48SRichard Scheffenegger /*
549f7220c48SRichard Scheffenegger  * Set up the ECN information for the <SYN,ACK> from
550f7220c48SRichard Scheffenegger  * syncache information.
551f7220c48SRichard Scheffenegger  */
552f7220c48SRichard Scheffenegger uint16_t
553f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
554f7220c48SRichard Scheffenegger {
555f7220c48SRichard Scheffenegger 	if ((thflags & TH_SYN) &&
5563f169c54SRichard Scheffenegger 	    (sc->sc_flags & SCF_ECN_MASK)) {
5573f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
558f7220c48SRichard Scheffenegger 		case SCF_ECN:
5594012ef77SRichard Scheffenegger 			thflags |= (0 | 0 | TH_ECE);
5601790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
561f7220c48SRichard Scheffenegger 			break;
5624012ef77SRichard Scheffenegger 		case SCF_ACE_N:
5634012ef77SRichard Scheffenegger 			thflags |= (0 | TH_CWR | 0);
5644012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5654012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
5664012ef77SRichard Scheffenegger 			break;
5674012ef77SRichard Scheffenegger 		case SCF_ACE_0:
5684012ef77SRichard Scheffenegger 			thflags |= (TH_AE | 0 | 0);
5694012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5704012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
5714012ef77SRichard Scheffenegger 			break;
5724012ef77SRichard Scheffenegger 		case SCF_ACE_1:
5734012ef77SRichard Scheffenegger 			thflags |= (0 | TH_ECE | TH_CWR);
5744012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5754012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
5764012ef77SRichard Scheffenegger 			break;
5774012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
5784012ef77SRichard Scheffenegger 			thflags |= (TH_AE | TH_CWR | 0);
5794012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5804012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ce);
5814012ef77SRichard Scheffenegger 			break;
582f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
583f7220c48SRichard Scheffenegger 		default:
584f7220c48SRichard Scheffenegger 			break;
585f7220c48SRichard Scheffenegger 		}
586f7220c48SRichard Scheffenegger 	}
587f7220c48SRichard Scheffenegger 	return thflags;
588f7220c48SRichard Scheffenegger }
5894012ef77SRichard Scheffenegger 
5904012ef77SRichard Scheffenegger int
5914012ef77SRichard Scheffenegger tcp_ecn_get_ace(uint16_t thflags)
5924012ef77SRichard Scheffenegger {
5934012ef77SRichard Scheffenegger 	int ace = 0;
5944012ef77SRichard Scheffenegger 
5954012ef77SRichard Scheffenegger 	if (thflags & TH_ECE)
5964012ef77SRichard Scheffenegger 		ace += 1;
5974012ef77SRichard Scheffenegger 	if (thflags & TH_CWR)
5984012ef77SRichard Scheffenegger 		ace += 2;
5994012ef77SRichard Scheffenegger 	if (thflags & TH_AE)
6004012ef77SRichard Scheffenegger 		ace += 4;
6014012ef77SRichard Scheffenegger 	return ace;
6024012ef77SRichard Scheffenegger }
603