xref: /freebsd/sys/netinet/tcp_ecn.c (revision 83c1ec92e454a7592dd15b15b738d18ae36575e0)
1f7220c48SRichard Scheffenegger /*-
2f7220c48SRichard Scheffenegger  * SPDX-License-Identifier: BSD-3-Clause
3f7220c48SRichard Scheffenegger  *
4f7220c48SRichard Scheffenegger  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5f7220c48SRichard Scheffenegger  *      The Regents of the University of California.  All rights reserved.
6f7220c48SRichard Scheffenegger  * Copyright (c) 2007-2008,2010
7f7220c48SRichard Scheffenegger  *      Swinburne University of Technology, Melbourne, Australia.
8f7220c48SRichard Scheffenegger  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9f7220c48SRichard Scheffenegger  * Copyright (c) 2010 The FreeBSD Foundation
10f7220c48SRichard Scheffenegger  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11f7220c48SRichard Scheffenegger  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12f7220c48SRichard Scheffenegger  * All rights reserved.
13f7220c48SRichard Scheffenegger  *
14f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced Internet
15f7220c48SRichard Scheffenegger  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16f7220c48SRichard Scheffenegger  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17f7220c48SRichard Scheffenegger  * University Research Program Fund at Community Foundation Silicon Valley.
18f7220c48SRichard Scheffenegger  *
19f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced
20f7220c48SRichard Scheffenegger  * Internet Architectures, Swinburne University of Technology, Melbourne,
21f7220c48SRichard Scheffenegger  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22f7220c48SRichard Scheffenegger  *
23f7220c48SRichard Scheffenegger  * Portions of this software were developed by Robert N. M. Watson under
24f7220c48SRichard Scheffenegger  * contract to Juniper Networks, Inc.
25f7220c48SRichard Scheffenegger  *
26f7220c48SRichard Scheffenegger  * Redistribution and use in source and binary forms, with or without
27f7220c48SRichard Scheffenegger  * modification, are permitted provided that the following conditions
28f7220c48SRichard Scheffenegger  * are met:
29f7220c48SRichard Scheffenegger  * 1. Redistributions of source code must retain the above copyright
30f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer.
31f7220c48SRichard Scheffenegger  * 2. Redistributions in binary form must reproduce the above copyright
32f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer in the
33f7220c48SRichard Scheffenegger  *    documentation and/or other materials provided with the distribution.
34f7220c48SRichard Scheffenegger  * 3. Neither the name of the University nor the names of its contributors
35f7220c48SRichard Scheffenegger  *    may be used to endorse or promote products derived from this software
36f7220c48SRichard Scheffenegger  *    without specific prior written permission.
37f7220c48SRichard Scheffenegger  *
38f7220c48SRichard Scheffenegger  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39f7220c48SRichard Scheffenegger  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40f7220c48SRichard Scheffenegger  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41f7220c48SRichard Scheffenegger  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42f7220c48SRichard Scheffenegger  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43f7220c48SRichard Scheffenegger  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44f7220c48SRichard Scheffenegger  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45f7220c48SRichard Scheffenegger  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46f7220c48SRichard Scheffenegger  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47f7220c48SRichard Scheffenegger  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48f7220c48SRichard Scheffenegger  * SUCH DAMAGE.
49f7220c48SRichard Scheffenegger  *
50f7220c48SRichard Scheffenegger  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51f7220c48SRichard Scheffenegger  */
52f7220c48SRichard Scheffenegger 
53f7220c48SRichard Scheffenegger /*
54f7220c48SRichard Scheffenegger  * Utility functions to deal with Explicit Congestion Notification in TCP
55f7220c48SRichard Scheffenegger  * implementing the essential parts of the Accurate ECN extension
56f7220c48SRichard Scheffenegger  * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
57f7220c48SRichard Scheffenegger  */
58f7220c48SRichard Scheffenegger 
59f7220c48SRichard Scheffenegger #include <sys/cdefs.h>
60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$");
61f7220c48SRichard Scheffenegger 
62f7220c48SRichard Scheffenegger #include "opt_inet.h"
63f7220c48SRichard Scheffenegger #include "opt_inet6.h"
64f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h"
65f7220c48SRichard Scheffenegger 
66f7220c48SRichard Scheffenegger #include <sys/param.h>
67f7220c48SRichard Scheffenegger #include <sys/systm.h>
68f7220c48SRichard Scheffenegger #include <sys/kernel.h>
69f7220c48SRichard Scheffenegger #include <sys/sysctl.h>
70f7220c48SRichard Scheffenegger #include <sys/malloc.h>
71f7220c48SRichard Scheffenegger #include <sys/mbuf.h>
72f7220c48SRichard Scheffenegger #include <sys/socket.h>
73f7220c48SRichard Scheffenegger #include <sys/socketvar.h>
74f7220c48SRichard Scheffenegger 
75f7220c48SRichard Scheffenegger #include <machine/cpu.h>
76f7220c48SRichard Scheffenegger 
77f7220c48SRichard Scheffenegger #include <vm/uma.h>
78f7220c48SRichard Scheffenegger 
79f7220c48SRichard Scheffenegger #include <net/if.h>
80f7220c48SRichard Scheffenegger #include <net/if_var.h>
81f7220c48SRichard Scheffenegger #include <net/route.h>
82f7220c48SRichard Scheffenegger #include <net/vnet.h>
83f7220c48SRichard Scheffenegger 
84f7220c48SRichard Scheffenegger #include <netinet/in.h>
85f7220c48SRichard Scheffenegger #include <netinet/in_systm.h>
86f7220c48SRichard Scheffenegger #include <netinet/ip.h>
87f7220c48SRichard Scheffenegger #include <netinet/in_var.h>
88f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h>
89f7220c48SRichard Scheffenegger #include <netinet/ip_var.h>
90f7220c48SRichard Scheffenegger #include <netinet/ip6.h>
91f7220c48SRichard Scheffenegger #include <netinet/icmp6.h>
92f7220c48SRichard Scheffenegger #include <netinet6/nd6.h>
93f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h>
94f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h>
95f7220c48SRichard Scheffenegger #include <netinet/tcp.h>
96f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h>
97f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h>
98f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h>
99f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h>
100f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h>
101f7220c48SRichard Scheffenegger #include <netinet/tcpip.h>
102f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h>
103f7220c48SRichard Scheffenegger 
104f7220c48SRichard Scheffenegger 
105f7220c48SRichard Scheffenegger /*
106f7220c48SRichard Scheffenegger  * Process incoming SYN,ACK packet
107f7220c48SRichard Scheffenegger  */
108f7220c48SRichard Scheffenegger void
109f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
110f7220c48SRichard Scheffenegger {
111f7220c48SRichard Scheffenegger 
1124012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
1134012ef77SRichard Scheffenegger 		return;
1144012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
1154012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
1164012ef77SRichard Scheffenegger 		/* RFC3168 ECN handling */
1174012ef77SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
118f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
119*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1201790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
121f7220c48SRichard Scheffenegger 		}
1224012ef77SRichard Scheffenegger 	} else
1234012ef77SRichard Scheffenegger 	/* decoding Accurate ECN according to table in section 3.1.1 */
1244012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
1254012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
1264012ef77SRichard Scheffenegger 		/*
1274012ef77SRichard Scheffenegger 		 * on the SYN,ACK, process the AccECN
1284012ef77SRichard Scheffenegger 		 * flags indicating the state the SYN
1294012ef77SRichard Scheffenegger 		 * was delivered.
1304012ef77SRichard Scheffenegger 		 * Reactions to Path ECN mangling can
1314012ef77SRichard Scheffenegger 		 * come here.
1324012ef77SRichard Scheffenegger 		 */
1334012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
1344012ef77SRichard Scheffenegger 		/* RFC3168 SYN */
1354012ef77SRichard Scheffenegger 		case (0|0|TH_ECE):
1364012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
137*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1384012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1394012ef77SRichard Scheffenegger 			break;
1404012ef77SRichard Scheffenegger 		/* non-ECT SYN */
1414012ef77SRichard Scheffenegger 		case (0|TH_CWR|0):
1424012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
143*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1444012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1454012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1464012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1474012ef77SRichard Scheffenegger 			break;
1484012ef77SRichard Scheffenegger 		/* ECT0 SYN */
1494012ef77SRichard Scheffenegger 		case (TH_AE|0|0):
1504012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
151*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1524012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1534012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1544012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
1554012ef77SRichard Scheffenegger 			break;
1564012ef77SRichard Scheffenegger 		/* ECT1 SYN */
1574012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
1584012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
159*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1604012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1614012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1624012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
1634012ef77SRichard Scheffenegger 			break;
1644012ef77SRichard Scheffenegger 		/* CE SYN */
1654012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|0):
1664012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
167*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1684012ef77SRichard Scheffenegger 			tp->t_scep = 6;
1694012ef77SRichard Scheffenegger 			/*
1704012ef77SRichard Scheffenegger 			 * reduce the IW to 2 MSS (to
1714012ef77SRichard Scheffenegger 			 * account for delayed acks) if
1724012ef77SRichard Scheffenegger 			 * the SYN,ACK was CE marked
1734012ef77SRichard Scheffenegger 			 */
1744012ef77SRichard Scheffenegger 			tp->snd_cwnd = 2 * tcp_maxseg(tp);
1754012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1764012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1774012ef77SRichard Scheffenegger 			break;
1784012ef77SRichard Scheffenegger 		default:
179*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
1804012ef77SRichard Scheffenegger 			break;
1814012ef77SRichard Scheffenegger 		}
1824012ef77SRichard Scheffenegger 		/*
1834012ef77SRichard Scheffenegger 		 * Set the AccECN Codepoints on
1844012ef77SRichard Scheffenegger 		 * the outgoing <ACK> to the ECN
1854012ef77SRichard Scheffenegger 		 * state of the <SYN,ACK>
1864012ef77SRichard Scheffenegger 		 * according to table 3 in the
1874012ef77SRichard Scheffenegger 		 * AccECN draft
1884012ef77SRichard Scheffenegger 		 */
1894012ef77SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
1904012ef77SRichard Scheffenegger 		case (IPTOS_ECN_NOTECT):
1914012ef77SRichard Scheffenegger 			tp->t_rcep = 0b010;
1924012ef77SRichard Scheffenegger 			break;
1934012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT0):
1944012ef77SRichard Scheffenegger 			tp->t_rcep = 0b100;
1954012ef77SRichard Scheffenegger 			break;
1964012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT1):
1974012ef77SRichard Scheffenegger 			tp->t_rcep = 0b011;
1984012ef77SRichard Scheffenegger 			break;
1994012ef77SRichard Scheffenegger 		case (IPTOS_ECN_CE):
2004012ef77SRichard Scheffenegger 			tp->t_rcep = 0b110;
2014012ef77SRichard Scheffenegger 			break;
2024012ef77SRichard Scheffenegger 		}
2034012ef77SRichard Scheffenegger 	}
204f7220c48SRichard Scheffenegger }
205f7220c48SRichard Scheffenegger 
206f7220c48SRichard Scheffenegger /*
207f7220c48SRichard Scheffenegger  * Handle parallel SYN for ECN
208f7220c48SRichard Scheffenegger  */
209f7220c48SRichard Scheffenegger void
210f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
211f7220c48SRichard Scheffenegger {
212f7220c48SRichard Scheffenegger 	if (thflags & TH_ACK)
213f7220c48SRichard Scheffenegger 		return;
214f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
215f7220c48SRichard Scheffenegger 		return;
2164012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
2174012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
218f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
219f7220c48SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
220f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
221*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
222f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2231790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
224f7220c48SRichard Scheffenegger 		}
2254012ef77SRichard Scheffenegger 	} else
2264012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
2274012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
2284012ef77SRichard Scheffenegger 		/* AccECN handling */
2294012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
2304012ef77SRichard Scheffenegger 		default:
2314012ef77SRichard Scheffenegger 		case (0|0|0):
232*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
2334012ef77SRichard Scheffenegger 			break;
2344012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
2354012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
236*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
2374012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2384012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2394012ef77SRichard Scheffenegger 			break;
2404012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|TH_ECE):
2414012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
242*83c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
2434012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2444012ef77SRichard Scheffenegger 			/*
2454012ef77SRichard Scheffenegger 			 * Set the AccECN Codepoints on
2464012ef77SRichard Scheffenegger 			 * the outgoing <ACK> to the ECN
2474012ef77SRichard Scheffenegger 			 * state of the <SYN,ACK>
2484012ef77SRichard Scheffenegger 			 * according to table 3 in the
2494012ef77SRichard Scheffenegger 			 * AccECN draft
2504012ef77SRichard Scheffenegger 			 */
2514012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
2524012ef77SRichard Scheffenegger 			case (IPTOS_ECN_NOTECT):
2534012ef77SRichard Scheffenegger 				tp->t_rcep = 0b010;
2544012ef77SRichard Scheffenegger 				break;
2554012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT0):
2564012ef77SRichard Scheffenegger 				tp->t_rcep = 0b100;
2574012ef77SRichard Scheffenegger 				break;
2584012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT1):
2594012ef77SRichard Scheffenegger 				tp->t_rcep = 0b011;
2604012ef77SRichard Scheffenegger 				break;
2614012ef77SRichard Scheffenegger 			case (IPTOS_ECN_CE):
2624012ef77SRichard Scheffenegger 				tp->t_rcep = 0b110;
2634012ef77SRichard Scheffenegger 				break;
2644012ef77SRichard Scheffenegger 			}
2654012ef77SRichard Scheffenegger 			break;
2664012ef77SRichard Scheffenegger 		}
267f7220c48SRichard Scheffenegger 	}
268f7220c48SRichard Scheffenegger }
269f7220c48SRichard Scheffenegger 
270f7220c48SRichard Scheffenegger /*
271f7220c48SRichard Scheffenegger  * TCP ECN processing.
272f7220c48SRichard Scheffenegger  */
273f7220c48SRichard Scheffenegger int
274f7220c48SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
275f7220c48SRichard Scheffenegger {
276f7220c48SRichard Scheffenegger 	int delta_ace = 0;
277f7220c48SRichard Scheffenegger 
2784012ef77SRichard Scheffenegger 	if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
279f7220c48SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
280f7220c48SRichard Scheffenegger 		case IPTOS_ECN_CE:
2811790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ce);
282f7220c48SRichard Scheffenegger 			break;
283f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT0:
2841790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect0);
285f7220c48SRichard Scheffenegger 			break;
286f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT1:
2871790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect1);
288f7220c48SRichard Scheffenegger 			break;
289f7220c48SRichard Scheffenegger 		}
290f7220c48SRichard Scheffenegger 
2914012ef77SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ACE_PERMIT) {
2924012ef77SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
2934012ef77SRichard Scheffenegger 				tp->t_rcep += 1;
2944012ef77SRichard Scheffenegger 			if (tp->t_flags2 & TF2_ECN_PERMIT) {
2954012ef77SRichard Scheffenegger 				delta_ace = (tcp_ecn_get_ace(thflags) + 8 -
2964012ef77SRichard Scheffenegger 					    (tp->t_scep & 0x07)) & 0x07;
2974012ef77SRichard Scheffenegger 				tp->t_scep += delta_ace;
2984012ef77SRichard Scheffenegger 			} else {
2994012ef77SRichard Scheffenegger 				/*
3004012ef77SRichard Scheffenegger 				 * process the final ACK of the 3WHS
3014012ef77SRichard Scheffenegger 				 * see table 3 in draft-ietf-tcpm-accurate-ecn
3024012ef77SRichard Scheffenegger 				 */
3034012ef77SRichard Scheffenegger 				switch (tcp_ecn_get_ace(thflags)) {
3044012ef77SRichard Scheffenegger 				case 0b010:
3054012ef77SRichard Scheffenegger 					/* nonECT SYN or SYN,ACK */
3064012ef77SRichard Scheffenegger 					/* Fallthrough */
3074012ef77SRichard Scheffenegger 				case 0b011:
3084012ef77SRichard Scheffenegger 					/* ECT1 SYN or SYN,ACK */
3094012ef77SRichard Scheffenegger 					/* Fallthrough */
3104012ef77SRichard Scheffenegger 				case 0b100:
3114012ef77SRichard Scheffenegger 					/* ECT0 SYN or SYN,ACK */
3124012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3134012ef77SRichard Scheffenegger 					break;
3144012ef77SRichard Scheffenegger 				case 0b110:
3154012ef77SRichard Scheffenegger 					/* CE SYN or SYN,ACK */
3164012ef77SRichard Scheffenegger 					tp->t_scep = 6;
3174012ef77SRichard Scheffenegger 					tp->snd_cwnd = 2 * tcp_maxseg(tp);
3184012ef77SRichard Scheffenegger 					break;
3194012ef77SRichard Scheffenegger 				default:
3204012ef77SRichard Scheffenegger 					/* mangled AccECN handshake */
3214012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3224012ef77SRichard Scheffenegger 					break;
3234012ef77SRichard Scheffenegger 				}
3244012ef77SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_PERMIT;
3254012ef77SRichard Scheffenegger 			}
3264012ef77SRichard Scheffenegger 		} else {
327f7220c48SRichard Scheffenegger 			/* RFC3168 ECN handling */
3284edff766SRichard Scheffenegger 			if ((thflags & (TH_SYN | TH_ECE)) == TH_ECE)
329f7220c48SRichard Scheffenegger 				delta_ace = 1;
330f7220c48SRichard Scheffenegger 			if (thflags & TH_CWR) {
331f7220c48SRichard Scheffenegger 				tp->t_flags2 &= ~TF2_ECN_SND_ECE;
332f7220c48SRichard Scheffenegger 				tp->t_flags |= TF_ACKNOW;
333f7220c48SRichard Scheffenegger 			}
334f7220c48SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
335f7220c48SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_SND_ECE;
3364012ef77SRichard Scheffenegger 		}
337f7220c48SRichard Scheffenegger 
338f7220c48SRichard Scheffenegger 		/* Process a packet differently from RFC3168. */
339f7220c48SRichard Scheffenegger 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
340f7220c48SRichard Scheffenegger 	}
341f7220c48SRichard Scheffenegger 
342f7220c48SRichard Scheffenegger 	return delta_ace;
343f7220c48SRichard Scheffenegger }
344f7220c48SRichard Scheffenegger 
345f7220c48SRichard Scheffenegger /*
346f7220c48SRichard Scheffenegger  * Send ECN setup <SYN> packet header flags
347f7220c48SRichard Scheffenegger  */
348f7220c48SRichard Scheffenegger uint16_t
349f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp)
350f7220c48SRichard Scheffenegger {
351f7220c48SRichard Scheffenegger 	uint16_t thflags = 0;
352f7220c48SRichard Scheffenegger 
3534012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
3544012ef77SRichard Scheffenegger 		return thflags;
355f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 1) {
356f7220c48SRichard Scheffenegger 		/* Send a RFC3168 ECN setup <SYN> packet */
357f7220c48SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
358f7220c48SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
359f7220c48SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR;
360f7220c48SRichard Scheffenegger 		} else
361f7220c48SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR;
3624012ef77SRichard Scheffenegger 	} else
3634012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 3) {
3644012ef77SRichard Scheffenegger 		/* Send an Accurate ECN setup <SYN> packet */
3654012ef77SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
3664012ef77SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
3674012ef77SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR|TH_AE;
3684012ef77SRichard Scheffenegger 		} else
3694012ef77SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR|TH_AE;
370f7220c48SRichard Scheffenegger 	}
371f7220c48SRichard Scheffenegger 
372f7220c48SRichard Scheffenegger 	return thflags;
373f7220c48SRichard Scheffenegger }
374f7220c48SRichard Scheffenegger 
375f7220c48SRichard Scheffenegger /*
376f7220c48SRichard Scheffenegger  * output processing of ECN feature
377f7220c48SRichard Scheffenegger  * returning IP ECN header codepoint
378f7220c48SRichard Scheffenegger  */
379f7220c48SRichard Scheffenegger int
3802ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
381f7220c48SRichard Scheffenegger {
382f7220c48SRichard Scheffenegger 	int ipecn = IPTOS_ECN_NOTECT;
383f7220c48SRichard Scheffenegger 	bool newdata;
384f7220c48SRichard Scheffenegger 
385f7220c48SRichard Scheffenegger 	/*
386f7220c48SRichard Scheffenegger 	 * If the peer has ECN, mark data packets with
387f7220c48SRichard Scheffenegger 	 * ECN capable transmission (ECT).
388f7220c48SRichard Scheffenegger 	 * Ignore pure control packets, retransmissions
389f7220c48SRichard Scheffenegger 	 * and window probes.
390f7220c48SRichard Scheffenegger 	 */
391f7220c48SRichard Scheffenegger 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
3922ff07d92SRichard Scheffenegger 		    !rxmit &&
393f7220c48SRichard Scheffenegger 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
3944012ef77SRichard Scheffenegger 	/* RFC3168 ECN marking, only new data segments */
395f7220c48SRichard Scheffenegger 	if (newdata) {
396f7220c48SRichard Scheffenegger 		ipecn = IPTOS_ECN_ECT0;
3971790549dSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_ect0);
398f7220c48SRichard Scheffenegger 	}
399f7220c48SRichard Scheffenegger 	/*
400f7220c48SRichard Scheffenegger 	 * Reply with proper ECN notifications.
401f7220c48SRichard Scheffenegger 	 */
4024012ef77SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ACE_PERMIT) {
4034012ef77SRichard Scheffenegger 		*thflags &= ~(TH_AE|TH_CWR|TH_ECE);
4044012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x01)
4054012ef77SRichard Scheffenegger 			*thflags |= TH_ECE;
4064012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x02)
4074012ef77SRichard Scheffenegger 			*thflags |= TH_CWR;
4084012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x04)
4094012ef77SRichard Scheffenegger 			*thflags |= TH_AE;
4104012ef77SRichard Scheffenegger 		if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
4114012ef77SRichard Scheffenegger 			/*
4124012ef77SRichard Scheffenegger 			 * here we process the final
4134012ef77SRichard Scheffenegger 			 * ACK of the 3WHS
4144012ef77SRichard Scheffenegger 			 */
4154012ef77SRichard Scheffenegger 			if (tp->t_rcep == 0b110) {
4164012ef77SRichard Scheffenegger 				tp->t_rcep = 6;
4174012ef77SRichard Scheffenegger 			} else {
4184012ef77SRichard Scheffenegger 				tp->t_rcep = 5;
4194012ef77SRichard Scheffenegger 			}
4204012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
4214012ef77SRichard Scheffenegger 		}
4224012ef77SRichard Scheffenegger 	} else {
423f7220c48SRichard Scheffenegger 		if (newdata &&
424f7220c48SRichard Scheffenegger 		    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
425f7220c48SRichard Scheffenegger 			*thflags |= TH_CWR;
426f7220c48SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
427f7220c48SRichard Scheffenegger 		}
428f7220c48SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ECN_SND_ECE)
429f7220c48SRichard Scheffenegger 			*thflags |= TH_ECE;
4304012ef77SRichard Scheffenegger 	}
431f7220c48SRichard Scheffenegger 
432f7220c48SRichard Scheffenegger 	return ipecn;
433f7220c48SRichard Scheffenegger }
434f7220c48SRichard Scheffenegger 
435f7220c48SRichard Scheffenegger /*
436f7220c48SRichard Scheffenegger  * Set up the ECN related tcpcb fields from
437f7220c48SRichard Scheffenegger  * a syncache entry
438f7220c48SRichard Scheffenegger  */
439f7220c48SRichard Scheffenegger void
440f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
441f7220c48SRichard Scheffenegger {
4423f169c54SRichard Scheffenegger 	if (sc->sc_flags & SCF_ECN_MASK) {
4433f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
444f7220c48SRichard Scheffenegger 		case SCF_ECN:
445f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
446f7220c48SRichard Scheffenegger 			break;
4474012ef77SRichard Scheffenegger 		case SCF_ACE_N:
4484012ef77SRichard Scheffenegger 			/* Fallthrough */
4494012ef77SRichard Scheffenegger 		case SCF_ACE_0:
4504012ef77SRichard Scheffenegger 			/* Fallthrough */
4514012ef77SRichard Scheffenegger 		case SCF_ACE_1:
4524012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4534012ef77SRichard Scheffenegger 			tp->t_scep = 5;
4544012ef77SRichard Scheffenegger 			tp->t_rcep = 5;
4554012ef77SRichard Scheffenegger 			break;
4564012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
4574012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4584012ef77SRichard Scheffenegger 			tp->t_scep = 6;
4594012ef77SRichard Scheffenegger 			tp->t_rcep = 6;
4604012ef77SRichard Scheffenegger 			break;
461f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
462f7220c48SRichard Scheffenegger 		default:
463f7220c48SRichard Scheffenegger 			break;
464f7220c48SRichard Scheffenegger 		}
465f7220c48SRichard Scheffenegger 	}
466f7220c48SRichard Scheffenegger }
467f7220c48SRichard Scheffenegger 
468f7220c48SRichard Scheffenegger /*
469f7220c48SRichard Scheffenegger  * Process a <SYN> packets ECN information, and provide the
470f7220c48SRichard Scheffenegger  * syncache with the relevant information.
471f7220c48SRichard Scheffenegger  */
472f7220c48SRichard Scheffenegger int
473f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos)
474f7220c48SRichard Scheffenegger {
475f7220c48SRichard Scheffenegger 	int scflags = 0;
476f7220c48SRichard Scheffenegger 
4774012ef77SRichard Scheffenegger 	switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
478f7220c48SRichard Scheffenegger 	/* no ECN */
4794012ef77SRichard Scheffenegger 	case (0|0|0):
480f7220c48SRichard Scheffenegger 		break;
481f7220c48SRichard Scheffenegger 	/* legacy ECN */
4824012ef77SRichard Scheffenegger 	case (0|TH_CWR|TH_ECE):
483f7220c48SRichard Scheffenegger 		scflags = SCF_ECN;
484f7220c48SRichard Scheffenegger 		break;
4854012ef77SRichard Scheffenegger 	/* Accurate ECN */
4864012ef77SRichard Scheffenegger 	case (TH_AE|TH_CWR|TH_ECE):
4874012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
4884012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
4894012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
4904012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
4914012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
4924012ef77SRichard Scheffenegger 				break;
4934012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
4944012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
4954012ef77SRichard Scheffenegger 				break;
4964012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
4974012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
4984012ef77SRichard Scheffenegger 				break;
4994012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5004012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5014012ef77SRichard Scheffenegger 				break;
5024012ef77SRichard Scheffenegger 			}
5034012ef77SRichard Scheffenegger 		} else
5044012ef77SRichard Scheffenegger 			scflags = SCF_ECN;
5054012ef77SRichard Scheffenegger 		break;
5064012ef77SRichard Scheffenegger 	/* Default Case (section 3.1.2) */
507f7220c48SRichard Scheffenegger 	default:
5084012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
5094012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
5104012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
5114012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
5124012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
5134012ef77SRichard Scheffenegger 				break;
5144012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
5154012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
5164012ef77SRichard Scheffenegger 				break;
5174012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
5184012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
5194012ef77SRichard Scheffenegger 				break;
5204012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5214012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5224012ef77SRichard Scheffenegger 				break;
5234012ef77SRichard Scheffenegger 			}
5244012ef77SRichard Scheffenegger 		}
525f7220c48SRichard Scheffenegger 		break;
526f7220c48SRichard Scheffenegger 	}
527f7220c48SRichard Scheffenegger 	return scflags;
528f7220c48SRichard Scheffenegger }
529f7220c48SRichard Scheffenegger 
530f7220c48SRichard Scheffenegger /*
531f7220c48SRichard Scheffenegger  * Set up the ECN information for the <SYN,ACK> from
532f7220c48SRichard Scheffenegger  * syncache information.
533f7220c48SRichard Scheffenegger  */
534f7220c48SRichard Scheffenegger uint16_t
535f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
536f7220c48SRichard Scheffenegger {
537f7220c48SRichard Scheffenegger 	if ((thflags & TH_SYN) &&
5383f169c54SRichard Scheffenegger 	    (sc->sc_flags & SCF_ECN_MASK)) {
5393f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
540f7220c48SRichard Scheffenegger 		case SCF_ECN:
5414012ef77SRichard Scheffenegger 			thflags |= (0 | 0 | TH_ECE);
5421790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
543f7220c48SRichard Scheffenegger 			break;
5444012ef77SRichard Scheffenegger 		case SCF_ACE_N:
5454012ef77SRichard Scheffenegger 			thflags |= (0 | TH_CWR | 0);
5464012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5474012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
5484012ef77SRichard Scheffenegger 			break;
5494012ef77SRichard Scheffenegger 		case SCF_ACE_0:
5504012ef77SRichard Scheffenegger 			thflags |= (TH_AE | 0 | 0);
5514012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5524012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
5534012ef77SRichard Scheffenegger 			break;
5544012ef77SRichard Scheffenegger 		case SCF_ACE_1:
5554012ef77SRichard Scheffenegger 			thflags |= (0 | TH_ECE | TH_CWR);
5564012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5574012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
5584012ef77SRichard Scheffenegger 			break;
5594012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
5604012ef77SRichard Scheffenegger 			thflags |= (TH_AE | TH_CWR | 0);
5614012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5624012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ce);
5634012ef77SRichard Scheffenegger 			break;
564f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
565f7220c48SRichard Scheffenegger 		default:
566f7220c48SRichard Scheffenegger 			break;
567f7220c48SRichard Scheffenegger 		}
568f7220c48SRichard Scheffenegger 	}
569f7220c48SRichard Scheffenegger 	return thflags;
570f7220c48SRichard Scheffenegger }
5714012ef77SRichard Scheffenegger 
5724012ef77SRichard Scheffenegger int
5734012ef77SRichard Scheffenegger tcp_ecn_get_ace(uint16_t thflags)
5744012ef77SRichard Scheffenegger {
5754012ef77SRichard Scheffenegger 	int ace = 0;
5764012ef77SRichard Scheffenegger 
5774012ef77SRichard Scheffenegger 	if (thflags & TH_ECE)
5784012ef77SRichard Scheffenegger 		ace += 1;
5794012ef77SRichard Scheffenegger 	if (thflags & TH_CWR)
5804012ef77SRichard Scheffenegger 		ace += 2;
5814012ef77SRichard Scheffenegger 	if (thflags & TH_AE)
5824012ef77SRichard Scheffenegger 		ace += 4;
5834012ef77SRichard Scheffenegger 	return ace;
5844012ef77SRichard Scheffenegger }
585