xref: /freebsd/sys/netinet/tcp_ecn.c (revision 7ea8d02798055068682e80d8791556d3f81bda82)
1f7220c48SRichard Scheffenegger /*-
2f7220c48SRichard Scheffenegger  * SPDX-License-Identifier: BSD-3-Clause
3f7220c48SRichard Scheffenegger  *
4f7220c48SRichard Scheffenegger  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5f7220c48SRichard Scheffenegger  *      The Regents of the University of California.  All rights reserved.
6f7220c48SRichard Scheffenegger  * Copyright (c) 2007-2008,2010
7f7220c48SRichard Scheffenegger  *      Swinburne University of Technology, Melbourne, Australia.
8f7220c48SRichard Scheffenegger  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9f7220c48SRichard Scheffenegger  * Copyright (c) 2010 The FreeBSD Foundation
10f7220c48SRichard Scheffenegger  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11f7220c48SRichard Scheffenegger  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12f7220c48SRichard Scheffenegger  * All rights reserved.
13f7220c48SRichard Scheffenegger  *
14f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced Internet
15f7220c48SRichard Scheffenegger  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16f7220c48SRichard Scheffenegger  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17f7220c48SRichard Scheffenegger  * University Research Program Fund at Community Foundation Silicon Valley.
18f7220c48SRichard Scheffenegger  *
19f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced
20f7220c48SRichard Scheffenegger  * Internet Architectures, Swinburne University of Technology, Melbourne,
21f7220c48SRichard Scheffenegger  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22f7220c48SRichard Scheffenegger  *
23f7220c48SRichard Scheffenegger  * Portions of this software were developed by Robert N. M. Watson under
24f7220c48SRichard Scheffenegger  * contract to Juniper Networks, Inc.
25f7220c48SRichard Scheffenegger  *
26f7220c48SRichard Scheffenegger  * Redistribution and use in source and binary forms, with or without
27f7220c48SRichard Scheffenegger  * modification, are permitted provided that the following conditions
28f7220c48SRichard Scheffenegger  * are met:
29f7220c48SRichard Scheffenegger  * 1. Redistributions of source code must retain the above copyright
30f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer.
31f7220c48SRichard Scheffenegger  * 2. Redistributions in binary form must reproduce the above copyright
32f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer in the
33f7220c48SRichard Scheffenegger  *    documentation and/or other materials provided with the distribution.
34f7220c48SRichard Scheffenegger  * 3. Neither the name of the University nor the names of its contributors
35f7220c48SRichard Scheffenegger  *    may be used to endorse or promote products derived from this software
36f7220c48SRichard Scheffenegger  *    without specific prior written permission.
37f7220c48SRichard Scheffenegger  *
38f7220c48SRichard Scheffenegger  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39f7220c48SRichard Scheffenegger  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40f7220c48SRichard Scheffenegger  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41f7220c48SRichard Scheffenegger  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42f7220c48SRichard Scheffenegger  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43f7220c48SRichard Scheffenegger  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44f7220c48SRichard Scheffenegger  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45f7220c48SRichard Scheffenegger  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46f7220c48SRichard Scheffenegger  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47f7220c48SRichard Scheffenegger  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48f7220c48SRichard Scheffenegger  * SUCH DAMAGE.
49f7220c48SRichard Scheffenegger  *
50f7220c48SRichard Scheffenegger  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51f7220c48SRichard Scheffenegger  */
52f7220c48SRichard Scheffenegger 
53f7220c48SRichard Scheffenegger /*
54f7220c48SRichard Scheffenegger  * Utility functions to deal with Explicit Congestion Notification in TCP
55f7220c48SRichard Scheffenegger  * implementing the essential parts of the Accurate ECN extension
56f7220c48SRichard Scheffenegger  * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
57f7220c48SRichard Scheffenegger  */
58f7220c48SRichard Scheffenegger 
59f7220c48SRichard Scheffenegger #include <sys/cdefs.h>
60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$");
61f7220c48SRichard Scheffenegger 
62f7220c48SRichard Scheffenegger #include "opt_inet.h"
63f7220c48SRichard Scheffenegger #include "opt_inet6.h"
64f7220c48SRichard Scheffenegger 
65f7220c48SRichard Scheffenegger #include <sys/param.h>
66f7220c48SRichard Scheffenegger #include <sys/systm.h>
67f7220c48SRichard Scheffenegger #include <sys/kernel.h>
68f7220c48SRichard Scheffenegger #include <sys/sysctl.h>
69f7220c48SRichard Scheffenegger #include <sys/malloc.h>
70f7220c48SRichard Scheffenegger #include <sys/mbuf.h>
71f7220c48SRichard Scheffenegger #include <sys/socket.h>
72f7220c48SRichard Scheffenegger #include <sys/socketvar.h>
73f7220c48SRichard Scheffenegger 
74f7220c48SRichard Scheffenegger #include <machine/cpu.h>
75f7220c48SRichard Scheffenegger 
76f7220c48SRichard Scheffenegger #include <vm/uma.h>
77f7220c48SRichard Scheffenegger 
78f7220c48SRichard Scheffenegger #include <net/if.h>
79f7220c48SRichard Scheffenegger #include <net/if_var.h>
80f7220c48SRichard Scheffenegger #include <net/route.h>
81f7220c48SRichard Scheffenegger #include <net/vnet.h>
82f7220c48SRichard Scheffenegger 
83f7220c48SRichard Scheffenegger #include <netinet/in.h>
84f7220c48SRichard Scheffenegger #include <netinet/in_systm.h>
85f7220c48SRichard Scheffenegger #include <netinet/ip.h>
86f7220c48SRichard Scheffenegger #include <netinet/in_var.h>
87f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h>
88f7220c48SRichard Scheffenegger #include <netinet/ip_var.h>
89f7220c48SRichard Scheffenegger #include <netinet/ip6.h>
90f7220c48SRichard Scheffenegger #include <netinet/icmp6.h>
91f7220c48SRichard Scheffenegger #include <netinet6/nd6.h>
92f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h>
93f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h>
94f7220c48SRichard Scheffenegger #include <netinet/tcp.h>
95f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h>
96f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h>
97f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h>
98f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h>
99f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h>
100f7220c48SRichard Scheffenegger #include <netinet/tcpip.h>
101f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h>
102f7220c48SRichard Scheffenegger 
103004bb636SRichard Scheffenegger static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
104004bb636SRichard Scheffenegger     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
105004bb636SRichard Scheffenegger     "TCP ECN");
106004bb636SRichard Scheffenegger 
107004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_do_ecn) = 2;
108004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable,
109004bb636SRichard Scheffenegger     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0,
110004bb636SRichard Scheffenegger     "TCP ECN support");
111004bb636SRichard Scheffenegger 
112004bb636SRichard Scheffenegger VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
113004bb636SRichard Scheffenegger SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries,
114004bb636SRichard Scheffenegger     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
115004bb636SRichard Scheffenegger     "Max retries before giving up on ECN");
116f7220c48SRichard Scheffenegger 
117f7220c48SRichard Scheffenegger /*
118f7220c48SRichard Scheffenegger  * Process incoming SYN,ACK packet
119f7220c48SRichard Scheffenegger  */
120f7220c48SRichard Scheffenegger void
121f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
122f7220c48SRichard Scheffenegger {
123f7220c48SRichard Scheffenegger 
1244012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
1254012ef77SRichard Scheffenegger 		return;
1264012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
1274012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
1284012ef77SRichard Scheffenegger 		/* RFC3168 ECN handling */
1294012ef77SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (0 | TH_ECE)) {
130f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
13183c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1321790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
133f7220c48SRichard Scheffenegger 		}
1344012ef77SRichard Scheffenegger 	} else
1354012ef77SRichard Scheffenegger 	/* decoding Accurate ECN according to table in section 3.1.1 */
1364012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
1374012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
1384012ef77SRichard Scheffenegger 		/*
1394012ef77SRichard Scheffenegger 		 * on the SYN,ACK, process the AccECN
1404012ef77SRichard Scheffenegger 		 * flags indicating the state the SYN
1414012ef77SRichard Scheffenegger 		 * was delivered.
1424012ef77SRichard Scheffenegger 		 * Reactions to Path ECN mangling can
1434012ef77SRichard Scheffenegger 		 * come here.
1444012ef77SRichard Scheffenegger 		 */
1454012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
1464012ef77SRichard Scheffenegger 		/* RFC3168 SYN */
1474012ef77SRichard Scheffenegger 		case (0|0|TH_ECE):
1484012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
14983c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
1504012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1514012ef77SRichard Scheffenegger 			break;
1524012ef77SRichard Scheffenegger 		/* non-ECT SYN */
1534012ef77SRichard Scheffenegger 		case (0|TH_CWR|0):
1544012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
15583c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1564012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1574012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1584012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1594012ef77SRichard Scheffenegger 			break;
1604012ef77SRichard Scheffenegger 		/* ECT0 SYN */
1614012ef77SRichard Scheffenegger 		case (TH_AE|0|0):
1624012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
16383c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1644012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1654012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1664012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
1674012ef77SRichard Scheffenegger 			break;
1684012ef77SRichard Scheffenegger 		/* ECT1 SYN */
1694012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
1704012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
17183c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1724012ef77SRichard Scheffenegger 			tp->t_scep = 5;
1734012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1744012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
1754012ef77SRichard Scheffenegger 			break;
1764012ef77SRichard Scheffenegger 		/* CE SYN */
1774012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|0):
1784012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
17983c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
1804012ef77SRichard Scheffenegger 			tp->t_scep = 6;
1814012ef77SRichard Scheffenegger 			/*
1824012ef77SRichard Scheffenegger 			 * reduce the IW to 2 MSS (to
1834012ef77SRichard Scheffenegger 			 * account for delayed acks) if
1844012ef77SRichard Scheffenegger 			 * the SYN,ACK was CE marked
1854012ef77SRichard Scheffenegger 			 */
1864012ef77SRichard Scheffenegger 			tp->snd_cwnd = 2 * tcp_maxseg(tp);
1874012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
1884012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
1894012ef77SRichard Scheffenegger 			break;
1904012ef77SRichard Scheffenegger 		default:
19183c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
1924012ef77SRichard Scheffenegger 			break;
1934012ef77SRichard Scheffenegger 		}
1944012ef77SRichard Scheffenegger 		/*
1954012ef77SRichard Scheffenegger 		 * Set the AccECN Codepoints on
1964012ef77SRichard Scheffenegger 		 * the outgoing <ACK> to the ECN
1974012ef77SRichard Scheffenegger 		 * state of the <SYN,ACK>
1984012ef77SRichard Scheffenegger 		 * according to table 3 in the
1994012ef77SRichard Scheffenegger 		 * AccECN draft
2004012ef77SRichard Scheffenegger 		 */
2014012ef77SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
2024012ef77SRichard Scheffenegger 		case (IPTOS_ECN_NOTECT):
2034012ef77SRichard Scheffenegger 			tp->t_rcep = 0b010;
2044012ef77SRichard Scheffenegger 			break;
2054012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT0):
2064012ef77SRichard Scheffenegger 			tp->t_rcep = 0b100;
2074012ef77SRichard Scheffenegger 			break;
2084012ef77SRichard Scheffenegger 		case (IPTOS_ECN_ECT1):
2094012ef77SRichard Scheffenegger 			tp->t_rcep = 0b011;
2104012ef77SRichard Scheffenegger 			break;
2114012ef77SRichard Scheffenegger 		case (IPTOS_ECN_CE):
2124012ef77SRichard Scheffenegger 			tp->t_rcep = 0b110;
2134012ef77SRichard Scheffenegger 			break;
2144012ef77SRichard Scheffenegger 		}
2154012ef77SRichard Scheffenegger 	}
216f7220c48SRichard Scheffenegger }
217f7220c48SRichard Scheffenegger 
218f7220c48SRichard Scheffenegger /*
219f7220c48SRichard Scheffenegger  * Handle parallel SYN for ECN
220f7220c48SRichard Scheffenegger  */
221f7220c48SRichard Scheffenegger void
222f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
223f7220c48SRichard Scheffenegger {
224f7220c48SRichard Scheffenegger 	if (thflags & TH_ACK)
225f7220c48SRichard Scheffenegger 		return;
226f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
227f7220c48SRichard Scheffenegger 		return;
2284012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) ||
2294012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 2)) {
230f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
231f7220c48SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
232f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
23383c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
234f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2351790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
236f7220c48SRichard Scheffenegger 		}
2374012ef77SRichard Scheffenegger 	} else
2384012ef77SRichard Scheffenegger 	if ((V_tcp_do_ecn == 3) ||
2394012ef77SRichard Scheffenegger 	    (V_tcp_do_ecn == 4)) {
2404012ef77SRichard Scheffenegger 		/* AccECN handling */
2414012ef77SRichard Scheffenegger 		switch (thflags & (TH_AE | TH_CWR | TH_ECE)) {
2424012ef77SRichard Scheffenegger 		default:
2434012ef77SRichard Scheffenegger 		case (0|0|0):
24483c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~(TF2_ECN_PERMIT | TF2_ACE_PERMIT);
2454012ef77SRichard Scheffenegger 			break;
2464012ef77SRichard Scheffenegger 		case (0|TH_CWR|TH_ECE):
2474012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
24883c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ACE_PERMIT;
2494012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
2504012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2514012ef77SRichard Scheffenegger 			break;
2524012ef77SRichard Scheffenegger 		case (TH_AE|TH_CWR|TH_ECE):
2534012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
25483c1ec92SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_PERMIT;
2554012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
2564012ef77SRichard Scheffenegger 			/*
2574012ef77SRichard Scheffenegger 			 * Set the AccECN Codepoints on
2584012ef77SRichard Scheffenegger 			 * the outgoing <ACK> to the ECN
2594012ef77SRichard Scheffenegger 			 * state of the <SYN,ACK>
2604012ef77SRichard Scheffenegger 			 * according to table 3 in the
2614012ef77SRichard Scheffenegger 			 * AccECN draft
2624012ef77SRichard Scheffenegger 			 */
2634012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
2644012ef77SRichard Scheffenegger 			case (IPTOS_ECN_NOTECT):
2654012ef77SRichard Scheffenegger 				tp->t_rcep = 0b010;
2664012ef77SRichard Scheffenegger 				break;
2674012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT0):
2684012ef77SRichard Scheffenegger 				tp->t_rcep = 0b100;
2694012ef77SRichard Scheffenegger 				break;
2704012ef77SRichard Scheffenegger 			case (IPTOS_ECN_ECT1):
2714012ef77SRichard Scheffenegger 				tp->t_rcep = 0b011;
2724012ef77SRichard Scheffenegger 				break;
2734012ef77SRichard Scheffenegger 			case (IPTOS_ECN_CE):
2744012ef77SRichard Scheffenegger 				tp->t_rcep = 0b110;
2754012ef77SRichard Scheffenegger 				break;
2764012ef77SRichard Scheffenegger 			}
2774012ef77SRichard Scheffenegger 			break;
2784012ef77SRichard Scheffenegger 		}
279f7220c48SRichard Scheffenegger 	}
280f7220c48SRichard Scheffenegger }
281f7220c48SRichard Scheffenegger 
282f7220c48SRichard Scheffenegger /*
283f7220c48SRichard Scheffenegger  * TCP ECN processing.
284f7220c48SRichard Scheffenegger  */
285f7220c48SRichard Scheffenegger int
286b1258b76SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int pkts, int iptos)
287f7220c48SRichard Scheffenegger {
288b1258b76SRichard Scheffenegger 	int delta_cep = 0;
289f7220c48SRichard Scheffenegger 
290f7220c48SRichard Scheffenegger 	switch (iptos & IPTOS_ECN_MASK) {
291f7220c48SRichard Scheffenegger 	case IPTOS_ECN_CE:
2921a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvce);
293f7220c48SRichard Scheffenegger 		break;
294f7220c48SRichard Scheffenegger 	case IPTOS_ECN_ECT0:
2951a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvect0);
296f7220c48SRichard Scheffenegger 		break;
297f7220c48SRichard Scheffenegger 	case IPTOS_ECN_ECT1:
2981a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvect1);
299f7220c48SRichard Scheffenegger 		break;
300f7220c48SRichard Scheffenegger 	}
301f7220c48SRichard Scheffenegger 
3021a70101aSRichard Scheffenegger 	if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
3034012ef77SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ACE_PERMIT) {
3044012ef77SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
3054012ef77SRichard Scheffenegger 				tp->t_rcep += 1;
3064012ef77SRichard Scheffenegger 			if (tp->t_flags2 & TF2_ECN_PERMIT) {
307b1258b76SRichard Scheffenegger 				delta_cep = (tcp_ecn_get_ace(thflags) + 8 -
308b1258b76SRichard Scheffenegger 					    (tp->t_scep & 7)) & 7;
309b1258b76SRichard Scheffenegger 				if (delta_cep < pkts)
310b1258b76SRichard Scheffenegger 					delta_cep = pkts -
311b1258b76SRichard Scheffenegger 					    ((pkts - delta_cep) & 7);
312b1258b76SRichard Scheffenegger 				tp->t_scep += delta_cep;
3134012ef77SRichard Scheffenegger 			} else {
3144012ef77SRichard Scheffenegger 				/*
3154012ef77SRichard Scheffenegger 				 * process the final ACK of the 3WHS
3164012ef77SRichard Scheffenegger 				 * see table 3 in draft-ietf-tcpm-accurate-ecn
3174012ef77SRichard Scheffenegger 				 */
3184012ef77SRichard Scheffenegger 				switch (tcp_ecn_get_ace(thflags)) {
3194012ef77SRichard Scheffenegger 				case 0b010:
3204012ef77SRichard Scheffenegger 					/* nonECT SYN or SYN,ACK */
321*7ea8d027SRichard Scheffenegger 					/* FALLTHROUGH */
3224012ef77SRichard Scheffenegger 				case 0b011:
3234012ef77SRichard Scheffenegger 					/* ECT1 SYN or SYN,ACK */
324*7ea8d027SRichard Scheffenegger 					/* FALLTHROUGH */
3254012ef77SRichard Scheffenegger 				case 0b100:
3264012ef77SRichard Scheffenegger 					/* ECT0 SYN or SYN,ACK */
3274012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3284012ef77SRichard Scheffenegger 					break;
3294012ef77SRichard Scheffenegger 				case 0b110:
3304012ef77SRichard Scheffenegger 					/* CE SYN or SYN,ACK */
3314012ef77SRichard Scheffenegger 					tp->t_scep = 6;
3324012ef77SRichard Scheffenegger 					tp->snd_cwnd = 2 * tcp_maxseg(tp);
3334012ef77SRichard Scheffenegger 					break;
3344012ef77SRichard Scheffenegger 				default:
3354012ef77SRichard Scheffenegger 					/* mangled AccECN handshake */
3364012ef77SRichard Scheffenegger 					tp->t_scep = 5;
3374012ef77SRichard Scheffenegger 					break;
3384012ef77SRichard Scheffenegger 				}
3394012ef77SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_PERMIT;
3404012ef77SRichard Scheffenegger 			}
3414012ef77SRichard Scheffenegger 		} else {
342f7220c48SRichard Scheffenegger 			/* RFC3168 ECN handling */
34322c81cc5SRichard Scheffenegger 			if ((thflags & (TH_SYN | TH_ECE)) == TH_ECE) {
344b1258b76SRichard Scheffenegger 				delta_cep = 1;
34522c81cc5SRichard Scheffenegger 				tp->t_scep++;
34622c81cc5SRichard Scheffenegger 			}
347f7220c48SRichard Scheffenegger 			if (thflags & TH_CWR) {
348f7220c48SRichard Scheffenegger 				tp->t_flags2 &= ~TF2_ECN_SND_ECE;
349f7220c48SRichard Scheffenegger 				tp->t_flags |= TF_ACKNOW;
350f7220c48SRichard Scheffenegger 			}
351f7220c48SRichard Scheffenegger 			if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
352f7220c48SRichard Scheffenegger 				tp->t_flags2 |= TF2_ECN_SND_ECE;
3534012ef77SRichard Scheffenegger 		}
354f7220c48SRichard Scheffenegger 
355f7220c48SRichard Scheffenegger 		/* Process a packet differently from RFC3168. */
356f7220c48SRichard Scheffenegger 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
357f7220c48SRichard Scheffenegger 	}
358f7220c48SRichard Scheffenegger 
359b1258b76SRichard Scheffenegger 	return delta_cep;
360f7220c48SRichard Scheffenegger }
361f7220c48SRichard Scheffenegger 
362f7220c48SRichard Scheffenegger /*
363f7220c48SRichard Scheffenegger  * Send ECN setup <SYN> packet header flags
364f7220c48SRichard Scheffenegger  */
365f7220c48SRichard Scheffenegger uint16_t
366f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp)
367f7220c48SRichard Scheffenegger {
368f7220c48SRichard Scheffenegger 	uint16_t thflags = 0;
369f7220c48SRichard Scheffenegger 
3704012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
3714012ef77SRichard Scheffenegger 		return thflags;
372f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 1) {
373f7220c48SRichard Scheffenegger 		/* Send a RFC3168 ECN setup <SYN> packet */
374f7220c48SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
375f7220c48SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
376f7220c48SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR;
377f7220c48SRichard Scheffenegger 		} else
378f7220c48SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR;
3794012ef77SRichard Scheffenegger 	} else
3804012ef77SRichard Scheffenegger 	if (V_tcp_do_ecn == 3) {
3814012ef77SRichard Scheffenegger 		/* Send an Accurate ECN setup <SYN> packet */
3824012ef77SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
3834012ef77SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
3844012ef77SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR|TH_AE;
3854012ef77SRichard Scheffenegger 		} else
3864012ef77SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR|TH_AE;
387f7220c48SRichard Scheffenegger 	}
388f7220c48SRichard Scheffenegger 
389f7220c48SRichard Scheffenegger 	return thflags;
390f7220c48SRichard Scheffenegger }
391f7220c48SRichard Scheffenegger 
392f7220c48SRichard Scheffenegger /*
393f7220c48SRichard Scheffenegger  * output processing of ECN feature
394f7220c48SRichard Scheffenegger  * returning IP ECN header codepoint
395f7220c48SRichard Scheffenegger  */
396f7220c48SRichard Scheffenegger int
3972ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
398f7220c48SRichard Scheffenegger {
399f7220c48SRichard Scheffenegger 	int ipecn = IPTOS_ECN_NOTECT;
400f7220c48SRichard Scheffenegger 	bool newdata;
401f7220c48SRichard Scheffenegger 
402f7220c48SRichard Scheffenegger 	/*
403f7220c48SRichard Scheffenegger 	 * If the peer has ECN, mark data packets with
404f7220c48SRichard Scheffenegger 	 * ECN capable transmission (ECT).
405f7220c48SRichard Scheffenegger 	 * Ignore pure control packets, retransmissions
406f7220c48SRichard Scheffenegger 	 * and window probes.
407f7220c48SRichard Scheffenegger 	 */
408f7220c48SRichard Scheffenegger 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
4092ff07d92SRichard Scheffenegger 		    !rxmit &&
410f7220c48SRichard Scheffenegger 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
4114012ef77SRichard Scheffenegger 	/* RFC3168 ECN marking, only new data segments */
412f7220c48SRichard Scheffenegger 	if (newdata) {
413dc9daa04SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ECN_USE_ECT1) {
414dc9daa04SRichard Scheffenegger 			ipecn = IPTOS_ECN_ECT1;
4151a70101aSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_sndect1);
416dc9daa04SRichard Scheffenegger 		} else {
417f7220c48SRichard Scheffenegger 			ipecn = IPTOS_ECN_ECT0;
4181a70101aSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_sndect0);
419f7220c48SRichard Scheffenegger 		}
420dc9daa04SRichard Scheffenegger 	}
421f7220c48SRichard Scheffenegger 	/*
422f7220c48SRichard Scheffenegger 	 * Reply with proper ECN notifications.
423f7220c48SRichard Scheffenegger 	 */
4244012ef77SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ACE_PERMIT) {
4254012ef77SRichard Scheffenegger 		*thflags &= ~(TH_AE|TH_CWR|TH_ECE);
4264012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x01)
4274012ef77SRichard Scheffenegger 			*thflags |= TH_ECE;
4284012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x02)
4294012ef77SRichard Scheffenegger 			*thflags |= TH_CWR;
4304012ef77SRichard Scheffenegger 		if (tp->t_rcep & 0x04)
4314012ef77SRichard Scheffenegger 			*thflags |= TH_AE;
4324012ef77SRichard Scheffenegger 		if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
4334012ef77SRichard Scheffenegger 			/*
4344012ef77SRichard Scheffenegger 			 * here we process the final
4354012ef77SRichard Scheffenegger 			 * ACK of the 3WHS
4364012ef77SRichard Scheffenegger 			 */
4374012ef77SRichard Scheffenegger 			if (tp->t_rcep == 0b110) {
4384012ef77SRichard Scheffenegger 				tp->t_rcep = 6;
4394012ef77SRichard Scheffenegger 			} else {
4404012ef77SRichard Scheffenegger 				tp->t_rcep = 5;
4414012ef77SRichard Scheffenegger 			}
4424012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
4434012ef77SRichard Scheffenegger 		}
4444012ef77SRichard Scheffenegger 	} else {
445f7220c48SRichard Scheffenegger 		if (newdata &&
446f7220c48SRichard Scheffenegger 		    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
447f7220c48SRichard Scheffenegger 			*thflags |= TH_CWR;
448f7220c48SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_SND_CWR;
449f7220c48SRichard Scheffenegger 		}
450f7220c48SRichard Scheffenegger 		if (tp->t_flags2 & TF2_ECN_SND_ECE)
451f7220c48SRichard Scheffenegger 			*thflags |= TH_ECE;
4524012ef77SRichard Scheffenegger 	}
453f7220c48SRichard Scheffenegger 
454f7220c48SRichard Scheffenegger 	return ipecn;
455f7220c48SRichard Scheffenegger }
456f7220c48SRichard Scheffenegger 
457f7220c48SRichard Scheffenegger /*
458f7220c48SRichard Scheffenegger  * Set up the ECN related tcpcb fields from
459f7220c48SRichard Scheffenegger  * a syncache entry
460f7220c48SRichard Scheffenegger  */
461f7220c48SRichard Scheffenegger void
462f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
463f7220c48SRichard Scheffenegger {
4643f169c54SRichard Scheffenegger 	if (sc->sc_flags & SCF_ECN_MASK) {
4653f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
466f7220c48SRichard Scheffenegger 		case SCF_ECN:
467f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
468f7220c48SRichard Scheffenegger 			break;
4694012ef77SRichard Scheffenegger 		case SCF_ACE_N:
470*7ea8d027SRichard Scheffenegger 			/* FALLTHROUGH */
4714012ef77SRichard Scheffenegger 		case SCF_ACE_0:
472*7ea8d027SRichard Scheffenegger 			/* FALLTHROUGH */
4734012ef77SRichard Scheffenegger 		case SCF_ACE_1:
4744012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4754012ef77SRichard Scheffenegger 			tp->t_scep = 5;
4764012ef77SRichard Scheffenegger 			tp->t_rcep = 5;
4774012ef77SRichard Scheffenegger 			break;
4784012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
4794012ef77SRichard Scheffenegger 			tp->t_flags2 |= TF2_ACE_PERMIT;
4804012ef77SRichard Scheffenegger 			tp->t_scep = 6;
4814012ef77SRichard Scheffenegger 			tp->t_rcep = 6;
4824012ef77SRichard Scheffenegger 			break;
483f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
484f7220c48SRichard Scheffenegger 		default:
485f7220c48SRichard Scheffenegger 			break;
486f7220c48SRichard Scheffenegger 		}
487f7220c48SRichard Scheffenegger 	}
488f7220c48SRichard Scheffenegger }
489f7220c48SRichard Scheffenegger 
490f7220c48SRichard Scheffenegger /*
491f7220c48SRichard Scheffenegger  * Process a <SYN> packets ECN information, and provide the
492f7220c48SRichard Scheffenegger  * syncache with the relevant information.
493f7220c48SRichard Scheffenegger  */
494f7220c48SRichard Scheffenegger int
495f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos)
496f7220c48SRichard Scheffenegger {
497f7220c48SRichard Scheffenegger 	int scflags = 0;
498f7220c48SRichard Scheffenegger 
4991a70101aSRichard Scheffenegger 	switch (iptos & IPTOS_ECN_MASK) {
5001a70101aSRichard Scheffenegger 	case IPTOS_ECN_CE:
5011a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvce);
5021a70101aSRichard Scheffenegger 		break;
5031a70101aSRichard Scheffenegger 	case IPTOS_ECN_ECT0:
5041a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvect0);
5051a70101aSRichard Scheffenegger 		break;
5061a70101aSRichard Scheffenegger 	case IPTOS_ECN_ECT1:
5071a70101aSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_rcvect1);
5081a70101aSRichard Scheffenegger 		break;
5091a70101aSRichard Scheffenegger 	}
5101a70101aSRichard Scheffenegger 
5114012ef77SRichard Scheffenegger 	switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
512f7220c48SRichard Scheffenegger 	/* no ECN */
5134012ef77SRichard Scheffenegger 	case (0|0|0):
514f7220c48SRichard Scheffenegger 		break;
515f7220c48SRichard Scheffenegger 	/* legacy ECN */
5164012ef77SRichard Scheffenegger 	case (0|TH_CWR|TH_ECE):
517f7220c48SRichard Scheffenegger 		scflags = SCF_ECN;
518f7220c48SRichard Scheffenegger 		break;
5194012ef77SRichard Scheffenegger 	/* Accurate ECN */
5204012ef77SRichard Scheffenegger 	case (TH_AE|TH_CWR|TH_ECE):
5214012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
5224012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
5234012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
5244012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
5254012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
5264012ef77SRichard Scheffenegger 				break;
5274012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
5284012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
5294012ef77SRichard Scheffenegger 				break;
5304012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
5314012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
5324012ef77SRichard Scheffenegger 				break;
5334012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5344012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5354012ef77SRichard Scheffenegger 				break;
5364012ef77SRichard Scheffenegger 			}
5374012ef77SRichard Scheffenegger 		} else
5384012ef77SRichard Scheffenegger 			scflags = SCF_ECN;
5394012ef77SRichard Scheffenegger 		break;
5404012ef77SRichard Scheffenegger 	/* Default Case (section 3.1.2) */
541f7220c48SRichard Scheffenegger 	default:
5424012ef77SRichard Scheffenegger 		if ((V_tcp_do_ecn == 3) ||
5434012ef77SRichard Scheffenegger 		    (V_tcp_do_ecn == 4)) {
5444012ef77SRichard Scheffenegger 			switch (iptos & IPTOS_ECN_MASK) {
5454012ef77SRichard Scheffenegger 			case IPTOS_ECN_CE:
5464012ef77SRichard Scheffenegger 				scflags = SCF_ACE_CE;
5474012ef77SRichard Scheffenegger 				break;
5484012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT0:
5494012ef77SRichard Scheffenegger 				scflags = SCF_ACE_0;
5504012ef77SRichard Scheffenegger 				break;
5514012ef77SRichard Scheffenegger 			case IPTOS_ECN_ECT1:
5524012ef77SRichard Scheffenegger 				scflags = SCF_ACE_1;
5534012ef77SRichard Scheffenegger 				break;
5544012ef77SRichard Scheffenegger 			case IPTOS_ECN_NOTECT:
5554012ef77SRichard Scheffenegger 				scflags = SCF_ACE_N;
5564012ef77SRichard Scheffenegger 				break;
5574012ef77SRichard Scheffenegger 			}
5584012ef77SRichard Scheffenegger 		}
559f7220c48SRichard Scheffenegger 		break;
560f7220c48SRichard Scheffenegger 	}
561f7220c48SRichard Scheffenegger 	return scflags;
562f7220c48SRichard Scheffenegger }
563f7220c48SRichard Scheffenegger 
564f7220c48SRichard Scheffenegger /*
565f7220c48SRichard Scheffenegger  * Set up the ECN information for the <SYN,ACK> from
566f7220c48SRichard Scheffenegger  * syncache information.
567f7220c48SRichard Scheffenegger  */
568f7220c48SRichard Scheffenegger uint16_t
569f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
570f7220c48SRichard Scheffenegger {
571f7220c48SRichard Scheffenegger 	if ((thflags & TH_SYN) &&
5723f169c54SRichard Scheffenegger 	    (sc->sc_flags & SCF_ECN_MASK)) {
5733f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
574f7220c48SRichard Scheffenegger 		case SCF_ECN:
5754012ef77SRichard Scheffenegger 			thflags |= (0 | 0 | TH_ECE);
5761790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
577f7220c48SRichard Scheffenegger 			break;
5784012ef77SRichard Scheffenegger 		case SCF_ACE_N:
5794012ef77SRichard Scheffenegger 			thflags |= (0 | TH_CWR | 0);
5804012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5814012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_nect);
5824012ef77SRichard Scheffenegger 			break;
5834012ef77SRichard Scheffenegger 		case SCF_ACE_0:
5844012ef77SRichard Scheffenegger 			thflags |= (TH_AE | 0 | 0);
5854012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5864012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect0);
5874012ef77SRichard Scheffenegger 			break;
5884012ef77SRichard Scheffenegger 		case SCF_ACE_1:
5894012ef77SRichard Scheffenegger 			thflags |= (0 | TH_ECE | TH_CWR);
5904012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5914012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ect1);
5924012ef77SRichard Scheffenegger 			break;
5934012ef77SRichard Scheffenegger 		case SCF_ACE_CE:
5944012ef77SRichard Scheffenegger 			thflags |= (TH_AE | TH_CWR | 0);
5954012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
5964012ef77SRichard Scheffenegger 			TCPSTAT_INC(tcps_ace_ce);
5974012ef77SRichard Scheffenegger 			break;
598f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
599f7220c48SRichard Scheffenegger 		default:
600f7220c48SRichard Scheffenegger 			break;
601f7220c48SRichard Scheffenegger 		}
602f7220c48SRichard Scheffenegger 	}
603f7220c48SRichard Scheffenegger 	return thflags;
604f7220c48SRichard Scheffenegger }
6054012ef77SRichard Scheffenegger 
6064012ef77SRichard Scheffenegger int
6074012ef77SRichard Scheffenegger tcp_ecn_get_ace(uint16_t thflags)
6084012ef77SRichard Scheffenegger {
6094012ef77SRichard Scheffenegger 	int ace = 0;
6104012ef77SRichard Scheffenegger 
6114012ef77SRichard Scheffenegger 	if (thflags & TH_ECE)
6124012ef77SRichard Scheffenegger 		ace += 1;
6134012ef77SRichard Scheffenegger 	if (thflags & TH_CWR)
6144012ef77SRichard Scheffenegger 		ace += 2;
6154012ef77SRichard Scheffenegger 	if (thflags & TH_AE)
6164012ef77SRichard Scheffenegger 		ace += 4;
6174012ef77SRichard Scheffenegger 	return ace;
6184012ef77SRichard Scheffenegger }
619