xref: /freebsd/sys/netinet/tcp_ecn.c (revision 2ff07d9220a0af1ffe31849bfd66139557a9aa27)
1f7220c48SRichard Scheffenegger /*-
2f7220c48SRichard Scheffenegger  * SPDX-License-Identifier: BSD-3-Clause
3f7220c48SRichard Scheffenegger  *
4f7220c48SRichard Scheffenegger  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5f7220c48SRichard Scheffenegger  *      The Regents of the University of California.  All rights reserved.
6f7220c48SRichard Scheffenegger  * Copyright (c) 2007-2008,2010
7f7220c48SRichard Scheffenegger  *      Swinburne University of Technology, Melbourne, Australia.
8f7220c48SRichard Scheffenegger  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9f7220c48SRichard Scheffenegger  * Copyright (c) 2010 The FreeBSD Foundation
10f7220c48SRichard Scheffenegger  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11f7220c48SRichard Scheffenegger  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12f7220c48SRichard Scheffenegger  * All rights reserved.
13f7220c48SRichard Scheffenegger  *
14f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced Internet
15f7220c48SRichard Scheffenegger  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16f7220c48SRichard Scheffenegger  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17f7220c48SRichard Scheffenegger  * University Research Program Fund at Community Foundation Silicon Valley.
18f7220c48SRichard Scheffenegger  *
19f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced
20f7220c48SRichard Scheffenegger  * Internet Architectures, Swinburne University of Technology, Melbourne,
21f7220c48SRichard Scheffenegger  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22f7220c48SRichard Scheffenegger  *
23f7220c48SRichard Scheffenegger  * Portions of this software were developed by Robert N. M. Watson under
24f7220c48SRichard Scheffenegger  * contract to Juniper Networks, Inc.
25f7220c48SRichard Scheffenegger  *
26f7220c48SRichard Scheffenegger  * Redistribution and use in source and binary forms, with or without
27f7220c48SRichard Scheffenegger  * modification, are permitted provided that the following conditions
28f7220c48SRichard Scheffenegger  * are met:
29f7220c48SRichard Scheffenegger  * 1. Redistributions of source code must retain the above copyright
30f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer.
31f7220c48SRichard Scheffenegger  * 2. Redistributions in binary form must reproduce the above copyright
32f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer in the
33f7220c48SRichard Scheffenegger  *    documentation and/or other materials provided with the distribution.
34f7220c48SRichard Scheffenegger  * 3. Neither the name of the University nor the names of its contributors
35f7220c48SRichard Scheffenegger  *    may be used to endorse or promote products derived from this software
36f7220c48SRichard Scheffenegger  *    without specific prior written permission.
37f7220c48SRichard Scheffenegger  *
38f7220c48SRichard Scheffenegger  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39f7220c48SRichard Scheffenegger  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40f7220c48SRichard Scheffenegger  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41f7220c48SRichard Scheffenegger  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42f7220c48SRichard Scheffenegger  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43f7220c48SRichard Scheffenegger  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44f7220c48SRichard Scheffenegger  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45f7220c48SRichard Scheffenegger  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46f7220c48SRichard Scheffenegger  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47f7220c48SRichard Scheffenegger  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48f7220c48SRichard Scheffenegger  * SUCH DAMAGE.
49f7220c48SRichard Scheffenegger  *
50f7220c48SRichard Scheffenegger  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51f7220c48SRichard Scheffenegger  */
52f7220c48SRichard Scheffenegger 
53f7220c48SRichard Scheffenegger /*
54f7220c48SRichard Scheffenegger  * Utility functions to deal with Explicit Congestion Notification in TCP
55f7220c48SRichard Scheffenegger  * implementing the essential parts of the Accurate ECN extension
56f7220c48SRichard Scheffenegger  * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
57f7220c48SRichard Scheffenegger  */
58f7220c48SRichard Scheffenegger 
59f7220c48SRichard Scheffenegger #include <sys/cdefs.h>
60f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$");
61f7220c48SRichard Scheffenegger 
62f7220c48SRichard Scheffenegger #include "opt_inet.h"
63f7220c48SRichard Scheffenegger #include "opt_inet6.h"
64f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h"
65f7220c48SRichard Scheffenegger 
66f7220c48SRichard Scheffenegger #include <sys/param.h>
67f7220c48SRichard Scheffenegger #include <sys/systm.h>
68f7220c48SRichard Scheffenegger #include <sys/kernel.h>
69f7220c48SRichard Scheffenegger #include <sys/sysctl.h>
70f7220c48SRichard Scheffenegger #include <sys/malloc.h>
71f7220c48SRichard Scheffenegger #include <sys/mbuf.h>
72f7220c48SRichard Scheffenegger #include <sys/socket.h>
73f7220c48SRichard Scheffenegger #include <sys/socketvar.h>
74f7220c48SRichard Scheffenegger 
75f7220c48SRichard Scheffenegger #include <machine/cpu.h>
76f7220c48SRichard Scheffenegger 
77f7220c48SRichard Scheffenegger #include <vm/uma.h>
78f7220c48SRichard Scheffenegger 
79f7220c48SRichard Scheffenegger #include <net/if.h>
80f7220c48SRichard Scheffenegger #include <net/if_var.h>
81f7220c48SRichard Scheffenegger #include <net/route.h>
82f7220c48SRichard Scheffenegger #include <net/vnet.h>
83f7220c48SRichard Scheffenegger 
84f7220c48SRichard Scheffenegger #include <netinet/in.h>
85f7220c48SRichard Scheffenegger #include <netinet/in_systm.h>
86f7220c48SRichard Scheffenegger #include <netinet/ip.h>
87f7220c48SRichard Scheffenegger #include <netinet/in_var.h>
88f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h>
89f7220c48SRichard Scheffenegger #include <netinet/ip_var.h>
90f7220c48SRichard Scheffenegger #include <netinet/ip6.h>
91f7220c48SRichard Scheffenegger #include <netinet/icmp6.h>
92f7220c48SRichard Scheffenegger #include <netinet6/nd6.h>
93f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h>
94f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h>
95f7220c48SRichard Scheffenegger #include <netinet/tcp.h>
96f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h>
97f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h>
98f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h>
99f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h>
100f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h>
101f7220c48SRichard Scheffenegger #include <netinet6/tcp6_var.h>
102f7220c48SRichard Scheffenegger #include <netinet/tcpip.h>
103f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h>
104f7220c48SRichard Scheffenegger 
105f7220c48SRichard Scheffenegger 
106f7220c48SRichard Scheffenegger /*
107f7220c48SRichard Scheffenegger  * Process incoming SYN,ACK packet
108f7220c48SRichard Scheffenegger  */
109f7220c48SRichard Scheffenegger void
110f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
111f7220c48SRichard Scheffenegger {
112f7220c48SRichard Scheffenegger 	thflags &= (TH_CWR|TH_ECE);
113f7220c48SRichard Scheffenegger 
114f7220c48SRichard Scheffenegger 	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
115f7220c48SRichard Scheffenegger 	    V_tcp_do_ecn) {
116f7220c48SRichard Scheffenegger 		tp->t_flags2 |= TF2_ECN_PERMIT;
1171790549dSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_shs);
118f7220c48SRichard Scheffenegger 	}
119f7220c48SRichard Scheffenegger }
120f7220c48SRichard Scheffenegger 
121f7220c48SRichard Scheffenegger /*
122f7220c48SRichard Scheffenegger  * Handle parallel SYN for ECN
123f7220c48SRichard Scheffenegger  */
124f7220c48SRichard Scheffenegger void
125f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
126f7220c48SRichard Scheffenegger {
127f7220c48SRichard Scheffenegger 	if (thflags & TH_ACK)
128f7220c48SRichard Scheffenegger 		return;
129f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
130f7220c48SRichard Scheffenegger 		return;
131f7220c48SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
132f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
133f7220c48SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
134f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
135f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
1361790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
137f7220c48SRichard Scheffenegger 		}
138f7220c48SRichard Scheffenegger 	}
139f7220c48SRichard Scheffenegger }
140f7220c48SRichard Scheffenegger 
141f7220c48SRichard Scheffenegger /*
142f7220c48SRichard Scheffenegger  * TCP ECN processing.
143f7220c48SRichard Scheffenegger  */
144f7220c48SRichard Scheffenegger int
145f7220c48SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
146f7220c48SRichard Scheffenegger {
147f7220c48SRichard Scheffenegger 	int delta_ace = 0;
148f7220c48SRichard Scheffenegger 
149f7220c48SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ECN_PERMIT) {
150f7220c48SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
151f7220c48SRichard Scheffenegger 		case IPTOS_ECN_CE:
1521790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ce);
153f7220c48SRichard Scheffenegger 			break;
154f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT0:
1551790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect0);
156f7220c48SRichard Scheffenegger 			break;
157f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT1:
1581790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_ect1);
159f7220c48SRichard Scheffenegger 			break;
160f7220c48SRichard Scheffenegger 		}
161f7220c48SRichard Scheffenegger 
162f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
163f7220c48SRichard Scheffenegger 		if (thflags & TH_ECE)
164f7220c48SRichard Scheffenegger 			delta_ace = 1;
165f7220c48SRichard Scheffenegger 		if (thflags & TH_CWR) {
166f7220c48SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
167f7220c48SRichard Scheffenegger 			tp->t_flags |= TF_ACKNOW;
168f7220c48SRichard Scheffenegger 		}
169f7220c48SRichard Scheffenegger 		if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
170f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
171f7220c48SRichard Scheffenegger 
172f7220c48SRichard Scheffenegger 		/* Process a packet differently from RFC3168. */
173f7220c48SRichard Scheffenegger 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
174f7220c48SRichard Scheffenegger 	}
175f7220c48SRichard Scheffenegger 
176f7220c48SRichard Scheffenegger 	return delta_ace;
177f7220c48SRichard Scheffenegger }
178f7220c48SRichard Scheffenegger 
179f7220c48SRichard Scheffenegger /*
180f7220c48SRichard Scheffenegger  * Send ECN setup <SYN> packet header flags
181f7220c48SRichard Scheffenegger  */
182f7220c48SRichard Scheffenegger uint16_t
183f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp)
184f7220c48SRichard Scheffenegger {
185f7220c48SRichard Scheffenegger 	uint16_t thflags = 0;
186f7220c48SRichard Scheffenegger 
187f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 1) {
188f7220c48SRichard Scheffenegger 		/* Send a RFC3168 ECN setup <SYN> packet */
189f7220c48SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
190f7220c48SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
191f7220c48SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR;
192f7220c48SRichard Scheffenegger 		} else
193f7220c48SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR;
194f7220c48SRichard Scheffenegger 	}
195f7220c48SRichard Scheffenegger 
196f7220c48SRichard Scheffenegger 	return thflags;
197f7220c48SRichard Scheffenegger }
198f7220c48SRichard Scheffenegger 
199f7220c48SRichard Scheffenegger /*
200f7220c48SRichard Scheffenegger  * output processing of ECN feature
201f7220c48SRichard Scheffenegger  * returning IP ECN header codepoint
202f7220c48SRichard Scheffenegger  */
203f7220c48SRichard Scheffenegger int
204*2ff07d92SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
205f7220c48SRichard Scheffenegger {
206f7220c48SRichard Scheffenegger 	int ipecn = IPTOS_ECN_NOTECT;
207f7220c48SRichard Scheffenegger 	bool newdata;
208f7220c48SRichard Scheffenegger 
209f7220c48SRichard Scheffenegger 	/*
210f7220c48SRichard Scheffenegger 	 * If the peer has ECN, mark data packets with
211f7220c48SRichard Scheffenegger 	 * ECN capable transmission (ECT).
212f7220c48SRichard Scheffenegger 	 * Ignore pure control packets, retransmissions
213f7220c48SRichard Scheffenegger 	 * and window probes.
214f7220c48SRichard Scheffenegger 	 */
215f7220c48SRichard Scheffenegger 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
216*2ff07d92SRichard Scheffenegger 		    !rxmit &&
217f7220c48SRichard Scheffenegger 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
218f7220c48SRichard Scheffenegger 	if (newdata) {
219f7220c48SRichard Scheffenegger 		ipecn = IPTOS_ECN_ECT0;
2201790549dSRichard Scheffenegger 		TCPSTAT_INC(tcps_ecn_ect0);
221f7220c48SRichard Scheffenegger 	}
222f7220c48SRichard Scheffenegger 	/*
223f7220c48SRichard Scheffenegger 	 * Reply with proper ECN notifications.
224f7220c48SRichard Scheffenegger 	 */
225f7220c48SRichard Scheffenegger 	if (newdata &&
226f7220c48SRichard Scheffenegger 	    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
227f7220c48SRichard Scheffenegger 		*thflags |= TH_CWR;
228f7220c48SRichard Scheffenegger 		tp->t_flags2 &= ~TF2_ECN_SND_CWR;
229f7220c48SRichard Scheffenegger 	}
230f7220c48SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ECN_SND_ECE)
231f7220c48SRichard Scheffenegger 		*thflags |= TH_ECE;
232f7220c48SRichard Scheffenegger 
233f7220c48SRichard Scheffenegger 	return ipecn;
234f7220c48SRichard Scheffenegger }
235f7220c48SRichard Scheffenegger 
236f7220c48SRichard Scheffenegger /*
237f7220c48SRichard Scheffenegger  * Set up the ECN related tcpcb fields from
238f7220c48SRichard Scheffenegger  * a syncache entry
239f7220c48SRichard Scheffenegger  */
240f7220c48SRichard Scheffenegger void
241f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
242f7220c48SRichard Scheffenegger {
2433f169c54SRichard Scheffenegger 	if (sc->sc_flags & SCF_ECN_MASK) {
2443f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
245f7220c48SRichard Scheffenegger 		case SCF_ECN:
246f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
247f7220c48SRichard Scheffenegger 			break;
248f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
249f7220c48SRichard Scheffenegger 		default:
250f7220c48SRichard Scheffenegger 			break;
251f7220c48SRichard Scheffenegger 		}
252f7220c48SRichard Scheffenegger 	}
253f7220c48SRichard Scheffenegger }
254f7220c48SRichard Scheffenegger 
255f7220c48SRichard Scheffenegger /*
256f7220c48SRichard Scheffenegger  * Process a <SYN> packets ECN information, and provide the
257f7220c48SRichard Scheffenegger  * syncache with the relevant information.
258f7220c48SRichard Scheffenegger  */
259f7220c48SRichard Scheffenegger int
260f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos)
261f7220c48SRichard Scheffenegger {
262f7220c48SRichard Scheffenegger 	int scflags = 0;
263f7220c48SRichard Scheffenegger 
264f7220c48SRichard Scheffenegger 	switch (thflags & (TH_CWR|TH_ECE)) {
265f7220c48SRichard Scheffenegger 	/* no ECN */
266f7220c48SRichard Scheffenegger 	case (0|0):
267f7220c48SRichard Scheffenegger 		break;
268f7220c48SRichard Scheffenegger 	/* legacy ECN */
269f7220c48SRichard Scheffenegger 	case (TH_CWR|TH_ECE):
270f7220c48SRichard Scheffenegger 		scflags = SCF_ECN;
271f7220c48SRichard Scheffenegger 		break;
272f7220c48SRichard Scheffenegger 	default:
273f7220c48SRichard Scheffenegger 		break;
274f7220c48SRichard Scheffenegger 	}
275f7220c48SRichard Scheffenegger 	return scflags;
276f7220c48SRichard Scheffenegger }
277f7220c48SRichard Scheffenegger 
278f7220c48SRichard Scheffenegger /*
279f7220c48SRichard Scheffenegger  * Set up the ECN information for the <SYN,ACK> from
280f7220c48SRichard Scheffenegger  * syncache information.
281f7220c48SRichard Scheffenegger  */
282f7220c48SRichard Scheffenegger uint16_t
283f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
284f7220c48SRichard Scheffenegger {
285f7220c48SRichard Scheffenegger 	if ((thflags & TH_SYN) &&
2863f169c54SRichard Scheffenegger 	    (sc->sc_flags & SCF_ECN_MASK)) {
2873f169c54SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN_MASK) {
288f7220c48SRichard Scheffenegger 		case SCF_ECN:
289f7220c48SRichard Scheffenegger 			thflags |= (0 | TH_ECE);
2901790549dSRichard Scheffenegger 			TCPSTAT_INC(tcps_ecn_shs);
291f7220c48SRichard Scheffenegger 			break;
292f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
293f7220c48SRichard Scheffenegger 		default:
294f7220c48SRichard Scheffenegger 			break;
295f7220c48SRichard Scheffenegger 		}
296f7220c48SRichard Scheffenegger 	}
297f7220c48SRichard Scheffenegger 	return thflags;
298f7220c48SRichard Scheffenegger }
299