xref: /freebsd/sys/netinet/tcp_ecn.c (revision f7220c486c1bf858e97c2d8e5c4c9dac4947d50a)
1*f7220c48SRichard Scheffenegger /*-
2*f7220c48SRichard Scheffenegger  * SPDX-License-Identifier: BSD-3-Clause
3*f7220c48SRichard Scheffenegger  *
4*f7220c48SRichard Scheffenegger  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5*f7220c48SRichard Scheffenegger  *      The Regents of the University of California.  All rights reserved.
6*f7220c48SRichard Scheffenegger  * Copyright (c) 2007-2008,2010
7*f7220c48SRichard Scheffenegger  *      Swinburne University of Technology, Melbourne, Australia.
8*f7220c48SRichard Scheffenegger  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9*f7220c48SRichard Scheffenegger  * Copyright (c) 2010 The FreeBSD Foundation
10*f7220c48SRichard Scheffenegger  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11*f7220c48SRichard Scheffenegger  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12*f7220c48SRichard Scheffenegger  * All rights reserved.
13*f7220c48SRichard Scheffenegger  *
14*f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced Internet
15*f7220c48SRichard Scheffenegger  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16*f7220c48SRichard Scheffenegger  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17*f7220c48SRichard Scheffenegger  * University Research Program Fund at Community Foundation Silicon Valley.
18*f7220c48SRichard Scheffenegger  *
19*f7220c48SRichard Scheffenegger  * Portions of this software were developed at the Centre for Advanced
20*f7220c48SRichard Scheffenegger  * Internet Architectures, Swinburne University of Technology, Melbourne,
21*f7220c48SRichard Scheffenegger  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22*f7220c48SRichard Scheffenegger  *
23*f7220c48SRichard Scheffenegger  * Portions of this software were developed by Robert N. M. Watson under
24*f7220c48SRichard Scheffenegger  * contract to Juniper Networks, Inc.
25*f7220c48SRichard Scheffenegger  *
26*f7220c48SRichard Scheffenegger  * Redistribution and use in source and binary forms, with or without
27*f7220c48SRichard Scheffenegger  * modification, are permitted provided that the following conditions
28*f7220c48SRichard Scheffenegger  * are met:
29*f7220c48SRichard Scheffenegger  * 1. Redistributions of source code must retain the above copyright
30*f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer.
31*f7220c48SRichard Scheffenegger  * 2. Redistributions in binary form must reproduce the above copyright
32*f7220c48SRichard Scheffenegger  *    notice, this list of conditions and the following disclaimer in the
33*f7220c48SRichard Scheffenegger  *    documentation and/or other materials provided with the distribution.
34*f7220c48SRichard Scheffenegger  * 3. Neither the name of the University nor the names of its contributors
35*f7220c48SRichard Scheffenegger  *    may be used to endorse or promote products derived from this software
36*f7220c48SRichard Scheffenegger  *    without specific prior written permission.
37*f7220c48SRichard Scheffenegger  *
38*f7220c48SRichard Scheffenegger  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39*f7220c48SRichard Scheffenegger  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40*f7220c48SRichard Scheffenegger  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41*f7220c48SRichard Scheffenegger  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42*f7220c48SRichard Scheffenegger  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43*f7220c48SRichard Scheffenegger  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44*f7220c48SRichard Scheffenegger  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45*f7220c48SRichard Scheffenegger  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46*f7220c48SRichard Scheffenegger  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47*f7220c48SRichard Scheffenegger  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48*f7220c48SRichard Scheffenegger  * SUCH DAMAGE.
49*f7220c48SRichard Scheffenegger  *
50*f7220c48SRichard Scheffenegger  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51*f7220c48SRichard Scheffenegger  */
52*f7220c48SRichard Scheffenegger 
/*
 * Utility functions to deal with Explicit Congestion Notification in TCP,
 * implementing the essential parts of the Accurate ECN extension:
 * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
 */
58*f7220c48SRichard Scheffenegger 
59*f7220c48SRichard Scheffenegger #include <sys/cdefs.h>
60*f7220c48SRichard Scheffenegger __FBSDID("$FreeBSD$");
61*f7220c48SRichard Scheffenegger 
62*f7220c48SRichard Scheffenegger #include "opt_inet.h"
63*f7220c48SRichard Scheffenegger #include "opt_inet6.h"
64*f7220c48SRichard Scheffenegger #include "opt_tcpdebug.h"
65*f7220c48SRichard Scheffenegger 
66*f7220c48SRichard Scheffenegger #include <sys/param.h>
67*f7220c48SRichard Scheffenegger #include <sys/systm.h>
68*f7220c48SRichard Scheffenegger #include <sys/kernel.h>
69*f7220c48SRichard Scheffenegger #include <sys/sysctl.h>
70*f7220c48SRichard Scheffenegger #include <sys/malloc.h>
71*f7220c48SRichard Scheffenegger #include <sys/mbuf.h>
72*f7220c48SRichard Scheffenegger #include <sys/socket.h>
73*f7220c48SRichard Scheffenegger #include <sys/socketvar.h>
74*f7220c48SRichard Scheffenegger 
75*f7220c48SRichard Scheffenegger #include <machine/cpu.h>
76*f7220c48SRichard Scheffenegger 
77*f7220c48SRichard Scheffenegger #include <vm/uma.h>
78*f7220c48SRichard Scheffenegger 
79*f7220c48SRichard Scheffenegger #include <net/if.h>
80*f7220c48SRichard Scheffenegger #include <net/if_var.h>
81*f7220c48SRichard Scheffenegger #include <net/route.h>
82*f7220c48SRichard Scheffenegger #include <net/vnet.h>
83*f7220c48SRichard Scheffenegger 
84*f7220c48SRichard Scheffenegger #include <netinet/in.h>
85*f7220c48SRichard Scheffenegger #include <netinet/in_systm.h>
86*f7220c48SRichard Scheffenegger #include <netinet/ip.h>
87*f7220c48SRichard Scheffenegger #include <netinet/in_var.h>
88*f7220c48SRichard Scheffenegger #include <netinet/in_pcb.h>
89*f7220c48SRichard Scheffenegger #include <netinet/ip_var.h>
90*f7220c48SRichard Scheffenegger #include <netinet/ip6.h>
91*f7220c48SRichard Scheffenegger #include <netinet/icmp6.h>
92*f7220c48SRichard Scheffenegger #include <netinet6/nd6.h>
93*f7220c48SRichard Scheffenegger #include <netinet6/ip6_var.h>
94*f7220c48SRichard Scheffenegger #include <netinet6/in6_pcb.h>
95*f7220c48SRichard Scheffenegger #include <netinet/tcp.h>
96*f7220c48SRichard Scheffenegger #include <netinet/tcp_fsm.h>
97*f7220c48SRichard Scheffenegger #include <netinet/tcp_seq.h>
98*f7220c48SRichard Scheffenegger #include <netinet/tcp_var.h>
99*f7220c48SRichard Scheffenegger #include <netinet/tcp_syncache.h>
100*f7220c48SRichard Scheffenegger #include <netinet/tcp_timer.h>
101*f7220c48SRichard Scheffenegger #include <netinet6/tcp6_var.h>
102*f7220c48SRichard Scheffenegger #include <netinet/tcpip.h>
103*f7220c48SRichard Scheffenegger #include <netinet/tcp_ecn.h>
104*f7220c48SRichard Scheffenegger 
105*f7220c48SRichard Scheffenegger 
106*f7220c48SRichard Scheffenegger /*
107*f7220c48SRichard Scheffenegger  * Process incoming SYN,ACK packet
108*f7220c48SRichard Scheffenegger  */
109*f7220c48SRichard Scheffenegger void
110*f7220c48SRichard Scheffenegger tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
111*f7220c48SRichard Scheffenegger {
112*f7220c48SRichard Scheffenegger 	thflags &= (TH_CWR|TH_ECE);
113*f7220c48SRichard Scheffenegger 
114*f7220c48SRichard Scheffenegger 	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
115*f7220c48SRichard Scheffenegger 	    V_tcp_do_ecn) {
116*f7220c48SRichard Scheffenegger 		tp->t_flags2 |= TF2_ECN_PERMIT;
117*f7220c48SRichard Scheffenegger 		KMOD_TCPSTAT_INC(tcps_ecn_shs);
118*f7220c48SRichard Scheffenegger 	}
119*f7220c48SRichard Scheffenegger }
120*f7220c48SRichard Scheffenegger 
121*f7220c48SRichard Scheffenegger /*
122*f7220c48SRichard Scheffenegger  * Handle parallel SYN for ECN
123*f7220c48SRichard Scheffenegger  */
124*f7220c48SRichard Scheffenegger void
125*f7220c48SRichard Scheffenegger tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
126*f7220c48SRichard Scheffenegger {
127*f7220c48SRichard Scheffenegger 	if (thflags & TH_ACK)
128*f7220c48SRichard Scheffenegger 		return;
129*f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 0)
130*f7220c48SRichard Scheffenegger 		return;
131*f7220c48SRichard Scheffenegger 	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
132*f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
133*f7220c48SRichard Scheffenegger 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
134*f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
135*f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
136*f7220c48SRichard Scheffenegger 			KMOD_TCPSTAT_INC(tcps_ecn_shs);
137*f7220c48SRichard Scheffenegger 		}
138*f7220c48SRichard Scheffenegger 	}
139*f7220c48SRichard Scheffenegger }
140*f7220c48SRichard Scheffenegger 
141*f7220c48SRichard Scheffenegger /*
142*f7220c48SRichard Scheffenegger  * TCP ECN processing.
143*f7220c48SRichard Scheffenegger  */
144*f7220c48SRichard Scheffenegger int
145*f7220c48SRichard Scheffenegger tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
146*f7220c48SRichard Scheffenegger {
147*f7220c48SRichard Scheffenegger 	int delta_ace = 0;
148*f7220c48SRichard Scheffenegger 
149*f7220c48SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ECN_PERMIT) {
150*f7220c48SRichard Scheffenegger 		switch (iptos & IPTOS_ECN_MASK) {
151*f7220c48SRichard Scheffenegger 		case IPTOS_ECN_CE:
152*f7220c48SRichard Scheffenegger 			KMOD_TCPSTAT_INC(tcps_ecn_ce);
153*f7220c48SRichard Scheffenegger 			break;
154*f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT0:
155*f7220c48SRichard Scheffenegger 			KMOD_TCPSTAT_INC(tcps_ecn_ect0);
156*f7220c48SRichard Scheffenegger 			break;
157*f7220c48SRichard Scheffenegger 		case IPTOS_ECN_ECT1:
158*f7220c48SRichard Scheffenegger 			KMOD_TCPSTAT_INC(tcps_ecn_ect1);
159*f7220c48SRichard Scheffenegger 			break;
160*f7220c48SRichard Scheffenegger 		}
161*f7220c48SRichard Scheffenegger 
162*f7220c48SRichard Scheffenegger 		/* RFC3168 ECN handling */
163*f7220c48SRichard Scheffenegger 		if (thflags & TH_ECE)
164*f7220c48SRichard Scheffenegger 			delta_ace = 1;
165*f7220c48SRichard Scheffenegger 		if (thflags & TH_CWR) {
166*f7220c48SRichard Scheffenegger 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
167*f7220c48SRichard Scheffenegger 			tp->t_flags |= TF_ACKNOW;
168*f7220c48SRichard Scheffenegger 		}
169*f7220c48SRichard Scheffenegger 		if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
170*f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_SND_ECE;
171*f7220c48SRichard Scheffenegger 
172*f7220c48SRichard Scheffenegger 		/* Process a packet differently from RFC3168. */
173*f7220c48SRichard Scheffenegger 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
174*f7220c48SRichard Scheffenegger 	}
175*f7220c48SRichard Scheffenegger 
176*f7220c48SRichard Scheffenegger 	return delta_ace;
177*f7220c48SRichard Scheffenegger }
178*f7220c48SRichard Scheffenegger 
179*f7220c48SRichard Scheffenegger /*
180*f7220c48SRichard Scheffenegger  * Send ECN setup <SYN> packet header flags
181*f7220c48SRichard Scheffenegger  */
182*f7220c48SRichard Scheffenegger uint16_t
183*f7220c48SRichard Scheffenegger tcp_ecn_output_syn_sent(struct tcpcb *tp)
184*f7220c48SRichard Scheffenegger {
185*f7220c48SRichard Scheffenegger 	uint16_t thflags = 0;
186*f7220c48SRichard Scheffenegger 
187*f7220c48SRichard Scheffenegger 	if (V_tcp_do_ecn == 1) {
188*f7220c48SRichard Scheffenegger 		/* Send a RFC3168 ECN setup <SYN> packet */
189*f7220c48SRichard Scheffenegger 		if (tp->t_rxtshift >= 1) {
190*f7220c48SRichard Scheffenegger 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
191*f7220c48SRichard Scheffenegger 				thflags = TH_ECE|TH_CWR;
192*f7220c48SRichard Scheffenegger 		} else
193*f7220c48SRichard Scheffenegger 			thflags = TH_ECE|TH_CWR;
194*f7220c48SRichard Scheffenegger 	}
195*f7220c48SRichard Scheffenegger 
196*f7220c48SRichard Scheffenegger 	return thflags;
197*f7220c48SRichard Scheffenegger }
198*f7220c48SRichard Scheffenegger 
199*f7220c48SRichard Scheffenegger /*
200*f7220c48SRichard Scheffenegger  * output processing of ECN feature
201*f7220c48SRichard Scheffenegger  * returning IP ECN header codepoint
202*f7220c48SRichard Scheffenegger  */
203*f7220c48SRichard Scheffenegger int
204*f7220c48SRichard Scheffenegger tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len)
205*f7220c48SRichard Scheffenegger {
206*f7220c48SRichard Scheffenegger 	int ipecn = IPTOS_ECN_NOTECT;
207*f7220c48SRichard Scheffenegger 	bool newdata;
208*f7220c48SRichard Scheffenegger 
209*f7220c48SRichard Scheffenegger 	/*
210*f7220c48SRichard Scheffenegger 	 * If the peer has ECN, mark data packets with
211*f7220c48SRichard Scheffenegger 	 * ECN capable transmission (ECT).
212*f7220c48SRichard Scheffenegger 	 * Ignore pure control packets, retransmissions
213*f7220c48SRichard Scheffenegger 	 * and window probes.
214*f7220c48SRichard Scheffenegger 	 */
215*f7220c48SRichard Scheffenegger 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
216*f7220c48SRichard Scheffenegger 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
217*f7220c48SRichard Scheffenegger 	if (newdata) {
218*f7220c48SRichard Scheffenegger 		ipecn = IPTOS_ECN_ECT0;
219*f7220c48SRichard Scheffenegger 		KMOD_TCPSTAT_INC(tcps_ecn_ect0);
220*f7220c48SRichard Scheffenegger 	}
221*f7220c48SRichard Scheffenegger 	/*
222*f7220c48SRichard Scheffenegger 	 * Reply with proper ECN notifications.
223*f7220c48SRichard Scheffenegger 	 */
224*f7220c48SRichard Scheffenegger 	if (newdata &&
225*f7220c48SRichard Scheffenegger 	    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
226*f7220c48SRichard Scheffenegger 		*thflags |= TH_CWR;
227*f7220c48SRichard Scheffenegger 		tp->t_flags2 &= ~TF2_ECN_SND_CWR;
228*f7220c48SRichard Scheffenegger 	}
229*f7220c48SRichard Scheffenegger 	if (tp->t_flags2 & TF2_ECN_SND_ECE)
230*f7220c48SRichard Scheffenegger 		*thflags |= TH_ECE;
231*f7220c48SRichard Scheffenegger 
232*f7220c48SRichard Scheffenegger 	return ipecn;
233*f7220c48SRichard Scheffenegger }
234*f7220c48SRichard Scheffenegger 
235*f7220c48SRichard Scheffenegger /*
236*f7220c48SRichard Scheffenegger  * Set up the ECN related tcpcb fields from
237*f7220c48SRichard Scheffenegger  * a syncache entry
238*f7220c48SRichard Scheffenegger  */
239*f7220c48SRichard Scheffenegger void
240*f7220c48SRichard Scheffenegger tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
241*f7220c48SRichard Scheffenegger {
242*f7220c48SRichard Scheffenegger 	if (sc->sc_flags & SCF_ECN) {
243*f7220c48SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN) {
244*f7220c48SRichard Scheffenegger 		case SCF_ECN:
245*f7220c48SRichard Scheffenegger 			tp->t_flags2 |= TF2_ECN_PERMIT;
246*f7220c48SRichard Scheffenegger 			break;
247*f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
248*f7220c48SRichard Scheffenegger 		default:
249*f7220c48SRichard Scheffenegger 			break;
250*f7220c48SRichard Scheffenegger 		}
251*f7220c48SRichard Scheffenegger 	}
252*f7220c48SRichard Scheffenegger }
253*f7220c48SRichard Scheffenegger 
254*f7220c48SRichard Scheffenegger /*
255*f7220c48SRichard Scheffenegger  * Process a <SYN> packets ECN information, and provide the
256*f7220c48SRichard Scheffenegger  * syncache with the relevant information.
257*f7220c48SRichard Scheffenegger  */
258*f7220c48SRichard Scheffenegger int
259*f7220c48SRichard Scheffenegger tcp_ecn_syncache_add(uint16_t thflags, int iptos)
260*f7220c48SRichard Scheffenegger {
261*f7220c48SRichard Scheffenegger 	int scflags = 0;
262*f7220c48SRichard Scheffenegger 
263*f7220c48SRichard Scheffenegger 	switch (thflags & (TH_CWR|TH_ECE)) {
264*f7220c48SRichard Scheffenegger 	/* no ECN */
265*f7220c48SRichard Scheffenegger 	case (0|0):
266*f7220c48SRichard Scheffenegger 		break;
267*f7220c48SRichard Scheffenegger 	/* legacy ECN */
268*f7220c48SRichard Scheffenegger 	case (TH_CWR|TH_ECE):
269*f7220c48SRichard Scheffenegger 		scflags = SCF_ECN;
270*f7220c48SRichard Scheffenegger 		break;
271*f7220c48SRichard Scheffenegger 	default:
272*f7220c48SRichard Scheffenegger 		break;
273*f7220c48SRichard Scheffenegger 	}
274*f7220c48SRichard Scheffenegger 	return scflags;
275*f7220c48SRichard Scheffenegger }
276*f7220c48SRichard Scheffenegger 
277*f7220c48SRichard Scheffenegger /*
278*f7220c48SRichard Scheffenegger  * Set up the ECN information for the <SYN,ACK> from
279*f7220c48SRichard Scheffenegger  * syncache information.
280*f7220c48SRichard Scheffenegger  */
281*f7220c48SRichard Scheffenegger uint16_t
282*f7220c48SRichard Scheffenegger tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
283*f7220c48SRichard Scheffenegger {
284*f7220c48SRichard Scheffenegger 	if ((thflags & TH_SYN) &&
285*f7220c48SRichard Scheffenegger 	    (sc->sc_flags & SCF_ECN)) {
286*f7220c48SRichard Scheffenegger 		switch (sc->sc_flags & SCF_ECN) {
287*f7220c48SRichard Scheffenegger 		case SCF_ECN:
288*f7220c48SRichard Scheffenegger 			thflags |= (0 | TH_ECE);
289*f7220c48SRichard Scheffenegger 			KMOD_TCPSTAT_INC(tcps_ecn_shs);
290*f7220c48SRichard Scheffenegger 			break;
291*f7220c48SRichard Scheffenegger 		/* undefined SCF codepoint */
292*f7220c48SRichard Scheffenegger 		default:
293*f7220c48SRichard Scheffenegger 			break;
294*f7220c48SRichard Scheffenegger 		}
295*f7220c48SRichard Scheffenegger 	}
296*f7220c48SRichard Scheffenegger 	return thflags;
297*f7220c48SRichard Scheffenegger }
298