xref: /freebsd/sys/netinet/tcp_ecn.c (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
5  *      The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 2007-2008,2010
7  *      Swinburne University of Technology, Melbourne, Australia.
8  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
9  * Copyright (c) 2010 The FreeBSD Foundation
10  * Copyright (c) 2010-2011 Juniper Networks, Inc.
11  * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com>
12  * All rights reserved.
13  *
14  * Portions of this software were developed at the Centre for Advanced Internet
15  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
16  * James Healy and David Hayes, made possible in part by a grant from the Cisco
17  * University Research Program Fund at Community Foundation Silicon Valley.
18  *
19  * Portions of this software were developed at the Centre for Advanced
20  * Internet Architectures, Swinburne University of Technology, Melbourne,
21  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
22  *
23  * Portions of this software were developed by Robert N. M. Watson under
24  * contract to Juniper Networks, Inc.
25  *
26  * Redistribution and use in source and binary forms, with or without
27  * modification, are permitted provided that the following conditions
28  * are met:
29  * 1. Redistributions of source code must retain the above copyright
30  *    notice, this list of conditions and the following disclaimer.
31  * 2. Redistributions in binary form must reproduce the above copyright
32  *    notice, this list of conditions and the following disclaimer in the
33  *    documentation and/or other materials provided with the distribution.
34  * 3. Neither the name of the University nor the names of its contributors
35  *    may be used to endorse or promote products derived from this software
36  *    without specific prior written permission.
37  *
38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  *
50  *      @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95
51  */
52 
53 /*
54  * Utility functions to deal with Explicit Congestion Notification in TCP
55  * implementing the essential parts of the Accurate ECN extension
56  * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09
57  */
58 
59 #include <sys/cdefs.h>
60 __FBSDID("$FreeBSD$");
61 
62 #include "opt_inet.h"
63 #include "opt_inet6.h"
64 #include "opt_tcpdebug.h"
65 
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
69 #include <sys/sysctl.h>
70 #include <sys/malloc.h>
71 #include <sys/mbuf.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 
75 #include <machine/cpu.h>
76 
77 #include <vm/uma.h>
78 
79 #include <net/if.h>
80 #include <net/if_var.h>
81 #include <net/route.h>
82 #include <net/vnet.h>
83 
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87 #include <netinet/in_var.h>
88 #include <netinet/in_pcb.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/nd6.h>
93 #include <netinet6/ip6_var.h>
94 #include <netinet6/in6_pcb.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_fsm.h>
97 #include <netinet/tcp_seq.h>
98 #include <netinet/tcp_var.h>
99 #include <netinet/tcp_syncache.h>
100 #include <netinet/tcp_timer.h>
101 #include <netinet6/tcp6_var.h>
102 #include <netinet/tcpip.h>
103 #include <netinet/tcp_ecn.h>
104 
105 
106 /*
107  * Process incoming SYN,ACK packet
108  */
109 void
110 tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
111 {
112 	thflags &= (TH_CWR|TH_ECE);
113 
114 	if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
115 	    V_tcp_do_ecn) {
116 		tp->t_flags2 |= TF2_ECN_PERMIT;
117 		TCPSTAT_INC(tcps_ecn_shs);
118 	}
119 }
120 
121 /*
122  * Handle parallel SYN for ECN
123  */
124 void
125 tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
126 {
127 	if (thflags & TH_ACK)
128 		return;
129 	if (V_tcp_do_ecn == 0)
130 		return;
131 	if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) {
132 		/* RFC3168 ECN handling */
133 		if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
134 			tp->t_flags2 |= TF2_ECN_PERMIT;
135 			tp->t_flags2 |= TF2_ECN_SND_ECE;
136 			TCPSTAT_INC(tcps_ecn_shs);
137 		}
138 	}
139 }
140 
141 /*
142  * TCP ECN processing.
143  */
144 int
145 tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
146 {
147 	int delta_ace = 0;
148 
149 	if (tp->t_flags2 & TF2_ECN_PERMIT) {
150 		switch (iptos & IPTOS_ECN_MASK) {
151 		case IPTOS_ECN_CE:
152 			TCPSTAT_INC(tcps_ecn_ce);
153 			break;
154 		case IPTOS_ECN_ECT0:
155 			TCPSTAT_INC(tcps_ecn_ect0);
156 			break;
157 		case IPTOS_ECN_ECT1:
158 			TCPSTAT_INC(tcps_ecn_ect1);
159 			break;
160 		}
161 
162 		/* RFC3168 ECN handling */
163 		if (thflags & TH_ECE)
164 			delta_ace = 1;
165 		if (thflags & TH_CWR) {
166 			tp->t_flags2 &= ~TF2_ECN_SND_ECE;
167 			tp->t_flags |= TF_ACKNOW;
168 		}
169 		if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
170 			tp->t_flags2 |= TF2_ECN_SND_ECE;
171 
172 		/* Process a packet differently from RFC3168. */
173 		cc_ecnpkt_handler_flags(tp, thflags, iptos);
174 	}
175 
176 	return delta_ace;
177 }
178 
179 /*
180  * Send ECN setup <SYN> packet header flags
181  */
182 uint16_t
183 tcp_ecn_output_syn_sent(struct tcpcb *tp)
184 {
185 	uint16_t thflags = 0;
186 
187 	if (V_tcp_do_ecn == 1) {
188 		/* Send a RFC3168 ECN setup <SYN> packet */
189 		if (tp->t_rxtshift >= 1) {
190 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
191 				thflags = TH_ECE|TH_CWR;
192 		} else
193 			thflags = TH_ECE|TH_CWR;
194 	}
195 
196 	return thflags;
197 }
198 
199 /*
200  * output processing of ECN feature
201  * returning IP ECN header codepoint
202  */
203 int
204 tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
205 {
206 	int ipecn = IPTOS_ECN_NOTECT;
207 	bool newdata;
208 
209 	/*
210 	 * If the peer has ECN, mark data packets with
211 	 * ECN capable transmission (ECT).
212 	 * Ignore pure control packets, retransmissions
213 	 * and window probes.
214 	 */
215 	newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
216 		    !rxmit &&
217 		    !((tp->t_flags & TF_FORCEDATA) && len == 1));
218 	if (newdata) {
219 		ipecn = IPTOS_ECN_ECT0;
220 		TCPSTAT_INC(tcps_ecn_ect0);
221 	}
222 	/*
223 	 * Reply with proper ECN notifications.
224 	 */
225 	if (newdata &&
226 	    (tp->t_flags2 & TF2_ECN_SND_CWR)) {
227 		*thflags |= TH_CWR;
228 		tp->t_flags2 &= ~TF2_ECN_SND_CWR;
229 	}
230 	if (tp->t_flags2 & TF2_ECN_SND_ECE)
231 		*thflags |= TH_ECE;
232 
233 	return ipecn;
234 }
235 
236 /*
237  * Set up the ECN related tcpcb fields from
238  * a syncache entry
239  */
240 void
241 tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
242 {
243 	if (sc->sc_flags & SCF_ECN_MASK) {
244 		switch (sc->sc_flags & SCF_ECN_MASK) {
245 		case SCF_ECN:
246 			tp->t_flags2 |= TF2_ECN_PERMIT;
247 			break;
248 		/* undefined SCF codepoint */
249 		default:
250 			break;
251 		}
252 	}
253 }
254 
255 /*
256  * Process a <SYN> packets ECN information, and provide the
257  * syncache with the relevant information.
258  */
259 int
260 tcp_ecn_syncache_add(uint16_t thflags, int iptos)
261 {
262 	int scflags = 0;
263 
264 	switch (thflags & (TH_CWR|TH_ECE)) {
265 	/* no ECN */
266 	case (0|0):
267 		break;
268 	/* legacy ECN */
269 	case (TH_CWR|TH_ECE):
270 		scflags = SCF_ECN;
271 		break;
272 	default:
273 		break;
274 	}
275 	return scflags;
276 }
277 
278 /*
279  * Set up the ECN information for the <SYN,ACK> from
280  * syncache information.
281  */
282 uint16_t
283 tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
284 {
285 	if ((thflags & TH_SYN) &&
286 	    (sc->sc_flags & SCF_ECN_MASK)) {
287 		switch (sc->sc_flags & SCF_ECN_MASK) {
288 		case SCF_ECN:
289 			thflags |= (0 | TH_ECE);
290 			TCPSTAT_INC(tcps_ecn_shs);
291 			break;
292 		/* undefined SCF codepoint */
293 		default:
294 			break;
295 		}
296 	}
297 	return thflags;
298 }
299