xref: /freebsd/sys/netinet/tcp_timewait.c (revision 7b71f57f4e514a2ab7308ce4147e14d90e099ad0)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31 
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include "opt_ipsec.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/kernel.h>
40 #include <sys/sysctl.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/syslog.h>
48 #include <sys/protosw.h>
49 #include <sys/random.h>
50 
51 #include <vm/uma.h>
52 
53 #include <net/route.h>
54 #include <net/if.h>
55 #include <net/if_var.h>
56 #include <net/vnet.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/in_kdtrace.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_icmp.h>
65 #include <netinet/ip_var.h>
66 #ifdef INET6
67 #include <netinet/ip6.h>
68 #include <netinet6/in6_pcb.h>
69 #include <netinet6/ip6_var.h>
70 #include <netinet6/scope6_var.h>
71 #include <netinet6/nd6.h>
72 #endif
73 #include <netinet/tcp.h>
74 #include <netinet/tcp_fsm.h>
75 #include <netinet/tcp_seq.h>
76 #include <netinet/tcp_timer.h>
77 #include <netinet/tcp_var.h>
78 #include <netinet/tcpip.h>
79 
80 #include <netinet/udp.h>
81 #include <netinet/udp_var.h>
82 
83 #include <netipsec/ipsec_support.h>
84 
85 #include <machine/in_cksum.h>
86 
87 #include <security/mac/mac_framework.h>
88 
89 VNET_DEFINE_STATIC(bool, nolocaltimewait) = false;
90 #define	V_nolocaltimewait	VNET(nolocaltimewait)
91 
92 static int
sysctl_net_inet_tcp_nolocaltimewait(SYSCTL_HANDLER_ARGS)93 sysctl_net_inet_tcp_nolocaltimewait(SYSCTL_HANDLER_ARGS)
94 {
95 	int error;
96 	bool new;
97 
98 	new = V_nolocaltimewait;
99 	error = sysctl_handle_bool(oidp, &new, 0, req);
100 	if (error == 0 && req->newptr) {
101 		V_nolocaltimewait = new;
102 		gone_in(16, "net.inet.tcp.nolocaltimewait is obsolete."
103 		    " Use net.inet.tcp.msl_local instead.\n");
104 	}
105 	return (error);
106 }
107 
108 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, nolocaltimewait,
109     CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_U8,
110     &VNET_NAME(nolocaltimewait), 0, sysctl_net_inet_tcp_nolocaltimewait, "CU",
111     "Do not create TCP TIME_WAIT state for local connections");
112 
113 static u_int
tcp_eff_msl(struct tcpcb * tp)114 tcp_eff_msl(struct tcpcb *tp)
115 {
116 	struct inpcb *inp = tptoinpcb(tp);
117 #ifdef INET6
118 	bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
119 #endif
120 
121 	if (
122 #ifdef INET6
123 	    isipv6 ? in6_localip(&inp->in6p_faddr) :
124 #endif
125 #ifdef INET
126 	    in_localip(inp->inp_faddr))
127 #else
128 	    false)
129 #endif
130 		return (V_tcp_msl_local);
131 	else
132 		return (V_tcp_msl);
133 }
134 
135 /*
136  * Move a TCP connection into TIME_WAIT state.
137  *    inp is locked, and is unlocked before returning.
138  *
139  * This function used to free tcpcb and allocate a compressed TCP time-wait
140  * structure tcptw.  This served well for 20 years but is no longer relevant
141  * on modern machines in the modern internet.  However, the function remains
142  * so that TCP stacks require less modification and we don't burn the bridge
143  * to go back to using compressed time-wait.
144  */
145 void
tcp_twstart(struct tcpcb * tp)146 tcp_twstart(struct tcpcb *tp)
147 {
148 	struct inpcb *inp = tptoinpcb(tp);
149 #ifdef INET6
150 	bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
151 #endif
152 
153 	NET_EPOCH_ASSERT();
154 	INP_WLOCK_ASSERT(inp);
155 
156 	/* A dropped inp should never transition to TIME_WAIT state. */
157 	KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: "
158 	    "(inp->inp_flags & INP_DROPPED) != 0"));
159 
160 	tcp_state_change(tp, TCPS_TIME_WAIT);
161 	tcp_free_sackholes(tp);
162 	soisdisconnected(inp->inp_socket);
163 
164 	if (tp->t_flags & TF_ACKNOW)
165 		(void) tcp_output(tp);
166 
167 	if (V_nolocaltimewait && (
168 #ifdef INET6
169 	    isipv6 ? in6_localip(&inp->in6p_faddr) :
170 #endif
171 #ifdef INET
172 	    in_localip(inp->inp_faddr)
173 #else
174 	    false
175 #endif
176 	    )) {
177 		if ((tp = tcp_close(tp)) != NULL)
178 			INP_WUNLOCK(inp);
179 		return;
180 	}
181 
182 	tcp_timer_activate(tp, TT_2MSL, 2 * tcp_eff_msl(tp));
183 	INP_WUNLOCK(inp);
184 }
185 
186 /*
187  * Returns true if the TIME_WAIT state was killed and we should start over,
188  * looking for a pcb in the listen state.  Otherwise returns false and frees
189  * the mbuf.
190  *
191  * For pure SYN-segments the PCB shall be read-locked and the tcpopt pointer
192  * may be NULL.  For the rest write-lock and valid tcpopt.
193  */
194 bool
tcp_twcheck(struct inpcb * inp,struct tcpopt * to,struct tcphdr * th,struct mbuf * m,int tlen)195 tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
196     struct mbuf *m, int tlen)
197 {
198 	struct tcpcb *tp = intotcpcb(inp);
199 	char *s;
200 	int thflags;
201 	tcp_seq seq;
202 
203 	NET_EPOCH_ASSERT();
204 	INP_LOCK_ASSERT(inp);
205 
206 	thflags = tcp_get_flags(th);
207 #ifdef INVARIANTS
208 	if ((thflags & (TH_SYN | TH_ACK)) == TH_SYN)
209 		INP_RLOCK_ASSERT(inp);
210 	else {
211 		INP_WLOCK_ASSERT(inp);
212 		KASSERT(to != NULL,
213 		    ("%s: called without options on a non-SYN segment",
214 		    __func__));
215 	}
216 #endif
217 
218 	/*
219 	 * NOTE: for FIN_WAIT_2 (to be added later),
220 	 * must validate sequence number before accepting RST
221 	 */
222 
223 	/*
224 	 * If the segment contains RST:
225 	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
226 	 *      RFC 1337.
227 	 */
228 	if (thflags & TH_RST)
229 		goto drop;
230 
231 #if 0
232 /* PAWS not needed at the moment */
233 	/*
234 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
235 	 * and it's less than ts_recent, drop it.
236 	 */
237 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
238 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
239 		if ((thflags & TH_ACK) == 0)
240 			goto drop;
241 		goto ack;
242 	}
243 	/*
244 	 * ts_recent is never updated because we never accept new segments.
245 	 */
246 #endif
247 
248 	/* Honor the drop_synfin sysctl variable. */
249 	if ((thflags & TH_SYN) && (thflags & TH_FIN) && V_drop_synfin) {
250 		if ((s = tcp_log_addrs(&inp->inp_inc, th, NULL, NULL))) {
251 			log(LOG_DEBUG, "%s; %s: "
252 			    "SYN|FIN segment ignored (based on "
253 			    "sysctl setting)\n", s, __func__);
254 			free(s, M_TCPLOG);
255 		}
256 		goto drop;
257 	}
258 
259 	/*
260 	 * If a new connection request is received
261 	 * while in TIME_WAIT, drop the old connection
262 	 * and start over if the sequence numbers
263 	 * are above the previous ones.
264 	 * Allow UDP port number changes in this case.
265 	 */
266 	if (((thflags & (TH_SYN | TH_ACK)) == TH_SYN) &&
267 	    SEQ_GT(th->th_seq, tp->rcv_nxt)) {
268 		/*
269 		 * In case we can't upgrade our lock just pretend we have
270 		 * lost this packet.
271 		 */
272 		if (INP_TRY_UPGRADE(inp) == 0)
273 			goto drop;
274 		if ((tp = tcp_close(tp)) != NULL)
275 			INP_WUNLOCK(inp);
276 		TCPSTAT_INC(tcps_tw_recycles);
277 		return (true);
278 	}
279 
280 	/*
281 	 * Send RST if UDP port numbers don't match
282 	 */
283 	if (tp->t_port != m->m_pkthdr.tcp_tun_port) {
284 		if (tcp_get_flags(th) & TH_ACK) {
285 			tcp_respond(tp, mtod(m, void *), th, m,
286 			    (tcp_seq)0, th->th_ack, TH_RST);
287 		} else {
288 			if (tcp_get_flags(th) & TH_SYN)
289 				tlen++;
290 			if (tcp_get_flags(th) & TH_FIN)
291 				tlen++;
292 			tcp_respond(tp, mtod(m, void *), th, m,
293 			    th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK);
294 		}
295 		INP_UNLOCK(inp);
296 		TCPSTAT_INC(tcps_tw_resets);
297 		return (false);
298 	}
299 
300 	/*
301 	 * Drop the segment if it does not contain an ACK.
302 	 */
303 	if ((thflags & TH_ACK) == 0)
304 		goto drop;
305 
306 	INP_WLOCK_ASSERT(inp);
307 
308 	/*
309 	 * If timestamps were negotiated during SYN/ACK and a
310 	 * segment without a timestamp is received, silently drop
311 	 * the segment, unless the missing timestamps are tolerated.
312 	 * See section 3.2 of RFC 7323.
313 	 */
314 	if (((to->to_flags & TOF_TS) == 0) && (tp->ts_recent != 0) &&
315 	    (V_tcp_tolerate_missing_ts == 0)) {
316 		goto drop;
317 	}
318 
319 	/*
320 	 * Reset the 2MSL timer if this is a duplicate FIN.
321 	 */
322 	if (thflags & TH_FIN) {
323 		seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
324 		if (seq + 1 == tp->rcv_nxt)
325 			tcp_timer_activate(tp, TT_2MSL, 2 * tcp_eff_msl(tp));
326 	}
327 
328 	/*
329 	 * Acknowledge the segment if it has data or is not a duplicate ACK.
330 	 */
331 	if (thflags != TH_ACK || tlen != 0 ||
332 	    th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt) {
333 		TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
334 		tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
335 		    tp->snd_nxt, TH_ACK);
336 		INP_UNLOCK(inp);
337 		TCPSTAT_INC(tcps_tw_responds);
338 		return (false);
339 	}
340 drop:
341 	TCP_PROBE5(receive, NULL, NULL, m, NULL, th);
342 	INP_UNLOCK(inp);
343 	m_freem(m);
344 	return (false);
345 }
346