xref: /freebsd/sys/netgraph/ng_tcpmss.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * ng_tcpmss.c
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * This software includes fragments of the following programs:
32  *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
33  */
34 
/*
 * This node is a netgraph tool for working around PMTUD problems. It acts
 * as a filter for IP packets. If configured, it reduces the MSS of TCP SYN
 * packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook'. The
 * packet's TCP MSS field is lowered to the 'maxMSS' parameter and the
 * resulting packet is sent to 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/endian.h>
51 #include <sys/errno.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <netgraph/ng_message.h>
62 #include <netgraph/netgraph.h>
63 #include <netgraph/ng_parse.h>
64 #include <netgraph/ng_tcpmss.h>
65 
/*
 * Malloc type used for this node's allocations.  It is a dedicated type
 * only when NG_SEPARATE_MALLOC is defined; otherwise it is an alias for
 * M_NETGRAPH.
 */
#ifndef NG_SEPARATE_MALLOC
#define M_NETGRAPH_TCPMSS M_NETGRAPH
#else
static MALLOC_DEFINE(M_NETGRAPH_TCPMSS, "netgraph_tcpmss", "netgraph tcpmss node");
#endif
71 
72 /* Per hook info. */
73 typedef struct {
74 	hook_p				outHook;
75 	struct ng_tcpmss_hookstat	stats;
76 } *hpriv_p;
77 
78 /* Netgraph methods. */
79 static ng_constructor_t	ng_tcpmss_constructor;
80 static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
81 static ng_newhook_t	ng_tcpmss_newhook;
82 static ng_rcvdata_t	ng_tcpmss_rcvdata;
83 static ng_disconnect_t	ng_tcpmss_disconnect;
84 
85 static int correct_mss(struct tcphdr *, int, uint16_t, int);
86 
87 /* Parse type for struct ng_tcpmss_hookstat. */
88 static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
89 	= NG_TCPMSS_HOOKSTAT_INFO;
90 static const struct ng_parse_type ng_tcpmss_hookstat_type = {
91 	&ng_parse_struct_type,
92 	&ng_tcpmss_hookstat_type_fields
93 };
94 
95 /* Parse type for struct ng_tcpmss_config. */
96 static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
97 	= NG_TCPMSS_CONFIG_INFO;
98 static const struct ng_parse_type ng_tcpmss_config_type = {
99 	&ng_parse_struct_type,
100 	ng_tcpmss_config_type_fields
101 };
102 
103 /* List of commands and how to convert arguments to/from ASCII. */
104 static const struct ng_cmdlist ng_tcpmss_cmds[] = {
105 	{
106 	  NGM_TCPMSS_COOKIE,
107 	  NGM_TCPMSS_GET_STATS,
108 	  "getstats",
109 	  &ng_parse_hookbuf_type,
110 	  &ng_tcpmss_hookstat_type
111 	},
112 	{
113 	  NGM_TCPMSS_COOKIE,
114 	  NGM_TCPMSS_CLR_STATS,
115 	  "clrstats",
116 	  &ng_parse_hookbuf_type,
117 	  NULL
118 	},
119 	{
120 	  NGM_TCPMSS_COOKIE,
121 	  NGM_TCPMSS_GETCLR_STATS,
122 	  "getclrstats",
123 	  &ng_parse_hookbuf_type,
124 	  &ng_tcpmss_hookstat_type
125 	},
126 	{
127 	  NGM_TCPMSS_COOKIE,
128 	  NGM_TCPMSS_CONFIG,
129 	  "config",
130 	  &ng_tcpmss_config_type,
131 	  NULL
132 	},
133 	{ 0 }
134 };
135 
136 /* Netgraph type descriptor. */
137 static struct ng_type ng_tcpmss_typestruct = {
138 	.version =	NG_ABI_VERSION,
139 	.name =		NG_TCPMSS_NODE_TYPE,
140 	.constructor =	ng_tcpmss_constructor,
141 	.rcvmsg =	ng_tcpmss_rcvmsg,
142 	.newhook =	ng_tcpmss_newhook,
143 	.rcvdata =	ng_tcpmss_rcvdata,
144 	.disconnect =	ng_tcpmss_disconnect,
145 	.cmdlist =	ng_tcpmss_cmds,
146 };
147 
148 NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
/*
 * Record an error code and jump to the function's cleanup label.
 * The enclosing function must declare a variable named 'error' and a
 * label named 'done'.  Wrapped in do/while (0) so that "ERROUT(x);"
 * is a single statement and is safe in an unbraced if/else.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
150 
151 /*
152  * Node constructor. No special actions required.
153  */
154 static int
155 ng_tcpmss_constructor(node_p node)
156 {
157 	return (0);
158 }
159 
160 /*
161  * Add a hook. Any unique name is OK.
162  */
163 static int
164 ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
165 {
166 	hpriv_p priv;
167 
168 	priv = malloc(sizeof(*priv), M_NETGRAPH_TCPMSS, M_NOWAIT | M_ZERO);
169 	if (priv == NULL)
170 		return (ENOMEM);
171 
172 	NG_HOOK_SET_PRIVATE(hook, priv);
173 
174 	return (0);
175 }
176 
177 /*
178  * Receive a control message.
179  */
180 static int
181 ng_tcpmss_rcvmsg
182 (node_p node, item_p item, hook_p lasthook)
183 {
184 	struct ng_mesg *msg, *resp = NULL;
185 	int error = 0;
186 
187 	NGI_GET_MSG(item, msg);
188 
189 	switch (msg->header.typecookie) {
190 	case NGM_TCPMSS_COOKIE:
191 		switch (msg->header.cmd) {
192 		case NGM_TCPMSS_GET_STATS:
193 		case NGM_TCPMSS_CLR_STATS:
194 		case NGM_TCPMSS_GETCLR_STATS:
195 		    {
196 			hook_p hook;
197 			hpriv_p priv;
198 
199 			/* Check that message is long enough. */
200 			if (msg->header.arglen != NG_HOOKSIZ)
201 				ERROUT(EINVAL);
202 
203 			/* Find this hook. */
204 			hook = ng_findhook(node, (char *)msg->data);
205 			if (hook == NULL)
206 				ERROUT(ENOENT);
207 
208 			priv = NG_HOOK_PRIVATE(hook);
209 
210 			/* Create response. */
211 			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
212 				NG_MKRESPONSE(resp, msg,
213 				    sizeof(struct ng_tcpmss_hookstat), M_NOWAIT);
214 				if (resp == NULL)
215 					ERROUT(ENOMEM);
216 				bcopy(&priv->stats, resp->data,
217 				    sizeof(struct ng_tcpmss_hookstat));
218 			}
219 
220 			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
221 				bzero(&priv->stats,
222 				    sizeof(struct ng_tcpmss_hookstat));
223 			break;
224 		    }
225 		case NGM_TCPMSS_CONFIG:
226 		    {
227 			struct ng_tcpmss_config *set;
228 			hook_p in, out;
229 			hpriv_p priv;
230 
231 			/* Check that message is long enough. */
232 			if (msg->header.arglen !=
233 			    sizeof(struct ng_tcpmss_config))
234 				ERROUT(EINVAL);
235 
236 			set = (struct ng_tcpmss_config *)msg->data;
237 			in = ng_findhook(node, set->inHook);
238 			out = ng_findhook(node, set->outHook);
239 			if (in == NULL || out == NULL)
240 				ERROUT(ENOENT);
241 
242 			/* Configure MSS hack. */
243 			priv = NG_HOOK_PRIVATE(in);
244 			priv->outHook = out;
245 			priv->stats.maxMSS = set->maxMSS;
246 
247 			break;
248  		    }
249 		default:
250 			error = EINVAL;
251 			break;
252 		}
253 		break;
254 	default:
255 		error = EINVAL;
256 		break;
257 	}
258 
259 done:
260 	NG_RESPOND_MSG(error, node, item, resp);
261 	NG_FREE_MSG(msg);
262 
263 	return (error);
264 }
265 
266 /*
267  * Receive data on a hook, and hack MSS.
268  *
269  */
270 static int
271 ng_tcpmss_rcvdata(hook_p hook, item_p item)
272 {
273 	hpriv_p priv = NG_HOOK_PRIVATE(hook);
274 	struct mbuf *m = NULL;
275 	struct ip *ip;
276 	struct tcphdr *tcp;
277 	int iphlen, tcphlen, pktlen;
278 	int pullup_len = 0;
279 	int error = 0;
280 
281 	/* Drop packets if filter is not configured on this hook. */
282 	if (priv->outHook == NULL)
283 		goto done;
284 
285 	NGI_GET_M(item, m);
286 
287 	/* Update stats on incoming hook. */
288 	pktlen = m->m_pkthdr.len;
289 	priv->stats.Octets += pktlen;
290 	priv->stats.Packets++;
291 
292 	/* Check whether we configured to fix MSS. */
293 	if (priv->stats.maxMSS == 0)
294 		goto send;
295 
296 #define	M_CHECK(length) do {					\
297 	pullup_len += length;					\
298 	if ((m)->m_pkthdr.len < pullup_len)			\
299 		goto send;					\
300 	if ((m)->m_len < pullup_len &&				\
301 	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
302 		ERROUT(ENOBUFS);				\
303 	} while (0)
304 
305 	/* Check mbuf packet size and arrange for IP header. */
306 	M_CHECK(sizeof(struct ip));
307 	ip = mtod(m, struct ip *);
308 
309 	/* Check IP version. */
310 	if (ip->ip_v != IPVERSION)
311 		ERROUT(EINVAL);
312 
313 	/* Check IP header length. */
314 	iphlen = ip->ip_hl << 2;
315 	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
316 		ERROUT(EINVAL);
317 
318         /* Check if it is TCP. */
319 	if (!(ip->ip_p == IPPROTO_TCP))
320 		goto send;
321 
322 	/* Check mbuf packet size and arrange for IP+TCP header */
323 	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
324 	ip = mtod(m, struct ip *);
325 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
326 
327 	/* Check TCP header length. */
328 	tcphlen = tcp->th_off << 2;
329 	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
330 		ERROUT(EINVAL);
331 
332 	/* Check SYN packet and has options. */
333 	if (!(tcp_get_flags(tcp) & TH_SYN) || tcphlen == sizeof(struct tcphdr))
334 		goto send;
335 
336 	/* Update SYN stats. */
337 	priv->stats.SYNPkts++;
338 
339 	M_CHECK(tcphlen - sizeof(struct tcphdr));
340 	ip = mtod(m, struct ip *);
341 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
342 
343 #undef	M_CHECK
344 
345 	/* Fix MSS and update stats. */
346 	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
347 	    m->m_pkthdr.csum_flags))
348 		priv->stats.FixedPkts++;
349 
350 send:
351 	/* Deliver frame out destination hook. */
352 	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
353 
354 	return (error);
355 
356 done:
357 	NG_FREE_ITEM(item);
358 	NG_FREE_M(m);
359 
360 	return (error);
361 }
362 
363 /*
364  * Hook disconnection.
365  * We must check all hooks, since they may reference this one.
366  */
367 static int
368 ng_tcpmss_disconnect(hook_p hook)
369 {
370 	node_p node = NG_HOOK_NODE(hook);
371 	hook_p hook2;
372 
373 	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
374 		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
375 
376 		if (priv->outHook == hook)
377 			priv->outHook = NULL;
378 	}
379 
380 	free(NG_HOOK_PRIVATE(hook), M_NETGRAPH_TCPMSS);
381 
382 	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
383 		ng_rmnode_self(NG_HOOK_NODE(hook));
384 
385 	return (0);
386 }
387 
388 /*
389  * Code from tcpmssd.
390  */
391 
392 /*-
393  * The following macro is used to update an
394  * internet checksum.  "acc" is a 32-bit
395  * accumulation of all the changes to the
396  * checksum (adding in old 16-bit words and
397  * subtracting out new words), and "cksum"
398  * is the checksum value to be updated.
399  */
400 #define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {		\
401 	acc += cksum;					\
402 	if (acc < 0) {					\
403 		acc = -acc;				\
404 		acc = (acc >> 16) + (acc & 0xffff);	\
405 		acc += acc >> 16;			\
406 		cksum = (u_short) ~acc;			\
407 	} else {					\
408 		acc = (acc >> 16) + (acc & 0xffff);	\
409 		acc += acc >> 16;			\
410 		cksum = (u_short) acc;			\
411 	}						\
412 } while (0);
413 
414 static int
415 correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
416 {
417 	int olen, optlen;
418 	u_char *opt;
419 	int accumulate;
420 	int res = 0;
421 	uint16_t sum;
422 
423 	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
424 	     olen > 0; olen -= optlen, opt += optlen) {
425 		if (*opt == TCPOPT_EOL)
426 			break;
427 		else if (*opt == TCPOPT_NOP)
428 			optlen = 1;
429 		else {
430 			optlen = *(opt + 1);
431 			if (optlen <= 0 || optlen > olen)
432 				break;
433 			if (*opt == TCPOPT_MAXSEG) {
434 				if (optlen != TCPOLEN_MAXSEG)
435 					continue;
436 				accumulate = be16dec(opt + 2);
437 				if (accumulate > maxmss) {
438 					if ((flags & CSUM_TCP) == 0) {
439 						accumulate -= maxmss;
440 						sum = be16dec(&tc->th_sum);
441 						TCPMSS_ADJUST_CHECKSUM(accumulate, sum);
442 						be16enc(&tc->th_sum, sum);
443 					}
444 					be16enc(opt + 2, maxmss);
445 					res = 1;
446 				}
447 			}
448 		}
449 	}
450 	return (res);
451 }
452