xref: /freebsd/sys/netgraph/ng_tcpmss.c (revision 31d62a73c2e6ac0ff413a7a17700ffc7dce254ef)
1 /*-
2  * ng_tcpmss.c
3  *
4  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
5  *
6  * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice unmodified, this list of conditions, and the following
14  *    disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * This software includes fragments of the following programs:
32  *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
33  *
34  * $FreeBSD$
35  */
36 
/*
 * This node is a netgraph tool for working around PMTUD (Path MTU
 * Discovery) problems. It acts as a filter for IP packets: if configured,
 * it reduces the MSS advertised in TCP SYN packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook': each
 * packet's TCP MSS option is lowered to the 'maxMSS' parameter and the
 * resulting packet is sent out on 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may become
 * inconsistent on SMP.
 */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/endian.h>
53 #include <sys/errno.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/mbuf.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/ip.h>
61 #include <netinet/tcp.h>
62 
63 #include <netgraph/ng_message.h>
64 #include <netgraph/netgraph.h>
65 #include <netgraph/ng_parse.h>
66 #include <netgraph/ng_tcpmss.h>
67 
68 /* Per hook info. */
69 typedef struct {
70 	hook_p				outHook;
71 	struct ng_tcpmss_hookstat	stats;
72 } *hpriv_p;
73 
74 /* Netgraph methods. */
75 static ng_constructor_t	ng_tcpmss_constructor;
76 static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
77 static ng_newhook_t	ng_tcpmss_newhook;
78 static ng_rcvdata_t	ng_tcpmss_rcvdata;
79 static ng_disconnect_t	ng_tcpmss_disconnect;
80 
81 static int correct_mss(struct tcphdr *, int, uint16_t, int);
82 
83 /* Parse type for struct ng_tcpmss_hookstat. */
84 static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
85 	= NG_TCPMSS_HOOKSTAT_INFO;
86 static const struct ng_parse_type ng_tcpmss_hookstat_type = {
87 	&ng_parse_struct_type,
88 	&ng_tcpmss_hookstat_type_fields
89 };
90 
91 /* Parse type for struct ng_tcpmss_config. */
92 static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
93 	= NG_TCPMSS_CONFIG_INFO;
94 static const struct ng_parse_type ng_tcpmss_config_type = {
95 	&ng_parse_struct_type,
96 	ng_tcpmss_config_type_fields
97 };
98 
99 /* List of commands and how to convert arguments to/from ASCII. */
100 static const struct ng_cmdlist ng_tcpmss_cmds[] = {
101 	{
102 	  NGM_TCPMSS_COOKIE,
103 	  NGM_TCPMSS_GET_STATS,
104 	  "getstats",
105 	  &ng_parse_hookbuf_type,
106 	  &ng_tcpmss_hookstat_type
107 	},
108 	{
109 	  NGM_TCPMSS_COOKIE,
110 	  NGM_TCPMSS_CLR_STATS,
111 	  "clrstats",
112 	  &ng_parse_hookbuf_type,
113 	  NULL
114 	},
115 	{
116 	  NGM_TCPMSS_COOKIE,
117 	  NGM_TCPMSS_GETCLR_STATS,
118 	  "getclrstats",
119 	  &ng_parse_hookbuf_type,
120 	  &ng_tcpmss_hookstat_type
121 	},
122 	{
123 	  NGM_TCPMSS_COOKIE,
124 	  NGM_TCPMSS_CONFIG,
125 	  "config",
126 	  &ng_tcpmss_config_type,
127 	  NULL
128 	},
129 	{ 0 }
130 };
131 
132 /* Netgraph type descriptor. */
133 static struct ng_type ng_tcpmss_typestruct = {
134 	.version =	NG_ABI_VERSION,
135 	.name =		NG_TCPMSS_NODE_TYPE,
136 	.constructor =	ng_tcpmss_constructor,
137 	.rcvmsg =	ng_tcpmss_rcvmsg,
138 	.newhook =	ng_tcpmss_newhook,
139 	.rcvdata =	ng_tcpmss_rcvdata,
140 	.disconnect =	ng_tcpmss_disconnect,
141 	.cmdlist =	ng_tcpmss_cmds,
142 };
143 
144 NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
145 
/*
 * Store an error code in the enclosing function's 'error' variable and
 * jump to its 'done' label.  The do/while(0) wrapper makes "ERROUT(x);"
 * a single statement, so it is safe in an unbraced if/else body.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
147 
148 /*
149  * Node constructor. No special actions required.
150  */
151 static int
152 ng_tcpmss_constructor(node_p node)
153 {
154 	return (0);
155 }
156 
157 /*
158  * Add a hook. Any unique name is OK.
159  */
160 static int
161 ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
162 {
163 	hpriv_p priv;
164 
165 	priv = malloc(sizeof(*priv), M_NETGRAPH, M_NOWAIT | M_ZERO);
166 	if (priv == NULL)
167 		return (ENOMEM);
168 
169 	NG_HOOK_SET_PRIVATE(hook, priv);
170 
171 	return (0);
172 }
173 
174 /*
175  * Receive a control message.
176  */
177 static int
178 ng_tcpmss_rcvmsg
179 (node_p node, item_p item, hook_p lasthook)
180 {
181 	struct ng_mesg *msg, *resp = NULL;
182 	int error = 0;
183 
184 	NGI_GET_MSG(item, msg);
185 
186 	switch (msg->header.typecookie) {
187 	case NGM_TCPMSS_COOKIE:
188 		switch (msg->header.cmd) {
189 		case NGM_TCPMSS_GET_STATS:
190 		case NGM_TCPMSS_CLR_STATS:
191 		case NGM_TCPMSS_GETCLR_STATS:
192 		    {
193 			hook_p hook;
194 			hpriv_p priv;
195 
196 			/* Check that message is long enough. */
197 			if (msg->header.arglen != NG_HOOKSIZ)
198 				ERROUT(EINVAL);
199 
200 			/* Find this hook. */
201 			hook = ng_findhook(node, (char *)msg->data);
202 			if (hook == NULL)
203 				ERROUT(ENOENT);
204 
205 			priv = NG_HOOK_PRIVATE(hook);
206 
207 			/* Create response. */
208 			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
209 				NG_MKRESPONSE(resp, msg,
210 				    sizeof(struct ng_tcpmss_hookstat), M_NOWAIT);
211 				if (resp == NULL)
212 					ERROUT(ENOMEM);
213 				bcopy(&priv->stats, resp->data,
214 				    sizeof(struct ng_tcpmss_hookstat));
215 			}
216 
217 			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
218 				bzero(&priv->stats,
219 				    sizeof(struct ng_tcpmss_hookstat));
220 			break;
221 		    }
222 		case NGM_TCPMSS_CONFIG:
223 		    {
224 			struct ng_tcpmss_config *set;
225 			hook_p in, out;
226 			hpriv_p priv;
227 
228 			/* Check that message is long enough. */
229 			if (msg->header.arglen !=
230 			    sizeof(struct ng_tcpmss_config))
231 				ERROUT(EINVAL);
232 
233 			set = (struct ng_tcpmss_config *)msg->data;
234 			in = ng_findhook(node, set->inHook);
235 			out = ng_findhook(node, set->outHook);
236 			if (in == NULL || out == NULL)
237 				ERROUT(ENOENT);
238 
239 			/* Configure MSS hack. */
240 			priv = NG_HOOK_PRIVATE(in);
241 			priv->outHook = out;
242 			priv->stats.maxMSS = set->maxMSS;
243 
244 			break;
245  		    }
246 		default:
247 			error = EINVAL;
248 			break;
249 		}
250 		break;
251 	default:
252 		error = EINVAL;
253 		break;
254 	}
255 
256 done:
257 	NG_RESPOND_MSG(error, node, item, resp);
258 	NG_FREE_MSG(msg);
259 
260 	return (error);
261 }
262 
263 /*
264  * Receive data on a hook, and hack MSS.
265  *
266  */
267 static int
268 ng_tcpmss_rcvdata(hook_p hook, item_p item)
269 {
270 	hpriv_p priv = NG_HOOK_PRIVATE(hook);
271 	struct mbuf *m = NULL;
272 	struct ip *ip;
273 	struct tcphdr *tcp;
274 	int iphlen, tcphlen, pktlen;
275 	int pullup_len = 0;
276 	int error = 0;
277 
278 	/* Drop packets if filter is not configured on this hook. */
279 	if (priv->outHook == NULL)
280 		goto done;
281 
282 	NGI_GET_M(item, m);
283 
284 	/* Update stats on incoming hook. */
285 	pktlen = m->m_pkthdr.len;
286 	priv->stats.Octets += pktlen;
287 	priv->stats.Packets++;
288 
289 	/* Check whether we configured to fix MSS. */
290 	if (priv->stats.maxMSS == 0)
291 		goto send;
292 
293 #define	M_CHECK(length) do {					\
294 	pullup_len += length;					\
295 	if ((m)->m_pkthdr.len < pullup_len)			\
296 		goto send;					\
297 	if ((m)->m_len < pullup_len &&				\
298 	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
299 		ERROUT(ENOBUFS);				\
300 	} while (0)
301 
302 	/* Check mbuf packet size and arrange for IP header. */
303 	M_CHECK(sizeof(struct ip));
304 	ip = mtod(m, struct ip *);
305 
306 	/* Check IP version. */
307 	if (ip->ip_v != IPVERSION)
308 		ERROUT(EINVAL);
309 
310 	/* Check IP header length. */
311 	iphlen = ip->ip_hl << 2;
312 	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
313 		ERROUT(EINVAL);
314 
315         /* Check if it is TCP. */
316 	if (!(ip->ip_p == IPPROTO_TCP))
317 		goto send;
318 
319 	/* Check mbuf packet size and arrange for IP+TCP header */
320 	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
321 	ip = mtod(m, struct ip *);
322 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
323 
324 	/* Check TCP header length. */
325 	tcphlen = tcp->th_off << 2;
326 	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
327 		ERROUT(EINVAL);
328 
329 	/* Check SYN packet and has options. */
330 	if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
331 		goto send;
332 
333 	/* Update SYN stats. */
334 	priv->stats.SYNPkts++;
335 
336 	M_CHECK(tcphlen - sizeof(struct tcphdr));
337 	ip = mtod(m, struct ip *);
338 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
339 
340 #undef	M_CHECK
341 
342 	/* Fix MSS and update stats. */
343 	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
344 	    m->m_pkthdr.csum_flags))
345 		priv->stats.FixedPkts++;
346 
347 send:
348 	/* Deliver frame out destination hook. */
349 	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
350 
351 	return (error);
352 
353 done:
354 	NG_FREE_ITEM(item);
355 	NG_FREE_M(m);
356 
357 	return (error);
358 }
359 
360 /*
361  * Hook disconnection.
362  * We must check all hooks, since they may reference this one.
363  */
364 static int
365 ng_tcpmss_disconnect(hook_p hook)
366 {
367 	node_p node = NG_HOOK_NODE(hook);
368 	hook_p hook2;
369 
370 	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
371 		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
372 
373 		if (priv->outHook == hook)
374 			priv->outHook = NULL;
375 	}
376 
377 	free(NG_HOOK_PRIVATE(hook), M_NETGRAPH);
378 
379 	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
380 		ng_rmnode_self(NG_HOOK_NODE(hook));
381 
382 	return (0);
383 }
384 
385 /*
386  * Code from tcpmssd.
387  */
388 
/*-
 * The following macro is used to update an internet checksum.
 *
 * "acc" is a signed 32-bit accumulation of all the changes to the
 * checksum (adding in old 16-bit words and subtracting out new words);
 * it is overwritten by the macro.  "cksum" is the 16-bit checksum value,
 * which is updated in place.  Both arguments must be plain lvalues, as
 * they are evaluated more than once.
 *
 * The definition deliberately has no trailing semicolon, so that
 * "TCPMSS_ADJUST_CHECKSUM(a, c);" is exactly one statement and can be
 * used in an unbraced if/else body.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);					\
	if ((acc) < 0) {					\
		(acc) = -(acc);					\
		/* Fold the carries back into the low 16 bits. */	\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t) ~(acc);			\
	} else {						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t) (acc);			\
	}							\
} while (0)
410 
411 static int
412 correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
413 {
414 	int olen, optlen;
415 	u_char *opt;
416 	int accumulate;
417 	int res = 0;
418 	uint16_t sum;
419 
420 	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
421 	     olen > 0; olen -= optlen, opt += optlen) {
422 		if (*opt == TCPOPT_EOL)
423 			break;
424 		else if (*opt == TCPOPT_NOP)
425 			optlen = 1;
426 		else {
427 			optlen = *(opt + 1);
428 			if (optlen <= 0 || optlen > olen)
429 				break;
430 			if (*opt == TCPOPT_MAXSEG) {
431 				if (optlen != TCPOLEN_MAXSEG)
432 					continue;
433 				accumulate = be16dec(opt + 2);
434 				if (accumulate > maxmss) {
435 					if ((flags & CSUM_TCP) == 0) {
436 						accumulate -= maxmss;
437 						sum = be16dec(&tc->th_sum);
438 						TCPMSS_ADJUST_CHECKSUM(accumulate, sum);
439 						be16enc(&tc->th_sum, sum);
440 					}
441 					be16enc(opt + 2, maxmss);
442 					res = 1;
443 				}
444 			}
445 		}
446 	}
447 	return (res);
448 }
449