xref: /freebsd/sys/netgraph/ng_tcpmss.c (revision 2be1a816b9ff69588e55be0a84cbe2a31efc0f2f)
1 /*-
2  * ng_tcpmss.c
3  *
4  * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * This software includes fragments of the following programs:
30  *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
31  *
32  * $FreeBSD$
33  */
34 
/*
 * This node is a netgraph tool for working around PMTUD problems. It acts
 * as a filter for IP packets. If configured, it reduces the MSS of TCP SYN
 * packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook'. The
 * packet's TCP MSS field is lowered to the 'maxMSS' parameter and the
 * resulting packet is sent to 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/kernel.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/in_systm.h>
57 #include <netinet/ip.h>
58 #include <netinet/tcp.h>
59 
60 #include <netgraph/ng_message.h>
61 #include <netgraph/netgraph.h>
62 #include <netgraph/ng_parse.h>
63 #include <netgraph/ng_tcpmss.h>
64 
65 /* Per hook info. */
66 typedef struct {
67 	hook_p				outHook;
68 	struct ng_tcpmss_hookstat	stats;
69 } *hpriv_p;
70 
71 /* Netgraph methods. */
72 static ng_constructor_t	ng_tcpmss_constructor;
73 static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
74 static ng_newhook_t	ng_tcpmss_newhook;
75 static ng_rcvdata_t	ng_tcpmss_rcvdata;
76 static ng_disconnect_t	ng_tcpmss_disconnect;
77 
78 static int correct_mss(struct tcphdr *, int, uint16_t, int);
79 
80 /* Parse type for struct ng_tcpmss_hookstat. */
81 static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
82 	= NG_TCPMSS_HOOKSTAT_INFO;
83 static const struct ng_parse_type ng_tcpmss_hookstat_type = {
84 	&ng_parse_struct_type,
85 	&ng_tcpmss_hookstat_type_fields
86 };
87 
88 /* Parse type for struct ng_tcpmss_config. */
89 static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
90 	= NG_TCPMSS_CONFIG_INFO;
91 static const struct ng_parse_type ng_tcpmss_config_type = {
92 	&ng_parse_struct_type,
93 	ng_tcpmss_config_type_fields
94 };
95 
96 /* List of commands and how to convert arguments to/from ASCII. */
97 static const struct ng_cmdlist ng_tcpmss_cmds[] = {
98 	{
99 	  NGM_TCPMSS_COOKIE,
100 	  NGM_TCPMSS_GET_STATS,
101 	  "getstats",
102 	  &ng_parse_hookbuf_type,
103 	  &ng_tcpmss_hookstat_type
104 	},
105 	{
106 	  NGM_TCPMSS_COOKIE,
107 	  NGM_TCPMSS_CLR_STATS,
108 	  "clrstats",
109 	  &ng_parse_hookbuf_type,
110 	  NULL
111 	},
112 	{
113 	  NGM_TCPMSS_COOKIE,
114 	  NGM_TCPMSS_GETCLR_STATS,
115 	  "getclrstats",
116 	  &ng_parse_hookbuf_type,
117 	  &ng_tcpmss_hookstat_type
118 	},
119 	{
120 	  NGM_TCPMSS_COOKIE,
121 	  NGM_TCPMSS_CONFIG,
122 	  "config",
123 	  &ng_tcpmss_config_type,
124 	  NULL
125 	},
126 	{ 0 }
127 };
128 
129 /* Netgraph type descriptor. */
130 static struct ng_type ng_tcpmss_typestruct = {
131 	.version =	NG_ABI_VERSION,
132 	.name =		NG_TCPMSS_NODE_TYPE,
133 	.constructor =	ng_tcpmss_constructor,
134 	.rcvmsg =	ng_tcpmss_rcvmsg,
135 	.newhook =	ng_tcpmss_newhook,
136 	.rcvdata =	ng_tcpmss_rcvdata,
137 	.disconnect =	ng_tcpmss_disconnect,
138 	.cmdlist =	ng_tcpmss_cmds,
139 };
140 
141 NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
142 
/*
 * Record an error code and jump to the enclosing function's cleanup code.
 * The calling function must declare an lvalue named 'error' and a label
 * named 'done'.  Wrapped in do { } while (0) so that the macro behaves as
 * a single statement, including as the body of an if that has an else.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
144 
145 /*
146  * Node constructor. No special actions required.
147  */
148 static int
149 ng_tcpmss_constructor(node_p node)
150 {
151 	return (0);
152 }
153 
154 /*
155  * Add a hook. Any unique name is OK.
156  */
157 static int
158 ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
159 {
160 	hpriv_p priv;
161 
162 	MALLOC(priv, hpriv_p, sizeof(*priv), M_NETGRAPH, M_NOWAIT | M_ZERO);
163 	if (priv == NULL)
164 		return (ENOMEM);
165 
166 	NG_HOOK_SET_PRIVATE(hook, priv);
167 
168 	return (0);
169 }
170 
171 /*
172  * Receive a control message.
173  */
174 static int
175 ng_tcpmss_rcvmsg
176 (node_p node, item_p item, hook_p lasthook)
177 {
178 	struct ng_mesg *msg, *resp = NULL;
179 	int error = 0;
180 
181 	NGI_GET_MSG(item, msg);
182 
183 	switch (msg->header.typecookie) {
184 	case NGM_TCPMSS_COOKIE:
185 		switch (msg->header.cmd) {
186 		case NGM_TCPMSS_GET_STATS:
187 		case NGM_TCPMSS_CLR_STATS:
188 		case NGM_TCPMSS_GETCLR_STATS:
189 		    {
190 			hook_p hook;
191 			hpriv_p priv;
192 
193 			/* Check that message is long enough. */
194 			if (msg->header.arglen != NG_HOOKSIZ)
195 				ERROUT(EINVAL);
196 
197 			/* Find this hook. */
198 			hook = ng_findhook(node, (char *)msg->data);
199 			if (hook == NULL)
200 				ERROUT(ENOENT);
201 
202 			priv = NG_HOOK_PRIVATE(hook);
203 
204 			/* Create response. */
205 			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
206 				NG_MKRESPONSE(resp, msg,
207 				    sizeof(struct ng_tcpmss_hookstat), M_NOWAIT);
208 				if (resp == NULL)
209 					ERROUT(ENOMEM);
210 				bcopy(&priv->stats, resp->data,
211 				    sizeof(struct ng_tcpmss_hookstat));
212 			}
213 
214 			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
215 				bzero(&priv->stats,
216 				    sizeof(struct ng_tcpmss_hookstat));
217 			break;
218 		    }
219 		case NGM_TCPMSS_CONFIG:
220 		    {
221 			struct ng_tcpmss_config *set;
222 			hook_p in, out;
223 			hpriv_p priv;
224 
225 			/* Check that message is long enough. */
226 			if (msg->header.arglen !=
227 			    sizeof(struct ng_tcpmss_config))
228 				ERROUT(EINVAL);
229 
230 			set = (struct ng_tcpmss_config *)msg->data;
231 			in = ng_findhook(node, set->inHook);
232 			out = ng_findhook(node, set->outHook);
233 			if (in == NULL || out == NULL)
234 				ERROUT(ENOENT);
235 
236 			/* Configure MSS hack. */
237 			priv = NG_HOOK_PRIVATE(in);
238 			priv->outHook = out;
239 			priv->stats.maxMSS = set->maxMSS;
240 
241 			break;
242  		    }
243 		default:
244 			error = EINVAL;
245 			break;
246 		}
247 		break;
248 	default:
249 		error = EINVAL;
250 		break;
251 	}
252 
253 done:
254 	NG_RESPOND_MSG(error, node, item, resp);
255 	NG_FREE_MSG(msg);
256 
257 	return (error);
258 }
259 
260 /*
261  * Receive data on a hook, and hack MSS.
262  *
263  */
264 static int
265 ng_tcpmss_rcvdata(hook_p hook, item_p item)
266 {
267 	hpriv_p priv = NG_HOOK_PRIVATE(hook);
268 	struct mbuf *m = NULL;
269 	struct ip *ip;
270 	struct tcphdr *tcp;
271 	int iphlen, tcphlen, pktlen;
272 	int pullup_len = 0;
273 	int error = 0;
274 
275 	/* Drop packets if filter is not configured on this hook. */
276 	if (priv->outHook == NULL)
277 		goto done;
278 
279 	NGI_GET_M(item, m);
280 
281 	/* Update stats on incoming hook. */
282 	pktlen = m->m_pkthdr.len;
283 	priv->stats.Octets += pktlen;
284 	priv->stats.Packets++;
285 
286 	/* Check whether we configured to fix MSS. */
287 	if (priv->stats.maxMSS == 0)
288 		goto send;
289 
290 #define	M_CHECK(length) do {					\
291 	pullup_len += length;					\
292 	if ((m)->m_pkthdr.len < pullup_len)			\
293 		goto send;					\
294 	if ((m)->m_len < pullup_len &&				\
295 	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
296 		ERROUT(ENOBUFS);				\
297 	} while (0)
298 
299 	/* Check mbuf packet size and arrange for IP header. */
300 	M_CHECK(sizeof(struct ip));
301 	ip = mtod(m, struct ip *);
302 
303 	/* Check IP version. */
304 	if (ip->ip_v != IPVERSION)
305 		ERROUT(EINVAL);
306 
307 	/* Check IP header length. */
308 	iphlen = ip->ip_hl << 2;
309 	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
310 		ERROUT(EINVAL);
311 
312         /* Check if it is TCP. */
313 	if (!(ip->ip_p == IPPROTO_TCP))
314 		goto send;
315 
316 	/* Check mbuf packet size and arrange for IP+TCP header */
317 	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
318 	ip = mtod(m, struct ip *);
319 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
320 
321 	/* Check TCP header length. */
322 	tcphlen = tcp->th_off << 2;
323 	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
324 		ERROUT(EINVAL);
325 
326 	/* Check SYN packet and has options. */
327 	if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
328 		goto send;
329 
330 	/* Update SYN stats. */
331 	priv->stats.SYNPkts++;
332 
333 	M_CHECK(tcphlen - sizeof(struct tcphdr));
334 	ip = mtod(m, struct ip *);
335 	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
336 
337 #undef	M_CHECK
338 
339 	/* Fix MSS and update stats. */
340 	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
341 	    m->m_pkthdr.csum_flags))
342 		priv->stats.FixedPkts++;
343 
344 send:
345 	/* Deliver frame out destination hook. */
346 	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
347 
348 	return (error);
349 
350 done:
351 	NG_FREE_ITEM(item);
352 	NG_FREE_M(m);
353 
354 	return (error);
355 }
356 
357 /*
358  * Hook disconnection.
359  * We must check all hooks, since they may reference this one.
360  */
361 static int
362 ng_tcpmss_disconnect(hook_p hook)
363 {
364 	node_p node = NG_HOOK_NODE(hook);
365 	hook_p hook2;
366 
367 	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
368 		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
369 
370 		if (priv->outHook == hook)
371 			priv->outHook = NULL;
372 	}
373 
374 	FREE(NG_HOOK_PRIVATE(hook), M_NETGRAPH);
375 
376 	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
377 		ng_rmnode_self(NG_HOOK_NODE(hook));
378 
379 	return (0);
380 }
381 
382 /*
383  * Code from tcpmssd.
384  */
385 
/*-
 * The following macro is used to update an internet checksum
 * incrementally.
 *
 * "acc" is a signed 32-bit accumulation of all the changes to the
 * checksum (adding in old 16-bit words and subtracting out new words),
 * and "cksum" is the 16-bit checksum value to be updated.  Both arguments
 * must be modifiable lvalues; "acc" is clobbered.
 *
 * The sum is folded back into 16 bits with end-around carry.  A negative
 * sum is folded as its magnitude and then complemented.
 *
 * The macro expands to a single statement without a trailing semicolon,
 * so it can be used safely as the body of an if/else.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);					\
	if ((acc) < 0) {					\
		(acc) = -(acc);					\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t) ~(acc);			\
	} else {						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t) (acc);			\
	}							\
} while (0)
407 
408 static int
409 correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
410 {
411 	int olen, optlen;
412 	u_char *opt;
413 	uint16_t *mss;
414 	int accumulate;
415 	int res = 0;
416 
417 	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
418 	     olen > 0; olen -= optlen, opt += optlen) {
419 		if (*opt == TCPOPT_EOL)
420 			break;
421 		else if (*opt == TCPOPT_NOP)
422 			optlen = 1;
423 		else {
424 			optlen = *(opt + 1);
425 			if (optlen <= 0 || optlen > olen)
426 				break;
427 			if (*opt == TCPOPT_MAXSEG) {
428 				if (optlen != TCPOLEN_MAXSEG)
429 					continue;
430 				mss = (uint16_t *)(opt + 2);
431 				if (ntohs(*mss) > maxmss) {
432 					accumulate = *mss;
433 					*mss = htons(maxmss);
434 					accumulate -= *mss;
435 					if ((flags & CSUM_TCP) == 0)
436 						TCPMSS_ADJUST_CHECKSUM(accumulate, tc->th_sum);
437 					res = 1;
438 				}
439 			}
440 		}
441 	}
442 	return (res);
443 }
444