xref: /freebsd/sys/net/if_gre.c (revision c5a92616c41f9132d585c533e66dee88e98c73f2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
5  * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  *
34  * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
35  */
36 
37 #include <sys/cdefs.h>
38 #include "opt_inet.h"
39 #include "opt_inet6.h"
40 #include "opt_rss.h"
41 
42 #include <sys/param.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/module.h>
47 #include <sys/mbuf.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockio.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56 #include <sys/systm.h>
57 
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_private.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/vnet.h>
66 #include <net/route.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/in_pcb.h>
70 #ifdef INET
71 #include <netinet/in_var.h>
72 #include <netinet/ip.h>
73 #include <netinet/ip_var.h>
74 #ifdef RSS
75 #include <netinet/in_rss.h>
76 #endif
77 #endif
78 
79 #ifdef INET6
80 #include <netinet/ip6.h>
81 #include <netinet6/in6_var.h>
82 #include <netinet6/ip6_var.h>
83 #ifdef RSS
84 #include <netinet6/in6_rss.h>
85 #endif
86 #endif
87 
88 #include <netinet/ip_encap.h>
89 #include <netinet/udp.h>
90 #include <net/bpf.h>
91 #include <net/if_gre.h>
92 
93 #include <netlink/netlink.h>
94 #include <netlink/netlink_ctl.h>
95 #include <netlink/netlink_var.h>
96 #include <netlink/netlink_route.h>
97 #include <netlink/route/route_var.h>
98 
99 #include <machine/in_cksum.h>
100 #include <security/mac/mac_framework.h>
101 
102 #define	GREMTU			1476
103 
104 static const char grename[] = "gre";
105 MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
106 
107 static struct sx gre_ioctl_sx;
108 SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
109 #define GRE_LOCK_ASSERT() sx_assert(&gre_ioctl_sx, SA_XLOCKED);
110 
111 static int	gre_clone_create(struct if_clone *, char *, size_t,
112 		    struct ifc_data *, struct ifnet **);
113 static int	gre_clone_destroy(struct if_clone *, struct ifnet *,
114 		    uint32_t);
115 static int	gre_clone_create_nl(struct if_clone *, char *, size_t,
116 		    struct ifc_data_nl *);
117 static int	gre_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
118 static void	gre_clone_dump_nl(struct ifnet *, struct nl_writer *);
119 VNET_DEFINE_STATIC(struct if_clone *, gre_cloner);
120 #define	V_gre_cloner	VNET(gre_cloner)
121 
122 #ifdef VIMAGE
123 static void	gre_reassign(struct ifnet *, struct vnet *, char *);
124 #endif
125 static void	gre_qflush(struct ifnet *);
126 static int	gre_transmit(struct ifnet *, struct mbuf *);
127 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
128 static int	gre_output(struct ifnet *, struct mbuf *,
129 		    const struct sockaddr *, struct route *);
130 static void	gre_delete_tunnel(struct gre_softc *);
131 static int	gre_set_addr_nl(struct gre_softc *, struct nl_pstate *,
132 		    struct sockaddr *, struct sockaddr *);
133 
134 static int	gre_set_flags(struct gre_softc *, uint32_t);
135 static int	gre_set_key(struct gre_softc *, uint32_t);
136 static int	gre_set_udp_sport(struct gre_softc *, uint16_t);
137 static int	gre_setopts(struct gre_softc *, u_long, uint32_t);
138 
139 static int	gre_set_flags_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
140 static int	gre_set_key_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
141 static int	gre_set_encap_nl(struct gre_softc *, struct nl_pstate *, uint32_t);
142 static int	gre_set_udp_sport_nl(struct gre_softc *, struct nl_pstate *, uint16_t);
143 
144 SYSCTL_DECL(_net_link);
145 static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
146     "Generic Routing Encapsulation");
147 #ifndef MAX_GRE_NEST
148 /*
149  * This macro controls the default upper limitation on nesting of gre tunnels.
150  * Since, setting a large value to this macro with a careless configuration
151  * may introduce system crash, we don't allow any nestings by default.
152  * If you need to configure nested gre tunnels, you can define this macro
153  * in your kernel configuration file.  However, if you do so, please be
154  * careful to configure the tunnels so that it won't make a loop.
155  */
156 #define MAX_GRE_NEST 1
157 #endif
158 
159 VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST;
160 #define	V_max_gre_nesting	VNET(max_gre_nesting)
161 SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
162     &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
163 
164 struct nl_parsed_gre {
165 	struct sockaddr		*ifla_local;
166 	struct sockaddr		*ifla_remote;
167 	uint32_t		ifla_flags;
168 	uint32_t		ifla_okey;
169 	uint32_t		ifla_encap_type;
170 	uint16_t		ifla_encap_sport;
171 };
172 
173 #define _OUT(_field)	offsetof(struct nl_parsed_gre, _field)
174 static const struct nlattr_parser nla_p_gre[] = {
175 	{ .type = IFLA_GRE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
176 	{ .type = IFLA_GRE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
177 	{ .type = IFLA_GRE_FLAGS, .off = _OUT(ifla_flags), .cb = nlattr_get_uint32 },
178 	{ .type = IFLA_GRE_OKEY, .off = _OUT(ifla_okey), .cb = nlattr_get_uint32 },
179 	{ .type = IFLA_GRE_ENCAP_TYPE, .off = _OUT(ifla_encap_type), .cb = nlattr_get_uint32 },
180 	{ .type = IFLA_GRE_ENCAP_SPORT, .off = _OUT(ifla_encap_sport), .cb = nlattr_get_uint16 },
181 };
182 #undef _OUT
183 NL_DECLARE_ATTR_PARSER(gre_modify_parser, nla_p_gre);
184 
185 static const struct nlhdr_parser *all_parsers[] = {
186 	&gre_modify_parser,
187 };
188 
189 
190 static void
vnet_gre_init(const void * unused __unused)191 vnet_gre_init(const void *unused __unused)
192 {
193 	struct if_clone_addreq_v2 req = {
194 		.version = 2,
195 		.flags = IFC_F_AUTOUNIT,
196 		.match_f = NULL,
197 		.create_f = gre_clone_create,
198 		.destroy_f = gre_clone_destroy,
199 		.create_nl_f = gre_clone_create_nl,
200 		.modify_nl_f = gre_clone_modify_nl,
201 		.dump_nl_f = gre_clone_dump_nl,
202 	};
203 	V_gre_cloner = ifc_attach_cloner(grename, (struct if_clone_addreq *)&req);
204 #ifdef INET
205 	in_gre_init();
206 #endif
207 #ifdef INET6
208 	in6_gre_init();
209 #endif
210 }
211 VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
212     vnet_gre_init, NULL);
213 
214 static void
vnet_gre_uninit(const void * unused __unused)215 vnet_gre_uninit(const void *unused __unused)
216 {
217 
218 	ifc_detach_cloner(V_gre_cloner);
219 #ifdef INET
220 	in_gre_uninit();
221 #endif
222 #ifdef INET6
223 	in6_gre_uninit();
224 #endif
225 	/* XXX: epoch_call drain */
226 }
227 VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
228     vnet_gre_uninit, NULL);
229 
230 static int
gre_clone_create_nl(struct if_clone * ifc,char * name,size_t len,struct ifc_data_nl * ifd)231 gre_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
232     struct ifc_data_nl *ifd)
233 {
234 	struct ifc_data ifd_new = {
235 		.flags = IFC_F_SYSSPACE,
236 		.unit = ifd->unit,
237 	};
238 
239 	return (gre_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
240 }
241 
242 static int
gre_clone_modify_nl(struct ifnet * ifp,struct ifc_data_nl * ifd)243 gre_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
244 {
245 	struct gre_softc *sc = ifp->if_softc;
246 	struct nl_parsed_link *lattrs = ifd->lattrs;
247 	struct nl_pstate *npt = ifd->npt;
248 	struct nl_parsed_gre params;
249 	struct nlattr *attrs = lattrs->ifla_idata;
250 	struct nlattr_bmask bm;
251 	int error = 0;
252 
253 	if ((attrs == NULL) ||
254 	    (nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
255 		error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
256 		return (error);
257 	}
258 
259 	error = priv_check(curthread, PRIV_NET_GRE);
260 	if (error)
261 		return (error);
262 
263 	/* make sure ignored attributes by nl_parse will not cause panics */
264 	memset(&params, 0, sizeof(params));
265 
266 	nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
267 	if ((error = nl_parse_nested(attrs, &gre_modify_parser, npt, &params)) != 0)
268 		return (error);
269 
270 	if (nl_has_attr(&bm, IFLA_GRE_LOCAL) && nl_has_attr(&bm, IFLA_GRE_REMOTE))
271 		error = gre_set_addr_nl(sc, npt, params.ifla_local, params.ifla_remote);
272 	else if (nl_has_attr(&bm, IFLA_GRE_LOCAL) || nl_has_attr(&bm, IFLA_GRE_REMOTE)) {
273 		error = EINVAL;
274 		nlmsg_report_err_msg(npt, "Specify both remote and local address together");
275 	}
276 
277 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_FLAGS))
278 		error = gre_set_flags_nl(sc, npt, params.ifla_flags);
279 
280 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_OKEY))
281 		error = gre_set_key_nl(sc, npt, params.ifla_okey);
282 
283 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_TYPE))
284 		error = gre_set_encap_nl(sc, npt, params.ifla_encap_type);
285 
286 	if (error == 0 && nl_has_attr(&bm, IFLA_GRE_ENCAP_SPORT))
287 		error = gre_set_udp_sport_nl(sc, npt, params.ifla_encap_sport);
288 
289 	if (error == 0)
290 		error = nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt);
291 
292 	return (error);
293 }
294 
295 static void
gre_clone_dump_nl(struct ifnet * ifp,struct nl_writer * nw)296 gre_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
297 {
298 	GRE_RLOCK_TRACKER;
299 	struct gre_softc *sc;
300 
301 	nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
302 	nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
303 
304 	int off = nlattr_add_nested(nw, IFLA_LINKINFO);
305 	if (off == 0)
306 		return;
307 
308 	nlattr_add_string(nw, IFLA_INFO_KIND, "gre");
309 	int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
310 	if (off2 == 0) {
311 		nlattr_set_len(nw, off);
312 		return;
313 	}
314 
315 	sc = ifp->if_softc;
316 	GRE_RLOCK();
317 
318 	if (sc->gre_family == AF_INET) {
319 #ifdef INET
320 		struct in_aliasreq in;
321 		if (in_gre_ioctl(sc, SIOCGIFPSRCADDR, (caddr_t)&in) == 0)
322 			nlattr_add_in_addr(nw, IFLA_GRE_LOCAL,
323 			    &in.ifra_addr.sin_addr);
324 		if (in_gre_ioctl(sc, SIOCGIFPDSTADDR, (caddr_t)&in) == 0)
325 			nlattr_add_in_addr(nw, IFLA_GRE_REMOTE,
326 			    &in.ifra_addr.sin_addr);
327 #endif
328 	} else if (sc->gre_family == AF_INET6) {
329 #ifdef INET6
330 		struct in6_aliasreq in6;
331 		if (in6_gre_ioctl(sc, SIOCGIFPSRCADDR_IN6, (caddr_t)&in6) == 0)
332 			nlattr_add_in6_addr(nw, IFLA_GRE_LOCAL,
333 			    &in6.ifra_addr.sin6_addr);
334 		if (in6_gre_ioctl(sc, SIOCGIFPDSTADDR_IN6, (caddr_t)&in6) == 0)
335 			nlattr_add_in6_addr(nw, IFLA_GRE_REMOTE,
336 			    &in6.ifra_addr.sin6_addr);
337 #endif
338 	}
339 
340 	nlattr_add_u32(nw, IFLA_GRE_FLAGS, sc->gre_options);
341 	nlattr_add_u32(nw, IFLA_GRE_OKEY, sc->gre_key);
342 	nlattr_add_u32(nw, IFLA_GRE_ENCAP_TYPE,
343 	    sc->gre_options & GRE_UDPENCAP ? IFLA_TUNNEL_GRE_UDP : IFLA_TUNNEL_NONE);
344 	nlattr_add_u16(nw, IFLA_GRE_ENCAP_SPORT, sc->gre_port);
345 
346 	nlattr_set_len(nw, off2);
347 	nlattr_set_len(nw, off);
348 
349 	GRE_RUNLOCK();
350 }
351 
352 static int
gre_clone_create(struct if_clone * ifc,char * name,size_t len,struct ifc_data * ifd,struct ifnet ** ifpp)353 gre_clone_create(struct if_clone *ifc, char *name, size_t len,
354     struct ifc_data *ifd, struct ifnet **ifpp)
355 {
356 	struct gre_softc *sc;
357 
358 	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
359 	sc->gre_fibnum = curthread->td_proc->p_fibnum;
360 	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
361 	GRE2IFP(sc)->if_softc = sc;
362 	if_initname(GRE2IFP(sc), grename, ifd->unit);
363 
364 	GRE2IFP(sc)->if_mtu = GREMTU;
365 	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
366 	GRE2IFP(sc)->if_output = gre_output;
367 	GRE2IFP(sc)->if_ioctl = gre_ioctl;
368 	GRE2IFP(sc)->if_transmit = gre_transmit;
369 	GRE2IFP(sc)->if_qflush = gre_qflush;
370 #ifdef VIMAGE
371 	GRE2IFP(sc)->if_reassign = gre_reassign;
372 #endif
373 	GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
374 	GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
375 	if_attach(GRE2IFP(sc));
376 	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
377 	*ifpp = GRE2IFP(sc);
378 
379 	return (0);
380 }
381 
#ifdef VIMAGE
/*
 * if_reassign hook: tear the tunnel down under the configuration lock.
 * An interface whose softc has already been cleared is left alone.
 */
static void
gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
    char *unused __unused)
{
	struct gre_softc *sc;

	sx_xlock(&gre_ioctl_sx);
	if ((sc = ifp->if_softc) != NULL)
		gre_delete_tunnel(sc);
	sx_xunlock(&gre_ioctl_sx);
}
#endif /* VIMAGE */
396 
397 static int
gre_clone_destroy(struct if_clone * ifc,struct ifnet * ifp,uint32_t flags)398 gre_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
399 {
400 	struct gre_softc *sc;
401 
402 	sx_xlock(&gre_ioctl_sx);
403 	sc = ifp->if_softc;
404 	gre_delete_tunnel(sc);
405 	bpfdetach(ifp);
406 	if_detach(ifp);
407 	ifp->if_softc = NULL;
408 	sx_xunlock(&gre_ioctl_sx);
409 
410 	GRE_WAIT();
411 	if_free(ifp);
412 	free(sc, M_GRE);
413 
414 	return (0);
415 }
416 
417 static int
gre_set_key(struct gre_softc * sc,uint32_t key)418 gre_set_key(struct gre_softc *sc, uint32_t key)
419 {
420 	int error = 0;
421 
422 	GRE_LOCK_ASSERT();
423 
424 	if (sc->gre_key == key)
425 		return (0);
426 	error = gre_setopts(sc, GRESKEY, key);
427 
428 	return (error);
429 }
430 
431 static int
gre_set_flags(struct gre_softc * sc,uint32_t opt)432 gre_set_flags(struct gre_softc *sc, uint32_t opt)
433 {
434 	int error = 0;
435 
436 	GRE_LOCK_ASSERT();
437 
438 	if (opt & ~GRE_OPTMASK)
439 		return (EINVAL);
440 	if (sc->gre_options == opt)
441 		return (0);
442 	error = gre_setopts(sc, GRESOPTS, opt);
443 
444 	return (error);
445 }
446 
447 static int
gre_set_udp_sport(struct gre_softc * sc,uint16_t port)448 gre_set_udp_sport(struct gre_softc *sc, uint16_t port)
449 {
450 	int error = 0;
451 
452 	GRE_LOCK_ASSERT();
453 
454 	if (port != 0 && (port < V_ipport_hifirstauto ||
455 	    port > V_ipport_hilastauto))
456 		return (EINVAL);
457 	if (sc->gre_port == port)
458 		return (0);
459 	if ((sc->gre_options & GRE_UDPENCAP) == 0) {
460 		/*
461 		 * UDP encapsulation is not enabled, thus
462 		 * there is no need to reattach softc.
463 		 */
464 		sc->gre_port = port;
465 		return (0);
466 	}
467 	error = gre_setopts(sc, GRESPORT, port);
468 
469 	return (error);
470 }
471 
472 static int
gre_setopts(struct gre_softc * sc,u_long cmd,uint32_t opt)473 gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t opt)
474 {
475 	int error = 0;
476 
477 	GRE_LOCK_ASSERT();
478 
479 	switch (sc->gre_family) {
480 #ifdef INET
481 	case AF_INET:
482 		error = in_gre_setopts(sc, cmd, opt);
483 		break;
484 #endif
485 #ifdef INET6
486 	case AF_INET6:
487 		error = in6_gre_setopts(sc, cmd, opt);
488 		break;
489 #endif
490 	default:
491 		/*
492 		 * Tunnel is not yet configured.
493 		 * We can just change any parameters.
494 		 */
495 		if (cmd == GRESKEY)
496 			sc->gre_key = opt;
497 		if (cmd == GRESOPTS)
498 			sc->gre_options = opt;
499 		if (cmd == GRESPORT)
500 			sc->gre_port = opt;
501 		break;
502 	}
503 	/*
504 	 * XXX: Do we need to initiate change of interface
505 	 * state here?
506 	 */
507 	return (error);
508 };
509 
510 static int
gre_ioctl(struct ifnet * ifp,u_long cmd,caddr_t data)511 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
512 {
513 	struct ifreq *ifr = (struct ifreq *)data;
514 	struct gre_softc *sc;
515 	uint32_t opt;
516 	int error;
517 
518 	switch (cmd) {
519 	case SIOCSIFMTU:
520 		 /* XXX: */
521 		if (ifr->ifr_mtu < 576)
522 			return (EINVAL);
523 		ifp->if_mtu = ifr->ifr_mtu;
524 		return (0);
525 	case SIOCSIFADDR:
526 		ifp->if_flags |= IFF_UP;
527 	case SIOCSIFFLAGS:
528 	case SIOCADDMULTI:
529 	case SIOCDELMULTI:
530 		return (0);
531 	case GRESADDRS:
532 	case GRESADDRD:
533 	case GREGADDRS:
534 	case GREGADDRD:
535 	case GRESPROTO:
536 	case GREGPROTO:
537 		return (EOPNOTSUPP);
538 	}
539 	sx_xlock(&gre_ioctl_sx);
540 	sc = ifp->if_softc;
541 	if (sc == NULL) {
542 		error = ENXIO;
543 		goto end;
544 	}
545 	error = 0;
546 	switch (cmd) {
547 	case SIOCDIFPHYADDR:
548 		if (sc->gre_family == 0)
549 			break;
550 		gre_delete_tunnel(sc);
551 		break;
552 #ifdef INET
553 	case SIOCSIFPHYADDR:
554 	case SIOCGIFPSRCADDR:
555 	case SIOCGIFPDSTADDR:
556 		error = in_gre_ioctl(sc, cmd, data);
557 		break;
558 #endif
559 #ifdef INET6
560 	case SIOCSIFPHYADDR_IN6:
561 	case SIOCGIFPSRCADDR_IN6:
562 	case SIOCGIFPDSTADDR_IN6:
563 		error = in6_gre_ioctl(sc, cmd, data);
564 		break;
565 #endif
566 	case SIOCGTUNFIB:
567 		ifr->ifr_fib = sc->gre_fibnum;
568 		break;
569 	case SIOCSTUNFIB:
570 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
571 			break;
572 		if (ifr->ifr_fib >= rt_numfibs)
573 			error = EINVAL;
574 		else
575 			sc->gre_fibnum = ifr->ifr_fib;
576 		break;
577 	case GRESKEY:
578 	case GRESOPTS:
579 	case GRESPORT:
580 		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
581 			break;
582 		if ((error = copyin(ifr_data_get_ptr(ifr), &opt,
583 		    sizeof(opt))) != 0)
584 			break;
585 		if (cmd == GRESKEY)
586 			error = gre_set_key(sc, opt);
587 		else if (cmd == GRESOPTS)
588 			error = gre_set_flags(sc, opt);
589 		else if (cmd == GRESPORT)
590 			error = gre_set_udp_sport(sc, opt);
591 		break;
592 	case GREGKEY:
593 		error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr),
594 		    sizeof(sc->gre_key));
595 		break;
596 	case GREGOPTS:
597 		error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr),
598 		    sizeof(sc->gre_options));
599 		break;
600 	case GREGPORT:
601 		error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr),
602 		    sizeof(sc->gre_port));
603 		break;
604 	default:
605 		error = EINVAL;
606 		break;
607 	}
608 	if (error == 0 && sc->gre_family != 0) {
609 		if (
610 #ifdef INET
611 		    cmd == SIOCSIFPHYADDR ||
612 #endif
613 #ifdef INET6
614 		    cmd == SIOCSIFPHYADDR_IN6 ||
615 #endif
616 		    0) {
617 			if_link_state_change(ifp, LINK_STATE_UP);
618 		}
619 	}
620 end:
621 	sx_xunlock(&gre_ioctl_sx);
622 	return (error);
623 }
624 
625 static void
gre_delete_tunnel(struct gre_softc * sc)626 gre_delete_tunnel(struct gre_softc *sc)
627 {
628 	struct gre_socket *gs;
629 
630 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
631 	if (sc->gre_family != 0) {
632 		CK_LIST_REMOVE(sc, chain);
633 		CK_LIST_REMOVE(sc, srchash);
634 		GRE_WAIT();
635 		free(sc->gre_hdr, M_GRE);
636 		sc->gre_family = 0;
637 	}
638 	/*
639 	 * If this Tunnel was the last one that could use UDP socket,
640 	 * we should unlink socket from hash table and close it.
641 	 */
642 	if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) {
643 		CK_LIST_REMOVE(gs, chain);
644 		soclose(gs->so);
645 		NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx);
646 		sc->gre_so = NULL;
647 	}
648 	GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
649 	if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN);
650 }
651 
652 struct gre_list *
gre_hashinit(void)653 gre_hashinit(void)
654 {
655 	struct gre_list *hash;
656 	int i;
657 
658 	hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE,
659 	    M_GRE, M_WAITOK);
660 	for (i = 0; i < GRE_HASH_SIZE; i++)
661 		CK_LIST_INIT(&hash[i]);
662 
663 	return (hash);
664 }
665 
666 void
gre_hashdestroy(struct gre_list * hash)667 gre_hashdestroy(struct gre_list *hash)
668 {
669 
670 	free(hash, M_GRE);
671 }
672 
673 void
gre_sofree(epoch_context_t ctx)674 gre_sofree(epoch_context_t ctx)
675 {
676 	struct gre_socket *gs;
677 
678 	gs = __containerof(ctx, struct gre_socket, epoch_ctx);
679 	free(gs, M_GRE);
680 }
681 
/*
 * Add two 16-bit values with end-around carry: the sum is computed in a
 * wider type and a carry out of bit 15 is folded back into the low word.
 */
static __inline uint16_t
gre_cksum_add(uint16_t sum, uint16_t a)
{
	uint32_t wide;

	wide = (uint32_t)sum + (uint32_t)a;
	/* At most one carry is possible, so a single fold is enough. */
	return ((uint16_t)((wide & 0xFFFF) + (wide >> 16)));
}
690 
691 void
gre_update_udphdr(struct gre_softc * sc,struct udphdr * udp,uint16_t csum)692 gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum)
693 {
694 
695 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
696 	MPASS(sc->gre_options & GRE_UDPENCAP);
697 
698 	udp->uh_dport = htons(GRE_UDPPORT);
699 	udp->uh_sport = htons(sc->gre_port);
700 	udp->uh_sum = csum;
701 	udp->uh_ulen = 0;
702 }
703 
704 void
gre_update_hdr(struct gre_softc * sc,struct grehdr * gh)705 gre_update_hdr(struct gre_softc *sc, struct grehdr *gh)
706 {
707 	uint32_t *opts;
708 	uint16_t flags;
709 
710 	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
711 
712 	flags = 0;
713 	opts = gh->gre_opts;
714 	if (sc->gre_options & GRE_ENABLE_CSUM) {
715 		flags |= GRE_FLAGS_CP;
716 		sc->gre_hlen += 2 * sizeof(uint16_t);
717 		*opts++ = 0;
718 	}
719 	if (sc->gre_key != 0) {
720 		flags |= GRE_FLAGS_KP;
721 		sc->gre_hlen += sizeof(uint32_t);
722 		*opts++ = htonl(sc->gre_key);
723 	}
724 	if (sc->gre_options & GRE_ENABLE_SEQ) {
725 		flags |= GRE_FLAGS_SP;
726 		sc->gre_hlen += sizeof(uint32_t);
727 		*opts++ = 0;
728 	} else
729 		sc->gre_oseq = 0;
730 	gh->gre_flags = htons(flags);
731 }
732 
733 int
gre_input(struct mbuf * m,int off,int proto,void * arg)734 gre_input(struct mbuf *m, int off, int proto, void *arg)
735 {
736 	struct gre_softc *sc = arg;
737 	struct grehdr *gh;
738 	struct ifnet *ifp;
739 	uint32_t *opts;
740 #ifdef notyet
741 	uint32_t key;
742 #endif
743 	uint16_t flags;
744 	int hlen, isr, af;
745 
746 	ifp = GRE2IFP(sc);
747 	hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t);
748 	if (m->m_pkthdr.len < hlen)
749 		goto drop;
750 	if (m->m_len < hlen) {
751 		m = m_pullup(m, hlen);
752 		if (m == NULL)
753 			goto drop;
754 	}
755 	gh = (struct grehdr *)mtodo(m, off);
756 	flags = ntohs(gh->gre_flags);
757 	if (flags & ~GRE_FLAGS_MASK)
758 		goto drop;
759 	opts = gh->gre_opts;
760 	hlen = 2 * sizeof(uint16_t);
761 	if (flags & GRE_FLAGS_CP) {
762 		/* reserved1 field must be zero */
763 		if (((uint16_t *)opts)[1] != 0)
764 			goto drop;
765 		if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0)
766 			goto drop;
767 		hlen += 2 * sizeof(uint16_t);
768 		opts++;
769 	}
770 	if (flags & GRE_FLAGS_KP) {
771 #ifdef notyet
772         /*
773          * XXX: The current implementation uses the key only for outgoing
774          * packets. But we can check the key value here, or even in the
775          * encapcheck function.
776          */
777 		key = ntohl(*opts);
778 #endif
779 		hlen += sizeof(uint32_t);
780 		opts++;
781     }
782 #ifdef notyet
783 	} else
784 		key = 0;
785 
786 	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
787 		goto drop;
788 #endif
789 	if (flags & GRE_FLAGS_SP) {
790 #ifdef notyet
791 		seq = ntohl(*opts);
792 #endif
793 		hlen += sizeof(uint32_t);
794 	}
795 	switch (ntohs(gh->gre_proto)) {
796 	case ETHERTYPE_WCCP:
797 		/*
798 		 * For WCCP skip an additional 4 bytes if after GRE header
799 		 * doesn't follow an IP header.
800 		 */
801 		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
802 			hlen += sizeof(uint32_t);
803 		/* FALLTHROUGH */
804 	case ETHERTYPE_IP:
805 		isr = NETISR_IP;
806 		af = AF_INET;
807 		break;
808 	case ETHERTYPE_IPV6:
809 		isr = NETISR_IPV6;
810 		af = AF_INET6;
811 		break;
812 	default:
813 		goto drop;
814 	}
815 	m_adj(m, off + hlen);
816 	m_clrprotoflags(m);
817 	m->m_pkthdr.rcvif = ifp;
818 	M_SETFIB(m, ifp->if_fib);
819 #ifdef MAC
820 	mac_ifnet_create_mbuf(ifp, m);
821 #endif
822 	BPF_MTAP2(ifp, &af, sizeof(af), m);
823 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
824 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
825 	if ((ifp->if_flags & IFF_MONITOR) != 0)
826 		m_freem(m);
827 	else
828 		netisr_dispatch(isr, m);
829 	return (IPPROTO_DONE);
830 drop:
831 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
832 	m_freem(m);
833 	return (IPPROTO_DONE);
834 }
835 
836 static int
837 gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
838    struct route *ro)
839 {
840 	uint32_t af;
841 
842 	/* BPF writes need to be handled specially. */
843 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
844 		bcopy(dst->sa_data, &af, sizeof(af));
845 	else
846 		af = RO_GET_FAMILY(ro, dst);
847 	/*
848 	 * Now save the af in the inbound pkt csum data, this is a cheat since
849 	 * we are using the inbound csum_data field to carry the af over to
850 	 * the gre_transmit() routine, avoiding using yet another mtag.
851 	 */
852 	m->m_pkthdr.csum_data = af;
853 	return (ifp->if_transmit(ifp, m));
854 }
855 
856 static void
857 gre_setseqn(struct grehdr *gh, uint32_t seq)
858 {
859 	uint32_t *opts;
860 	uint16_t flags;
861 
862 	opts = gh->gre_opts;
863 	flags = ntohs(gh->gre_flags);
864 	KASSERT((flags & GRE_FLAGS_SP) != 0,
865 	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
866 	if (flags & GRE_FLAGS_CP)
867 		opts++;
868 	if (flags & GRE_FLAGS_KP)
869 		opts++;
870 	*opts = htonl(seq);
871 }
872 
873 static uint32_t
874 gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af)
875 {
876 	uint32_t flowid = 0;
877 
878 	if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0)
879 		return (flowid);
880 	switch (af) {
881 #ifdef INET
882 	case AF_INET:
883 #ifdef RSS
884 		flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src,
885 		    mtod(m, struct ip *)->ip_dst);
886 		break;
887 #endif
888 		flowid = mtod(m, struct ip *)->ip_src.s_addr ^
889 		    mtod(m, struct ip *)->ip_dst.s_addr;
890 		break;
891 #endif
892 #ifdef INET6
893 	case AF_INET6:
894 #ifdef RSS
895 		flowid = rss_hash_ip6_2tuple(
896 		    &mtod(m, struct ip6_hdr *)->ip6_src,
897 		    &mtod(m, struct ip6_hdr *)->ip6_dst);
898 		break;
899 #endif
900 		flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^
901 		    mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3];
902 		break;
903 #endif
904 	default:
905 		break;
906 	}
907 	return (flowid);
908 }
909 
910 #define	MTAG_GRE	1307983903
911 static int
912 gre_transmit(struct ifnet *ifp, struct mbuf *m)
913 {
914 	GRE_RLOCK_TRACKER;
915 	struct gre_softc *sc;
916 	struct grehdr *gh;
917 	struct udphdr *uh;
918 	uint32_t af, flowid;
919 	int error, len;
920 	uint16_t proto;
921 
922 	len = 0;
923 	GRE_RLOCK();
924 #ifdef MAC
925 	error = mac_ifnet_check_transmit(ifp, m);
926 	if (error) {
927 		m_freem(m);
928 		goto drop;
929 	}
930 #endif
931 	error = ENETDOWN;
932 	sc = ifp->if_softc;
933 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
934 	    (ifp->if_flags & IFF_UP) == 0 ||
935 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
936 	    sc->gre_family == 0 ||
937 	    (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE,
938 		V_max_gre_nesting)) != 0) {
939 		m_freem(m);
940 		goto drop;
941 	}
942 	af = m->m_pkthdr.csum_data;
943 	BPF_MTAP2(ifp, &af, sizeof(af), m);
944 	m->m_flags &= ~(M_BCAST|M_MCAST);
945 	flowid = gre_flowid(sc, m, af);
946 	M_SETFIB(m, sc->gre_fibnum);
947 	M_PREPEND(m, sc->gre_hlen, M_NOWAIT);
948 	if (m == NULL) {
949 		error = ENOBUFS;
950 		goto drop;
951 	}
952 	bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen);
953 	/* Determine GRE proto */
954 	switch (af) {
955 #ifdef INET
956 	case AF_INET:
957 		proto = htons(ETHERTYPE_IP);
958 		break;
959 #endif
960 #ifdef INET6
961 	case AF_INET6:
962 		proto = htons(ETHERTYPE_IPV6);
963 		break;
964 #endif
965 	default:
966 		m_freem(m);
967 		error = ENETDOWN;
968 		goto drop;
969 	}
970 	/* Determine offset of GRE header */
971 	switch (sc->gre_family) {
972 #ifdef INET
973 	case AF_INET:
974 		len = sizeof(struct ip);
975 		break;
976 #endif
977 #ifdef INET6
978 	case AF_INET6:
979 		len = sizeof(struct ip6_hdr);
980 		break;
981 #endif
982 	default:
983 		m_freem(m);
984 		error = ENETDOWN;
985 		goto drop;
986 	}
987 	if (sc->gre_options & GRE_UDPENCAP) {
988 		uh = (struct udphdr *)mtodo(m, len);
989 		uh->uh_sport |= htons(V_ipport_hifirstauto) |
990 		    (flowid >> 16) | (flowid & 0xFFFF);
991 		uh->uh_sport = htons(ntohs(uh->uh_sport) %
992 		    V_ipport_hilastauto);
993 		uh->uh_ulen = htons(m->m_pkthdr.len - len);
994 		uh->uh_sum = gre_cksum_add(uh->uh_sum,
995 		    htons(m->m_pkthdr.len - len + IPPROTO_UDP));
996 		m->m_pkthdr.csum_flags = sc->gre_csumflags;
997 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
998 		len += sizeof(struct udphdr);
999 	}
1000 	gh = (struct grehdr *)mtodo(m, len);
1001 	gh->gre_proto = proto;
1002 	if (sc->gre_options & GRE_ENABLE_SEQ)
1003 		gre_setseqn(gh, sc->gre_oseq++);
1004 	if (sc->gre_options & GRE_ENABLE_CSUM) {
1005 		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
1006 		    m->m_pkthdr.len, len);
1007 	}
1008 	len = m->m_pkthdr.len - len;
1009 	switch (sc->gre_family) {
1010 #ifdef INET
1011 	case AF_INET:
1012 		error = in_gre_output(m, af, sc->gre_hlen);
1013 		break;
1014 #endif
1015 #ifdef INET6
1016 	case AF_INET6:
1017 		error = in6_gre_output(m, af, sc->gre_hlen, flowid);
1018 		break;
1019 #endif
1020 	default:
1021 		m_freem(m);
1022 		error = ENETDOWN;
1023 	}
1024 drop:
1025 	if (error)
1026 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
1027 	else {
1028 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1029 		if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
1030 	}
1031 	GRE_RUNLOCK();
1032 	return (error);
1033 }
1034 
1035 static void
1036 gre_qflush(struct ifnet *ifp __unused)
1037 {
1038 
1039 }
1040 
1041 static int
1042 gre_set_addr_nl(struct gre_softc *sc, struct nl_pstate *npt,
1043     struct sockaddr *src, struct sockaddr *dst)
1044 {
1045 #if defined(INET) || defined(INET6)
1046 	union {
1047 #ifdef INET
1048 		struct in_aliasreq in;
1049 #endif
1050 #ifdef INET6
1051 		struct in6_aliasreq in6;
1052 #endif
1053 	} aliasreq;
1054 #endif
1055 	int error;
1056 
1057 	/* XXX: this sanity check runs again in in[6]_gre_ioctl */
1058 	if (src->sa_family != dst->sa_family)
1059 		error = EADDRNOTAVAIL;
1060 #ifdef INET
1061 	else if (src->sa_family == AF_INET) {
1062 		memcpy(&aliasreq.in.ifra_addr, src, sizeof(struct sockaddr_in));
1063 		memcpy(&aliasreq.in.ifra_dstaddr, dst, sizeof(struct sockaddr_in));
1064 		sx_xlock(&gre_ioctl_sx);
1065 		error = in_gre_ioctl(sc, SIOCSIFPHYADDR, (caddr_t)&aliasreq.in);
1066 		sx_xunlock(&gre_ioctl_sx);
1067 	}
1068 #endif
1069 #ifdef INET6
1070 	else if (src->sa_family == AF_INET6) {
1071 		memcpy(&aliasreq.in6.ifra_addr, src, sizeof(struct sockaddr_in6));
1072 		memcpy(&aliasreq.in6.ifra_dstaddr, dst, sizeof(struct sockaddr_in6));
1073 		sx_xlock(&gre_ioctl_sx);
1074 		error = in6_gre_ioctl(sc, SIOCSIFPHYADDR_IN6, (caddr_t)&aliasreq.in6);
1075 		sx_xunlock(&gre_ioctl_sx);
1076 	}
1077 #endif
1078 	else
1079 		error = EAFNOSUPPORT;
1080 
1081 	if (error == EADDRNOTAVAIL)
1082 		nlmsg_report_err_msg(npt, "address is invalid");
1083 	if (error == EEXIST)
1084 		nlmsg_report_err_msg(npt, "remote and local addresses are the same");
1085 	if (error == EAFNOSUPPORT)
1086 		nlmsg_report_err_msg(npt, "address family is not supported");
1087 
1088 	return (error);
1089 }
1090 
1091 static int
1092 gre_set_flags_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t opt)
1093 {
1094 	int error = 0;
1095 
1096 	sx_xlock(&gre_ioctl_sx);
1097 	error = gre_set_flags(sc, opt);
1098 	sx_xunlock(&gre_ioctl_sx);
1099 
1100 	if (error == EINVAL)
1101 		nlmsg_report_err_msg(npt, "gre flags are invalid");
1102 
1103 	return (error);
1104 }
1105 
1106 static int
1107 gre_set_key_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t key)
1108 {
1109 	int error = 0;
1110 
1111 	sx_xlock(&gre_ioctl_sx);
1112 	error = gre_set_key(sc, key);
1113 	sx_xunlock(&gre_ioctl_sx);
1114 
1115 	if (error == EINVAL)
1116 		nlmsg_report_err_msg(npt, "gre key is invalid: %u", key);
1117 
1118 	return (error);
1119 }
1120 
1121 static int
1122 gre_set_encap_nl(struct gre_softc *sc, struct nl_pstate *npt, uint32_t type)
1123 {
1124 	uint32_t opt;
1125 	int error = 0;
1126 
1127 	sx_xlock(&gre_ioctl_sx);
1128 	opt = sc->gre_options;
1129 	if (type & IFLA_TUNNEL_GRE_UDP)
1130 		opt |= GRE_UDPENCAP;
1131 	else
1132 		opt &= ~GRE_UDPENCAP;
1133 	error = gre_set_flags(sc, opt);
1134 	sx_xunlock(&gre_ioctl_sx);
1135 
1136 	if (error == EEXIST)
1137 		nlmsg_report_err_msg(npt, "same gre tunnel exist");
1138 
1139 	return (error);
1140 }
1141 
1142 
1143 static int
1144 gre_set_udp_sport_nl(struct gre_softc *sc, struct nl_pstate *npt, uint16_t port)
1145 {
1146 	int error = 0;
1147 
1148 	sx_xlock(&gre_ioctl_sx);
1149 	error = gre_set_udp_sport(sc, port);
1150 	sx_xunlock(&gre_ioctl_sx);
1151 
1152 	if (error == EINVAL)
1153 		nlmsg_report_err_msg(npt, "source port is invalid: %u", port);
1154 
1155 	return (error);
1156 }
1157 
1158 
1159 static int
1160 gremodevent(module_t mod, int type, void *data)
1161 {
1162 
1163 	switch (type) {
1164 	case MOD_LOAD:
1165 		NL_VERIFY_PARSERS(all_parsers);
1166 		break;
1167 	case MOD_UNLOAD:
1168 		break;
1169 	default:
1170 		return (EOPNOTSUPP);
1171 	}
1172 	return (0);
1173 }
1174 
1175 static moduledata_t gre_mod = {
1176 	"if_gre",
1177 	gremodevent,
1178 	0
1179 };
1180 
1181 DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
1182 MODULE_VERSION(if_gre, 1);
1183