xref: /freebsd/sys/net/if_gif.c (revision db3cb3640f547c063293e9fdc4db69e9dc120951)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sx.h>
50 #include <sys/errno.h>
51 #include <sys/time.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/protosw.h>
57 #include <sys/conf.h>
58 #include <machine/cpu.h>
59 
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/bpf.h>
67 #include <net/vnet.h>
68 
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
73 #ifdef	INET
74 #include <netinet/in_var.h>
75 #include <netinet/ip_var.h>
76 #endif	/* INET */
77 
78 #ifdef INET6
79 #ifndef INET
80 #include <netinet/in.h>
81 #endif
82 #include <netinet6/in6_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_ecn.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #include <netinet6/ip6protosw.h>
88 #endif /* INET6 */
89 
90 #include <netinet/ip_encap.h>
91 #include <net/ethernet.h>
92 #include <net/if_bridgevar.h>
93 #include <net/if_gif.h>
94 
95 #include <security/mac/mac_framework.h>
96 
/* Driver name handed to if_initname() and if_clone_simple() below. */
static const char gifname[] = "gif";

/*
 * gif_mtx protects a per-vnet gif_softc_list.
 */
static VNET_DEFINE(struct mtx, gif_mtx);
#define	V_gif_mtx		VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define	V_gif_softc_list	VNET(gif_softc_list)
/*
 * gif_ioctl_sx is taken exclusively by gif_ioctl() and gif_clone_destroy();
 * gif_attach() and gif_detach() assert that it is held.
 */
static struct sx gif_ioctl_sx;
SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");

/*
 * Wrappers around the per-vnet list mutex.  The (x) parameter is unused;
 * callers in this file invoke the macros with an empty argument list.
 */
#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
					    NULL, MTX_DEF)
#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)

/*
 * Optional hooks, called only when non-NULL from gif_input(),
 * gif_clone_create() and gif_clone_destroy().  Nothing in this file assigns
 * them (presumably the netgraph gif node does -- TODO confirm).
 */
void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void	(*ng_gif_attach_p)(struct ifnet *ifp);
void	(*ng_gif_detach_p)(struct ifnet *ifp);

/* Forward declarations of the file-local functions defined below. */
static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
    struct sockaddr *);
static void	gif_delete_tunnel(struct ifnet *);
static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
static int	gif_transmit(struct ifnet *, struct mbuf *);
static void	gif_qflush(struct ifnet *);
static int	gif_clone_create(struct if_clone *, int, caddr_t);
static void	gif_clone_destroy(struct ifnet *);
/* Per-vnet cloner: set in vnet_gif_init(), detached in vnet_gif_uninit(). */
static VNET_DEFINE(struct if_clone *, gif_cloner);
#define	V_gif_cloner	VNET(gif_cloner)

static int gifmodevent(module_t, int, void *);
133 
/* Sysctl subtree net.link.gif. */
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
    "Generic Tunnel Interface");
#ifndef MAX_GIF_NEST
/*
 * This macro sets the default upper limit on the nesting of gif tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, nesting is not allowed by default.
 * If you need nested gif tunnels, define this macro in your kernel
 * configuration file.  If you do so, take care to configure the tunnels
 * so that they do not form a loop.
 */
#define MAX_GIF_NEST 1
#endif
/*
 * Per-vnet nesting limit, exported as net.link.gif.max_nesting and
 * compared against the per-packet tag count in gif_output().
 */
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define	V_max_gif_nesting	VNET(max_gif_nesting)
SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");

/*
 * By default, we disallow creation of multiple tunnels between the same
 * pair of addresses.  Some applications require this functionality so
 * we allow control over this check here.  The knob is exported as
 * net.link.gif.parallel_tunnels and consulted in gif_set_tunnel().
 */
#ifdef XBONEHACK
static VNET_DEFINE(int, parallel_tunnels) = 1;
#else
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define	V_parallel_tunnels	VNET(parallel_tunnels)
SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
    "Allow parallel tunnels?");
167 
168 /* copy from src/sys/net/if_ethersubr.c */
169 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
170 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
171 #ifndef ETHER_IS_BROADCAST
172 #define ETHER_IS_BROADCAST(addr) \
173 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
174 #endif
175 
176 static int
177 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
178 {
179 	struct gif_softc *sc;
180 
181 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
182 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
183 	GIF2IFP(sc) = if_alloc(IFT_GIF);
184 	GIF_LOCK_INIT(sc);
185 	GIF2IFP(sc)->if_softc = sc;
186 	if_initname(GIF2IFP(sc), gifname, unit);
187 
188 	GIF2IFP(sc)->if_addrlen = 0;
189 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
190 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
191 #if 0
192 	/* turn off ingress filter */
193 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
194 #endif
195 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
196 	GIF2IFP(sc)->if_transmit  = gif_transmit;
197 	GIF2IFP(sc)->if_qflush  = gif_qflush;
198 	GIF2IFP(sc)->if_output = gif_output;
199 	if_attach(GIF2IFP(sc));
200 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
201 	if (ng_gif_attach_p != NULL)
202 		(*ng_gif_attach_p)(GIF2IFP(sc));
203 
204 	GIF_LIST_LOCK();
205 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
206 	GIF_LIST_UNLOCK();
207 	return (0);
208 }
209 
210 static void
211 gif_clone_destroy(struct ifnet *ifp)
212 {
213 	struct gif_softc *sc;
214 
215 	sx_xlock(&gif_ioctl_sx);
216 	sc = ifp->if_softc;
217 	gif_delete_tunnel(ifp);
218 	GIF_LIST_LOCK();
219 	LIST_REMOVE(sc, gif_list);
220 	GIF_LIST_UNLOCK();
221 	if (ng_gif_detach_p != NULL)
222 		(*ng_gif_detach_p)(ifp);
223 	bpfdetach(ifp);
224 	if_detach(ifp);
225 	ifp->if_softc = NULL;
226 	sx_xunlock(&gif_ioctl_sx);
227 
228 	if_free(ifp);
229 	GIF_LOCK_DESTROY(sc);
230 	free(sc, M_GIF);
231 }
232 
233 static void
234 vnet_gif_init(const void *unused __unused)
235 {
236 
237 	LIST_INIT(&V_gif_softc_list);
238 	GIF_LIST_LOCK_INIT();
239 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
240 	    gif_clone_destroy, 0);
241 }
242 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
243     vnet_gif_init, NULL);
244 
245 static void
246 vnet_gif_uninit(const void *unused __unused)
247 {
248 
249 	if_clone_detach(V_gif_cloner);
250 	GIF_LIST_LOCK_DESTROY();
251 }
252 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
253     vnet_gif_uninit, NULL);
254 
255 static int
256 gifmodevent(module_t mod, int type, void *data)
257 {
258 
259 	switch (type) {
260 	case MOD_LOAD:
261 	case MOD_UNLOAD:
262 		break;
263 	default:
264 		return (EOPNOTSUPP);
265 	}
266 	return (0);
267 }
268 
269 static moduledata_t gif_mod = {
270 	"if_gif",
271 	gifmodevent,
272 	0
273 };
274 
275 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
276 MODULE_VERSION(if_gif, 1);
277 
278 int
279 gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
280 {
281 	GIF_RLOCK_TRACKER;
282 	struct gif_softc *sc;
283 	int ret;
284 	uint8_t ver;
285 
286 	sc = (struct gif_softc *)arg;
287 	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
288 		return (0);
289 
290 	ret = 0;
291 	GIF_RLOCK(sc);
292 
293 	/* no physical address */
294 	if (sc->gif_family == 0)
295 		goto done;
296 
297 	switch (proto) {
298 #ifdef INET
299 	case IPPROTO_IPV4:
300 #endif
301 #ifdef INET6
302 	case IPPROTO_IPV6:
303 #endif
304 	case IPPROTO_ETHERIP:
305 		break;
306 	default:
307 		goto done;
308 	}
309 
310 	/* Bail on short packets */
311 	if (m->m_pkthdr.len < sizeof(struct ip))
312 		goto done;
313 
314 	m_copydata(m, 0, 1, &ver);
315 	switch (ver >> 4) {
316 #ifdef INET
317 	case 4:
318 		if (sc->gif_family != AF_INET)
319 			goto done;
320 		ret = in_gif_encapcheck(m, off, proto, arg);
321 		break;
322 #endif
323 #ifdef INET6
324 	case 6:
325 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
326 			goto done;
327 		if (sc->gif_family != AF_INET6)
328 			goto done;
329 		ret = in6_gif_encapcheck(m, off, proto, arg);
330 		break;
331 #endif
332 	}
333 done:
334 	GIF_RUNLOCK(sc);
335 	return (ret);
336 }
337 
338 static int
339 gif_transmit(struct ifnet *ifp, struct mbuf *m)
340 {
341 	struct gif_softc *sc;
342 	struct etherip_header *eth;
343 #ifdef INET
344 	struct ip *ip;
345 #endif
346 #ifdef INET6
347 	struct ip6_hdr *ip6;
348 	uint32_t t;
349 #endif
350 	uint32_t af;
351 	uint8_t proto, ecn;
352 	int error;
353 
354 	error = ENETDOWN;
355 	sc = ifp->if_softc;
356 	if (sc->gif_family == 0) {
357 		m_freem(m);
358 		goto err;
359 	}
360 	/* Now pull back the af that we stashed in the csum_data. */
361 	af = m->m_pkthdr.csum_data;
362 	BPF_MTAP2(ifp, &af, sizeof(af), m);
363 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
364 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
365 	M_SETFIB(m, sc->gif_fibnum);
366 	/* inner AF-specific encapsulation */
367 	ecn = 0;
368 	switch (af) {
369 #ifdef INET
370 	case AF_INET:
371 		proto = IPPROTO_IPV4;
372 		if (m->m_len < sizeof(struct ip))
373 			m = m_pullup(m, sizeof(struct ip));
374 		if (m == NULL) {
375 			error = ENOBUFS;
376 			goto err;
377 		}
378 		ip = mtod(m, struct ip *);
379 		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
380 		    ECN_NOCARE, &ecn, &ip->ip_tos);
381 		break;
382 #endif
383 #ifdef INET6
384 	case AF_INET6:
385 		proto = IPPROTO_IPV6;
386 		if (m->m_len < sizeof(struct ip6_hdr))
387 			m = m_pullup(m, sizeof(struct ip6_hdr));
388 		if (m == NULL) {
389 			error = ENOBUFS;
390 			goto err;
391 		}
392 		t = 0;
393 		ip6 = mtod(m, struct ip6_hdr *);
394 		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
395 		    ECN_NOCARE, &t, &ip6->ip6_flow);
396 		ecn = (ntohl(t) >> 20) & 0xff;
397 		break;
398 #endif
399 	case AF_LINK:
400 		proto = IPPROTO_ETHERIP;
401 		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
402 		if (m == NULL) {
403 			error = ENOBUFS;
404 			goto err;
405 		}
406 		eth = mtod(m, struct etherip_header *);
407 		eth->eip_resvh = 0;
408 		if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) {
409 			eth->eip_ver = 0;
410 			eth->eip_resvl = ETHERIP_VERSION;
411 		} else {
412 			eth->eip_ver = ETHERIP_VERSION;
413 			eth->eip_resvl = 0;
414 		}
415 		break;
416 	default:
417 		error = EAFNOSUPPORT;
418 		m_freem(m);
419 		goto err;
420 	}
421 	/* XXX should we check if our outer source is legal? */
422 	/* dispatch to output logic based on outer AF */
423 	switch (sc->gif_family) {
424 #ifdef INET
425 	case AF_INET:
426 		error = in_gif_output(ifp, m, proto, ecn);
427 		break;
428 #endif
429 #ifdef INET6
430 	case AF_INET6:
431 		error = in6_gif_output(ifp, m, proto, ecn);
432 		break;
433 #endif
434 	default:
435 		m_freem(m);
436 	}
437 err:
438 	if (error)
439 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
440 	return (error);
441 }
442 
443 static void
444 gif_qflush(struct ifnet *ifp __unused)
445 {
446 
447 }
448 
449 int
450 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
451 	struct route *ro)
452 {
453 	struct m_tag *mtag;
454 	uint32_t af;
455 	int gif_called;
456 	int error = 0;
457 #ifdef MAC
458 	error = mac_ifnet_check_transmit(ifp, m);
459 	if (error)
460 		goto err;
461 #endif
462 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
463 	    (ifp->if_flags & IFF_UP) == 0) {
464 		error = ENETDOWN;
465 		goto err;
466 	}
467 
468 	/*
469 	 * gif may cause infinite recursion calls when misconfigured.
470 	 * We'll prevent this by detecting loops.
471 	 *
472 	 * High nesting level may cause stack exhaustion.
473 	 * We'll prevent this by introducing upper limit.
474 	 */
475 	gif_called = 1;
476 	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
477 	while (mtag != NULL) {
478 		if (*(struct ifnet **)(mtag + 1) == ifp) {
479 			log(LOG_NOTICE,
480 			    "gif_output: loop detected on %s\n",
481 			    (*(struct ifnet **)(mtag + 1))->if_xname);
482 			error = EIO;	/* is there better errno? */
483 			goto err;
484 		}
485 		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
486 		gif_called++;
487 	}
488 	if (gif_called > V_max_gif_nesting) {
489 		log(LOG_NOTICE,
490 		    "gif_output: recursively called too many times(%d)\n",
491 		    gif_called);
492 		error = EIO;	/* is there better errno? */
493 		goto err;
494 	}
495 	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
496 	    M_NOWAIT);
497 	if (mtag == NULL) {
498 		error = ENOMEM;
499 		goto err;
500 	}
501 	*(struct ifnet **)(mtag + 1) = ifp;
502 	m_tag_prepend(m, mtag);
503 
504 	m->m_flags &= ~(M_BCAST|M_MCAST);
505 	if (dst->sa_family == AF_UNSPEC)
506 		bcopy(dst->sa_data, &af, sizeof(af));
507 	else
508 		af = dst->sa_family;
509 	if (ifp->if_bridge)
510 		af = AF_LINK;
511 	/*
512 	 * Now save the af in the inbound pkt csum data, this is a cheat since
513 	 * we are using the inbound csum_data field to carry the af over to
514 	 * the gif_transmit() routine, avoiding using yet another mtag.
515 	 */
516 	m->m_pkthdr.csum_data = af;
517 	return (ifp->if_transmit(ifp, m));
518 err:
519 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
520 	m_freem(m);
521 	return (error);
522 }
523 
524 void
525 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
526 {
527 	struct etherip_header *eip;
528 #ifdef INET
529 	struct ip *ip;
530 #endif
531 #ifdef INET6
532 	struct ip6_hdr *ip6;
533 	uint32_t t;
534 #endif
535 	struct gif_softc *sc;
536 	struct ether_header *eh;
537 	struct ifnet *oldifp;
538 	uint32_t gif_options;
539 	int isr, n, af;
540 
541 	if (ifp == NULL) {
542 		/* just in case */
543 		m_freem(m);
544 		return;
545 	}
546 	sc = ifp->if_softc;
547 	gif_options = sc->gif_options;
548 	m->m_pkthdr.rcvif = ifp;
549 	m_clrprotoflags(m);
550 	switch (proto) {
551 #ifdef INET
552 	case IPPROTO_IPV4:
553 		af = AF_INET;
554 		if (m->m_len < sizeof(struct ip))
555 			m = m_pullup(m, sizeof(struct ip));
556 		if (m == NULL)
557 			goto drop;
558 		ip = mtod(m, struct ip *);
559 		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
560 		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
561 			m_freem(m);
562 			goto drop;
563 		}
564 		break;
565 #endif
566 #ifdef INET6
567 	case IPPROTO_IPV6:
568 		af = AF_INET6;
569 		if (m->m_len < sizeof(struct ip6_hdr))
570 			m = m_pullup(m, sizeof(struct ip6_hdr));
571 		if (m == NULL)
572 			goto drop;
573 		t = htonl((uint32_t)ecn << 20);
574 		ip6 = mtod(m, struct ip6_hdr *);
575 		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
576 		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
577 			m_freem(m);
578 			goto drop;
579 		}
580 		break;
581 #endif
582 	case IPPROTO_ETHERIP:
583 		af = AF_LINK;
584 		break;
585 	default:
586 		m_freem(m);
587 		goto drop;
588 	}
589 
590 #ifdef MAC
591 	mac_ifnet_create_mbuf(ifp, m);
592 #endif
593 
594 	if (bpf_peers_present(ifp->if_bpf)) {
595 		uint32_t af1 = af;
596 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
597 	}
598 
599 	if ((ifp->if_flags & IFF_MONITOR) != 0) {
600 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
601 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
602 		m_freem(m);
603 		return;
604 	}
605 
606 	if (ng_gif_input_p != NULL) {
607 		(*ng_gif_input_p)(ifp, &m, af);
608 		if (m == NULL)
609 			goto drop;
610 	}
611 
612 	/*
613 	 * Put the packet to the network layer input queue according to the
614 	 * specified address family.
615 	 * Note: older versions of gif_input directly called network layer
616 	 * input functions, e.g. ip6_input, here.  We changed the policy to
617 	 * prevent too many recursive calls of such input functions, which
618 	 * might cause kernel panic.  But the change may introduce another
619 	 * problem; if the input queue is full, packets are discarded.
620 	 * The kernel stack overflow really happened, and we believed
621 	 * queue-full rarely occurs, so we changed the policy.
622 	 */
623 	switch (af) {
624 #ifdef INET
625 	case AF_INET:
626 		isr = NETISR_IP;
627 		break;
628 #endif
629 #ifdef INET6
630 	case AF_INET6:
631 		isr = NETISR_IPV6;
632 		break;
633 #endif
634 	case AF_LINK:
635 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
636 		if (n > m->m_len)
637 			m = m_pullup(m, n);
638 		if (m == NULL)
639 			goto drop;
640 		eip = mtod(m, struct etherip_header *);
641 		/*
642 		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
643 		 * accepts an EtherIP packet with revered version field in
644 		 * the header.  This is a knob for backward compatibility
645 		 * with FreeBSD 7.2R or prior.
646 		 */
647 		if (eip->eip_ver != ETHERIP_VERSION) {
648 			if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 ||
649 			    eip->eip_resvl != ETHERIP_VERSION) {
650 				/* discard unknown versions */
651 				m_freem(m);
652 				goto drop;
653 			}
654 		}
655 		m_adj(m, sizeof(struct etherip_header));
656 
657 		m->m_flags &= ~(M_BCAST|M_MCAST);
658 		m->m_pkthdr.rcvif = ifp;
659 
660 		if (ifp->if_bridge) {
661 			oldifp = ifp;
662 			eh = mtod(m, struct ether_header *);
663 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
664 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
665 					m->m_flags |= M_BCAST;
666 				else
667 					m->m_flags |= M_MCAST;
668 				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
669 			}
670 			BRIDGE_INPUT(ifp, m);
671 
672 			if (m != NULL && ifp != oldifp) {
673 				/*
674 				 * The bridge gave us back itself or one of the
675 				 * members for which the frame is addressed.
676 				 */
677 				ether_demux(ifp, m);
678 				return;
679 			}
680 		}
681 		if (m != NULL)
682 			m_freem(m);
683 		return;
684 
685 	default:
686 		if (ng_gif_input_orphan_p != NULL)
687 			(*ng_gif_input_orphan_p)(ifp, m, af);
688 		else
689 			m_freem(m);
690 		return;
691 	}
692 
693 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
694 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
695 	M_SETFIB(m, ifp->if_fib);
696 	netisr_dispatch(isr, m);
697 	return;
698 drop:
699 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
700 }
701 
702 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
703 int
704 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
705 {
706 	GIF_RLOCK_TRACKER;
707 	struct ifreq *ifr = (struct ifreq*)data;
708 	struct sockaddr *dst, *src;
709 	struct gif_softc *sc;
710 #ifdef INET
711 	struct sockaddr_in *sin = NULL;
712 #endif
713 #ifdef INET6
714 	struct sockaddr_in6 *sin6 = NULL;
715 #endif
716 	u_int options;
717 	int error;
718 
719 	switch (cmd) {
720 	case SIOCSIFADDR:
721 		ifp->if_flags |= IFF_UP;
722 	case SIOCADDMULTI:
723 	case SIOCDELMULTI:
724 	case SIOCGIFMTU:
725 	case SIOCSIFFLAGS:
726 		return (0);
727 	case SIOCSIFMTU:
728 		if (ifr->ifr_mtu < GIF_MTU_MIN ||
729 		    ifr->ifr_mtu > GIF_MTU_MAX)
730 			return (EINVAL);
731 		else
732 			ifp->if_mtu = ifr->ifr_mtu;
733 		return (0);
734 	}
735 	sx_xlock(&gif_ioctl_sx);
736 	sc = ifp->if_softc;
737 	if (sc == NULL) {
738 		error = ENXIO;
739 		goto bad;
740 	}
741 	error = 0;
742 	switch (cmd) {
743 	case SIOCSIFPHYADDR:
744 #ifdef INET6
745 	case SIOCSIFPHYADDR_IN6:
746 #endif
747 		error = EINVAL;
748 		switch (cmd) {
749 #ifdef INET
750 		case SIOCSIFPHYADDR:
751 			src = (struct sockaddr *)
752 				&(((struct in_aliasreq *)data)->ifra_addr);
753 			dst = (struct sockaddr *)
754 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
755 			break;
756 #endif
757 #ifdef INET6
758 		case SIOCSIFPHYADDR_IN6:
759 			src = (struct sockaddr *)
760 				&(((struct in6_aliasreq *)data)->ifra_addr);
761 			dst = (struct sockaddr *)
762 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
763 			break;
764 #endif
765 		default:
766 			goto bad;
767 		}
768 		/* sa_family must be equal */
769 		if (src->sa_family != dst->sa_family ||
770 		    src->sa_len != dst->sa_len)
771 			goto bad;
772 
773 		/* validate sa_len */
774 		switch (src->sa_family) {
775 #ifdef INET
776 		case AF_INET:
777 			if (src->sa_len != sizeof(struct sockaddr_in))
778 				goto bad;
779 			break;
780 #endif
781 #ifdef INET6
782 		case AF_INET6:
783 			if (src->sa_len != sizeof(struct sockaddr_in6))
784 				goto bad;
785 			break;
786 #endif
787 		default:
788 			error = EAFNOSUPPORT;
789 			goto bad;
790 		}
791 		/* check sa_family looks sane for the cmd */
792 		error = EAFNOSUPPORT;
793 		switch (cmd) {
794 #ifdef INET
795 		case SIOCSIFPHYADDR:
796 			if (src->sa_family == AF_INET)
797 				break;
798 			goto bad;
799 #endif
800 #ifdef INET6
801 		case SIOCSIFPHYADDR_IN6:
802 			if (src->sa_family == AF_INET6)
803 				break;
804 			goto bad;
805 #endif
806 		}
807 		error = EADDRNOTAVAIL;
808 		switch (src->sa_family) {
809 #ifdef INET
810 		case AF_INET:
811 			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
812 			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
813 				goto bad;
814 			break;
815 #endif
816 #ifdef INET6
817 		case AF_INET6:
818 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
819 			    ||
820 			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
821 				goto bad;
822 			/*
823 			 * Check validity of the scope zone ID of the
824 			 * addresses, and convert it into the kernel
825 			 * internal form if necessary.
826 			 */
827 			error = sa6_embedscope(satosin6(src), 0);
828 			if (error != 0)
829 				goto bad;
830 			error = sa6_embedscope(satosin6(dst), 0);
831 			if (error != 0)
832 				goto bad;
833 #endif
834 		};
835 		error = gif_set_tunnel(ifp, src, dst);
836 		break;
837 	case SIOCDIFPHYADDR:
838 		gif_delete_tunnel(ifp);
839 		break;
840 	case SIOCGIFPSRCADDR:
841 	case SIOCGIFPDSTADDR:
842 #ifdef INET6
843 	case SIOCGIFPSRCADDR_IN6:
844 	case SIOCGIFPDSTADDR_IN6:
845 #endif
846 		if (sc->gif_family == 0) {
847 			error = EADDRNOTAVAIL;
848 			break;
849 		}
850 		GIF_RLOCK(sc);
851 		switch (cmd) {
852 #ifdef INET
853 		case SIOCGIFPSRCADDR:
854 		case SIOCGIFPDSTADDR:
855 			if (sc->gif_family != AF_INET) {
856 				error = EADDRNOTAVAIL;
857 				break;
858 			}
859 			sin = (struct sockaddr_in *)&ifr->ifr_addr;
860 			memset(sin, 0, sizeof(*sin));
861 			sin->sin_family = AF_INET;
862 			sin->sin_len = sizeof(*sin);
863 			break;
864 #endif
865 #ifdef INET6
866 		case SIOCGIFPSRCADDR_IN6:
867 		case SIOCGIFPDSTADDR_IN6:
868 			if (sc->gif_family != AF_INET6) {
869 				error = EADDRNOTAVAIL;
870 				break;
871 			}
872 			sin6 = (struct sockaddr_in6 *)
873 				&(((struct in6_ifreq *)data)->ifr_addr);
874 			memset(sin6, 0, sizeof(*sin6));
875 			sin6->sin6_family = AF_INET6;
876 			sin6->sin6_len = sizeof(*sin6);
877 			break;
878 #endif
879 		default:
880 			error = EAFNOSUPPORT;
881 		}
882 		if (error == 0) {
883 			switch (cmd) {
884 #ifdef INET
885 			case SIOCGIFPSRCADDR:
886 				sin->sin_addr = sc->gif_iphdr->ip_src;
887 				break;
888 			case SIOCGIFPDSTADDR:
889 				sin->sin_addr = sc->gif_iphdr->ip_dst;
890 				break;
891 #endif
892 #ifdef INET6
893 			case SIOCGIFPSRCADDR_IN6:
894 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
895 				break;
896 			case SIOCGIFPDSTADDR_IN6:
897 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
898 				break;
899 #endif
900 			}
901 		}
902 		GIF_RUNLOCK(sc);
903 		if (error != 0)
904 			break;
905 		switch (cmd) {
906 #ifdef INET
907 		case SIOCGIFPSRCADDR:
908 		case SIOCGIFPDSTADDR:
909 			error = prison_if(curthread->td_ucred,
910 			    (struct sockaddr *)sin);
911 			if (error != 0)
912 				memset(sin, 0, sizeof(*sin));
913 			break;
914 #endif
915 #ifdef INET6
916 		case SIOCGIFPSRCADDR_IN6:
917 		case SIOCGIFPDSTADDR_IN6:
918 			error = prison_if(curthread->td_ucred,
919 			    (struct sockaddr *)sin6);
920 			if (error == 0)
921 				error = sa6_recoverscope(sin6);
922 			if (error != 0)
923 				memset(sin6, 0, sizeof(*sin6));
924 #endif
925 		}
926 		break;
927 	case GIFGOPTS:
928 		options = sc->gif_options;
929 		error = copyout(&options, ifr->ifr_data, sizeof(options));
930 		break;
931 	case GIFSOPTS:
932 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
933 			break;
934 		error = copyin(ifr->ifr_data, &options, sizeof(options));
935 		if (error)
936 			break;
937 		if (options & ~GIF_OPTMASK)
938 			error = EINVAL;
939 		else
940 			sc->gif_options = options;
941 		break;
942 
943 	default:
944 		error = EINVAL;
945 		break;
946 	}
947 bad:
948 	sx_xunlock(&gif_ioctl_sx);
949 	return (error);
950 }
951 
952 static void
953 gif_detach(struct gif_softc *sc)
954 {
955 
956 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
957 	if (sc->gif_ecookie != NULL)
958 		encap_detach(sc->gif_ecookie);
959 	sc->gif_ecookie = NULL;
960 }
961 
962 static int
963 gif_attach(struct gif_softc *sc, int af)
964 {
965 
966 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
967 	switch (af) {
968 #ifdef INET
969 	case AF_INET:
970 		return (in_gif_attach(sc));
971 #endif
972 #ifdef INET6
973 	case AF_INET6:
974 		return (in6_gif_attach(sc));
975 #endif
976 	}
977 	return (EAFNOSUPPORT);
978 }
979 
980 static int
981 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
982 {
983 	struct gif_softc *sc = ifp->if_softc;
984 	struct gif_softc *tsc;
985 #ifdef INET
986 	struct ip *ip;
987 #endif
988 #ifdef INET6
989 	struct ip6_hdr *ip6;
990 #endif
991 	void *hdr;
992 	int error = 0;
993 
994 	if (sc == NULL)
995 		return (ENXIO);
996 	/* Disallow parallel tunnels unless instructed otherwise. */
997 	if (V_parallel_tunnels == 0) {
998 		GIF_LIST_LOCK();
999 		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
1000 			if (tsc == sc || tsc->gif_family != src->sa_family)
1001 				continue;
1002 #ifdef INET
1003 			if (tsc->gif_family == AF_INET &&
1004 			    tsc->gif_iphdr->ip_src.s_addr ==
1005 			    satosin(src)->sin_addr.s_addr &&
1006 			    tsc->gif_iphdr->ip_dst.s_addr ==
1007 			    satosin(dst)->sin_addr.s_addr) {
1008 				error = EADDRNOTAVAIL;
1009 				GIF_LIST_UNLOCK();
1010 				goto bad;
1011 			}
1012 #endif
1013 #ifdef INET6
1014 			if (tsc->gif_family == AF_INET6 &&
1015 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
1016 			    &satosin6(src)->sin6_addr) &&
1017 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
1018 			    &satosin6(dst)->sin6_addr)) {
1019 				error = EADDRNOTAVAIL;
1020 				GIF_LIST_UNLOCK();
1021 				goto bad;
1022 			}
1023 #endif
1024 		}
1025 		GIF_LIST_UNLOCK();
1026 	}
1027 	switch (src->sa_family) {
1028 #ifdef INET
1029 	case AF_INET:
1030 		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1031 		    M_WAITOK | M_ZERO);
1032 		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1033 		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1034 		break;
1035 #endif
1036 #ifdef INET6
1037 	case AF_INET6:
1038 		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1039 		    M_WAITOK | M_ZERO);
1040 		ip6->ip6_src = satosin6(src)->sin6_addr;
1041 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1042 		ip6->ip6_vfc = IPV6_VERSION;
1043 		break;
1044 #endif
1045 	default:
1046 		return (EAFNOSUPPORT);
1047 	};
1048 
1049 	if (sc->gif_family != src->sa_family)
1050 		gif_detach(sc);
1051 	if (sc->gif_family == 0 ||
1052 	    sc->gif_family != src->sa_family)
1053 		error = gif_attach(sc, src->sa_family);
1054 
1055 	GIF_WLOCK(sc);
1056 	if (sc->gif_family != 0)
1057 		free(sc->gif_hdr, M_GIF);
1058 	sc->gif_family = src->sa_family;
1059 	sc->gif_hdr = hdr;
1060 	GIF_WUNLOCK(sc);
1061 #if defined(INET) || defined(INET6)
1062 bad:
1063 #endif
1064 	if (error == 0 && sc->gif_family != 0)
1065 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1066 	else
1067 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1068 	return (error);
1069 }
1070 
1071 static void
1072 gif_delete_tunnel(struct ifnet *ifp)
1073 {
1074 	struct gif_softc *sc = ifp->if_softc;
1075 	int family;
1076 
1077 	if (sc == NULL)
1078 		return;
1079 
1080 	GIF_WLOCK(sc);
1081 	family = sc->gif_family;
1082 	sc->gif_family = 0;
1083 	GIF_WUNLOCK(sc);
1084 	if (family != 0) {
1085 		gif_detach(sc);
1086 		free(sc->gif_hdr, M_GIF);
1087 	}
1088 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1089 }
1090