xref: /freebsd/sys/net/if_gif.c (revision 38d120bc13ac1de5b739b67b87016b9122149374)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sx.h>
50 #include <sys/errno.h>
51 #include <sys/time.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/protosw.h>
57 #include <sys/conf.h>
58 #include <machine/cpu.h>
59 
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/bpf.h>
67 #include <net/vnet.h>
68 
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
73 #ifdef	INET
74 #include <netinet/in_var.h>
75 #include <netinet/in_gif.h>
76 #include <netinet/ip_var.h>
77 #endif	/* INET */
78 
79 #ifdef INET6
80 #ifndef INET
81 #include <netinet/in.h>
82 #endif
83 #include <netinet6/in6_var.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_ecn.h>
86 #include <netinet6/ip6_var.h>
87 #include <netinet6/scope6_var.h>
88 #include <netinet6/in6_gif.h>
89 #include <netinet6/ip6protosw.h>
90 #endif /* INET6 */
91 
92 #include <netinet/ip_encap.h>
93 #include <net/ethernet.h>
94 #include <net/if_bridgevar.h>
95 #include <net/if_gif.h>
96 
97 #include <security/mac/mac_framework.h>
98 
/* Interface name prefix; used for if_initname() and for the cloner. */
static const char gifname[] = "gif";

/*
 * gif_mtx protects a per-vnet gif_softc_list.
 */
static VNET_DEFINE(struct mtx, gif_mtx);
#define	V_gif_mtx		VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define	V_gif_softc_list	VNET(gif_softc_list)
/*
 * gif_ioctl_sx is taken exclusively by gif_ioctl() and gif_clone_destroy();
 * gif_attach() and gif_detach() assert that it is held.
 */
static struct sx gif_ioctl_sx;
SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");

/*
 * Wrappers around the softc list mutex.  The macro argument is ignored;
 * every wrapper operates on the current vnet's V_gif_mtx.
 */
#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
					    NULL, MTX_DEF)
#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)

/*
 * Optional hooks, called from this file only when non-NULL.  Nothing in
 * this file assigns them; presumably a netgraph node module installs them
 * (TODO confirm).
 */
void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void	(*ng_gif_attach_p)(struct ifnet *ifp);
void	(*ng_gif_detach_p)(struct ifnet *ifp);

static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
    struct sockaddr *);
static void	gif_delete_tunnel(struct ifnet *);
static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
static int	gif_transmit(struct ifnet *, struct mbuf *);
static void	gif_qflush(struct ifnet *);
static int	gif_clone_create(struct if_clone *, int, caddr_t);
static void	gif_clone_destroy(struct ifnet *);
/* Per-vnet cloner handle; set in vnet_gif_init(), released in vnet_gif_uninit(). */
static VNET_DEFINE(struct if_clone *, gif_cloner);
#define	V_gif_cloner	VNET(gif_cloner)

static int gifmodevent(module_t, int, void *);
135 
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
    "Generic Tunnel Interface");
#ifndef MAX_GIF_NEST
/*
 * This macro sets the default upper limit on the nesting of gif tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, nesting is not allowed by default.
 * If you need nested gif tunnels, define this macro in your kernel
 * configuration file.  If you do so, take care to configure the tunnels
 * so that they do not form a loop.
 */
#define MAX_GIF_NEST 1
#endif
/* Per-vnet nesting limit; compared against the nesting count in gif_output(). */
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define	V_max_gif_nesting	VNET(max_gif_nesting)
SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");

/*
 * By default, we disallow creation of multiple tunnels between the same
 * pair of addresses.  Some applications require this functionality, so
 * the check (performed in gif_set_tunnel()) can be disabled here.
 */
#ifdef XBONEHACK
static VNET_DEFINE(int, parallel_tunnels) = 1;
#else
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define	V_parallel_tunnels	VNET(parallel_tunnels)
SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
    "Allow parallel tunnels?");

/*
 * Copied from src/sys/net/if_ethersubr.c.  Used by gif_input() to tell
 * broadcast from multicast on bridged EtherIP frames.
 */
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
#ifndef ETHER_IS_BROADCAST
#define ETHER_IS_BROADCAST(addr) \
	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
#endif
177 
178 static int
179 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
180 {
181 	struct gif_softc *sc;
182 
183 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
184 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
185 	GIF2IFP(sc) = if_alloc(IFT_GIF);
186 	GIF_LOCK_INIT(sc);
187 	GIF2IFP(sc)->if_softc = sc;
188 	if_initname(GIF2IFP(sc), gifname, unit);
189 
190 	GIF2IFP(sc)->if_addrlen = 0;
191 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
192 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
193 #if 0
194 	/* turn off ingress filter */
195 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
196 #endif
197 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
198 	GIF2IFP(sc)->if_transmit  = gif_transmit;
199 	GIF2IFP(sc)->if_qflush  = gif_qflush;
200 	GIF2IFP(sc)->if_output = gif_output;
201 	if_attach(GIF2IFP(sc));
202 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
203 	if (ng_gif_attach_p != NULL)
204 		(*ng_gif_attach_p)(GIF2IFP(sc));
205 
206 	GIF_LIST_LOCK();
207 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
208 	GIF_LIST_UNLOCK();
209 	return (0);
210 }
211 
212 static void
213 gif_clone_destroy(struct ifnet *ifp)
214 {
215 	struct gif_softc *sc;
216 
217 	sx_xlock(&gif_ioctl_sx);
218 	sc = ifp->if_softc;
219 	gif_delete_tunnel(ifp);
220 	GIF_LIST_LOCK();
221 	LIST_REMOVE(sc, gif_list);
222 	GIF_LIST_UNLOCK();
223 	if (ng_gif_detach_p != NULL)
224 		(*ng_gif_detach_p)(ifp);
225 	bpfdetach(ifp);
226 	if_detach(ifp);
227 	ifp->if_softc = NULL;
228 	sx_xunlock(&gif_ioctl_sx);
229 
230 	if_free(ifp);
231 	GIF_LOCK_DESTROY(sc);
232 	free(sc, M_GIF);
233 }
234 
235 static void
236 vnet_gif_init(const void *unused __unused)
237 {
238 
239 	LIST_INIT(&V_gif_softc_list);
240 	GIF_LIST_LOCK_INIT();
241 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
242 	    gif_clone_destroy, 0);
243 }
244 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
245     vnet_gif_init, NULL);
246 
/*
 * Per-vnet teardown: detach the cloner first, then destroy the list mutex.
 * NOTE(review): the order presumably matters because detaching the cloner
 * destroys remaining interfaces via gif_clone_destroy(), which takes the
 * list mutex -- confirm against if_clone_detach().
 */
static void
vnet_gif_uninit(const void *unused __unused)
{

	if_clone_detach(V_gif_cloner);
	GIF_LIST_LOCK_DESTROY();
}
VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_gif_uninit, NULL);
256 
257 static int
258 gifmodevent(module_t mod, int type, void *data)
259 {
260 
261 	switch (type) {
262 	case MOD_LOAD:
263 	case MOD_UNLOAD:
264 		break;
265 	default:
266 		return (EOPNOTSUPP);
267 	}
268 	return (0);
269 }
270 
271 static moduledata_t gif_mod = {
272 	"if_gif",
273 	gifmodevent,
274 	0
275 };
276 
277 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
278 MODULE_VERSION(if_gif, 1);
279 
280 int
281 gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
282 {
283 	GIF_RLOCK_TRACKER;
284 	struct gif_softc *sc;
285 	int ret;
286 	uint8_t ver;
287 
288 	sc = (struct gif_softc *)arg;
289 	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
290 		return (0);
291 
292 	ret = 0;
293 	GIF_RLOCK(sc);
294 
295 	/* no physical address */
296 	if (sc->gif_family == 0)
297 		goto done;
298 
299 	switch (proto) {
300 #ifdef INET
301 	case IPPROTO_IPV4:
302 #endif
303 #ifdef INET6
304 	case IPPROTO_IPV6:
305 #endif
306 	case IPPROTO_ETHERIP:
307 		break;
308 	default:
309 		goto done;
310 	}
311 
312 	/* Bail on short packets */
313 	if (m->m_pkthdr.len < sizeof(struct ip))
314 		goto done;
315 
316 	m_copydata(m, 0, 1, &ver);
317 	switch (ver >> 4) {
318 #ifdef INET
319 	case 4:
320 		if (sc->gif_family != AF_INET)
321 			goto done;
322 		ret = in_gif_encapcheck(m, off, proto, arg);
323 		break;
324 #endif
325 #ifdef INET6
326 	case 6:
327 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
328 			goto done;
329 		if (sc->gif_family != AF_INET6)
330 			goto done;
331 		ret = in6_gif_encapcheck(m, off, proto, arg);
332 		break;
333 #endif
334 	}
335 done:
336 	GIF_RUNLOCK(sc);
337 	return (ret);
338 }
339 
/*
 * if_transmit handler: prepare an outbound packet for encapsulation and
 * pass it to the output routine of the tunnel's outer address family.
 *
 * The inner address family is read from m->m_pkthdr.csum_data, where
 * gif_output() stored it.  For IPv4/IPv6 payloads the ECN bits of the
 * inner header are processed (IFF_LINK1 selects ECN_ALLOWED, otherwise
 * ECN_NOCARE); for AF_LINK an EtherIP header is prepended.
 *
 * Returns 0 or an errno value.  Any non-zero result is counted in
 * IFCOUNTER_OERRORS.  On every path visible here the mbuf is either freed
 * or handed to in_gif_output()/in6_gif_output().
 */
static int
gif_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct gif_softc *sc;
	struct etherip_header *eth;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
	uint32_t t;
#endif
	uint32_t af;
	uint8_t proto, ecn;
	int error;

	/*
	 * ENETDOWN is the default result: it is returned both when no tunnel
	 * is configured and when the outer family is not handled below.
	 */
	error = ENETDOWN;
	sc = ifp->if_softc;
	if (sc->gif_family == 0) {
		m_freem(m);
		goto err;
	}
	/* Now pull back the af that we stashed in the csum_data. */
	af = m->m_pkthdr.csum_data;
	/* Tap and count the packet before any encapsulation is added. */
	BPF_MTAP2(ifp, &af, sizeof(af), m);
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
	M_SETFIB(m, sc->gif_fibnum);
	/* inner AF-specific encapsulation */
	ecn = 0;
	switch (af) {
#ifdef INET
	case AF_INET:
		proto = IPPROTO_IPV4;
		/* Make the inner IPv4 header contiguous before touching it. */
		if (m->m_len < sizeof(struct ip))
			m = m_pullup(m, sizeof(struct ip));
		if (m == NULL) {
			error = ENOBUFS;
			goto err;
		}
		ip = mtod(m, struct ip *);
		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
		    ECN_NOCARE, &ecn, &ip->ip_tos);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		proto = IPPROTO_IPV6;
		/* Make the inner IPv6 header contiguous before touching it. */
		if (m->m_len < sizeof(struct ip6_hdr))
			m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL) {
			error = ENOBUFS;
			goto err;
		}
		t = 0;
		ip6 = mtod(m, struct ip6_hdr *);
		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
		    ECN_NOCARE, &t, &ip6->ip6_flow);
		/* Extract the 8 bits at offset 20 of the host-order flow word. */
		ecn = (ntohl(t) >> 20) & 0xff;
		break;
#endif
	case AF_LINK:
		proto = IPPROTO_ETHERIP;
		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
		if (m == NULL) {
			error = ENOBUFS;
			goto err;
		}
		eth = mtod(m, struct etherip_header *);
		eth->eip_resvh = 0;
		/*
		 * GIF_SEND_REVETHIP swaps the version and reserved-low fields
		 * (the counterpart of GIF_ACCEPT_REVETHIP in gif_input()).
		 */
		if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) {
			eth->eip_ver = 0;
			eth->eip_resvl = ETHERIP_VERSION;
		} else {
			eth->eip_ver = ETHERIP_VERSION;
			eth->eip_resvl = 0;
		}
		break;
	default:
		error = EAFNOSUPPORT;
		m_freem(m);
		goto err;
	}
	/* XXX should we check if our outer source is legal? */
	/* dispatch to output logic based on outer AF */
	switch (sc->gif_family) {
#ifdef INET
	case AF_INET:
		error = in_gif_output(ifp, m, proto, ecn);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		error = in6_gif_output(ifp, m, proto, ecn);
		break;
#endif
	default:
		/* Unhandled outer family: drop; error is still ENETDOWN. */
		m_freem(m);
	}
err:
	if (error)
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
	return (error);
}
444 
/*
 * if_qflush handler.  Intentionally empty: nothing in this file queues
 * packets on the interface, so there is nothing to flush.
 */
static void
gif_qflush(struct ifnet *ifp __unused)
{

}
450 
451 int
452 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
453 	struct route *ro)
454 {
455 	struct m_tag *mtag;
456 	uint32_t af;
457 	int gif_called;
458 	int error = 0;
459 #ifdef MAC
460 	error = mac_ifnet_check_transmit(ifp, m);
461 	if (error)
462 		goto err;
463 #endif
464 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
465 	    (ifp->if_flags & IFF_UP) == 0) {
466 		error = ENETDOWN;
467 		goto err;
468 	}
469 
470 	/*
471 	 * gif may cause infinite recursion calls when misconfigured.
472 	 * We'll prevent this by detecting loops.
473 	 *
474 	 * High nesting level may cause stack exhaustion.
475 	 * We'll prevent this by introducing upper limit.
476 	 */
477 	gif_called = 1;
478 	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
479 	while (mtag != NULL) {
480 		if (*(struct ifnet **)(mtag + 1) == ifp) {
481 			log(LOG_NOTICE,
482 			    "gif_output: loop detected on %s\n",
483 			    (*(struct ifnet **)(mtag + 1))->if_xname);
484 			error = EIO;	/* is there better errno? */
485 			goto err;
486 		}
487 		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
488 		gif_called++;
489 	}
490 	if (gif_called > V_max_gif_nesting) {
491 		log(LOG_NOTICE,
492 		    "gif_output: recursively called too many times(%d)\n",
493 		    gif_called);
494 		error = EIO;	/* is there better errno? */
495 		goto err;
496 	}
497 	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
498 	    M_NOWAIT);
499 	if (mtag == NULL) {
500 		error = ENOMEM;
501 		goto err;
502 	}
503 	*(struct ifnet **)(mtag + 1) = ifp;
504 	m_tag_prepend(m, mtag);
505 
506 	m->m_flags &= ~(M_BCAST|M_MCAST);
507 	if (dst->sa_family == AF_UNSPEC)
508 		bcopy(dst->sa_data, &af, sizeof(af));
509 	else
510 		af = dst->sa_family;
511 	if (ifp->if_bridge)
512 		af = AF_LINK;
513 	/*
514 	 * Now save the af in the inbound pkt csum data, this is a cheat since
515 	 * we are using the inbound csum_data field to carry the af over to
516 	 * the gif_transmit() routine, avoiding using yet another mtag.
517 	 */
518 	m->m_pkthdr.csum_data = af;
519 	return (ifp->if_transmit(ifp, m));
520 err:
521 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
522 	m_freem(m);
523 	return (error);
524 }
525 
/*
 * Input path for a decapsulated packet.
 *
 * m is the inner packet, ifp the gif interface it arrived on, proto the
 * payload protocol from the outer header (IPPROTO_IPV4, IPPROTO_IPV6 or
 * IPPROTO_ETHERIP) and ecn the ECN bits taken from the outer header.
 *
 * IPv4/IPv6 payloads have the outer ECN state merged into the inner
 * header and are handed to netisr.  EtherIP payloads are stripped of the
 * EtherIP header and, if the interface is a bridge member, passed to the
 * bridge.  The mbuf is consumed on every path; paths reaching the "drop"
 * label increment IFCOUNTER_IERRORS.
 */
void
gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
{
	struct etherip_header *eip;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
	uint32_t t;
#endif
	struct gif_softc *sc;
	struct ether_header *eh;
	struct ifnet *oldifp;
	uint32_t gif_options;
	int isr, n, af;

	if (ifp == NULL) {
		/* just in case */
		m_freem(m);
		return;
	}
	sc = ifp->if_softc;
	/* Snapshot the options; only GIF_ACCEPT_REVETHIP is consulted below. */
	gif_options = sc->gif_options;
	m->m_pkthdr.rcvif = ifp;
	m_clrprotoflags(m);
	switch (proto) {
#ifdef INET
	case IPPROTO_IPV4:
		af = AF_INET;
		if (m->m_len < sizeof(struct ip))
			m = m_pullup(m, sizeof(struct ip));
		if (m == NULL)
			goto drop;
		ip = mtod(m, struct ip *);
		/* A zero return from the ECN egress check means "drop". */
		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
			m_freem(m);
			goto drop;
		}
		break;
#endif
#ifdef INET6
	case IPPROTO_IPV6:
		af = AF_INET6;
		if (m->m_len < sizeof(struct ip6_hdr))
			m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL)
			goto drop;
		/* Place the outer ECN bits at offset 20 of a flow word. */
		t = htonl((uint32_t)ecn << 20);
		ip6 = mtod(m, struct ip6_hdr *);
		/* A zero return from the ECN egress check means "drop". */
		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
			m_freem(m);
			goto drop;
		}
		break;
#endif
	case IPPROTO_ETHERIP:
		af = AF_LINK;
		break;
	default:
		m_freem(m);
		goto drop;
	}

#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif

	/* bpf sees the packet prefixed with a 32-bit address family. */
	if (bpf_peers_present(ifp->if_bpf)) {
		uint32_t af1 = af;
		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
	}

	/* In monitor mode the packet is counted and then discarded. */
	if ((ifp->if_flags & IFF_MONITOR) != 0) {
		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
		m_freem(m);
		return;
	}

	/* The hook may consume the packet, leaving m set to NULL. */
	if (ng_gif_input_p != NULL) {
		(*ng_gif_input_p)(ifp, &m, af);
		if (m == NULL)
			goto drop;
	}

	/*
	 * Put the packet to the network layer input queue according to the
	 * specified address family.
	 * Note: older versions of gif_input directly called network layer
	 * input functions, e.g. ip6_input, here.  We changed the policy to
	 * prevent too many recursive calls of such input functions, which
	 * might cause kernel panic.  But the change may introduce another
	 * problem; if the input queue is full, packets are discarded.
	 * The kernel stack overflow really happened, and we believed
	 * queue-full rarely occurs, so we changed the policy.
	 */
	switch (af) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
	case AF_LINK:
		/* Need both the EtherIP and the Ethernet header contiguous. */
		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
		if (n > m->m_len)
			m = m_pullup(m, n);
		if (m == NULL)
			goto drop;
		eip = mtod(m, struct etherip_header *);
		/*
		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
		 * accepts an EtherIP packet with a reversed version field in
		 * the header.  This is a knob for backward compatibility
		 * with FreeBSD 7.2R or prior.
		 */
		if (eip->eip_ver != ETHERIP_VERSION) {
			if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 ||
			    eip->eip_resvl != ETHERIP_VERSION) {
				/* discard unknown versions */
				m_freem(m);
				goto drop;
			}
		}
		m_adj(m, sizeof(struct etherip_header));

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.rcvif = ifp;

		/* Ethernet frames are only delivered via a bridge. */
		if (ifp->if_bridge) {
			oldifp = ifp;
			eh = mtod(m, struct ether_header *);
			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
				if (ETHER_IS_BROADCAST(eh->ether_dhost))
					m->m_flags |= M_BCAST;
				else
					m->m_flags |= M_MCAST;
				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
			}
			/*
			 * NOTE(review): BRIDGE_INPUT is a macro that appears
			 * to update both ifp and m in place -- confirm in
			 * net/if_bridgevar.h.
			 */
			BRIDGE_INPUT(ifp, m);

			if (m != NULL && ifp != oldifp) {
				/*
				 * The bridge gave us back itself or one of the
				 * members for which the frame is addressed.
				 */
				ether_demux(ifp, m);
				return;
			}
		}
		/* Not bridged, or the bridge did not redirect the frame. */
		if (m != NULL)
			m_freem(m);
		return;

	default:
		if (ng_gif_input_orphan_p != NULL)
			(*ng_gif_input_orphan_p)(ifp, m, af);
		else
			m_freem(m);
		return;
	}

	/* Only the IPv4/IPv6 cases reach this point. */
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	M_SETFIB(m, ifp->if_fib);
	netisr_dispatch(isr, m);
	return;
drop:
	/* The mbuf has already been freed (or is NULL) when we get here. */
	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
703 
/*
 * Interface ioctl handler.
 *
 * Simple requests (address, multicast, flags, MTU) are answered without
 * taking any lock.  All remaining requests run with gif_ioctl_sx held
 * exclusively:
 *   SIOCSIFPHYADDR[_IN6]  validate and set the outer tunnel addresses;
 *   SIOCDIFPHYADDR        delete the tunnel configuration;
 *   SIOCGIFP{SRC,DST}ADDR[_IN6]  report an outer address;
 *   GIFGOPTS / GIFSOPTS   get / set the gif option bits.
 *
 * Returns 0 or an errno value; ENXIO if the softc has already been
 * detached, EINVAL for unknown commands.
 *
 * XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR?
 */
int
gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	GIF_RLOCK_TRACKER;
	struct ifreq *ifr = (struct ifreq*)data;
	struct sockaddr *dst, *src;
	struct gif_softc *sc;
#ifdef INET
	struct sockaddr_in *sin = NULL;
#endif
#ifdef INET6
	struct sockaddr_in6 *sin6 = NULL;
#endif
	u_int options;
	int error;

	/* Requests that need neither the softc nor the ioctl lock. */
	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCGIFMTU:
	case SIOCSIFFLAGS:
		return (0);
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < GIF_MTU_MIN ||
		    ifr->ifr_mtu > GIF_MTU_MAX)
			return (EINVAL);
		else
			ifp->if_mtu = ifr->ifr_mtu;
		return (0);
	}
	sx_xlock(&gif_ioctl_sx);
	sc = ifp->if_softc;
	/* gif_clone_destroy() clears if_softc under this same lock. */
	if (sc == NULL) {
		error = ENXIO;
		goto bad;
	}
	error = 0;
	switch (cmd) {
	case SIOCSIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
		error = EINVAL;
		/* Locate the address pair inside the request structure. */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_dstaddr);
			break;
#endif
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			src = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
			break;
#endif
		default:
			goto bad;
		}
		/* sa_family must be equal */
		if (src->sa_family != dst->sa_family ||
		    src->sa_len != dst->sa_len)
			goto bad;

		/* validate sa_len */
		switch (src->sa_family) {
#ifdef INET
		case AF_INET:
			if (src->sa_len != sizeof(struct sockaddr_in))
				goto bad;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (src->sa_len != sizeof(struct sockaddr_in6))
				goto bad;
			break;
#endif
		default:
			error = EAFNOSUPPORT;
			goto bad;
		}
		/* check sa_family looks sane for the cmd */
		error = EAFNOSUPPORT;
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			if (src->sa_family == AF_INET)
				break;
			goto bad;
#endif
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			if (src->sa_family == AF_INET6)
				break;
			goto bad;
#endif
		}
		/* Neither endpoint may be the unspecified address. */
		error = EADDRNOTAVAIL;
		switch (src->sa_family) {
#ifdef INET
		case AF_INET:
			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
				goto bad;
			break;
#endif
#ifdef INET6
		case AF_INET6:
			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
			    ||
			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
				goto bad;
			/*
			 * Check validity of the scope zone ID of the
			 * addresses, and convert it into the kernel
			 * internal form if necessary.
			 */
			error = sa6_embedscope(satosin6(src), 0);
			if (error != 0)
				goto bad;
			error = sa6_embedscope(satosin6(dst), 0);
			if (error != 0)
				goto bad;
#endif
		};
		error = gif_set_tunnel(ifp, src, dst);
		break;
	case SIOCDIFPHYADDR:
		gif_delete_tunnel(ifp);
		break;
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
	case SIOCGIFPDSTADDR_IN6:
#endif
		/* No tunnel configured, so there is no address to report. */
		if (sc->gif_family == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		GIF_RLOCK(sc);
		/* Step 1: check the family and prepare an empty sockaddr. */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
		case SIOCGIFPDSTADDR:
			if (sc->gif_family != AF_INET) {
				error = EADDRNOTAVAIL;
				break;
			}
			sin = (struct sockaddr_in *)&ifr->ifr_addr;
			memset(sin, 0, sizeof(*sin));
			sin->sin_family = AF_INET;
			sin->sin_len = sizeof(*sin);
			break;
#endif
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
		case SIOCGIFPDSTADDR_IN6:
			if (sc->gif_family != AF_INET6) {
				error = EADDRNOTAVAIL;
				break;
			}
			sin6 = (struct sockaddr_in6 *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			memset(sin6, 0, sizeof(*sin6));
			sin6->sin6_family = AF_INET6;
			sin6->sin6_len = sizeof(*sin6);
			break;
#endif
		default:
			error = EAFNOSUPPORT;
		}
		/* Step 2: copy the requested address from the saved header. */
		if (error == 0) {
			switch (cmd) {
#ifdef INET
			case SIOCGIFPSRCADDR:
				sin->sin_addr = sc->gif_iphdr->ip_src;
				break;
			case SIOCGIFPDSTADDR:
				sin->sin_addr = sc->gif_iphdr->ip_dst;
				break;
#endif
#ifdef INET6
			case SIOCGIFPSRCADDR_IN6:
				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
				break;
			case SIOCGIFPDSTADDR_IN6:
				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
				break;
#endif
			}
		}
		GIF_RUNLOCK(sc);
		if (error != 0)
			break;
		/*
		 * Step 3: jail check on the result; on any failure the
		 * returned sockaddr is zeroed again.
		 */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
		case SIOCGIFPDSTADDR:
			error = prison_if(curthread->td_ucred,
			    (struct sockaddr *)sin);
			if (error != 0)
				memset(sin, 0, sizeof(*sin));
			break;
#endif
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
		case SIOCGIFPDSTADDR_IN6:
			error = prison_if(curthread->td_ucred,
			    (struct sockaddr *)sin6);
			if (error == 0)
				error = sa6_recoverscope(sin6);
			if (error != 0)
				memset(sin6, 0, sizeof(*sin6));
#endif
		}
		break;
	case GIFGOPTS:
		options = sc->gif_options;
		error = copyout(&options, ifr->ifr_data, sizeof(options));
		break;
	case GIFSOPTS:
		/* Changing options requires the PRIV_NET_GIF privilege. */
		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
			break;
		error = copyin(ifr->ifr_data, &options, sizeof(options));
		if (error)
			break;
		/* Reject any bit outside GIF_OPTMASK. */
		if (options & ~GIF_OPTMASK)
			error = EINVAL;
		else
			sc->gif_options = options;
		break;

	default:
		error = EINVAL;
		break;
	}
bad:
	sx_xunlock(&gif_ioctl_sx);
	return (error);
}
953 
954 static void
955 gif_detach(struct gif_softc *sc)
956 {
957 
958 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
959 	if (sc->gif_ecookie != NULL)
960 		encap_detach(sc->gif_ecookie);
961 	sc->gif_ecookie = NULL;
962 }
963 
964 static int
965 gif_attach(struct gif_softc *sc, int af)
966 {
967 
968 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
969 	switch (af) {
970 #ifdef INET
971 	case AF_INET:
972 		return (in_gif_attach(sc));
973 #endif
974 #ifdef INET6
975 	case AF_INET6:
976 		return (in6_gif_attach(sc));
977 #endif
978 	}
979 	return (EAFNOSUPPORT);
980 }
981 
982 static int
983 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
984 {
985 	struct gif_softc *sc = ifp->if_softc;
986 	struct gif_softc *tsc;
987 #ifdef INET
988 	struct ip *ip;
989 #endif
990 #ifdef INET6
991 	struct ip6_hdr *ip6;
992 #endif
993 	void *hdr;
994 	int error = 0;
995 
996 	if (sc == NULL)
997 		return (ENXIO);
998 	/* Disallow parallel tunnels unless instructed otherwise. */
999 	if (V_parallel_tunnels == 0) {
1000 		GIF_LIST_LOCK();
1001 		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
1002 			if (tsc == sc || tsc->gif_family != src->sa_family)
1003 				continue;
1004 #ifdef INET
1005 			if (tsc->gif_family == AF_INET &&
1006 			    tsc->gif_iphdr->ip_src.s_addr ==
1007 			    satosin(src)->sin_addr.s_addr &&
1008 			    tsc->gif_iphdr->ip_dst.s_addr ==
1009 			    satosin(dst)->sin_addr.s_addr) {
1010 				error = EADDRNOTAVAIL;
1011 				GIF_LIST_UNLOCK();
1012 				goto bad;
1013 			}
1014 #endif
1015 #ifdef INET6
1016 			if (tsc->gif_family == AF_INET6 &&
1017 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
1018 			    &satosin6(src)->sin6_addr) &&
1019 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
1020 			    &satosin6(dst)->sin6_addr)) {
1021 				error = EADDRNOTAVAIL;
1022 				GIF_LIST_UNLOCK();
1023 				goto bad;
1024 			}
1025 #endif
1026 		}
1027 		GIF_LIST_UNLOCK();
1028 	}
1029 	switch (src->sa_family) {
1030 #ifdef INET
1031 	case AF_INET:
1032 		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1033 		    M_WAITOK | M_ZERO);
1034 		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1035 		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1036 		break;
1037 #endif
1038 #ifdef INET6
1039 	case AF_INET6:
1040 		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1041 		    M_WAITOK | M_ZERO);
1042 		ip6->ip6_src = satosin6(src)->sin6_addr;
1043 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1044 		ip6->ip6_vfc = IPV6_VERSION;
1045 		break;
1046 #endif
1047 	default:
1048 		return (EAFNOSUPPORT);
1049 	};
1050 
1051 	if (sc->gif_family != src->sa_family)
1052 		gif_detach(sc);
1053 	if (sc->gif_family == 0 ||
1054 	    sc->gif_family != src->sa_family)
1055 		error = gif_attach(sc, src->sa_family);
1056 
1057 	GIF_WLOCK(sc);
1058 	if (sc->gif_family != 0)
1059 		free(sc->gif_hdr, M_GIF);
1060 	sc->gif_family = src->sa_family;
1061 	sc->gif_hdr = hdr;
1062 	GIF_WUNLOCK(sc);
1063 #if defined(INET) || defined(INET6)
1064 bad:
1065 #endif
1066 	if (error == 0 && sc->gif_family != 0)
1067 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1068 	else
1069 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1070 	return (error);
1071 }
1072 
1073 static void
1074 gif_delete_tunnel(struct ifnet *ifp)
1075 {
1076 	struct gif_softc *sc = ifp->if_softc;
1077 	int family;
1078 
1079 	if (sc == NULL)
1080 		return;
1081 
1082 	GIF_WLOCK(sc);
1083 	family = sc->gif_family;
1084 	sc->gif_family = 0;
1085 	GIF_WUNLOCK(sc);
1086 	if (family != 0) {
1087 		gif_detach(sc);
1088 		free(sc->gif_hdr, M_GIF);
1089 	}
1090 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1091 }
1092