xref: /freebsd/sys/net/if_gif.c (revision 98e0ffaefb0f241cda3a72395d3be04192ae0d47)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sx.h>
50 #include <sys/errno.h>
51 #include <sys/time.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/protosw.h>
57 #include <sys/conf.h>
58 #include <machine/cpu.h>
59 
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/bpf.h>
67 #include <net/vnet.h>
68 
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
73 #ifdef	INET
74 #include <netinet/in_var.h>
75 #include <netinet/ip_var.h>
76 #endif	/* INET */
77 
78 #ifdef INET6
79 #ifndef INET
80 #include <netinet/in.h>
81 #endif
82 #include <netinet6/in6_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_ecn.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #include <netinet6/ip6protosw.h>
88 #endif /* INET6 */
89 
90 #include <netinet/ip_encap.h>
91 #include <net/ethernet.h>
92 #include <net/if_bridgevar.h>
93 #include <net/if_gif.h>
94 
95 #include <security/mac/mac_framework.h>
96 
97 static const char gifname[] = "gif";
98 
99 /*
100  * gif_mtx protects a per-vnet gif_softc_list.
101  */
102 static VNET_DEFINE(struct mtx, gif_mtx);
103 #define	V_gif_mtx		VNET(gif_mtx)
104 static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
105 static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
106 #define	V_gif_softc_list	VNET(gif_softc_list)
107 static struct sx gif_ioctl_sx;
108 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
109 
110 #define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
111 					    NULL, MTX_DEF)
112 #define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
113 #define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
114 #define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
115 
116 void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
117 void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
118 void	(*ng_gif_attach_p)(struct ifnet *ifp);
119 void	(*ng_gif_detach_p)(struct ifnet *ifp);
120 
121 static int	gif_check_nesting(struct ifnet *, struct mbuf *);
122 static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
123     struct sockaddr *);
124 static void	gif_delete_tunnel(struct ifnet *);
125 static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
126 static int	gif_transmit(struct ifnet *, struct mbuf *);
127 static void	gif_qflush(struct ifnet *);
128 static int	gif_clone_create(struct if_clone *, int, caddr_t);
129 static void	gif_clone_destroy(struct ifnet *);
130 static VNET_DEFINE(struct if_clone *, gif_cloner);
131 #define	V_gif_cloner	VNET(gif_cloner)
132 
133 static int gifmodevent(module_t, int, void *);
134 
135 SYSCTL_DECL(_net_link);
136 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
137     "Generic Tunnel Interface");
138 #ifndef MAX_GIF_NEST
139 /*
140  * This macro controls the default upper limitation on nesting of gif tunnels.
141  * Since, setting a large value to this macro with a careless configuration
142  * may introduce system crash, we don't allow any nestings by default.
143  * If you need to configure nested gif tunnels, you can define this macro
144  * in your kernel configuration file.  However, if you do so, please be
145  * careful to configure the tunnels so that it won't make a loop.
146  */
147 #define MAX_GIF_NEST 1
148 #endif
149 static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
150 #define	V_max_gif_nesting	VNET(max_gif_nesting)
151 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
152     &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
153 
154 /*
155  * By default, we disallow creation of multiple tunnels between the same
156  * pair of addresses.  Some applications require this functionality so
157  * we allow control over this check here.
158  */
159 #ifdef XBONEHACK
160 static VNET_DEFINE(int, parallel_tunnels) = 1;
161 #else
162 static VNET_DEFINE(int, parallel_tunnels) = 0;
163 #endif
164 #define	V_parallel_tunnels	VNET(parallel_tunnels)
165 SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
166     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
167     "Allow parallel tunnels?");
168 
169 /* copy from src/sys/net/if_ethersubr.c */
170 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
171 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
172 #ifndef ETHER_IS_BROADCAST
173 #define ETHER_IS_BROADCAST(addr) \
174 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
175 #endif
176 
177 static int
178 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
179 {
180 	struct gif_softc *sc;
181 
182 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
183 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
184 	GIF2IFP(sc) = if_alloc(IFT_GIF);
185 	GIF_LOCK_INIT(sc);
186 	GIF2IFP(sc)->if_softc = sc;
187 	if_initname(GIF2IFP(sc), gifname, unit);
188 
189 	GIF2IFP(sc)->if_addrlen = 0;
190 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
191 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
192 #if 0
193 	/* turn off ingress filter */
194 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
195 #endif
196 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
197 	GIF2IFP(sc)->if_transmit  = gif_transmit;
198 	GIF2IFP(sc)->if_qflush  = gif_qflush;
199 	GIF2IFP(sc)->if_output = gif_output;
200 	if_attach(GIF2IFP(sc));
201 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
202 	if (ng_gif_attach_p != NULL)
203 		(*ng_gif_attach_p)(GIF2IFP(sc));
204 
205 	GIF_LIST_LOCK();
206 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
207 	GIF_LIST_UNLOCK();
208 	return (0);
209 }
210 
211 static void
212 gif_clone_destroy(struct ifnet *ifp)
213 {
214 	struct gif_softc *sc;
215 
216 	sx_xlock(&gif_ioctl_sx);
217 	sc = ifp->if_softc;
218 	gif_delete_tunnel(ifp);
219 	GIF_LIST_LOCK();
220 	LIST_REMOVE(sc, gif_list);
221 	GIF_LIST_UNLOCK();
222 	if (ng_gif_detach_p != NULL)
223 		(*ng_gif_detach_p)(ifp);
224 	bpfdetach(ifp);
225 	if_detach(ifp);
226 	ifp->if_softc = NULL;
227 	sx_xunlock(&gif_ioctl_sx);
228 
229 	if_free(ifp);
230 	GIF_LOCK_DESTROY(sc);
231 	free(sc, M_GIF);
232 }
233 
234 static void
235 vnet_gif_init(const void *unused __unused)
236 {
237 
238 	LIST_INIT(&V_gif_softc_list);
239 	GIF_LIST_LOCK_INIT();
240 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
241 	    gif_clone_destroy, 0);
242 }
243 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
244     vnet_gif_init, NULL);
245 
246 static void
247 vnet_gif_uninit(const void *unused __unused)
248 {
249 
250 	if_clone_detach(V_gif_cloner);
251 	GIF_LIST_LOCK_DESTROY();
252 }
253 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
254     vnet_gif_uninit, NULL);
255 
256 static int
257 gifmodevent(module_t mod, int type, void *data)
258 {
259 
260 	switch (type) {
261 	case MOD_LOAD:
262 	case MOD_UNLOAD:
263 		break;
264 	default:
265 		return (EOPNOTSUPP);
266 	}
267 	return (0);
268 }
269 
270 static moduledata_t gif_mod = {
271 	"if_gif",
272 	gifmodevent,
273 	0
274 };
275 
276 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
277 MODULE_VERSION(if_gif, 1);
278 
279 int
280 gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
281 {
282 	GIF_RLOCK_TRACKER;
283 	struct gif_softc *sc;
284 	int ret;
285 	uint8_t ver;
286 
287 	sc = (struct gif_softc *)arg;
288 	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
289 		return (0);
290 
291 	ret = 0;
292 	GIF_RLOCK(sc);
293 
294 	/* no physical address */
295 	if (sc->gif_family == 0)
296 		goto done;
297 
298 	switch (proto) {
299 #ifdef INET
300 	case IPPROTO_IPV4:
301 #endif
302 #ifdef INET6
303 	case IPPROTO_IPV6:
304 #endif
305 	case IPPROTO_ETHERIP:
306 		break;
307 	default:
308 		goto done;
309 	}
310 
311 	/* Bail on short packets */
312 	if (m->m_pkthdr.len < sizeof(struct ip))
313 		goto done;
314 
315 	m_copydata(m, 0, 1, &ver);
316 	switch (ver >> 4) {
317 #ifdef INET
318 	case 4:
319 		if (sc->gif_family != AF_INET)
320 			goto done;
321 		ret = in_gif_encapcheck(m, off, proto, arg);
322 		break;
323 #endif
324 #ifdef INET6
325 	case 6:
326 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
327 			goto done;
328 		if (sc->gif_family != AF_INET6)
329 			goto done;
330 		ret = in6_gif_encapcheck(m, off, proto, arg);
331 		break;
332 #endif
333 	}
334 done:
335 	GIF_RUNLOCK(sc);
336 	return (ret);
337 }
338 
339 static int
340 gif_transmit(struct ifnet *ifp, struct mbuf *m)
341 {
342 	struct gif_softc *sc;
343 	struct etherip_header *eth;
344 #ifdef INET
345 	struct ip *ip;
346 #endif
347 #ifdef INET6
348 	struct ip6_hdr *ip6;
349 	uint32_t t;
350 #endif
351 	uint32_t af;
352 	uint8_t proto, ecn;
353 	int error;
354 
355 #ifdef MAC
356 	error = mac_ifnet_check_transmit(ifp, m);
357 	if (error) {
358 		m_freem(m);
359 		goto err;
360 	}
361 #endif
362 	error = ENETDOWN;
363 	sc = ifp->if_softc;
364 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
365 	    (ifp->if_flags & IFF_UP) == 0 ||
366 	    sc->gif_family == 0 ||
367 	    (error = gif_check_nesting(ifp, m)) != 0) {
368 		m_freem(m);
369 		goto err;
370 	}
371 	/* Now pull back the af that we stashed in the csum_data. */
372 	if (ifp->if_bridge)
373 		af = AF_LINK;
374 	else
375 		af = m->m_pkthdr.csum_data;
376 	m->m_flags &= ~(M_BCAST|M_MCAST);
377 	M_SETFIB(m, sc->gif_fibnum);
378 	BPF_MTAP2(ifp, &af, sizeof(af), m);
379 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
380 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
381 	/* inner AF-specific encapsulation */
382 	ecn = 0;
383 	switch (af) {
384 #ifdef INET
385 	case AF_INET:
386 		proto = IPPROTO_IPV4;
387 		if (m->m_len < sizeof(struct ip))
388 			m = m_pullup(m, sizeof(struct ip));
389 		if (m == NULL) {
390 			error = ENOBUFS;
391 			goto err;
392 		}
393 		ip = mtod(m, struct ip *);
394 		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
395 		    ECN_NOCARE, &ecn, &ip->ip_tos);
396 		break;
397 #endif
398 #ifdef INET6
399 	case AF_INET6:
400 		proto = IPPROTO_IPV6;
401 		if (m->m_len < sizeof(struct ip6_hdr))
402 			m = m_pullup(m, sizeof(struct ip6_hdr));
403 		if (m == NULL) {
404 			error = ENOBUFS;
405 			goto err;
406 		}
407 		t = 0;
408 		ip6 = mtod(m, struct ip6_hdr *);
409 		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
410 		    ECN_NOCARE, &t, &ip6->ip6_flow);
411 		ecn = (ntohl(t) >> 20) & 0xff;
412 		break;
413 #endif
414 	case AF_LINK:
415 		proto = IPPROTO_ETHERIP;
416 		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
417 		if (m == NULL) {
418 			error = ENOBUFS;
419 			goto err;
420 		}
421 		eth = mtod(m, struct etherip_header *);
422 		eth->eip_resvh = 0;
423 		if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) {
424 			eth->eip_ver = 0;
425 			eth->eip_resvl = ETHERIP_VERSION;
426 		} else {
427 			eth->eip_ver = ETHERIP_VERSION;
428 			eth->eip_resvl = 0;
429 		}
430 		break;
431 	default:
432 		error = EAFNOSUPPORT;
433 		m_freem(m);
434 		goto err;
435 	}
436 	/* XXX should we check if our outer source is legal? */
437 	/* dispatch to output logic based on outer AF */
438 	switch (sc->gif_family) {
439 #ifdef INET
440 	case AF_INET:
441 		error = in_gif_output(ifp, m, proto, ecn);
442 		break;
443 #endif
444 #ifdef INET6
445 	case AF_INET6:
446 		error = in6_gif_output(ifp, m, proto, ecn);
447 		break;
448 #endif
449 	default:
450 		m_freem(m);
451 	}
452 err:
453 	if (error)
454 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
455 	return (error);
456 }
457 
458 static void
459 gif_qflush(struct ifnet *ifp __unused)
460 {
461 
462 }
463 
464 #define	MTAG_GIF	1080679712
465 static int
466 gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
467 {
468 	struct m_tag *mtag;
469 	int count;
470 
471 	/*
472 	 * gif may cause infinite recursion calls when misconfigured.
473 	 * We'll prevent this by detecting loops.
474 	 *
475 	 * High nesting level may cause stack exhaustion.
476 	 * We'll prevent this by introducing upper limit.
477 	 */
478 	count = 1;
479 	mtag = NULL;
480 	while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
481 		if (*(struct ifnet **)(mtag + 1) == ifp) {
482 			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
483 			return (EIO);
484 		}
485 		count++;
486 	}
487 	if (count > V_max_gif_nesting) {
488 		log(LOG_NOTICE,
489 		    "%s: if_output recursively called too many times(%d)\n",
490 		    if_name(ifp), count);
491 		return (EIO);
492 	}
493 	mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
494 	if (mtag == NULL)
495 		return (ENOMEM);
496 	*(struct ifnet **)(mtag + 1) = ifp;
497 	m_tag_prepend(m, mtag);
498 	return (0);
499 }
500 
501 int
502 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
503 	struct route *ro)
504 {
505 	uint32_t af;
506 
507 	if (dst->sa_family == AF_UNSPEC)
508 		bcopy(dst->sa_data, &af, sizeof(af));
509 	else
510 		af = dst->sa_family;
511 	/*
512 	 * Now save the af in the inbound pkt csum data, this is a cheat since
513 	 * we are using the inbound csum_data field to carry the af over to
514 	 * the gif_transmit() routine, avoiding using yet another mtag.
515 	 */
516 	m->m_pkthdr.csum_data = af;
517 	return (ifp->if_transmit(ifp, m));
518 }
519 
520 void
521 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
522 {
523 	struct etherip_header *eip;
524 #ifdef INET
525 	struct ip *ip;
526 #endif
527 #ifdef INET6
528 	struct ip6_hdr *ip6;
529 	uint32_t t;
530 #endif
531 	struct gif_softc *sc;
532 	struct ether_header *eh;
533 	struct ifnet *oldifp;
534 	uint32_t gif_options;
535 	int isr, n, af;
536 
537 	if (ifp == NULL) {
538 		/* just in case */
539 		m_freem(m);
540 		return;
541 	}
542 	sc = ifp->if_softc;
543 	gif_options = sc->gif_options;
544 	m->m_pkthdr.rcvif = ifp;
545 	m_clrprotoflags(m);
546 	switch (proto) {
547 #ifdef INET
548 	case IPPROTO_IPV4:
549 		af = AF_INET;
550 		if (m->m_len < sizeof(struct ip))
551 			m = m_pullup(m, sizeof(struct ip));
552 		if (m == NULL)
553 			goto drop;
554 		ip = mtod(m, struct ip *);
555 		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
556 		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
557 			m_freem(m);
558 			goto drop;
559 		}
560 		break;
561 #endif
562 #ifdef INET6
563 	case IPPROTO_IPV6:
564 		af = AF_INET6;
565 		if (m->m_len < sizeof(struct ip6_hdr))
566 			m = m_pullup(m, sizeof(struct ip6_hdr));
567 		if (m == NULL)
568 			goto drop;
569 		t = htonl((uint32_t)ecn << 20);
570 		ip6 = mtod(m, struct ip6_hdr *);
571 		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
572 		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
573 			m_freem(m);
574 			goto drop;
575 		}
576 		break;
577 #endif
578 	case IPPROTO_ETHERIP:
579 		af = AF_LINK;
580 		break;
581 	default:
582 		m_freem(m);
583 		goto drop;
584 	}
585 
586 #ifdef MAC
587 	mac_ifnet_create_mbuf(ifp, m);
588 #endif
589 
590 	if (bpf_peers_present(ifp->if_bpf)) {
591 		uint32_t af1 = af;
592 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
593 	}
594 
595 	if ((ifp->if_flags & IFF_MONITOR) != 0) {
596 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
597 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
598 		m_freem(m);
599 		return;
600 	}
601 
602 	if (ng_gif_input_p != NULL) {
603 		(*ng_gif_input_p)(ifp, &m, af);
604 		if (m == NULL)
605 			goto drop;
606 	}
607 
608 	/*
609 	 * Put the packet to the network layer input queue according to the
610 	 * specified address family.
611 	 * Note: older versions of gif_input directly called network layer
612 	 * input functions, e.g. ip6_input, here.  We changed the policy to
613 	 * prevent too many recursive calls of such input functions, which
614 	 * might cause kernel panic.  But the change may introduce another
615 	 * problem; if the input queue is full, packets are discarded.
616 	 * The kernel stack overflow really happened, and we believed
617 	 * queue-full rarely occurs, so we changed the policy.
618 	 */
619 	switch (af) {
620 #ifdef INET
621 	case AF_INET:
622 		isr = NETISR_IP;
623 		break;
624 #endif
625 #ifdef INET6
626 	case AF_INET6:
627 		isr = NETISR_IPV6;
628 		break;
629 #endif
630 	case AF_LINK:
631 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
632 		if (n > m->m_len)
633 			m = m_pullup(m, n);
634 		if (m == NULL)
635 			goto drop;
636 		eip = mtod(m, struct etherip_header *);
637 		/*
638 		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
639 		 * accepts an EtherIP packet with revered version field in
640 		 * the header.  This is a knob for backward compatibility
641 		 * with FreeBSD 7.2R or prior.
642 		 */
643 		if (eip->eip_ver != ETHERIP_VERSION) {
644 			if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 ||
645 			    eip->eip_resvl != ETHERIP_VERSION) {
646 				/* discard unknown versions */
647 				m_freem(m);
648 				goto drop;
649 			}
650 		}
651 		m_adj(m, sizeof(struct etherip_header));
652 
653 		m->m_flags &= ~(M_BCAST|M_MCAST);
654 		m->m_pkthdr.rcvif = ifp;
655 
656 		if (ifp->if_bridge) {
657 			oldifp = ifp;
658 			eh = mtod(m, struct ether_header *);
659 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
660 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
661 					m->m_flags |= M_BCAST;
662 				else
663 					m->m_flags |= M_MCAST;
664 				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
665 			}
666 			BRIDGE_INPUT(ifp, m);
667 
668 			if (m != NULL && ifp != oldifp) {
669 				/*
670 				 * The bridge gave us back itself or one of the
671 				 * members for which the frame is addressed.
672 				 */
673 				ether_demux(ifp, m);
674 				return;
675 			}
676 		}
677 		if (m != NULL)
678 			m_freem(m);
679 		return;
680 
681 	default:
682 		if (ng_gif_input_orphan_p != NULL)
683 			(*ng_gif_input_orphan_p)(ifp, m, af);
684 		else
685 			m_freem(m);
686 		return;
687 	}
688 
689 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
690 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
691 	M_SETFIB(m, ifp->if_fib);
692 	netisr_dispatch(isr, m);
693 	return;
694 drop:
695 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
696 }
697 
698 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
699 int
700 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
701 {
702 	GIF_RLOCK_TRACKER;
703 	struct ifreq *ifr = (struct ifreq*)data;
704 	struct sockaddr *dst, *src;
705 	struct gif_softc *sc;
706 #ifdef INET
707 	struct sockaddr_in *sin = NULL;
708 #endif
709 #ifdef INET6
710 	struct sockaddr_in6 *sin6 = NULL;
711 #endif
712 	u_int options;
713 	int error;
714 
715 	switch (cmd) {
716 	case SIOCSIFADDR:
717 		ifp->if_flags |= IFF_UP;
718 	case SIOCADDMULTI:
719 	case SIOCDELMULTI:
720 	case SIOCGIFMTU:
721 	case SIOCSIFFLAGS:
722 		return (0);
723 	case SIOCSIFMTU:
724 		if (ifr->ifr_mtu < GIF_MTU_MIN ||
725 		    ifr->ifr_mtu > GIF_MTU_MAX)
726 			return (EINVAL);
727 		else
728 			ifp->if_mtu = ifr->ifr_mtu;
729 		return (0);
730 	}
731 	sx_xlock(&gif_ioctl_sx);
732 	sc = ifp->if_softc;
733 	if (sc == NULL) {
734 		error = ENXIO;
735 		goto bad;
736 	}
737 	error = 0;
738 	switch (cmd) {
739 	case SIOCSIFPHYADDR:
740 #ifdef INET6
741 	case SIOCSIFPHYADDR_IN6:
742 #endif
743 		error = EINVAL;
744 		switch (cmd) {
745 #ifdef INET
746 		case SIOCSIFPHYADDR:
747 			src = (struct sockaddr *)
748 				&(((struct in_aliasreq *)data)->ifra_addr);
749 			dst = (struct sockaddr *)
750 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
751 			break;
752 #endif
753 #ifdef INET6
754 		case SIOCSIFPHYADDR_IN6:
755 			src = (struct sockaddr *)
756 				&(((struct in6_aliasreq *)data)->ifra_addr);
757 			dst = (struct sockaddr *)
758 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
759 			break;
760 #endif
761 		default:
762 			goto bad;
763 		}
764 		/* sa_family must be equal */
765 		if (src->sa_family != dst->sa_family ||
766 		    src->sa_len != dst->sa_len)
767 			goto bad;
768 
769 		/* validate sa_len */
770 		switch (src->sa_family) {
771 #ifdef INET
772 		case AF_INET:
773 			if (src->sa_len != sizeof(struct sockaddr_in))
774 				goto bad;
775 			break;
776 #endif
777 #ifdef INET6
778 		case AF_INET6:
779 			if (src->sa_len != sizeof(struct sockaddr_in6))
780 				goto bad;
781 			break;
782 #endif
783 		default:
784 			error = EAFNOSUPPORT;
785 			goto bad;
786 		}
787 		/* check sa_family looks sane for the cmd */
788 		error = EAFNOSUPPORT;
789 		switch (cmd) {
790 #ifdef INET
791 		case SIOCSIFPHYADDR:
792 			if (src->sa_family == AF_INET)
793 				break;
794 			goto bad;
795 #endif
796 #ifdef INET6
797 		case SIOCSIFPHYADDR_IN6:
798 			if (src->sa_family == AF_INET6)
799 				break;
800 			goto bad;
801 #endif
802 		}
803 		error = EADDRNOTAVAIL;
804 		switch (src->sa_family) {
805 #ifdef INET
806 		case AF_INET:
807 			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
808 			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
809 				goto bad;
810 			break;
811 #endif
812 #ifdef INET6
813 		case AF_INET6:
814 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
815 			    ||
816 			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
817 				goto bad;
818 			/*
819 			 * Check validity of the scope zone ID of the
820 			 * addresses, and convert it into the kernel
821 			 * internal form if necessary.
822 			 */
823 			error = sa6_embedscope(satosin6(src), 0);
824 			if (error != 0)
825 				goto bad;
826 			error = sa6_embedscope(satosin6(dst), 0);
827 			if (error != 0)
828 				goto bad;
829 #endif
830 		};
831 		error = gif_set_tunnel(ifp, src, dst);
832 		break;
833 	case SIOCDIFPHYADDR:
834 		gif_delete_tunnel(ifp);
835 		break;
836 	case SIOCGIFPSRCADDR:
837 	case SIOCGIFPDSTADDR:
838 #ifdef INET6
839 	case SIOCGIFPSRCADDR_IN6:
840 	case SIOCGIFPDSTADDR_IN6:
841 #endif
842 		if (sc->gif_family == 0) {
843 			error = EADDRNOTAVAIL;
844 			break;
845 		}
846 		GIF_RLOCK(sc);
847 		switch (cmd) {
848 #ifdef INET
849 		case SIOCGIFPSRCADDR:
850 		case SIOCGIFPDSTADDR:
851 			if (sc->gif_family != AF_INET) {
852 				error = EADDRNOTAVAIL;
853 				break;
854 			}
855 			sin = (struct sockaddr_in *)&ifr->ifr_addr;
856 			memset(sin, 0, sizeof(*sin));
857 			sin->sin_family = AF_INET;
858 			sin->sin_len = sizeof(*sin);
859 			break;
860 #endif
861 #ifdef INET6
862 		case SIOCGIFPSRCADDR_IN6:
863 		case SIOCGIFPDSTADDR_IN6:
864 			if (sc->gif_family != AF_INET6) {
865 				error = EADDRNOTAVAIL;
866 				break;
867 			}
868 			sin6 = (struct sockaddr_in6 *)
869 				&(((struct in6_ifreq *)data)->ifr_addr);
870 			memset(sin6, 0, sizeof(*sin6));
871 			sin6->sin6_family = AF_INET6;
872 			sin6->sin6_len = sizeof(*sin6);
873 			break;
874 #endif
875 		default:
876 			error = EAFNOSUPPORT;
877 		}
878 		if (error == 0) {
879 			switch (cmd) {
880 #ifdef INET
881 			case SIOCGIFPSRCADDR:
882 				sin->sin_addr = sc->gif_iphdr->ip_src;
883 				break;
884 			case SIOCGIFPDSTADDR:
885 				sin->sin_addr = sc->gif_iphdr->ip_dst;
886 				break;
887 #endif
888 #ifdef INET6
889 			case SIOCGIFPSRCADDR_IN6:
890 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
891 				break;
892 			case SIOCGIFPDSTADDR_IN6:
893 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
894 				break;
895 #endif
896 			}
897 		}
898 		GIF_RUNLOCK(sc);
899 		if (error != 0)
900 			break;
901 		switch (cmd) {
902 #ifdef INET
903 		case SIOCGIFPSRCADDR:
904 		case SIOCGIFPDSTADDR:
905 			error = prison_if(curthread->td_ucred,
906 			    (struct sockaddr *)sin);
907 			if (error != 0)
908 				memset(sin, 0, sizeof(*sin));
909 			break;
910 #endif
911 #ifdef INET6
912 		case SIOCGIFPSRCADDR_IN6:
913 		case SIOCGIFPDSTADDR_IN6:
914 			error = prison_if(curthread->td_ucred,
915 			    (struct sockaddr *)sin6);
916 			if (error == 0)
917 				error = sa6_recoverscope(sin6);
918 			if (error != 0)
919 				memset(sin6, 0, sizeof(*sin6));
920 #endif
921 		}
922 		break;
923 	case SIOCGTUNFIB:
924 		ifr->ifr_fib = sc->gif_fibnum;
925 		break;
926 	case SIOCSTUNFIB:
927 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
928 			break;
929 		if (ifr->ifr_fib >= rt_numfibs)
930 			error = EINVAL;
931 		else
932 			sc->gif_fibnum = ifr->ifr_fib;
933 		break;
934 	case GIFGOPTS:
935 		options = sc->gif_options;
936 		error = copyout(&options, ifr->ifr_data, sizeof(options));
937 		break;
938 	case GIFSOPTS:
939 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
940 			break;
941 		error = copyin(ifr->ifr_data, &options, sizeof(options));
942 		if (error)
943 			break;
944 		if (options & ~GIF_OPTMASK)
945 			error = EINVAL;
946 		else
947 			sc->gif_options = options;
948 		break;
949 	default:
950 		error = EINVAL;
951 		break;
952 	}
953 bad:
954 	sx_xunlock(&gif_ioctl_sx);
955 	return (error);
956 }
957 
958 static void
959 gif_detach(struct gif_softc *sc)
960 {
961 
962 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
963 	if (sc->gif_ecookie != NULL)
964 		encap_detach(sc->gif_ecookie);
965 	sc->gif_ecookie = NULL;
966 }
967 
968 static int
969 gif_attach(struct gif_softc *sc, int af)
970 {
971 
972 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
973 	switch (af) {
974 #ifdef INET
975 	case AF_INET:
976 		return (in_gif_attach(sc));
977 #endif
978 #ifdef INET6
979 	case AF_INET6:
980 		return (in6_gif_attach(sc));
981 #endif
982 	}
983 	return (EAFNOSUPPORT);
984 }
985 
986 static int
987 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
988 {
989 	struct gif_softc *sc = ifp->if_softc;
990 	struct gif_softc *tsc;
991 #ifdef INET
992 	struct ip *ip;
993 #endif
994 #ifdef INET6
995 	struct ip6_hdr *ip6;
996 #endif
997 	void *hdr;
998 	int error = 0;
999 
1000 	if (sc == NULL)
1001 		return (ENXIO);
1002 	/* Disallow parallel tunnels unless instructed otherwise. */
1003 	if (V_parallel_tunnels == 0) {
1004 		GIF_LIST_LOCK();
1005 		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
1006 			if (tsc == sc || tsc->gif_family != src->sa_family)
1007 				continue;
1008 #ifdef INET
1009 			if (tsc->gif_family == AF_INET &&
1010 			    tsc->gif_iphdr->ip_src.s_addr ==
1011 			    satosin(src)->sin_addr.s_addr &&
1012 			    tsc->gif_iphdr->ip_dst.s_addr ==
1013 			    satosin(dst)->sin_addr.s_addr) {
1014 				error = EADDRNOTAVAIL;
1015 				GIF_LIST_UNLOCK();
1016 				goto bad;
1017 			}
1018 #endif
1019 #ifdef INET6
1020 			if (tsc->gif_family == AF_INET6 &&
1021 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
1022 			    &satosin6(src)->sin6_addr) &&
1023 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
1024 			    &satosin6(dst)->sin6_addr)) {
1025 				error = EADDRNOTAVAIL;
1026 				GIF_LIST_UNLOCK();
1027 				goto bad;
1028 			}
1029 #endif
1030 		}
1031 		GIF_LIST_UNLOCK();
1032 	}
1033 	switch (src->sa_family) {
1034 #ifdef INET
1035 	case AF_INET:
1036 		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1037 		    M_WAITOK | M_ZERO);
1038 		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1039 		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1040 		break;
1041 #endif
1042 #ifdef INET6
1043 	case AF_INET6:
1044 		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1045 		    M_WAITOK | M_ZERO);
1046 		ip6->ip6_src = satosin6(src)->sin6_addr;
1047 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1048 		ip6->ip6_vfc = IPV6_VERSION;
1049 		break;
1050 #endif
1051 	default:
1052 		return (EAFNOSUPPORT);
1053 	};
1054 
1055 	if (sc->gif_family != src->sa_family)
1056 		gif_detach(sc);
1057 	if (sc->gif_family == 0 ||
1058 	    sc->gif_family != src->sa_family)
1059 		error = gif_attach(sc, src->sa_family);
1060 
1061 	GIF_WLOCK(sc);
1062 	if (sc->gif_family != 0)
1063 		free(sc->gif_hdr, M_GIF);
1064 	sc->gif_family = src->sa_family;
1065 	sc->gif_hdr = hdr;
1066 	GIF_WUNLOCK(sc);
1067 #if defined(INET) || defined(INET6)
1068 bad:
1069 #endif
1070 	if (error == 0 && sc->gif_family != 0)
1071 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1072 	else
1073 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1074 	return (error);
1075 }
1076 
1077 static void
1078 gif_delete_tunnel(struct ifnet *ifp)
1079 {
1080 	struct gif_softc *sc = ifp->if_softc;
1081 	int family;
1082 
1083 	if (sc == NULL)
1084 		return;
1085 
1086 	GIF_WLOCK(sc);
1087 	family = sc->gif_family;
1088 	sc->gif_family = 0;
1089 	GIF_WUNLOCK(sc);
1090 	if (family != 0) {
1091 		gif_detach(sc);
1092 		free(sc->gif_hdr, M_GIF);
1093 	}
1094 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1095 }
1096