xref: /freebsd/sys/net/if_gif.c (revision 313376588638950ba1e93c403dd8c97bc52fd3a2)
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/jail.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/sx.h>
50 #include <sys/errno.h>
51 #include <sys/time.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/protosw.h>
57 #include <sys/conf.h>
58 #include <machine/cpu.h>
59 
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
66 #include <net/bpf.h>
67 #include <net/vnet.h>
68 
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
73 #ifdef	INET
74 #include <netinet/in_var.h>
75 #include <netinet/in_gif.h>
76 #include <netinet/ip_var.h>
77 #endif	/* INET */
78 
79 #ifdef INET6
80 #ifndef INET
81 #include <netinet/in.h>
82 #endif
83 #include <netinet6/in6_var.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_ecn.h>
86 #include <netinet6/ip6_var.h>
87 #include <netinet6/scope6_var.h>
88 #include <netinet6/in6_gif.h>
89 #include <netinet6/ip6protosw.h>
90 #endif /* INET6 */
91 
92 #include <netinet/ip_encap.h>
93 #include <net/ethernet.h>
94 #include <net/if_bridgevar.h>
95 #include <net/if_gif.h>
96 
97 #include <security/mac/mac_framework.h>
98 
/* Driver name; passed to if_initname() and if_clone_simple() below. */
static const char gifname[] = "gif";

/*
 * gif_mtx protects a per-vnet gif_softc_list.
 */
static VNET_DEFINE(struct mtx, gif_mtx);
#define	V_gif_mtx		VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define	V_gif_softc_list	VNET(gif_softc_list)
/*
 * Taken exclusively by gif_ioctl() (for the tunnel configuration requests)
 * and by gif_clone_destroy(); gif_attach()/gif_detach() assert it is held.
 */
static struct sx gif_ioctl_sx;
SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");

/*
 * Wrappers around the per-vnet list mutex.  The macro argument is unused;
 * callers in this file invoke them with an empty argument list.
 */
#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
					    NULL, MTX_DEF)
#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)

/*
 * Optional hooks, called only when non-NULL: on interface attach/detach
 * and on packet input.  Presumably installed by the netgraph gif node
 * (NOTE(review): confirm against ng_gif).
 */
void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void	(*ng_gif_attach_p)(struct ifnet *ifp);
void	(*ng_gif_detach_p)(struct ifnet *ifp);

/* Forward declarations of file-local functions. */
static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
    struct sockaddr *);
static void	gif_delete_tunnel(struct ifnet *);
static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
static int	gif_transmit(struct ifnet *, struct mbuf *);
static void	gif_qflush(struct ifnet *);
static int	gif_clone_create(struct if_clone *, int, caddr_t);
static void	gif_clone_destroy(struct ifnet *);
/* Per-vnet interface cloner, set up in vnet_gif_init(). */
static VNET_DEFINE(struct if_clone *, gif_cloner);
#define	V_gif_cloner	VNET(gif_cloner)

static int gifmodevent(module_t, int, void *);
135 
/* Sysctl subtree for the tunables defined below. */
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
    "Generic Tunnel Interface");
#ifndef MAX_GIF_NEST
/*
 * This macro controls the default upper limit on nesting of gif tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, nesting is not allowed by default.
 * If you need to configure nested gif tunnels, you can define this macro
 * in your kernel configuration file.  However, if you do so, please be
 * careful to configure the tunnels so that they do not form a loop.
 */
#define MAX_GIF_NEST 1
#endif
/* Per-vnet nesting limit; compared against the tag count in gif_output(). */
static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
#define	V_max_gif_nesting	VNET(max_gif_nesting)
SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");

/*
 * By default, we disallow creation of multiple tunnels between the same
 * pair of addresses.  Some applications require this functionality so
 * we allow control over this check here.
 */
#ifdef XBONEHACK
static VNET_DEFINE(int, parallel_tunnels) = 1;
#else
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define	V_parallel_tunnels	VNET(parallel_tunnels)
SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
    &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");

/* copy from src/sys/net/if_ethersubr.c */
static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* True when addr equals the all-ones Ethernet broadcast address. */
#ifndef ETHER_IS_BROADCAST
#define ETHER_IS_BROADCAST(addr) \
	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
#endif
176 
177 static int
178 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
179 {
180 	struct gif_softc *sc;
181 
182 	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
183 	sc->gif_fibnum = curthread->td_proc->p_fibnum;
184 	GIF2IFP(sc) = if_alloc(IFT_GIF);
185 	GIF_LOCK_INIT(sc);
186 	GIF2IFP(sc)->if_softc = sc;
187 	if_initname(GIF2IFP(sc), gifname, unit);
188 
189 	GIF2IFP(sc)->if_addrlen = 0;
190 	GIF2IFP(sc)->if_mtu    = GIF_MTU;
191 	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
192 #if 0
193 	/* turn off ingress filter */
194 	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
195 #endif
196 	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
197 	GIF2IFP(sc)->if_transmit  = gif_transmit;
198 	GIF2IFP(sc)->if_qflush  = gif_qflush;
199 	GIF2IFP(sc)->if_output = gif_output;
200 	if_attach(GIF2IFP(sc));
201 	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
202 	if (ng_gif_attach_p != NULL)
203 		(*ng_gif_attach_p)(GIF2IFP(sc));
204 
205 	GIF_LIST_LOCK();
206 	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
207 	GIF_LIST_UNLOCK();
208 	return (0);
209 }
210 
/*
 * Cloner "destroy" callback: tear down a gif interface.
 *
 * Under the ioctl sx lock the tunnel is deleted, the softc is unlinked from
 * the per-vnet list, the optional netgraph detach hook is called and the
 * interface is detached from bpf and the network stack.  The ifnet, the
 * softc lock and the softc itself are released after the sx lock is dropped.
 */
static void
gif_clone_destroy(struct ifnet *ifp)
{
	struct gif_softc *sc;

	/* Same lock gif_ioctl() takes for tunnel configuration requests. */
	sx_xlock(&gif_ioctl_sx);
	sc = ifp->if_softc;
	gif_delete_tunnel(ifp);
	GIF_LIST_LOCK();
	LIST_REMOVE(sc, gif_list);
	GIF_LIST_UNLOCK();
	if (ng_gif_detach_p != NULL)
		(*ng_gif_detach_p)(ifp);
	bpfdetach(ifp);
	if_detach(ifp);
	/* gif_ioctl() returns ENXIO when it finds a NULL softc. */
	ifp->if_softc = NULL;
	sx_xunlock(&gif_ioctl_sx);

	if_free(ifp);
	GIF_LOCK_DESTROY(sc);
	free(sc, M_GIF);
}
233 
234 static void
235 vnet_gif_init(const void *unused __unused)
236 {
237 
238 	LIST_INIT(&V_gif_softc_list);
239 	GIF_LIST_LOCK_INIT();
240 	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
241 	    gif_clone_destroy, 0);
242 }
243 VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
244     vnet_gif_init, NULL);
245 
/*
 * Per-vnet destructor: detach the "gif" cloner, then destroy the mutex
 * that protects the per-vnet softc list.
 */
static void
vnet_gif_uninit(const void *unused __unused)
{

	if_clone_detach(V_gif_cloner);
	GIF_LIST_LOCK_DESTROY();
}
VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
    vnet_gif_uninit, NULL);
255 
256 static int
257 gifmodevent(module_t mod, int type, void *data)
258 {
259 
260 	switch (type) {
261 	case MOD_LOAD:
262 	case MOD_UNLOAD:
263 		break;
264 	default:
265 		return (EOPNOTSUPP);
266 	}
267 	return (0);
268 }
269 
270 static moduledata_t gif_mod = {
271 	"if_gif",
272 	gifmodevent,
273 	0
274 };
275 
276 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
277 MODULE_VERSION(if_gif, 1);
278 
279 int
280 gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
281 {
282 	GIF_RLOCK_TRACKER;
283 	struct gif_softc *sc;
284 	int ret;
285 	uint8_t ver;
286 
287 	sc = (struct gif_softc *)arg;
288 	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
289 		return (0);
290 
291 	ret = 0;
292 	GIF_RLOCK(sc);
293 
294 	/* no physical address */
295 	if (sc->gif_family == 0)
296 		goto done;
297 
298 	switch (proto) {
299 #ifdef INET
300 	case IPPROTO_IPV4:
301 #endif
302 #ifdef INET6
303 	case IPPROTO_IPV6:
304 #endif
305 	case IPPROTO_ETHERIP:
306 		break;
307 	default:
308 		goto done;
309 	}
310 
311 	/* Bail on short packets */
312 	if (m->m_pkthdr.len < sizeof(struct ip))
313 		goto done;
314 
315 	m_copydata(m, 0, 1, &ver);
316 	switch (ver >> 4) {
317 #ifdef INET
318 	case 4:
319 		if (sc->gif_family != AF_INET)
320 			goto done;
321 		ret = in_gif_encapcheck(m, off, proto, arg);
322 		break;
323 #endif
324 #ifdef INET6
325 	case 6:
326 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
327 			goto done;
328 		if (sc->gif_family != AF_INET6)
329 			goto done;
330 		ret = in6_gif_encapcheck(m, off, proto, arg);
331 		break;
332 #endif
333 	}
334 done:
335 	GIF_RUNLOCK(sc);
336 	return (ret);
337 }
338 
339 static int
340 gif_transmit(struct ifnet *ifp, struct mbuf *m)
341 {
342 	struct gif_softc *sc;
343 	struct etherip_header *eth;
344 #ifdef INET
345 	struct ip *ip;
346 #endif
347 #ifdef INET6
348 	struct ip6_hdr *ip6;
349 	uint32_t t;
350 #endif
351 	uint32_t af;
352 	uint8_t proto, ecn;
353 	int error;
354 
355 	error = ENETDOWN;
356 	sc = ifp->if_softc;
357 	if (sc->gif_family == 0) {
358 		m_freem(m);
359 		goto err;
360 	}
361 	/* Now pull back the af that we stashed in the csum_data. */
362 	af = m->m_pkthdr.csum_data;
363 	BPF_MTAP2(ifp, &af, sizeof(af), m);
364 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
365 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
366 	M_SETFIB(m, sc->gif_fibnum);
367 	/* inner AF-specific encapsulation */
368 	ecn = 0;
369 	switch (af) {
370 #ifdef INET
371 	case AF_INET:
372 		proto = IPPROTO_IPV4;
373 		if (m->m_len < sizeof(struct ip))
374 			m = m_pullup(m, sizeof(struct ip));
375 		if (m == NULL) {
376 			error = ENOBUFS;
377 			goto err;
378 		}
379 		ip = mtod(m, struct ip *);
380 		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
381 		    ECN_NOCARE, &ecn, &ip->ip_tos);
382 		break;
383 #endif
384 #ifdef INET6
385 	case AF_INET6:
386 		proto = IPPROTO_IPV6;
387 		if (m->m_len < sizeof(struct ip6_hdr))
388 			m = m_pullup(m, sizeof(struct ip6_hdr));
389 		if (m == NULL) {
390 			error = ENOBUFS;
391 			goto err;
392 		}
393 		t = 0;
394 		ip6 = mtod(m, struct ip6_hdr *);
395 		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
396 		    ECN_NOCARE, &t, &ip6->ip6_flow);
397 		ecn = (ntohl(t) >> 20) & 0xff;
398 		break;
399 #endif
400 	case AF_LINK:
401 		proto = IPPROTO_ETHERIP;
402 		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
403 		if (m != NULL && m->m_len < sizeof(struct etherip_header))
404 			m = m_pullup(m, sizeof(struct etherip_header));
405 		if (m == NULL) {
406 			error = ENOBUFS;
407 			goto err;
408 		}
409 		eth = mtod(m, struct etherip_header *);
410 		eth->eip_resvh = 0;
411 		if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) {
412 			eth->eip_ver = 0;
413 			eth->eip_resvl = ETHERIP_VERSION;
414 		} else {
415 			eth->eip_ver = ETHERIP_VERSION;
416 			eth->eip_resvl = 0;
417 		}
418 		break;
419 	default:
420 		error = EAFNOSUPPORT;
421 		m_freem(m);
422 		goto err;
423 	}
424 	/* XXX should we check if our outer source is legal? */
425 	/* dispatch to output logic based on outer AF */
426 	switch (sc->gif_family) {
427 #ifdef INET
428 	case AF_INET:
429 		error = in_gif_output(ifp, m, proto, ecn);
430 		break;
431 #endif
432 #ifdef INET6
433 	case AF_INET6:
434 		error = in6_gif_output(ifp, m, proto, ecn);
435 		break;
436 #endif
437 	default:
438 		m_freem(m);
439 	}
440 err:
441 	if (error)
442 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
443 	return (error);
444 }
445 
/*
 * if_qflush method.  Intentionally empty: nothing in this file queues
 * packets on the interface, so there is nothing to flush.
 */
static void
gif_qflush(struct ifnet *ifp __unused)
{

}
451 
452 int
453 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
454 	struct route *ro)
455 {
456 	struct m_tag *mtag;
457 	uint32_t af;
458 	int gif_called;
459 	int error = 0;
460 #ifdef MAC
461 	error = mac_ifnet_check_transmit(ifp, m);
462 	if (error)
463 		goto err;
464 #endif
465 	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
466 	    (ifp->if_flags & IFF_UP) == 0) {
467 		error = ENETDOWN;
468 		goto err;
469 	}
470 
471 	/*
472 	 * gif may cause infinite recursion calls when misconfigured.
473 	 * We'll prevent this by detecting loops.
474 	 *
475 	 * High nesting level may cause stack exhaustion.
476 	 * We'll prevent this by introducing upper limit.
477 	 */
478 	gif_called = 1;
479 	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
480 	while (mtag != NULL) {
481 		if (*(struct ifnet **)(mtag + 1) == ifp) {
482 			log(LOG_NOTICE,
483 			    "gif_output: loop detected on %s\n",
484 			    (*(struct ifnet **)(mtag + 1))->if_xname);
485 			error = EIO;	/* is there better errno? */
486 			goto err;
487 		}
488 		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
489 		gif_called++;
490 	}
491 	if (gif_called > V_max_gif_nesting) {
492 		log(LOG_NOTICE,
493 		    "gif_output: recursively called too many times(%d)\n",
494 		    gif_called);
495 		error = EIO;	/* is there better errno? */
496 		goto err;
497 	}
498 	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
499 	    M_NOWAIT);
500 	if (mtag == NULL) {
501 		error = ENOMEM;
502 		goto err;
503 	}
504 	*(struct ifnet **)(mtag + 1) = ifp;
505 	m_tag_prepend(m, mtag);
506 
507 	m->m_flags &= ~(M_BCAST|M_MCAST);
508 	if (dst->sa_family == AF_UNSPEC)
509 		bcopy(dst->sa_data, &af, sizeof(af));
510 	else
511 		af = dst->sa_family;
512 	if (ifp->if_bridge)
513 		af = AF_LINK;
514 	/*
515 	 * Now save the af in the inbound pkt csum data, this is a cheat since
516 	 * we are using the inbound csum_data field to carry the af over to
517 	 * the gif_transmit() routine, avoiding using yet another mtag.
518 	 */
519 	m->m_pkthdr.csum_data = af;
520 	return (ifp->if_transmit(ifp, m));
521 err:
522 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
523 	m_freem(m);
524 	return (error);
525 }
526 
527 void
528 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
529 {
530 	struct etherip_header *eip;
531 #ifdef INET
532 	struct ip *ip;
533 #endif
534 #ifdef INET6
535 	struct ip6_hdr *ip6;
536 	uint32_t t;
537 #endif
538 	struct gif_softc *sc;
539 	struct ether_header *eh;
540 	struct ifnet *oldifp;
541 	uint32_t gif_options;
542 	int isr, n, af;
543 
544 	if (ifp == NULL) {
545 		/* just in case */
546 		m_freem(m);
547 		return;
548 	}
549 	sc = ifp->if_softc;
550 	gif_options = sc->gif_options;
551 	m->m_pkthdr.rcvif = ifp;
552 	m_clrprotoflags(m);
553 	switch (proto) {
554 #ifdef INET
555 	case IPPROTO_IPV4:
556 		af = AF_INET;
557 		if (m->m_len < sizeof(struct ip))
558 			m = m_pullup(m, sizeof(struct ip));
559 		if (m == NULL)
560 			goto drop;
561 		ip = mtod(m, struct ip *);
562 		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
563 		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
564 			m_freem(m);
565 			goto drop;
566 		}
567 		break;
568 #endif
569 #ifdef INET6
570 	case IPPROTO_IPV6:
571 		af = AF_INET6;
572 		if (m->m_len < sizeof(struct ip6_hdr))
573 			m = m_pullup(m, sizeof(struct ip6_hdr));
574 		if (m == NULL)
575 			goto drop;
576 		t = htonl((uint32_t)ecn << 20);
577 		ip6 = mtod(m, struct ip6_hdr *);
578 		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
579 		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
580 			m_freem(m);
581 			goto drop;
582 		}
583 		break;
584 #endif
585 	case IPPROTO_ETHERIP:
586 		af = AF_LINK;
587 		break;
588 	default:
589 		m_freem(m);
590 		goto drop;
591 	}
592 
593 #ifdef MAC
594 	mac_ifnet_create_mbuf(ifp, m);
595 #endif
596 
597 	if (bpf_peers_present(ifp->if_bpf)) {
598 		uint32_t af1 = af;
599 		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
600 	}
601 
602 	if ((ifp->if_flags & IFF_MONITOR) != 0) {
603 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
604 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
605 		m_freem(m);
606 		return;
607 	}
608 
609 	if (ng_gif_input_p != NULL) {
610 		(*ng_gif_input_p)(ifp, &m, af);
611 		if (m == NULL)
612 			goto drop;
613 	}
614 
615 	/*
616 	 * Put the packet to the network layer input queue according to the
617 	 * specified address family.
618 	 * Note: older versions of gif_input directly called network layer
619 	 * input functions, e.g. ip6_input, here.  We changed the policy to
620 	 * prevent too many recursive calls of such input functions, which
621 	 * might cause kernel panic.  But the change may introduce another
622 	 * problem; if the input queue is full, packets are discarded.
623 	 * The kernel stack overflow really happened, and we believed
624 	 * queue-full rarely occurs, so we changed the policy.
625 	 */
626 	switch (af) {
627 #ifdef INET
628 	case AF_INET:
629 		isr = NETISR_IP;
630 		break;
631 #endif
632 #ifdef INET6
633 	case AF_INET6:
634 		isr = NETISR_IPV6;
635 		break;
636 #endif
637 	case AF_LINK:
638 		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
639 		if (n > m->m_len)
640 			m = m_pullup(m, n);
641 		if (m == NULL)
642 			goto drop;
643 		eip = mtod(m, struct etherip_header *);
644 		/*
645 		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
646 		 * accepts an EtherIP packet with revered version field in
647 		 * the header.  This is a knob for backward compatibility
648 		 * with FreeBSD 7.2R or prior.
649 		 */
650 		if (eip->eip_ver != ETHERIP_VERSION) {
651 			if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 ||
652 			    eip->eip_resvl != ETHERIP_VERSION) {
653 				/* discard unknown versions */
654 				m_freem(m);
655 				goto drop;
656 			}
657 		}
658 		m_adj(m, sizeof(struct etherip_header));
659 
660 		m->m_flags &= ~(M_BCAST|M_MCAST);
661 		m->m_pkthdr.rcvif = ifp;
662 
663 		if (ifp->if_bridge) {
664 			oldifp = ifp;
665 			eh = mtod(m, struct ether_header *);
666 			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
667 				if (ETHER_IS_BROADCAST(eh->ether_dhost))
668 					m->m_flags |= M_BCAST;
669 				else
670 					m->m_flags |= M_MCAST;
671 				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
672 			}
673 			BRIDGE_INPUT(ifp, m);
674 
675 			if (m != NULL && ifp != oldifp) {
676 				/*
677 				 * The bridge gave us back itself or one of the
678 				 * members for which the frame is addressed.
679 				 */
680 				ether_demux(ifp, m);
681 				return;
682 			}
683 		}
684 		if (m != NULL)
685 			m_freem(m);
686 		return;
687 
688 	default:
689 		if (ng_gif_input_orphan_p != NULL)
690 			(*ng_gif_input_orphan_p)(ifp, m, af);
691 		else
692 			m_freem(m);
693 		return;
694 	}
695 
696 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
697 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
698 	M_SETFIB(m, ifp->if_fib);
699 	netisr_dispatch(isr, m);
700 	return;
701 drop:
702 	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
703 }
704 
705 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
706 int
707 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
708 {
709 	GIF_RLOCK_TRACKER;
710 	struct ifreq *ifr = (struct ifreq*)data;
711 	struct sockaddr *dst, *src;
712 	struct gif_softc *sc;
713 #ifdef INET
714 	struct sockaddr_in *sin = NULL;
715 #endif
716 #ifdef INET6
717 	struct sockaddr_in6 *sin6 = NULL;
718 #endif
719 	u_int options;
720 	int error;
721 
722 	switch (cmd) {
723 	case SIOCSIFADDR:
724 		ifp->if_flags |= IFF_UP;
725 	case SIOCADDMULTI:
726 	case SIOCDELMULTI:
727 	case SIOCGIFMTU:
728 	case SIOCSIFFLAGS:
729 		return (0);
730 	case SIOCSIFMTU:
731 		if (ifr->ifr_mtu < GIF_MTU_MIN ||
732 		    ifr->ifr_mtu > GIF_MTU_MAX)
733 			return (EINVAL);
734 		else
735 			ifp->if_mtu = ifr->ifr_mtu;
736 		return (0);
737 	}
738 	sx_xlock(&gif_ioctl_sx);
739 	sc = ifp->if_softc;
740 	if (sc == NULL) {
741 		error = ENXIO;
742 		goto bad;
743 	}
744 	error = 0;
745 	switch (cmd) {
746 	case SIOCSIFPHYADDR:
747 #ifdef INET6
748 	case SIOCSIFPHYADDR_IN6:
749 #endif
750 		error = EINVAL;
751 		switch (cmd) {
752 #ifdef INET
753 		case SIOCSIFPHYADDR:
754 			src = (struct sockaddr *)
755 				&(((struct in_aliasreq *)data)->ifra_addr);
756 			dst = (struct sockaddr *)
757 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
758 			break;
759 #endif
760 #ifdef INET6
761 		case SIOCSIFPHYADDR_IN6:
762 			src = (struct sockaddr *)
763 				&(((struct in6_aliasreq *)data)->ifra_addr);
764 			dst = (struct sockaddr *)
765 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
766 			break;
767 #endif
768 		default:
769 			goto bad;
770 		}
771 		/* sa_family must be equal */
772 		if (src->sa_family != dst->sa_family ||
773 		    src->sa_len != dst->sa_len)
774 			goto bad;
775 
776 		/* validate sa_len */
777 		switch (src->sa_family) {
778 #ifdef INET
779 		case AF_INET:
780 			if (src->sa_len != sizeof(struct sockaddr_in))
781 				goto bad;
782 			break;
783 #endif
784 #ifdef INET6
785 		case AF_INET6:
786 			if (src->sa_len != sizeof(struct sockaddr_in6))
787 				goto bad;
788 			break;
789 #endif
790 		default:
791 			error = EAFNOSUPPORT;
792 			goto bad;
793 		}
794 		/* check sa_family looks sane for the cmd */
795 		error = EAFNOSUPPORT;
796 		switch (cmd) {
797 #ifdef INET
798 		case SIOCSIFPHYADDR:
799 			if (src->sa_family == AF_INET)
800 				break;
801 			goto bad;
802 #endif
803 #ifdef INET6
804 		case SIOCSIFPHYADDR_IN6:
805 			if (src->sa_family == AF_INET6)
806 				break;
807 			goto bad;
808 #endif
809 		}
810 		error = EADDRNOTAVAIL;
811 		switch (src->sa_family) {
812 #ifdef INET
813 		case AF_INET:
814 			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
815 			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
816 				goto bad;
817 			break;
818 #endif
819 #ifdef INET6
820 		case AF_INET6:
821 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
822 			    ||
823 			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
824 				goto bad;
825 			/*
826 			 * Check validity of the scope zone ID of the
827 			 * addresses, and convert it into the kernel
828 			 * internal form if necessary.
829 			 */
830 			error = sa6_embedscope(satosin6(src), 0);
831 			if (error != 0)
832 				goto bad;
833 			error = sa6_embedscope(satosin6(dst), 0);
834 			if (error != 0)
835 				goto bad;
836 #endif
837 		};
838 		error = gif_set_tunnel(ifp, src, dst);
839 		break;
840 	case SIOCDIFPHYADDR:
841 		gif_delete_tunnel(ifp);
842 		break;
843 	case SIOCGIFPSRCADDR:
844 	case SIOCGIFPDSTADDR:
845 #ifdef INET6
846 	case SIOCGIFPSRCADDR_IN6:
847 	case SIOCGIFPDSTADDR_IN6:
848 #endif
849 		if (sc->gif_family == 0) {
850 			error = EADDRNOTAVAIL;
851 			break;
852 		}
853 		GIF_RLOCK(sc);
854 		switch (cmd) {
855 #ifdef INET
856 		case SIOCGIFPSRCADDR:
857 		case SIOCGIFPDSTADDR:
858 			if (sc->gif_family != AF_INET) {
859 				error = EADDRNOTAVAIL;
860 				break;
861 			}
862 			sin = (struct sockaddr_in *)&ifr->ifr_addr;
863 			memset(sin, 0, sizeof(*sin));
864 			sin->sin_family = AF_INET;
865 			sin->sin_len = sizeof(*sin);
866 			break;
867 #endif
868 #ifdef INET6
869 		case SIOCGIFPSRCADDR_IN6:
870 		case SIOCGIFPDSTADDR_IN6:
871 			if (sc->gif_family != AF_INET6) {
872 				error = EADDRNOTAVAIL;
873 				break;
874 			}
875 			sin6 = (struct sockaddr_in6 *)
876 				&(((struct in6_ifreq *)data)->ifr_addr);
877 			memset(sin6, 0, sizeof(*sin6));
878 			sin6->sin6_family = AF_INET6;
879 			sin6->sin6_len = sizeof(*sin6);
880 			break;
881 #endif
882 		default:
883 			error = EAFNOSUPPORT;
884 		}
885 		if (error == 0) {
886 			switch (cmd) {
887 #ifdef INET
888 			case SIOCGIFPSRCADDR:
889 				sin->sin_addr = sc->gif_iphdr->ip_src;
890 				break;
891 			case SIOCGIFPDSTADDR:
892 				sin->sin_addr = sc->gif_iphdr->ip_dst;
893 				break;
894 #endif
895 #ifdef INET6
896 			case SIOCGIFPSRCADDR_IN6:
897 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
898 				break;
899 			case SIOCGIFPDSTADDR_IN6:
900 				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
901 				break;
902 #endif
903 			}
904 		}
905 		GIF_RUNLOCK(sc);
906 		if (error != 0)
907 			break;
908 		switch (cmd) {
909 #ifdef INET
910 		case SIOCGIFPSRCADDR:
911 		case SIOCGIFPDSTADDR:
912 			error = prison_if(curthread->td_ucred,
913 			    (struct sockaddr *)sin);
914 			if (error != 0)
915 				memset(sin, 0, sizeof(*sin));
916 			break;
917 #endif
918 #ifdef INET6
919 		case SIOCGIFPSRCADDR_IN6:
920 		case SIOCGIFPDSTADDR_IN6:
921 			error = prison_if(curthread->td_ucred,
922 			    (struct sockaddr *)sin6);
923 			if (error == 0)
924 				error = sa6_recoverscope(sin6);
925 			if (error != 0)
926 				memset(sin6, 0, sizeof(*sin6));
927 #endif
928 		}
929 		break;
930 	case GIFGOPTS:
931 		options = sc->gif_options;
932 		error = copyout(&options, ifr->ifr_data, sizeof(options));
933 		break;
934 	case GIFSOPTS:
935 		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
936 			break;
937 		error = copyin(ifr->ifr_data, &options, sizeof(options));
938 		if (error)
939 			break;
940 		if (options & ~GIF_OPTMASK)
941 			error = EINVAL;
942 		else
943 			sc->gif_options = options;
944 		break;
945 
946 	default:
947 		error = EINVAL;
948 		break;
949 	}
950 bad:
951 	sx_xunlock(&gif_ioctl_sx);
952 	return (error);
953 }
954 
955 static void
956 gif_detach(struct gif_softc *sc)
957 {
958 
959 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
960 	if (sc->gif_ecookie != NULL)
961 		encap_detach(sc->gif_ecookie);
962 	sc->gif_ecookie = NULL;
963 }
964 
965 static int
966 gif_attach(struct gif_softc *sc, int af)
967 {
968 
969 	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
970 	switch (af) {
971 #ifdef INET
972 	case AF_INET:
973 		return (in_gif_attach(sc));
974 #endif
975 #ifdef INET6
976 	case AF_INET6:
977 		return (in6_gif_attach(sc));
978 #endif
979 	}
980 	return (EAFNOSUPPORT);
981 }
982 
983 static int
984 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
985 {
986 	struct gif_softc *sc = ifp->if_softc;
987 	struct gif_softc *tsc;
988 #ifdef INET
989 	struct ip *ip;
990 #endif
991 #ifdef INET6
992 	struct ip6_hdr *ip6;
993 #endif
994 	void *hdr;
995 	int error = 0;
996 
997 	if (sc == NULL)
998 		return (ENXIO);
999 	/* Disallow parallel tunnels unless instructed otherwise. */
1000 	if (V_parallel_tunnels == 0) {
1001 		GIF_LIST_LOCK();
1002 		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
1003 			if (tsc == sc || tsc->gif_family != src->sa_family)
1004 				continue;
1005 #ifdef INET
1006 			if (tsc->gif_family == AF_INET &&
1007 			    tsc->gif_iphdr->ip_src.s_addr ==
1008 			    satosin(src)->sin_addr.s_addr &&
1009 			    tsc->gif_iphdr->ip_dst.s_addr ==
1010 			    satosin(dst)->sin_addr.s_addr) {
1011 				error = EADDRNOTAVAIL;
1012 				GIF_LIST_UNLOCK();
1013 				goto bad;
1014 			}
1015 #endif
1016 #ifdef INET6
1017 			if (tsc->gif_family == AF_INET6 &&
1018 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
1019 			    &satosin6(src)->sin6_addr) &&
1020 			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
1021 			    &satosin6(dst)->sin6_addr)) {
1022 				error = EADDRNOTAVAIL;
1023 				GIF_LIST_UNLOCK();
1024 				goto bad;
1025 			}
1026 #endif
1027 		}
1028 		GIF_LIST_UNLOCK();
1029 	}
1030 	switch (src->sa_family) {
1031 #ifdef INET
1032 	case AF_INET:
1033 		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1034 		    M_WAITOK | M_ZERO);
1035 		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1036 		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1037 		break;
1038 #endif
1039 #ifdef INET6
1040 	case AF_INET6:
1041 		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1042 		    M_WAITOK | M_ZERO);
1043 		ip6->ip6_src = satosin6(src)->sin6_addr;
1044 		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1045 		ip6->ip6_vfc = IPV6_VERSION;
1046 		break;
1047 #endif
1048 	default:
1049 		return (EAFNOSUPPORT);
1050 	};
1051 
1052 	if (sc->gif_family != src->sa_family)
1053 		gif_detach(sc);
1054 	if (sc->gif_family == 0 ||
1055 	    sc->gif_family != src->sa_family)
1056 		error = gif_attach(sc, src->sa_family);
1057 
1058 	GIF_WLOCK(sc);
1059 	if (sc->gif_family != 0)
1060 		free(sc->gif_hdr, M_GIF);
1061 	sc->gif_family = src->sa_family;
1062 	sc->gif_hdr = hdr;
1063 	GIF_WUNLOCK(sc);
1064 #if defined(INET) || defined(INET6)
1065 bad:
1066 #endif
1067 	if (error == 0 && sc->gif_family != 0)
1068 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1069 	else
1070 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1071 	return (error);
1072 }
1073 
1074 static void
1075 gif_delete_tunnel(struct ifnet *ifp)
1076 {
1077 	struct gif_softc *sc = ifp->if_softc;
1078 	int family;
1079 
1080 	if (sc == NULL)
1081 		return;
1082 
1083 	GIF_WLOCK(sc);
1084 	family = sc->gif_family;
1085 	sc->gif_family = 0;
1086 	GIF_WUNLOCK(sc);
1087 	if (family != 0) {
1088 		gif_detach(sc);
1089 		free(sc->gif_hdr, M_GIF);
1090 	}
1091 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1092 }
1093