xref: /linux/net/ipv4/ip_gre.c (revision c95b819ad75b13102139aad0e7062d927be23cc6)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	GRE over IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
71da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
81da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
91da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds 
134fc268d2SRandy Dunlap #include <linux/capability.h>
141da177e4SLinus Torvalds #include <linux/module.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <asm/uaccess.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
191da177e4SLinus Torvalds #include <linux/netdevice.h>
201da177e4SLinus Torvalds #include <linux/in.h>
211da177e4SLinus Torvalds #include <linux/tcp.h>
221da177e4SLinus Torvalds #include <linux/udp.h>
231da177e4SLinus Torvalds #include <linux/if_arp.h>
241da177e4SLinus Torvalds #include <linux/mroute.h>
251da177e4SLinus Torvalds #include <linux/init.h>
261da177e4SLinus Torvalds #include <linux/in6.h>
271da177e4SLinus Torvalds #include <linux/inetdevice.h>
281da177e4SLinus Torvalds #include <linux/igmp.h>
291da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
3046f25dffSKris Katterjohn #include <linux/if_ether.h>
311da177e4SLinus Torvalds 
321da177e4SLinus Torvalds #include <net/sock.h>
331da177e4SLinus Torvalds #include <net/ip.h>
341da177e4SLinus Torvalds #include <net/icmp.h>
351da177e4SLinus Torvalds #include <net/protocol.h>
361da177e4SLinus Torvalds #include <net/ipip.h>
371da177e4SLinus Torvalds #include <net/arp.h>
381da177e4SLinus Torvalds #include <net/checksum.h>
391da177e4SLinus Torvalds #include <net/dsfield.h>
401da177e4SLinus Torvalds #include <net/inet_ecn.h>
411da177e4SLinus Torvalds #include <net/xfrm.h>
4259a4c759SPavel Emelyanov #include <net/net_namespace.h>
4359a4c759SPavel Emelyanov #include <net/netns/generic.h>
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds #ifdef CONFIG_IPV6
461da177e4SLinus Torvalds #include <net/ipv6.h>
471da177e4SLinus Torvalds #include <net/ip6_fib.h>
481da177e4SLinus Torvalds #include <net/ip6_route.h>
491da177e4SLinus Torvalds #endif
501da177e4SLinus Torvalds 
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like the dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   is multithreaded, we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we did
   all that we could. Even if it is your gated who injected
   the fatal route into the network, even if it were you who configured
   the fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract the common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
1191da177e4SLinus Torvalds 
/* Per-device init and setup hooks, defined later in this file. */
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Number of buckets in each of the tunnel hash tables. */
#define HASH_SIZE  16

/* Index handed to net_generic() to fetch this module's per-namespace state. */
static int ipgre_net_id;

/* GRE state kept for one network namespace. */
struct ipgre_net {
	/* Four hash tables of tunnel chains, one per endpoint-wildcard class. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device of the fallback tunnel, used when no configured tunnel matches. */
	struct net_device *fb_tunnel_dev;
};
13559a4c759SPavel Emelyanov 
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if they do not match a configured keyless tunnel,
   will match the fallback tunnel.
 */
1531da177e4SLinus Torvalds 
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesized so that the cast and the shift apply to the
 * whole expression passed in, not just to its first operand, and the mask is
 * derived from HASH_SIZE (which must be a power of two) instead of being a
 * separate literal.  The argument is evaluated twice, so it must not have
 * side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1551da177e4SLinus Torvalds 
/*
 * Named views of the four hash tables in struct ipgre_net, indexed by which
 * tunnel endpoints are specified:
 *   wc  - neither endpoint,  l   - local only,
 *   r   - remote only,       r_l - remote and local.
 */
#define tunnels_wc	tunnels[0]
#define tunnels_l	tunnels[1]
#define tunnels_r	tunnels[2]
#define tunnels_r_l	tunnels[3]

/*
 * Protects the tunnel hash chains: taken for writing when a chain is
 * relinked, and for reading by the receive and ICMP error paths.
 */
static DEFINE_RWLOCK(ipgre_lock);
1621da177e4SLinus Torvalds 
1631da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */
1641da177e4SLinus Torvalds 
165f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166f57e7d5aSPavel Emelyanov 		__be32 remote, __be32 local, __be32 key)
1671da177e4SLinus Torvalds {
1681da177e4SLinus Torvalds 	unsigned h0 = HASH(remote);
1691da177e4SLinus Torvalds 	unsigned h1 = HASH(key);
1701da177e4SLinus Torvalds 	struct ip_tunnel *t;
1717daa0004SPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1721da177e4SLinus Torvalds 
173eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1741da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
1751da177e4SLinus Torvalds 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
1761da177e4SLinus Torvalds 				return t;
1771da177e4SLinus Torvalds 		}
1781da177e4SLinus Torvalds 	}
179eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1801da177e4SLinus Torvalds 		if (remote == t->parms.iph.daddr) {
1811da177e4SLinus Torvalds 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
1821da177e4SLinus Torvalds 				return t;
1831da177e4SLinus Torvalds 		}
1841da177e4SLinus Torvalds 	}
185eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_l[h1]; t; t = t->next) {
1861da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr ||
187f97c1e0cSJoe Perches 		     (local == t->parms.iph.daddr &&
188f97c1e0cSJoe Perches 		      ipv4_is_multicast(local))) {
1891da177e4SLinus Torvalds 			if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
1901da177e4SLinus Torvalds 				return t;
1911da177e4SLinus Torvalds 		}
1921da177e4SLinus Torvalds 	}
193eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
1941da177e4SLinus Torvalds 		if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
1951da177e4SLinus Torvalds 			return t;
1961da177e4SLinus Torvalds 	}
1971da177e4SLinus Torvalds 
1987daa0004SPavel Emelyanov 	if (ign->fb_tunnel_dev->flags&IFF_UP)
1997daa0004SPavel Emelyanov 		return netdev_priv(ign->fb_tunnel_dev);
2001da177e4SLinus Torvalds 	return NULL;
2011da177e4SLinus Torvalds }
2021da177e4SLinus Torvalds 
203f57e7d5aSPavel Emelyanov static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204f57e7d5aSPavel Emelyanov 		struct ip_tunnel_parm *parms)
2051da177e4SLinus Torvalds {
2065056a1efSYOSHIFUJI Hideaki 	__be32 remote = parms->iph.daddr;
2075056a1efSYOSHIFUJI Hideaki 	__be32 local = parms->iph.saddr;
2085056a1efSYOSHIFUJI Hideaki 	__be32 key = parms->i_key;
2091da177e4SLinus Torvalds 	unsigned h = HASH(key);
2101da177e4SLinus Torvalds 	int prio = 0;
2111da177e4SLinus Torvalds 
2121da177e4SLinus Torvalds 	if (local)
2131da177e4SLinus Torvalds 		prio |= 1;
214f97c1e0cSJoe Perches 	if (remote && !ipv4_is_multicast(remote)) {
2151da177e4SLinus Torvalds 		prio |= 2;
2161da177e4SLinus Torvalds 		h ^= HASH(remote);
2171da177e4SLinus Torvalds 	}
2181da177e4SLinus Torvalds 
219eb8ce741SPavel Emelyanov 	return &ign->tunnels[prio][h];
2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
222f57e7d5aSPavel Emelyanov static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223f57e7d5aSPavel Emelyanov 		struct ip_tunnel *t)
2245056a1efSYOSHIFUJI Hideaki {
225f57e7d5aSPavel Emelyanov 	return __ipgre_bucket(ign, &t->parms);
2265056a1efSYOSHIFUJI Hideaki }
2275056a1efSYOSHIFUJI Hideaki 
228f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
2291da177e4SLinus Torvalds {
230f57e7d5aSPavel Emelyanov 	struct ip_tunnel **tp = ipgre_bucket(ign, t);
2311da177e4SLinus Torvalds 
2321da177e4SLinus Torvalds 	t->next = *tp;
2331da177e4SLinus Torvalds 	write_lock_bh(&ipgre_lock);
2341da177e4SLinus Torvalds 	*tp = t;
2351da177e4SLinus Torvalds 	write_unlock_bh(&ipgre_lock);
2361da177e4SLinus Torvalds }
2371da177e4SLinus Torvalds 
238f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
2391da177e4SLinus Torvalds {
2401da177e4SLinus Torvalds 	struct ip_tunnel **tp;
2411da177e4SLinus Torvalds 
242f57e7d5aSPavel Emelyanov 	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
2431da177e4SLinus Torvalds 		if (t == *tp) {
2441da177e4SLinus Torvalds 			write_lock_bh(&ipgre_lock);
2451da177e4SLinus Torvalds 			*tp = t->next;
2461da177e4SLinus Torvalds 			write_unlock_bh(&ipgre_lock);
2471da177e4SLinus Torvalds 			break;
2481da177e4SLinus Torvalds 		}
2491da177e4SLinus Torvalds 	}
2501da177e4SLinus Torvalds }
2511da177e4SLinus Torvalds 
252f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253f57e7d5aSPavel Emelyanov 		struct ip_tunnel_parm *parms, int create)
2541da177e4SLinus Torvalds {
255d5a0a1e3SAl Viro 	__be32 remote = parms->iph.daddr;
256d5a0a1e3SAl Viro 	__be32 local = parms->iph.saddr;
257d5a0a1e3SAl Viro 	__be32 key = parms->i_key;
2581da177e4SLinus Torvalds 	struct ip_tunnel *t, **tp, *nt;
2591da177e4SLinus Torvalds 	struct net_device *dev;
2601da177e4SLinus Torvalds 	char name[IFNAMSIZ];
261f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
2621da177e4SLinus Torvalds 
263f57e7d5aSPavel Emelyanov 	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
2641da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
2651da177e4SLinus Torvalds 			if (key == t->parms.i_key)
2661da177e4SLinus Torvalds 				return t;
2671da177e4SLinus Torvalds 		}
2681da177e4SLinus Torvalds 	}
2691da177e4SLinus Torvalds 	if (!create)
2701da177e4SLinus Torvalds 		return NULL;
2711da177e4SLinus Torvalds 
2721da177e4SLinus Torvalds 	if (parms->name[0])
2731da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
27434cc7ba6SPavel Emelyanov 	else
27534cc7ba6SPavel Emelyanov 		sprintf(name, "gre%%d");
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
2781da177e4SLinus Torvalds 	if (!dev)
2791da177e4SLinus Torvalds 	  return NULL;
2801da177e4SLinus Torvalds 
2810b67ecebSPavel Emelyanov 	dev_net_set(dev, net);
2820b67ecebSPavel Emelyanov 
283b37d428bSPavel Emelyanov 	if (strchr(name, '%')) {
284b37d428bSPavel Emelyanov 		if (dev_alloc_name(dev, name) < 0)
285b37d428bSPavel Emelyanov 			goto failed_free;
286b37d428bSPavel Emelyanov 	}
287b37d428bSPavel Emelyanov 
2881da177e4SLinus Torvalds 	dev->init = ipgre_tunnel_init;
2892941a486SPatrick McHardy 	nt = netdev_priv(dev);
2901da177e4SLinus Torvalds 	nt->parms = *parms;
2911da177e4SLinus Torvalds 
292b37d428bSPavel Emelyanov 	if (register_netdevice(dev) < 0)
293b37d428bSPavel Emelyanov 		goto failed_free;
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds 	dev_hold(dev);
296f57e7d5aSPavel Emelyanov 	ipgre_tunnel_link(ign, nt);
2971da177e4SLinus Torvalds 	return nt;
2981da177e4SLinus Torvalds 
299b37d428bSPavel Emelyanov failed_free:
300b37d428bSPavel Emelyanov 	free_netdev(dev);
3011da177e4SLinus Torvalds 	return NULL;
3021da177e4SLinus Torvalds }
3031da177e4SLinus Torvalds 
3041da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev)
3051da177e4SLinus Torvalds {
306f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
307f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308f57e7d5aSPavel Emelyanov 
309f57e7d5aSPavel Emelyanov 	ipgre_tunnel_unlink(ign, netdev_priv(dev));
3101da177e4SLinus Torvalds 	dev_put(dev);
3111da177e4SLinus Torvalds }
3121da177e4SLinus Torvalds 
3131da177e4SLinus Torvalds 
3141da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info)
3151da177e4SLinus Torvalds {
3161da177e4SLinus Torvalds 
317071f92d0SRami Rosen /* All the routers (except for Linux) return only
3181da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
3191da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds    Moreover, Cisco "wise men" put GRE key to the third word
3221da177e4SLinus Torvalds    in GRE header. It makes impossible maintaining even soft state for keyed
3231da177e4SLinus Torvalds    GRE tunnels with enabled checksum. Tell them "thank you".
3241da177e4SLinus Torvalds 
3251da177e4SLinus Torvalds    Well, I wonder, rfc1812 was written by Cisco employee,
3261da177e4SLinus Torvalds    what the hell these idiots break standrads established
3271da177e4SLinus Torvalds    by themself???
3281da177e4SLinus Torvalds  */
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr*)skb->data;
331d5a0a1e3SAl Viro 	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
3321da177e4SLinus Torvalds 	int grehlen = (iph->ihl<<2) + 4;
33388c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
33488c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
3351da177e4SLinus Torvalds 	struct ip_tunnel *t;
336d5a0a1e3SAl Viro 	__be16 flags;
3371da177e4SLinus Torvalds 
3381da177e4SLinus Torvalds 	flags = p[0];
3391da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
3401da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
3411da177e4SLinus Torvalds 			return;
3421da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
3431da177e4SLinus Torvalds 			grehlen += 4;
3441da177e4SLinus Torvalds 			if (flags&GRE_CSUM)
3451da177e4SLinus Torvalds 				grehlen += 4;
3461da177e4SLinus Torvalds 		}
3471da177e4SLinus Torvalds 	}
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds 	/* If only 8 bytes returned, keyed message will be dropped here */
3501da177e4SLinus Torvalds 	if (skb_headlen(skb) < grehlen)
3511da177e4SLinus Torvalds 		return;
3521da177e4SLinus Torvalds 
3531da177e4SLinus Torvalds 	switch (type) {
3541da177e4SLinus Torvalds 	default:
3551da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
3561da177e4SLinus Torvalds 		return;
3571da177e4SLinus Torvalds 
3581da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
3591da177e4SLinus Torvalds 		switch (code) {
3601da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
3611da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
3621da177e4SLinus Torvalds 			/* Impossible event. */
3631da177e4SLinus Torvalds 			return;
3641da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
3651da177e4SLinus Torvalds 			/* Soft state for pmtu is maintained by IP core. */
3661da177e4SLinus Torvalds 			return;
3671da177e4SLinus Torvalds 		default:
3681da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
3691da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
3701da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
3711da177e4SLinus Torvalds 			 */
3721da177e4SLinus Torvalds 			break;
3731da177e4SLinus Torvalds 		}
3741da177e4SLinus Torvalds 		break;
3751da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
3761da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
3771da177e4SLinus Torvalds 			return;
3781da177e4SLinus Torvalds 		break;
3791da177e4SLinus Torvalds 	}
3801da177e4SLinus Torvalds 
3811da177e4SLinus Torvalds 	read_lock(&ipgre_lock);
3823b4667f3SPavel Emelyanov 	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
383f57e7d5aSPavel Emelyanov 			(flags&GRE_KEY) ?
384f57e7d5aSPavel Emelyanov 			*(((__be32*)p) + (grehlen>>2) - 1) : 0);
385f97c1e0cSJoe Perches 	if (t == NULL || t->parms.iph.daddr == 0 ||
386f97c1e0cSJoe Perches 	    ipv4_is_multicast(t->parms.iph.daddr))
3871da177e4SLinus Torvalds 		goto out;
3881da177e4SLinus Torvalds 
3891da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
3901da177e4SLinus Torvalds 		goto out;
3911da177e4SLinus Torvalds 
3921da177e4SLinus Torvalds 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
3931da177e4SLinus Torvalds 		t->err_count++;
3941da177e4SLinus Torvalds 	else
3951da177e4SLinus Torvalds 		t->err_count = 1;
3961da177e4SLinus Torvalds 	t->err_time = jiffies;
3971da177e4SLinus Torvalds out:
3981da177e4SLinus Torvalds 	read_unlock(&ipgre_lock);
3991da177e4SLinus Torvalds 	return;
4001da177e4SLinus Torvalds }
4011da177e4SLinus Torvalds 
4021da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
4031da177e4SLinus Torvalds {
4041da177e4SLinus Torvalds 	if (INET_ECN_is_ce(iph->tos)) {
4051da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
406eddc9ec5SArnaldo Carvalho de Melo 			IP_ECN_set_ce(ip_hdr(skb));
4071da177e4SLinus Torvalds 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
4080660e03fSArnaldo Carvalho de Melo 			IP6_ECN_set_ce(ipv6_hdr(skb));
4091da177e4SLinus Torvalds 		}
4101da177e4SLinus Torvalds 	}
4111da177e4SLinus Torvalds }
4121da177e4SLinus Torvalds 
4131da177e4SLinus Torvalds static inline u8
4141da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
4151da177e4SLinus Torvalds {
4161da177e4SLinus Torvalds 	u8 inner = 0;
4171da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP))
4181da177e4SLinus Torvalds 		inner = old_iph->tos;
4191da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6))
4201da177e4SLinus Torvalds 		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
4211da177e4SLinus Torvalds 	return INET_ECN_encapsulate(tos, inner);
4221da177e4SLinus Torvalds }
4231da177e4SLinus Torvalds 
4241da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb)
4251da177e4SLinus Torvalds {
4261da177e4SLinus Torvalds 	struct iphdr *iph;
4271da177e4SLinus Torvalds 	u8     *h;
428d5a0a1e3SAl Viro 	__be16    flags;
429d3bc23e7SAl Viro 	__sum16   csum = 0;
430d5a0a1e3SAl Viro 	__be32 key = 0;
4311da177e4SLinus Torvalds 	u32    seqno = 0;
4321da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
4331da177e4SLinus Torvalds 	int    offset = 4;
4341da177e4SLinus Torvalds 
4351da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, 16))
4361da177e4SLinus Torvalds 		goto drop_nolock;
4371da177e4SLinus Torvalds 
438eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4391da177e4SLinus Torvalds 	h = skb->data;
440d5a0a1e3SAl Viro 	flags = *(__be16*)h;
4411da177e4SLinus Torvalds 
4421da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
4431da177e4SLinus Torvalds 		/* - Version must be 0.
4441da177e4SLinus Torvalds 		   - We do not support routing headers.
4451da177e4SLinus Torvalds 		 */
4461da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
4471da177e4SLinus Torvalds 			goto drop_nolock;
4481da177e4SLinus Torvalds 
4491da177e4SLinus Torvalds 		if (flags&GRE_CSUM) {
450fb286bb2SHerbert Xu 			switch (skb->ip_summed) {
45184fa7933SPatrick McHardy 			case CHECKSUM_COMPLETE:
452d3bc23e7SAl Viro 				csum = csum_fold(skb->csum);
453fb286bb2SHerbert Xu 				if (!csum)
454fb286bb2SHerbert Xu 					break;
455fb286bb2SHerbert Xu 				/* fall through */
456fb286bb2SHerbert Xu 			case CHECKSUM_NONE:
457fb286bb2SHerbert Xu 				skb->csum = 0;
458fb286bb2SHerbert Xu 				csum = __skb_checksum_complete(skb);
45984fa7933SPatrick McHardy 				skb->ip_summed = CHECKSUM_COMPLETE;
4601da177e4SLinus Torvalds 			}
4611da177e4SLinus Torvalds 			offset += 4;
4621da177e4SLinus Torvalds 		}
4631da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
464d5a0a1e3SAl Viro 			key = *(__be32*)(h + offset);
4651da177e4SLinus Torvalds 			offset += 4;
4661da177e4SLinus Torvalds 		}
4671da177e4SLinus Torvalds 		if (flags&GRE_SEQ) {
468d5a0a1e3SAl Viro 			seqno = ntohl(*(__be32*)(h + offset));
4691da177e4SLinus Torvalds 			offset += 4;
4701da177e4SLinus Torvalds 		}
4711da177e4SLinus Torvalds 	}
4721da177e4SLinus Torvalds 
4731da177e4SLinus Torvalds 	read_lock(&ipgre_lock);
4743b4667f3SPavel Emelyanov 	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
475f57e7d5aSPavel Emelyanov 					iph->saddr, iph->daddr, key)) != NULL) {
476addd68ebSPavel Emelyanov 		struct net_device_stats *stats = &tunnel->dev->stats;
477addd68ebSPavel Emelyanov 
4781da177e4SLinus Torvalds 		secpath_reset(skb);
4791da177e4SLinus Torvalds 
480d5a0a1e3SAl Viro 		skb->protocol = *(__be16*)(h + 2);
4811da177e4SLinus Torvalds 		/* WCCP version 1 and 2 protocol decoding.
4821da177e4SLinus Torvalds 		 * - Change protocol to IP
4831da177e4SLinus Torvalds 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
4841da177e4SLinus Torvalds 		 */
4851da177e4SLinus Torvalds 		if (flags == 0 &&
486496c98dfSYOSHIFUJI Hideaki 		    skb->protocol == htons(ETH_P_WCCP)) {
487496c98dfSYOSHIFUJI Hideaki 			skb->protocol = htons(ETH_P_IP);
4881da177e4SLinus Torvalds 			if ((*(h + offset) & 0xF0) != 0x40)
4891da177e4SLinus Torvalds 				offset += 4;
4901da177e4SLinus Torvalds 		}
4911da177e4SLinus Torvalds 
4921d069167STimo Teras 		skb->mac_header = skb->network_header;
4934209fb60SArnaldo Carvalho de Melo 		__pskb_pull(skb, offset);
4944209fb60SArnaldo Carvalho de Melo 		skb_reset_network_header(skb);
4959c70220bSArnaldo Carvalho de Melo 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
4961da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
4971da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
498f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
4991da177e4SLinus Torvalds 			/* Looped back packet, drop it! */
500ee6b9673SEric Dumazet 			if (skb->rtable->fl.iif == 0)
5011da177e4SLinus Torvalds 				goto drop;
502addd68ebSPavel Emelyanov 			stats->multicast++;
5031da177e4SLinus Torvalds 			skb->pkt_type = PACKET_BROADCAST;
5041da177e4SLinus Torvalds 		}
5051da177e4SLinus Torvalds #endif
5061da177e4SLinus Torvalds 
5071da177e4SLinus Torvalds 		if (((flags&GRE_CSUM) && csum) ||
5081da177e4SLinus Torvalds 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
509addd68ebSPavel Emelyanov 			stats->rx_crc_errors++;
510addd68ebSPavel Emelyanov 			stats->rx_errors++;
5111da177e4SLinus Torvalds 			goto drop;
5121da177e4SLinus Torvalds 		}
5131da177e4SLinus Torvalds 		if (tunnel->parms.i_flags&GRE_SEQ) {
5141da177e4SLinus Torvalds 			if (!(flags&GRE_SEQ) ||
5151da177e4SLinus Torvalds 			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
516addd68ebSPavel Emelyanov 				stats->rx_fifo_errors++;
517addd68ebSPavel Emelyanov 				stats->rx_errors++;
5181da177e4SLinus Torvalds 				goto drop;
5191da177e4SLinus Torvalds 			}
5201da177e4SLinus Torvalds 			tunnel->i_seqno = seqno + 1;
5211da177e4SLinus Torvalds 		}
522addd68ebSPavel Emelyanov 		stats->rx_packets++;
523addd68ebSPavel Emelyanov 		stats->rx_bytes += skb->len;
5241da177e4SLinus Torvalds 		skb->dev = tunnel->dev;
5251da177e4SLinus Torvalds 		dst_release(skb->dst);
5261da177e4SLinus Torvalds 		skb->dst = NULL;
5271da177e4SLinus Torvalds 		nf_reset(skb);
5281da177e4SLinus Torvalds 		ipgre_ecn_decapsulate(iph, skb);
5291da177e4SLinus Torvalds 		netif_rx(skb);
5301da177e4SLinus Torvalds 		read_unlock(&ipgre_lock);
5311da177e4SLinus Torvalds 		return(0);
5321da177e4SLinus Torvalds 	}
53345af08beSHerbert Xu 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds drop:
5361da177e4SLinus Torvalds 	read_unlock(&ipgre_lock);
5371da177e4SLinus Torvalds drop_nolock:
5381da177e4SLinus Torvalds 	kfree_skb(skb);
5391da177e4SLinus Torvalds 	return(0);
5401da177e4SLinus Torvalds }
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
5431da177e4SLinus Torvalds {
5442941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
545addd68ebSPavel Emelyanov 	struct net_device_stats *stats = &tunnel->dev->stats;
546eddc9ec5SArnaldo Carvalho de Melo 	struct iphdr  *old_iph = ip_hdr(skb);
5471da177e4SLinus Torvalds 	struct iphdr  *tiph;
5481da177e4SLinus Torvalds 	u8     tos;
549d5a0a1e3SAl Viro 	__be16 df;
5501da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
5511da177e4SLinus Torvalds 	struct net_device *tdev;			/* Device to other host */
5521da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
553c2636b4dSChuck Lever 	unsigned int max_headroom;		/* The extra header space needed */
5541da177e4SLinus Torvalds 	int    gre_hlen;
555d5a0a1e3SAl Viro 	__be32 dst;
5561da177e4SLinus Torvalds 	int    mtu;
5571da177e4SLinus Torvalds 
5581da177e4SLinus Torvalds 	if (tunnel->recursion++) {
559addd68ebSPavel Emelyanov 		stats->collisions++;
5601da177e4SLinus Torvalds 		goto tx_error;
5611da177e4SLinus Torvalds 	}
5621da177e4SLinus Torvalds 
5633b04dddeSStephen Hemminger 	if (dev->header_ops) {
5641da177e4SLinus Torvalds 		gre_hlen = 0;
5651da177e4SLinus Torvalds 		tiph = (struct iphdr*)skb->data;
5661da177e4SLinus Torvalds 	} else {
5671da177e4SLinus Torvalds 		gre_hlen = tunnel->hlen;
5681da177e4SLinus Torvalds 		tiph = &tunnel->parms.iph;
5691da177e4SLinus Torvalds 	}
5701da177e4SLinus Torvalds 
5711da177e4SLinus Torvalds 	if ((dst = tiph->daddr) == 0) {
5721da177e4SLinus Torvalds 		/* NBMA tunnel */
5731da177e4SLinus Torvalds 
5741da177e4SLinus Torvalds 		if (skb->dst == NULL) {
575addd68ebSPavel Emelyanov 			stats->tx_fifo_errors++;
5761da177e4SLinus Torvalds 			goto tx_error;
5771da177e4SLinus Torvalds 		}
5781da177e4SLinus Torvalds 
5791da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
580ee6b9673SEric Dumazet 			rt = skb->rtable;
5811da177e4SLinus Torvalds 			if ((dst = rt->rt_gateway) == 0)
5821da177e4SLinus Torvalds 				goto tx_error_icmp;
5831da177e4SLinus Torvalds 		}
5841da177e4SLinus Torvalds #ifdef CONFIG_IPV6
5851da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6)) {
5861da177e4SLinus Torvalds 			struct in6_addr *addr6;
5871da177e4SLinus Torvalds 			int addr_type;
5881da177e4SLinus Torvalds 			struct neighbour *neigh = skb->dst->neighbour;
5891da177e4SLinus Torvalds 
5901da177e4SLinus Torvalds 			if (neigh == NULL)
5911da177e4SLinus Torvalds 				goto tx_error;
5921da177e4SLinus Torvalds 
5931da177e4SLinus Torvalds 			addr6 = (struct in6_addr*)&neigh->primary_key;
5941da177e4SLinus Torvalds 			addr_type = ipv6_addr_type(addr6);
5951da177e4SLinus Torvalds 
5961da177e4SLinus Torvalds 			if (addr_type == IPV6_ADDR_ANY) {
5970660e03fSArnaldo Carvalho de Melo 				addr6 = &ipv6_hdr(skb)->daddr;
5981da177e4SLinus Torvalds 				addr_type = ipv6_addr_type(addr6);
5991da177e4SLinus Torvalds 			}
6001da177e4SLinus Torvalds 
6011da177e4SLinus Torvalds 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
6021da177e4SLinus Torvalds 				goto tx_error_icmp;
6031da177e4SLinus Torvalds 
6041da177e4SLinus Torvalds 			dst = addr6->s6_addr32[3];
6051da177e4SLinus Torvalds 		}
6061da177e4SLinus Torvalds #endif
6071da177e4SLinus Torvalds 		else
6081da177e4SLinus Torvalds 			goto tx_error;
6091da177e4SLinus Torvalds 	}
6101da177e4SLinus Torvalds 
6111da177e4SLinus Torvalds 	tos = tiph->tos;
6121da177e4SLinus Torvalds 	if (tos&1) {
6131da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
6141da177e4SLinus Torvalds 			tos = old_iph->tos;
6151da177e4SLinus Torvalds 		tos &= ~1;
6161da177e4SLinus Torvalds 	}
6171da177e4SLinus Torvalds 
6181da177e4SLinus Torvalds 	{
6191da177e4SLinus Torvalds 		struct flowi fl = { .oif = tunnel->parms.link,
6201da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
6211da177e4SLinus Torvalds 					      { .daddr = dst,
6221da177e4SLinus Torvalds 						.saddr = tiph->saddr,
6231da177e4SLinus Torvalds 						.tos = RT_TOS(tos) } },
6241da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
62596635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
626addd68ebSPavel Emelyanov 			stats->tx_carrier_errors++;
6271da177e4SLinus Torvalds 			goto tx_error;
6281da177e4SLinus Torvalds 		}
6291da177e4SLinus Torvalds 	}
6301da177e4SLinus Torvalds 	tdev = rt->u.dst.dev;
6311da177e4SLinus Torvalds 
6321da177e4SLinus Torvalds 	if (tdev == dev) {
6331da177e4SLinus Torvalds 		ip_rt_put(rt);
634addd68ebSPavel Emelyanov 		stats->collisions++;
6351da177e4SLinus Torvalds 		goto tx_error;
6361da177e4SLinus Torvalds 	}
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds 	df = tiph->frag_off;
6391da177e4SLinus Torvalds 	if (df)
640*c95b819aSHerbert Xu 		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
6411da177e4SLinus Torvalds 	else
6421da177e4SLinus Torvalds 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds 	if (skb->dst)
6451da177e4SLinus Torvalds 		skb->dst->ops->update_pmtu(skb->dst, mtu);
6461da177e4SLinus Torvalds 
6471da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP)) {
6481da177e4SLinus Torvalds 		df |= (old_iph->frag_off&htons(IP_DF));
6491da177e4SLinus Torvalds 
6501da177e4SLinus Torvalds 		if ((old_iph->frag_off&htons(IP_DF)) &&
6511da177e4SLinus Torvalds 		    mtu < ntohs(old_iph->tot_len)) {
6521da177e4SLinus Torvalds 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
6531da177e4SLinus Torvalds 			ip_rt_put(rt);
6541da177e4SLinus Torvalds 			goto tx_error;
6551da177e4SLinus Torvalds 		}
6561da177e4SLinus Torvalds 	}
6571da177e4SLinus Torvalds #ifdef CONFIG_IPV6
6581da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6)) {
6591da177e4SLinus Torvalds 		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
662f97c1e0cSJoe Perches 			if ((tunnel->parms.iph.daddr &&
663f97c1e0cSJoe Perches 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
6641da177e4SLinus Torvalds 			    rt6->rt6i_dst.plen == 128) {
6651da177e4SLinus Torvalds 				rt6->rt6i_flags |= RTF_MODIFIED;
6661da177e4SLinus Torvalds 				skb->dst->metrics[RTAX_MTU-1] = mtu;
6671da177e4SLinus Torvalds 			}
6681da177e4SLinus Torvalds 		}
6691da177e4SLinus Torvalds 
6701da177e4SLinus Torvalds 		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
6711da177e4SLinus Torvalds 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
6721da177e4SLinus Torvalds 			ip_rt_put(rt);
6731da177e4SLinus Torvalds 			goto tx_error;
6741da177e4SLinus Torvalds 		}
6751da177e4SLinus Torvalds 	}
6761da177e4SLinus Torvalds #endif
6771da177e4SLinus Torvalds 
6781da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
6791da177e4SLinus Torvalds 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
6801da177e4SLinus Torvalds 			tunnel->err_count--;
6811da177e4SLinus Torvalds 
6821da177e4SLinus Torvalds 			dst_link_failure(skb);
6831da177e4SLinus Torvalds 		} else
6841da177e4SLinus Torvalds 			tunnel->err_count = 0;
6851da177e4SLinus Torvalds 	}
6861da177e4SLinus Torvalds 
6871da177e4SLinus Torvalds 	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
6881da177e4SLinus Torvalds 
689cfbba49dSPatrick McHardy 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
690cfbba49dSPatrick McHardy 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
6911da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
6921da177e4SLinus Torvalds 		if (!new_skb) {
6931da177e4SLinus Torvalds 			ip_rt_put(rt);
6941da177e4SLinus Torvalds 			stats->tx_dropped++;
6951da177e4SLinus Torvalds 			dev_kfree_skb(skb);
6961da177e4SLinus Torvalds 			tunnel->recursion--;
6971da177e4SLinus Torvalds 			return 0;
6981da177e4SLinus Torvalds 		}
6991da177e4SLinus Torvalds 		if (skb->sk)
7001da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
7011da177e4SLinus Torvalds 		dev_kfree_skb(skb);
7021da177e4SLinus Torvalds 		skb = new_skb;
703eddc9ec5SArnaldo Carvalho de Melo 		old_iph = ip_hdr(skb);
7041da177e4SLinus Torvalds 	}
7051da177e4SLinus Torvalds 
706b0e380b1SArnaldo Carvalho de Melo 	skb->transport_header = skb->network_header;
707e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, gre_hlen);
708e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
7091da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
71048d5cad8SPatrick McHardy 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
71148d5cad8SPatrick McHardy 			      IPSKB_REROUTED);
7121da177e4SLinus Torvalds 	dst_release(skb->dst);
7131da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
7141da177e4SLinus Torvalds 
7151da177e4SLinus Torvalds 	/*
7161da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
7171da177e4SLinus Torvalds 	 */
7181da177e4SLinus Torvalds 
719eddc9ec5SArnaldo Carvalho de Melo 	iph 			=	ip_hdr(skb);
7201da177e4SLinus Torvalds 	iph->version		=	4;
7211da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr) >> 2;
7221da177e4SLinus Torvalds 	iph->frag_off		=	df;
7231da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_GRE;
7241da177e4SLinus Torvalds 	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
7251da177e4SLinus Torvalds 	iph->daddr		=	rt->rt_dst;
7261da177e4SLinus Torvalds 	iph->saddr		=	rt->rt_src;
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0) {
7291da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
7301da177e4SLinus Torvalds 			iph->ttl = old_iph->ttl;
7311da177e4SLinus Torvalds #ifdef CONFIG_IPV6
7321da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6))
7331da177e4SLinus Torvalds 			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
7341da177e4SLinus Torvalds #endif
7351da177e4SLinus Torvalds 		else
7361da177e4SLinus Torvalds 			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
7371da177e4SLinus Torvalds 	}
7381da177e4SLinus Torvalds 
739d5a0a1e3SAl Viro 	((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
740d5a0a1e3SAl Viro 	((__be16*)(iph+1))[1] = skb->protocol;
7411da177e4SLinus Torvalds 
7421da177e4SLinus Torvalds 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
743d5a0a1e3SAl Viro 		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
7441da177e4SLinus Torvalds 
7451da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_SEQ) {
7461da177e4SLinus Torvalds 			++tunnel->o_seqno;
7471da177e4SLinus Torvalds 			*ptr = htonl(tunnel->o_seqno);
7481da177e4SLinus Torvalds 			ptr--;
7491da177e4SLinus Torvalds 		}
7501da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_KEY) {
7511da177e4SLinus Torvalds 			*ptr = tunnel->parms.o_key;
7521da177e4SLinus Torvalds 			ptr--;
7531da177e4SLinus Torvalds 		}
7541da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_CSUM) {
7551da177e4SLinus Torvalds 			*ptr = 0;
7565f92a738SAl Viro 			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
7571da177e4SLinus Torvalds 		}
7581da177e4SLinus Torvalds 	}
7591da177e4SLinus Torvalds 
7601da177e4SLinus Torvalds 	nf_reset(skb);
7611da177e4SLinus Torvalds 
7621da177e4SLinus Torvalds 	IPTUNNEL_XMIT();
7631da177e4SLinus Torvalds 	tunnel->recursion--;
7641da177e4SLinus Torvalds 	return 0;
7651da177e4SLinus Torvalds 
7661da177e4SLinus Torvalds tx_error_icmp:
7671da177e4SLinus Torvalds 	dst_link_failure(skb);
7681da177e4SLinus Torvalds 
7691da177e4SLinus Torvalds tx_error:
7701da177e4SLinus Torvalds 	stats->tx_errors++;
7711da177e4SLinus Torvalds 	dev_kfree_skb(skb);
7721da177e4SLinus Torvalds 	tunnel->recursion--;
7731da177e4SLinus Torvalds 	return 0;
7741da177e4SLinus Torvalds }
7751da177e4SLinus Torvalds 
776ee34c1ebSMichal Schmidt static void ipgre_tunnel_bind_dev(struct net_device *dev)
777ee34c1ebSMichal Schmidt {
778ee34c1ebSMichal Schmidt 	struct net_device *tdev = NULL;
779ee34c1ebSMichal Schmidt 	struct ip_tunnel *tunnel;
780ee34c1ebSMichal Schmidt 	struct iphdr *iph;
781ee34c1ebSMichal Schmidt 	int hlen = LL_MAX_HEADER;
782ee34c1ebSMichal Schmidt 	int mtu = ETH_DATA_LEN;
783ee34c1ebSMichal Schmidt 	int addend = sizeof(struct iphdr) + 4;
784ee34c1ebSMichal Schmidt 
785ee34c1ebSMichal Schmidt 	tunnel = netdev_priv(dev);
786ee34c1ebSMichal Schmidt 	iph = &tunnel->parms.iph;
787ee34c1ebSMichal Schmidt 
788*c95b819aSHerbert Xu 	/* Guess output device to choose reasonable mtu and needed_headroom */
789ee34c1ebSMichal Schmidt 
790ee34c1ebSMichal Schmidt 	if (iph->daddr) {
791ee34c1ebSMichal Schmidt 		struct flowi fl = { .oif = tunnel->parms.link,
792ee34c1ebSMichal Schmidt 				    .nl_u = { .ip4_u =
793ee34c1ebSMichal Schmidt 					      { .daddr = iph->daddr,
794ee34c1ebSMichal Schmidt 						.saddr = iph->saddr,
795ee34c1ebSMichal Schmidt 						.tos = RT_TOS(iph->tos) } },
796ee34c1ebSMichal Schmidt 				    .proto = IPPROTO_GRE };
797ee34c1ebSMichal Schmidt 		struct rtable *rt;
79896635522SPavel Emelyanov 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
799ee34c1ebSMichal Schmidt 			tdev = rt->u.dst.dev;
800ee34c1ebSMichal Schmidt 			ip_rt_put(rt);
801ee34c1ebSMichal Schmidt 		}
802ee34c1ebSMichal Schmidt 		dev->flags |= IFF_POINTOPOINT;
803ee34c1ebSMichal Schmidt 	}
804ee34c1ebSMichal Schmidt 
805ee34c1ebSMichal Schmidt 	if (!tdev && tunnel->parms.link)
80696635522SPavel Emelyanov 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
807ee34c1ebSMichal Schmidt 
808ee34c1ebSMichal Schmidt 	if (tdev) {
809*c95b819aSHerbert Xu 		hlen = tdev->hard_header_len + tdev->needed_headroom;
810ee34c1ebSMichal Schmidt 		mtu = tdev->mtu;
811ee34c1ebSMichal Schmidt 	}
812ee34c1ebSMichal Schmidt 	dev->iflink = tunnel->parms.link;
813ee34c1ebSMichal Schmidt 
814ee34c1ebSMichal Schmidt 	/* Precalculate GRE options length */
815ee34c1ebSMichal Schmidt 	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
816ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_CSUM)
817ee34c1ebSMichal Schmidt 			addend += 4;
818ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_KEY)
819ee34c1ebSMichal Schmidt 			addend += 4;
820ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_SEQ)
821ee34c1ebSMichal Schmidt 			addend += 4;
822ee34c1ebSMichal Schmidt 	}
823*c95b819aSHerbert Xu 	dev->needed_headroom = addend + hlen;
824*c95b819aSHerbert Xu 	dev->mtu = mtu - dev->hard_header_len - addend;
825ee34c1ebSMichal Schmidt 	tunnel->hlen = addend;
826ee34c1ebSMichal Schmidt 
827ee34c1ebSMichal Schmidt }
828ee34c1ebSMichal Schmidt 
8291da177e4SLinus Torvalds static int
8301da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
8311da177e4SLinus Torvalds {
8321da177e4SLinus Torvalds 	int err = 0;
8331da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
8341da177e4SLinus Torvalds 	struct ip_tunnel *t;
835f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
836f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
8371da177e4SLinus Torvalds 
8381da177e4SLinus Torvalds 	switch (cmd) {
8391da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
8401da177e4SLinus Torvalds 		t = NULL;
8417daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
8421da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
8431da177e4SLinus Torvalds 				err = -EFAULT;
8441da177e4SLinus Torvalds 				break;
8451da177e4SLinus Torvalds 			}
846f57e7d5aSPavel Emelyanov 			t = ipgre_tunnel_locate(net, &p, 0);
8471da177e4SLinus Torvalds 		}
8481da177e4SLinus Torvalds 		if (t == NULL)
8492941a486SPatrick McHardy 			t = netdev_priv(dev);
8501da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
8511da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
8521da177e4SLinus Torvalds 			err = -EFAULT;
8531da177e4SLinus Torvalds 		break;
8541da177e4SLinus Torvalds 
8551da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
8561da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
8571da177e4SLinus Torvalds 		err = -EPERM;
8581da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
8591da177e4SLinus Torvalds 			goto done;
8601da177e4SLinus Torvalds 
8611da177e4SLinus Torvalds 		err = -EFAULT;
8621da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
8631da177e4SLinus Torvalds 			goto done;
8641da177e4SLinus Torvalds 
8651da177e4SLinus Torvalds 		err = -EINVAL;
8661da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
8671da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
8681da177e4SLinus Torvalds 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
8691da177e4SLinus Torvalds 			goto done;
8701da177e4SLinus Torvalds 		if (p.iph.ttl)
8711da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
8721da177e4SLinus Torvalds 
8731da177e4SLinus Torvalds 		if (!(p.i_flags&GRE_KEY))
8741da177e4SLinus Torvalds 			p.i_key = 0;
8751da177e4SLinus Torvalds 		if (!(p.o_flags&GRE_KEY))
8761da177e4SLinus Torvalds 			p.o_key = 0;
8771da177e4SLinus Torvalds 
878f57e7d5aSPavel Emelyanov 		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
8791da177e4SLinus Torvalds 
8807daa0004SPavel Emelyanov 		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
8811da177e4SLinus Torvalds 			if (t != NULL) {
8821da177e4SLinus Torvalds 				if (t->dev != dev) {
8831da177e4SLinus Torvalds 					err = -EEXIST;
8841da177e4SLinus Torvalds 					break;
8851da177e4SLinus Torvalds 				}
8861da177e4SLinus Torvalds 			} else {
8871da177e4SLinus Torvalds 				unsigned nflags=0;
8881da177e4SLinus Torvalds 
8892941a486SPatrick McHardy 				t = netdev_priv(dev);
8901da177e4SLinus Torvalds 
891f97c1e0cSJoe Perches 				if (ipv4_is_multicast(p.iph.daddr))
8921da177e4SLinus Torvalds 					nflags = IFF_BROADCAST;
8931da177e4SLinus Torvalds 				else if (p.iph.daddr)
8941da177e4SLinus Torvalds 					nflags = IFF_POINTOPOINT;
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
8971da177e4SLinus Torvalds 					err = -EINVAL;
8981da177e4SLinus Torvalds 					break;
8991da177e4SLinus Torvalds 				}
900f57e7d5aSPavel Emelyanov 				ipgre_tunnel_unlink(ign, t);
9011da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
9021da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
9031da177e4SLinus Torvalds 				t->parms.i_key = p.i_key;
9041da177e4SLinus Torvalds 				t->parms.o_key = p.o_key;
9051da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
9061da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
907f57e7d5aSPavel Emelyanov 				ipgre_tunnel_link(ign, t);
9081da177e4SLinus Torvalds 				netdev_state_change(dev);
9091da177e4SLinus Torvalds 			}
9101da177e4SLinus Torvalds 		}
9111da177e4SLinus Torvalds 
9121da177e4SLinus Torvalds 		if (t) {
9131da177e4SLinus Torvalds 			err = 0;
9141da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
9151da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
9161da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
9171da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
918ee34c1ebSMichal Schmidt 				if (t->parms.link != p.link) {
919ee34c1ebSMichal Schmidt 					t->parms.link = p.link;
920ee34c1ebSMichal Schmidt 					ipgre_tunnel_bind_dev(dev);
921ee34c1ebSMichal Schmidt 					netdev_state_change(dev);
922ee34c1ebSMichal Schmidt 				}
9231da177e4SLinus Torvalds 			}
9241da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
9251da177e4SLinus Torvalds 				err = -EFAULT;
9261da177e4SLinus Torvalds 		} else
9271da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
9281da177e4SLinus Torvalds 		break;
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
9311da177e4SLinus Torvalds 		err = -EPERM;
9321da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
9331da177e4SLinus Torvalds 			goto done;
9341da177e4SLinus Torvalds 
9357daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
9361da177e4SLinus Torvalds 			err = -EFAULT;
9371da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
9381da177e4SLinus Torvalds 				goto done;
9391da177e4SLinus Torvalds 			err = -ENOENT;
940f57e7d5aSPavel Emelyanov 			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
9411da177e4SLinus Torvalds 				goto done;
9421da177e4SLinus Torvalds 			err = -EPERM;
9437daa0004SPavel Emelyanov 			if (t == netdev_priv(ign->fb_tunnel_dev))
9441da177e4SLinus Torvalds 				goto done;
9451da177e4SLinus Torvalds 			dev = t->dev;
9461da177e4SLinus Torvalds 		}
94722f8cde5SStephen Hemminger 		unregister_netdevice(dev);
94822f8cde5SStephen Hemminger 		err = 0;
9491da177e4SLinus Torvalds 		break;
9501da177e4SLinus Torvalds 
9511da177e4SLinus Torvalds 	default:
9521da177e4SLinus Torvalds 		err = -EINVAL;
9531da177e4SLinus Torvalds 	}
9541da177e4SLinus Torvalds 
9551da177e4SLinus Torvalds done:
9561da177e4SLinus Torvalds 	return err;
9571da177e4SLinus Torvalds }
9581da177e4SLinus Torvalds 
9591da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
9601da177e4SLinus Torvalds {
9612941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
962*c95b819aSHerbert Xu 	if (new_mtu < 68 ||
963*c95b819aSHerbert Xu 	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
9641da177e4SLinus Torvalds 		return -EINVAL;
9651da177e4SLinus Torvalds 	dev->mtu = new_mtu;
9661da177e4SLinus Torvalds 	return 0;
9671da177e4SLinus Torvalds }
9681da177e4SLinus Torvalds 
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
9971da177e4SLinus Torvalds 
9983b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
9993b04dddeSStephen Hemminger 			unsigned short type,
10003b04dddeSStephen Hemminger 			const void *daddr, const void *saddr, unsigned len)
10011da177e4SLinus Torvalds {
10022941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
10031da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1004d5a0a1e3SAl Viro 	__be16 *p = (__be16*)(iph+1);
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
10071da177e4SLinus Torvalds 	p[0]		= t->parms.o_flags;
10081da177e4SLinus Torvalds 	p[1]		= htons(type);
10091da177e4SLinus Torvalds 
10101da177e4SLinus Torvalds 	/*
10111da177e4SLinus Torvalds 	 *	Set the source hardware address.
10121da177e4SLinus Torvalds 	 */
10131da177e4SLinus Torvalds 
10141da177e4SLinus Torvalds 	if (saddr)
10151da177e4SLinus Torvalds 		memcpy(&iph->saddr, saddr, 4);
10161da177e4SLinus Torvalds 
10171da177e4SLinus Torvalds 	if (daddr) {
10181da177e4SLinus Torvalds 		memcpy(&iph->daddr, daddr, 4);
10191da177e4SLinus Torvalds 		return t->hlen;
10201da177e4SLinus Torvalds 	}
1021f97c1e0cSJoe Perches 	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
10221da177e4SLinus Torvalds 		return t->hlen;
10231da177e4SLinus Torvalds 
10241da177e4SLinus Torvalds 	return -t->hlen;
10251da177e4SLinus Torvalds }
10261da177e4SLinus Torvalds 
10276a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
10286a5f44d7STimo Teras {
10296a5f44d7STimo Teras 	struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
10306a5f44d7STimo Teras 	memcpy(haddr, &iph->saddr, 4);
10316a5f44d7STimo Teras 	return 4;
10326a5f44d7STimo Teras }
10336a5f44d7STimo Teras 
10343b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = {
10353b04dddeSStephen Hemminger 	.create	= ipgre_header,
10366a5f44d7STimo Teras 	.parse	= ipgre_header_parse,
10373b04dddeSStephen Hemminger };
10383b04dddeSStephen Hemminger 
10396a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST
10401da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev)
10411da177e4SLinus Torvalds {
10422941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
10431da177e4SLinus Torvalds 
1044f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
10451da177e4SLinus Torvalds 		struct flowi fl = { .oif = t->parms.link,
10461da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
10471da177e4SLinus Torvalds 					      { .daddr = t->parms.iph.daddr,
10481da177e4SLinus Torvalds 						.saddr = t->parms.iph.saddr,
10491da177e4SLinus Torvalds 						.tos = RT_TOS(t->parms.iph.tos) } },
10501da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
10511da177e4SLinus Torvalds 		struct rtable *rt;
105296635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
10531da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
10541da177e4SLinus Torvalds 		dev = rt->u.dst.dev;
10551da177e4SLinus Torvalds 		ip_rt_put(rt);
1056e5ed6399SHerbert Xu 		if (__in_dev_get_rtnl(dev) == NULL)
10571da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
10581da177e4SLinus Torvalds 		t->mlink = dev->ifindex;
1059e5ed6399SHerbert Xu 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
10601da177e4SLinus Torvalds 	}
10611da177e4SLinus Torvalds 	return 0;
10621da177e4SLinus Torvalds }
10631da177e4SLinus Torvalds 
10641da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev)
10651da177e4SLinus Torvalds {
10662941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
1067f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
10687fee0ca2SDenis V. Lunev 		struct in_device *in_dev;
1069c346dca1SYOSHIFUJI Hideaki 		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
10701da177e4SLinus Torvalds 		if (in_dev) {
10711da177e4SLinus Torvalds 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
10721da177e4SLinus Torvalds 			in_dev_put(in_dev);
10731da177e4SLinus Torvalds 		}
10741da177e4SLinus Torvalds 	}
10751da177e4SLinus Torvalds 	return 0;
10761da177e4SLinus Torvalds }
10771da177e4SLinus Torvalds 
10781da177e4SLinus Torvalds #endif
10791da177e4SLinus Torvalds 
10801da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev)
10811da177e4SLinus Torvalds {
10821da177e4SLinus Torvalds 	dev->uninit		= ipgre_tunnel_uninit;
10831da177e4SLinus Torvalds 	dev->destructor 	= free_netdev;
10841da177e4SLinus Torvalds 	dev->hard_start_xmit	= ipgre_tunnel_xmit;
10851da177e4SLinus Torvalds 	dev->do_ioctl		= ipgre_tunnel_ioctl;
10861da177e4SLinus Torvalds 	dev->change_mtu		= ipgre_tunnel_change_mtu;
10871da177e4SLinus Torvalds 
10881da177e4SLinus Torvalds 	dev->type		= ARPHRD_IPGRE;
1089*c95b819aSHerbert Xu 	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
109046f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
10911da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
10921da177e4SLinus Torvalds 	dev->iflink		= 0;
10931da177e4SLinus Torvalds 	dev->addr_len		= 4;
10940b67ecebSPavel Emelyanov 	dev->features		|= NETIF_F_NETNS_LOCAL;
10951da177e4SLinus Torvalds }
10961da177e4SLinus Torvalds 
10971da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev)
10981da177e4SLinus Torvalds {
10991da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
11001da177e4SLinus Torvalds 	struct iphdr *iph;
11011da177e4SLinus Torvalds 
11022941a486SPatrick McHardy 	tunnel = netdev_priv(dev);
11031da177e4SLinus Torvalds 	iph = &tunnel->parms.iph;
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds 	tunnel->dev = dev;
11061da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
11071da177e4SLinus Torvalds 
11081da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
11091da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
11101da177e4SLinus Torvalds 
1111ee34c1ebSMichal Schmidt 	ipgre_tunnel_bind_dev(dev);
11121da177e4SLinus Torvalds 
11131da177e4SLinus Torvalds 	if (iph->daddr) {
11141da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
1115f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
11161da177e4SLinus Torvalds 			if (!iph->saddr)
11171da177e4SLinus Torvalds 				return -EINVAL;
11181da177e4SLinus Torvalds 			dev->flags = IFF_BROADCAST;
11193b04dddeSStephen Hemminger 			dev->header_ops = &ipgre_header_ops;
11201da177e4SLinus Torvalds 			dev->open = ipgre_open;
11211da177e4SLinus Torvalds 			dev->stop = ipgre_close;
11221da177e4SLinus Torvalds 		}
11231da177e4SLinus Torvalds #endif
1124ee34c1ebSMichal Schmidt 	} else
11256a5f44d7STimo Teras 		dev->header_ops = &ipgre_header_ops;
11261da177e4SLinus Torvalds 
11271da177e4SLinus Torvalds 	return 0;
11281da177e4SLinus Torvalds }
11291da177e4SLinus Torvalds 
11307daa0004SPavel Emelyanov static int ipgre_fb_tunnel_init(struct net_device *dev)
11311da177e4SLinus Torvalds {
11322941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
11331da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
1134eb8ce741SPavel Emelyanov 	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
11351da177e4SLinus Torvalds 
11361da177e4SLinus Torvalds 	tunnel->dev = dev;
11371da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
11381da177e4SLinus Torvalds 
11391da177e4SLinus Torvalds 	iph->version		= 4;
11401da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_GRE;
11411da177e4SLinus Torvalds 	iph->ihl		= 5;
11421da177e4SLinus Torvalds 	tunnel->hlen		= sizeof(struct iphdr) + 4;
11431da177e4SLinus Torvalds 
11441da177e4SLinus Torvalds 	dev_hold(dev);
1145eb8ce741SPavel Emelyanov 	ign->tunnels_wc[0]	= tunnel;
11461da177e4SLinus Torvalds 	return 0;
11471da177e4SLinus Torvalds }
11481da177e4SLinus Torvalds 
11491da177e4SLinus Torvalds 
11501da177e4SLinus Torvalds static struct net_protocol ipgre_protocol = {
11511da177e4SLinus Torvalds 	.handler	=	ipgre_rcv,
11521da177e4SLinus Torvalds 	.err_handler	=	ipgre_err,
1153f96c148fSPavel Emelyanov 	.netns_ok	=	1,
11541da177e4SLinus Torvalds };
11551da177e4SLinus Torvalds 
1156eb8ce741SPavel Emelyanov static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1157eb8ce741SPavel Emelyanov {
1158eb8ce741SPavel Emelyanov 	int prio;
1159eb8ce741SPavel Emelyanov 
1160eb8ce741SPavel Emelyanov 	for (prio = 0; prio < 4; prio++) {
1161eb8ce741SPavel Emelyanov 		int h;
1162eb8ce741SPavel Emelyanov 		for (h = 0; h < HASH_SIZE; h++) {
1163eb8ce741SPavel Emelyanov 			struct ip_tunnel *t;
1164eb8ce741SPavel Emelyanov 			while ((t = ign->tunnels[prio][h]) != NULL)
1165eb8ce741SPavel Emelyanov 				unregister_netdevice(t->dev);
1166eb8ce741SPavel Emelyanov 		}
1167eb8ce741SPavel Emelyanov 	}
1168eb8ce741SPavel Emelyanov }
1169eb8ce741SPavel Emelyanov 
117059a4c759SPavel Emelyanov static int ipgre_init_net(struct net *net)
117159a4c759SPavel Emelyanov {
117259a4c759SPavel Emelyanov 	int err;
117359a4c759SPavel Emelyanov 	struct ipgre_net *ign;
117459a4c759SPavel Emelyanov 
117559a4c759SPavel Emelyanov 	err = -ENOMEM;
1176eb8ce741SPavel Emelyanov 	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
117759a4c759SPavel Emelyanov 	if (ign == NULL)
117859a4c759SPavel Emelyanov 		goto err_alloc;
117959a4c759SPavel Emelyanov 
118059a4c759SPavel Emelyanov 	err = net_assign_generic(net, ipgre_net_id, ign);
118159a4c759SPavel Emelyanov 	if (err < 0)
118259a4c759SPavel Emelyanov 		goto err_assign;
118359a4c759SPavel Emelyanov 
11847daa0004SPavel Emelyanov 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
11857daa0004SPavel Emelyanov 					   ipgre_tunnel_setup);
11867daa0004SPavel Emelyanov 	if (!ign->fb_tunnel_dev) {
11877daa0004SPavel Emelyanov 		err = -ENOMEM;
11887daa0004SPavel Emelyanov 		goto err_alloc_dev;
11897daa0004SPavel Emelyanov 	}
11907daa0004SPavel Emelyanov 
11917daa0004SPavel Emelyanov 	ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
11927daa0004SPavel Emelyanov 	dev_net_set(ign->fb_tunnel_dev, net);
11937daa0004SPavel Emelyanov 
11947daa0004SPavel Emelyanov 	if ((err = register_netdev(ign->fb_tunnel_dev)))
11957daa0004SPavel Emelyanov 		goto err_reg_dev;
11967daa0004SPavel Emelyanov 
119759a4c759SPavel Emelyanov 	return 0;
119859a4c759SPavel Emelyanov 
11997daa0004SPavel Emelyanov err_reg_dev:
12007daa0004SPavel Emelyanov 	free_netdev(ign->fb_tunnel_dev);
12017daa0004SPavel Emelyanov err_alloc_dev:
12027daa0004SPavel Emelyanov 	/* nothing */
120359a4c759SPavel Emelyanov err_assign:
120459a4c759SPavel Emelyanov 	kfree(ign);
120559a4c759SPavel Emelyanov err_alloc:
120659a4c759SPavel Emelyanov 	return err;
120759a4c759SPavel Emelyanov }
120859a4c759SPavel Emelyanov 
120959a4c759SPavel Emelyanov static void ipgre_exit_net(struct net *net)
121059a4c759SPavel Emelyanov {
121159a4c759SPavel Emelyanov 	struct ipgre_net *ign;
121259a4c759SPavel Emelyanov 
121359a4c759SPavel Emelyanov 	ign = net_generic(net, ipgre_net_id);
12147daa0004SPavel Emelyanov 	rtnl_lock();
1215eb8ce741SPavel Emelyanov 	ipgre_destroy_tunnels(ign);
12167daa0004SPavel Emelyanov 	rtnl_unlock();
121759a4c759SPavel Emelyanov 	kfree(ign);
121859a4c759SPavel Emelyanov }
121959a4c759SPavel Emelyanov 
122059a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = {
122159a4c759SPavel Emelyanov 	.init = ipgre_init_net,
122259a4c759SPavel Emelyanov 	.exit = ipgre_exit_net,
122359a4c759SPavel Emelyanov };
12241da177e4SLinus Torvalds 
12251da177e4SLinus Torvalds /*
12261da177e4SLinus Torvalds  *	And now the modules code and kernel interface.
12271da177e4SLinus Torvalds  */
12281da177e4SLinus Torvalds 
12291da177e4SLinus Torvalds static int __init ipgre_init(void)
12301da177e4SLinus Torvalds {
12311da177e4SLinus Torvalds 	int err;
12321da177e4SLinus Torvalds 
12331da177e4SLinus Torvalds 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
12341da177e4SLinus Torvalds 
12351da177e4SLinus Torvalds 	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
12361da177e4SLinus Torvalds 		printk(KERN_INFO "ipgre init: can't add protocol\n");
12371da177e4SLinus Torvalds 		return -EAGAIN;
12381da177e4SLinus Torvalds 	}
12391da177e4SLinus Torvalds 
124059a4c759SPavel Emelyanov 	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
124159a4c759SPavel Emelyanov 	if (err < 0)
12421da177e4SLinus Torvalds 		inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
12437daa0004SPavel Emelyanov 
12447daa0004SPavel Emelyanov 	return err;
12451da177e4SLinus Torvalds }
12461da177e4SLinus Torvalds 
1247db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void)
12481da177e4SLinus Torvalds {
12491da177e4SLinus Torvalds 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
12501da177e4SLinus Torvalds 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
12511da177e4SLinus Torvalds 
125259a4c759SPavel Emelyanov 	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
12531da177e4SLinus Torvalds }
12541da177e4SLinus Torvalds 
12551da177e4SLinus Torvalds module_init(ipgre_init);
12561da177e4SLinus Torvalds module_exit(ipgre_fini);
12571da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1258