xref: /linux/net/ipv4/ip_gre.c (revision b8c26a33c8b6f0a150e9cb38ed80b890be55395c)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	GRE over IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
71da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
81da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
91da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds 
134fc268d2SRandy Dunlap #include <linux/capability.h>
141da177e4SLinus Torvalds #include <linux/module.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <asm/uaccess.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
191da177e4SLinus Torvalds #include <linux/netdevice.h>
201da177e4SLinus Torvalds #include <linux/in.h>
211da177e4SLinus Torvalds #include <linux/tcp.h>
221da177e4SLinus Torvalds #include <linux/udp.h>
231da177e4SLinus Torvalds #include <linux/if_arp.h>
241da177e4SLinus Torvalds #include <linux/mroute.h>
251da177e4SLinus Torvalds #include <linux/init.h>
261da177e4SLinus Torvalds #include <linux/in6.h>
271da177e4SLinus Torvalds #include <linux/inetdevice.h>
281da177e4SLinus Torvalds #include <linux/igmp.h>
291da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
30e1a80002SHerbert Xu #include <linux/etherdevice.h>
3146f25dffSKris Katterjohn #include <linux/if_ether.h>
321da177e4SLinus Torvalds 
331da177e4SLinus Torvalds #include <net/sock.h>
341da177e4SLinus Torvalds #include <net/ip.h>
351da177e4SLinus Torvalds #include <net/icmp.h>
361da177e4SLinus Torvalds #include <net/protocol.h>
371da177e4SLinus Torvalds #include <net/ipip.h>
381da177e4SLinus Torvalds #include <net/arp.h>
391da177e4SLinus Torvalds #include <net/checksum.h>
401da177e4SLinus Torvalds #include <net/dsfield.h>
411da177e4SLinus Torvalds #include <net/inet_ecn.h>
421da177e4SLinus Torvalds #include <net/xfrm.h>
4359a4c759SPavel Emelyanov #include <net/net_namespace.h>
4459a4c759SPavel Emelyanov #include <net/netns/generic.h>
45c19e654dSHerbert Xu #include <net/rtnetlink.h>
461da177e4SLinus Torvalds 
471da177e4SLinus Torvalds #ifdef CONFIG_IPV6
481da177e4SLinus Torvalds #include <net/ipv6.h>
491da177e4SLinus Torvalds #include <net/ip6_fib.h>
501da177e4SLinus Torvalds #include <net/ip6_route.h>
511da177e4SLinus Torvalds #endif
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds /*
541da177e4SLinus Torvalds    Problems & solutions
551da177e4SLinus Torvalds    --------------------
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds    1. The most important issue is detecting local dead loops.
581da177e4SLinus Torvalds    They would cause complete host lockup in transmit, which
591da177e4SLinus Torvalds    would be "resolved" by stack overflow or, if queueing is enabled,
601da177e4SLinus Torvalds    with infinite looping in net_bh.
611da177e4SLinus Torvalds 
   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds    Current solution: t->recursion lock breaks dead loops. It looks
701da177e4SLinus Torvalds    like dev->tbusy flag, but I preferred new variable, because
711da177e4SLinus Torvalds    the semantics is different. One day, when hard_start_xmit
721da177e4SLinus Torvalds    will be multithreaded we will have to use skb->encapsulation.
731da177e4SLinus Torvalds 
741da177e4SLinus Torvalds 
751da177e4SLinus Torvalds 
761da177e4SLinus Torvalds    2. Networking dead loops would not kill routers, but would really
771da177e4SLinus Torvalds    kill network. IP hop limit plays role of "t->recursion" in this case,
781da177e4SLinus Torvalds    if we copy it from packet being encapsulated to upper header.
791da177e4SLinus Torvalds    It is very good solution, but it introduces two problems:
801da177e4SLinus Torvalds 
811da177e4SLinus Torvalds    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
821da177e4SLinus Torvalds      do not work over tunnels.
831da177e4SLinus Torvalds    - traceroute does not work. I planned to relay ICMP from tunnel,
841da177e4SLinus Torvalds      so that this problem would be solved and traceroute output
851da177e4SLinus Torvalds      would even more informative. This idea appeared to be wrong:
861da177e4SLinus Torvalds      only Linux complies to rfc1812 now (yes, guys, Linux is the only
871da177e4SLinus Torvalds      true router now :-)), all routers (at least, in neighbourhood of mine)
881da177e4SLinus Torvalds      return only 8 bytes of payload. It is the end.
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds    Hence, if we want that OSPF worked or traceroute said something reasonable,
911da177e4SLinus Torvalds    we should search for another solution.
921da177e4SLinus Torvalds 
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. In short, it is not a solution at all.
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds    Current solution: The solution was UNEXPECTEDLY SIMPLE.
981da177e4SLinus Torvalds    We force DF flag on tunnels with preconfigured hop limit,
991da177e4SLinus Torvalds    that is ALL. :-) Well, it does not remove the problem completely,
1001da177e4SLinus Torvalds    but exponential growth of network traffic is changed to linear
1011da177e4SLinus Torvalds    (branches, that exceed pmtu are pruned) and tunnel mtu
1021da177e4SLinus Torvalds    fastly degrades to value <68, where looping stops.
1031da177e4SLinus Torvalds    Yes, it is not good if there exists a router in the loop,
1041da177e4SLinus Torvalds    which does not force DF, even when encapsulating packets have DF set.
1051da177e4SLinus Torvalds    But it is not our problem! Nobody could accuse us, we made
1061da177e4SLinus Torvalds    all that we could make. Even if it is your gated who injected
1071da177e4SLinus Torvalds    fatal route to network, even if it were you who configured
1081da177e4SLinus Torvalds    fatal static route: you are innocent. :-)
1091da177e4SLinus Torvalds 
1101da177e4SLinus Torvalds 
1111da177e4SLinus Torvalds 
1121da177e4SLinus Torvalds    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
1131da177e4SLinus Torvalds    practically identical code. It would be good to glue them
1141da177e4SLinus Torvalds    together, but it is not very evident, how to make them modular.
1151da177e4SLinus Torvalds    sit is integral part of IPv6, ipip and gre are naturally modular.
1161da177e4SLinus Torvalds    We could extract common parts (hash table, ioctl etc)
1171da177e4SLinus Torvalds    to a separate module (ip_tunnel.c).
1181da177e4SLinus Torvalds 
1191da177e4SLinus Torvalds    Alexey Kuznetsov.
1201da177e4SLinus Torvalds  */
1211da177e4SLinus Torvalds 
122c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1231da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev);
1241da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev);
12542aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev);
1261da177e4SLinus Torvalds 
1271da177e4SLinus Torvalds /* Fallback tunnel: no source, no destination, no key, no options */
1281da177e4SLinus Torvalds 
/* Number of buckets in each of the four tunnel hash tables. */
#define HASH_SIZE  16

/*
 * Per-network-namespace GRE state, obtained with
 * net_generic(net, ipgre_net_id).
 */
struct ipgre_net {
	/*
	 * Four hash tables of configured tunnels.  The first index is the
	 * specificity computed by __ipgre_bucket(): bit 0 is set when the
	 * tunnel has a local address, bit 1 when it has a non-multicast
	 * remote address.  Buckets are singly linked through ->next.
	 */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device whose tunnel is used when no configured tunnel matches. */
	struct net_device *fb_tunnel_dev;
};

/* Identifier under which struct ipgre_net is looked up via net_generic(). */
static int ipgre_net_id;
13759a4c759SPavel Emelyanov 
1381da177e4SLinus Torvalds /* Tunnel hash table */
1391da177e4SLinus Torvalds 
1401da177e4SLinus Torvalds /*
1411da177e4SLinus Torvalds    4 hash tables:
1421da177e4SLinus Torvalds 
1431da177e4SLinus Torvalds    3: (remote,local)
1441da177e4SLinus Torvalds    2: (remote,*)
1451da177e4SLinus Torvalds    1: (*,local)
1461da177e4SLinus Torvalds    0: (*,*)
1471da177e4SLinus Torvalds 
1481da177e4SLinus Torvalds    We require exact key match i.e. if a key is present in packet
1491da177e4SLinus Torvalds    it will match only tunnel with the same key; if it is not present,
1501da177e4SLinus Torvalds    it will match only keyless tunnel.
1511da177e4SLinus Torvalds 
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
1541da177e4SLinus Torvalds  */
1551da177e4SLinus Torvalds 
/*
 * Fold a 32-bit value (an address or a key) into a bucket index in the
 * range [0, HASH_SIZE): the value is XORed with itself shifted right by
 * four bits and the low bits are kept.
 *
 * The argument is fully parenthesized so that an expression argument is
 * hashed as a whole.  It is evaluated twice, so it must not have side
 * effects.  The mask assumes HASH_SIZE is a power of two.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1571da177e4SLinus Torvalds 
/*
 * Named views of the four tables in struct ipgre_net, listed from least to
 * most specific.  They are meant to be used as members, e.g.
 * ign->tunnels_wc[h].
 */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Guards the tunnel hash chains: taken for writing (with bottom halves
 * disabled) when a chain pointer is updated, and for reading around
 * lookups on the receive and ICMP error paths.
 */
static DEFINE_RWLOCK(ipgre_lock);
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */
1661da177e4SLinus Torvalds 
167f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
168e1a80002SHerbert Xu 					      __be32 remote, __be32 local,
169e1a80002SHerbert Xu 					      __be32 key, __be16 gre_proto)
1701da177e4SLinus Torvalds {
1711da177e4SLinus Torvalds 	unsigned h0 = HASH(remote);
1721da177e4SLinus Torvalds 	unsigned h1 = HASH(key);
1731da177e4SLinus Torvalds 	struct ip_tunnel *t;
174e1a80002SHerbert Xu 	struct ip_tunnel *t2 = NULL;
1757daa0004SPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
176e1a80002SHerbert Xu 	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177e1a80002SHerbert Xu 		       ARPHRD_ETHER : ARPHRD_IPGRE;
1781da177e4SLinus Torvalds 
179eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
1801da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
181e1a80002SHerbert Xu 			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
182e1a80002SHerbert Xu 				if (t->dev->type == dev_type)
1831da177e4SLinus Torvalds 					return t;
184e1a80002SHerbert Xu 				if (t->dev->type == ARPHRD_IPGRE && !t2)
185e1a80002SHerbert Xu 					t2 = t;
1861da177e4SLinus Torvalds 			}
1871da177e4SLinus Torvalds 		}
188e1a80002SHerbert Xu 	}
189e1a80002SHerbert Xu 
190eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
1911da177e4SLinus Torvalds 		if (remote == t->parms.iph.daddr) {
192e1a80002SHerbert Xu 			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
193e1a80002SHerbert Xu 				if (t->dev->type == dev_type)
1941da177e4SLinus Torvalds 					return t;
195e1a80002SHerbert Xu 				if (t->dev->type == ARPHRD_IPGRE && !t2)
196e1a80002SHerbert Xu 					t2 = t;
1971da177e4SLinus Torvalds 			}
1981da177e4SLinus Torvalds 		}
199e1a80002SHerbert Xu 	}
200e1a80002SHerbert Xu 
201eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_l[h1]; t; t = t->next) {
2021da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr ||
203f97c1e0cSJoe Perches 		     (local == t->parms.iph.daddr &&
204f97c1e0cSJoe Perches 		      ipv4_is_multicast(local))) {
205e1a80002SHerbert Xu 			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
206e1a80002SHerbert Xu 				if (t->dev->type == dev_type)
2071da177e4SLinus Torvalds 					return t;
208e1a80002SHerbert Xu 				if (t->dev->type == ARPHRD_IPGRE && !t2)
209e1a80002SHerbert Xu 					t2 = t;
2101da177e4SLinus Torvalds 			}
2111da177e4SLinus Torvalds 		}
212e1a80002SHerbert Xu 	}
213e1a80002SHerbert Xu 
214eb8ce741SPavel Emelyanov 	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
215e1a80002SHerbert Xu 		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
216e1a80002SHerbert Xu 			if (t->dev->type == dev_type)
2171da177e4SLinus Torvalds 				return t;
218e1a80002SHerbert Xu 			if (t->dev->type == ARPHRD_IPGRE && !t2)
219e1a80002SHerbert Xu 				t2 = t;
2201da177e4SLinus Torvalds 		}
221e1a80002SHerbert Xu 	}
222e1a80002SHerbert Xu 
223e1a80002SHerbert Xu 	if (t2)
224e1a80002SHerbert Xu 		return t2;
2251da177e4SLinus Torvalds 
2267daa0004SPavel Emelyanov 	if (ign->fb_tunnel_dev->flags&IFF_UP)
2277daa0004SPavel Emelyanov 		return netdev_priv(ign->fb_tunnel_dev);
2281da177e4SLinus Torvalds 	return NULL;
2291da177e4SLinus Torvalds }
2301da177e4SLinus Torvalds 
231f57e7d5aSPavel Emelyanov static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
232f57e7d5aSPavel Emelyanov 		struct ip_tunnel_parm *parms)
2331da177e4SLinus Torvalds {
2345056a1efSYOSHIFUJI Hideaki 	__be32 remote = parms->iph.daddr;
2355056a1efSYOSHIFUJI Hideaki 	__be32 local = parms->iph.saddr;
2365056a1efSYOSHIFUJI Hideaki 	__be32 key = parms->i_key;
2371da177e4SLinus Torvalds 	unsigned h = HASH(key);
2381da177e4SLinus Torvalds 	int prio = 0;
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 	if (local)
2411da177e4SLinus Torvalds 		prio |= 1;
242f97c1e0cSJoe Perches 	if (remote && !ipv4_is_multicast(remote)) {
2431da177e4SLinus Torvalds 		prio |= 2;
2441da177e4SLinus Torvalds 		h ^= HASH(remote);
2451da177e4SLinus Torvalds 	}
2461da177e4SLinus Torvalds 
247eb8ce741SPavel Emelyanov 	return &ign->tunnels[prio][h];
2481da177e4SLinus Torvalds }
2491da177e4SLinus Torvalds 
250f57e7d5aSPavel Emelyanov static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
251f57e7d5aSPavel Emelyanov 		struct ip_tunnel *t)
2525056a1efSYOSHIFUJI Hideaki {
253f57e7d5aSPavel Emelyanov 	return __ipgre_bucket(ign, &t->parms);
2545056a1efSYOSHIFUJI Hideaki }
2555056a1efSYOSHIFUJI Hideaki 
256f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
2571da177e4SLinus Torvalds {
258f57e7d5aSPavel Emelyanov 	struct ip_tunnel **tp = ipgre_bucket(ign, t);
2591da177e4SLinus Torvalds 
2601da177e4SLinus Torvalds 	t->next = *tp;
2611da177e4SLinus Torvalds 	write_lock_bh(&ipgre_lock);
2621da177e4SLinus Torvalds 	*tp = t;
2631da177e4SLinus Torvalds 	write_unlock_bh(&ipgre_lock);
2641da177e4SLinus Torvalds }
2651da177e4SLinus Torvalds 
266f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
2671da177e4SLinus Torvalds {
2681da177e4SLinus Torvalds 	struct ip_tunnel **tp;
2691da177e4SLinus Torvalds 
270f57e7d5aSPavel Emelyanov 	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
2711da177e4SLinus Torvalds 		if (t == *tp) {
2721da177e4SLinus Torvalds 			write_lock_bh(&ipgre_lock);
2731da177e4SLinus Torvalds 			*tp = t->next;
2741da177e4SLinus Torvalds 			write_unlock_bh(&ipgre_lock);
2751da177e4SLinus Torvalds 			break;
2761da177e4SLinus Torvalds 		}
2771da177e4SLinus Torvalds 	}
2781da177e4SLinus Torvalds }
2791da177e4SLinus Torvalds 
280e1a80002SHerbert Xu static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
281e1a80002SHerbert Xu 					   struct ip_tunnel_parm *parms,
282e1a80002SHerbert Xu 					   int type)
2831da177e4SLinus Torvalds {
284d5a0a1e3SAl Viro 	__be32 remote = parms->iph.daddr;
285d5a0a1e3SAl Viro 	__be32 local = parms->iph.saddr;
286d5a0a1e3SAl Viro 	__be32 key = parms->i_key;
287e1a80002SHerbert Xu 	struct ip_tunnel *t, **tp;
288e1a80002SHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289e1a80002SHerbert Xu 
290e1a80002SHerbert Xu 	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
291e1a80002SHerbert Xu 		if (local == t->parms.iph.saddr &&
292e1a80002SHerbert Xu 		    remote == t->parms.iph.daddr &&
293e1a80002SHerbert Xu 		    key == t->parms.i_key &&
294e1a80002SHerbert Xu 		    type == t->dev->type)
295e1a80002SHerbert Xu 			break;
296e1a80002SHerbert Xu 
297e1a80002SHerbert Xu 	return t;
298e1a80002SHerbert Xu }
299e1a80002SHerbert Xu 
300e1a80002SHerbert Xu static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
301e1a80002SHerbert Xu 		struct ip_tunnel_parm *parms, int create)
302e1a80002SHerbert Xu {
303e1a80002SHerbert Xu 	struct ip_tunnel *t, *nt;
3041da177e4SLinus Torvalds 	struct net_device *dev;
3051da177e4SLinus Torvalds 	char name[IFNAMSIZ];
306f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
3071da177e4SLinus Torvalds 
308e1a80002SHerbert Xu 	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
309e1a80002SHerbert Xu 	if (t || !create)
3101da177e4SLinus Torvalds 		return t;
3111da177e4SLinus Torvalds 
3121da177e4SLinus Torvalds 	if (parms->name[0])
3131da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
31434cc7ba6SPavel Emelyanov 	else
31534cc7ba6SPavel Emelyanov 		sprintf(name, "gre%%d");
3161da177e4SLinus Torvalds 
3171da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
3181da177e4SLinus Torvalds 	if (!dev)
3191da177e4SLinus Torvalds 	  return NULL;
3201da177e4SLinus Torvalds 
3210b67ecebSPavel Emelyanov 	dev_net_set(dev, net);
3220b67ecebSPavel Emelyanov 
323b37d428bSPavel Emelyanov 	if (strchr(name, '%')) {
324b37d428bSPavel Emelyanov 		if (dev_alloc_name(dev, name) < 0)
325b37d428bSPavel Emelyanov 			goto failed_free;
326b37d428bSPavel Emelyanov 	}
327b37d428bSPavel Emelyanov 
3282941a486SPatrick McHardy 	nt = netdev_priv(dev);
3291da177e4SLinus Torvalds 	nt->parms = *parms;
330c19e654dSHerbert Xu 	dev->rtnl_link_ops = &ipgre_link_ops;
3311da177e4SLinus Torvalds 
33242aa9162SHerbert Xu 	dev->mtu = ipgre_tunnel_bind_dev(dev);
33342aa9162SHerbert Xu 
334b37d428bSPavel Emelyanov 	if (register_netdevice(dev) < 0)
335b37d428bSPavel Emelyanov 		goto failed_free;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	dev_hold(dev);
338f57e7d5aSPavel Emelyanov 	ipgre_tunnel_link(ign, nt);
3391da177e4SLinus Torvalds 	return nt;
3401da177e4SLinus Torvalds 
341b37d428bSPavel Emelyanov failed_free:
342b37d428bSPavel Emelyanov 	free_netdev(dev);
3431da177e4SLinus Torvalds 	return NULL;
3441da177e4SLinus Torvalds }
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev)
3471da177e4SLinus Torvalds {
348f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
349f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350f57e7d5aSPavel Emelyanov 
351f57e7d5aSPavel Emelyanov 	ipgre_tunnel_unlink(ign, netdev_priv(dev));
3521da177e4SLinus Torvalds 	dev_put(dev);
3531da177e4SLinus Torvalds }
3541da177e4SLinus Torvalds 
3551da177e4SLinus Torvalds 
3561da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info)
3571da177e4SLinus Torvalds {
3581da177e4SLinus Torvalds 
359071f92d0SRami Rosen /* All the routers (except for Linux) return only
3601da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
3611da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds    Moreover, Cisco "wise men" put GRE key to the third word
3641da177e4SLinus Torvalds    in GRE header. It makes impossible maintaining even soft state for keyed
3651da177e4SLinus Torvalds    GRE tunnels with enabled checksum. Tell them "thank you".
3661da177e4SLinus Torvalds 
3671da177e4SLinus Torvalds    Well, I wonder, rfc1812 was written by Cisco employee,
3681da177e4SLinus Torvalds    what the hell these idiots break standrads established
3691da177e4SLinus Torvalds    by themself???
3701da177e4SLinus Torvalds  */
3711da177e4SLinus Torvalds 
3721da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb->data;
373d5a0a1e3SAl Viro 	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
3741da177e4SLinus Torvalds 	int grehlen = (iph->ihl<<2) + 4;
37588c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
37688c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
3771da177e4SLinus Torvalds 	struct ip_tunnel *t;
378d5a0a1e3SAl Viro 	__be16 flags;
3791da177e4SLinus Torvalds 
3801da177e4SLinus Torvalds 	flags = p[0];
3811da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
3821da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
3831da177e4SLinus Torvalds 			return;
3841da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
3851da177e4SLinus Torvalds 			grehlen += 4;
3861da177e4SLinus Torvalds 			if (flags&GRE_CSUM)
3871da177e4SLinus Torvalds 				grehlen += 4;
3881da177e4SLinus Torvalds 		}
3891da177e4SLinus Torvalds 	}
3901da177e4SLinus Torvalds 
3911da177e4SLinus Torvalds 	/* If only 8 bytes returned, keyed message will be dropped here */
3921da177e4SLinus Torvalds 	if (skb_headlen(skb) < grehlen)
3931da177e4SLinus Torvalds 		return;
3941da177e4SLinus Torvalds 
3951da177e4SLinus Torvalds 	switch (type) {
3961da177e4SLinus Torvalds 	default:
3971da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
3981da177e4SLinus Torvalds 		return;
3991da177e4SLinus Torvalds 
4001da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4011da177e4SLinus Torvalds 		switch (code) {
4021da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
4031da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
4041da177e4SLinus Torvalds 			/* Impossible event. */
4051da177e4SLinus Torvalds 			return;
4061da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
4071da177e4SLinus Torvalds 			/* Soft state for pmtu is maintained by IP core. */
4081da177e4SLinus Torvalds 			return;
4091da177e4SLinus Torvalds 		default:
4101da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
4111da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
4121da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
4131da177e4SLinus Torvalds 			 */
4141da177e4SLinus Torvalds 			break;
4151da177e4SLinus Torvalds 		}
4161da177e4SLinus Torvalds 		break;
4171da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4181da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
4191da177e4SLinus Torvalds 			return;
4201da177e4SLinus Torvalds 		break;
4211da177e4SLinus Torvalds 	}
4221da177e4SLinus Torvalds 
4231da177e4SLinus Torvalds 	read_lock(&ipgre_lock);
4243b4667f3SPavel Emelyanov 	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
425e1a80002SHerbert Xu 				flags & GRE_KEY ?
426e1a80002SHerbert Xu 				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427e1a80002SHerbert Xu 				p[1]);
428f97c1e0cSJoe Perches 	if (t == NULL || t->parms.iph.daddr == 0 ||
429f97c1e0cSJoe Perches 	    ipv4_is_multicast(t->parms.iph.daddr))
4301da177e4SLinus Torvalds 		goto out;
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
4331da177e4SLinus Torvalds 		goto out;
4341da177e4SLinus Torvalds 
4351da177e4SLinus Torvalds 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
4361da177e4SLinus Torvalds 		t->err_count++;
4371da177e4SLinus Torvalds 	else
4381da177e4SLinus Torvalds 		t->err_count = 1;
4391da177e4SLinus Torvalds 	t->err_time = jiffies;
4401da177e4SLinus Torvalds out:
4411da177e4SLinus Torvalds 	read_unlock(&ipgre_lock);
4421da177e4SLinus Torvalds 	return;
4431da177e4SLinus Torvalds }
4441da177e4SLinus Torvalds 
4451da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
4461da177e4SLinus Torvalds {
4471da177e4SLinus Torvalds 	if (INET_ECN_is_ce(iph->tos)) {
4481da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
449eddc9ec5SArnaldo Carvalho de Melo 			IP_ECN_set_ce(ip_hdr(skb));
4501da177e4SLinus Torvalds 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
4510660e03fSArnaldo Carvalho de Melo 			IP6_ECN_set_ce(ipv6_hdr(skb));
4521da177e4SLinus Torvalds 		}
4531da177e4SLinus Torvalds 	}
4541da177e4SLinus Torvalds }
4551da177e4SLinus Torvalds 
4561da177e4SLinus Torvalds static inline u8
4571da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
4581da177e4SLinus Torvalds {
4591da177e4SLinus Torvalds 	u8 inner = 0;
4601da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP))
4611da177e4SLinus Torvalds 		inner = old_iph->tos;
4621da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6))
4631da177e4SLinus Torvalds 		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
4641da177e4SLinus Torvalds 	return INET_ECN_encapsulate(tos, inner);
4651da177e4SLinus Torvalds }
4661da177e4SLinus Torvalds 
4671da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb)
4681da177e4SLinus Torvalds {
4691da177e4SLinus Torvalds 	struct iphdr *iph;
4701da177e4SLinus Torvalds 	u8     *h;
471d5a0a1e3SAl Viro 	__be16    flags;
472d3bc23e7SAl Viro 	__sum16   csum = 0;
473d5a0a1e3SAl Viro 	__be32 key = 0;
4741da177e4SLinus Torvalds 	u32    seqno = 0;
4751da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
4761da177e4SLinus Torvalds 	int    offset = 4;
477e1a80002SHerbert Xu 	__be16 gre_proto;
47864194c31SHerbert Xu 	unsigned int len;
4791da177e4SLinus Torvalds 
4801da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, 16))
4811da177e4SLinus Torvalds 		goto drop_nolock;
4821da177e4SLinus Torvalds 
483eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
4841da177e4SLinus Torvalds 	h = skb->data;
485d5a0a1e3SAl Viro 	flags = *(__be16*)h;
4861da177e4SLinus Torvalds 
4871da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
4881da177e4SLinus Torvalds 		/* - Version must be 0.
4891da177e4SLinus Torvalds 		   - We do not support routing headers.
4901da177e4SLinus Torvalds 		 */
4911da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
4921da177e4SLinus Torvalds 			goto drop_nolock;
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 		if (flags&GRE_CSUM) {
495fb286bb2SHerbert Xu 			switch (skb->ip_summed) {
49684fa7933SPatrick McHardy 			case CHECKSUM_COMPLETE:
497d3bc23e7SAl Viro 				csum = csum_fold(skb->csum);
498fb286bb2SHerbert Xu 				if (!csum)
499fb286bb2SHerbert Xu 					break;
500fb286bb2SHerbert Xu 				/* fall through */
501fb286bb2SHerbert Xu 			case CHECKSUM_NONE:
502fb286bb2SHerbert Xu 				skb->csum = 0;
503fb286bb2SHerbert Xu 				csum = __skb_checksum_complete(skb);
50484fa7933SPatrick McHardy 				skb->ip_summed = CHECKSUM_COMPLETE;
5051da177e4SLinus Torvalds 			}
5061da177e4SLinus Torvalds 			offset += 4;
5071da177e4SLinus Torvalds 		}
5081da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
509d5a0a1e3SAl Viro 			key = *(__be32*)(h + offset);
5101da177e4SLinus Torvalds 			offset += 4;
5111da177e4SLinus Torvalds 		}
5121da177e4SLinus Torvalds 		if (flags&GRE_SEQ) {
513d5a0a1e3SAl Viro 			seqno = ntohl(*(__be32*)(h + offset));
5141da177e4SLinus Torvalds 			offset += 4;
5151da177e4SLinus Torvalds 		}
5161da177e4SLinus Torvalds 	}
5171da177e4SLinus Torvalds 
518e1a80002SHerbert Xu 	gre_proto = *(__be16 *)(h + 2);
519e1a80002SHerbert Xu 
5201da177e4SLinus Torvalds 	read_lock(&ipgre_lock);
5213b4667f3SPavel Emelyanov 	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
522e1a80002SHerbert Xu 					  iph->saddr, iph->daddr, key,
523e1a80002SHerbert Xu 					  gre_proto))) {
524addd68ebSPavel Emelyanov 		struct net_device_stats *stats = &tunnel->dev->stats;
525addd68ebSPavel Emelyanov 
5261da177e4SLinus Torvalds 		secpath_reset(skb);
5271da177e4SLinus Torvalds 
528e1a80002SHerbert Xu 		skb->protocol = gre_proto;
5291da177e4SLinus Torvalds 		/* WCCP version 1 and 2 protocol decoding.
5301da177e4SLinus Torvalds 		 * - Change protocol to IP
5311da177e4SLinus Torvalds 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
5321da177e4SLinus Torvalds 		 */
533e1a80002SHerbert Xu 		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
534496c98dfSYOSHIFUJI Hideaki 			skb->protocol = htons(ETH_P_IP);
5351da177e4SLinus Torvalds 			if ((*(h + offset) & 0xF0) != 0x40)
5361da177e4SLinus Torvalds 				offset += 4;
5371da177e4SLinus Torvalds 		}
5381da177e4SLinus Torvalds 
5391d069167STimo Teras 		skb->mac_header = skb->network_header;
5404209fb60SArnaldo Carvalho de Melo 		__pskb_pull(skb, offset);
5419c70220bSArnaldo Carvalho de Melo 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
5421da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
5431da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
544f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
5451da177e4SLinus Torvalds 			/* Looped back packet, drop it! */
546ee6b9673SEric Dumazet 			if (skb->rtable->fl.iif == 0)
5471da177e4SLinus Torvalds 				goto drop;
548addd68ebSPavel Emelyanov 			stats->multicast++;
5491da177e4SLinus Torvalds 			skb->pkt_type = PACKET_BROADCAST;
5501da177e4SLinus Torvalds 		}
5511da177e4SLinus Torvalds #endif
5521da177e4SLinus Torvalds 
5531da177e4SLinus Torvalds 		if (((flags&GRE_CSUM) && csum) ||
5541da177e4SLinus Torvalds 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
555addd68ebSPavel Emelyanov 			stats->rx_crc_errors++;
556addd68ebSPavel Emelyanov 			stats->rx_errors++;
5571da177e4SLinus Torvalds 			goto drop;
5581da177e4SLinus Torvalds 		}
5591da177e4SLinus Torvalds 		if (tunnel->parms.i_flags&GRE_SEQ) {
5601da177e4SLinus Torvalds 			if (!(flags&GRE_SEQ) ||
5611da177e4SLinus Torvalds 			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
562addd68ebSPavel Emelyanov 				stats->rx_fifo_errors++;
563addd68ebSPavel Emelyanov 				stats->rx_errors++;
5641da177e4SLinus Torvalds 				goto drop;
5651da177e4SLinus Torvalds 			}
5661da177e4SLinus Torvalds 			tunnel->i_seqno = seqno + 1;
5671da177e4SLinus Torvalds 		}
568e1a80002SHerbert Xu 
56964194c31SHerbert Xu 		len = skb->len;
57064194c31SHerbert Xu 
571e1a80002SHerbert Xu 		/* Warning: All skb pointers will be invalidated! */
572e1a80002SHerbert Xu 		if (tunnel->dev->type == ARPHRD_ETHER) {
573e1a80002SHerbert Xu 			if (!pskb_may_pull(skb, ETH_HLEN)) {
574e1a80002SHerbert Xu 				stats->rx_length_errors++;
575e1a80002SHerbert Xu 				stats->rx_errors++;
576e1a80002SHerbert Xu 				goto drop;
577e1a80002SHerbert Xu 			}
578e1a80002SHerbert Xu 
579e1a80002SHerbert Xu 			iph = ip_hdr(skb);
580e1a80002SHerbert Xu 			skb->protocol = eth_type_trans(skb, tunnel->dev);
581e1a80002SHerbert Xu 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
582e1a80002SHerbert Xu 		}
583e1a80002SHerbert Xu 
584addd68ebSPavel Emelyanov 		stats->rx_packets++;
58564194c31SHerbert Xu 		stats->rx_bytes += len;
5861da177e4SLinus Torvalds 		skb->dev = tunnel->dev;
5871da177e4SLinus Torvalds 		dst_release(skb->dst);
5881da177e4SLinus Torvalds 		skb->dst = NULL;
5891da177e4SLinus Torvalds 		nf_reset(skb);
590e1a80002SHerbert Xu 
591e1a80002SHerbert Xu 		skb_reset_network_header(skb);
5921da177e4SLinus Torvalds 		ipgre_ecn_decapsulate(iph, skb);
593e1a80002SHerbert Xu 
5941da177e4SLinus Torvalds 		netif_rx(skb);
5951da177e4SLinus Torvalds 		read_unlock(&ipgre_lock);
5961da177e4SLinus Torvalds 		return(0);
5971da177e4SLinus Torvalds 	}
59845af08beSHerbert Xu 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds drop:
6011da177e4SLinus Torvalds 	read_unlock(&ipgre_lock);
6021da177e4SLinus Torvalds drop_nolock:
6031da177e4SLinus Torvalds 	kfree_skb(skb);
6041da177e4SLinus Torvalds 	return(0);
6051da177e4SLinus Torvalds }
6061da177e4SLinus Torvalds 
6071da177e4SLinus Torvalds static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
6081da177e4SLinus Torvalds {
6092941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
610addd68ebSPavel Emelyanov 	struct net_device_stats *stats = &tunnel->dev->stats;
611eddc9ec5SArnaldo Carvalho de Melo 	struct iphdr  *old_iph = ip_hdr(skb);
6121da177e4SLinus Torvalds 	struct iphdr  *tiph;
6131da177e4SLinus Torvalds 	u8     tos;
614d5a0a1e3SAl Viro 	__be16 df;
6151da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
6161da177e4SLinus Torvalds 	struct net_device *tdev;			/* Device to other host */
6171da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
618c2636b4dSChuck Lever 	unsigned int max_headroom;		/* The extra header space needed */
6191da177e4SLinus Torvalds 	int    gre_hlen;
620d5a0a1e3SAl Viro 	__be32 dst;
6211da177e4SLinus Torvalds 	int    mtu;
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 	if (tunnel->recursion++) {
624addd68ebSPavel Emelyanov 		stats->collisions++;
6251da177e4SLinus Torvalds 		goto tx_error;
6261da177e4SLinus Torvalds 	}
6271da177e4SLinus Torvalds 
628e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER)
629e1a80002SHerbert Xu 		IPCB(skb)->flags = 0;
630e1a80002SHerbert Xu 
631e1a80002SHerbert Xu 	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
6321da177e4SLinus Torvalds 		gre_hlen = 0;
6331da177e4SLinus Torvalds 		tiph = (struct iphdr *)skb->data;
6341da177e4SLinus Torvalds 	} else {
6351da177e4SLinus Torvalds 		gre_hlen = tunnel->hlen;
6361da177e4SLinus Torvalds 		tiph = &tunnel->parms.iph;
6371da177e4SLinus Torvalds 	}
6381da177e4SLinus Torvalds 
6391da177e4SLinus Torvalds 	if ((dst = tiph->daddr) == 0) {
6401da177e4SLinus Torvalds 		/* NBMA tunnel */
6411da177e4SLinus Torvalds 
6421da177e4SLinus Torvalds 		if (skb->dst == NULL) {
643addd68ebSPavel Emelyanov 			stats->tx_fifo_errors++;
6441da177e4SLinus Torvalds 			goto tx_error;
6451da177e4SLinus Torvalds 		}
6461da177e4SLinus Torvalds 
6471da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
648ee6b9673SEric Dumazet 			rt = skb->rtable;
6491da177e4SLinus Torvalds 			if ((dst = rt->rt_gateway) == 0)
6501da177e4SLinus Torvalds 				goto tx_error_icmp;
6511da177e4SLinus Torvalds 		}
6521da177e4SLinus Torvalds #ifdef CONFIG_IPV6
6531da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6)) {
6541da177e4SLinus Torvalds 			struct in6_addr *addr6;
6551da177e4SLinus Torvalds 			int addr_type;
6561da177e4SLinus Torvalds 			struct neighbour *neigh = skb->dst->neighbour;
6571da177e4SLinus Torvalds 
6581da177e4SLinus Torvalds 			if (neigh == NULL)
6591da177e4SLinus Torvalds 				goto tx_error;
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 			addr6 = (struct in6_addr *)&neigh->primary_key;
6621da177e4SLinus Torvalds 			addr_type = ipv6_addr_type(addr6);
6631da177e4SLinus Torvalds 
6641da177e4SLinus Torvalds 			if (addr_type == IPV6_ADDR_ANY) {
6650660e03fSArnaldo Carvalho de Melo 				addr6 = &ipv6_hdr(skb)->daddr;
6661da177e4SLinus Torvalds 				addr_type = ipv6_addr_type(addr6);
6671da177e4SLinus Torvalds 			}
6681da177e4SLinus Torvalds 
6691da177e4SLinus Torvalds 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
6701da177e4SLinus Torvalds 				goto tx_error_icmp;
6711da177e4SLinus Torvalds 
6721da177e4SLinus Torvalds 			dst = addr6->s6_addr32[3];
6731da177e4SLinus Torvalds 		}
6741da177e4SLinus Torvalds #endif
6751da177e4SLinus Torvalds 		else
6761da177e4SLinus Torvalds 			goto tx_error;
6771da177e4SLinus Torvalds 	}
6781da177e4SLinus Torvalds 
6791da177e4SLinus Torvalds 	tos = tiph->tos;
6801da177e4SLinus Torvalds 	if (tos&1) {
6811da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
6821da177e4SLinus Torvalds 			tos = old_iph->tos;
6831da177e4SLinus Torvalds 		tos &= ~1;
6841da177e4SLinus Torvalds 	}
6851da177e4SLinus Torvalds 
6861da177e4SLinus Torvalds 	{
6871da177e4SLinus Torvalds 		struct flowi fl = { .oif = tunnel->parms.link,
6881da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
6891da177e4SLinus Torvalds 					      { .daddr = dst,
6901da177e4SLinus Torvalds 						.saddr = tiph->saddr,
6911da177e4SLinus Torvalds 						.tos = RT_TOS(tos) } },
6921da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
69396635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
694addd68ebSPavel Emelyanov 			stats->tx_carrier_errors++;
6951da177e4SLinus Torvalds 			goto tx_error;
6961da177e4SLinus Torvalds 		}
6971da177e4SLinus Torvalds 	}
6981da177e4SLinus Torvalds 	tdev = rt->u.dst.dev;
6991da177e4SLinus Torvalds 
7001da177e4SLinus Torvalds 	if (tdev == dev) {
7011da177e4SLinus Torvalds 		ip_rt_put(rt);
702addd68ebSPavel Emelyanov 		stats->collisions++;
7031da177e4SLinus Torvalds 		goto tx_error;
7041da177e4SLinus Torvalds 	}
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds 	df = tiph->frag_off;
7071da177e4SLinus Torvalds 	if (df)
708c95b819aSHerbert Xu 		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
7091da177e4SLinus Torvalds 	else
7101da177e4SLinus Torvalds 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
7111da177e4SLinus Torvalds 
7121da177e4SLinus Torvalds 	if (skb->dst)
7131da177e4SLinus Torvalds 		skb->dst->ops->update_pmtu(skb->dst, mtu);
7141da177e4SLinus Torvalds 
7151da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP)) {
7161da177e4SLinus Torvalds 		df |= (old_iph->frag_off&htons(IP_DF));
7171da177e4SLinus Torvalds 
7181da177e4SLinus Torvalds 		if ((old_iph->frag_off&htons(IP_DF)) &&
7191da177e4SLinus Torvalds 		    mtu < ntohs(old_iph->tot_len)) {
7201da177e4SLinus Torvalds 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
7211da177e4SLinus Torvalds 			ip_rt_put(rt);
7221da177e4SLinus Torvalds 			goto tx_error;
7231da177e4SLinus Torvalds 		}
7241da177e4SLinus Torvalds 	}
7251da177e4SLinus Torvalds #ifdef CONFIG_IPV6
7261da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6)) {
7271da177e4SLinus Torvalds 		struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
7281da177e4SLinus Torvalds 
7291da177e4SLinus Torvalds 		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
730f97c1e0cSJoe Perches 			if ((tunnel->parms.iph.daddr &&
731f97c1e0cSJoe Perches 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
7321da177e4SLinus Torvalds 			    rt6->rt6i_dst.plen == 128) {
7331da177e4SLinus Torvalds 				rt6->rt6i_flags |= RTF_MODIFIED;
7341da177e4SLinus Torvalds 				skb->dst->metrics[RTAX_MTU-1] = mtu;
7351da177e4SLinus Torvalds 			}
7361da177e4SLinus Torvalds 		}
7371da177e4SLinus Torvalds 
7381da177e4SLinus Torvalds 		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
7391da177e4SLinus Torvalds 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
7401da177e4SLinus Torvalds 			ip_rt_put(rt);
7411da177e4SLinus Torvalds 			goto tx_error;
7421da177e4SLinus Torvalds 		}
7431da177e4SLinus Torvalds 	}
7441da177e4SLinus Torvalds #endif
7451da177e4SLinus Torvalds 
7461da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
7471da177e4SLinus Torvalds 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
7481da177e4SLinus Torvalds 			tunnel->err_count--;
7491da177e4SLinus Torvalds 
7501da177e4SLinus Torvalds 			dst_link_failure(skb);
7511da177e4SLinus Torvalds 		} else
7521da177e4SLinus Torvalds 			tunnel->err_count = 0;
7531da177e4SLinus Torvalds 	}
7541da177e4SLinus Torvalds 
7551da177e4SLinus Torvalds 	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
7561da177e4SLinus Torvalds 
757cfbba49dSPatrick McHardy 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
758cfbba49dSPatrick McHardy 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
7591da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
7601da177e4SLinus Torvalds 		if (!new_skb) {
7611da177e4SLinus Torvalds 			ip_rt_put(rt);
7621da177e4SLinus Torvalds 			stats->tx_dropped++;
7631da177e4SLinus Torvalds 			dev_kfree_skb(skb);
7641da177e4SLinus Torvalds 			tunnel->recursion--;
7651da177e4SLinus Torvalds 			return 0;
7661da177e4SLinus Torvalds 		}
7671da177e4SLinus Torvalds 		if (skb->sk)
7681da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
7691da177e4SLinus Torvalds 		dev_kfree_skb(skb);
7701da177e4SLinus Torvalds 		skb = new_skb;
771eddc9ec5SArnaldo Carvalho de Melo 		old_iph = ip_hdr(skb);
7721da177e4SLinus Torvalds 	}
7731da177e4SLinus Torvalds 
77464194c31SHerbert Xu 	skb_reset_transport_header(skb);
775e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, gre_hlen);
776e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
7771da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
77848d5cad8SPatrick McHardy 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
77948d5cad8SPatrick McHardy 			      IPSKB_REROUTED);
7801da177e4SLinus Torvalds 	dst_release(skb->dst);
7811da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
7821da177e4SLinus Torvalds 
7831da177e4SLinus Torvalds 	/*
7841da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
7851da177e4SLinus Torvalds 	 */
7861da177e4SLinus Torvalds 
787eddc9ec5SArnaldo Carvalho de Melo 	iph 			=	ip_hdr(skb);
7881da177e4SLinus Torvalds 	iph->version		=	4;
7891da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr) >> 2;
7901da177e4SLinus Torvalds 	iph->frag_off		=	df;
7911da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_GRE;
7921da177e4SLinus Torvalds 	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
7931da177e4SLinus Torvalds 	iph->daddr		=	rt->rt_dst;
7941da177e4SLinus Torvalds 	iph->saddr		=	rt->rt_src;
7951da177e4SLinus Torvalds 
7961da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0) {
7971da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
7981da177e4SLinus Torvalds 			iph->ttl = old_iph->ttl;
7991da177e4SLinus Torvalds #ifdef CONFIG_IPV6
8001da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6))
8011da177e4SLinus Torvalds 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
8021da177e4SLinus Torvalds #endif
8031da177e4SLinus Torvalds 		else
8041da177e4SLinus Torvalds 			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
8051da177e4SLinus Torvalds 	}
8061da177e4SLinus Torvalds 
807d5a0a1e3SAl Viro 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
808e1a80002SHerbert Xu 	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
809e1a80002SHerbert Xu 				   htons(ETH_P_TEB) : skb->protocol;
8101da177e4SLinus Torvalds 
8111da177e4SLinus Torvalds 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
812d5a0a1e3SAl Viro 		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
8131da177e4SLinus Torvalds 
8141da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_SEQ) {
8151da177e4SLinus Torvalds 			++tunnel->o_seqno;
8161da177e4SLinus Torvalds 			*ptr = htonl(tunnel->o_seqno);
8171da177e4SLinus Torvalds 			ptr--;
8181da177e4SLinus Torvalds 		}
8191da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_KEY) {
8201da177e4SLinus Torvalds 			*ptr = tunnel->parms.o_key;
8211da177e4SLinus Torvalds 			ptr--;
8221da177e4SLinus Torvalds 		}
8231da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_CSUM) {
8241da177e4SLinus Torvalds 			*ptr = 0;
8255f92a738SAl Viro 			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
8261da177e4SLinus Torvalds 		}
8271da177e4SLinus Torvalds 	}
8281da177e4SLinus Torvalds 
8291da177e4SLinus Torvalds 	nf_reset(skb);
8301da177e4SLinus Torvalds 
8311da177e4SLinus Torvalds 	IPTUNNEL_XMIT();
8321da177e4SLinus Torvalds 	tunnel->recursion--;
8331da177e4SLinus Torvalds 	return 0;
8341da177e4SLinus Torvalds 
8351da177e4SLinus Torvalds tx_error_icmp:
8361da177e4SLinus Torvalds 	dst_link_failure(skb);
8371da177e4SLinus Torvalds 
8381da177e4SLinus Torvalds tx_error:
8391da177e4SLinus Torvalds 	stats->tx_errors++;
8401da177e4SLinus Torvalds 	dev_kfree_skb(skb);
8411da177e4SLinus Torvalds 	tunnel->recursion--;
8421da177e4SLinus Torvalds 	return 0;
8431da177e4SLinus Torvalds }
8441da177e4SLinus Torvalds 
84542aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev)
846ee34c1ebSMichal Schmidt {
847ee34c1ebSMichal Schmidt 	struct net_device *tdev = NULL;
848ee34c1ebSMichal Schmidt 	struct ip_tunnel *tunnel;
849ee34c1ebSMichal Schmidt 	struct iphdr *iph;
850ee34c1ebSMichal Schmidt 	int hlen = LL_MAX_HEADER;
851ee34c1ebSMichal Schmidt 	int mtu = ETH_DATA_LEN;
852ee34c1ebSMichal Schmidt 	int addend = sizeof(struct iphdr) + 4;
853ee34c1ebSMichal Schmidt 
854ee34c1ebSMichal Schmidt 	tunnel = netdev_priv(dev);
855ee34c1ebSMichal Schmidt 	iph = &tunnel->parms.iph;
856ee34c1ebSMichal Schmidt 
857c95b819aSHerbert Xu 	/* Guess output device to choose reasonable mtu and needed_headroom */
858ee34c1ebSMichal Schmidt 
859ee34c1ebSMichal Schmidt 	if (iph->daddr) {
860ee34c1ebSMichal Schmidt 		struct flowi fl = { .oif = tunnel->parms.link,
861ee34c1ebSMichal Schmidt 				    .nl_u = { .ip4_u =
862ee34c1ebSMichal Schmidt 					      { .daddr = iph->daddr,
863ee34c1ebSMichal Schmidt 						.saddr = iph->saddr,
864ee34c1ebSMichal Schmidt 						.tos = RT_TOS(iph->tos) } },
865ee34c1ebSMichal Schmidt 				    .proto = IPPROTO_GRE };
866ee34c1ebSMichal Schmidt 		struct rtable *rt;
86796635522SPavel Emelyanov 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
868ee34c1ebSMichal Schmidt 			tdev = rt->u.dst.dev;
869ee34c1ebSMichal Schmidt 			ip_rt_put(rt);
870ee34c1ebSMichal Schmidt 		}
871e1a80002SHerbert Xu 
872e1a80002SHerbert Xu 		if (dev->type != ARPHRD_ETHER)
873ee34c1ebSMichal Schmidt 			dev->flags |= IFF_POINTOPOINT;
874ee34c1ebSMichal Schmidt 	}
875ee34c1ebSMichal Schmidt 
876ee34c1ebSMichal Schmidt 	if (!tdev && tunnel->parms.link)
87796635522SPavel Emelyanov 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
878ee34c1ebSMichal Schmidt 
879ee34c1ebSMichal Schmidt 	if (tdev) {
880c95b819aSHerbert Xu 		hlen = tdev->hard_header_len + tdev->needed_headroom;
881ee34c1ebSMichal Schmidt 		mtu = tdev->mtu;
882ee34c1ebSMichal Schmidt 	}
883ee34c1ebSMichal Schmidt 	dev->iflink = tunnel->parms.link;
884ee34c1ebSMichal Schmidt 
885ee34c1ebSMichal Schmidt 	/* Precalculate GRE options length */
886ee34c1ebSMichal Schmidt 	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
887ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_CSUM)
888ee34c1ebSMichal Schmidt 			addend += 4;
889ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_KEY)
890ee34c1ebSMichal Schmidt 			addend += 4;
891ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_SEQ)
892ee34c1ebSMichal Schmidt 			addend += 4;
893ee34c1ebSMichal Schmidt 	}
894c95b819aSHerbert Xu 	dev->needed_headroom = addend + hlen;
89542aa9162SHerbert Xu 	mtu -= dev->hard_header_len - addend;
89642aa9162SHerbert Xu 
89742aa9162SHerbert Xu 	if (mtu < 68)
89842aa9162SHerbert Xu 		mtu = 68;
89942aa9162SHerbert Xu 
900ee34c1ebSMichal Schmidt 	tunnel->hlen = addend;
901ee34c1ebSMichal Schmidt 
90242aa9162SHerbert Xu 	return mtu;
903ee34c1ebSMichal Schmidt }
904ee34c1ebSMichal Schmidt 
9051da177e4SLinus Torvalds static int
9061da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
9071da177e4SLinus Torvalds {
9081da177e4SLinus Torvalds 	int err = 0;
9091da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
9101da177e4SLinus Torvalds 	struct ip_tunnel *t;
911f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
912f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
9131da177e4SLinus Torvalds 
9141da177e4SLinus Torvalds 	switch (cmd) {
9151da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
9161da177e4SLinus Torvalds 		t = NULL;
9177daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
9181da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
9191da177e4SLinus Torvalds 				err = -EFAULT;
9201da177e4SLinus Torvalds 				break;
9211da177e4SLinus Torvalds 			}
922f57e7d5aSPavel Emelyanov 			t = ipgre_tunnel_locate(net, &p, 0);
9231da177e4SLinus Torvalds 		}
9241da177e4SLinus Torvalds 		if (t == NULL)
9252941a486SPatrick McHardy 			t = netdev_priv(dev);
9261da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
9271da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
9281da177e4SLinus Torvalds 			err = -EFAULT;
9291da177e4SLinus Torvalds 		break;
9301da177e4SLinus Torvalds 
9311da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
9321da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
9331da177e4SLinus Torvalds 		err = -EPERM;
9341da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
9351da177e4SLinus Torvalds 			goto done;
9361da177e4SLinus Torvalds 
9371da177e4SLinus Torvalds 		err = -EFAULT;
9381da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
9391da177e4SLinus Torvalds 			goto done;
9401da177e4SLinus Torvalds 
9411da177e4SLinus Torvalds 		err = -EINVAL;
9421da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
9431da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
9441da177e4SLinus Torvalds 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
9451da177e4SLinus Torvalds 			goto done;
9461da177e4SLinus Torvalds 		if (p.iph.ttl)
9471da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
9481da177e4SLinus Torvalds 
9491da177e4SLinus Torvalds 		if (!(p.i_flags&GRE_KEY))
9501da177e4SLinus Torvalds 			p.i_key = 0;
9511da177e4SLinus Torvalds 		if (!(p.o_flags&GRE_KEY))
9521da177e4SLinus Torvalds 			p.o_key = 0;
9531da177e4SLinus Torvalds 
954f57e7d5aSPavel Emelyanov 		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
9551da177e4SLinus Torvalds 
9567daa0004SPavel Emelyanov 		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
9571da177e4SLinus Torvalds 			if (t != NULL) {
9581da177e4SLinus Torvalds 				if (t->dev != dev) {
9591da177e4SLinus Torvalds 					err = -EEXIST;
9601da177e4SLinus Torvalds 					break;
9611da177e4SLinus Torvalds 				}
9621da177e4SLinus Torvalds 			} else {
9631da177e4SLinus Torvalds 				unsigned nflags = 0;
9641da177e4SLinus Torvalds 
9652941a486SPatrick McHardy 				t = netdev_priv(dev);
9661da177e4SLinus Torvalds 
967f97c1e0cSJoe Perches 				if (ipv4_is_multicast(p.iph.daddr))
9681da177e4SLinus Torvalds 					nflags = IFF_BROADCAST;
9691da177e4SLinus Torvalds 				else if (p.iph.daddr)
9701da177e4SLinus Torvalds 					nflags = IFF_POINTOPOINT;
9711da177e4SLinus Torvalds 
9721da177e4SLinus Torvalds 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
9731da177e4SLinus Torvalds 					err = -EINVAL;
9741da177e4SLinus Torvalds 					break;
9751da177e4SLinus Torvalds 				}
976f57e7d5aSPavel Emelyanov 				ipgre_tunnel_unlink(ign, t);
9771da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
9781da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
9791da177e4SLinus Torvalds 				t->parms.i_key = p.i_key;
9801da177e4SLinus Torvalds 				t->parms.o_key = p.o_key;
9811da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
9821da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
983f57e7d5aSPavel Emelyanov 				ipgre_tunnel_link(ign, t);
9841da177e4SLinus Torvalds 				netdev_state_change(dev);
9851da177e4SLinus Torvalds 			}
9861da177e4SLinus Torvalds 		}
9871da177e4SLinus Torvalds 
9881da177e4SLinus Torvalds 		if (t) {
9891da177e4SLinus Torvalds 			err = 0;
9901da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
9911da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
9921da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
9931da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
994ee34c1ebSMichal Schmidt 				if (t->parms.link != p.link) {
995ee34c1ebSMichal Schmidt 					t->parms.link = p.link;
99642aa9162SHerbert Xu 					dev->mtu = ipgre_tunnel_bind_dev(dev);
997ee34c1ebSMichal Schmidt 					netdev_state_change(dev);
998ee34c1ebSMichal Schmidt 				}
9991da177e4SLinus Torvalds 			}
10001da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
10011da177e4SLinus Torvalds 				err = -EFAULT;
10021da177e4SLinus Torvalds 		} else
10031da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
10041da177e4SLinus Torvalds 		break;
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
10071da177e4SLinus Torvalds 		err = -EPERM;
10081da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
10091da177e4SLinus Torvalds 			goto done;
10101da177e4SLinus Torvalds 
10117daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
10121da177e4SLinus Torvalds 			err = -EFAULT;
10131da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
10141da177e4SLinus Torvalds 				goto done;
10151da177e4SLinus Torvalds 			err = -ENOENT;
1016f57e7d5aSPavel Emelyanov 			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
10171da177e4SLinus Torvalds 				goto done;
10181da177e4SLinus Torvalds 			err = -EPERM;
10197daa0004SPavel Emelyanov 			if (t == netdev_priv(ign->fb_tunnel_dev))
10201da177e4SLinus Torvalds 				goto done;
10211da177e4SLinus Torvalds 			dev = t->dev;
10221da177e4SLinus Torvalds 		}
102322f8cde5SStephen Hemminger 		unregister_netdevice(dev);
102422f8cde5SStephen Hemminger 		err = 0;
10251da177e4SLinus Torvalds 		break;
10261da177e4SLinus Torvalds 
10271da177e4SLinus Torvalds 	default:
10281da177e4SLinus Torvalds 		err = -EINVAL;
10291da177e4SLinus Torvalds 	}
10301da177e4SLinus Torvalds 
10311da177e4SLinus Torvalds done:
10321da177e4SLinus Torvalds 	return err;
10331da177e4SLinus Torvalds }
10341da177e4SLinus Torvalds 
10351da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
10361da177e4SLinus Torvalds {
10372941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
1038c95b819aSHerbert Xu 	if (new_mtu < 68 ||
1039c95b819aSHerbert Xu 	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
10401da177e4SLinus Torvalds 		return -EINVAL;
10411da177e4SLinus Torvalds 	dev->mtu = new_mtu;
10421da177e4SLinus Torvalds 	return 0;
10431da177e4SLinus Torvalds }
10441da177e4SLinus Torvalds 
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
10571da177e4SLinus Torvalds 
10581da177e4SLinus Torvalds    ping -t 255 224.66.66.66
10591da177e4SLinus Torvalds 
10601da177e4SLinus Torvalds    If nobody answers, mbone does not work.
10611da177e4SLinus Torvalds 
10621da177e4SLinus Torvalds    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
10631da177e4SLinus Torvalds    ip addr add 10.66.66.<somewhat>/24 dev Universe
10641da177e4SLinus Torvalds    ifconfig Universe up
10651da177e4SLinus Torvalds    ifconfig Universe add fe80::<Your_real_addr>/10
10661da177e4SLinus Torvalds    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
10671da177e4SLinus Torvalds    ftp 10.66.66.66
10681da177e4SLinus Torvalds    ...
10691da177e4SLinus Torvalds    ftp fec0:6666:6666::193.233.7.65
10701da177e4SLinus Torvalds    ...
10711da177e4SLinus Torvalds 
10721da177e4SLinus Torvalds  */
10731da177e4SLinus Torvalds 
10743b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
10753b04dddeSStephen Hemminger 			unsigned short type,
10763b04dddeSStephen Hemminger 			const void *daddr, const void *saddr, unsigned len)
10771da177e4SLinus Torvalds {
10782941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
10791da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1080d5a0a1e3SAl Viro 	__be16 *p = (__be16*)(iph+1);
10811da177e4SLinus Torvalds 
10821da177e4SLinus Torvalds 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
10831da177e4SLinus Torvalds 	p[0]		= t->parms.o_flags;
10841da177e4SLinus Torvalds 	p[1]		= htons(type);
10851da177e4SLinus Torvalds 
10861da177e4SLinus Torvalds 	/*
10871da177e4SLinus Torvalds 	 *	Set the source hardware address.
10881da177e4SLinus Torvalds 	 */
10891da177e4SLinus Torvalds 
10901da177e4SLinus Torvalds 	if (saddr)
10911da177e4SLinus Torvalds 		memcpy(&iph->saddr, saddr, 4);
10921da177e4SLinus Torvalds 
10931da177e4SLinus Torvalds 	if (daddr) {
10941da177e4SLinus Torvalds 		memcpy(&iph->daddr, daddr, 4);
10951da177e4SLinus Torvalds 		return t->hlen;
10961da177e4SLinus Torvalds 	}
1097f97c1e0cSJoe Perches 	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
10981da177e4SLinus Torvalds 		return t->hlen;
10991da177e4SLinus Torvalds 
11001da177e4SLinus Torvalds 	return -t->hlen;
11011da177e4SLinus Torvalds }
11021da177e4SLinus Torvalds 
11036a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
11046a5f44d7STimo Teras {
11056a5f44d7STimo Teras 	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
11066a5f44d7STimo Teras 	memcpy(haddr, &iph->saddr, 4);
11076a5f44d7STimo Teras 	return 4;
11086a5f44d7STimo Teras }
11096a5f44d7STimo Teras 
11103b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = {
11113b04dddeSStephen Hemminger 	.create	= ipgre_header,
11126a5f44d7STimo Teras 	.parse	= ipgre_header_parse,
11133b04dddeSStephen Hemminger };
11143b04dddeSStephen Hemminger 
11156a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST
11161da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev)
11171da177e4SLinus Torvalds {
11182941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
11191da177e4SLinus Torvalds 
1120f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
11211da177e4SLinus Torvalds 		struct flowi fl = { .oif = t->parms.link,
11221da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
11231da177e4SLinus Torvalds 					      { .daddr = t->parms.iph.daddr,
11241da177e4SLinus Torvalds 						.saddr = t->parms.iph.saddr,
11251da177e4SLinus Torvalds 						.tos = RT_TOS(t->parms.iph.tos) } },
11261da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
11271da177e4SLinus Torvalds 		struct rtable *rt;
112896635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
11291da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
11301da177e4SLinus Torvalds 		dev = rt->u.dst.dev;
11311da177e4SLinus Torvalds 		ip_rt_put(rt);
1132e5ed6399SHerbert Xu 		if (__in_dev_get_rtnl(dev) == NULL)
11331da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
11341da177e4SLinus Torvalds 		t->mlink = dev->ifindex;
1135e5ed6399SHerbert Xu 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
11361da177e4SLinus Torvalds 	}
11371da177e4SLinus Torvalds 	return 0;
11381da177e4SLinus Torvalds }
11391da177e4SLinus Torvalds 
11401da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev)
11411da177e4SLinus Torvalds {
11422941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
1143*b8c26a33SStephen Hemminger 
1144f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
11457fee0ca2SDenis V. Lunev 		struct in_device *in_dev;
1146c346dca1SYOSHIFUJI Hideaki 		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
11471da177e4SLinus Torvalds 		if (in_dev) {
11481da177e4SLinus Torvalds 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
11491da177e4SLinus Torvalds 			in_dev_put(in_dev);
11501da177e4SLinus Torvalds 		}
11511da177e4SLinus Torvalds 	}
11521da177e4SLinus Torvalds 	return 0;
11531da177e4SLinus Torvalds }
11541da177e4SLinus Torvalds 
11551da177e4SLinus Torvalds #endif
11561da177e4SLinus Torvalds 
1157*b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_netdev_ops = {
1158*b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tunnel_init,
1159*b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1160*b8c26a33SStephen Hemminger #ifdef CONFIG_NET_IPGRE_BROADCAST
1161*b8c26a33SStephen Hemminger 	.ndo_open		= ipgre_open,
1162*b8c26a33SStephen Hemminger 	.ndo_stop		= ipgre_close,
1163*b8c26a33SStephen Hemminger #endif
1164*b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1165*b8c26a33SStephen Hemminger 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
1166*b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1167*b8c26a33SStephen Hemminger };
1168*b8c26a33SStephen Hemminger 
11691da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev)
11701da177e4SLinus Torvalds {
1171*b8c26a33SStephen Hemminger 	dev->netdev_ops		= &ipgre_netdev_ops;
11721da177e4SLinus Torvalds 	dev->destructor 	= free_netdev;
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds 	dev->type		= ARPHRD_IPGRE;
1175c95b819aSHerbert Xu 	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
117646f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
11771da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
11781da177e4SLinus Torvalds 	dev->iflink		= 0;
11791da177e4SLinus Torvalds 	dev->addr_len		= 4;
11800b67ecebSPavel Emelyanov 	dev->features		|= NETIF_F_NETNS_LOCAL;
11811da177e4SLinus Torvalds }
11821da177e4SLinus Torvalds 
11831da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev)
11841da177e4SLinus Torvalds {
11851da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
11861da177e4SLinus Torvalds 	struct iphdr *iph;
11871da177e4SLinus Torvalds 
11882941a486SPatrick McHardy 	tunnel = netdev_priv(dev);
11891da177e4SLinus Torvalds 	iph = &tunnel->parms.iph;
11901da177e4SLinus Torvalds 
11911da177e4SLinus Torvalds 	tunnel->dev = dev;
11921da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
11931da177e4SLinus Torvalds 
11941da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
11951da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
11961da177e4SLinus Torvalds 
11971da177e4SLinus Torvalds 	if (iph->daddr) {
11981da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
1199f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
12001da177e4SLinus Torvalds 			if (!iph->saddr)
12011da177e4SLinus Torvalds 				return -EINVAL;
12021da177e4SLinus Torvalds 			dev->flags = IFF_BROADCAST;
12033b04dddeSStephen Hemminger 			dev->header_ops = &ipgre_header_ops;
12041da177e4SLinus Torvalds 		}
12051da177e4SLinus Torvalds #endif
1206ee34c1ebSMichal Schmidt 	} else
12076a5f44d7STimo Teras 		dev->header_ops = &ipgre_header_ops;
12081da177e4SLinus Torvalds 
12091da177e4SLinus Torvalds 	return 0;
12101da177e4SLinus Torvalds }
12111da177e4SLinus Torvalds 
1212*b8c26a33SStephen Hemminger static void ipgre_fb_tunnel_init(struct net_device *dev)
12131da177e4SLinus Torvalds {
12142941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
12151da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
1216eb8ce741SPavel Emelyanov 	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
12171da177e4SLinus Torvalds 
12181da177e4SLinus Torvalds 	tunnel->dev = dev;
12191da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
12201da177e4SLinus Torvalds 
12211da177e4SLinus Torvalds 	iph->version		= 4;
12221da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_GRE;
12231da177e4SLinus Torvalds 	iph->ihl		= 5;
12241da177e4SLinus Torvalds 	tunnel->hlen		= sizeof(struct iphdr) + 4;
12251da177e4SLinus Torvalds 
12261da177e4SLinus Torvalds 	dev_hold(dev);
1227eb8ce741SPavel Emelyanov 	ign->tunnels_wc[0]	= tunnel;
12281da177e4SLinus Torvalds }
12291da177e4SLinus Torvalds 
12301da177e4SLinus Torvalds 
12311da177e4SLinus Torvalds static struct net_protocol ipgre_protocol = {
12321da177e4SLinus Torvalds 	.handler	=	ipgre_rcv,
12331da177e4SLinus Torvalds 	.err_handler	=	ipgre_err,
1234f96c148fSPavel Emelyanov 	.netns_ok	=	1,
12351da177e4SLinus Torvalds };
12361da177e4SLinus Torvalds 
1237eb8ce741SPavel Emelyanov static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1238eb8ce741SPavel Emelyanov {
1239eb8ce741SPavel Emelyanov 	int prio;
1240eb8ce741SPavel Emelyanov 
1241eb8ce741SPavel Emelyanov 	for (prio = 0; prio < 4; prio++) {
1242eb8ce741SPavel Emelyanov 		int h;
1243eb8ce741SPavel Emelyanov 		for (h = 0; h < HASH_SIZE; h++) {
1244eb8ce741SPavel Emelyanov 			struct ip_tunnel *t;
1245eb8ce741SPavel Emelyanov 			while ((t = ign->tunnels[prio][h]) != NULL)
1246eb8ce741SPavel Emelyanov 				unregister_netdevice(t->dev);
1247eb8ce741SPavel Emelyanov 		}
1248eb8ce741SPavel Emelyanov 	}
1249eb8ce741SPavel Emelyanov }
1250eb8ce741SPavel Emelyanov 
125159a4c759SPavel Emelyanov static int ipgre_init_net(struct net *net)
125259a4c759SPavel Emelyanov {
125359a4c759SPavel Emelyanov 	int err;
125459a4c759SPavel Emelyanov 	struct ipgre_net *ign;
125559a4c759SPavel Emelyanov 
125659a4c759SPavel Emelyanov 	err = -ENOMEM;
1257eb8ce741SPavel Emelyanov 	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
125859a4c759SPavel Emelyanov 	if (ign == NULL)
125959a4c759SPavel Emelyanov 		goto err_alloc;
126059a4c759SPavel Emelyanov 
126159a4c759SPavel Emelyanov 	err = net_assign_generic(net, ipgre_net_id, ign);
126259a4c759SPavel Emelyanov 	if (err < 0)
126359a4c759SPavel Emelyanov 		goto err_assign;
126459a4c759SPavel Emelyanov 
12657daa0004SPavel Emelyanov 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
12667daa0004SPavel Emelyanov 					   ipgre_tunnel_setup);
12677daa0004SPavel Emelyanov 	if (!ign->fb_tunnel_dev) {
12687daa0004SPavel Emelyanov 		err = -ENOMEM;
12697daa0004SPavel Emelyanov 		goto err_alloc_dev;
12707daa0004SPavel Emelyanov 	}
12717daa0004SPavel Emelyanov 
1272*b8c26a33SStephen Hemminger 	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
12737daa0004SPavel Emelyanov 	dev_net_set(ign->fb_tunnel_dev, net);
1274c19e654dSHerbert Xu 	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
12757daa0004SPavel Emelyanov 
12767daa0004SPavel Emelyanov 	if ((err = register_netdev(ign->fb_tunnel_dev)))
12777daa0004SPavel Emelyanov 		goto err_reg_dev;
12787daa0004SPavel Emelyanov 
127959a4c759SPavel Emelyanov 	return 0;
128059a4c759SPavel Emelyanov 
12817daa0004SPavel Emelyanov err_reg_dev:
12827daa0004SPavel Emelyanov 	free_netdev(ign->fb_tunnel_dev);
12837daa0004SPavel Emelyanov err_alloc_dev:
12847daa0004SPavel Emelyanov 	/* nothing */
128559a4c759SPavel Emelyanov err_assign:
128659a4c759SPavel Emelyanov 	kfree(ign);
128759a4c759SPavel Emelyanov err_alloc:
128859a4c759SPavel Emelyanov 	return err;
128959a4c759SPavel Emelyanov }
129059a4c759SPavel Emelyanov 
129159a4c759SPavel Emelyanov static void ipgre_exit_net(struct net *net)
129259a4c759SPavel Emelyanov {
129359a4c759SPavel Emelyanov 	struct ipgre_net *ign;
129459a4c759SPavel Emelyanov 
129559a4c759SPavel Emelyanov 	ign = net_generic(net, ipgre_net_id);
12967daa0004SPavel Emelyanov 	rtnl_lock();
1297eb8ce741SPavel Emelyanov 	ipgre_destroy_tunnels(ign);
12987daa0004SPavel Emelyanov 	rtnl_unlock();
129959a4c759SPavel Emelyanov 	kfree(ign);
130059a4c759SPavel Emelyanov }
130159a4c759SPavel Emelyanov 
130259a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = {
130359a4c759SPavel Emelyanov 	.init = ipgre_init_net,
130459a4c759SPavel Emelyanov 	.exit = ipgre_exit_net,
130559a4c759SPavel Emelyanov };
13061da177e4SLinus Torvalds 
1307c19e654dSHerbert Xu static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1308c19e654dSHerbert Xu {
1309c19e654dSHerbert Xu 	__be16 flags;
1310c19e654dSHerbert Xu 
1311c19e654dSHerbert Xu 	if (!data)
1312c19e654dSHerbert Xu 		return 0;
1313c19e654dSHerbert Xu 
1314c19e654dSHerbert Xu 	flags = 0;
1315c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1316c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1317c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1318c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1319c19e654dSHerbert Xu 	if (flags & (GRE_VERSION|GRE_ROUTING))
1320c19e654dSHerbert Xu 		return -EINVAL;
1321c19e654dSHerbert Xu 
1322c19e654dSHerbert Xu 	return 0;
1323c19e654dSHerbert Xu }
1324c19e654dSHerbert Xu 
1325e1a80002SHerbert Xu static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1326e1a80002SHerbert Xu {
1327e1a80002SHerbert Xu 	__be32 daddr;
1328e1a80002SHerbert Xu 
1329e1a80002SHerbert Xu 	if (tb[IFLA_ADDRESS]) {
1330e1a80002SHerbert Xu 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1331e1a80002SHerbert Xu 			return -EINVAL;
1332e1a80002SHerbert Xu 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1333e1a80002SHerbert Xu 			return -EADDRNOTAVAIL;
1334e1a80002SHerbert Xu 	}
1335e1a80002SHerbert Xu 
1336e1a80002SHerbert Xu 	if (!data)
1337e1a80002SHerbert Xu 		goto out;
1338e1a80002SHerbert Xu 
1339e1a80002SHerbert Xu 	if (data[IFLA_GRE_REMOTE]) {
1340e1a80002SHerbert Xu 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1341e1a80002SHerbert Xu 		if (!daddr)
1342e1a80002SHerbert Xu 			return -EINVAL;
1343e1a80002SHerbert Xu 	}
1344e1a80002SHerbert Xu 
1345e1a80002SHerbert Xu out:
1346e1a80002SHerbert Xu 	return ipgre_tunnel_validate(tb, data);
1347e1a80002SHerbert Xu }
1348e1a80002SHerbert Xu 
1349c19e654dSHerbert Xu static void ipgre_netlink_parms(struct nlattr *data[],
1350c19e654dSHerbert Xu 				struct ip_tunnel_parm *parms)
1351c19e654dSHerbert Xu {
13527bb82d92SHerbert Xu 	memset(parms, 0, sizeof(*parms));
1353c19e654dSHerbert Xu 
1354c19e654dSHerbert Xu 	parms->iph.protocol = IPPROTO_GRE;
1355c19e654dSHerbert Xu 
1356c19e654dSHerbert Xu 	if (!data)
1357c19e654dSHerbert Xu 		return;
1358c19e654dSHerbert Xu 
1359c19e654dSHerbert Xu 	if (data[IFLA_GRE_LINK])
1360c19e654dSHerbert Xu 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1361c19e654dSHerbert Xu 
1362c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1363c19e654dSHerbert Xu 		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1364c19e654dSHerbert Xu 
1365c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1366c19e654dSHerbert Xu 		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367c19e654dSHerbert Xu 
1368c19e654dSHerbert Xu 	if (data[IFLA_GRE_IKEY])
1369c19e654dSHerbert Xu 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1370c19e654dSHerbert Xu 
1371c19e654dSHerbert Xu 	if (data[IFLA_GRE_OKEY])
1372c19e654dSHerbert Xu 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1373c19e654dSHerbert Xu 
1374c19e654dSHerbert Xu 	if (data[IFLA_GRE_LOCAL])
13754d74f8baSPatrick McHardy 		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1376c19e654dSHerbert Xu 
1377c19e654dSHerbert Xu 	if (data[IFLA_GRE_REMOTE])
13784d74f8baSPatrick McHardy 		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1379c19e654dSHerbert Xu 
1380c19e654dSHerbert Xu 	if (data[IFLA_GRE_TTL])
1381c19e654dSHerbert Xu 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1382c19e654dSHerbert Xu 
1383c19e654dSHerbert Xu 	if (data[IFLA_GRE_TOS])
1384c19e654dSHerbert Xu 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1385c19e654dSHerbert Xu 
1386c19e654dSHerbert Xu 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1387c19e654dSHerbert Xu 		parms->iph.frag_off = htons(IP_DF);
1388c19e654dSHerbert Xu }
1389c19e654dSHerbert Xu 
1390e1a80002SHerbert Xu static int ipgre_tap_init(struct net_device *dev)
1391e1a80002SHerbert Xu {
1392e1a80002SHerbert Xu 	struct ip_tunnel *tunnel;
1393e1a80002SHerbert Xu 
1394e1a80002SHerbert Xu 	tunnel = netdev_priv(dev);
1395e1a80002SHerbert Xu 
1396e1a80002SHerbert Xu 	tunnel->dev = dev;
1397e1a80002SHerbert Xu 	strcpy(tunnel->parms.name, dev->name);
1398e1a80002SHerbert Xu 
1399e1a80002SHerbert Xu 	ipgre_tunnel_bind_dev(dev);
1400e1a80002SHerbert Xu 
1401e1a80002SHerbert Xu 	return 0;
1402e1a80002SHerbert Xu }
1403e1a80002SHerbert Xu 
1404*b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_tap_netdev_ops = {
1405*b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tap_init,
1406*b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1407*b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1408*b8c26a33SStephen Hemminger 	.ndo_set_mac_address 	= eth_mac_addr,
1409*b8c26a33SStephen Hemminger 	.ndo_validate_addr	= eth_validate_addr,
1410*b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1411*b8c26a33SStephen Hemminger };
1412*b8c26a33SStephen Hemminger 
1413e1a80002SHerbert Xu static void ipgre_tap_setup(struct net_device *dev)
1414e1a80002SHerbert Xu {
1415e1a80002SHerbert Xu 
1416e1a80002SHerbert Xu 	ether_setup(dev);
1417e1a80002SHerbert Xu 
1418*b8c26a33SStephen Hemminger 	dev->netdev_ops		= &ipgre_netdev_ops;
1419e1a80002SHerbert Xu 	dev->destructor 	= free_netdev;
1420e1a80002SHerbert Xu 
1421e1a80002SHerbert Xu 	dev->iflink		= 0;
1422e1a80002SHerbert Xu 	dev->features		|= NETIF_F_NETNS_LOCAL;
1423e1a80002SHerbert Xu }
1424e1a80002SHerbert Xu 
1425c19e654dSHerbert Xu static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1426c19e654dSHerbert Xu 			 struct nlattr *data[])
1427c19e654dSHerbert Xu {
1428c19e654dSHerbert Xu 	struct ip_tunnel *nt;
1429c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1430c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1431c19e654dSHerbert Xu 	int mtu;
1432c19e654dSHerbert Xu 	int err;
1433c19e654dSHerbert Xu 
1434c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1435c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &nt->parms);
1436c19e654dSHerbert Xu 
1437e1a80002SHerbert Xu 	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1438c19e654dSHerbert Xu 		return -EEXIST;
1439c19e654dSHerbert Xu 
1440e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1441e1a80002SHerbert Xu 		random_ether_addr(dev->dev_addr);
1442e1a80002SHerbert Xu 
1443c19e654dSHerbert Xu 	mtu = ipgre_tunnel_bind_dev(dev);
1444c19e654dSHerbert Xu 	if (!tb[IFLA_MTU])
1445c19e654dSHerbert Xu 		dev->mtu = mtu;
1446c19e654dSHerbert Xu 
1447c19e654dSHerbert Xu 	err = register_netdevice(dev);
1448c19e654dSHerbert Xu 	if (err)
1449c19e654dSHerbert Xu 		goto out;
1450c19e654dSHerbert Xu 
1451c19e654dSHerbert Xu 	dev_hold(dev);
1452c19e654dSHerbert Xu 	ipgre_tunnel_link(ign, nt);
1453c19e654dSHerbert Xu 
1454c19e654dSHerbert Xu out:
1455c19e654dSHerbert Xu 	return err;
1456c19e654dSHerbert Xu }
1457c19e654dSHerbert Xu 
1458c19e654dSHerbert Xu static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1459c19e654dSHerbert Xu 			    struct nlattr *data[])
1460c19e654dSHerbert Xu {
1461c19e654dSHerbert Xu 	struct ip_tunnel *t, *nt;
1462c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1463c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1464c19e654dSHerbert Xu 	struct ip_tunnel_parm p;
1465c19e654dSHerbert Xu 	int mtu;
1466c19e654dSHerbert Xu 
1467c19e654dSHerbert Xu 	if (dev == ign->fb_tunnel_dev)
1468c19e654dSHerbert Xu 		return -EINVAL;
1469c19e654dSHerbert Xu 
1470c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1471c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &p);
1472c19e654dSHerbert Xu 
1473c19e654dSHerbert Xu 	t = ipgre_tunnel_locate(net, &p, 0);
1474c19e654dSHerbert Xu 
1475c19e654dSHerbert Xu 	if (t) {
1476c19e654dSHerbert Xu 		if (t->dev != dev)
1477c19e654dSHerbert Xu 			return -EEXIST;
1478c19e654dSHerbert Xu 	} else {
1479c19e654dSHerbert Xu 		unsigned nflags = 0;
1480c19e654dSHerbert Xu 
1481c19e654dSHerbert Xu 		t = nt;
1482c19e654dSHerbert Xu 
1483c19e654dSHerbert Xu 		if (ipv4_is_multicast(p.iph.daddr))
1484c19e654dSHerbert Xu 			nflags = IFF_BROADCAST;
1485c19e654dSHerbert Xu 		else if (p.iph.daddr)
1486c19e654dSHerbert Xu 			nflags = IFF_POINTOPOINT;
1487c19e654dSHerbert Xu 
1488c19e654dSHerbert Xu 		if ((dev->flags ^ nflags) &
1489c19e654dSHerbert Xu 		    (IFF_POINTOPOINT | IFF_BROADCAST))
1490c19e654dSHerbert Xu 			return -EINVAL;
1491c19e654dSHerbert Xu 
1492c19e654dSHerbert Xu 		ipgre_tunnel_unlink(ign, t);
1493c19e654dSHerbert Xu 		t->parms.iph.saddr = p.iph.saddr;
1494c19e654dSHerbert Xu 		t->parms.iph.daddr = p.iph.daddr;
1495c19e654dSHerbert Xu 		t->parms.i_key = p.i_key;
1496c19e654dSHerbert Xu 		memcpy(dev->dev_addr, &p.iph.saddr, 4);
1497c19e654dSHerbert Xu 		memcpy(dev->broadcast, &p.iph.daddr, 4);
1498c19e654dSHerbert Xu 		ipgre_tunnel_link(ign, t);
1499c19e654dSHerbert Xu 		netdev_state_change(dev);
1500c19e654dSHerbert Xu 	}
1501c19e654dSHerbert Xu 
1502c19e654dSHerbert Xu 	t->parms.o_key = p.o_key;
1503c19e654dSHerbert Xu 	t->parms.iph.ttl = p.iph.ttl;
1504c19e654dSHerbert Xu 	t->parms.iph.tos = p.iph.tos;
1505c19e654dSHerbert Xu 	t->parms.iph.frag_off = p.iph.frag_off;
1506c19e654dSHerbert Xu 
1507c19e654dSHerbert Xu 	if (t->parms.link != p.link) {
1508c19e654dSHerbert Xu 		t->parms.link = p.link;
1509c19e654dSHerbert Xu 		mtu = ipgre_tunnel_bind_dev(dev);
1510c19e654dSHerbert Xu 		if (!tb[IFLA_MTU])
1511c19e654dSHerbert Xu 			dev->mtu = mtu;
1512c19e654dSHerbert Xu 		netdev_state_change(dev);
1513c19e654dSHerbert Xu 	}
1514c19e654dSHerbert Xu 
1515c19e654dSHerbert Xu 	return 0;
1516c19e654dSHerbert Xu }
1517c19e654dSHerbert Xu 
1518c19e654dSHerbert Xu static size_t ipgre_get_size(const struct net_device *dev)
1519c19e654dSHerbert Xu {
1520c19e654dSHerbert Xu 	return
1521c19e654dSHerbert Xu 		/* IFLA_GRE_LINK */
1522c19e654dSHerbert Xu 		nla_total_size(4) +
1523c19e654dSHerbert Xu 		/* IFLA_GRE_IFLAGS */
1524c19e654dSHerbert Xu 		nla_total_size(2) +
1525c19e654dSHerbert Xu 		/* IFLA_GRE_OFLAGS */
1526c19e654dSHerbert Xu 		nla_total_size(2) +
1527c19e654dSHerbert Xu 		/* IFLA_GRE_IKEY */
1528c19e654dSHerbert Xu 		nla_total_size(4) +
1529c19e654dSHerbert Xu 		/* IFLA_GRE_OKEY */
1530c19e654dSHerbert Xu 		nla_total_size(4) +
1531c19e654dSHerbert Xu 		/* IFLA_GRE_LOCAL */
1532c19e654dSHerbert Xu 		nla_total_size(4) +
1533c19e654dSHerbert Xu 		/* IFLA_GRE_REMOTE */
1534c19e654dSHerbert Xu 		nla_total_size(4) +
1535c19e654dSHerbert Xu 		/* IFLA_GRE_TTL */
1536c19e654dSHerbert Xu 		nla_total_size(1) +
1537c19e654dSHerbert Xu 		/* IFLA_GRE_TOS */
1538c19e654dSHerbert Xu 		nla_total_size(1) +
1539c19e654dSHerbert Xu 		/* IFLA_GRE_PMTUDISC */
1540c19e654dSHerbert Xu 		nla_total_size(1) +
1541c19e654dSHerbert Xu 		0;
1542c19e654dSHerbert Xu }
1543c19e654dSHerbert Xu 
1544c19e654dSHerbert Xu static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1545c19e654dSHerbert Xu {
1546c19e654dSHerbert Xu 	struct ip_tunnel *t = netdev_priv(dev);
1547c19e654dSHerbert Xu 	struct ip_tunnel_parm *p = &t->parms;
1548c19e654dSHerbert Xu 
1549c19e654dSHerbert Xu 	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1550c19e654dSHerbert Xu 	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1551c19e654dSHerbert Xu 	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1552ba9e64b1SPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1553ba9e64b1SPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
15544d74f8baSPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
15554d74f8baSPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1556c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1557c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1558c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1559c19e654dSHerbert Xu 
1560c19e654dSHerbert Xu 	return 0;
1561c19e654dSHerbert Xu 
1562c19e654dSHerbert Xu nla_put_failure:
1563c19e654dSHerbert Xu 	return -EMSGSIZE;
1564c19e654dSHerbert Xu }
1565c19e654dSHerbert Xu 
1566c19e654dSHerbert Xu static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1567c19e654dSHerbert Xu 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1568c19e654dSHerbert Xu 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1569c19e654dSHerbert Xu 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1570c19e654dSHerbert Xu 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1571c19e654dSHerbert Xu 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
15724d74f8baSPatrick McHardy 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
15734d74f8baSPatrick McHardy 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1574c19e654dSHerbert Xu 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1575c19e654dSHerbert Xu 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1576c19e654dSHerbert Xu 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1577c19e654dSHerbert Xu };
1578c19e654dSHerbert Xu 
1579c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1580c19e654dSHerbert Xu 	.kind		= "gre",
1581c19e654dSHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1582c19e654dSHerbert Xu 	.policy		= ipgre_policy,
1583c19e654dSHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1584c19e654dSHerbert Xu 	.setup		= ipgre_tunnel_setup,
1585c19e654dSHerbert Xu 	.validate	= ipgre_tunnel_validate,
1586c19e654dSHerbert Xu 	.newlink	= ipgre_newlink,
1587c19e654dSHerbert Xu 	.changelink	= ipgre_changelink,
1588c19e654dSHerbert Xu 	.get_size	= ipgre_get_size,
1589c19e654dSHerbert Xu 	.fill_info	= ipgre_fill_info,
1590c19e654dSHerbert Xu };
1591c19e654dSHerbert Xu 
1592e1a80002SHerbert Xu static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1593e1a80002SHerbert Xu 	.kind		= "gretap",
1594e1a80002SHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1595e1a80002SHerbert Xu 	.policy		= ipgre_policy,
1596e1a80002SHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1597e1a80002SHerbert Xu 	.setup		= ipgre_tap_setup,
1598e1a80002SHerbert Xu 	.validate	= ipgre_tap_validate,
1599e1a80002SHerbert Xu 	.newlink	= ipgre_newlink,
1600e1a80002SHerbert Xu 	.changelink	= ipgre_changelink,
1601e1a80002SHerbert Xu 	.get_size	= ipgre_get_size,
1602e1a80002SHerbert Xu 	.fill_info	= ipgre_fill_info,
1603e1a80002SHerbert Xu };
1604e1a80002SHerbert Xu 
/*
 *	And now the module code and kernel interface.
 */
16081da177e4SLinus Torvalds 
16091da177e4SLinus Torvalds static int __init ipgre_init(void)
16101da177e4SLinus Torvalds {
16111da177e4SLinus Torvalds 	int err;
16121da177e4SLinus Torvalds 
16131da177e4SLinus Torvalds 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
16141da177e4SLinus Torvalds 
16151da177e4SLinus Torvalds 	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
16161da177e4SLinus Torvalds 		printk(KERN_INFO "ipgre init: can't add protocol\n");
16171da177e4SLinus Torvalds 		return -EAGAIN;
16181da177e4SLinus Torvalds 	}
16191da177e4SLinus Torvalds 
162059a4c759SPavel Emelyanov 	err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
162159a4c759SPavel Emelyanov 	if (err < 0)
1622c19e654dSHerbert Xu 		goto gen_device_failed;
16237daa0004SPavel Emelyanov 
1624c19e654dSHerbert Xu 	err = rtnl_link_register(&ipgre_link_ops);
1625c19e654dSHerbert Xu 	if (err < 0)
1626c19e654dSHerbert Xu 		goto rtnl_link_failed;
1627c19e654dSHerbert Xu 
1628e1a80002SHerbert Xu 	err = rtnl_link_register(&ipgre_tap_ops);
1629e1a80002SHerbert Xu 	if (err < 0)
1630e1a80002SHerbert Xu 		goto tap_ops_failed;
1631e1a80002SHerbert Xu 
1632c19e654dSHerbert Xu out:
16337daa0004SPavel Emelyanov 	return err;
1634c19e654dSHerbert Xu 
1635e1a80002SHerbert Xu tap_ops_failed:
1636e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
1637c19e654dSHerbert Xu rtnl_link_failed:
1638c19e654dSHerbert Xu 	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639c19e654dSHerbert Xu gen_device_failed:
1640c19e654dSHerbert Xu 	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1641c19e654dSHerbert Xu 	goto out;
16421da177e4SLinus Torvalds }
16431da177e4SLinus Torvalds 
1644db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void)
16451da177e4SLinus Torvalds {
1646e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_tap_ops);
1647c19e654dSHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
1648c19e654dSHerbert Xu 	unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
16491da177e4SLinus Torvalds 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
16501da177e4SLinus Torvalds 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
16511da177e4SLinus Torvalds }
16521da177e4SLinus Torvalds 
16531da177e4SLinus Torvalds module_init(ipgre_init);
16541da177e4SLinus Torvalds module_exit(ipgre_fini);
16551da177e4SLinus Torvalds MODULE_LICENSE("GPL");
16564d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gre");
16574d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gretap");
1658