xref: /linux/net/ipv4/ip_gre.c (revision c2892f02712e9516d72841d5c019ed6916329794)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	GRE over IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
71da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
81da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
91da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds 
134fc268d2SRandy Dunlap #include <linux/capability.h>
141da177e4SLinus Torvalds #include <linux/module.h>
151da177e4SLinus Torvalds #include <linux/types.h>
161da177e4SLinus Torvalds #include <linux/kernel.h>
171da177e4SLinus Torvalds #include <asm/uaccess.h>
181da177e4SLinus Torvalds #include <linux/skbuff.h>
191da177e4SLinus Torvalds #include <linux/netdevice.h>
201da177e4SLinus Torvalds #include <linux/in.h>
211da177e4SLinus Torvalds #include <linux/tcp.h>
221da177e4SLinus Torvalds #include <linux/udp.h>
231da177e4SLinus Torvalds #include <linux/if_arp.h>
241da177e4SLinus Torvalds #include <linux/mroute.h>
251da177e4SLinus Torvalds #include <linux/init.h>
261da177e4SLinus Torvalds #include <linux/in6.h>
271da177e4SLinus Torvalds #include <linux/inetdevice.h>
281da177e4SLinus Torvalds #include <linux/igmp.h>
291da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
30e1a80002SHerbert Xu #include <linux/etherdevice.h>
3146f25dffSKris Katterjohn #include <linux/if_ether.h>
321da177e4SLinus Torvalds 
331da177e4SLinus Torvalds #include <net/sock.h>
341da177e4SLinus Torvalds #include <net/ip.h>
351da177e4SLinus Torvalds #include <net/icmp.h>
361da177e4SLinus Torvalds #include <net/protocol.h>
371da177e4SLinus Torvalds #include <net/ipip.h>
381da177e4SLinus Torvalds #include <net/arp.h>
391da177e4SLinus Torvalds #include <net/checksum.h>
401da177e4SLinus Torvalds #include <net/dsfield.h>
411da177e4SLinus Torvalds #include <net/inet_ecn.h>
421da177e4SLinus Torvalds #include <net/xfrm.h>
4359a4c759SPavel Emelyanov #include <net/net_namespace.h>
4459a4c759SPavel Emelyanov #include <net/netns/generic.h>
45c19e654dSHerbert Xu #include <net/rtnetlink.h>
461da177e4SLinus Torvalds 
471da177e4SLinus Torvalds #ifdef CONFIG_IPV6
481da177e4SLinus Torvalds #include <net/ipv6.h>
491da177e4SLinus Torvalds #include <net/ip6_fib.h>
501da177e4SLinus Torvalds #include <net/ip6_route.h>
511da177e4SLinus Torvalds #endif
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds /*
541da177e4SLinus Torvalds    Problems & solutions
551da177e4SLinus Torvalds    --------------------
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds    1. The most important issue is detecting local dead loops.
581da177e4SLinus Torvalds    They would cause complete host lockup in transmit, which
591da177e4SLinus Torvalds    would be "resolved" by stack overflow or, if queueing is enabled,
601da177e4SLinus Torvalds    with infinite looping in net_bh.
611da177e4SLinus Torvalds 
621da177e4SLinus Torvalds    We cannot track such dead loops during route installation,
631da177e4SLinus Torvalds    it is infeasible task. The most general solutions would be
641da177e4SLinus Torvalds    to keep skb->encapsulation counter (sort of local ttl),
651da177e4SLinus Torvalds    and silently drop packet when it expires. It is the best
661da177e4SLinus Torvalds    solution, but it supposes maintaining a new variable in ALL
671da177e4SLinus Torvalds    skb, even if no tunneling is used.
681da177e4SLinus Torvalds 
69a43912abSEric Dumazet    Current solution: HARD_TX_LOCK lock breaks dead loops.
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds 
721da177e4SLinus Torvalds 
731da177e4SLinus Torvalds    2. Networking dead loops would not kill routers, but would really
741da177e4SLinus Torvalds    kill network. IP hop limit plays role of "t->recursion" in this case,
751da177e4SLinus Torvalds    if we copy it from packet being encapsulated to upper header.
761da177e4SLinus Torvalds    It is very good solution, but it introduces two problems:
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
791da177e4SLinus Torvalds      do not work over tunnels.
801da177e4SLinus Torvalds    - traceroute does not work. I planned to relay ICMP from tunnel,
811da177e4SLinus Torvalds      so that this problem would be solved and traceroute output
821da177e4SLinus Torvalds      would be even more informative. This idea appeared to be wrong:
831da177e4SLinus Torvalds      only Linux complies to rfc1812 now (yes, guys, Linux is the only
841da177e4SLinus Torvalds      true router now :-)), all routers (at least, in neighbourhood of mine)
851da177e4SLinus Torvalds      return only 8 bytes of payload. It is the end.
861da177e4SLinus Torvalds 
871da177e4SLinus Torvalds    Hence, if we want that OSPF worked or traceroute said something reasonable,
881da177e4SLinus Torvalds    we should search for another solution.
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds    One of them is to parse packet trying to detect inner encapsulation
911da177e4SLinus Torvalds    made by our node. It is difficult or even impossible, especially,
921da177e4SLinus Torvalds    taking into account fragmentation. To be short, it is not a solution at all.
931da177e4SLinus Torvalds 
941da177e4SLinus Torvalds    Current solution: The solution was UNEXPECTEDLY SIMPLE.
951da177e4SLinus Torvalds    We force DF flag on tunnels with preconfigured hop limit,
961da177e4SLinus Torvalds    that is ALL. :-) Well, it does not remove the problem completely,
971da177e4SLinus Torvalds    but exponential growth of network traffic is changed to linear
981da177e4SLinus Torvalds    (branches, that exceed pmtu are pruned) and tunnel mtu
991da177e4SLinus Torvalds    fastly degrades to value <68, where looping stops.
1001da177e4SLinus Torvalds    Yes, it is not good if there exists a router in the loop,
1011da177e4SLinus Torvalds    which does not force DF, even when encapsulating packets have DF set.
1021da177e4SLinus Torvalds    But it is not our problem! Nobody could accuse us, we made
1031da177e4SLinus Torvalds    all that we could make. Even if it is your gated who injected
1041da177e4SLinus Torvalds    fatal route to network, even if it were you who configured
1051da177e4SLinus Torvalds    fatal static route: you are innocent. :-)
1061da177e4SLinus Torvalds 
1071da177e4SLinus Torvalds 
1081da177e4SLinus Torvalds 
1091da177e4SLinus Torvalds    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
1101da177e4SLinus Torvalds    practically identical code. It would be good to glue them
1111da177e4SLinus Torvalds    together, but it is not very evident, how to make them modular.
1121da177e4SLinus Torvalds    sit is integral part of IPv6, ipip and gre are naturally modular.
1131da177e4SLinus Torvalds    We could extract common parts (hash table, ioctl etc)
1141da177e4SLinus Torvalds    to a separate module (ip_tunnel.c).
1151da177e4SLinus Torvalds 
1161da177e4SLinus Torvalds    Alexey Kuznetsov.
1171da177e4SLinus Torvalds  */
1181da177e4SLinus Torvalds 
119c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1201da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev);
1211da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev);
12242aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev);
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds /* Fallback tunnel: no source, no destination, no key, no options */
1251da177e4SLinus Torvalds 
126eb8ce741SPavel Emelyanov #define HASH_SIZE  16
127eb8ce741SPavel Emelyanov 
128f99189b1SEric Dumazet static int ipgre_net_id __read_mostly;
12959a4c759SPavel Emelyanov struct ipgre_net {
130eb8ce741SPavel Emelyanov 	struct ip_tunnel *tunnels[4][HASH_SIZE];
131eb8ce741SPavel Emelyanov 
1327daa0004SPavel Emelyanov 	struct net_device *fb_tunnel_dev;
13359a4c759SPavel Emelyanov };
13459a4c759SPavel Emelyanov 
1351da177e4SLinus Torvalds /* Tunnel hash table */
1361da177e4SLinus Torvalds 
1371da177e4SLinus Torvalds /*
1381da177e4SLinus Torvalds    4 hash tables:
1391da177e4SLinus Torvalds 
1401da177e4SLinus Torvalds    3: (remote,local)
1411da177e4SLinus Torvalds    2: (remote,*)
1421da177e4SLinus Torvalds    1: (*,local)
1431da177e4SLinus Torvalds    0: (*,*)
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds    We require exact key match i.e. if a key is present in packet
1461da177e4SLinus Torvalds    it will match only tunnel with the same key; if it is not present,
1471da177e4SLinus Torvalds    it will match only keyless tunnel.
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds    All keyless packets, if not matched by configured keyless tunnels,
1501da177e4SLinus Torvalds    will match fallback tunnel.
1511da177e4SLinus Torvalds  */
1521da177e4SLinus Torvalds 
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesised so that an expression argument is
 * cast as a whole, and the mask is derived from HASH_SIZE (which must stay
 * a power of two) instead of a separate literal. The argument is evaluated
 * twice, so it must be free of side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/*
 * Shorthand for the four tables in struct ipgre_net:
 * (remote,local), (remote,*), (*,local) and (*,*).
 */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/*
 * Walk one hash chain starting at @start. The cursor is the caller's
 * local variable `t`, which must be declared as struct ip_tunnel *.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
1661da177e4SLinus Torvalds 
1671da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */
1681da177e4SLinus Torvalds 
169749c10f9STimo Teras static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
170e1a80002SHerbert Xu 					      __be32 remote, __be32 local,
171e1a80002SHerbert Xu 					      __be32 key, __be16 gre_proto)
1721da177e4SLinus Torvalds {
173749c10f9STimo Teras 	struct net *net = dev_net(dev);
174749c10f9STimo Teras 	int link = dev->ifindex;
1751da177e4SLinus Torvalds 	unsigned h0 = HASH(remote);
1761da177e4SLinus Torvalds 	unsigned h1 = HASH(key);
177afcf1242STimo Teras 	struct ip_tunnel *t, *cand = NULL;
1787daa0004SPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
179e1a80002SHerbert Xu 	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180e1a80002SHerbert Xu 		       ARPHRD_ETHER : ARPHRD_IPGRE;
181afcf1242STimo Teras 	int score, cand_score = 4;
1821da177e4SLinus Torvalds 
1838d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
184749c10f9STimo Teras 		if (local != t->parms.iph.saddr ||
185749c10f9STimo Teras 		    remote != t->parms.iph.daddr ||
186749c10f9STimo Teras 		    key != t->parms.i_key ||
187749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
188749c10f9STimo Teras 			continue;
189749c10f9STimo Teras 
190749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
191749c10f9STimo Teras 		    t->dev->type != dev_type)
192749c10f9STimo Teras 			continue;
193749c10f9STimo Teras 
194afcf1242STimo Teras 		score = 0;
195749c10f9STimo Teras 		if (t->parms.link != link)
196afcf1242STimo Teras 			score |= 1;
197749c10f9STimo Teras 		if (t->dev->type != dev_type)
198afcf1242STimo Teras 			score |= 2;
199afcf1242STimo Teras 		if (score == 0)
2001da177e4SLinus Torvalds 			return t;
201afcf1242STimo Teras 
202afcf1242STimo Teras 		if (score < cand_score) {
203afcf1242STimo Teras 			cand = t;
204afcf1242STimo Teras 			cand_score = score;
205afcf1242STimo Teras 		}
206e1a80002SHerbert Xu 	}
207e1a80002SHerbert Xu 
2088d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
209749c10f9STimo Teras 		if (remote != t->parms.iph.daddr ||
210749c10f9STimo Teras 		    key != t->parms.i_key ||
211749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
212749c10f9STimo Teras 			continue;
213749c10f9STimo Teras 
214749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
215749c10f9STimo Teras 		    t->dev->type != dev_type)
216749c10f9STimo Teras 			continue;
217749c10f9STimo Teras 
218afcf1242STimo Teras 		score = 0;
219749c10f9STimo Teras 		if (t->parms.link != link)
220afcf1242STimo Teras 			score |= 1;
221749c10f9STimo Teras 		if (t->dev->type != dev_type)
222afcf1242STimo Teras 			score |= 2;
223afcf1242STimo Teras 		if (score == 0)
2241da177e4SLinus Torvalds 			return t;
225afcf1242STimo Teras 
226afcf1242STimo Teras 		if (score < cand_score) {
227afcf1242STimo Teras 			cand = t;
228afcf1242STimo Teras 			cand_score = score;
229afcf1242STimo Teras 		}
230e1a80002SHerbert Xu 	}
231e1a80002SHerbert Xu 
2328d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
233749c10f9STimo Teras 		if ((local != t->parms.iph.saddr &&
234749c10f9STimo Teras 		     (local != t->parms.iph.daddr ||
235749c10f9STimo Teras 		      !ipv4_is_multicast(local))) ||
236749c10f9STimo Teras 		    key != t->parms.i_key ||
237749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
238749c10f9STimo Teras 			continue;
239749c10f9STimo Teras 
240749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
241749c10f9STimo Teras 		    t->dev->type != dev_type)
242749c10f9STimo Teras 			continue;
243749c10f9STimo Teras 
244afcf1242STimo Teras 		score = 0;
245749c10f9STimo Teras 		if (t->parms.link != link)
246afcf1242STimo Teras 			score |= 1;
247749c10f9STimo Teras 		if (t->dev->type != dev_type)
248afcf1242STimo Teras 			score |= 2;
249afcf1242STimo Teras 		if (score == 0)
2501da177e4SLinus Torvalds 			return t;
251afcf1242STimo Teras 
252afcf1242STimo Teras 		if (score < cand_score) {
253afcf1242STimo Teras 			cand = t;
254afcf1242STimo Teras 			cand_score = score;
255afcf1242STimo Teras 		}
256e1a80002SHerbert Xu 	}
257e1a80002SHerbert Xu 
2588d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
259749c10f9STimo Teras 		if (t->parms.i_key != key ||
260749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
261749c10f9STimo Teras 			continue;
262749c10f9STimo Teras 
263749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
264749c10f9STimo Teras 		    t->dev->type != dev_type)
265749c10f9STimo Teras 			continue;
266749c10f9STimo Teras 
267afcf1242STimo Teras 		score = 0;
268749c10f9STimo Teras 		if (t->parms.link != link)
269afcf1242STimo Teras 			score |= 1;
270749c10f9STimo Teras 		if (t->dev->type != dev_type)
271afcf1242STimo Teras 			score |= 2;
272afcf1242STimo Teras 		if (score == 0)
2731da177e4SLinus Torvalds 			return t;
274afcf1242STimo Teras 
275afcf1242STimo Teras 		if (score < cand_score) {
276afcf1242STimo Teras 			cand = t;
277afcf1242STimo Teras 			cand_score = score;
278afcf1242STimo Teras 		}
279e1a80002SHerbert Xu 	}
280e1a80002SHerbert Xu 
281afcf1242STimo Teras 	if (cand != NULL)
282afcf1242STimo Teras 		return cand;
2831da177e4SLinus Torvalds 
2848d5b2c08SEric Dumazet 	dev = ign->fb_tunnel_dev;
2858d5b2c08SEric Dumazet 	if (dev->flags & IFF_UP)
2868d5b2c08SEric Dumazet 		return netdev_priv(dev);
287749c10f9STimo Teras 
2881da177e4SLinus Torvalds 	return NULL;
2891da177e4SLinus Torvalds }
2901da177e4SLinus Torvalds 
291f57e7d5aSPavel Emelyanov static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292f57e7d5aSPavel Emelyanov 		struct ip_tunnel_parm *parms)
2931da177e4SLinus Torvalds {
2945056a1efSYOSHIFUJI Hideaki 	__be32 remote = parms->iph.daddr;
2955056a1efSYOSHIFUJI Hideaki 	__be32 local = parms->iph.saddr;
2965056a1efSYOSHIFUJI Hideaki 	__be32 key = parms->i_key;
2971da177e4SLinus Torvalds 	unsigned h = HASH(key);
2981da177e4SLinus Torvalds 	int prio = 0;
2991da177e4SLinus Torvalds 
3001da177e4SLinus Torvalds 	if (local)
3011da177e4SLinus Torvalds 		prio |= 1;
302f97c1e0cSJoe Perches 	if (remote && !ipv4_is_multicast(remote)) {
3031da177e4SLinus Torvalds 		prio |= 2;
3041da177e4SLinus Torvalds 		h ^= HASH(remote);
3051da177e4SLinus Torvalds 	}
3061da177e4SLinus Torvalds 
307eb8ce741SPavel Emelyanov 	return &ign->tunnels[prio][h];
3081da177e4SLinus Torvalds }
3091da177e4SLinus Torvalds 
310f57e7d5aSPavel Emelyanov static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311f57e7d5aSPavel Emelyanov 		struct ip_tunnel *t)
3125056a1efSYOSHIFUJI Hideaki {
313f57e7d5aSPavel Emelyanov 	return __ipgre_bucket(ign, &t->parms);
3145056a1efSYOSHIFUJI Hideaki }
3155056a1efSYOSHIFUJI Hideaki 
316f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
3171da177e4SLinus Torvalds {
318f57e7d5aSPavel Emelyanov 	struct ip_tunnel **tp = ipgre_bucket(ign, t);
3191da177e4SLinus Torvalds 
3208d5b2c08SEric Dumazet 	spin_lock_bh(&ipgre_lock);
3211da177e4SLinus Torvalds 	t->next = *tp;
3228d5b2c08SEric Dumazet 	rcu_assign_pointer(*tp, t);
3238d5b2c08SEric Dumazet 	spin_unlock_bh(&ipgre_lock);
3241da177e4SLinus Torvalds }
3251da177e4SLinus Torvalds 
326f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
3271da177e4SLinus Torvalds {
3281da177e4SLinus Torvalds 	struct ip_tunnel **tp;
3291da177e4SLinus Torvalds 
330f57e7d5aSPavel Emelyanov 	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
3311da177e4SLinus Torvalds 		if (t == *tp) {
3328d5b2c08SEric Dumazet 			spin_lock_bh(&ipgre_lock);
3331da177e4SLinus Torvalds 			*tp = t->next;
3348d5b2c08SEric Dumazet 			spin_unlock_bh(&ipgre_lock);
3351da177e4SLinus Torvalds 			break;
3361da177e4SLinus Torvalds 		}
3371da177e4SLinus Torvalds 	}
3381da177e4SLinus Torvalds }
3391da177e4SLinus Torvalds 
340e1a80002SHerbert Xu static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341e1a80002SHerbert Xu 					   struct ip_tunnel_parm *parms,
342e1a80002SHerbert Xu 					   int type)
3431da177e4SLinus Torvalds {
344d5a0a1e3SAl Viro 	__be32 remote = parms->iph.daddr;
345d5a0a1e3SAl Viro 	__be32 local = parms->iph.saddr;
346d5a0a1e3SAl Viro 	__be32 key = parms->i_key;
347749c10f9STimo Teras 	int link = parms->link;
348e1a80002SHerbert Xu 	struct ip_tunnel *t, **tp;
349e1a80002SHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350e1a80002SHerbert Xu 
351e1a80002SHerbert Xu 	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352e1a80002SHerbert Xu 		if (local == t->parms.iph.saddr &&
353e1a80002SHerbert Xu 		    remote == t->parms.iph.daddr &&
354e1a80002SHerbert Xu 		    key == t->parms.i_key &&
355749c10f9STimo Teras 		    link == t->parms.link &&
356e1a80002SHerbert Xu 		    type == t->dev->type)
357e1a80002SHerbert Xu 			break;
358e1a80002SHerbert Xu 
359e1a80002SHerbert Xu 	return t;
360e1a80002SHerbert Xu }
361e1a80002SHerbert Xu 
362e1a80002SHerbert Xu static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363e1a80002SHerbert Xu 		struct ip_tunnel_parm *parms, int create)
364e1a80002SHerbert Xu {
365e1a80002SHerbert Xu 	struct ip_tunnel *t, *nt;
3661da177e4SLinus Torvalds 	struct net_device *dev;
3671da177e4SLinus Torvalds 	char name[IFNAMSIZ];
368f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
3691da177e4SLinus Torvalds 
370e1a80002SHerbert Xu 	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371e1a80002SHerbert Xu 	if (t || !create)
3721da177e4SLinus Torvalds 		return t;
3731da177e4SLinus Torvalds 
3741da177e4SLinus Torvalds 	if (parms->name[0])
3751da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
37634cc7ba6SPavel Emelyanov 	else
37734cc7ba6SPavel Emelyanov 		sprintf(name, "gre%%d");
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
3801da177e4SLinus Torvalds 	if (!dev)
3811da177e4SLinus Torvalds 	  return NULL;
3821da177e4SLinus Torvalds 
3830b67ecebSPavel Emelyanov 	dev_net_set(dev, net);
3840b67ecebSPavel Emelyanov 
385b37d428bSPavel Emelyanov 	if (strchr(name, '%')) {
386b37d428bSPavel Emelyanov 		if (dev_alloc_name(dev, name) < 0)
387b37d428bSPavel Emelyanov 			goto failed_free;
388b37d428bSPavel Emelyanov 	}
389b37d428bSPavel Emelyanov 
3902941a486SPatrick McHardy 	nt = netdev_priv(dev);
3911da177e4SLinus Torvalds 	nt->parms = *parms;
392c19e654dSHerbert Xu 	dev->rtnl_link_ops = &ipgre_link_ops;
3931da177e4SLinus Torvalds 
39442aa9162SHerbert Xu 	dev->mtu = ipgre_tunnel_bind_dev(dev);
39542aa9162SHerbert Xu 
396b37d428bSPavel Emelyanov 	if (register_netdevice(dev) < 0)
397b37d428bSPavel Emelyanov 		goto failed_free;
3981da177e4SLinus Torvalds 
3991da177e4SLinus Torvalds 	dev_hold(dev);
400f57e7d5aSPavel Emelyanov 	ipgre_tunnel_link(ign, nt);
4011da177e4SLinus Torvalds 	return nt;
4021da177e4SLinus Torvalds 
403b37d428bSPavel Emelyanov failed_free:
404b37d428bSPavel Emelyanov 	free_netdev(dev);
4051da177e4SLinus Torvalds 	return NULL;
4061da177e4SLinus Torvalds }
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev)
4091da177e4SLinus Torvalds {
410f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
411f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
412f57e7d5aSPavel Emelyanov 
413f57e7d5aSPavel Emelyanov 	ipgre_tunnel_unlink(ign, netdev_priv(dev));
4141da177e4SLinus Torvalds 	dev_put(dev);
4151da177e4SLinus Torvalds }
4161da177e4SLinus Torvalds 
4171da177e4SLinus Torvalds 
4181da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info)
4191da177e4SLinus Torvalds {
4201da177e4SLinus Torvalds 
421071f92d0SRami Rosen /* All the routers (except for Linux) return only
4221da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
4231da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
4241da177e4SLinus Torvalds 
4251da177e4SLinus Torvalds    Moreover, Cisco "wise men" put GRE key to the third word
4261da177e4SLinus Torvalds    in GRE header. It makes impossible maintaining even soft state for keyed
4271da177e4SLinus Torvalds    GRE tunnels with enabled checksum. Tell them "thank you".
4281da177e4SLinus Torvalds 
4291da177e4SLinus Torvalds    Well, I wonder, rfc1812 was written by Cisco employee,
4301da177e4SLinus Torvalds    what the hell these idiots break standrads established
4311da177e4SLinus Torvalds    by themself???
4321da177e4SLinus Torvalds  */
4331da177e4SLinus Torvalds 
4341da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb->data;
435d5a0a1e3SAl Viro 	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
4361da177e4SLinus Torvalds 	int grehlen = (iph->ihl<<2) + 4;
43788c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
43888c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
4391da177e4SLinus Torvalds 	struct ip_tunnel *t;
440d5a0a1e3SAl Viro 	__be16 flags;
4411da177e4SLinus Torvalds 
4421da177e4SLinus Torvalds 	flags = p[0];
4431da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
4441da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
4451da177e4SLinus Torvalds 			return;
4461da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
4471da177e4SLinus Torvalds 			grehlen += 4;
4481da177e4SLinus Torvalds 			if (flags&GRE_CSUM)
4491da177e4SLinus Torvalds 				grehlen += 4;
4501da177e4SLinus Torvalds 		}
4511da177e4SLinus Torvalds 	}
4521da177e4SLinus Torvalds 
4531da177e4SLinus Torvalds 	/* If only 8 bytes returned, keyed message will be dropped here */
4541da177e4SLinus Torvalds 	if (skb_headlen(skb) < grehlen)
4551da177e4SLinus Torvalds 		return;
4561da177e4SLinus Torvalds 
4571da177e4SLinus Torvalds 	switch (type) {
4581da177e4SLinus Torvalds 	default:
4591da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
4601da177e4SLinus Torvalds 		return;
4611da177e4SLinus Torvalds 
4621da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
4631da177e4SLinus Torvalds 		switch (code) {
4641da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
4651da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
4661da177e4SLinus Torvalds 			/* Impossible event. */
4671da177e4SLinus Torvalds 			return;
4681da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
4691da177e4SLinus Torvalds 			/* Soft state for pmtu is maintained by IP core. */
4701da177e4SLinus Torvalds 			return;
4711da177e4SLinus Torvalds 		default:
4721da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
4731da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
4741da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
4751da177e4SLinus Torvalds 			 */
4761da177e4SLinus Torvalds 			break;
4771da177e4SLinus Torvalds 		}
4781da177e4SLinus Torvalds 		break;
4791da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
4801da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
4811da177e4SLinus Torvalds 			return;
4821da177e4SLinus Torvalds 		break;
4831da177e4SLinus Torvalds 	}
4841da177e4SLinus Torvalds 
4858d5b2c08SEric Dumazet 	rcu_read_lock();
486749c10f9STimo Teras 	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
487e1a80002SHerbert Xu 				flags & GRE_KEY ?
488e1a80002SHerbert Xu 				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489e1a80002SHerbert Xu 				p[1]);
490f97c1e0cSJoe Perches 	if (t == NULL || t->parms.iph.daddr == 0 ||
491f97c1e0cSJoe Perches 	    ipv4_is_multicast(t->parms.iph.daddr))
4921da177e4SLinus Torvalds 		goto out;
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
4951da177e4SLinus Torvalds 		goto out;
4961da177e4SLinus Torvalds 
497da6185d8SWei Yongjun 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
4981da177e4SLinus Torvalds 		t->err_count++;
4991da177e4SLinus Torvalds 	else
5001da177e4SLinus Torvalds 		t->err_count = 1;
5011da177e4SLinus Torvalds 	t->err_time = jiffies;
5021da177e4SLinus Torvalds out:
5038d5b2c08SEric Dumazet 	rcu_read_unlock();
5041da177e4SLinus Torvalds 	return;
5051da177e4SLinus Torvalds }
5061da177e4SLinus Torvalds 
5071da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
5081da177e4SLinus Torvalds {
5091da177e4SLinus Torvalds 	if (INET_ECN_is_ce(iph->tos)) {
5101da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
511eddc9ec5SArnaldo Carvalho de Melo 			IP_ECN_set_ce(ip_hdr(skb));
5121da177e4SLinus Torvalds 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
5130660e03fSArnaldo Carvalho de Melo 			IP6_ECN_set_ce(ipv6_hdr(skb));
5141da177e4SLinus Torvalds 		}
5151da177e4SLinus Torvalds 	}
5161da177e4SLinus Torvalds }
5171da177e4SLinus Torvalds 
5181da177e4SLinus Torvalds static inline u8
5191da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
5201da177e4SLinus Torvalds {
5211da177e4SLinus Torvalds 	u8 inner = 0;
5221da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP))
5231da177e4SLinus Torvalds 		inner = old_iph->tos;
5241da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6))
5251da177e4SLinus Torvalds 		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
5261da177e4SLinus Torvalds 	return INET_ECN_encapsulate(tos, inner);
5271da177e4SLinus Torvalds }
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb)
5301da177e4SLinus Torvalds {
5311da177e4SLinus Torvalds 	struct iphdr *iph;
5321da177e4SLinus Torvalds 	u8     *h;
533d5a0a1e3SAl Viro 	__be16    flags;
534d3bc23e7SAl Viro 	__sum16   csum = 0;
535d5a0a1e3SAl Viro 	__be32 key = 0;
5361da177e4SLinus Torvalds 	u32    seqno = 0;
5371da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
5381da177e4SLinus Torvalds 	int    offset = 4;
539e1a80002SHerbert Xu 	__be16 gre_proto;
54064194c31SHerbert Xu 	unsigned int len;
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, 16))
5431da177e4SLinus Torvalds 		goto drop_nolock;
5441da177e4SLinus Torvalds 
545eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
5461da177e4SLinus Torvalds 	h = skb->data;
547d5a0a1e3SAl Viro 	flags = *(__be16*)h;
5481da177e4SLinus Torvalds 
5491da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
5501da177e4SLinus Torvalds 		/* - Version must be 0.
5511da177e4SLinus Torvalds 		   - We do not support routing headers.
5521da177e4SLinus Torvalds 		 */
5531da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
5541da177e4SLinus Torvalds 			goto drop_nolock;
5551da177e4SLinus Torvalds 
5561da177e4SLinus Torvalds 		if (flags&GRE_CSUM) {
557fb286bb2SHerbert Xu 			switch (skb->ip_summed) {
55884fa7933SPatrick McHardy 			case CHECKSUM_COMPLETE:
559d3bc23e7SAl Viro 				csum = csum_fold(skb->csum);
560fb286bb2SHerbert Xu 				if (!csum)
561fb286bb2SHerbert Xu 					break;
562fb286bb2SHerbert Xu 				/* fall through */
563fb286bb2SHerbert Xu 			case CHECKSUM_NONE:
564fb286bb2SHerbert Xu 				skb->csum = 0;
565fb286bb2SHerbert Xu 				csum = __skb_checksum_complete(skb);
56684fa7933SPatrick McHardy 				skb->ip_summed = CHECKSUM_COMPLETE;
5671da177e4SLinus Torvalds 			}
5681da177e4SLinus Torvalds 			offset += 4;
5691da177e4SLinus Torvalds 		}
5701da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
571d5a0a1e3SAl Viro 			key = *(__be32*)(h + offset);
5721da177e4SLinus Torvalds 			offset += 4;
5731da177e4SLinus Torvalds 		}
5741da177e4SLinus Torvalds 		if (flags&GRE_SEQ) {
575d5a0a1e3SAl Viro 			seqno = ntohl(*(__be32*)(h + offset));
5761da177e4SLinus Torvalds 			offset += 4;
5771da177e4SLinus Torvalds 		}
5781da177e4SLinus Torvalds 	}
5791da177e4SLinus Torvalds 
580e1a80002SHerbert Xu 	gre_proto = *(__be16 *)(h + 2);
581e1a80002SHerbert Xu 
5828d5b2c08SEric Dumazet 	rcu_read_lock();
583749c10f9STimo Teras 	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
584e1a80002SHerbert Xu 					  iph->saddr, iph->daddr, key,
585e1a80002SHerbert Xu 					  gre_proto))) {
586addd68ebSPavel Emelyanov 		struct net_device_stats *stats = &tunnel->dev->stats;
587addd68ebSPavel Emelyanov 
5881da177e4SLinus Torvalds 		secpath_reset(skb);
5891da177e4SLinus Torvalds 
590e1a80002SHerbert Xu 		skb->protocol = gre_proto;
5911da177e4SLinus Torvalds 		/* WCCP version 1 and 2 protocol decoding.
5921da177e4SLinus Torvalds 		 * - Change protocol to IP
5931da177e4SLinus Torvalds 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
5941da177e4SLinus Torvalds 		 */
595e1a80002SHerbert Xu 		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
596496c98dfSYOSHIFUJI Hideaki 			skb->protocol = htons(ETH_P_IP);
5971da177e4SLinus Torvalds 			if ((*(h + offset) & 0xF0) != 0x40)
5981da177e4SLinus Torvalds 				offset += 4;
5991da177e4SLinus Torvalds 		}
6001da177e4SLinus Torvalds 
6011d069167STimo Teras 		skb->mac_header = skb->network_header;
6024209fb60SArnaldo Carvalho de Melo 		__pskb_pull(skb, offset);
6039c70220bSArnaldo Carvalho de Melo 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
6041da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
6051da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
606f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
6071da177e4SLinus Torvalds 			/* Looped back packet, drop it! */
608511c3f92SEric Dumazet 			if (skb_rtable(skb)->fl.iif == 0)
6091da177e4SLinus Torvalds 				goto drop;
610addd68ebSPavel Emelyanov 			stats->multicast++;
6111da177e4SLinus Torvalds 			skb->pkt_type = PACKET_BROADCAST;
6121da177e4SLinus Torvalds 		}
6131da177e4SLinus Torvalds #endif
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds 		if (((flags&GRE_CSUM) && csum) ||
6161da177e4SLinus Torvalds 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
617addd68ebSPavel Emelyanov 			stats->rx_crc_errors++;
618addd68ebSPavel Emelyanov 			stats->rx_errors++;
6191da177e4SLinus Torvalds 			goto drop;
6201da177e4SLinus Torvalds 		}
6211da177e4SLinus Torvalds 		if (tunnel->parms.i_flags&GRE_SEQ) {
6221da177e4SLinus Torvalds 			if (!(flags&GRE_SEQ) ||
6231da177e4SLinus Torvalds 			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
624addd68ebSPavel Emelyanov 				stats->rx_fifo_errors++;
625addd68ebSPavel Emelyanov 				stats->rx_errors++;
6261da177e4SLinus Torvalds 				goto drop;
6271da177e4SLinus Torvalds 			}
6281da177e4SLinus Torvalds 			tunnel->i_seqno = seqno + 1;
6291da177e4SLinus Torvalds 		}
630e1a80002SHerbert Xu 
63164194c31SHerbert Xu 		len = skb->len;
63264194c31SHerbert Xu 
633e1a80002SHerbert Xu 		/* Warning: All skb pointers will be invalidated! */
634e1a80002SHerbert Xu 		if (tunnel->dev->type == ARPHRD_ETHER) {
635e1a80002SHerbert Xu 			if (!pskb_may_pull(skb, ETH_HLEN)) {
636e1a80002SHerbert Xu 				stats->rx_length_errors++;
637e1a80002SHerbert Xu 				stats->rx_errors++;
638e1a80002SHerbert Xu 				goto drop;
639e1a80002SHerbert Xu 			}
640e1a80002SHerbert Xu 
641e1a80002SHerbert Xu 			iph = ip_hdr(skb);
642e1a80002SHerbert Xu 			skb->protocol = eth_type_trans(skb, tunnel->dev);
643e1a80002SHerbert Xu 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
644e1a80002SHerbert Xu 		}
645e1a80002SHerbert Xu 
646addd68ebSPavel Emelyanov 		stats->rx_packets++;
64764194c31SHerbert Xu 		stats->rx_bytes += len;
6481da177e4SLinus Torvalds 		skb->dev = tunnel->dev;
649adf30907SEric Dumazet 		skb_dst_drop(skb);
6501da177e4SLinus Torvalds 		nf_reset(skb);
651e1a80002SHerbert Xu 
652e1a80002SHerbert Xu 		skb_reset_network_header(skb);
6531da177e4SLinus Torvalds 		ipgre_ecn_decapsulate(iph, skb);
654e1a80002SHerbert Xu 
6551da177e4SLinus Torvalds 		netif_rx(skb);
6568d5b2c08SEric Dumazet 		rcu_read_unlock();
6571da177e4SLinus Torvalds 		return(0);
6581da177e4SLinus Torvalds 	}
65945af08beSHerbert Xu 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds drop:
6628d5b2c08SEric Dumazet 	rcu_read_unlock();
6631da177e4SLinus Torvalds drop_nolock:
6641da177e4SLinus Torvalds 	kfree_skb(skb);
6651da177e4SLinus Torvalds 	return(0);
6661da177e4SLinus Torvalds }
6671da177e4SLinus Torvalds 
6686fef4c0cSStephen Hemminger static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
6691da177e4SLinus Torvalds {
6702941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
6710bfbedb1SEric Dumazet 	struct net_device_stats *stats = &dev->stats;
6720bfbedb1SEric Dumazet 	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
673eddc9ec5SArnaldo Carvalho de Melo 	struct iphdr  *old_iph = ip_hdr(skb);
6741da177e4SLinus Torvalds 	struct iphdr  *tiph;
6751da177e4SLinus Torvalds 	u8     tos;
676d5a0a1e3SAl Viro 	__be16 df;
6771da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
6781da177e4SLinus Torvalds 	struct net_device *tdev;			/* Device to other host */
6791da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
680c2636b4dSChuck Lever 	unsigned int max_headroom;		/* The extra header space needed */
6811da177e4SLinus Torvalds 	int    gre_hlen;
682d5a0a1e3SAl Viro 	__be32 dst;
6831da177e4SLinus Torvalds 	int    mtu;
6841da177e4SLinus Torvalds 
685e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER)
686e1a80002SHerbert Xu 		IPCB(skb)->flags = 0;
687e1a80002SHerbert Xu 
688e1a80002SHerbert Xu 	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
6891da177e4SLinus Torvalds 		gre_hlen = 0;
6901da177e4SLinus Torvalds 		tiph = (struct iphdr *)skb->data;
6911da177e4SLinus Torvalds 	} else {
6921da177e4SLinus Torvalds 		gre_hlen = tunnel->hlen;
6931da177e4SLinus Torvalds 		tiph = &tunnel->parms.iph;
6941da177e4SLinus Torvalds 	}
6951da177e4SLinus Torvalds 
6961da177e4SLinus Torvalds 	if ((dst = tiph->daddr) == 0) {
6971da177e4SLinus Torvalds 		/* NBMA tunnel */
6981da177e4SLinus Torvalds 
699adf30907SEric Dumazet 		if (skb_dst(skb) == NULL) {
700addd68ebSPavel Emelyanov 			stats->tx_fifo_errors++;
7011da177e4SLinus Torvalds 			goto tx_error;
7021da177e4SLinus Torvalds 		}
7031da177e4SLinus Torvalds 
7041da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
705511c3f92SEric Dumazet 			rt = skb_rtable(skb);
7061da177e4SLinus Torvalds 			if ((dst = rt->rt_gateway) == 0)
7071da177e4SLinus Torvalds 				goto tx_error_icmp;
7081da177e4SLinus Torvalds 		}
7091da177e4SLinus Torvalds #ifdef CONFIG_IPV6
7101da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6)) {
7111da177e4SLinus Torvalds 			struct in6_addr *addr6;
7121da177e4SLinus Torvalds 			int addr_type;
713adf30907SEric Dumazet 			struct neighbour *neigh = skb_dst(skb)->neighbour;
7141da177e4SLinus Torvalds 
7151da177e4SLinus Torvalds 			if (neigh == NULL)
7161da177e4SLinus Torvalds 				goto tx_error;
7171da177e4SLinus Torvalds 
7181da177e4SLinus Torvalds 			addr6 = (struct in6_addr *)&neigh->primary_key;
7191da177e4SLinus Torvalds 			addr_type = ipv6_addr_type(addr6);
7201da177e4SLinus Torvalds 
7211da177e4SLinus Torvalds 			if (addr_type == IPV6_ADDR_ANY) {
7220660e03fSArnaldo Carvalho de Melo 				addr6 = &ipv6_hdr(skb)->daddr;
7231da177e4SLinus Torvalds 				addr_type = ipv6_addr_type(addr6);
7241da177e4SLinus Torvalds 			}
7251da177e4SLinus Torvalds 
7261da177e4SLinus Torvalds 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
7271da177e4SLinus Torvalds 				goto tx_error_icmp;
7281da177e4SLinus Torvalds 
7291da177e4SLinus Torvalds 			dst = addr6->s6_addr32[3];
7301da177e4SLinus Torvalds 		}
7311da177e4SLinus Torvalds #endif
7321da177e4SLinus Torvalds 		else
7331da177e4SLinus Torvalds 			goto tx_error;
7341da177e4SLinus Torvalds 	}
7351da177e4SLinus Torvalds 
7361da177e4SLinus Torvalds 	tos = tiph->tos;
737ee686ca9SAndreas Jaggi 	if (tos == 1) {
738ee686ca9SAndreas Jaggi 		tos = 0;
7391da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
7401da177e4SLinus Torvalds 			tos = old_iph->tos;
7411da177e4SLinus Torvalds 	}
7421da177e4SLinus Torvalds 
7431da177e4SLinus Torvalds 	{
7441da177e4SLinus Torvalds 		struct flowi fl = { .oif = tunnel->parms.link,
7451da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
7461da177e4SLinus Torvalds 					      { .daddr = dst,
7471da177e4SLinus Torvalds 						.saddr = tiph->saddr,
7481da177e4SLinus Torvalds 						.tos = RT_TOS(tos) } },
7491da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
75096635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
751addd68ebSPavel Emelyanov 			stats->tx_carrier_errors++;
7521da177e4SLinus Torvalds 			goto tx_error;
7531da177e4SLinus Torvalds 		}
7541da177e4SLinus Torvalds 	}
7551da177e4SLinus Torvalds 	tdev = rt->u.dst.dev;
7561da177e4SLinus Torvalds 
7571da177e4SLinus Torvalds 	if (tdev == dev) {
7581da177e4SLinus Torvalds 		ip_rt_put(rt);
759addd68ebSPavel Emelyanov 		stats->collisions++;
7601da177e4SLinus Torvalds 		goto tx_error;
7611da177e4SLinus Torvalds 	}
7621da177e4SLinus Torvalds 
7631da177e4SLinus Torvalds 	df = tiph->frag_off;
7641da177e4SLinus Torvalds 	if (df)
765c95b819aSHerbert Xu 		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
7661da177e4SLinus Torvalds 	else
767adf30907SEric Dumazet 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
7681da177e4SLinus Torvalds 
769adf30907SEric Dumazet 	if (skb_dst(skb))
770adf30907SEric Dumazet 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
7711da177e4SLinus Torvalds 
7721da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP)) {
7731da177e4SLinus Torvalds 		df |= (old_iph->frag_off&htons(IP_DF));
7741da177e4SLinus Torvalds 
7751da177e4SLinus Torvalds 		if ((old_iph->frag_off&htons(IP_DF)) &&
7761da177e4SLinus Torvalds 		    mtu < ntohs(old_iph->tot_len)) {
7771da177e4SLinus Torvalds 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
7781da177e4SLinus Torvalds 			ip_rt_put(rt);
7791da177e4SLinus Torvalds 			goto tx_error;
7801da177e4SLinus Torvalds 		}
7811da177e4SLinus Torvalds 	}
7821da177e4SLinus Torvalds #ifdef CONFIG_IPV6
7831da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6)) {
784adf30907SEric Dumazet 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
7851da177e4SLinus Torvalds 
786adf30907SEric Dumazet 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
787f97c1e0cSJoe Perches 			if ((tunnel->parms.iph.daddr &&
788f97c1e0cSJoe Perches 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
7891da177e4SLinus Torvalds 			    rt6->rt6i_dst.plen == 128) {
7901da177e4SLinus Torvalds 				rt6->rt6i_flags |= RTF_MODIFIED;
791adf30907SEric Dumazet 				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
7921da177e4SLinus Torvalds 			}
7931da177e4SLinus Torvalds 		}
7941da177e4SLinus Torvalds 
7951da177e4SLinus Torvalds 		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
7961da177e4SLinus Torvalds 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
7971da177e4SLinus Torvalds 			ip_rt_put(rt);
7981da177e4SLinus Torvalds 			goto tx_error;
7991da177e4SLinus Torvalds 		}
8001da177e4SLinus Torvalds 	}
8011da177e4SLinus Torvalds #endif
8021da177e4SLinus Torvalds 
8031da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
804da6185d8SWei Yongjun 		if (time_before(jiffies,
805da6185d8SWei Yongjun 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
8061da177e4SLinus Torvalds 			tunnel->err_count--;
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds 			dst_link_failure(skb);
8091da177e4SLinus Torvalds 		} else
8101da177e4SLinus Torvalds 			tunnel->err_count = 0;
8111da177e4SLinus Torvalds 	}
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds 	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
8141da177e4SLinus Torvalds 
815cfbba49dSPatrick McHardy 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816cfbba49dSPatrick McHardy 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
8171da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
8181da177e4SLinus Torvalds 		if (!new_skb) {
8191da177e4SLinus Torvalds 			ip_rt_put(rt);
8200bfbedb1SEric Dumazet 			txq->tx_dropped++;
8211da177e4SLinus Torvalds 			dev_kfree_skb(skb);
8226ed10654SPatrick McHardy 			return NETDEV_TX_OK;
8231da177e4SLinus Torvalds 		}
8241da177e4SLinus Torvalds 		if (skb->sk)
8251da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
8261da177e4SLinus Torvalds 		dev_kfree_skb(skb);
8271da177e4SLinus Torvalds 		skb = new_skb;
828eddc9ec5SArnaldo Carvalho de Melo 		old_iph = ip_hdr(skb);
8291da177e4SLinus Torvalds 	}
8301da177e4SLinus Torvalds 
83164194c31SHerbert Xu 	skb_reset_transport_header(skb);
832e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, gre_hlen);
833e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
8341da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
83548d5cad8SPatrick McHardy 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
83648d5cad8SPatrick McHardy 			      IPSKB_REROUTED);
837adf30907SEric Dumazet 	skb_dst_drop(skb);
838adf30907SEric Dumazet 	skb_dst_set(skb, &rt->u.dst);
8391da177e4SLinus Torvalds 
8401da177e4SLinus Torvalds 	/*
8411da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
8421da177e4SLinus Torvalds 	 */
8431da177e4SLinus Torvalds 
844eddc9ec5SArnaldo Carvalho de Melo 	iph 			=	ip_hdr(skb);
8451da177e4SLinus Torvalds 	iph->version		=	4;
8461da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr) >> 2;
8471da177e4SLinus Torvalds 	iph->frag_off		=	df;
8481da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_GRE;
8491da177e4SLinus Torvalds 	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
8501da177e4SLinus Torvalds 	iph->daddr		=	rt->rt_dst;
8511da177e4SLinus Torvalds 	iph->saddr		=	rt->rt_src;
8521da177e4SLinus Torvalds 
8531da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0) {
8541da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
8551da177e4SLinus Torvalds 			iph->ttl = old_iph->ttl;
8561da177e4SLinus Torvalds #ifdef CONFIG_IPV6
8571da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6))
8581da177e4SLinus Torvalds 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
8591da177e4SLinus Torvalds #endif
8601da177e4SLinus Torvalds 		else
8611da177e4SLinus Torvalds 			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
8621da177e4SLinus Torvalds 	}
8631da177e4SLinus Torvalds 
864d5a0a1e3SAl Viro 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
865e1a80002SHerbert Xu 	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
866e1a80002SHerbert Xu 				   htons(ETH_P_TEB) : skb->protocol;
8671da177e4SLinus Torvalds 
8681da177e4SLinus Torvalds 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
869d5a0a1e3SAl Viro 		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
8701da177e4SLinus Torvalds 
8711da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_SEQ) {
8721da177e4SLinus Torvalds 			++tunnel->o_seqno;
8731da177e4SLinus Torvalds 			*ptr = htonl(tunnel->o_seqno);
8741da177e4SLinus Torvalds 			ptr--;
8751da177e4SLinus Torvalds 		}
8761da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_KEY) {
8771da177e4SLinus Torvalds 			*ptr = tunnel->parms.o_key;
8781da177e4SLinus Torvalds 			ptr--;
8791da177e4SLinus Torvalds 		}
8801da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_CSUM) {
8811da177e4SLinus Torvalds 			*ptr = 0;
8825f92a738SAl Viro 			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
8831da177e4SLinus Torvalds 		}
8841da177e4SLinus Torvalds 	}
8851da177e4SLinus Torvalds 
8861da177e4SLinus Torvalds 	nf_reset(skb);
8871da177e4SLinus Torvalds 
8881da177e4SLinus Torvalds 	IPTUNNEL_XMIT();
8896ed10654SPatrick McHardy 	return NETDEV_TX_OK;
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds tx_error_icmp:
8921da177e4SLinus Torvalds 	dst_link_failure(skb);
8931da177e4SLinus Torvalds 
8941da177e4SLinus Torvalds tx_error:
8951da177e4SLinus Torvalds 	stats->tx_errors++;
8961da177e4SLinus Torvalds 	dev_kfree_skb(skb);
8976ed10654SPatrick McHardy 	return NETDEV_TX_OK;
8981da177e4SLinus Torvalds }
8991da177e4SLinus Torvalds 
90042aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev)
901ee34c1ebSMichal Schmidt {
902ee34c1ebSMichal Schmidt 	struct net_device *tdev = NULL;
903ee34c1ebSMichal Schmidt 	struct ip_tunnel *tunnel;
904ee34c1ebSMichal Schmidt 	struct iphdr *iph;
905ee34c1ebSMichal Schmidt 	int hlen = LL_MAX_HEADER;
906ee34c1ebSMichal Schmidt 	int mtu = ETH_DATA_LEN;
907ee34c1ebSMichal Schmidt 	int addend = sizeof(struct iphdr) + 4;
908ee34c1ebSMichal Schmidt 
909ee34c1ebSMichal Schmidt 	tunnel = netdev_priv(dev);
910ee34c1ebSMichal Schmidt 	iph = &tunnel->parms.iph;
911ee34c1ebSMichal Schmidt 
912c95b819aSHerbert Xu 	/* Guess output device to choose reasonable mtu and needed_headroom */
913ee34c1ebSMichal Schmidt 
914ee34c1ebSMichal Schmidt 	if (iph->daddr) {
915ee34c1ebSMichal Schmidt 		struct flowi fl = { .oif = tunnel->parms.link,
916ee34c1ebSMichal Schmidt 				    .nl_u = { .ip4_u =
917ee34c1ebSMichal Schmidt 					      { .daddr = iph->daddr,
918ee34c1ebSMichal Schmidt 						.saddr = iph->saddr,
919ee34c1ebSMichal Schmidt 						.tos = RT_TOS(iph->tos) } },
920ee34c1ebSMichal Schmidt 				    .proto = IPPROTO_GRE };
921ee34c1ebSMichal Schmidt 		struct rtable *rt;
92296635522SPavel Emelyanov 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
923ee34c1ebSMichal Schmidt 			tdev = rt->u.dst.dev;
924ee34c1ebSMichal Schmidt 			ip_rt_put(rt);
925ee34c1ebSMichal Schmidt 		}
926e1a80002SHerbert Xu 
927e1a80002SHerbert Xu 		if (dev->type != ARPHRD_ETHER)
928ee34c1ebSMichal Schmidt 			dev->flags |= IFF_POINTOPOINT;
929ee34c1ebSMichal Schmidt 	}
930ee34c1ebSMichal Schmidt 
931ee34c1ebSMichal Schmidt 	if (!tdev && tunnel->parms.link)
93296635522SPavel Emelyanov 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
933ee34c1ebSMichal Schmidt 
934ee34c1ebSMichal Schmidt 	if (tdev) {
935c95b819aSHerbert Xu 		hlen = tdev->hard_header_len + tdev->needed_headroom;
936ee34c1ebSMichal Schmidt 		mtu = tdev->mtu;
937ee34c1ebSMichal Schmidt 	}
938ee34c1ebSMichal Schmidt 	dev->iflink = tunnel->parms.link;
939ee34c1ebSMichal Schmidt 
940ee34c1ebSMichal Schmidt 	/* Precalculate GRE options length */
941ee34c1ebSMichal Schmidt 	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_CSUM)
943ee34c1ebSMichal Schmidt 			addend += 4;
944ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_KEY)
945ee34c1ebSMichal Schmidt 			addend += 4;
946ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_SEQ)
947ee34c1ebSMichal Schmidt 			addend += 4;
948ee34c1ebSMichal Schmidt 	}
949c95b819aSHerbert Xu 	dev->needed_headroom = addend + hlen;
9508cdb0456STom Goff 	mtu -= dev->hard_header_len + addend;
95142aa9162SHerbert Xu 
95242aa9162SHerbert Xu 	if (mtu < 68)
95342aa9162SHerbert Xu 		mtu = 68;
95442aa9162SHerbert Xu 
955ee34c1ebSMichal Schmidt 	tunnel->hlen = addend;
956ee34c1ebSMichal Schmidt 
95742aa9162SHerbert Xu 	return mtu;
958ee34c1ebSMichal Schmidt }
959ee34c1ebSMichal Schmidt 
9601da177e4SLinus Torvalds static int
9611da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
9621da177e4SLinus Torvalds {
9631da177e4SLinus Torvalds 	int err = 0;
9641da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
9651da177e4SLinus Torvalds 	struct ip_tunnel *t;
966f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
967f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
9681da177e4SLinus Torvalds 
9691da177e4SLinus Torvalds 	switch (cmd) {
9701da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
9711da177e4SLinus Torvalds 		t = NULL;
9727daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
9731da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
9741da177e4SLinus Torvalds 				err = -EFAULT;
9751da177e4SLinus Torvalds 				break;
9761da177e4SLinus Torvalds 			}
977f57e7d5aSPavel Emelyanov 			t = ipgre_tunnel_locate(net, &p, 0);
9781da177e4SLinus Torvalds 		}
9791da177e4SLinus Torvalds 		if (t == NULL)
9802941a486SPatrick McHardy 			t = netdev_priv(dev);
9811da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
9821da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
9831da177e4SLinus Torvalds 			err = -EFAULT;
9841da177e4SLinus Torvalds 		break;
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
9871da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
9881da177e4SLinus Torvalds 		err = -EPERM;
9891da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
9901da177e4SLinus Torvalds 			goto done;
9911da177e4SLinus Torvalds 
9921da177e4SLinus Torvalds 		err = -EFAULT;
9931da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
9941da177e4SLinus Torvalds 			goto done;
9951da177e4SLinus Torvalds 
9961da177e4SLinus Torvalds 		err = -EINVAL;
9971da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
9981da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
9991da177e4SLinus Torvalds 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
10001da177e4SLinus Torvalds 			goto done;
10011da177e4SLinus Torvalds 		if (p.iph.ttl)
10021da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
10031da177e4SLinus Torvalds 
10041da177e4SLinus Torvalds 		if (!(p.i_flags&GRE_KEY))
10051da177e4SLinus Torvalds 			p.i_key = 0;
10061da177e4SLinus Torvalds 		if (!(p.o_flags&GRE_KEY))
10071da177e4SLinus Torvalds 			p.o_key = 0;
10081da177e4SLinus Torvalds 
1009f57e7d5aSPavel Emelyanov 		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
10101da177e4SLinus Torvalds 
10117daa0004SPavel Emelyanov 		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
10121da177e4SLinus Torvalds 			if (t != NULL) {
10131da177e4SLinus Torvalds 				if (t->dev != dev) {
10141da177e4SLinus Torvalds 					err = -EEXIST;
10151da177e4SLinus Torvalds 					break;
10161da177e4SLinus Torvalds 				}
10171da177e4SLinus Torvalds 			} else {
10181da177e4SLinus Torvalds 				unsigned nflags = 0;
10191da177e4SLinus Torvalds 
10202941a486SPatrick McHardy 				t = netdev_priv(dev);
10211da177e4SLinus Torvalds 
1022f97c1e0cSJoe Perches 				if (ipv4_is_multicast(p.iph.daddr))
10231da177e4SLinus Torvalds 					nflags = IFF_BROADCAST;
10241da177e4SLinus Torvalds 				else if (p.iph.daddr)
10251da177e4SLinus Torvalds 					nflags = IFF_POINTOPOINT;
10261da177e4SLinus Torvalds 
10271da177e4SLinus Torvalds 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
10281da177e4SLinus Torvalds 					err = -EINVAL;
10291da177e4SLinus Torvalds 					break;
10301da177e4SLinus Torvalds 				}
1031f57e7d5aSPavel Emelyanov 				ipgre_tunnel_unlink(ign, t);
10321da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
10331da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
10341da177e4SLinus Torvalds 				t->parms.i_key = p.i_key;
10351da177e4SLinus Torvalds 				t->parms.o_key = p.o_key;
10361da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
10371da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
1038f57e7d5aSPavel Emelyanov 				ipgre_tunnel_link(ign, t);
10391da177e4SLinus Torvalds 				netdev_state_change(dev);
10401da177e4SLinus Torvalds 			}
10411da177e4SLinus Torvalds 		}
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 		if (t) {
10441da177e4SLinus Torvalds 			err = 0;
10451da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
10461da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
10471da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
10481da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
1049ee34c1ebSMichal Schmidt 				if (t->parms.link != p.link) {
1050ee34c1ebSMichal Schmidt 					t->parms.link = p.link;
105142aa9162SHerbert Xu 					dev->mtu = ipgre_tunnel_bind_dev(dev);
1052ee34c1ebSMichal Schmidt 					netdev_state_change(dev);
1053ee34c1ebSMichal Schmidt 				}
10541da177e4SLinus Torvalds 			}
10551da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
10561da177e4SLinus Torvalds 				err = -EFAULT;
10571da177e4SLinus Torvalds 		} else
10581da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
10591da177e4SLinus Torvalds 		break;
10601da177e4SLinus Torvalds 
10611da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
10621da177e4SLinus Torvalds 		err = -EPERM;
10631da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
10641da177e4SLinus Torvalds 			goto done;
10651da177e4SLinus Torvalds 
10667daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
10671da177e4SLinus Torvalds 			err = -EFAULT;
10681da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
10691da177e4SLinus Torvalds 				goto done;
10701da177e4SLinus Torvalds 			err = -ENOENT;
1071f57e7d5aSPavel Emelyanov 			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
10721da177e4SLinus Torvalds 				goto done;
10731da177e4SLinus Torvalds 			err = -EPERM;
10747daa0004SPavel Emelyanov 			if (t == netdev_priv(ign->fb_tunnel_dev))
10751da177e4SLinus Torvalds 				goto done;
10761da177e4SLinus Torvalds 			dev = t->dev;
10771da177e4SLinus Torvalds 		}
107822f8cde5SStephen Hemminger 		unregister_netdevice(dev);
107922f8cde5SStephen Hemminger 		err = 0;
10801da177e4SLinus Torvalds 		break;
10811da177e4SLinus Torvalds 
10821da177e4SLinus Torvalds 	default:
10831da177e4SLinus Torvalds 		err = -EINVAL;
10841da177e4SLinus Torvalds 	}
10851da177e4SLinus Torvalds 
10861da177e4SLinus Torvalds done:
10871da177e4SLinus Torvalds 	return err;
10881da177e4SLinus Torvalds }
10891da177e4SLinus Torvalds 
10901da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
10911da177e4SLinus Torvalds {
10922941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
1093c95b819aSHerbert Xu 	if (new_mtu < 68 ||
1094c95b819aSHerbert Xu 	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
10951da177e4SLinus Torvalds 		return -EINVAL;
10961da177e4SLinus Torvalds 	dev->mtu = new_mtu;
10971da177e4SLinus Torvalds 	return 0;
10981da177e4SLinus Torvalds }
10991da177e4SLinus Torvalds 
11001da177e4SLinus Torvalds /* Nice toy. Unfortunately, useless in real life :-)
11011da177e4SLinus Torvalds    It allows to construct virtual multiprotocol broadcast "LAN"
11021da177e4SLinus Torvalds    over the Internet, provided multicast routing is tuned.
11031da177e4SLinus Torvalds 
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds    I have no idea was this bicycle invented before me,
11061da177e4SLinus Torvalds    so that I had to set ARPHRD_IPGRE to a random value.
11071da177e4SLinus Torvalds    I have an impression, that Cisco could make something similar,
11081da177e4SLinus Torvalds    but this feature is apparently missing in IOS<=11.2(8).
11091da177e4SLinus Torvalds 
11101da177e4SLinus Torvalds    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
11111da177e4SLinus Torvalds    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
11121da177e4SLinus Torvalds 
11131da177e4SLinus Torvalds    ping -t 255 224.66.66.66
11141da177e4SLinus Torvalds 
11151da177e4SLinus Torvalds    If nobody answers, mbone does not work.
11161da177e4SLinus Torvalds 
11171da177e4SLinus Torvalds    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
11181da177e4SLinus Torvalds    ip addr add 10.66.66.<somewhat>/24 dev Universe
11191da177e4SLinus Torvalds    ifconfig Universe up
11201da177e4SLinus Torvalds    ifconfig Universe add fe80::<Your_real_addr>/10
11211da177e4SLinus Torvalds    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
11221da177e4SLinus Torvalds    ftp 10.66.66.66
11231da177e4SLinus Torvalds    ...
11241da177e4SLinus Torvalds    ftp fec0:6666:6666::193.233.7.65
11251da177e4SLinus Torvalds    ...
11261da177e4SLinus Torvalds 
11271da177e4SLinus Torvalds  */
11281da177e4SLinus Torvalds 
11293b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
11303b04dddeSStephen Hemminger 			unsigned short type,
11313b04dddeSStephen Hemminger 			const void *daddr, const void *saddr, unsigned len)
11321da177e4SLinus Torvalds {
11332941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
11341da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1135d5a0a1e3SAl Viro 	__be16 *p = (__be16*)(iph+1);
11361da177e4SLinus Torvalds 
11371da177e4SLinus Torvalds 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
11381da177e4SLinus Torvalds 	p[0]		= t->parms.o_flags;
11391da177e4SLinus Torvalds 	p[1]		= htons(type);
11401da177e4SLinus Torvalds 
11411da177e4SLinus Torvalds 	/*
11421da177e4SLinus Torvalds 	 *	Set the source hardware address.
11431da177e4SLinus Torvalds 	 */
11441da177e4SLinus Torvalds 
11451da177e4SLinus Torvalds 	if (saddr)
11461da177e4SLinus Torvalds 		memcpy(&iph->saddr, saddr, 4);
11471da177e4SLinus Torvalds 
11481da177e4SLinus Torvalds 	if (daddr) {
11491da177e4SLinus Torvalds 		memcpy(&iph->daddr, daddr, 4);
11501da177e4SLinus Torvalds 		return t->hlen;
11511da177e4SLinus Torvalds 	}
1152f97c1e0cSJoe Perches 	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
11531da177e4SLinus Torvalds 		return t->hlen;
11541da177e4SLinus Torvalds 
11551da177e4SLinus Torvalds 	return -t->hlen;
11561da177e4SLinus Torvalds }
11571da177e4SLinus Torvalds 
11586a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
11596a5f44d7STimo Teras {
11606a5f44d7STimo Teras 	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
11616a5f44d7STimo Teras 	memcpy(haddr, &iph->saddr, 4);
11626a5f44d7STimo Teras 	return 4;
11636a5f44d7STimo Teras }
11646a5f44d7STimo Teras 
11653b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = {
11663b04dddeSStephen Hemminger 	.create	= ipgre_header,
11676a5f44d7STimo Teras 	.parse	= ipgre_header_parse,
11683b04dddeSStephen Hemminger };
11693b04dddeSStephen Hemminger 
11706a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST
11711da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev)
11721da177e4SLinus Torvalds {
11732941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
11741da177e4SLinus Torvalds 
1175f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
11761da177e4SLinus Torvalds 		struct flowi fl = { .oif = t->parms.link,
11771da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
11781da177e4SLinus Torvalds 					      { .daddr = t->parms.iph.daddr,
11791da177e4SLinus Torvalds 						.saddr = t->parms.iph.saddr,
11801da177e4SLinus Torvalds 						.tos = RT_TOS(t->parms.iph.tos) } },
11811da177e4SLinus Torvalds 				    .proto = IPPROTO_GRE };
11821da177e4SLinus Torvalds 		struct rtable *rt;
118396635522SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
11841da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
11851da177e4SLinus Torvalds 		dev = rt->u.dst.dev;
11861da177e4SLinus Torvalds 		ip_rt_put(rt);
1187e5ed6399SHerbert Xu 		if (__in_dev_get_rtnl(dev) == NULL)
11881da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
11891da177e4SLinus Torvalds 		t->mlink = dev->ifindex;
1190e5ed6399SHerbert Xu 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
11911da177e4SLinus Torvalds 	}
11921da177e4SLinus Torvalds 	return 0;
11931da177e4SLinus Torvalds }
11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev)
11961da177e4SLinus Torvalds {
11972941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
1198b8c26a33SStephen Hemminger 
1199f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
12007fee0ca2SDenis V. Lunev 		struct in_device *in_dev;
1201c346dca1SYOSHIFUJI Hideaki 		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
12021da177e4SLinus Torvalds 		if (in_dev) {
12031da177e4SLinus Torvalds 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
12041da177e4SLinus Torvalds 			in_dev_put(in_dev);
12051da177e4SLinus Torvalds 		}
12061da177e4SLinus Torvalds 	}
12071da177e4SLinus Torvalds 	return 0;
12081da177e4SLinus Torvalds }
12091da177e4SLinus Torvalds 
12101da177e4SLinus Torvalds #endif
12111da177e4SLinus Torvalds 
1212b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_netdev_ops = {
1213b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tunnel_init,
1214b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1215b8c26a33SStephen Hemminger #ifdef CONFIG_NET_IPGRE_BROADCAST
1216b8c26a33SStephen Hemminger 	.ndo_open		= ipgre_open,
1217b8c26a33SStephen Hemminger 	.ndo_stop		= ipgre_close,
1218b8c26a33SStephen Hemminger #endif
1219b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1220b8c26a33SStephen Hemminger 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
1221b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1222b8c26a33SStephen Hemminger };
1223b8c26a33SStephen Hemminger 
12241da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev)
12251da177e4SLinus Torvalds {
1226b8c26a33SStephen Hemminger 	dev->netdev_ops		= &ipgre_netdev_ops;
12271da177e4SLinus Torvalds 	dev->destructor 	= free_netdev;
12281da177e4SLinus Torvalds 
12291da177e4SLinus Torvalds 	dev->type		= ARPHRD_IPGRE;
1230c95b819aSHerbert Xu 	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
123146f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
12321da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
12331da177e4SLinus Torvalds 	dev->iflink		= 0;
12341da177e4SLinus Torvalds 	dev->addr_len		= 4;
12350b67ecebSPavel Emelyanov 	dev->features		|= NETIF_F_NETNS_LOCAL;
1236108bfa89SEric Dumazet 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
12371da177e4SLinus Torvalds }
12381da177e4SLinus Torvalds 
12391da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev)
12401da177e4SLinus Torvalds {
12411da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
12421da177e4SLinus Torvalds 	struct iphdr *iph;
12431da177e4SLinus Torvalds 
12442941a486SPatrick McHardy 	tunnel = netdev_priv(dev);
12451da177e4SLinus Torvalds 	iph = &tunnel->parms.iph;
12461da177e4SLinus Torvalds 
12471da177e4SLinus Torvalds 	tunnel->dev = dev;
12481da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
12511da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
12521da177e4SLinus Torvalds 
12531da177e4SLinus Torvalds 	if (iph->daddr) {
12541da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
1255f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
12561da177e4SLinus Torvalds 			if (!iph->saddr)
12571da177e4SLinus Torvalds 				return -EINVAL;
12581da177e4SLinus Torvalds 			dev->flags = IFF_BROADCAST;
12593b04dddeSStephen Hemminger 			dev->header_ops = &ipgre_header_ops;
12601da177e4SLinus Torvalds 		}
12611da177e4SLinus Torvalds #endif
1262ee34c1ebSMichal Schmidt 	} else
12636a5f44d7STimo Teras 		dev->header_ops = &ipgre_header_ops;
12641da177e4SLinus Torvalds 
12651da177e4SLinus Torvalds 	return 0;
12661da177e4SLinus Torvalds }
12671da177e4SLinus Torvalds 
1268b8c26a33SStephen Hemminger static void ipgre_fb_tunnel_init(struct net_device *dev)
12691da177e4SLinus Torvalds {
12702941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
12711da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
1272eb8ce741SPavel Emelyanov 	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
12731da177e4SLinus Torvalds 
12741da177e4SLinus Torvalds 	tunnel->dev = dev;
12751da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
12761da177e4SLinus Torvalds 
12771da177e4SLinus Torvalds 	iph->version		= 4;
12781da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_GRE;
12791da177e4SLinus Torvalds 	iph->ihl		= 5;
12801da177e4SLinus Torvalds 	tunnel->hlen		= sizeof(struct iphdr) + 4;
12811da177e4SLinus Torvalds 
12821da177e4SLinus Torvalds 	dev_hold(dev);
1283eb8ce741SPavel Emelyanov 	ign->tunnels_wc[0]	= tunnel;
12841da177e4SLinus Torvalds }
12851da177e4SLinus Torvalds 
12861da177e4SLinus Torvalds 
128732613090SAlexey Dobriyan static const struct net_protocol ipgre_protocol = {
12881da177e4SLinus Torvalds 	.handler	=	ipgre_rcv,
12891da177e4SLinus Torvalds 	.err_handler	=	ipgre_err,
1290f96c148fSPavel Emelyanov 	.netns_ok	=	1,
12911da177e4SLinus Torvalds };
12921da177e4SLinus Torvalds 
1293eef6dd65SEric Dumazet static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1294eb8ce741SPavel Emelyanov {
1295eb8ce741SPavel Emelyanov 	int prio;
1296eb8ce741SPavel Emelyanov 
1297eb8ce741SPavel Emelyanov 	for (prio = 0; prio < 4; prio++) {
1298eb8ce741SPavel Emelyanov 		int h;
1299eb8ce741SPavel Emelyanov 		for (h = 0; h < HASH_SIZE; h++) {
1300eef6dd65SEric Dumazet 			struct ip_tunnel *t = ign->tunnels[prio][h];
1301eef6dd65SEric Dumazet 
1302eef6dd65SEric Dumazet 			while (t != NULL) {
1303eef6dd65SEric Dumazet 				unregister_netdevice_queue(t->dev, head);
1304eef6dd65SEric Dumazet 				t = t->next;
1305eef6dd65SEric Dumazet 			}
1306eb8ce741SPavel Emelyanov 		}
1307eb8ce741SPavel Emelyanov 	}
1308eb8ce741SPavel Emelyanov }
1309eb8ce741SPavel Emelyanov 
13102c8c1e72SAlexey Dobriyan static int __net_init ipgre_init_net(struct net *net)
131159a4c759SPavel Emelyanov {
1312cfb8fbf2SEric W. Biederman 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
131359a4c759SPavel Emelyanov 	int err;
131459a4c759SPavel Emelyanov 
13157daa0004SPavel Emelyanov 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
13167daa0004SPavel Emelyanov 					   ipgre_tunnel_setup);
13177daa0004SPavel Emelyanov 	if (!ign->fb_tunnel_dev) {
13187daa0004SPavel Emelyanov 		err = -ENOMEM;
13197daa0004SPavel Emelyanov 		goto err_alloc_dev;
13207daa0004SPavel Emelyanov 	}
1321be77e593SAlexey Dobriyan 	dev_net_set(ign->fb_tunnel_dev, net);
13227daa0004SPavel Emelyanov 
1323b8c26a33SStephen Hemminger 	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1324c19e654dSHerbert Xu 	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
13257daa0004SPavel Emelyanov 
13267daa0004SPavel Emelyanov 	if ((err = register_netdev(ign->fb_tunnel_dev)))
13277daa0004SPavel Emelyanov 		goto err_reg_dev;
13287daa0004SPavel Emelyanov 
132959a4c759SPavel Emelyanov 	return 0;
133059a4c759SPavel Emelyanov 
13317daa0004SPavel Emelyanov err_reg_dev:
13327daa0004SPavel Emelyanov 	free_netdev(ign->fb_tunnel_dev);
13337daa0004SPavel Emelyanov err_alloc_dev:
133459a4c759SPavel Emelyanov 	return err;
133559a4c759SPavel Emelyanov }
133659a4c759SPavel Emelyanov 
13372c8c1e72SAlexey Dobriyan static void __net_exit ipgre_exit_net(struct net *net)
133859a4c759SPavel Emelyanov {
133959a4c759SPavel Emelyanov 	struct ipgre_net *ign;
1340eef6dd65SEric Dumazet 	LIST_HEAD(list);
134159a4c759SPavel Emelyanov 
134259a4c759SPavel Emelyanov 	ign = net_generic(net, ipgre_net_id);
13437daa0004SPavel Emelyanov 	rtnl_lock();
1344eef6dd65SEric Dumazet 	ipgre_destroy_tunnels(ign, &list);
1345eef6dd65SEric Dumazet 	unregister_netdevice_many(&list);
13467daa0004SPavel Emelyanov 	rtnl_unlock();
134759a4c759SPavel Emelyanov }
134859a4c759SPavel Emelyanov 
134959a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = {
135059a4c759SPavel Emelyanov 	.init = ipgre_init_net,
135159a4c759SPavel Emelyanov 	.exit = ipgre_exit_net,
1352cfb8fbf2SEric W. Biederman 	.id   = &ipgre_net_id,
1353cfb8fbf2SEric W. Biederman 	.size = sizeof(struct ipgre_net),
135459a4c759SPavel Emelyanov };
13551da177e4SLinus Torvalds 
1356c19e654dSHerbert Xu static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357c19e654dSHerbert Xu {
1358c19e654dSHerbert Xu 	__be16 flags;
1359c19e654dSHerbert Xu 
1360c19e654dSHerbert Xu 	if (!data)
1361c19e654dSHerbert Xu 		return 0;
1362c19e654dSHerbert Xu 
1363c19e654dSHerbert Xu 	flags = 0;
1364c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1365c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1367c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368c19e654dSHerbert Xu 	if (flags & (GRE_VERSION|GRE_ROUTING))
1369c19e654dSHerbert Xu 		return -EINVAL;
1370c19e654dSHerbert Xu 
1371c19e654dSHerbert Xu 	return 0;
1372c19e654dSHerbert Xu }
1373c19e654dSHerbert Xu 
1374e1a80002SHerbert Xu static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375e1a80002SHerbert Xu {
1376e1a80002SHerbert Xu 	__be32 daddr;
1377e1a80002SHerbert Xu 
1378e1a80002SHerbert Xu 	if (tb[IFLA_ADDRESS]) {
1379e1a80002SHerbert Xu 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380e1a80002SHerbert Xu 			return -EINVAL;
1381e1a80002SHerbert Xu 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382e1a80002SHerbert Xu 			return -EADDRNOTAVAIL;
1383e1a80002SHerbert Xu 	}
1384e1a80002SHerbert Xu 
1385e1a80002SHerbert Xu 	if (!data)
1386e1a80002SHerbert Xu 		goto out;
1387e1a80002SHerbert Xu 
1388e1a80002SHerbert Xu 	if (data[IFLA_GRE_REMOTE]) {
1389e1a80002SHerbert Xu 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390e1a80002SHerbert Xu 		if (!daddr)
1391e1a80002SHerbert Xu 			return -EINVAL;
1392e1a80002SHerbert Xu 	}
1393e1a80002SHerbert Xu 
1394e1a80002SHerbert Xu out:
1395e1a80002SHerbert Xu 	return ipgre_tunnel_validate(tb, data);
1396e1a80002SHerbert Xu }
1397e1a80002SHerbert Xu 
1398c19e654dSHerbert Xu static void ipgre_netlink_parms(struct nlattr *data[],
1399c19e654dSHerbert Xu 				struct ip_tunnel_parm *parms)
1400c19e654dSHerbert Xu {
14017bb82d92SHerbert Xu 	memset(parms, 0, sizeof(*parms));
1402c19e654dSHerbert Xu 
1403c19e654dSHerbert Xu 	parms->iph.protocol = IPPROTO_GRE;
1404c19e654dSHerbert Xu 
1405c19e654dSHerbert Xu 	if (!data)
1406c19e654dSHerbert Xu 		return;
1407c19e654dSHerbert Xu 
1408c19e654dSHerbert Xu 	if (data[IFLA_GRE_LINK])
1409c19e654dSHerbert Xu 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410c19e654dSHerbert Xu 
1411c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1412c19e654dSHerbert Xu 		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413c19e654dSHerbert Xu 
1414c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1415c19e654dSHerbert Xu 		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416c19e654dSHerbert Xu 
1417c19e654dSHerbert Xu 	if (data[IFLA_GRE_IKEY])
1418c19e654dSHerbert Xu 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419c19e654dSHerbert Xu 
1420c19e654dSHerbert Xu 	if (data[IFLA_GRE_OKEY])
1421c19e654dSHerbert Xu 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422c19e654dSHerbert Xu 
1423c19e654dSHerbert Xu 	if (data[IFLA_GRE_LOCAL])
14244d74f8baSPatrick McHardy 		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1425c19e654dSHerbert Xu 
1426c19e654dSHerbert Xu 	if (data[IFLA_GRE_REMOTE])
14274d74f8baSPatrick McHardy 		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1428c19e654dSHerbert Xu 
1429c19e654dSHerbert Xu 	if (data[IFLA_GRE_TTL])
1430c19e654dSHerbert Xu 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431c19e654dSHerbert Xu 
1432c19e654dSHerbert Xu 	if (data[IFLA_GRE_TOS])
1433c19e654dSHerbert Xu 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434c19e654dSHerbert Xu 
1435c19e654dSHerbert Xu 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436c19e654dSHerbert Xu 		parms->iph.frag_off = htons(IP_DF);
1437c19e654dSHerbert Xu }
1438c19e654dSHerbert Xu 
1439e1a80002SHerbert Xu static int ipgre_tap_init(struct net_device *dev)
1440e1a80002SHerbert Xu {
1441e1a80002SHerbert Xu 	struct ip_tunnel *tunnel;
1442e1a80002SHerbert Xu 
1443e1a80002SHerbert Xu 	tunnel = netdev_priv(dev);
1444e1a80002SHerbert Xu 
1445e1a80002SHerbert Xu 	tunnel->dev = dev;
1446e1a80002SHerbert Xu 	strcpy(tunnel->parms.name, dev->name);
1447e1a80002SHerbert Xu 
1448e1a80002SHerbert Xu 	ipgre_tunnel_bind_dev(dev);
1449e1a80002SHerbert Xu 
1450e1a80002SHerbert Xu 	return 0;
1451e1a80002SHerbert Xu }
1452e1a80002SHerbert Xu 
1453b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_tap_netdev_ops = {
1454b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tap_init,
1455b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1456b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1457b8c26a33SStephen Hemminger 	.ndo_set_mac_address 	= eth_mac_addr,
1458b8c26a33SStephen Hemminger 	.ndo_validate_addr	= eth_validate_addr,
1459b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1460b8c26a33SStephen Hemminger };
1461b8c26a33SStephen Hemminger 
1462e1a80002SHerbert Xu static void ipgre_tap_setup(struct net_device *dev)
1463e1a80002SHerbert Xu {
1464e1a80002SHerbert Xu 
1465e1a80002SHerbert Xu 	ether_setup(dev);
1466e1a80002SHerbert Xu 
14672e9526b3SHerbert Xu 	dev->netdev_ops		= &ipgre_tap_netdev_ops;
1468e1a80002SHerbert Xu 	dev->destructor 	= free_netdev;
1469e1a80002SHerbert Xu 
1470e1a80002SHerbert Xu 	dev->iflink		= 0;
1471e1a80002SHerbert Xu 	dev->features		|= NETIF_F_NETNS_LOCAL;
1472e1a80002SHerbert Xu }
1473e1a80002SHerbert Xu 
147481adee47SEric W. Biederman static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1475c19e654dSHerbert Xu 			 struct nlattr *data[])
1476c19e654dSHerbert Xu {
1477c19e654dSHerbert Xu 	struct ip_tunnel *nt;
1478c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1479c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480c19e654dSHerbert Xu 	int mtu;
1481c19e654dSHerbert Xu 	int err;
1482c19e654dSHerbert Xu 
1483c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1484c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &nt->parms);
1485c19e654dSHerbert Xu 
1486e1a80002SHerbert Xu 	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1487c19e654dSHerbert Xu 		return -EEXIST;
1488c19e654dSHerbert Xu 
1489e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490e1a80002SHerbert Xu 		random_ether_addr(dev->dev_addr);
1491e1a80002SHerbert Xu 
1492c19e654dSHerbert Xu 	mtu = ipgre_tunnel_bind_dev(dev);
1493c19e654dSHerbert Xu 	if (!tb[IFLA_MTU])
1494c19e654dSHerbert Xu 		dev->mtu = mtu;
1495c19e654dSHerbert Xu 
1496c19e654dSHerbert Xu 	err = register_netdevice(dev);
1497c19e654dSHerbert Xu 	if (err)
1498c19e654dSHerbert Xu 		goto out;
1499c19e654dSHerbert Xu 
1500c19e654dSHerbert Xu 	dev_hold(dev);
1501c19e654dSHerbert Xu 	ipgre_tunnel_link(ign, nt);
1502c19e654dSHerbert Xu 
1503c19e654dSHerbert Xu out:
1504c19e654dSHerbert Xu 	return err;
1505c19e654dSHerbert Xu }
1506c19e654dSHerbert Xu 
1507c19e654dSHerbert Xu static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508c19e654dSHerbert Xu 			    struct nlattr *data[])
1509c19e654dSHerbert Xu {
1510c19e654dSHerbert Xu 	struct ip_tunnel *t, *nt;
1511c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1512c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513c19e654dSHerbert Xu 	struct ip_tunnel_parm p;
1514c19e654dSHerbert Xu 	int mtu;
1515c19e654dSHerbert Xu 
1516c19e654dSHerbert Xu 	if (dev == ign->fb_tunnel_dev)
1517c19e654dSHerbert Xu 		return -EINVAL;
1518c19e654dSHerbert Xu 
1519c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1520c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &p);
1521c19e654dSHerbert Xu 
1522c19e654dSHerbert Xu 	t = ipgre_tunnel_locate(net, &p, 0);
1523c19e654dSHerbert Xu 
1524c19e654dSHerbert Xu 	if (t) {
1525c19e654dSHerbert Xu 		if (t->dev != dev)
1526c19e654dSHerbert Xu 			return -EEXIST;
1527c19e654dSHerbert Xu 	} else {
1528c19e654dSHerbert Xu 		t = nt;
1529c19e654dSHerbert Xu 
15302e9526b3SHerbert Xu 		if (dev->type != ARPHRD_ETHER) {
15312e9526b3SHerbert Xu 			unsigned nflags = 0;
15322e9526b3SHerbert Xu 
1533c19e654dSHerbert Xu 			if (ipv4_is_multicast(p.iph.daddr))
1534c19e654dSHerbert Xu 				nflags = IFF_BROADCAST;
1535c19e654dSHerbert Xu 			else if (p.iph.daddr)
1536c19e654dSHerbert Xu 				nflags = IFF_POINTOPOINT;
1537c19e654dSHerbert Xu 
1538c19e654dSHerbert Xu 			if ((dev->flags ^ nflags) &
1539c19e654dSHerbert Xu 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1540c19e654dSHerbert Xu 				return -EINVAL;
15412e9526b3SHerbert Xu 		}
1542c19e654dSHerbert Xu 
1543c19e654dSHerbert Xu 		ipgre_tunnel_unlink(ign, t);
1544c19e654dSHerbert Xu 		t->parms.iph.saddr = p.iph.saddr;
1545c19e654dSHerbert Xu 		t->parms.iph.daddr = p.iph.daddr;
1546c19e654dSHerbert Xu 		t->parms.i_key = p.i_key;
15472e9526b3SHerbert Xu 		if (dev->type != ARPHRD_ETHER) {
1548c19e654dSHerbert Xu 			memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549c19e654dSHerbert Xu 			memcpy(dev->broadcast, &p.iph.daddr, 4);
15502e9526b3SHerbert Xu 		}
1551c19e654dSHerbert Xu 		ipgre_tunnel_link(ign, t);
1552c19e654dSHerbert Xu 		netdev_state_change(dev);
1553c19e654dSHerbert Xu 	}
1554c19e654dSHerbert Xu 
1555c19e654dSHerbert Xu 	t->parms.o_key = p.o_key;
1556c19e654dSHerbert Xu 	t->parms.iph.ttl = p.iph.ttl;
1557c19e654dSHerbert Xu 	t->parms.iph.tos = p.iph.tos;
1558c19e654dSHerbert Xu 	t->parms.iph.frag_off = p.iph.frag_off;
1559c19e654dSHerbert Xu 
1560c19e654dSHerbert Xu 	if (t->parms.link != p.link) {
1561c19e654dSHerbert Xu 		t->parms.link = p.link;
1562c19e654dSHerbert Xu 		mtu = ipgre_tunnel_bind_dev(dev);
1563c19e654dSHerbert Xu 		if (!tb[IFLA_MTU])
1564c19e654dSHerbert Xu 			dev->mtu = mtu;
1565c19e654dSHerbert Xu 		netdev_state_change(dev);
1566c19e654dSHerbert Xu 	}
1567c19e654dSHerbert Xu 
1568c19e654dSHerbert Xu 	return 0;
1569c19e654dSHerbert Xu }
1570c19e654dSHerbert Xu 
1571c19e654dSHerbert Xu static size_t ipgre_get_size(const struct net_device *dev)
1572c19e654dSHerbert Xu {
1573c19e654dSHerbert Xu 	return
1574c19e654dSHerbert Xu 		/* IFLA_GRE_LINK */
1575c19e654dSHerbert Xu 		nla_total_size(4) +
1576c19e654dSHerbert Xu 		/* IFLA_GRE_IFLAGS */
1577c19e654dSHerbert Xu 		nla_total_size(2) +
1578c19e654dSHerbert Xu 		/* IFLA_GRE_OFLAGS */
1579c19e654dSHerbert Xu 		nla_total_size(2) +
1580c19e654dSHerbert Xu 		/* IFLA_GRE_IKEY */
1581c19e654dSHerbert Xu 		nla_total_size(4) +
1582c19e654dSHerbert Xu 		/* IFLA_GRE_OKEY */
1583c19e654dSHerbert Xu 		nla_total_size(4) +
1584c19e654dSHerbert Xu 		/* IFLA_GRE_LOCAL */
1585c19e654dSHerbert Xu 		nla_total_size(4) +
1586c19e654dSHerbert Xu 		/* IFLA_GRE_REMOTE */
1587c19e654dSHerbert Xu 		nla_total_size(4) +
1588c19e654dSHerbert Xu 		/* IFLA_GRE_TTL */
1589c19e654dSHerbert Xu 		nla_total_size(1) +
1590c19e654dSHerbert Xu 		/* IFLA_GRE_TOS */
1591c19e654dSHerbert Xu 		nla_total_size(1) +
1592c19e654dSHerbert Xu 		/* IFLA_GRE_PMTUDISC */
1593c19e654dSHerbert Xu 		nla_total_size(1) +
1594c19e654dSHerbert Xu 		0;
1595c19e654dSHerbert Xu }
1596c19e654dSHerbert Xu 
1597c19e654dSHerbert Xu static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598c19e654dSHerbert Xu {
1599c19e654dSHerbert Xu 	struct ip_tunnel *t = netdev_priv(dev);
1600c19e654dSHerbert Xu 	struct ip_tunnel_parm *p = &t->parms;
1601c19e654dSHerbert Xu 
1602c19e654dSHerbert Xu 	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603c19e654dSHerbert Xu 	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604c19e654dSHerbert Xu 	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1605ba9e64b1SPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606ba9e64b1SPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
16074d74f8baSPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
16084d74f8baSPatrick McHardy 	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1609c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611c19e654dSHerbert Xu 	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612c19e654dSHerbert Xu 
1613c19e654dSHerbert Xu 	return 0;
1614c19e654dSHerbert Xu 
1615c19e654dSHerbert Xu nla_put_failure:
1616c19e654dSHerbert Xu 	return -EMSGSIZE;
1617c19e654dSHerbert Xu }
1618c19e654dSHerbert Xu 
1619c19e654dSHerbert Xu static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620c19e654dSHerbert Xu 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1621c19e654dSHerbert Xu 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1622c19e654dSHerbert Xu 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1623c19e654dSHerbert Xu 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1624c19e654dSHerbert Xu 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
16254d74f8baSPatrick McHardy 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
16264d74f8baSPatrick McHardy 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1627c19e654dSHerbert Xu 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1628c19e654dSHerbert Xu 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1629c19e654dSHerbert Xu 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1630c19e654dSHerbert Xu };
1631c19e654dSHerbert Xu 
1632c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633c19e654dSHerbert Xu 	.kind		= "gre",
1634c19e654dSHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1635c19e654dSHerbert Xu 	.policy		= ipgre_policy,
1636c19e654dSHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1637c19e654dSHerbert Xu 	.setup		= ipgre_tunnel_setup,
1638c19e654dSHerbert Xu 	.validate	= ipgre_tunnel_validate,
1639c19e654dSHerbert Xu 	.newlink	= ipgre_newlink,
1640c19e654dSHerbert Xu 	.changelink	= ipgre_changelink,
1641c19e654dSHerbert Xu 	.get_size	= ipgre_get_size,
1642c19e654dSHerbert Xu 	.fill_info	= ipgre_fill_info,
1643c19e654dSHerbert Xu };
1644c19e654dSHerbert Xu 
1645e1a80002SHerbert Xu static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646e1a80002SHerbert Xu 	.kind		= "gretap",
1647e1a80002SHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1648e1a80002SHerbert Xu 	.policy		= ipgre_policy,
1649e1a80002SHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1650e1a80002SHerbert Xu 	.setup		= ipgre_tap_setup,
1651e1a80002SHerbert Xu 	.validate	= ipgre_tap_validate,
1652e1a80002SHerbert Xu 	.newlink	= ipgre_newlink,
1653e1a80002SHerbert Xu 	.changelink	= ipgre_changelink,
1654e1a80002SHerbert Xu 	.get_size	= ipgre_get_size,
1655e1a80002SHerbert Xu 	.fill_info	= ipgre_fill_info,
1656e1a80002SHerbert Xu };
1657e1a80002SHerbert Xu 
/*
 *	And now the module code and kernel interface.
 */
16611da177e4SLinus Torvalds 
16621da177e4SLinus Torvalds static int __init ipgre_init(void)
16631da177e4SLinus Torvalds {
16641da177e4SLinus Torvalds 	int err;
16651da177e4SLinus Torvalds 
16661da177e4SLinus Torvalds 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
16671da177e4SLinus Torvalds 
1668cfb8fbf2SEric W. Biederman 	err = register_pernet_device(&ipgre_net_ops);
166959a4c759SPavel Emelyanov 	if (err < 0)
1670*c2892f02SAlexey Dobriyan 		return err;
1671*c2892f02SAlexey Dobriyan 
1672*c2892f02SAlexey Dobriyan 	err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673*c2892f02SAlexey Dobriyan 	if (err < 0) {
1674*c2892f02SAlexey Dobriyan 		printk(KERN_INFO "ipgre init: can't add protocol\n");
1675*c2892f02SAlexey Dobriyan 		goto add_proto_failed;
1676*c2892f02SAlexey Dobriyan 	}
16777daa0004SPavel Emelyanov 
1678c19e654dSHerbert Xu 	err = rtnl_link_register(&ipgre_link_ops);
1679c19e654dSHerbert Xu 	if (err < 0)
1680c19e654dSHerbert Xu 		goto rtnl_link_failed;
1681c19e654dSHerbert Xu 
1682e1a80002SHerbert Xu 	err = rtnl_link_register(&ipgre_tap_ops);
1683e1a80002SHerbert Xu 	if (err < 0)
1684e1a80002SHerbert Xu 		goto tap_ops_failed;
1685e1a80002SHerbert Xu 
1686c19e654dSHerbert Xu out:
16877daa0004SPavel Emelyanov 	return err;
1688c19e654dSHerbert Xu 
1689e1a80002SHerbert Xu tap_ops_failed:
1690e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
1691c19e654dSHerbert Xu rtnl_link_failed:
1692c19e654dSHerbert Xu 	inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1693*c2892f02SAlexey Dobriyan add_proto_failed:
1694*c2892f02SAlexey Dobriyan 	unregister_pernet_device(&ipgre_net_ops);
1695c19e654dSHerbert Xu 	goto out;
16961da177e4SLinus Torvalds }
16971da177e4SLinus Torvalds 
1698db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void)
16991da177e4SLinus Torvalds {
1700e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_tap_ops);
1701c19e654dSHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
17021da177e4SLinus Torvalds 	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
17031da177e4SLinus Torvalds 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
1704*c2892f02SAlexey Dobriyan 	unregister_pernet_device(&ipgre_net_ops);
17051da177e4SLinus Torvalds }
17061da177e4SLinus Torvalds 
17071da177e4SLinus Torvalds module_init(ipgre_init);
17081da177e4SLinus Torvalds module_exit(ipgre_fini);
17091da177e4SLinus Torvalds MODULE_LICENSE("GPL");
17104d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gre");
17114d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gretap");
1712