xref: /linux/net/ipv4/ip_gre.c (revision d208328765a0ab425e36b5b828285f3337a85451)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	GRE over IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
71da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
81da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
91da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds 
13afd46503SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14afd46503SJoe Perches 
154fc268d2SRandy Dunlap #include <linux/capability.h>
161da177e4SLinus Torvalds #include <linux/module.h>
171da177e4SLinus Torvalds #include <linux/types.h>
181da177e4SLinus Torvalds #include <linux/kernel.h>
195a0e3ad6STejun Heo #include <linux/slab.h>
201da177e4SLinus Torvalds #include <asm/uaccess.h>
211da177e4SLinus Torvalds #include <linux/skbuff.h>
221da177e4SLinus Torvalds #include <linux/netdevice.h>
231da177e4SLinus Torvalds #include <linux/in.h>
241da177e4SLinus Torvalds #include <linux/tcp.h>
251da177e4SLinus Torvalds #include <linux/udp.h>
261da177e4SLinus Torvalds #include <linux/if_arp.h>
271da177e4SLinus Torvalds #include <linux/mroute.h>
281da177e4SLinus Torvalds #include <linux/init.h>
291da177e4SLinus Torvalds #include <linux/in6.h>
301da177e4SLinus Torvalds #include <linux/inetdevice.h>
311da177e4SLinus Torvalds #include <linux/igmp.h>
321da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
33e1a80002SHerbert Xu #include <linux/etherdevice.h>
3446f25dffSKris Katterjohn #include <linux/if_ether.h>
351da177e4SLinus Torvalds 
361da177e4SLinus Torvalds #include <net/sock.h>
371da177e4SLinus Torvalds #include <net/ip.h>
381da177e4SLinus Torvalds #include <net/icmp.h>
391da177e4SLinus Torvalds #include <net/protocol.h>
401da177e4SLinus Torvalds #include <net/ipip.h>
411da177e4SLinus Torvalds #include <net/arp.h>
421da177e4SLinus Torvalds #include <net/checksum.h>
431da177e4SLinus Torvalds #include <net/dsfield.h>
441da177e4SLinus Torvalds #include <net/inet_ecn.h>
451da177e4SLinus Torvalds #include <net/xfrm.h>
4659a4c759SPavel Emelyanov #include <net/net_namespace.h>
4759a4c759SPavel Emelyanov #include <net/netns/generic.h>
48c19e654dSHerbert Xu #include <net/rtnetlink.h>
4900959adeSDmitry Kozlov #include <net/gre.h>
501da177e4SLinus Torvalds 
51dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
521da177e4SLinus Torvalds #include <net/ipv6.h>
531da177e4SLinus Torvalds #include <net/ip6_fib.h>
541da177e4SLinus Torvalds #include <net/ip6_route.h>
551da177e4SLinus Torvalds #endif
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds /*
581da177e4SLinus Torvalds    Problems & solutions
591da177e4SLinus Torvalds    --------------------
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds    1. The most important issue is detecting local dead loops.
621da177e4SLinus Torvalds    They would cause complete host lockup in transmit, which
631da177e4SLinus Torvalds    would be "resolved" by stack overflow or, if queueing is enabled,
641da177e4SLinus Torvalds    with infinite looping in net_bh.
651da177e4SLinus Torvalds 
661da177e4SLinus Torvalds    We cannot track such dead loops during route installation,
671da177e4SLinus Torvalds    it is infeasible task. The most general solutions would be
681da177e4SLinus Torvalds    to keep skb->encapsulation counter (sort of local ttl),
696d0722a2SEric Dumazet    and silently drop packet when it expires. It is a good
70bff52857Sstephen hemminger    solution, but it supposes maintaining new variable in ALL
711da177e4SLinus Torvalds    skb, even if no tunneling is used.
721da177e4SLinus Torvalds 
736d0722a2SEric Dumazet    Current solution: xmit_recursion breaks dead loops. This is a percpu
746d0722a2SEric Dumazet    counter, since when we enter the first ndo_xmit(), cpu migration is
756d0722a2SEric Dumazet    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
761da177e4SLinus Torvalds 
771da177e4SLinus Torvalds    2. Networking dead loops would not kill routers, but would really
781da177e4SLinus Torvalds    kill network. IP hop limit plays role of "t->recursion" in this case,
791da177e4SLinus Torvalds    if we copy it from packet being encapsulated to upper header.
801da177e4SLinus Torvalds    It is very good solution, but it introduces two problems:
811da177e4SLinus Torvalds 
821da177e4SLinus Torvalds    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
831da177e4SLinus Torvalds      do not work over tunnels.
841da177e4SLinus Torvalds    - traceroute does not work. I planned to relay ICMP from tunnel,
851da177e4SLinus Torvalds      so that this problem would be solved and traceroute output
861da177e4SLinus Torvalds      would even more informative. This idea appeared to be wrong:
871da177e4SLinus Torvalds      only Linux complies to rfc1812 now (yes, guys, Linux is the only
881da177e4SLinus Torvalds      true router now :-)), all routers (at least, in neighbourhood of mine)
891da177e4SLinus Torvalds      return only 8 bytes of payload. It is the end.
901da177e4SLinus Torvalds 
911da177e4SLinus Torvalds    Hence, if we want that OSPF worked or traceroute said something reasonable,
921da177e4SLinus Torvalds    we should search for another solution.
931da177e4SLinus Torvalds 
941da177e4SLinus Torvalds    One of them is to parse packet trying to detect inner encapsulation
951da177e4SLinus Torvalds    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
971da177e4SLinus Torvalds 
981da177e4SLinus Torvalds    Current solution: The solution was UNEXPECTEDLY SIMPLE.
991da177e4SLinus Torvalds    We force DF flag on tunnels with preconfigured hop limit,
1001da177e4SLinus Torvalds    that is ALL. :-) Well, it does not remove the problem completely,
1011da177e4SLinus Torvalds    but exponential growth of network traffic is changed to linear
1021da177e4SLinus Torvalds    (branches, that exceed pmtu are pruned) and tunnel mtu
103bff52857Sstephen hemminger    rapidly degrades to value <68, where looping stops.
1041da177e4SLinus Torvalds    Yes, it is not good if there exists a router in the loop,
1051da177e4SLinus Torvalds    which does not force DF, even when encapsulating packets have DF set.
1061da177e4SLinus Torvalds    But it is not our problem! Nobody could accuse us, we made
1071da177e4SLinus Torvalds    all that we could make. Even if it is your gated who injected
1081da177e4SLinus Torvalds    fatal route to network, even if it were you who configured
1091da177e4SLinus Torvalds    fatal static route: you are innocent. :-)
1101da177e4SLinus Torvalds 
1111da177e4SLinus Torvalds 
1121da177e4SLinus Torvalds 
1131da177e4SLinus Torvalds    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
1141da177e4SLinus Torvalds    practically identical code. It would be good to glue them
1151da177e4SLinus Torvalds    together, but it is not very evident, how to make them modular.
1161da177e4SLinus Torvalds    sit is integral part of IPv6, ipip and gre are naturally modular.
1171da177e4SLinus Torvalds    We could extract common parts (hash table, ioctl etc)
1181da177e4SLinus Torvalds    to a separate module (ip_tunnel.c).
1191da177e4SLinus Torvalds 
1201da177e4SLinus Torvalds    Alexey Kuznetsov.
1211da177e4SLinus Torvalds  */
1221da177e4SLinus Torvalds 
123c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly;
1241da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev);
1251da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev);
12642aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev);
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds /* Fallback tunnel: no source, no destination, no key, no options */
1291da177e4SLinus Torvalds 
130eb8ce741SPavel Emelyanov #define HASH_SIZE  16
131eb8ce741SPavel Emelyanov 
132f99189b1SEric Dumazet static int ipgre_net_id __read_mostly;
13359a4c759SPavel Emelyanov struct ipgre_net {
1341507850bSEric Dumazet 	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
135eb8ce741SPavel Emelyanov 
1367daa0004SPavel Emelyanov 	struct net_device *fb_tunnel_dev;
13759a4c759SPavel Emelyanov };
13859a4c759SPavel Emelyanov 
1391da177e4SLinus Torvalds /* Tunnel hash table */
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds /*
1421da177e4SLinus Torvalds    4 hash tables:
1431da177e4SLinus Torvalds 
1441da177e4SLinus Torvalds    3: (remote,local)
1451da177e4SLinus Torvalds    2: (remote,*)
1461da177e4SLinus Torvalds    1: (*,local)
1471da177e4SLinus Torvalds    0: (*,*)
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds    We require exact key match i.e. if a key is present in packet
1501da177e4SLinus Torvalds    it will match only tunnel with the same key; if it is not present,
1511da177e4SLinus Torvalds    it will match only keyless tunnel.
1521da177e4SLinus Torvalds 
   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
1551da177e4SLinus Torvalds  */
1561da177e4SLinus Torvalds 
/*
 * Fold a 32-bit value (address or key) into a bucket index in 0..15 by
 * xoring it with itself shifted right by four bits and keeping the low
 * nibble.  The argument is parenthesized so that an expression such as
 * HASH(a ^ b) or HASH(a | b) hashes the value of the whole expression.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
1581da177e4SLinus Torvalds 
/* Named views of the four hash tables held in struct ipgre_net. */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain beginning at @start.  The cursor is NOT a macro
 * argument: the enclosing function must declare "struct ip_tunnel *t",
 * which this macro assigns on every step.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t != NULL; t = rcu_dereference(t->next))
1691da177e4SLinus Torvalds 
170e985aad7SEric Dumazet /* often modified stats are per cpu, other are shared (netdev->stats) */
171e985aad7SEric Dumazet struct pcpu_tstats {
17287b6d218Sstephen hemminger 	u64	rx_packets;
17387b6d218Sstephen hemminger 	u64	rx_bytes;
17487b6d218Sstephen hemminger 	u64	tx_packets;
17587b6d218Sstephen hemminger 	u64	tx_bytes;
17687b6d218Sstephen hemminger 	struct u64_stats_sync	syncp;
17787b6d218Sstephen hemminger };
178e985aad7SEric Dumazet 
17987b6d218Sstephen hemminger static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
18087b6d218Sstephen hemminger 						   struct rtnl_link_stats64 *tot)
181e985aad7SEric Dumazet {
182e985aad7SEric Dumazet 	int i;
183e985aad7SEric Dumazet 
184e985aad7SEric Dumazet 	for_each_possible_cpu(i) {
185e985aad7SEric Dumazet 		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
18687b6d218Sstephen hemminger 		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
18787b6d218Sstephen hemminger 		unsigned int start;
188e985aad7SEric Dumazet 
18987b6d218Sstephen hemminger 		do {
19087b6d218Sstephen hemminger 			start = u64_stats_fetch_begin_bh(&tstats->syncp);
19187b6d218Sstephen hemminger 			rx_packets = tstats->rx_packets;
19287b6d218Sstephen hemminger 			tx_packets = tstats->tx_packets;
19387b6d218Sstephen hemminger 			rx_bytes = tstats->rx_bytes;
19487b6d218Sstephen hemminger 			tx_bytes = tstats->tx_bytes;
19587b6d218Sstephen hemminger 		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
19687b6d218Sstephen hemminger 
19787b6d218Sstephen hemminger 		tot->rx_packets += rx_packets;
19887b6d218Sstephen hemminger 		tot->tx_packets += tx_packets;
19987b6d218Sstephen hemminger 		tot->rx_bytes   += rx_bytes;
20087b6d218Sstephen hemminger 		tot->tx_bytes   += tx_bytes;
201e985aad7SEric Dumazet 	}
20287b6d218Sstephen hemminger 
20387b6d218Sstephen hemminger 	tot->multicast = dev->stats.multicast;
20487b6d218Sstephen hemminger 	tot->rx_crc_errors = dev->stats.rx_crc_errors;
20587b6d218Sstephen hemminger 	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
20687b6d218Sstephen hemminger 	tot->rx_length_errors = dev->stats.rx_length_errors;
20787b6d218Sstephen hemminger 	tot->rx_errors = dev->stats.rx_errors;
20887b6d218Sstephen hemminger 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
20987b6d218Sstephen hemminger 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
21087b6d218Sstephen hemminger 	tot->tx_dropped = dev->stats.tx_dropped;
21187b6d218Sstephen hemminger 	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
21287b6d218Sstephen hemminger 	tot->tx_errors = dev->stats.tx_errors;
21387b6d218Sstephen hemminger 
21487b6d218Sstephen hemminger 	return tot;
215e985aad7SEric Dumazet }
216e985aad7SEric Dumazet 
217*d2083287Sstephen hemminger /* Does key in tunnel parameters match packet */
218*d2083287Sstephen hemminger static bool ipgre_key_match(const struct ip_tunnel_parm *p,
219*d2083287Sstephen hemminger 			    __u32 flags, __be32 key)
220*d2083287Sstephen hemminger {
221*d2083287Sstephen hemminger 	if (p->i_flags & GRE_KEY) {
222*d2083287Sstephen hemminger 		if (flags & GRE_KEY)
223*d2083287Sstephen hemminger 			return key == p->i_key;
224*d2083287Sstephen hemminger 		else
225*d2083287Sstephen hemminger 			return false;	/* key expected, none present */
226*d2083287Sstephen hemminger 	} else
227*d2083287Sstephen hemminger 		return !(flags & GRE_KEY);
228*d2083287Sstephen hemminger }
229*d2083287Sstephen hemminger 
2301da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */
2311da177e4SLinus Torvalds 
232749c10f9STimo Teras static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
233e1a80002SHerbert Xu 					     __be32 remote, __be32 local,
234*d2083287Sstephen hemminger 					     __u32 flags, __be32 key,
235*d2083287Sstephen hemminger 					     __be16 gre_proto)
2361da177e4SLinus Torvalds {
237749c10f9STimo Teras 	struct net *net = dev_net(dev);
238749c10f9STimo Teras 	int link = dev->ifindex;
2391507850bSEric Dumazet 	unsigned int h0 = HASH(remote);
2401507850bSEric Dumazet 	unsigned int h1 = HASH(key);
241afcf1242STimo Teras 	struct ip_tunnel *t, *cand = NULL;
2427daa0004SPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
243e1a80002SHerbert Xu 	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
244e1a80002SHerbert Xu 		       ARPHRD_ETHER : ARPHRD_IPGRE;
245afcf1242STimo Teras 	int score, cand_score = 4;
2461da177e4SLinus Torvalds 
2478d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
248749c10f9STimo Teras 		if (local != t->parms.iph.saddr ||
249749c10f9STimo Teras 		    remote != t->parms.iph.daddr ||
250749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
251749c10f9STimo Teras 			continue;
252749c10f9STimo Teras 
253*d2083287Sstephen hemminger 		if (!ipgre_key_match(&t->parms, flags, key))
254*d2083287Sstephen hemminger 			continue;
255*d2083287Sstephen hemminger 
256749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
257749c10f9STimo Teras 		    t->dev->type != dev_type)
258749c10f9STimo Teras 			continue;
259749c10f9STimo Teras 
260afcf1242STimo Teras 		score = 0;
261749c10f9STimo Teras 		if (t->parms.link != link)
262afcf1242STimo Teras 			score |= 1;
263749c10f9STimo Teras 		if (t->dev->type != dev_type)
264afcf1242STimo Teras 			score |= 2;
265afcf1242STimo Teras 		if (score == 0)
2661da177e4SLinus Torvalds 			return t;
267afcf1242STimo Teras 
268afcf1242STimo Teras 		if (score < cand_score) {
269afcf1242STimo Teras 			cand = t;
270afcf1242STimo Teras 			cand_score = score;
271afcf1242STimo Teras 		}
272e1a80002SHerbert Xu 	}
273e1a80002SHerbert Xu 
2748d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
275749c10f9STimo Teras 		if (remote != t->parms.iph.daddr ||
276749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
277749c10f9STimo Teras 			continue;
278749c10f9STimo Teras 
279*d2083287Sstephen hemminger 		if (!ipgre_key_match(&t->parms, flags, key))
280*d2083287Sstephen hemminger 			continue;
281*d2083287Sstephen hemminger 
282749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
283749c10f9STimo Teras 		    t->dev->type != dev_type)
284749c10f9STimo Teras 			continue;
285749c10f9STimo Teras 
286afcf1242STimo Teras 		score = 0;
287749c10f9STimo Teras 		if (t->parms.link != link)
288afcf1242STimo Teras 			score |= 1;
289749c10f9STimo Teras 		if (t->dev->type != dev_type)
290afcf1242STimo Teras 			score |= 2;
291afcf1242STimo Teras 		if (score == 0)
2921da177e4SLinus Torvalds 			return t;
293afcf1242STimo Teras 
294afcf1242STimo Teras 		if (score < cand_score) {
295afcf1242STimo Teras 			cand = t;
296afcf1242STimo Teras 			cand_score = score;
297afcf1242STimo Teras 		}
298e1a80002SHerbert Xu 	}
299e1a80002SHerbert Xu 
3008d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
301749c10f9STimo Teras 		if ((local != t->parms.iph.saddr &&
302749c10f9STimo Teras 		     (local != t->parms.iph.daddr ||
303749c10f9STimo Teras 		      !ipv4_is_multicast(local))) ||
304749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
305749c10f9STimo Teras 			continue;
306749c10f9STimo Teras 
307*d2083287Sstephen hemminger 		if (!ipgre_key_match(&t->parms, flags, key))
308*d2083287Sstephen hemminger 			continue;
309*d2083287Sstephen hemminger 
310749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
311749c10f9STimo Teras 		    t->dev->type != dev_type)
312749c10f9STimo Teras 			continue;
313749c10f9STimo Teras 
314afcf1242STimo Teras 		score = 0;
315749c10f9STimo Teras 		if (t->parms.link != link)
316afcf1242STimo Teras 			score |= 1;
317749c10f9STimo Teras 		if (t->dev->type != dev_type)
318afcf1242STimo Teras 			score |= 2;
319afcf1242STimo Teras 		if (score == 0)
3201da177e4SLinus Torvalds 			return t;
321afcf1242STimo Teras 
322afcf1242STimo Teras 		if (score < cand_score) {
323afcf1242STimo Teras 			cand = t;
324afcf1242STimo Teras 			cand_score = score;
325afcf1242STimo Teras 		}
326e1a80002SHerbert Xu 	}
327e1a80002SHerbert Xu 
3288d5b2c08SEric Dumazet 	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
329749c10f9STimo Teras 		if (t->parms.i_key != key ||
330749c10f9STimo Teras 		    !(t->dev->flags & IFF_UP))
331749c10f9STimo Teras 			continue;
332749c10f9STimo Teras 
333749c10f9STimo Teras 		if (t->dev->type != ARPHRD_IPGRE &&
334749c10f9STimo Teras 		    t->dev->type != dev_type)
335749c10f9STimo Teras 			continue;
336749c10f9STimo Teras 
337afcf1242STimo Teras 		score = 0;
338749c10f9STimo Teras 		if (t->parms.link != link)
339afcf1242STimo Teras 			score |= 1;
340749c10f9STimo Teras 		if (t->dev->type != dev_type)
341afcf1242STimo Teras 			score |= 2;
342afcf1242STimo Teras 		if (score == 0)
3431da177e4SLinus Torvalds 			return t;
344afcf1242STimo Teras 
345afcf1242STimo Teras 		if (score < cand_score) {
346afcf1242STimo Teras 			cand = t;
347afcf1242STimo Teras 			cand_score = score;
348afcf1242STimo Teras 		}
349e1a80002SHerbert Xu 	}
350e1a80002SHerbert Xu 
351afcf1242STimo Teras 	if (cand != NULL)
352afcf1242STimo Teras 		return cand;
3531da177e4SLinus Torvalds 
3548d5b2c08SEric Dumazet 	dev = ign->fb_tunnel_dev;
3558d5b2c08SEric Dumazet 	if (dev->flags & IFF_UP)
3568d5b2c08SEric Dumazet 		return netdev_priv(dev);
357749c10f9STimo Teras 
3581da177e4SLinus Torvalds 	return NULL;
3591da177e4SLinus Torvalds }
3601da177e4SLinus Torvalds 
3611507850bSEric Dumazet static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
362f57e7d5aSPavel Emelyanov 		struct ip_tunnel_parm *parms)
3631da177e4SLinus Torvalds {
3645056a1efSYOSHIFUJI Hideaki 	__be32 remote = parms->iph.daddr;
3655056a1efSYOSHIFUJI Hideaki 	__be32 local = parms->iph.saddr;
3665056a1efSYOSHIFUJI Hideaki 	__be32 key = parms->i_key;
3671507850bSEric Dumazet 	unsigned int h = HASH(key);
3681da177e4SLinus Torvalds 	int prio = 0;
3691da177e4SLinus Torvalds 
3701da177e4SLinus Torvalds 	if (local)
3711da177e4SLinus Torvalds 		prio |= 1;
372f97c1e0cSJoe Perches 	if (remote && !ipv4_is_multicast(remote)) {
3731da177e4SLinus Torvalds 		prio |= 2;
3741da177e4SLinus Torvalds 		h ^= HASH(remote);
3751da177e4SLinus Torvalds 	}
3761da177e4SLinus Torvalds 
377eb8ce741SPavel Emelyanov 	return &ign->tunnels[prio][h];
3781da177e4SLinus Torvalds }
3791da177e4SLinus Torvalds 
3801507850bSEric Dumazet static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
381f57e7d5aSPavel Emelyanov 		struct ip_tunnel *t)
3825056a1efSYOSHIFUJI Hideaki {
383f57e7d5aSPavel Emelyanov 	return __ipgre_bucket(ign, &t->parms);
3845056a1efSYOSHIFUJI Hideaki }
3855056a1efSYOSHIFUJI Hideaki 
386f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
3871da177e4SLinus Torvalds {
3881507850bSEric Dumazet 	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
3891da177e4SLinus Torvalds 
3901507850bSEric Dumazet 	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
3918d5b2c08SEric Dumazet 	rcu_assign_pointer(*tp, t);
3921da177e4SLinus Torvalds }
3931da177e4SLinus Torvalds 
394f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
3951da177e4SLinus Torvalds {
3961507850bSEric Dumazet 	struct ip_tunnel __rcu **tp;
3971507850bSEric Dumazet 	struct ip_tunnel *iter;
3981da177e4SLinus Torvalds 
3991507850bSEric Dumazet 	for (tp = ipgre_bucket(ign, t);
4001507850bSEric Dumazet 	     (iter = rtnl_dereference(*tp)) != NULL;
4011507850bSEric Dumazet 	     tp = &iter->next) {
4021507850bSEric Dumazet 		if (t == iter) {
4031507850bSEric Dumazet 			rcu_assign_pointer(*tp, t->next);
4041da177e4SLinus Torvalds 			break;
4051da177e4SLinus Torvalds 		}
4061da177e4SLinus Torvalds 	}
4071da177e4SLinus Torvalds }
4081da177e4SLinus Torvalds 
409e1a80002SHerbert Xu static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
410e1a80002SHerbert Xu 					   struct ip_tunnel_parm *parms,
411e1a80002SHerbert Xu 					   int type)
4121da177e4SLinus Torvalds {
413d5a0a1e3SAl Viro 	__be32 remote = parms->iph.daddr;
414d5a0a1e3SAl Viro 	__be32 local = parms->iph.saddr;
415d5a0a1e3SAl Viro 	__be32 key = parms->i_key;
416749c10f9STimo Teras 	int link = parms->link;
4171507850bSEric Dumazet 	struct ip_tunnel *t;
4181507850bSEric Dumazet 	struct ip_tunnel __rcu **tp;
419e1a80002SHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
420e1a80002SHerbert Xu 
4211507850bSEric Dumazet 	for (tp = __ipgre_bucket(ign, parms);
4221507850bSEric Dumazet 	     (t = rtnl_dereference(*tp)) != NULL;
4231507850bSEric Dumazet 	     tp = &t->next)
424e1a80002SHerbert Xu 		if (local == t->parms.iph.saddr &&
425e1a80002SHerbert Xu 		    remote == t->parms.iph.daddr &&
426e1a80002SHerbert Xu 		    key == t->parms.i_key &&
427749c10f9STimo Teras 		    link == t->parms.link &&
428e1a80002SHerbert Xu 		    type == t->dev->type)
429e1a80002SHerbert Xu 			break;
430e1a80002SHerbert Xu 
431e1a80002SHerbert Xu 	return t;
432e1a80002SHerbert Xu }
433e1a80002SHerbert Xu 
434e1a80002SHerbert Xu static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
435e1a80002SHerbert Xu 		struct ip_tunnel_parm *parms, int create)
436e1a80002SHerbert Xu {
437e1a80002SHerbert Xu 	struct ip_tunnel *t, *nt;
4381da177e4SLinus Torvalds 	struct net_device *dev;
4391da177e4SLinus Torvalds 	char name[IFNAMSIZ];
440f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
4411da177e4SLinus Torvalds 
442e1a80002SHerbert Xu 	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
443e1a80002SHerbert Xu 	if (t || !create)
4441da177e4SLinus Torvalds 		return t;
4451da177e4SLinus Torvalds 
4461da177e4SLinus Torvalds 	if (parms->name[0])
4471da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
44834cc7ba6SPavel Emelyanov 	else
449407d6fcbSstephen hemminger 		strcpy(name, "gre%d");
4501da177e4SLinus Torvalds 
4511da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
4521da177e4SLinus Torvalds 	if (!dev)
4531da177e4SLinus Torvalds 		return NULL;
4541da177e4SLinus Torvalds 
4550b67ecebSPavel Emelyanov 	dev_net_set(dev, net);
4560b67ecebSPavel Emelyanov 
4572941a486SPatrick McHardy 	nt = netdev_priv(dev);
4581da177e4SLinus Torvalds 	nt->parms = *parms;
459c19e654dSHerbert Xu 	dev->rtnl_link_ops = &ipgre_link_ops;
4601da177e4SLinus Torvalds 
46142aa9162SHerbert Xu 	dev->mtu = ipgre_tunnel_bind_dev(dev);
46242aa9162SHerbert Xu 
463b37d428bSPavel Emelyanov 	if (register_netdevice(dev) < 0)
464b37d428bSPavel Emelyanov 		goto failed_free;
4651da177e4SLinus Torvalds 
466f2b3ee9eSWillem de Bruijn 	/* Can use a lockless transmit, unless we generate output sequences */
467f2b3ee9eSWillem de Bruijn 	if (!(nt->parms.o_flags & GRE_SEQ))
468f2b3ee9eSWillem de Bruijn 		dev->features |= NETIF_F_LLTX;
469f2b3ee9eSWillem de Bruijn 
4701da177e4SLinus Torvalds 	dev_hold(dev);
471f57e7d5aSPavel Emelyanov 	ipgre_tunnel_link(ign, nt);
4721da177e4SLinus Torvalds 	return nt;
4731da177e4SLinus Torvalds 
474b37d428bSPavel Emelyanov failed_free:
475b37d428bSPavel Emelyanov 	free_netdev(dev);
4761da177e4SLinus Torvalds 	return NULL;
4771da177e4SLinus Torvalds }
4781da177e4SLinus Torvalds 
4791da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev)
4801da177e4SLinus Torvalds {
481f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
482f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
483f57e7d5aSPavel Emelyanov 
484f57e7d5aSPavel Emelyanov 	ipgre_tunnel_unlink(ign, netdev_priv(dev));
4851da177e4SLinus Torvalds 	dev_put(dev);
4861da177e4SLinus Torvalds }
4871da177e4SLinus Torvalds 
4881da177e4SLinus Torvalds 
4891da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info)
4901da177e4SLinus Torvalds {
4911da177e4SLinus Torvalds 
492071f92d0SRami Rosen /* All the routers (except for Linux) return only
4931da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
4941da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
4951da177e4SLinus Torvalds 
4961da177e4SLinus Torvalds    Moreover, Cisco "wise men" put GRE key to the third word
4971da177e4SLinus Torvalds    in GRE header. It makes impossible maintaining even soft state for keyed
4981da177e4SLinus Torvalds    GRE tunnels with enabled checksum. Tell them "thank you".
4991da177e4SLinus Torvalds 
5001da177e4SLinus Torvalds    Well, I wonder, rfc1812 was written by Cisco employee,
501bff52857Sstephen hemminger    what the hell these idiots break standards established
502bff52857Sstephen hemminger    by themselves???
5031da177e4SLinus Torvalds  */
5041da177e4SLinus Torvalds 
505b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *)skb->data;
506d5a0a1e3SAl Viro 	__be16	     *p = (__be16 *)(skb->data+(iph->ihl<<2));
5071da177e4SLinus Torvalds 	int grehlen = (iph->ihl<<2) + 4;
50888c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
50988c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
5101da177e4SLinus Torvalds 	struct ip_tunnel *t;
511d5a0a1e3SAl Viro 	__be16 flags;
512*d2083287Sstephen hemminger 	__be32 key = 0;
5131da177e4SLinus Torvalds 
5141da177e4SLinus Torvalds 	flags = p[0];
5151da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
5161da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
5171da177e4SLinus Torvalds 			return;
5181da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
5191da177e4SLinus Torvalds 			grehlen += 4;
5201da177e4SLinus Torvalds 			if (flags&GRE_CSUM)
5211da177e4SLinus Torvalds 				grehlen += 4;
5221da177e4SLinus Torvalds 		}
5231da177e4SLinus Torvalds 	}
5241da177e4SLinus Torvalds 
5251da177e4SLinus Torvalds 	/* If only 8 bytes returned, keyed message will be dropped here */
5261da177e4SLinus Torvalds 	if (skb_headlen(skb) < grehlen)
5271da177e4SLinus Torvalds 		return;
5281da177e4SLinus Torvalds 
529*d2083287Sstephen hemminger 	if (flags & GRE_KEY)
530*d2083287Sstephen hemminger 		key = *(((__be32 *)p) + (grehlen / 4) - 1);
531*d2083287Sstephen hemminger 
5321da177e4SLinus Torvalds 	switch (type) {
5331da177e4SLinus Torvalds 	default:
5341da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
5351da177e4SLinus Torvalds 		return;
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
5381da177e4SLinus Torvalds 		switch (code) {
5391da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
5401da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
5411da177e4SLinus Torvalds 			/* Impossible event. */
5421da177e4SLinus Torvalds 			return;
5431da177e4SLinus Torvalds 		default:
5441da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
5451da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
5461da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
5471da177e4SLinus Torvalds 			 */
5481da177e4SLinus Torvalds 			break;
5491da177e4SLinus Torvalds 		}
5501da177e4SLinus Torvalds 		break;
5511da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
5521da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
5531da177e4SLinus Torvalds 			return;
5541da177e4SLinus Torvalds 		break;
55555be7a9cSDavid S. Miller 
55655be7a9cSDavid S. Miller 	case ICMP_REDIRECT:
55755be7a9cSDavid S. Miller 		break;
5581da177e4SLinus Torvalds 	}
5591da177e4SLinus Torvalds 
5608d5b2c08SEric Dumazet 	rcu_read_lock();
561749c10f9STimo Teras 	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
562*d2083287Sstephen hemminger 				flags, key, p[1]);
563*d2083287Sstephen hemminger 
56436393395SDavid S. Miller 	if (t == NULL)
56536393395SDavid S. Miller 		goto out;
56636393395SDavid S. Miller 
56736393395SDavid S. Miller 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
56836393395SDavid S. Miller 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
56936393395SDavid S. Miller 				 t->parms.link, 0, IPPROTO_GRE, 0);
57036393395SDavid S. Miller 		goto out;
57136393395SDavid S. Miller 	}
57255be7a9cSDavid S. Miller 	if (type == ICMP_REDIRECT) {
57355be7a9cSDavid S. Miller 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
57455be7a9cSDavid S. Miller 			      IPPROTO_GRE, 0);
57555be7a9cSDavid S. Miller 		goto out;
57655be7a9cSDavid S. Miller 	}
57736393395SDavid S. Miller 	if (t->parms.iph.daddr == 0 ||
578f97c1e0cSJoe Perches 	    ipv4_is_multicast(t->parms.iph.daddr))
5791da177e4SLinus Torvalds 		goto out;
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
5821da177e4SLinus Torvalds 		goto out;
5831da177e4SLinus Torvalds 
584da6185d8SWei Yongjun 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
5851da177e4SLinus Torvalds 		t->err_count++;
5861da177e4SLinus Torvalds 	else
5871da177e4SLinus Torvalds 		t->err_count = 1;
5881da177e4SLinus Torvalds 	t->err_time = jiffies;
5891da177e4SLinus Torvalds out:
5908d5b2c08SEric Dumazet 	rcu_read_unlock();
5911da177e4SLinus Torvalds }
5921da177e4SLinus Torvalds 
593b71d1d42SEric Dumazet static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
5941da177e4SLinus Torvalds {
5951da177e4SLinus Torvalds 	if (INET_ECN_is_ce(iph->tos)) {
5961da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP)) {
597eddc9ec5SArnaldo Carvalho de Melo 			IP_ECN_set_ce(ip_hdr(skb));
5981da177e4SLinus Torvalds 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
5990660e03fSArnaldo Carvalho de Melo 			IP6_ECN_set_ce(ipv6_hdr(skb));
6001da177e4SLinus Torvalds 		}
6011da177e4SLinus Torvalds 	}
6021da177e4SLinus Torvalds }
6031da177e4SLinus Torvalds 
6041da177e4SLinus Torvalds static inline u8
605b71d1d42SEric Dumazet ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
6061da177e4SLinus Torvalds {
6071da177e4SLinus Torvalds 	u8 inner = 0;
6081da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP))
6091da177e4SLinus Torvalds 		inner = old_iph->tos;
6101da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6))
611b71d1d42SEric Dumazet 		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
6121da177e4SLinus Torvalds 	return INET_ECN_encapsulate(tos, inner);
6131da177e4SLinus Torvalds }
6141da177e4SLinus Torvalds 
/*
 * Receive handler for GRE packets carried over IPv4.
 *
 * Reads the GRE flags word and any optional checksum, key and sequence
 * fields, then looks up a matching tunnel.  When one is found the GRE header
 * is pulled off, checksum and sequence policy are enforced, the tunnel's
 * receive counters are updated and the inner packet is passed to netif_rx().
 * When no tunnel matches, an ICMP port-unreachable is sent and the skb is
 * freed.  Packets with a non-zero GRE version or the routing flag set are
 * dropped.
 *
 * Always returns 0.
 */
6151da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb)
6161da177e4SLinus Torvalds {
617b71d1d42SEric Dumazet 	const struct iphdr *iph;
6181da177e4SLinus Torvalds 	u8     *h;
619d5a0a1e3SAl Viro 	__be16    flags;
620d3bc23e7SAl Viro 	__sum16   csum = 0;
621d5a0a1e3SAl Viro 	__be32 key = 0;
6221da177e4SLinus Torvalds 	u32    seqno = 0;
6231da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
6241da177e4SLinus Torvalds 	int    offset = 4;
625e1a80002SHerbert Xu 	__be16 gre_proto;
6261da177e4SLinus Torvalds 
	/*
	 * 16 bytes covers the 4-byte base header (initial offset) plus the
	 * three optional 4-byte fields parsed below.
	 */
6271da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, 16))
6281da177e4SLinus Torvalds 		goto drop_nolock;
6291da177e4SLinus Torvalds 
630eddc9ec5SArnaldo Carvalho de Melo 	iph = ip_hdr(skb);
6311da177e4SLinus Torvalds 	h = skb->data;
632d5a0a1e3SAl Viro 	flags = *(__be16 *)h;
6331da177e4SLinus Torvalds 
6341da177e4SLinus Torvalds 	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
6351da177e4SLinus Torvalds 		/* - Version must be 0.
6361da177e4SLinus Torvalds 		   - We do not support routing headers.
6371da177e4SLinus Torvalds 		 */
6381da177e4SLinus Torvalds 		if (flags&(GRE_VERSION|GRE_ROUTING))
6391da177e4SLinus Torvalds 			goto drop_nolock;
6401da177e4SLinus Torvalds 
6411da177e4SLinus Torvalds 		if (flags&GRE_CSUM) {
642fb286bb2SHerbert Xu 			switch (skb->ip_summed) {
64384fa7933SPatrick McHardy 			case CHECKSUM_COMPLETE:
644d3bc23e7SAl Viro 				csum = csum_fold(skb->csum);
645fb286bb2SHerbert Xu 				if (!csum)
646fb286bb2SHerbert Xu 					break;
647fb286bb2SHerbert Xu 				/* fall through */
648fb286bb2SHerbert Xu 			case CHECKSUM_NONE:
649fb286bb2SHerbert Xu 				skb->csum = 0;
650fb286bb2SHerbert Xu 				csum = __skb_checksum_complete(skb);
65184fa7933SPatrick McHardy 				skb->ip_summed = CHECKSUM_COMPLETE;
6521da177e4SLinus Torvalds 			}
6531da177e4SLinus Torvalds 			offset += 4;
6541da177e4SLinus Torvalds 		}
6551da177e4SLinus Torvalds 		if (flags&GRE_KEY) {
656d5a0a1e3SAl Viro 			key = *(__be32 *)(h + offset);
6571da177e4SLinus Torvalds 			offset += 4;
6581da177e4SLinus Torvalds 		}
6591da177e4SLinus Torvalds 		if (flags&GRE_SEQ) {
660d5a0a1e3SAl Viro 			seqno = ntohl(*(__be32 *)(h + offset));
6611da177e4SLinus Torvalds 			offset += 4;
6621da177e4SLinus Torvalds 		}
6631da177e4SLinus Torvalds 	}
6641da177e4SLinus Torvalds 
	/* The protocol type is the second 16-bit word of the GRE header. */
665e1a80002SHerbert Xu 	gre_proto = *(__be16 *)(h + 2);
666e1a80002SHerbert Xu 
6678d5b2c08SEric Dumazet 	rcu_read_lock();
668*d2083287Sstephen hemminger 	tunnel = ipgre_tunnel_lookup(skb->dev,
669*d2083287Sstephen hemminger 				     iph->saddr, iph->daddr, flags, key,
670*d2083287Sstephen hemminger 				     gre_proto);
671*d2083287Sstephen hemminger 	if (tunnel) {
672e985aad7SEric Dumazet 		struct pcpu_tstats *tstats;
673addd68ebSPavel Emelyanov 
6741da177e4SLinus Torvalds 		secpath_reset(skb);
6751da177e4SLinus Torvalds 
676e1a80002SHerbert Xu 		skb->protocol = gre_proto;
6771da177e4SLinus Torvalds 		/* WCCP version 1 and 2 protocol decoding.
6781da177e4SLinus Torvalds 		 * - Change protocol to IP
6791da177e4SLinus Torvalds 		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
6801da177e4SLinus Torvalds 		 */
681e1a80002SHerbert Xu 		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
682496c98dfSYOSHIFUJI Hideaki 			skb->protocol = htons(ETH_P_IP);
6831da177e4SLinus Torvalds 			if ((*(h + offset) & 0xF0) != 0x40)
6841da177e4SLinus Torvalds 				offset += 4;
6851da177e4SLinus Torvalds 		}
6861da177e4SLinus Torvalds 
		/* Strip the GRE header and fix up the running checksum. */
6871d069167STimo Teras 		skb->mac_header = skb->network_header;
6884209fb60SArnaldo Carvalho de Melo 		__pskb_pull(skb, offset);
6899c70220bSArnaldo Carvalho de Melo 		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
6901da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
6911da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
692f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
6931da177e4SLinus Torvalds 			/* Looped back packet, drop it! */
694c7537967SDavid S. Miller 			if (rt_is_output_route(skb_rtable(skb)))
6951da177e4SLinus Torvalds 				goto drop;
696e985aad7SEric Dumazet 			tunnel->dev->stats.multicast++;
6971da177e4SLinus Torvalds 			skb->pkt_type = PACKET_BROADCAST;
6981da177e4SLinus Torvalds 		}
6991da177e4SLinus Torvalds #endif
7001da177e4SLinus Torvalds 
		/*
		 * Drop when a checksum was present but did not verify, or
		 * when the tunnel's input flags demand a checksum and the
		 * packet carries none.
		 */
7011da177e4SLinus Torvalds 		if (((flags&GRE_CSUM) && csum) ||
7021da177e4SLinus Torvalds 		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
703e985aad7SEric Dumazet 			tunnel->dev->stats.rx_crc_errors++;
704e985aad7SEric Dumazet 			tunnel->dev->stats.rx_errors++;
7051da177e4SLinus Torvalds 			goto drop;
7061da177e4SLinus Torvalds 		}
		/*
		 * With input sequencing enabled, reject packets that lack a
		 * sequence number or whose number is behind the expected one
		 * (signed 32-bit difference; skipped while i_seqno is 0).
		 */
7071da177e4SLinus Torvalds 		if (tunnel->parms.i_flags&GRE_SEQ) {
7081da177e4SLinus Torvalds 			if (!(flags&GRE_SEQ) ||
7091da177e4SLinus Torvalds 			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
710e985aad7SEric Dumazet 				tunnel->dev->stats.rx_fifo_errors++;
711e985aad7SEric Dumazet 				tunnel->dev->stats.rx_errors++;
7121da177e4SLinus Torvalds 				goto drop;
7131da177e4SLinus Torvalds 			}
7141da177e4SLinus Torvalds 			tunnel->i_seqno = seqno + 1;
7151da177e4SLinus Torvalds 		}
716e1a80002SHerbert Xu 
717e1a80002SHerbert Xu 		/* Warning: All skb pointers will be invalidated! */
718e1a80002SHerbert Xu 		if (tunnel->dev->type == ARPHRD_ETHER) {
719e1a80002SHerbert Xu 			if (!pskb_may_pull(skb, ETH_HLEN)) {
720e985aad7SEric Dumazet 				tunnel->dev->stats.rx_length_errors++;
721e985aad7SEric Dumazet 				tunnel->dev->stats.rx_errors++;
722e1a80002SHerbert Xu 				goto drop;
723e1a80002SHerbert Xu 			}
724e1a80002SHerbert Xu 
			/* Re-read the outer header pointer after the pull above. */
725e1a80002SHerbert Xu 			iph = ip_hdr(skb);
726e1a80002SHerbert Xu 			skb->protocol = eth_type_trans(skb, tunnel->dev);
727e1a80002SHerbert Xu 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
728e1a80002SHerbert Xu 		}
729e1a80002SHerbert Xu 
730e985aad7SEric Dumazet 		tstats = this_cpu_ptr(tunnel->dev->tstats);
73187b6d218Sstephen hemminger 		u64_stats_update_begin(&tstats->syncp);
732e985aad7SEric Dumazet 		tstats->rx_packets++;
733e985aad7SEric Dumazet 		tstats->rx_bytes += skb->len;
73487b6d218Sstephen hemminger 		u64_stats_update_end(&tstats->syncp);
735e985aad7SEric Dumazet 
736e985aad7SEric Dumazet 		__skb_tunnel_rx(skb, tunnel->dev);
737e1a80002SHerbert Xu 
738e1a80002SHerbert Xu 		skb_reset_network_header(skb);
7391da177e4SLinus Torvalds 		ipgre_ecn_decapsulate(iph, skb);
740e1a80002SHerbert Xu 
741caf586e5SEric Dumazet 		netif_rx(skb);
7428990f468SEric Dumazet 
7438d5b2c08SEric Dumazet 		rcu_read_unlock();
7448990f468SEric Dumazet 		return 0;
7451da177e4SLinus Torvalds 	}
	/* No tunnel matched: report port unreachable, then free the skb. */
74645af08beSHerbert Xu 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
7471da177e4SLinus Torvalds 
7481da177e4SLinus Torvalds drop:
7498d5b2c08SEric Dumazet 	rcu_read_unlock();
7501da177e4SLinus Torvalds drop_nolock:
7511da177e4SLinus Torvalds 	kfree_skb(skb);
752a02cec21SEric Dumazet 	return 0;
7531da177e4SLinus Torvalds }
7541da177e4SLinus Torvalds 
7556fef4c0cSStephen Hemminger static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
7561da177e4SLinus Torvalds {
7572941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
758e985aad7SEric Dumazet 	struct pcpu_tstats *tstats;
759b71d1d42SEric Dumazet 	const struct iphdr  *old_iph = ip_hdr(skb);
760b71d1d42SEric Dumazet 	const struct iphdr  *tiph;
761cbb1e85fSDavid S. Miller 	struct flowi4 fl4;
7621da177e4SLinus Torvalds 	u8     tos;
763d5a0a1e3SAl Viro 	__be16 df;
7641da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
7651da177e4SLinus Torvalds 	struct net_device *tdev;		/* Device to other host */
7661da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
767c2636b4dSChuck Lever 	unsigned int max_headroom;		/* The extra header space needed */
7681da177e4SLinus Torvalds 	int    gre_hlen;
769d5a0a1e3SAl Viro 	__be32 dst;
7701da177e4SLinus Torvalds 	int    mtu;
7711da177e4SLinus Torvalds 
7726b78f16eSEric Dumazet 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
7736b78f16eSEric Dumazet 	    skb_checksum_help(skb))
7746b78f16eSEric Dumazet 		goto tx_error;
7756b78f16eSEric Dumazet 
776e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER)
777e1a80002SHerbert Xu 		IPCB(skb)->flags = 0;
778e1a80002SHerbert Xu 
779e1a80002SHerbert Xu 	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
7801da177e4SLinus Torvalds 		gre_hlen = 0;
781b71d1d42SEric Dumazet 		tiph = (const struct iphdr *)skb->data;
7821da177e4SLinus Torvalds 	} else {
7831da177e4SLinus Torvalds 		gre_hlen = tunnel->hlen;
7841da177e4SLinus Torvalds 		tiph = &tunnel->parms.iph;
7851da177e4SLinus Torvalds 	}
7861da177e4SLinus Torvalds 
7871da177e4SLinus Torvalds 	if ((dst = tiph->daddr) == 0) {
7881da177e4SLinus Torvalds 		/* NBMA tunnel */
7891da177e4SLinus Torvalds 
790adf30907SEric Dumazet 		if (skb_dst(skb) == NULL) {
791e985aad7SEric Dumazet 			dev->stats.tx_fifo_errors++;
7921da177e4SLinus Torvalds 			goto tx_error;
7931da177e4SLinus Torvalds 		}
7941da177e4SLinus Torvalds 
79561d57f87SDavid S. Miller 		if (skb->protocol == htons(ETH_P_IP)) {
796511c3f92SEric Dumazet 			rt = skb_rtable(skb);
797f8126f1dSDavid S. Miller 			dst = rt_nexthop(rt, old_iph->daddr);
79861d57f87SDavid S. Miller 		}
799dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
8001da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6)) {
801b71d1d42SEric Dumazet 			const struct in6_addr *addr6;
8020ec88662SDavid S. Miller 			struct neighbour *neigh;
8030ec88662SDavid S. Miller 			bool do_tx_error_icmp;
8041da177e4SLinus Torvalds 			int addr_type;
8051da177e4SLinus Torvalds 
8060ec88662SDavid S. Miller 			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
8071da177e4SLinus Torvalds 			if (neigh == NULL)
8081da177e4SLinus Torvalds 				goto tx_error;
8091da177e4SLinus Torvalds 
810b71d1d42SEric Dumazet 			addr6 = (const struct in6_addr *)&neigh->primary_key;
8111da177e4SLinus Torvalds 			addr_type = ipv6_addr_type(addr6);
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds 			if (addr_type == IPV6_ADDR_ANY) {
8140660e03fSArnaldo Carvalho de Melo 				addr6 = &ipv6_hdr(skb)->daddr;
8151da177e4SLinus Torvalds 				addr_type = ipv6_addr_type(addr6);
8161da177e4SLinus Torvalds 			}
8171da177e4SLinus Torvalds 
8181da177e4SLinus Torvalds 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
8190ec88662SDavid S. Miller 				do_tx_error_icmp = true;
8200ec88662SDavid S. Miller 			else {
8210ec88662SDavid S. Miller 				do_tx_error_icmp = false;
8221da177e4SLinus Torvalds 				dst = addr6->s6_addr32[3];
8231da177e4SLinus Torvalds 			}
8240ec88662SDavid S. Miller 			neigh_release(neigh);
8250ec88662SDavid S. Miller 			if (do_tx_error_icmp)
8260ec88662SDavid S. Miller 				goto tx_error_icmp;
8270ec88662SDavid S. Miller 		}
8281da177e4SLinus Torvalds #endif
8291da177e4SLinus Torvalds 		else
8301da177e4SLinus Torvalds 			goto tx_error;
8311da177e4SLinus Torvalds 	}
8321da177e4SLinus Torvalds 
8331da177e4SLinus Torvalds 	tos = tiph->tos;
834ee686ca9SAndreas Jaggi 	if (tos == 1) {
835ee686ca9SAndreas Jaggi 		tos = 0;
8361da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
8371da177e4SLinus Torvalds 			tos = old_iph->tos;
838dd4ba83dSStephen Hemminger 		else if (skb->protocol == htons(ETH_P_IPV6))
839b71d1d42SEric Dumazet 			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
8401da177e4SLinus Torvalds 	}
8411da177e4SLinus Torvalds 
842cbb1e85fSDavid S. Miller 	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
84378fbfd8aSDavid S. Miller 				 tunnel->parms.o_key, RT_TOS(tos),
84478fbfd8aSDavid S. Miller 				 tunnel->parms.link);
845b23dd4feSDavid S. Miller 	if (IS_ERR(rt)) {
846e985aad7SEric Dumazet 		dev->stats.tx_carrier_errors++;
8471da177e4SLinus Torvalds 		goto tx_error;
8481da177e4SLinus Torvalds 	}
849d8d1f30bSChangli Gao 	tdev = rt->dst.dev;
8501da177e4SLinus Torvalds 
8511da177e4SLinus Torvalds 	if (tdev == dev) {
8521da177e4SLinus Torvalds 		ip_rt_put(rt);
853e985aad7SEric Dumazet 		dev->stats.collisions++;
8541da177e4SLinus Torvalds 		goto tx_error;
8551da177e4SLinus Torvalds 	}
8561da177e4SLinus Torvalds 
8571da177e4SLinus Torvalds 	df = tiph->frag_off;
8581da177e4SLinus Torvalds 	if (df)
859d8d1f30bSChangli Gao 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
8601da177e4SLinus Torvalds 	else
861adf30907SEric Dumazet 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
8621da177e4SLinus Torvalds 
863adf30907SEric Dumazet 	if (skb_dst(skb))
8646700c270SDavid S. Miller 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
8651da177e4SLinus Torvalds 
8661da177e4SLinus Torvalds 	if (skb->protocol == htons(ETH_P_IP)) {
8671da177e4SLinus Torvalds 		df |= (old_iph->frag_off&htons(IP_DF));
8681da177e4SLinus Torvalds 
8691da177e4SLinus Torvalds 		if ((old_iph->frag_off&htons(IP_DF)) &&
8701da177e4SLinus Torvalds 		    mtu < ntohs(old_iph->tot_len)) {
8711da177e4SLinus Torvalds 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
8721da177e4SLinus Torvalds 			ip_rt_put(rt);
8731da177e4SLinus Torvalds 			goto tx_error;
8741da177e4SLinus Torvalds 		}
8751da177e4SLinus Torvalds 	}
876dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
8771da177e4SLinus Torvalds 	else if (skb->protocol == htons(ETH_P_IPV6)) {
878adf30907SEric Dumazet 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
8791da177e4SLinus Torvalds 
880adf30907SEric Dumazet 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
881f97c1e0cSJoe Perches 			if ((tunnel->parms.iph.daddr &&
882f97c1e0cSJoe Perches 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
8831da177e4SLinus Torvalds 			    rt6->rt6i_dst.plen == 128) {
8841da177e4SLinus Torvalds 				rt6->rt6i_flags |= RTF_MODIFIED;
885defb3519SDavid S. Miller 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
8861da177e4SLinus Torvalds 			}
8871da177e4SLinus Torvalds 		}
8881da177e4SLinus Torvalds 
8891da177e4SLinus Torvalds 		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
8903ffe533cSAlexey Dobriyan 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
8911da177e4SLinus Torvalds 			ip_rt_put(rt);
8921da177e4SLinus Torvalds 			goto tx_error;
8931da177e4SLinus Torvalds 		}
8941da177e4SLinus Torvalds 	}
8951da177e4SLinus Torvalds #endif
8961da177e4SLinus Torvalds 
8971da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
898da6185d8SWei Yongjun 		if (time_before(jiffies,
899da6185d8SWei Yongjun 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
9001da177e4SLinus Torvalds 			tunnel->err_count--;
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds 			dst_link_failure(skb);
9031da177e4SLinus Torvalds 		} else
9041da177e4SLinus Torvalds 			tunnel->err_count = 0;
9051da177e4SLinus Torvalds 	}
9061da177e4SLinus Torvalds 
907d8d1f30bSChangli Gao 	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
9081da177e4SLinus Torvalds 
909cfbba49dSPatrick McHardy 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
910cfbba49dSPatrick McHardy 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
9111da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
912805dc1d6SHerbert Xu 		if (max_headroom > dev->needed_headroom)
913805dc1d6SHerbert Xu 			dev->needed_headroom = max_headroom;
9141da177e4SLinus Torvalds 		if (!new_skb) {
9151da177e4SLinus Torvalds 			ip_rt_put(rt);
916e985aad7SEric Dumazet 			dev->stats.tx_dropped++;
9171da177e4SLinus Torvalds 			dev_kfree_skb(skb);
9186ed10654SPatrick McHardy 			return NETDEV_TX_OK;
9191da177e4SLinus Torvalds 		}
9201da177e4SLinus Torvalds 		if (skb->sk)
9211da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
9221da177e4SLinus Torvalds 		dev_kfree_skb(skb);
9231da177e4SLinus Torvalds 		skb = new_skb;
924eddc9ec5SArnaldo Carvalho de Melo 		old_iph = ip_hdr(skb);
9251da177e4SLinus Torvalds 	}
9261da177e4SLinus Torvalds 
92764194c31SHerbert Xu 	skb_reset_transport_header(skb);
928e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, gre_hlen);
929e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
9301da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
93148d5cad8SPatrick McHardy 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
93248d5cad8SPatrick McHardy 			      IPSKB_REROUTED);
933adf30907SEric Dumazet 	skb_dst_drop(skb);
934d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
9351da177e4SLinus Torvalds 
9361da177e4SLinus Torvalds 	/*
9371da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
9381da177e4SLinus Torvalds 	 */
9391da177e4SLinus Torvalds 
940eddc9ec5SArnaldo Carvalho de Melo 	iph 			=	ip_hdr(skb);
9411da177e4SLinus Torvalds 	iph->version		=	4;
9421da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr) >> 2;
9431da177e4SLinus Torvalds 	iph->frag_off		=	df;
9441da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_GRE;
9451da177e4SLinus Torvalds 	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
946cbb1e85fSDavid S. Miller 	iph->daddr		=	fl4.daddr;
947cbb1e85fSDavid S. Miller 	iph->saddr		=	fl4.saddr;
9481da177e4SLinus Torvalds 
9491da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0) {
9501da177e4SLinus Torvalds 		if (skb->protocol == htons(ETH_P_IP))
9511da177e4SLinus Torvalds 			iph->ttl = old_iph->ttl;
952dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
9531da177e4SLinus Torvalds 		else if (skb->protocol == htons(ETH_P_IPV6))
954b71d1d42SEric Dumazet 			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
9551da177e4SLinus Torvalds #endif
9561da177e4SLinus Torvalds 		else
957323e126fSDavid S. Miller 			iph->ttl = ip4_dst_hoplimit(&rt->dst);
9581da177e4SLinus Torvalds 	}
9591da177e4SLinus Torvalds 
960d5a0a1e3SAl Viro 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
961e1a80002SHerbert Xu 	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
962e1a80002SHerbert Xu 				   htons(ETH_P_TEB) : skb->protocol;
9631da177e4SLinus Torvalds 
9641da177e4SLinus Torvalds 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
965d5a0a1e3SAl Viro 		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
9661da177e4SLinus Torvalds 
9671da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_SEQ) {
9681da177e4SLinus Torvalds 			++tunnel->o_seqno;
9691da177e4SLinus Torvalds 			*ptr = htonl(tunnel->o_seqno);
9701da177e4SLinus Torvalds 			ptr--;
9711da177e4SLinus Torvalds 		}
9721da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_KEY) {
9731da177e4SLinus Torvalds 			*ptr = tunnel->parms.o_key;
9741da177e4SLinus Torvalds 			ptr--;
9751da177e4SLinus Torvalds 		}
9761da177e4SLinus Torvalds 		if (tunnel->parms.o_flags&GRE_CSUM) {
9771da177e4SLinus Torvalds 			*ptr = 0;
9785f92a738SAl Viro 			*(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
9791da177e4SLinus Torvalds 		}
9801da177e4SLinus Torvalds 	}
9811da177e4SLinus Torvalds 
9821da177e4SLinus Torvalds 	nf_reset(skb);
983e985aad7SEric Dumazet 	tstats = this_cpu_ptr(dev->tstats);
984e985aad7SEric Dumazet 	__IPTUNNEL_XMIT(tstats, &dev->stats);
9856ed10654SPatrick McHardy 	return NETDEV_TX_OK;
9861da177e4SLinus Torvalds 
987496053f4SDavid S. Miller #if IS_ENABLED(CONFIG_IPV6)
9881da177e4SLinus Torvalds tx_error_icmp:
9891da177e4SLinus Torvalds 	dst_link_failure(skb);
990496053f4SDavid S. Miller #endif
9911da177e4SLinus Torvalds tx_error:
992e985aad7SEric Dumazet 	dev->stats.tx_errors++;
9931da177e4SLinus Torvalds 	dev_kfree_skb(skb);
9946ed10654SPatrick McHardy 	return NETDEV_TX_OK;
9951da177e4SLinus Torvalds }
9961da177e4SLinus Torvalds 
99742aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev)
998ee34c1ebSMichal Schmidt {
999ee34c1ebSMichal Schmidt 	struct net_device *tdev = NULL;
1000ee34c1ebSMichal Schmidt 	struct ip_tunnel *tunnel;
1001b71d1d42SEric Dumazet 	const struct iphdr *iph;
1002ee34c1ebSMichal Schmidt 	int hlen = LL_MAX_HEADER;
1003ee34c1ebSMichal Schmidt 	int mtu = ETH_DATA_LEN;
1004ee34c1ebSMichal Schmidt 	int addend = sizeof(struct iphdr) + 4;
1005ee34c1ebSMichal Schmidt 
1006ee34c1ebSMichal Schmidt 	tunnel = netdev_priv(dev);
1007ee34c1ebSMichal Schmidt 	iph = &tunnel->parms.iph;
1008ee34c1ebSMichal Schmidt 
1009c95b819aSHerbert Xu 	/* Guess output device to choose reasonable mtu and needed_headroom */
1010ee34c1ebSMichal Schmidt 
1011ee34c1ebSMichal Schmidt 	if (iph->daddr) {
1012cbb1e85fSDavid S. Miller 		struct flowi4 fl4;
1013cbb1e85fSDavid S. Miller 		struct rtable *rt;
1014cbb1e85fSDavid S. Miller 
1015cbb1e85fSDavid S. Miller 		rt = ip_route_output_gre(dev_net(dev), &fl4,
101678fbfd8aSDavid S. Miller 					 iph->daddr, iph->saddr,
101778fbfd8aSDavid S. Miller 					 tunnel->parms.o_key,
101878fbfd8aSDavid S. Miller 					 RT_TOS(iph->tos),
101978fbfd8aSDavid S. Miller 					 tunnel->parms.link);
1020b23dd4feSDavid S. Miller 		if (!IS_ERR(rt)) {
1021d8d1f30bSChangli Gao 			tdev = rt->dst.dev;
1022ee34c1ebSMichal Schmidt 			ip_rt_put(rt);
1023ee34c1ebSMichal Schmidt 		}
1024e1a80002SHerbert Xu 
1025e1a80002SHerbert Xu 		if (dev->type != ARPHRD_ETHER)
1026ee34c1ebSMichal Schmidt 			dev->flags |= IFF_POINTOPOINT;
1027ee34c1ebSMichal Schmidt 	}
1028ee34c1ebSMichal Schmidt 
1029ee34c1ebSMichal Schmidt 	if (!tdev && tunnel->parms.link)
103096635522SPavel Emelyanov 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
1031ee34c1ebSMichal Schmidt 
1032ee34c1ebSMichal Schmidt 	if (tdev) {
1033c95b819aSHerbert Xu 		hlen = tdev->hard_header_len + tdev->needed_headroom;
1034ee34c1ebSMichal Schmidt 		mtu = tdev->mtu;
1035ee34c1ebSMichal Schmidt 	}
1036ee34c1ebSMichal Schmidt 	dev->iflink = tunnel->parms.link;
1037ee34c1ebSMichal Schmidt 
1038ee34c1ebSMichal Schmidt 	/* Precalculate GRE options length */
1039ee34c1ebSMichal Schmidt 	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1040ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_CSUM)
1041ee34c1ebSMichal Schmidt 			addend += 4;
1042ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_KEY)
1043ee34c1ebSMichal Schmidt 			addend += 4;
1044ee34c1ebSMichal Schmidt 		if (tunnel->parms.o_flags&GRE_SEQ)
1045ee34c1ebSMichal Schmidt 			addend += 4;
1046ee34c1ebSMichal Schmidt 	}
1047c95b819aSHerbert Xu 	dev->needed_headroom = addend + hlen;
10488cdb0456STom Goff 	mtu -= dev->hard_header_len + addend;
104942aa9162SHerbert Xu 
105042aa9162SHerbert Xu 	if (mtu < 68)
105142aa9162SHerbert Xu 		mtu = 68;
105242aa9162SHerbert Xu 
1053ee34c1ebSMichal Schmidt 	tunnel->hlen = addend;
1054ee34c1ebSMichal Schmidt 
105542aa9162SHerbert Xu 	return mtu;
1056ee34c1ebSMichal Schmidt }
1057ee34c1ebSMichal Schmidt 
/*
 * ioctl handler for GRE tunnel devices.
 *
 * SIOCGETTUNNEL: copies a tunnel's parameters to user space.  On the
 *   fallback device the tunnel is first looked up from the user-supplied
 *   parameters; otherwise (or if none is found) the device's own tunnel is
 *   reported.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: require CAP_NET_ADMIN.  The parameters are
 *   validated and a tunnel is located through ipgre_tunnel_locate() (the
 *   third argument is non-zero only for SIOCADDTUNNEL, presumably asking it
 *   to create the tunnel).  SIOCCHGTUNNEL then updates the tunnel.
 * SIOCDELTUNNEL: requires CAP_NET_ADMIN and unregisters the device.  On the
 *   fallback device the target is looked up from the user-supplied
 *   parameters, and the fallback tunnel itself cannot be deleted.
 *
 * Returns 0 on success or a negative errno; unknown commands yield -EINVAL.
 */
10581da177e4SLinus Torvalds static int
10591da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
10601da177e4SLinus Torvalds {
10611da177e4SLinus Torvalds 	int err = 0;
10621da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
10631da177e4SLinus Torvalds 	struct ip_tunnel *t;
1064f57e7d5aSPavel Emelyanov 	struct net *net = dev_net(dev);
1065f57e7d5aSPavel Emelyanov 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
10661da177e4SLinus Torvalds 
10671da177e4SLinus Torvalds 	switch (cmd) {
10681da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
10691da177e4SLinus Torvalds 		t = NULL;
10707daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
10711da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
10721da177e4SLinus Torvalds 				err = -EFAULT;
10731da177e4SLinus Torvalds 				break;
10741da177e4SLinus Torvalds 			}
1075f57e7d5aSPavel Emelyanov 			t = ipgre_tunnel_locate(net, &p, 0);
10761da177e4SLinus Torvalds 		}
10771da177e4SLinus Torvalds 		if (t == NULL)
10782941a486SPatrick McHardy 			t = netdev_priv(dev);
10791da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
10801da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
10811da177e4SLinus Torvalds 			err = -EFAULT;
10821da177e4SLinus Torvalds 		break;
10831da177e4SLinus Torvalds 
10841da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
10851da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
10861da177e4SLinus Torvalds 		err = -EPERM;
10871da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
10881da177e4SLinus Torvalds 			goto done;
10891da177e4SLinus Torvalds 
10901da177e4SLinus Torvalds 		err = -EFAULT;
10911da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
10921da177e4SLinus Torvalds 			goto done;
10931da177e4SLinus Torvalds 
		/*
		 * Accept only an option-less IPv4/GRE header template with no
		 * fragment bits other than DF, and neither the GRE version
		 * nor the routing flag in either direction.
		 */
10941da177e4SLinus Torvalds 		err = -EINVAL;
10951da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
10961da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
10971da177e4SLinus Torvalds 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
10981da177e4SLinus Torvalds 			goto done;
		/* A fixed TTL forces DF on in the header template. */
10991da177e4SLinus Torvalds 		if (p.iph.ttl)
11001da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
11011da177e4SLinus Torvalds 
		/* Ignore keys whose GRE_KEY flag is not set. */
11021da177e4SLinus Torvalds 		if (!(p.i_flags&GRE_KEY))
11031da177e4SLinus Torvalds 			p.i_key = 0;
11041da177e4SLinus Torvalds 		if (!(p.o_flags&GRE_KEY))
11051da177e4SLinus Torvalds 			p.o_key = 0;
11061da177e4SLinus Torvalds 
1107f57e7d5aSPavel Emelyanov 		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
11081da177e4SLinus Torvalds 
11097daa0004SPavel Emelyanov 		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
11101da177e4SLinus Torvalds 			if (t != NULL) {
				/* The new parameters already belong to another device. */
11111da177e4SLinus Torvalds 				if (t->dev != dev) {
11121da177e4SLinus Torvalds 					err = -EEXIST;
11131da177e4SLinus Torvalds 					break;
11141da177e4SLinus Torvalds 				}
11151da177e4SLinus Torvalds 			} else {
11161507850bSEric Dumazet 				unsigned int nflags = 0;
11171da177e4SLinus Torvalds 
11182941a486SPatrick McHardy 				t = netdev_priv(dev);
11191da177e4SLinus Torvalds 
1120f97c1e0cSJoe Perches 				if (ipv4_is_multicast(p.iph.daddr))
11211da177e4SLinus Torvalds 					nflags = IFF_BROADCAST;
11221da177e4SLinus Torvalds 				else if (p.iph.daddr)
11231da177e4SLinus Torvalds 					nflags = IFF_POINTOPOINT;
11241da177e4SLinus Torvalds 
				/*
				 * The new remote address must not change the
				 * device's point-to-point/broadcast flags.
				 */
11251da177e4SLinus Torvalds 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
11261da177e4SLinus Torvalds 					err = -EINVAL;
11271da177e4SLinus Torvalds 					break;
11281da177e4SLinus Torvalds 				}
				/*
				 * Unlink the tunnel, call synchronize_net(),
				 * rewrite addresses and keys, then link it
				 * back in.
				 */
1129f57e7d5aSPavel Emelyanov 				ipgre_tunnel_unlink(ign, t);
113074b0b85bSPavel Emelyanov 				synchronize_net();
11311da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
11321da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
11331da177e4SLinus Torvalds 				t->parms.i_key = p.i_key;
11341da177e4SLinus Torvalds 				t->parms.o_key = p.o_key;
11351da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
11361da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
1137f57e7d5aSPavel Emelyanov 				ipgre_tunnel_link(ign, t);
11381da177e4SLinus Torvalds 				netdev_state_change(dev);
11391da177e4SLinus Torvalds 			}
11401da177e4SLinus Torvalds 		}
11411da177e4SLinus Torvalds 
11421da177e4SLinus Torvalds 		if (t) {
11431da177e4SLinus Torvalds 			err = 0;
11441da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
11451da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
11461da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
11471da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
				/* A changed link requires recomputing MTU and headroom. */
1148ee34c1ebSMichal Schmidt 				if (t->parms.link != p.link) {
1149ee34c1ebSMichal Schmidt 					t->parms.link = p.link;
115042aa9162SHerbert Xu 					dev->mtu = ipgre_tunnel_bind_dev(dev);
1151ee34c1ebSMichal Schmidt 					netdev_state_change(dev);
1152ee34c1ebSMichal Schmidt 				}
11531da177e4SLinus Torvalds 			}
11541da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
11551da177e4SLinus Torvalds 				err = -EFAULT;
11561da177e4SLinus Torvalds 		} else
11571da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
11581da177e4SLinus Torvalds 		break;
11591da177e4SLinus Torvalds 
11601da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
11611da177e4SLinus Torvalds 		err = -EPERM;
11621da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
11631da177e4SLinus Torvalds 			goto done;
11641da177e4SLinus Torvalds 
11657daa0004SPavel Emelyanov 		if (dev == ign->fb_tunnel_dev) {
11661da177e4SLinus Torvalds 			err = -EFAULT;
11671da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
11681da177e4SLinus Torvalds 				goto done;
11691da177e4SLinus Torvalds 			err = -ENOENT;
1170f57e7d5aSPavel Emelyanov 			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
11711da177e4SLinus Torvalds 				goto done;
			/* The fallback tunnel itself may not be deleted. */
11721da177e4SLinus Torvalds 			err = -EPERM;
11737daa0004SPavel Emelyanov 			if (t == netdev_priv(ign->fb_tunnel_dev))
11741da177e4SLinus Torvalds 				goto done;
11751da177e4SLinus Torvalds 			dev = t->dev;
11761da177e4SLinus Torvalds 		}
117722f8cde5SStephen Hemminger 		unregister_netdevice(dev);
117822f8cde5SStephen Hemminger 		err = 0;
11791da177e4SLinus Torvalds 		break;
11801da177e4SLinus Torvalds 
11811da177e4SLinus Torvalds 	default:
11821da177e4SLinus Torvalds 		err = -EINVAL;
11831da177e4SLinus Torvalds 	}
11841da177e4SLinus Torvalds 
11851da177e4SLinus Torvalds done:
11861da177e4SLinus Torvalds 	return err;
11871da177e4SLinus Torvalds }
11881da177e4SLinus Torvalds 
11891da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
11901da177e4SLinus Torvalds {
11912941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
1192c95b819aSHerbert Xu 	if (new_mtu < 68 ||
1193c95b819aSHerbert Xu 	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
11941da177e4SLinus Torvalds 		return -EINVAL;
11951da177e4SLinus Torvalds 	dev->mtu = new_mtu;
11961da177e4SLinus Torvalds 	return 0;
11971da177e4SLinus Torvalds }
11981da177e4SLinus Torvalds 
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
12081da177e4SLinus Torvalds 
12091da177e4SLinus Torvalds    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
12101da177e4SLinus Torvalds    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
12111da177e4SLinus Torvalds 
12121da177e4SLinus Torvalds    ping -t 255 224.66.66.66
12131da177e4SLinus Torvalds 
12141da177e4SLinus Torvalds    If nobody answers, mbone does not work.
12151da177e4SLinus Torvalds 
12161da177e4SLinus Torvalds    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
12171da177e4SLinus Torvalds    ip addr add 10.66.66.<somewhat>/24 dev Universe
12181da177e4SLinus Torvalds    ifconfig Universe up
12191da177e4SLinus Torvalds    ifconfig Universe add fe80::<Your_real_addr>/10
12201da177e4SLinus Torvalds    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
12211da177e4SLinus Torvalds    ftp 10.66.66.66
12221da177e4SLinus Torvalds    ...
12231da177e4SLinus Torvalds    ftp fec0:6666:6666::193.233.7.65
12241da177e4SLinus Torvalds    ...
12251da177e4SLinus Torvalds 
12261da177e4SLinus Torvalds  */
12271da177e4SLinus Torvalds 
12283b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
12293b04dddeSStephen Hemminger 			unsigned short type,
12301507850bSEric Dumazet 			const void *daddr, const void *saddr, unsigned int len)
12311da177e4SLinus Torvalds {
12322941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
12331da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1234d5a0a1e3SAl Viro 	__be16 *p = (__be16 *)(iph+1);
12351da177e4SLinus Torvalds 
12361da177e4SLinus Torvalds 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
12371da177e4SLinus Torvalds 	p[0]		= t->parms.o_flags;
12381da177e4SLinus Torvalds 	p[1]		= htons(type);
12391da177e4SLinus Torvalds 
12401da177e4SLinus Torvalds 	/*
12411da177e4SLinus Torvalds 	 *	Set the source hardware address.
12421da177e4SLinus Torvalds 	 */
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds 	if (saddr)
12451da177e4SLinus Torvalds 		memcpy(&iph->saddr, saddr, 4);
12466d55cb91STimo Teräs 	if (daddr)
12471da177e4SLinus Torvalds 		memcpy(&iph->daddr, daddr, 4);
12486d55cb91STimo Teräs 	if (iph->daddr)
12491da177e4SLinus Torvalds 		return t->hlen;
12501da177e4SLinus Torvalds 
12511da177e4SLinus Torvalds 	return -t->hlen;
12521da177e4SLinus Torvalds }
12531da177e4SLinus Torvalds 
12546a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
12556a5f44d7STimo Teras {
1256b71d1d42SEric Dumazet 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
12576a5f44d7STimo Teras 	memcpy(haddr, &iph->saddr, 4);
12586a5f44d7STimo Teras 	return 4;
12596a5f44d7STimo Teras }
12606a5f44d7STimo Teras 
12613b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = {
12623b04dddeSStephen Hemminger 	.create	= ipgre_header,
12636a5f44d7STimo Teras 	.parse	= ipgre_header_parse,
12643b04dddeSStephen Hemminger };
12653b04dddeSStephen Hemminger 
12666a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST
12671da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev)
12681da177e4SLinus Torvalds {
12692941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
12701da177e4SLinus Torvalds 
1271f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
1272cbb1e85fSDavid S. Miller 		struct flowi4 fl4;
1273cbb1e85fSDavid S. Miller 		struct rtable *rt;
1274cbb1e85fSDavid S. Miller 
1275cbb1e85fSDavid S. Miller 		rt = ip_route_output_gre(dev_net(dev), &fl4,
127678fbfd8aSDavid S. Miller 					 t->parms.iph.daddr,
127778fbfd8aSDavid S. Miller 					 t->parms.iph.saddr,
127878fbfd8aSDavid S. Miller 					 t->parms.o_key,
127978fbfd8aSDavid S. Miller 					 RT_TOS(t->parms.iph.tos),
128078fbfd8aSDavid S. Miller 					 t->parms.link);
1281b23dd4feSDavid S. Miller 		if (IS_ERR(rt))
12821da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
1283d8d1f30bSChangli Gao 		dev = rt->dst.dev;
12841da177e4SLinus Torvalds 		ip_rt_put(rt);
1285e5ed6399SHerbert Xu 		if (__in_dev_get_rtnl(dev) == NULL)
12861da177e4SLinus Torvalds 			return -EADDRNOTAVAIL;
12871da177e4SLinus Torvalds 		t->mlink = dev->ifindex;
1288e5ed6399SHerbert Xu 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
12891da177e4SLinus Torvalds 	}
12901da177e4SLinus Torvalds 	return 0;
12911da177e4SLinus Torvalds }
12921da177e4SLinus Torvalds 
12931da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev)
12941da177e4SLinus Torvalds {
12952941a486SPatrick McHardy 	struct ip_tunnel *t = netdev_priv(dev);
1296b8c26a33SStephen Hemminger 
1297f97c1e0cSJoe Perches 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
12987fee0ca2SDenis V. Lunev 		struct in_device *in_dev;
1299c346dca1SYOSHIFUJI Hideaki 		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
13008723e1b4SEric Dumazet 		if (in_dev)
13011da177e4SLinus Torvalds 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
13021da177e4SLinus Torvalds 	}
13031da177e4SLinus Torvalds 	return 0;
13041da177e4SLinus Torvalds }
13051da177e4SLinus Torvalds 
13061da177e4SLinus Torvalds #endif
13071da177e4SLinus Torvalds 
1308b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_netdev_ops = {
1309b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tunnel_init,
1310b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1311b8c26a33SStephen Hemminger #ifdef CONFIG_NET_IPGRE_BROADCAST
1312b8c26a33SStephen Hemminger 	.ndo_open		= ipgre_open,
1313b8c26a33SStephen Hemminger 	.ndo_stop		= ipgre_close,
1314b8c26a33SStephen Hemminger #endif
1315b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1316b8c26a33SStephen Hemminger 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
1317b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
131887b6d218Sstephen hemminger 	.ndo_get_stats64	= ipgre_get_stats64,
1319b8c26a33SStephen Hemminger };
1320b8c26a33SStephen Hemminger 
1321e985aad7SEric Dumazet static void ipgre_dev_free(struct net_device *dev)
1322e985aad7SEric Dumazet {
1323e985aad7SEric Dumazet 	free_percpu(dev->tstats);
1324e985aad7SEric Dumazet 	free_netdev(dev);
1325e985aad7SEric Dumazet }
1326e985aad7SEric Dumazet 
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
13316b78f16eSEric Dumazet 
13321da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev)
13331da177e4SLinus Torvalds {
1334b8c26a33SStephen Hemminger 	dev->netdev_ops		= &ipgre_netdev_ops;
1335e985aad7SEric Dumazet 	dev->destructor 	= ipgre_dev_free;
13361da177e4SLinus Torvalds 
13371da177e4SLinus Torvalds 	dev->type		= ARPHRD_IPGRE;
1338c95b819aSHerbert Xu 	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
133946f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
13401da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
13411da177e4SLinus Torvalds 	dev->iflink		= 0;
13421da177e4SLinus Torvalds 	dev->addr_len		= 4;
13430b67ecebSPavel Emelyanov 	dev->features		|= NETIF_F_NETNS_LOCAL;
1344108bfa89SEric Dumazet 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
13456b78f16eSEric Dumazet 
13466b78f16eSEric Dumazet 	dev->features		|= GRE_FEATURES;
13476b78f16eSEric Dumazet 	dev->hw_features	|= GRE_FEATURES;
13481da177e4SLinus Torvalds }
13491da177e4SLinus Torvalds 
13501da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev)
13511da177e4SLinus Torvalds {
13521da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
13531da177e4SLinus Torvalds 	struct iphdr *iph;
13541da177e4SLinus Torvalds 
13552941a486SPatrick McHardy 	tunnel = netdev_priv(dev);
13561da177e4SLinus Torvalds 	iph = &tunnel->parms.iph;
13571da177e4SLinus Torvalds 
13581da177e4SLinus Torvalds 	tunnel->dev = dev;
13591da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
13601da177e4SLinus Torvalds 
13611da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
13621da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
13631da177e4SLinus Torvalds 
13641da177e4SLinus Torvalds 	if (iph->daddr) {
13651da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST
1366f97c1e0cSJoe Perches 		if (ipv4_is_multicast(iph->daddr)) {
13671da177e4SLinus Torvalds 			if (!iph->saddr)
13681da177e4SLinus Torvalds 				return -EINVAL;
13691da177e4SLinus Torvalds 			dev->flags = IFF_BROADCAST;
13703b04dddeSStephen Hemminger 			dev->header_ops = &ipgre_header_ops;
13711da177e4SLinus Torvalds 		}
13721da177e4SLinus Torvalds #endif
1373ee34c1ebSMichal Schmidt 	} else
13746a5f44d7STimo Teras 		dev->header_ops = &ipgre_header_ops;
13751da177e4SLinus Torvalds 
1376e985aad7SEric Dumazet 	dev->tstats = alloc_percpu(struct pcpu_tstats);
1377e985aad7SEric Dumazet 	if (!dev->tstats)
1378e985aad7SEric Dumazet 		return -ENOMEM;
1379e985aad7SEric Dumazet 
13801da177e4SLinus Torvalds 	return 0;
13811da177e4SLinus Torvalds }
13821da177e4SLinus Torvalds 
1383b8c26a33SStephen Hemminger static void ipgre_fb_tunnel_init(struct net_device *dev)
13841da177e4SLinus Torvalds {
13852941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
13861da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
13871da177e4SLinus Torvalds 
13881da177e4SLinus Torvalds 	tunnel->dev = dev;
13891da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds 	iph->version		= 4;
13921da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_GRE;
13931da177e4SLinus Torvalds 	iph->ihl		= 5;
13941da177e4SLinus Torvalds 	tunnel->hlen		= sizeof(struct iphdr) + 4;
13951da177e4SLinus Torvalds 
13961da177e4SLinus Torvalds 	dev_hold(dev);
13971da177e4SLinus Torvalds }
13981da177e4SLinus Torvalds 
13991da177e4SLinus Torvalds 
140000959adeSDmitry Kozlov static const struct gre_protocol ipgre_protocol = {
14011da177e4SLinus Torvalds 	.handler     = ipgre_rcv,
14021da177e4SLinus Torvalds 	.err_handler = ipgre_err,
14031da177e4SLinus Torvalds };
14041da177e4SLinus Torvalds 
1405eef6dd65SEric Dumazet static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1406eb8ce741SPavel Emelyanov {
1407eb8ce741SPavel Emelyanov 	int prio;
1408eb8ce741SPavel Emelyanov 
1409eb8ce741SPavel Emelyanov 	for (prio = 0; prio < 4; prio++) {
1410eb8ce741SPavel Emelyanov 		int h;
1411eb8ce741SPavel Emelyanov 		for (h = 0; h < HASH_SIZE; h++) {
14121507850bSEric Dumazet 			struct ip_tunnel *t;
14131507850bSEric Dumazet 
14141507850bSEric Dumazet 			t = rtnl_dereference(ign->tunnels[prio][h]);
1415eef6dd65SEric Dumazet 
1416eef6dd65SEric Dumazet 			while (t != NULL) {
1417eef6dd65SEric Dumazet 				unregister_netdevice_queue(t->dev, head);
14181507850bSEric Dumazet 				t = rtnl_dereference(t->next);
1419eef6dd65SEric Dumazet 			}
1420eb8ce741SPavel Emelyanov 		}
1421eb8ce741SPavel Emelyanov 	}
1422eb8ce741SPavel Emelyanov }
1423eb8ce741SPavel Emelyanov 
14242c8c1e72SAlexey Dobriyan static int __net_init ipgre_init_net(struct net *net)
142559a4c759SPavel Emelyanov {
1426cfb8fbf2SEric W. Biederman 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
142759a4c759SPavel Emelyanov 	int err;
142859a4c759SPavel Emelyanov 
14297daa0004SPavel Emelyanov 	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
14307daa0004SPavel Emelyanov 					   ipgre_tunnel_setup);
14317daa0004SPavel Emelyanov 	if (!ign->fb_tunnel_dev) {
14327daa0004SPavel Emelyanov 		err = -ENOMEM;
14337daa0004SPavel Emelyanov 		goto err_alloc_dev;
14347daa0004SPavel Emelyanov 	}
1435be77e593SAlexey Dobriyan 	dev_net_set(ign->fb_tunnel_dev, net);
14367daa0004SPavel Emelyanov 
1437b8c26a33SStephen Hemminger 	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1438c19e654dSHerbert Xu 	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
14397daa0004SPavel Emelyanov 
14407daa0004SPavel Emelyanov 	if ((err = register_netdev(ign->fb_tunnel_dev)))
14417daa0004SPavel Emelyanov 		goto err_reg_dev;
14427daa0004SPavel Emelyanov 
14433285ee3bSEric Dumazet 	rcu_assign_pointer(ign->tunnels_wc[0],
14443285ee3bSEric Dumazet 			   netdev_priv(ign->fb_tunnel_dev));
144559a4c759SPavel Emelyanov 	return 0;
144659a4c759SPavel Emelyanov 
14477daa0004SPavel Emelyanov err_reg_dev:
14483285ee3bSEric Dumazet 	ipgre_dev_free(ign->fb_tunnel_dev);
14497daa0004SPavel Emelyanov err_alloc_dev:
145059a4c759SPavel Emelyanov 	return err;
145159a4c759SPavel Emelyanov }
145259a4c759SPavel Emelyanov 
14532c8c1e72SAlexey Dobriyan static void __net_exit ipgre_exit_net(struct net *net)
145459a4c759SPavel Emelyanov {
145559a4c759SPavel Emelyanov 	struct ipgre_net *ign;
1456eef6dd65SEric Dumazet 	LIST_HEAD(list);
145759a4c759SPavel Emelyanov 
145859a4c759SPavel Emelyanov 	ign = net_generic(net, ipgre_net_id);
14597daa0004SPavel Emelyanov 	rtnl_lock();
1460eef6dd65SEric Dumazet 	ipgre_destroy_tunnels(ign, &list);
1461eef6dd65SEric Dumazet 	unregister_netdevice_many(&list);
14627daa0004SPavel Emelyanov 	rtnl_unlock();
146359a4c759SPavel Emelyanov }
146459a4c759SPavel Emelyanov 
146559a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = {
146659a4c759SPavel Emelyanov 	.init = ipgre_init_net,
146759a4c759SPavel Emelyanov 	.exit = ipgre_exit_net,
1468cfb8fbf2SEric W. Biederman 	.id   = &ipgre_net_id,
1469cfb8fbf2SEric W. Biederman 	.size = sizeof(struct ipgre_net),
147059a4c759SPavel Emelyanov };
14711da177e4SLinus Torvalds 
1472c19e654dSHerbert Xu static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1473c19e654dSHerbert Xu {
1474c19e654dSHerbert Xu 	__be16 flags;
1475c19e654dSHerbert Xu 
1476c19e654dSHerbert Xu 	if (!data)
1477c19e654dSHerbert Xu 		return 0;
1478c19e654dSHerbert Xu 
1479c19e654dSHerbert Xu 	flags = 0;
1480c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1481c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1482c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1483c19e654dSHerbert Xu 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1484c19e654dSHerbert Xu 	if (flags & (GRE_VERSION|GRE_ROUTING))
1485c19e654dSHerbert Xu 		return -EINVAL;
1486c19e654dSHerbert Xu 
1487c19e654dSHerbert Xu 	return 0;
1488c19e654dSHerbert Xu }
1489c19e654dSHerbert Xu 
1490e1a80002SHerbert Xu static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1491e1a80002SHerbert Xu {
1492e1a80002SHerbert Xu 	__be32 daddr;
1493e1a80002SHerbert Xu 
1494e1a80002SHerbert Xu 	if (tb[IFLA_ADDRESS]) {
1495e1a80002SHerbert Xu 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1496e1a80002SHerbert Xu 			return -EINVAL;
1497e1a80002SHerbert Xu 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1498e1a80002SHerbert Xu 			return -EADDRNOTAVAIL;
1499e1a80002SHerbert Xu 	}
1500e1a80002SHerbert Xu 
1501e1a80002SHerbert Xu 	if (!data)
1502e1a80002SHerbert Xu 		goto out;
1503e1a80002SHerbert Xu 
1504e1a80002SHerbert Xu 	if (data[IFLA_GRE_REMOTE]) {
1505e1a80002SHerbert Xu 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1506e1a80002SHerbert Xu 		if (!daddr)
1507e1a80002SHerbert Xu 			return -EINVAL;
1508e1a80002SHerbert Xu 	}
1509e1a80002SHerbert Xu 
1510e1a80002SHerbert Xu out:
1511e1a80002SHerbert Xu 	return ipgre_tunnel_validate(tb, data);
1512e1a80002SHerbert Xu }
1513e1a80002SHerbert Xu 
1514c19e654dSHerbert Xu static void ipgre_netlink_parms(struct nlattr *data[],
1515c19e654dSHerbert Xu 				struct ip_tunnel_parm *parms)
1516c19e654dSHerbert Xu {
15177bb82d92SHerbert Xu 	memset(parms, 0, sizeof(*parms));
1518c19e654dSHerbert Xu 
1519c19e654dSHerbert Xu 	parms->iph.protocol = IPPROTO_GRE;
1520c19e654dSHerbert Xu 
1521c19e654dSHerbert Xu 	if (!data)
1522c19e654dSHerbert Xu 		return;
1523c19e654dSHerbert Xu 
1524c19e654dSHerbert Xu 	if (data[IFLA_GRE_LINK])
1525c19e654dSHerbert Xu 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1526c19e654dSHerbert Xu 
1527c19e654dSHerbert Xu 	if (data[IFLA_GRE_IFLAGS])
1528c19e654dSHerbert Xu 		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1529c19e654dSHerbert Xu 
1530c19e654dSHerbert Xu 	if (data[IFLA_GRE_OFLAGS])
1531c19e654dSHerbert Xu 		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1532c19e654dSHerbert Xu 
1533c19e654dSHerbert Xu 	if (data[IFLA_GRE_IKEY])
1534c19e654dSHerbert Xu 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1535c19e654dSHerbert Xu 
1536c19e654dSHerbert Xu 	if (data[IFLA_GRE_OKEY])
1537c19e654dSHerbert Xu 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1538c19e654dSHerbert Xu 
1539c19e654dSHerbert Xu 	if (data[IFLA_GRE_LOCAL])
15404d74f8baSPatrick McHardy 		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1541c19e654dSHerbert Xu 
1542c19e654dSHerbert Xu 	if (data[IFLA_GRE_REMOTE])
15434d74f8baSPatrick McHardy 		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1544c19e654dSHerbert Xu 
1545c19e654dSHerbert Xu 	if (data[IFLA_GRE_TTL])
1546c19e654dSHerbert Xu 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1547c19e654dSHerbert Xu 
1548c19e654dSHerbert Xu 	if (data[IFLA_GRE_TOS])
1549c19e654dSHerbert Xu 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1550c19e654dSHerbert Xu 
1551c19e654dSHerbert Xu 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1552c19e654dSHerbert Xu 		parms->iph.frag_off = htons(IP_DF);
1553c19e654dSHerbert Xu }
1554c19e654dSHerbert Xu 
1555e1a80002SHerbert Xu static int ipgre_tap_init(struct net_device *dev)
1556e1a80002SHerbert Xu {
1557e1a80002SHerbert Xu 	struct ip_tunnel *tunnel;
1558e1a80002SHerbert Xu 
1559e1a80002SHerbert Xu 	tunnel = netdev_priv(dev);
1560e1a80002SHerbert Xu 
1561e1a80002SHerbert Xu 	tunnel->dev = dev;
1562e1a80002SHerbert Xu 	strcpy(tunnel->parms.name, dev->name);
1563e1a80002SHerbert Xu 
1564e1a80002SHerbert Xu 	ipgre_tunnel_bind_dev(dev);
1565e1a80002SHerbert Xu 
1566e985aad7SEric Dumazet 	dev->tstats = alloc_percpu(struct pcpu_tstats);
1567e985aad7SEric Dumazet 	if (!dev->tstats)
1568e985aad7SEric Dumazet 		return -ENOMEM;
1569e985aad7SEric Dumazet 
1570e1a80002SHerbert Xu 	return 0;
1571e1a80002SHerbert Xu }
1572e1a80002SHerbert Xu 
1573b8c26a33SStephen Hemminger static const struct net_device_ops ipgre_tap_netdev_ops = {
1574b8c26a33SStephen Hemminger 	.ndo_init		= ipgre_tap_init,
1575b8c26a33SStephen Hemminger 	.ndo_uninit		= ipgre_tunnel_uninit,
1576b8c26a33SStephen Hemminger 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1577b8c26a33SStephen Hemminger 	.ndo_set_mac_address 	= eth_mac_addr,
1578b8c26a33SStephen Hemminger 	.ndo_validate_addr	= eth_validate_addr,
1579b8c26a33SStephen Hemminger 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
158087b6d218Sstephen hemminger 	.ndo_get_stats64	= ipgre_get_stats64,
1581b8c26a33SStephen Hemminger };
1582b8c26a33SStephen Hemminger 
1583e1a80002SHerbert Xu static void ipgre_tap_setup(struct net_device *dev)
1584e1a80002SHerbert Xu {
1585e1a80002SHerbert Xu 
1586e1a80002SHerbert Xu 	ether_setup(dev);
1587e1a80002SHerbert Xu 
15882e9526b3SHerbert Xu 	dev->netdev_ops		= &ipgre_tap_netdev_ops;
1589e985aad7SEric Dumazet 	dev->destructor 	= ipgre_dev_free;
1590e1a80002SHerbert Xu 
1591e1a80002SHerbert Xu 	dev->iflink		= 0;
1592e1a80002SHerbert Xu 	dev->features		|= NETIF_F_NETNS_LOCAL;
1593e1a80002SHerbert Xu }
1594e1a80002SHerbert Xu 
159581adee47SEric W. Biederman static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1596c19e654dSHerbert Xu 			 struct nlattr *data[])
1597c19e654dSHerbert Xu {
1598c19e654dSHerbert Xu 	struct ip_tunnel *nt;
1599c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1600c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1601c19e654dSHerbert Xu 	int mtu;
1602c19e654dSHerbert Xu 	int err;
1603c19e654dSHerbert Xu 
1604c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1605c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &nt->parms);
1606c19e654dSHerbert Xu 
1607e1a80002SHerbert Xu 	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1608c19e654dSHerbert Xu 		return -EEXIST;
1609c19e654dSHerbert Xu 
1610e1a80002SHerbert Xu 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1611f2cedb63SDanny Kukawka 		eth_hw_addr_random(dev);
1612e1a80002SHerbert Xu 
1613c19e654dSHerbert Xu 	mtu = ipgre_tunnel_bind_dev(dev);
1614c19e654dSHerbert Xu 	if (!tb[IFLA_MTU])
1615c19e654dSHerbert Xu 		dev->mtu = mtu;
1616c19e654dSHerbert Xu 
1617b790e01aSEric Dumazet 	/* Can use a lockless transmit, unless we generate output sequences */
1618b790e01aSEric Dumazet 	if (!(nt->parms.o_flags & GRE_SEQ))
1619b790e01aSEric Dumazet 		dev->features |= NETIF_F_LLTX;
1620b790e01aSEric Dumazet 
1621c19e654dSHerbert Xu 	err = register_netdevice(dev);
1622c19e654dSHerbert Xu 	if (err)
1623c19e654dSHerbert Xu 		goto out;
1624c19e654dSHerbert Xu 
1625c19e654dSHerbert Xu 	dev_hold(dev);
1626c19e654dSHerbert Xu 	ipgre_tunnel_link(ign, nt);
1627c19e654dSHerbert Xu 
1628c19e654dSHerbert Xu out:
1629c19e654dSHerbert Xu 	return err;
1630c19e654dSHerbert Xu }
1631c19e654dSHerbert Xu 
1632c19e654dSHerbert Xu static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1633c19e654dSHerbert Xu 			    struct nlattr *data[])
1634c19e654dSHerbert Xu {
1635c19e654dSHerbert Xu 	struct ip_tunnel *t, *nt;
1636c19e654dSHerbert Xu 	struct net *net = dev_net(dev);
1637c19e654dSHerbert Xu 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1638c19e654dSHerbert Xu 	struct ip_tunnel_parm p;
1639c19e654dSHerbert Xu 	int mtu;
1640c19e654dSHerbert Xu 
1641c19e654dSHerbert Xu 	if (dev == ign->fb_tunnel_dev)
1642c19e654dSHerbert Xu 		return -EINVAL;
1643c19e654dSHerbert Xu 
1644c19e654dSHerbert Xu 	nt = netdev_priv(dev);
1645c19e654dSHerbert Xu 	ipgre_netlink_parms(data, &p);
1646c19e654dSHerbert Xu 
1647c19e654dSHerbert Xu 	t = ipgre_tunnel_locate(net, &p, 0);
1648c19e654dSHerbert Xu 
1649c19e654dSHerbert Xu 	if (t) {
1650c19e654dSHerbert Xu 		if (t->dev != dev)
1651c19e654dSHerbert Xu 			return -EEXIST;
1652c19e654dSHerbert Xu 	} else {
1653c19e654dSHerbert Xu 		t = nt;
1654c19e654dSHerbert Xu 
16552e9526b3SHerbert Xu 		if (dev->type != ARPHRD_ETHER) {
16561507850bSEric Dumazet 			unsigned int nflags = 0;
16572e9526b3SHerbert Xu 
1658c19e654dSHerbert Xu 			if (ipv4_is_multicast(p.iph.daddr))
1659c19e654dSHerbert Xu 				nflags = IFF_BROADCAST;
1660c19e654dSHerbert Xu 			else if (p.iph.daddr)
1661c19e654dSHerbert Xu 				nflags = IFF_POINTOPOINT;
1662c19e654dSHerbert Xu 
1663c19e654dSHerbert Xu 			if ((dev->flags ^ nflags) &
1664c19e654dSHerbert Xu 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1665c19e654dSHerbert Xu 				return -EINVAL;
16662e9526b3SHerbert Xu 		}
1667c19e654dSHerbert Xu 
1668c19e654dSHerbert Xu 		ipgre_tunnel_unlink(ign, t);
1669c19e654dSHerbert Xu 		t->parms.iph.saddr = p.iph.saddr;
1670c19e654dSHerbert Xu 		t->parms.iph.daddr = p.iph.daddr;
1671c19e654dSHerbert Xu 		t->parms.i_key = p.i_key;
16722e9526b3SHerbert Xu 		if (dev->type != ARPHRD_ETHER) {
1673c19e654dSHerbert Xu 			memcpy(dev->dev_addr, &p.iph.saddr, 4);
1674c19e654dSHerbert Xu 			memcpy(dev->broadcast, &p.iph.daddr, 4);
16752e9526b3SHerbert Xu 		}
1676c19e654dSHerbert Xu 		ipgre_tunnel_link(ign, t);
1677c19e654dSHerbert Xu 		netdev_state_change(dev);
1678c19e654dSHerbert Xu 	}
1679c19e654dSHerbert Xu 
1680c19e654dSHerbert Xu 	t->parms.o_key = p.o_key;
1681c19e654dSHerbert Xu 	t->parms.iph.ttl = p.iph.ttl;
1682c19e654dSHerbert Xu 	t->parms.iph.tos = p.iph.tos;
1683c19e654dSHerbert Xu 	t->parms.iph.frag_off = p.iph.frag_off;
1684c19e654dSHerbert Xu 
1685c19e654dSHerbert Xu 	if (t->parms.link != p.link) {
1686c19e654dSHerbert Xu 		t->parms.link = p.link;
1687c19e654dSHerbert Xu 		mtu = ipgre_tunnel_bind_dev(dev);
1688c19e654dSHerbert Xu 		if (!tb[IFLA_MTU])
1689c19e654dSHerbert Xu 			dev->mtu = mtu;
1690c19e654dSHerbert Xu 		netdev_state_change(dev);
1691c19e654dSHerbert Xu 	}
1692c19e654dSHerbert Xu 
1693c19e654dSHerbert Xu 	return 0;
1694c19e654dSHerbert Xu }
1695c19e654dSHerbert Xu 
1696c19e654dSHerbert Xu static size_t ipgre_get_size(const struct net_device *dev)
1697c19e654dSHerbert Xu {
1698c19e654dSHerbert Xu 	return
1699c19e654dSHerbert Xu 		/* IFLA_GRE_LINK */
1700c19e654dSHerbert Xu 		nla_total_size(4) +
1701c19e654dSHerbert Xu 		/* IFLA_GRE_IFLAGS */
1702c19e654dSHerbert Xu 		nla_total_size(2) +
1703c19e654dSHerbert Xu 		/* IFLA_GRE_OFLAGS */
1704c19e654dSHerbert Xu 		nla_total_size(2) +
1705c19e654dSHerbert Xu 		/* IFLA_GRE_IKEY */
1706c19e654dSHerbert Xu 		nla_total_size(4) +
1707c19e654dSHerbert Xu 		/* IFLA_GRE_OKEY */
1708c19e654dSHerbert Xu 		nla_total_size(4) +
1709c19e654dSHerbert Xu 		/* IFLA_GRE_LOCAL */
1710c19e654dSHerbert Xu 		nla_total_size(4) +
1711c19e654dSHerbert Xu 		/* IFLA_GRE_REMOTE */
1712c19e654dSHerbert Xu 		nla_total_size(4) +
1713c19e654dSHerbert Xu 		/* IFLA_GRE_TTL */
1714c19e654dSHerbert Xu 		nla_total_size(1) +
1715c19e654dSHerbert Xu 		/* IFLA_GRE_TOS */
1716c19e654dSHerbert Xu 		nla_total_size(1) +
1717c19e654dSHerbert Xu 		/* IFLA_GRE_PMTUDISC */
1718c19e654dSHerbert Xu 		nla_total_size(1) +
1719c19e654dSHerbert Xu 		0;
1720c19e654dSHerbert Xu }
1721c19e654dSHerbert Xu 
1722c19e654dSHerbert Xu static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1723c19e654dSHerbert Xu {
1724c19e654dSHerbert Xu 	struct ip_tunnel *t = netdev_priv(dev);
1725c19e654dSHerbert Xu 	struct ip_tunnel_parm *p = &t->parms;
1726c19e654dSHerbert Xu 
1727f3756b79SDavid S. Miller 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1728f3756b79SDavid S. Miller 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1729f3756b79SDavid S. Miller 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1730f3756b79SDavid S. Miller 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1731f3756b79SDavid S. Miller 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1732f3756b79SDavid S. Miller 	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1733f3756b79SDavid S. Miller 	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1734f3756b79SDavid S. Miller 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1735f3756b79SDavid S. Miller 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1736f3756b79SDavid S. Miller 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1737f3756b79SDavid S. Miller 		       !!(p->iph.frag_off & htons(IP_DF))))
1738f3756b79SDavid S. Miller 		goto nla_put_failure;
1739c19e654dSHerbert Xu 	return 0;
1740c19e654dSHerbert Xu 
1741c19e654dSHerbert Xu nla_put_failure:
1742c19e654dSHerbert Xu 	return -EMSGSIZE;
1743c19e654dSHerbert Xu }
1744c19e654dSHerbert Xu 
1745c19e654dSHerbert Xu static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1746c19e654dSHerbert Xu 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1747c19e654dSHerbert Xu 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1748c19e654dSHerbert Xu 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1749c19e654dSHerbert Xu 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1750c19e654dSHerbert Xu 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
17514d74f8baSPatrick McHardy 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
17524d74f8baSPatrick McHardy 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1753c19e654dSHerbert Xu 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1754c19e654dSHerbert Xu 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1755c19e654dSHerbert Xu 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1756c19e654dSHerbert Xu };
1757c19e654dSHerbert Xu 
1758c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1759c19e654dSHerbert Xu 	.kind		= "gre",
1760c19e654dSHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1761c19e654dSHerbert Xu 	.policy		= ipgre_policy,
1762c19e654dSHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1763c19e654dSHerbert Xu 	.setup		= ipgre_tunnel_setup,
1764c19e654dSHerbert Xu 	.validate	= ipgre_tunnel_validate,
1765c19e654dSHerbert Xu 	.newlink	= ipgre_newlink,
1766c19e654dSHerbert Xu 	.changelink	= ipgre_changelink,
1767c19e654dSHerbert Xu 	.get_size	= ipgre_get_size,
1768c19e654dSHerbert Xu 	.fill_info	= ipgre_fill_info,
1769c19e654dSHerbert Xu };
1770c19e654dSHerbert Xu 
1771e1a80002SHerbert Xu static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1772e1a80002SHerbert Xu 	.kind		= "gretap",
1773e1a80002SHerbert Xu 	.maxtype	= IFLA_GRE_MAX,
1774e1a80002SHerbert Xu 	.policy		= ipgre_policy,
1775e1a80002SHerbert Xu 	.priv_size	= sizeof(struct ip_tunnel),
1776e1a80002SHerbert Xu 	.setup		= ipgre_tap_setup,
1777e1a80002SHerbert Xu 	.validate	= ipgre_tap_validate,
1778e1a80002SHerbert Xu 	.newlink	= ipgre_newlink,
1779e1a80002SHerbert Xu 	.changelink	= ipgre_changelink,
1780e1a80002SHerbert Xu 	.get_size	= ipgre_get_size,
1781e1a80002SHerbert Xu 	.fill_info	= ipgre_fill_info,
1782e1a80002SHerbert Xu };
1783e1a80002SHerbert Xu 
17841da177e4SLinus Torvalds /*
17851da177e4SLinus Torvalds  *	And now the modules code and kernel interface.
17861da177e4SLinus Torvalds  */
17871da177e4SLinus Torvalds 
17881da177e4SLinus Torvalds static int __init ipgre_init(void)
17891da177e4SLinus Torvalds {
17901da177e4SLinus Torvalds 	int err;
17911da177e4SLinus Torvalds 
1792058bd4d2SJoe Perches 	pr_info("GRE over IPv4 tunneling driver\n");
17931da177e4SLinus Torvalds 
1794cfb8fbf2SEric W. Biederman 	err = register_pernet_device(&ipgre_net_ops);
179559a4c759SPavel Emelyanov 	if (err < 0)
1796c2892f02SAlexey Dobriyan 		return err;
1797c2892f02SAlexey Dobriyan 
179800959adeSDmitry Kozlov 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1799c2892f02SAlexey Dobriyan 	if (err < 0) {
1800058bd4d2SJoe Perches 		pr_info("%s: can't add protocol\n", __func__);
1801c2892f02SAlexey Dobriyan 		goto add_proto_failed;
1802c2892f02SAlexey Dobriyan 	}
18037daa0004SPavel Emelyanov 
1804c19e654dSHerbert Xu 	err = rtnl_link_register(&ipgre_link_ops);
1805c19e654dSHerbert Xu 	if (err < 0)
1806c19e654dSHerbert Xu 		goto rtnl_link_failed;
1807c19e654dSHerbert Xu 
1808e1a80002SHerbert Xu 	err = rtnl_link_register(&ipgre_tap_ops);
1809e1a80002SHerbert Xu 	if (err < 0)
1810e1a80002SHerbert Xu 		goto tap_ops_failed;
1811e1a80002SHerbert Xu 
1812c19e654dSHerbert Xu out:
18137daa0004SPavel Emelyanov 	return err;
1814c19e654dSHerbert Xu 
1815e1a80002SHerbert Xu tap_ops_failed:
1816e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
1817c19e654dSHerbert Xu rtnl_link_failed:
181800959adeSDmitry Kozlov 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1819c2892f02SAlexey Dobriyan add_proto_failed:
1820c2892f02SAlexey Dobriyan 	unregister_pernet_device(&ipgre_net_ops);
1821c19e654dSHerbert Xu 	goto out;
18221da177e4SLinus Torvalds }
18231da177e4SLinus Torvalds 
1824db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void)
18251da177e4SLinus Torvalds {
1826e1a80002SHerbert Xu 	rtnl_link_unregister(&ipgre_tap_ops);
1827c19e654dSHerbert Xu 	rtnl_link_unregister(&ipgre_link_ops);
182800959adeSDmitry Kozlov 	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1829058bd4d2SJoe Perches 		pr_info("%s: can't remove protocol\n", __func__);
1830c2892f02SAlexey Dobriyan 	unregister_pernet_device(&ipgre_net_ops);
18311da177e4SLinus Torvalds }
18321da177e4SLinus Torvalds 
18331da177e4SLinus Torvalds module_init(ipgre_init);
18341da177e4SLinus Torvalds module_exit(ipgre_fini);
18351da177e4SLinus Torvalds MODULE_LICENSE("GPL");
18364d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gre");
18374d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gretap");
18388909c9adSVasiliy Kulikov MODULE_ALIAS_NETDEV("gre0");
1839