/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; since cpu migration is forbidden once we enter the first
   ndo_xmit(), a per-cpu count is sufficient. We force an exit if this
   counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have
   DF set. But it is not our problem! Nobody could accuse us, we did
   all that we could. Even if it is your gated who injected the fatal
   route into the network, even if it were you who configured the
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

static int ipgre_net_id __read_mostly;
struct ipgre_net {
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the
   packet it will match only a tunnel with the same key; if it is not
   present, it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
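
/* Illustrative example (added note, not in the original source): HASH()
 * folds the low byte of the value into a 4-bit bucket index. For an
 * address whose host-order value is 0xc0a80101 (192.168.1.1 on a
 * big-endian box; the __force cast makes the result byte-order
 * dependent), (0xc0a80101 ^ 0x0c0a8010) & 0xF = 0x1 ^ 0x0 = 1,
 * i.e. bucket 1.
 */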

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
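
/* Note added for clarity (assumption about the u64_stats helpers): the
 * per-cpu tx/rx counters below are read under a seqcount retry loop so
 * that 64-bit values stay consistent even on 32-bit hosts, where a
 * concurrent update could otherwise be observed half-written.
 */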

static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
						   struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;
	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	return tot;
}

/* Does the key in the tunnel parameters match the packet? */
static bool ipgre_key_match(const struct ip_tunnel_parm *p,
			    __be16 flags, __be32 key)
{
	if (p->i_flags & GRE_KEY) {
		if (flags & GRE_KEY)
			return key == p->i_key;
		else
			return false;	/* key expected, none present */
	} else
		return !(flags & GRE_KEY);
}
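
/* Illustrative truth table (added note, not in the original source):
 *
 *   tunnel keyed?  packet keyed?  result
 *   yes            yes            match iff keys are equal
 *   yes            no             no match
 *   no             yes            no match
 *   no             no             match
 */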

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be16 flags, __be32 key,
					     __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
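
/* Scoring note (added for clarity): within each hash chain an exact
 * match on both the bound link and the device type (score 0) wins
 * immediately; otherwise a link mismatch costs 1 and a type mismatch
 * costs 2, so the least-mismatched candidate is remembered and
 * returned only after all four tables have been searched.
 */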

static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned int h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}
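
/* Illustrative example (added note): a tunnel with a local address and
 * a unicast remote sets both bits, so prio = 3 and it lands in
 * tunnels_r_l; a multicast remote does not set bit 2, so a broadcast
 * tunnel with a local address hashes into tunnels_l (prio = 1).
 */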

static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t;
	struct ip_tunnel __rcu **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;

	return t;
}

static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "gre%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}

static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key in the third word
   of the GRE header. This makes it impossible to maintain even soft
   state for keyed GRE tunnels with checksum enabled. Tell them
   "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee, so why
   the hell do these idiots break the standards established by
   themselves???
 */

	const struct iphdr *iph = (const struct iphdr *)skb->data;
	__be16	     *p = (__be16 *)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;
	__be32 key = 0;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes are returned, a keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	if (flags & GRE_KEY)
		key = *(((__be32 *)p) + (grehlen / 4) - 1);

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags, key, p[1]);

	if (t == NULL)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		return;
	}
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

static inline u8
ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}
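
/* Worked example (added note, assuming the usual INET_ECN_encapsulate
 * semantics of copying the inner ECN bits into the outer header): a
 * configured tos of 0xb8 with an inner ECT(1) packet (tos 0x01) yields
 * an outer tos of 0xb9; the DSCP part stays whatever the tunnel chose.
 */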
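/* GRE wire format reminder (added note): the header parsed below is
 *   [ flags | version ][ protocol ]   4 bytes, always present
 *   [ checksum ][ reserved ]          4 bytes, if GRE_CSUM
 *   [ key ]                           4 bytes, if GRE_KEY
 *   [ sequence number ]               4 bytes, if GRE_SEQ
 * hence the pskb_may_pull(skb, 16) below covers the largest header
 * this driver accepts.
 */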
static int ipgre_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;
	int    err;

	if (!pskb_may_pull(skb, 16))
		goto drop;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	tunnel = ipgre_tunnel_lookup(skb->dev,
				     iph->saddr, iph->daddr, flags, key,
				     gre_proto);
	if (tunnel) {
		struct pcpu_tstats *tstats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (rt_is_output_route(skb_rtable(skb)))
				goto drop;
			tunnel->dev->stats.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		gro_cells_receive(&tunnel->gro_cells, skb);
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}

static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
{
	int err;

	if (skb_is_gso(skb)) {
		err = skb_unclone(skb, GFP_ATOMIC);
		if (unlikely(err))
			goto error;
		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
		return skb;
	} else if (skb->ip_summed == CHECKSUM_PARTIAL &&
		   tunnel->parms.o_flags&GRE_CSUM) {
		err = skb_checksum_help(skb);
		if (unlikely(err))
			goto error;
	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
		skb->ip_summed = CHECKSUM_NONE;

	return skb;

error:
	kfree_skb(skb);
	return ERR_PTR(err);
}
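
/* Added note: GSO skbs are only tagged SKB_GSO_GRE here and segmented
 * later; for non-GSO skbs that still carry a pending hardware checksum,
 * the inner checksum is resolved in software first, because the GRE
 * checksum computed in ipgre_tunnel_xmit() would otherwise cover an
 * unfinished value.
 */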

static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr  *old_iph;
	const struct iphdr  *tiph;
	struct flowi4 fl4;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;
	u8     ttl;
	int    err;
	int    pkt_len;

	skb = handle_offloads(tunnel, skb);
	if (IS_ERR(skb)) {
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	if (!skb->encapsulation) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	old_iph = ip_hdr(skb);

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		gre_hlen = 0;
		tiph = (const struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	dst = tiph->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, old_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	ttl = tiph->ttl;
	tos = tiph->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
	}

	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
				 tunnel->parms.o_key, RT_TOS(tos),
				 tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if (!skb_is_gso(skb) &&
		    (old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) &&
		    mtu >= IPV6_MIN_MTU &&
		    mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
		/* Warning: tiph might now point to freed memory */
	}

	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	skb_set_transport_header(skb, sizeof(*iph));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the tunnel (outer IP) header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;
	iph->ttl		=	ttl;

	tunnel_ip_select_ident(skb, old_iph, &rt->dst);

	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = ip4_dst_hoplimit(&rt->dst);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;
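
	/* Added note: the optional GRE words are written back to front.
	 * ptr starts at the last word of the header (hlen - 4 past iph),
	 * so with o_flags = GRE_CSUM|GRE_KEY|GRE_SEQ the stores land as
	 * seq, then key, leaving the first optional word for the
	 * checksum computed below.
	 */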
	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		/* Skip GRE checksum if skb is getting offloaded. */
		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
		    (tunnel->parms.o_flags&GRE_CSUM)) {
			int offset = skb_transport_offset(skb);

			if (skb_has_shared_frag(skb)) {
				err = __skb_linearize(skb);
				if (err)
					goto tx_error;
			}

			*ptr = 0;
			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
								 skb->len - offset,
								 0));
		}
	}

	nf_reset(skb);

	pkt_len = skb->len - skb_transport_offset(skb);
	err = ip_local_out(skb);
	if (likely(net_xmit_eval(err) == 0)) {
		u64_stats_update_begin(&tstats->syncp);
		tstats->tx_bytes += pkt_len;
		tstats->tx_packets++;
		u64_stats_update_end(&tstats->syncp);
	} else {
		dev->stats.tx_errors++;
		dev->stats.tx_aborted_errors++;
	}
	return NETDEV_TX_OK;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;
	/* TCP offload with GRE SEQ is not supported. */
	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
		dev->features		|= NETIF_F_GSO_SOFTWARE;
		dev->hw_features	|= NETIF_F_GSO_SOFTWARE;
	}

	return mtu;
}
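
/* Worked example (added note): for a plain ARPHRD_IPGRE tunnel with a
 * key and checksums over a 1500-byte underlay, addend = 20 + 4 + 4 + 4
 * = 32, so the advertised tunnel mtu becomes 1500 - 32 = 1468 (the
 * device itself carries no hard_header_len in that mode).
 */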

static int
ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				ipgre_tunnel_unlink(ign, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			t = ipgre_tunnel_locate(net, &p, 0);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
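
/* Added note (illustrative, hypothetical values): a classic
 * `ip tunnel add gre1 mode gre remote 203.0.113.1 local 198.51.100.2
 * ttl 64` command reaches this driver through SIOCADDTUNNEL with the
 * addresses and ttl filled into struct ip_tunnel_parm; the addresses
 * above are documentation examples, not anything configured here.
 */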

static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
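
/* Added note: 0xFFF8 (65528) is the largest 8-byte-aligned IP datagram
 * size, so e.g. a plain keyless tunnel (hlen = 24, no link-layer
 * header) accepts mtu values up to 65504.
 */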

/* A nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play
   with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */

static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16 *)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen;

	return -t->hlen;
}
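
/* Added note (assumption about the header_ops->create convention, as
 * with eth_header()): returning the positive header length signals a
 * complete header, while -hlen tells callers the header was pushed but
 * the destination is still unresolved.
 */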

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}

#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};

static void ipgre_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int err;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	return 0;
}

static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = ipgre_rcv,
	.err_handler = ipgre_err,
};

static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ign->tunnels[prio][h]);

			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					  ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	err = register_netdev(ign->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;
	LIST_HEAD(list);

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
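
/* Added note (illustrative, hypothetical values): a command such as
 * `ip link add gre1 type gre remote 203.0.113.1 local 198.51.100.2
 * ikey 42 okey 42` arrives here as IFLA_GRE_* attributes; with keys
 * configured, iproute2 also sets GRE_KEY in the i_flags/o_flags
 * attributes so the keys parsed above are actually used on the wire.
 */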

static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};

/*
 *	And now the module code and kernel interface.
 */

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

out:
	return err;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_net_ops);
	goto out;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
		pr_info("%s: can't remove protocol\n", __func__);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");