// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
#include <net/inet_dscp.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_start_xmit(), cpu migration
   is forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would
   really kill the network. The IP hop limit plays the role of
   "t->recursion" in this case, if we copy it from the packet being
   encapsulated to the upper header. It is a very good solution,
   but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and the traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where the looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulating packets have
   DF set. But it is not our problem! Nobody could accuse us; we made
   all that we could make. Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
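
/* A minimal sketch of the xmit_recursion guard described above. The
 * real helpers are dev_xmit_recursion(), dev_xmit_recursion_inc() and
 * dev_xmit_recursion_dec() in <linux/netdevice.h>; the names and the
 * limit below are illustrative only, and the sketch assumes the
 * per-CPU helpers from <linux/percpu.h>.
 */
static DEFINE_PER_CPU(unsigned int, example_xmit_depth);
#define EXAMPLE_RECURSION_LIMIT 8

static inline bool example_xmit_enter(void)
{
	if (__this_cpu_inc_return(example_xmit_depth) > EXAMPLE_RECURSION_LIMIT) {
		__this_cpu_dec(example_xmit_depth);
		return false;	/* dead loop suspected: caller drops the skb */
	}
	return true;
}

static inline void example_xmit_exit(void)
{
	__this_cpu_dec(example_xmit_depth);
}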

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee, so
	   why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;

	case ICMP_REDIRECT:
		break;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6)) {
		unsigned int data_len = 0;

		if (type == ICMP_TIME_EXCEEDED)
			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */

		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
						type, data_len))
			return 0;
	}
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
	 * why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static bool is_erspan_type1(int gre_hdr_len)
{
	/* Both ERSPAN type I (version 0) and type II (version 1) use
	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
	 * while type II has an 8-byte one.
	 */
	return gre_hdr_len == 4;
}
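
/* The 4 vs. 8 byte distinction above falls out of the GRE header
 * layout (RFC 2784/2890): a 4-byte base header plus one 4-byte word
 * per enabled option. A standalone sketch of what gre_calc_hlen() in
 * <net/gre.h> computes from the tunnel flags:
 */
static inline int example_gre_header_len(bool csum, bool key, bool seq)
{
	int hlen = 4;		/* flags + protocol */

	if (csum)
		hlen += 4;	/* checksum + reserved */
	if (key)
		hlen += 4;	/* key (set by ERSPAN type II) */
	if (seq)
		hlen += 4;	/* sequence number */
	return hlen;
}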

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	IP_TUNNEL_DECLARE_FLAGS(flags);
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	ip_tunnel_flags_copy(flags, tpi->flags);

	itn = net_generic(net, erspan_net_id);
	iph = ip_hdr(skb);
	if (is_erspan_type1(gre_hdr_len)) {
		ver = 0;
		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
					  iph->saddr, iph->daddr, 0);
	} else {
		if (unlikely(!pskb_may_pull(skb,
					    gre_hdr_len + sizeof(*ershdr))))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		ver = ershdr->ver;
		iph = ip_hdr(skb);
		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
					  iph->saddr, iph->daddr, tpi->key);
	}

	if (tunnel) {
		if (is_erspan_type1(gre_hdr_len))
			len = gre_hdr_len;
		else
			len = gre_hdr_len + erspan_hdr_len(ver);

		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct erspan_metadata *pkt_md, *md;
			struct ip_tunnel_info *info;
			unsigned char *gh;
			__be64 tun_id;

			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
			ip_tunnel_flags_copy(flags, tpi->flags);
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* skb can be uncloned in __iptunnel_pull_header(),
			 * so the old pkt_md is no longer valid and we need
			 * to reset it.
			 */
			gh = skb_network_header(skb) +
			     skb_network_header_len(skb);
			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
							    sizeof(*ershdr));
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			info = &tun_dst->u.tun_info;
			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
				  info->key.tun_flags);
			info->options_len = sizeof(*md);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		const struct iphdr *tnl_params;

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		/* Special case for ipgre_header_parse(), which expects the
		 * mac_header to point to the outer IP header.
		 */
		if (tunnel->dev->header_ops == &ipgre_header_ops)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);

		tnl_params = &tunnel->parms.iph;
		if (tunnel->collect_md || tnl_params->daddr == 0) {
			IP_TUNNEL_DECLARE_FLAGS(flags) = { };
			__be64 tun_id;

			__set_bit(IP_TUNNEL_CSUM_BIT, flags);
			__set_bit(IP_TUNNEL_KEY_BIT, flags);
			ip_tunnel_flags_and(flags, tpi->flags, flags);

			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect-metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags);

	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 flags, proto, tunnel->parms.o_key,
			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
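
/* Wire format that gre_build_header() emits for the transmit paths in
 * this file, in sketch form; the optional words appear in this fixed
 * order, each present only when the corresponding flag bit is set (no
 * routing field is ever used). See <net/gre.h> for the real
 * definitions.
 */
struct example_gre_full_hdr {
	__be16 flags;		/* GRE_CSUM | GRE_KEY | GRE_SEQ, version 0 */
	__be16 protocol;	/* inner EtherType, e.g. ETH_P_TEB */
	__be32 csum_word;	/* 16-bit csum + 16 reserved bits, if GRE_CSUM */
	__be32 key;		/* o_key, if GRE_KEY */
	__be32 seq;		/* o_seqno, if GRE_SEQ */
};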

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	int tunnel_hlen;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto err_free_skb;

	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
	__set_bit(IP_TUNNEL_KEY_BIT, flags);
	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);

	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	bool truncate = false;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
		goto err_free_skb;
	if (tun_info->options_len < sizeof(*md))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);

	/* ERSPAN has a fixed 8-byte GRE header */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto err_free_skb;
		truncate = true;
	}

	nhoff = skb_network_offset(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		int thoff;

		if (skb_transport_header_was_set(skb))
			thoff = skb_transport_offset(skb);
		else
			thoff = nhoff + sizeof(struct ipv6hdr);
		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
			truncate = true;
	}

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_skb;
	}

	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
	gre_build_header(skb, 8, flags, proto, 0,
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}
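
/* A sketch of the ERSPAN type II (version 1) header that
 * erspan_build_header() pushes after the 8-byte GRE header; the
 * bitfield-accurate definition lives in <net/erspan.h>:
 *
 *   ver(4) vlan(12) | cos(3) en(2) t(1) session_id(10) | reserved(12) index(20)
 */
struct example_erspan2_hdr {
	__be16 ver_vlan;	/* 4-bit version (1), 12-bit original VLAN */
	__be16 session;		/* COS, encap type, truncated bit, 10-bit ID */
	__be32 index;		/* 12 reserved bits + 20-bit port index */
};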

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id),
			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
			    skb->mark, skb_get_hash(skb), key->flow_flags);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}
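
/* A sketch of the 32-bit GRE key <-> 64-bit tunnel ID conversion used
 * throughout the metadata paths in this file; the authoritative
 * helpers are key32_to_tunnel_id() and tunnel_id_to_key32() in
 * <net/ip_tunnels.h>. The key occupies the low-order 32 bits of the
 * big-endian tun_id.
 */
static inline __be64 example_key32_to_tunnel_id(__be32 key)
{
#ifdef __BIG_ENDIAN
	return (__force __be64)key;
#else
	return (__force __be64)((__force u64)key << 32);
#endif
}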

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		int pull_len = tunnel->hlen + sizeof(struct iphdr);

		if (skb_cow_head(skb, 0))
			goto free_skb;

		if (!pskb_may_pull(skb, pull_len))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* ip_tunnel_xmit() needs skb->data pointing to the GRE header. */
		skb_pull(skb, pull_len);
		skb_reset_mac_header(skb);

		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_start(skb) < skb->data)
			goto free_skb;
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto free_skb;
		truncate = true;
	}

	/* Push ERSPAN header */
	if (tunnel->erspan_ver == 0) {
		proto = htons(ETH_P_ERSPAN);
		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
	} else if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	if (dev->header_ops)
		dev->hard_header_len += len;
	else
		dev->needed_headroom += len;

	if (set_mtu)
		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));

	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
	    (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
	     tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
		dev->features &= ~NETIF_F_GSO_SOFTWARE;
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
	} else {
		dev->features |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	}
}

static int ipgre_tunnel_ctl(struct net_device *dev,
			    struct ip_tunnel_parm_kern *p,
			    int cmd)
{
	__be16 i_flags, o_flags;
	int err;

	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
		return -EOVERFLOW;

	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
	o_flags = ip_tunnel_flags_to_be16(p->o_flags);

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	gre_flags_to_tnl_flags(p->i_flags, i_flags);
	gre_flags_to_tnl_flags(p->o_flags, o_flags);

	err = ip_tunnel_ctl(dev, p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
	ip_tunnel_flags_from_be16(p->o_flags, o_flags);

	return 0;
}
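
/* A sketch of driving the ioctl path above from userspace via the
 * legacy SIOCADDTUNNEL interface, assuming the struct ip_tunnel_parm
 * ABI from <linux/if_tunnel.h> (the kernel-internal
 * struct ip_tunnel_parm_kern is translated from it). Error handling
 * is elided and the names are placeholders:
 *
 *	struct ip_tunnel_parm p = {
 *		.iph = { .version = 4, .ihl = 5, .protocol = IPPROTO_GRE },
 *	};
 *	struct ifreq ifr;
 *
 *	strcpy(p.name, "gre-example");
 *	strcpy(ifr.ifr_name, "gre0");
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(socket(AF_INET, SOCK_DGRAM, 0), SIOCADDTUNNEL, &ifr);
 */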

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
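
/* For this link type the "hardware address" is simply a peer IPv4
 * address (dev->addr_len == 4, see ipgre_tunnel_init() below), so an
 * address copy amounts to no more than this illustrative helper:
 */
static inline void example_copy_ipgre_haddr(unsigned char *haddr,
					    const __be32 *peer)
{
	memcpy(haddr, peer, 4);
}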

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4 = {
			.flowi4_oif = t->parms.link,
			.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)),
			.flowi4_scope = RT_SCOPE_UNIVERSE,
			.flowi4_proto = IPPROTO_GRE,
			.saddr = t->parms.iph.saddr,
			.daddr = t->parms.iph.daddr,
			.fl4_gre_key = t->parms.o_key,
		};
		struct rtable *rt;

		rt = ip_route_output_key(t->net, &fl4);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	/* TCP offload with GRE SEQ is not supported, nor can we support 2
	 * levels of outer headers requiring an update.
	 */
	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
		return;
	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
		return;

	dev->features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

	dev->lltx = true;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	__dev_addr_set(dev, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
			dev->needed_headroom = 0;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
		dev->needed_headroom = 0;
	}

	return ip_tunnel_init(dev);
}
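
/* A sketch of the headroom arithmetic configured above for a plain
 * point-to-point tunnel (no collect_md): the "hardware header" of the
 * NOARP device is the outer IPv4 header plus the full GRE header, so
 * hard_header_len ends up as the value computed here.
 */
static inline unsigned int example_ipgre_header_len(const struct ip_tunnel *t)
{
	return t->hlen + sizeof(struct iphdr);
}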

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_rtnl(struct net *net,
				       struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_rtnl = ipgre_exit_rtnl,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION|GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	if (data[IFLA_GRE_ERSPAN_VER] &&
	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
		return 0;

	/* ERSPAN type II/III should only have the GRE sequence and key flags */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse
	 * the 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}
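
/* A sketch of the 10-bit range check performed above: ID_MASK in
 * <net/erspan.h> is 0x3ff, so the only valid ERSPAN session IDs are
 * 0..1023.
 */
static inline bool example_valid_erspan_session_id(u32 id)
{
	return (id & ~0x3ffU) == 0;
}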

static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		gre_flags_to_tnl_flags(parms->i_flags,
				       nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		gre_flags_to_tnl_flags(parms->o_flags,
				       nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) &&
		    (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}

static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver > 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}
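
/* When the attributes parsed above are present, the GRE packets are
 * additionally wrapped in UDP (FOU/GUE). An illustrative iproute2
 * invocation (syntax per ip-link(8); addresses and the port are
 * placeholders):
 *
 *   ip link add gre1 type gre local 192.0.2.1 remote 192.0.2.2 \
 *          encap fou encap-sport auto encap-dport 5555
 */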

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->erspan_ver == 0)
		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
	else
		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */

	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int
ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	return 0;
}

static int ipgre_newlink(struct net_device *dev,
			 struct rtnl_newlink_params *params,
			 struct netlink_ext_ack *extack)
{
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
				 fwmark);
}

static int erspan_newlink(struct net_device *dev,
			  struct rtnl_newlink_params *params,
			  struct netlink_ext_ack *extack)
{
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err)
		return err;
	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
				 fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = t->fwmark;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);

	ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = t->fwmark;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern *p = &t->parms;
	IP_TUNNEL_DECLARE_FLAGS(o_flags);

	ip_tunnel_flags_copy(o_flags, p->o_flags);

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->erspan_ver <= 2) {
		if (t->erspan_ver != 0 && !t->collect_md)
			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else if (t->erspan_ver == 2) {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	return ipgre_fill_info(skb, dev);

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= erspan_newlink,
	.changelink	= erspan_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= erspan_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
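
/* Illustrative iproute2 usage of the three link types registered
 * above (syntax per ip-link(8); addresses and IDs are placeholders):
 *
 *   ip link add gre1 type gre local 192.0.2.1 remote 192.0.2.2 ttl 64
 *   ip link add tap1 type gretap external		(collect_md mode)
 *   ip link add er1 type erspan local 192.0.2.1 remote 192.0.2.2 \
 *          seq key 100 erspan_ver 1 erspan 123
 */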

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct rtnl_newlink_params params = { .src_net = net };
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));
	params.tb = tb;

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(dev, &params, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
					   struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_rtnl = ipgre_tap_exit_rtnl,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_rtnl(struct net *net,
					struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_rtnl = erspan_exit_rtnl,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");