xref: /linux/net/ipv4/ip_gre.c (revision 8632175ccb0c8cfc69b0f54c47b4b15b44c263ff)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux NET3:	GRE over IP protocol decoder.
4  *
5  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30 
31 #include <net/flow.h>
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ip_tunnels.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44 #include <net/rtnetlink.h>
45 #include <net/gre.h>
46 #include <net/dst_metadata.h>
47 #include <net/erspan.h>
48 
49 /*
50    Problems & solutions
51    --------------------
52 
53    1. The most important issue is detecting local dead loops.
54    They would cause complete host lockup in transmit, which
55    would be "resolved" by stack overflow or, if queueing is enabled,
56    with infinite looping in net_bh.
57 
58    We cannot track such dead loops during route installation,
59    it is infeasible task. The most general solutions would be
60    to keep skb->encapsulation counter (sort of local ttl),
61    and silently drop packet when it expires. It is a good
62    solution, but it supposes maintaining new variable in ALL
63    skb, even if no tunneling is used.
64 
65    Current solution: xmit_recursion breaks dead loops. This is a percpu
66    counter, since when we enter the first ndo_xmit(), cpu migration is
67    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
68 
69    2. Networking dead loops would not kill routers, but would really
70    kill network. IP hop limit plays role of "t->recursion" in this case,
71    if we copy it from packet being encapsulated to upper header.
72    It is very good solution, but it introduces two problems:
73 
74    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
75      do not work over tunnels.
76    - traceroute does not work. I planned to relay ICMP from tunnel,
77      so that this problem would be solved and traceroute output
78      would even more informative. This idea appeared to be wrong:
79      only Linux complies to rfc1812 now (yes, guys, Linux is the only
80      true router now :-)), all routers (at least, in neighbourhood of mine)
81      return only 8 bytes of payload. It is the end.
82 
83    Hence, if we want that OSPF worked or traceroute said something reasonable,
84    we should search for another solution.
85 
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution at all.
89 
90    Current solution: The solution was UNEXPECTEDLY SIMPLE.
91    We force DF flag on tunnels with preconfigured hop limit,
92    that is ALL. :-) Well, it does not remove the problem completely,
93    but exponential growth of network traffic is changed to linear
94    (branches, that exceed pmtu are pruned) and tunnel mtu
95    rapidly degrades to value <68, where looping stops.
96    Yes, it is not good if there exists a router in the loop,
97    which does not force DF, even when encapsulating packets have DF set.
98    But it is not our problem! Nobody could accuse us, we made
99    all that we could make. Even if it is your gated who injected
100    fatal route to network, even if it were you who configured
101    fatal static route: you are innocent. :-)
102 
103    Alexey Kuznetsov.
104  */
105 
106 static bool log_ecn_error = true;
107 module_param(log_ecn_error, bool, 0644);
108 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
109 
110 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
111 static const struct header_ops ipgre_header_ops;
112 
113 static int ipgre_tunnel_init(struct net_device *dev);
114 static void erspan_build_header(struct sk_buff *skb,
115 				u32 id, u32 index,
116 				bool truncate, bool is_ipv4);
117 
118 static unsigned int ipgre_net_id __read_mostly;
119 static unsigned int gre_tap_net_id __read_mostly;
120 static unsigned int erspan_net_id __read_mostly;
121 
122 static int ipgre_err(struct sk_buff *skb, u32 info,
123 		     const struct tnl_ptk_info *tpi)
124 {
125 
126 	/* All the routers (except for Linux) return only
127 	   8 bytes of packet payload. It means, that precise relaying of
128 	   ICMP in the real Internet is absolutely infeasible.
129 
130 	   Moreover, Cisco "wise men" put GRE key to the third word
131 	   in GRE header. It makes impossible maintaining even soft
132 	   state for keyed GRE tunnels with enabled checksum. Tell
133 	   them "thank you".
134 
135 	   Well, I wonder, rfc1812 was written by Cisco employee,
136 	   what the hell these idiots break standards established
137 	   by themselves???
138 	   */
139 	struct net *net = dev_net(skb->dev);
140 	struct ip_tunnel_net *itn;
141 	const struct iphdr *iph;
142 	const int type = icmp_hdr(skb)->type;
143 	const int code = icmp_hdr(skb)->code;
144 	struct ip_tunnel *t;
145 
146 	if (tpi->proto == htons(ETH_P_TEB))
147 		itn = net_generic(net, gre_tap_net_id);
148 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 		 tpi->proto == htons(ETH_P_ERSPAN2))
150 		itn = net_generic(net, erspan_net_id);
151 	else
152 		itn = net_generic(net, ipgre_net_id);
153 
154 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 			     iph->daddr, iph->saddr, tpi->key);
157 
158 	if (!t)
159 		return -ENOENT;
160 
161 	switch (type) {
162 	default:
163 	case ICMP_PARAMETERPROB:
164 		return 0;
165 
166 	case ICMP_DEST_UNREACH:
167 		switch (code) {
168 		case ICMP_SR_FAILED:
169 		case ICMP_PORT_UNREACH:
170 			/* Impossible event. */
171 			return 0;
172 		default:
173 			/* All others are translated to HOST_UNREACH.
174 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
175 			   I believe they are just ether pollution. --ANK
176 			 */
177 			break;
178 		}
179 		break;
180 
181 	case ICMP_TIME_EXCEEDED:
182 		if (code != ICMP_EXC_TTL)
183 			return 0;
184 		break;
185 
186 	case ICMP_REDIRECT:
187 		break;
188 	}
189 
190 #if IS_ENABLED(CONFIG_IPV6)
191 	if (tpi->proto == htons(ETH_P_IPV6)) {
192 		unsigned int data_len = 0;
193 
194 		if (type == ICMP_TIME_EXCEEDED)
195 			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
196 
197 		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
198 						type, data_len))
199 			return 0;
200 	}
201 #endif
202 
203 	if (t->parms.iph.daddr == 0 ||
204 	    ipv4_is_multicast(t->parms.iph.daddr))
205 		return 0;
206 
207 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
208 		return 0;
209 
210 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
211 		t->err_count++;
212 	else
213 		t->err_count = 1;
214 	t->err_time = jiffies;
215 
216 	return 0;
217 }
218 
219 static void gre_err(struct sk_buff *skb, u32 info)
220 {
221 	/* All the routers (except for Linux) return only
222 	 * 8 bytes of packet payload. It means, that precise relaying of
223 	 * ICMP in the real Internet is absolutely infeasible.
224 	 *
225 	 * Moreover, Cisco "wise men" put GRE key to the third word
226 	 * in GRE header. It makes impossible maintaining even soft
227 	 * state for keyed
228 	 * GRE tunnels with enabled checksum. Tell them "thank you".
229 	 *
230 	 * Well, I wonder, rfc1812 was written by Cisco employee,
231 	 * what the hell these idiots break standards established
232 	 * by themselves???
233 	 */
234 
235 	const struct iphdr *iph = (struct iphdr *)skb->data;
236 	const int type = icmp_hdr(skb)->type;
237 	const int code = icmp_hdr(skb)->code;
238 	struct tnl_ptk_info tpi;
239 
240 	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
241 			     iph->ihl * 4) < 0)
242 		return;
243 
244 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
245 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
246 				 skb->dev->ifindex, IPPROTO_GRE);
247 		return;
248 	}
249 	if (type == ICMP_REDIRECT) {
250 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
251 			      IPPROTO_GRE);
252 		return;
253 	}
254 
255 	ipgre_err(skb, info, &tpi);
256 }
257 
/* Tell ERSPAN type I from type II by GRE header length.
 *
 * Type I (version 0) and type II (version 1) share protocol 0x88BE; type I
 * carries a 4-byte GRE header whereas type II carries an 8-byte one.
 *
 * Returns true when @gre_hdr_len is the 4-byte type I length.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
	const int type1_gre_hdr_len = 4;

	return gre_hdr_len == type1_gre_hdr_len;
}
266 
267 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
268 		      int gre_hdr_len)
269 {
270 	struct net *net = dev_net(skb->dev);
271 	struct metadata_dst *tun_dst = NULL;
272 	struct erspan_base_hdr *ershdr;
273 	IP_TUNNEL_DECLARE_FLAGS(flags);
274 	struct ip_tunnel_net *itn;
275 	struct ip_tunnel *tunnel;
276 	const struct iphdr *iph;
277 	struct erspan_md2 *md2;
278 	int ver;
279 	int len;
280 
281 	ip_tunnel_flags_copy(flags, tpi->flags);
282 
283 	itn = net_generic(net, erspan_net_id);
284 	iph = ip_hdr(skb);
285 	if (is_erspan_type1(gre_hdr_len)) {
286 		ver = 0;
287 		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
288 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
289 					  iph->saddr, iph->daddr, 0);
290 	} else {
291 		if (unlikely(!pskb_may_pull(skb,
292 					    gre_hdr_len + sizeof(*ershdr))))
293 			return PACKET_REJECT;
294 
295 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
296 		ver = ershdr->ver;
297 		iph = ip_hdr(skb);
298 		__set_bit(IP_TUNNEL_KEY_BIT, flags);
299 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
300 					  iph->saddr, iph->daddr, tpi->key);
301 	}
302 
303 	if (tunnel) {
304 		if (is_erspan_type1(gre_hdr_len))
305 			len = gre_hdr_len;
306 		else
307 			len = gre_hdr_len + erspan_hdr_len(ver);
308 
309 		if (unlikely(!pskb_may_pull(skb, len)))
310 			return PACKET_REJECT;
311 
312 		if (__iptunnel_pull_header(skb,
313 					   len,
314 					   htons(ETH_P_TEB),
315 					   false, false) < 0)
316 			goto drop;
317 
318 		if (tunnel->collect_md) {
319 			struct erspan_metadata *pkt_md, *md;
320 			struct ip_tunnel_info *info;
321 			unsigned char *gh;
322 			__be64 tun_id;
323 
324 			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
325 			ip_tunnel_flags_copy(flags, tpi->flags);
326 			tun_id = key32_to_tunnel_id(tpi->key);
327 
328 			tun_dst = ip_tun_rx_dst(skb, flags,
329 						tun_id, sizeof(*md));
330 			if (!tun_dst)
331 				return PACKET_REJECT;
332 
333 			/* MUST set options_len before referencing options */
334 			info = &tun_dst->u.tun_info;
335 			info->options_len = sizeof(*md);
336 
337 			/* skb can be uncloned in __iptunnel_pull_header, so
338 			 * old pkt_md is no longer valid and we need to reset
339 			 * it
340 			 */
341 			gh = skb_network_header(skb) +
342 			     skb_network_header_len(skb);
343 			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
344 							    sizeof(*ershdr));
345 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
346 			md->version = ver;
347 			md2 = &md->u.md2;
348 			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
349 						       ERSPAN_V2_MDSIZE);
350 
351 			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
352 				  info->key.tun_flags);
353 		}
354 
355 		skb_reset_mac_header(skb);
356 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
357 		return PACKET_RCVD;
358 	}
359 	return PACKET_REJECT;
360 
361 drop:
362 	kfree_skb(skb);
363 	return PACKET_RCVD;
364 }
365 
366 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
367 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
368 {
369 	struct metadata_dst *tun_dst = NULL;
370 	const struct iphdr *iph;
371 	struct ip_tunnel *tunnel;
372 
373 	iph = ip_hdr(skb);
374 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
375 				  iph->saddr, iph->daddr, tpi->key);
376 
377 	if (tunnel) {
378 		const struct iphdr *tnl_params;
379 
380 		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
381 					   raw_proto, false) < 0)
382 			goto drop;
383 
384 		/* Special case for ipgre_header_parse(), which expects the
385 		 * mac_header to point to the outer IP header.
386 		 */
387 		if (tunnel->dev->header_ops == &ipgre_header_ops)
388 			skb_pop_mac_header(skb);
389 		else
390 			skb_reset_mac_header(skb);
391 
392 		tnl_params = &tunnel->parms.iph;
393 		if (tunnel->collect_md || tnl_params->daddr == 0) {
394 			IP_TUNNEL_DECLARE_FLAGS(flags) = { };
395 			__be64 tun_id;
396 
397 			__set_bit(IP_TUNNEL_CSUM_BIT, flags);
398 			__set_bit(IP_TUNNEL_KEY_BIT, flags);
399 			ip_tunnel_flags_and(flags, tpi->flags, flags);
400 
401 			tun_id = key32_to_tunnel_id(tpi->key);
402 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
403 			if (!tun_dst)
404 				return PACKET_REJECT;
405 		}
406 
407 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
408 		return PACKET_RCVD;
409 	}
410 	return PACKET_NEXT;
411 
412 drop:
413 	kfree_skb(skb);
414 	return PACKET_RCVD;
415 }
416 
417 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
418 		     int hdr_len)
419 {
420 	struct net *net = dev_net(skb->dev);
421 	struct ip_tunnel_net *itn;
422 	int res;
423 
424 	if (tpi->proto == htons(ETH_P_TEB))
425 		itn = net_generic(net, gre_tap_net_id);
426 	else
427 		itn = net_generic(net, ipgre_net_id);
428 
429 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
430 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
431 		/* ipgre tunnels in collect metadata mode should receive
432 		 * also ETH_P_TEB traffic.
433 		 */
434 		itn = net_generic(net, ipgre_net_id);
435 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
436 	}
437 	return res;
438 }
439 
440 static int gre_rcv(struct sk_buff *skb)
441 {
442 	struct tnl_ptk_info tpi;
443 	bool csum_err = false;
444 	int hdr_len;
445 
446 #ifdef CONFIG_NET_IPGRE_BROADCAST
447 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
448 		/* Looped back packet, drop it! */
449 		if (rt_is_output_route(skb_rtable(skb)))
450 			goto drop;
451 	}
452 #endif
453 
454 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
455 	if (hdr_len < 0)
456 		goto drop;
457 
458 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
459 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
460 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
461 			return 0;
462 		goto out;
463 	}
464 
465 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
466 		return 0;
467 
468 out:
469 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
470 drop:
471 	dev_core_stats_rx_dropped_inc(skb->dev);
472 	kfree_skb(skb);
473 	return 0;
474 }
475 
476 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
477 		       const struct iphdr *tnl_params,
478 		       __be16 proto)
479 {
480 	struct ip_tunnel *tunnel = netdev_priv(dev);
481 	IP_TUNNEL_DECLARE_FLAGS(flags);
482 
483 	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
484 
485 	/* Push GRE header. */
486 	gre_build_header(skb, tunnel->tun_hlen,
487 			 flags, proto, tunnel->parms.o_key,
488 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
489 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
490 
491 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
492 }
493 
494 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
495 {
496 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
497 }
498 
499 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
500 			__be16 proto)
501 {
502 	struct ip_tunnel *tunnel = netdev_priv(dev);
503 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
504 	struct ip_tunnel_info *tun_info;
505 	const struct ip_tunnel_key *key;
506 	int tunnel_hlen;
507 
508 	tun_info = skb_tunnel_info(skb);
509 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
510 		     ip_tunnel_info_af(tun_info) != AF_INET))
511 		goto err_free_skb;
512 
513 	key = &tun_info->key;
514 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
515 
516 	if (skb_cow_head(skb, dev->needed_headroom))
517 		goto err_free_skb;
518 
519 	/* Push Tunnel header. */
520 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
521 					      tunnel->parms.o_flags)))
522 		goto err_free_skb;
523 
524 	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
525 	__set_bit(IP_TUNNEL_KEY_BIT, flags);
526 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
527 	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
528 
529 	gre_build_header(skb, tunnel_hlen, flags, proto,
530 			 tunnel_id_to_key32(tun_info->key.tun_id),
531 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
532 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
533 
534 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
535 
536 	return;
537 
538 err_free_skb:
539 	kfree_skb(skb);
540 	DEV_STATS_INC(dev, tx_dropped);
541 }
542 
543 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
544 {
545 	struct ip_tunnel *tunnel = netdev_priv(dev);
546 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
547 	struct ip_tunnel_info *tun_info;
548 	const struct ip_tunnel_key *key;
549 	struct erspan_metadata *md;
550 	bool truncate = false;
551 	__be16 proto;
552 	int tunnel_hlen;
553 	int version;
554 	int nhoff;
555 
556 	tun_info = skb_tunnel_info(skb);
557 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
558 		     ip_tunnel_info_af(tun_info) != AF_INET))
559 		goto err_free_skb;
560 
561 	key = &tun_info->key;
562 	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
563 		goto err_free_skb;
564 	if (tun_info->options_len < sizeof(*md))
565 		goto err_free_skb;
566 	md = ip_tunnel_info_opts(tun_info);
567 
568 	/* ERSPAN has fixed 8 byte GRE header */
569 	version = md->version;
570 	tunnel_hlen = 8 + erspan_hdr_len(version);
571 
572 	if (skb_cow_head(skb, dev->needed_headroom))
573 		goto err_free_skb;
574 
575 	if (gre_handle_offloads(skb, false))
576 		goto err_free_skb;
577 
578 	if (skb->len > dev->mtu + dev->hard_header_len) {
579 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
580 			goto err_free_skb;
581 		truncate = true;
582 	}
583 
584 	nhoff = skb_network_offset(skb);
585 	if (skb->protocol == htons(ETH_P_IP) &&
586 	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
587 		truncate = true;
588 
589 	if (skb->protocol == htons(ETH_P_IPV6)) {
590 		int thoff;
591 
592 		if (skb_transport_header_was_set(skb))
593 			thoff = skb_transport_offset(skb);
594 		else
595 			thoff = nhoff + sizeof(struct ipv6hdr);
596 		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
597 			truncate = true;
598 	}
599 
600 	if (version == 1) {
601 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
602 				    ntohl(md->u.index), truncate, true);
603 		proto = htons(ETH_P_ERSPAN);
604 	} else if (version == 2) {
605 		erspan_build_header_v2(skb,
606 				       ntohl(tunnel_id_to_key32(key->tun_id)),
607 				       md->u.md2.dir,
608 				       get_hwid(&md->u.md2),
609 				       truncate, true);
610 		proto = htons(ETH_P_ERSPAN2);
611 	} else {
612 		goto err_free_skb;
613 	}
614 
615 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
616 	gre_build_header(skb, 8, flags, proto, 0,
617 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));
618 
619 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
620 
621 	return;
622 
623 err_free_skb:
624 	kfree_skb(skb);
625 	DEV_STATS_INC(dev, tx_dropped);
626 }
627 
628 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
629 {
630 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
631 	const struct ip_tunnel_key *key;
632 	struct rtable *rt;
633 	struct flowi4 fl4;
634 
635 	if (ip_tunnel_info_af(info) != AF_INET)
636 		return -EINVAL;
637 
638 	key = &info->key;
639 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
640 			    tunnel_id_to_key32(key->tun_id),
641 			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
642 			    skb->mark, skb_get_hash(skb), key->flow_flags);
643 	rt = ip_route_output_key(dev_net(dev), &fl4);
644 	if (IS_ERR(rt))
645 		return PTR_ERR(rt);
646 
647 	ip_rt_put(rt);
648 	info->key.u.ipv4.src = fl4.saddr;
649 	return 0;
650 }
651 
652 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
653 			      struct net_device *dev)
654 {
655 	struct ip_tunnel *tunnel = netdev_priv(dev);
656 	const struct iphdr *tnl_params;
657 
658 	if (!pskb_inet_may_pull(skb))
659 		goto free_skb;
660 
661 	if (tunnel->collect_md) {
662 		gre_fb_xmit(skb, dev, skb->protocol);
663 		return NETDEV_TX_OK;
664 	}
665 
666 	if (dev->header_ops) {
667 		int pull_len = tunnel->hlen + sizeof(struct iphdr);
668 
669 		if (skb_cow_head(skb, 0))
670 			goto free_skb;
671 
672 		if (!pskb_may_pull(skb, pull_len))
673 			goto free_skb;
674 
675 		tnl_params = (const struct iphdr *)skb->data;
676 
677 		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
678 		skb_pull(skb, pull_len);
679 		skb_reset_mac_header(skb);
680 
681 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
682 		    skb_checksum_start(skb) < skb->data)
683 			goto free_skb;
684 	} else {
685 		if (skb_cow_head(skb, dev->needed_headroom))
686 			goto free_skb;
687 
688 		tnl_params = &tunnel->parms.iph;
689 	}
690 
691 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
692 					      tunnel->parms.o_flags)))
693 		goto free_skb;
694 
695 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
696 	return NETDEV_TX_OK;
697 
698 free_skb:
699 	kfree_skb(skb);
700 	DEV_STATS_INC(dev, tx_dropped);
701 	return NETDEV_TX_OK;
702 }
703 
704 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
705 			       struct net_device *dev)
706 {
707 	struct ip_tunnel *tunnel = netdev_priv(dev);
708 	bool truncate = false;
709 	__be16 proto;
710 
711 	if (!pskb_inet_may_pull(skb))
712 		goto free_skb;
713 
714 	if (tunnel->collect_md) {
715 		erspan_fb_xmit(skb, dev);
716 		return NETDEV_TX_OK;
717 	}
718 
719 	if (gre_handle_offloads(skb, false))
720 		goto free_skb;
721 
722 	if (skb_cow_head(skb, dev->needed_headroom))
723 		goto free_skb;
724 
725 	if (skb->len > dev->mtu + dev->hard_header_len) {
726 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
727 			goto free_skb;
728 		truncate = true;
729 	}
730 
731 	/* Push ERSPAN header */
732 	if (tunnel->erspan_ver == 0) {
733 		proto = htons(ETH_P_ERSPAN);
734 		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
735 	} else if (tunnel->erspan_ver == 1) {
736 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
737 				    tunnel->index,
738 				    truncate, true);
739 		proto = htons(ETH_P_ERSPAN);
740 	} else if (tunnel->erspan_ver == 2) {
741 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
742 				       tunnel->dir, tunnel->hwid,
743 				       truncate, true);
744 		proto = htons(ETH_P_ERSPAN2);
745 	} else {
746 		goto free_skb;
747 	}
748 
749 	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
750 	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
751 	return NETDEV_TX_OK;
752 
753 free_skb:
754 	kfree_skb(skb);
755 	DEV_STATS_INC(dev, tx_dropped);
756 	return NETDEV_TX_OK;
757 }
758 
759 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
760 				struct net_device *dev)
761 {
762 	struct ip_tunnel *tunnel = netdev_priv(dev);
763 
764 	if (!pskb_inet_may_pull(skb))
765 		goto free_skb;
766 
767 	if (tunnel->collect_md) {
768 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
769 		return NETDEV_TX_OK;
770 	}
771 
772 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
773 					      tunnel->parms.o_flags)))
774 		goto free_skb;
775 
776 	if (skb_cow_head(skb, dev->needed_headroom))
777 		goto free_skb;
778 
779 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
780 	return NETDEV_TX_OK;
781 
782 free_skb:
783 	kfree_skb(skb);
784 	DEV_STATS_INC(dev, tx_dropped);
785 	return NETDEV_TX_OK;
786 }
787 
788 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
789 {
790 	struct ip_tunnel *tunnel = netdev_priv(dev);
791 	int len;
792 
793 	len = tunnel->tun_hlen;
794 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
795 	len = tunnel->tun_hlen - len;
796 	tunnel->hlen = tunnel->hlen + len;
797 
798 	if (dev->header_ops)
799 		dev->hard_header_len += len;
800 	else
801 		dev->needed_headroom += len;
802 
803 	if (set_mtu)
804 		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
805 
806 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
807 	    (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
808 	     tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
809 		dev->features &= ~NETIF_F_GSO_SOFTWARE;
810 		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
811 	} else {
812 		dev->features |= NETIF_F_GSO_SOFTWARE;
813 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
814 	}
815 }
816 
817 static int ipgre_tunnel_ctl(struct net_device *dev,
818 			    struct ip_tunnel_parm_kern *p,
819 			    int cmd)
820 {
821 	__be16 i_flags, o_flags;
822 	int err;
823 
824 	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
825 	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
826 		return -EOVERFLOW;
827 
828 	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
829 	o_flags = ip_tunnel_flags_to_be16(p->o_flags);
830 
831 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
832 		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
833 		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
834 		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
835 			return -EINVAL;
836 	}
837 
838 	gre_flags_to_tnl_flags(p->i_flags, i_flags);
839 	gre_flags_to_tnl_flags(p->o_flags, o_flags);
840 
841 	err = ip_tunnel_ctl(dev, p, cmd);
842 	if (err)
843 		return err;
844 
845 	if (cmd == SIOCCHGTUNNEL) {
846 		struct ip_tunnel *t = netdev_priv(dev);
847 
848 		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
849 		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
850 
851 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
852 			ipgre_link_update(dev, true);
853 	}
854 
855 	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
856 	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
857 	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
858 	ip_tunnel_flags_from_be16(p->o_flags, o_flags);
859 
860 	return 0;
861 }
862 
863 /* Nice toy. Unfortunately, useless in real life :-)
864    It allows to construct virtual multiprotocol broadcast "LAN"
865    over the Internet, provided multicast routing is tuned.
866 
867 
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
872 
873    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
874    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
875 
876    ping -t 255 224.66.66.66
877 
878    If nobody answers, mbone does not work.
879 
880    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
881    ip addr add 10.66.66.<somewhat>/24 dev Universe
882    ifconfig Universe up
883    ifconfig Universe add fe80::<Your_real_addr>/10
884    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
885    ftp 10.66.66.66
886    ...
887    ftp fec0:6666:6666::193.233.7.65
888    ...
889  */
890 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
891 			unsigned short type,
892 			const void *daddr, const void *saddr, unsigned int len)
893 {
894 	struct ip_tunnel *t = netdev_priv(dev);
895 	struct gre_base_hdr *greh;
896 	struct iphdr *iph;
897 	int needed;
898 
899 	needed = t->hlen + sizeof(*iph);
900 	if (skb_headroom(skb) < needed &&
901 	    pskb_expand_head(skb, HH_DATA_ALIGN(needed - skb_headroom(skb)),
902 			     0, GFP_ATOMIC))
903 		return -needed;
904 
905 	iph = skb_push(skb, needed);
906 	greh = (struct gre_base_hdr *)(iph+1);
907 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
908 	greh->protocol = htons(type);
909 
910 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
911 
912 	/* Set the source hardware address. */
913 	if (saddr)
914 		memcpy(&iph->saddr, saddr, 4);
915 	if (daddr)
916 		memcpy(&iph->daddr, daddr, 4);
917 	if (iph->daddr)
918 		return t->hlen + sizeof(*iph);
919 
920 	return -(t->hlen + sizeof(*iph));
921 }
922 
923 static int ipgre_header_parse(const struct sk_buff *skb, const struct net_device *dev,
924 			      unsigned char *haddr)
925 {
926 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
927 	memcpy(haddr, &iph->saddr, 4);
928 	return 4;
929 }
930 
931 static const struct header_ops ipgre_header_ops = {
932 	.create	= ipgre_header,
933 	.parse	= ipgre_header_parse,
934 };
935 
936 #ifdef CONFIG_NET_IPGRE_BROADCAST
937 static int ipgre_open(struct net_device *dev)
938 {
939 	struct ip_tunnel *t = netdev_priv(dev);
940 
941 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
942 		struct flowi4 fl4 = {
943 			.flowi4_oif = t->parms.link,
944 			.flowi4_dscp = ip4h_dscp(&t->parms.iph),
945 			.flowi4_scope = RT_SCOPE_UNIVERSE,
946 			.flowi4_proto = IPPROTO_GRE,
947 			.saddr = t->parms.iph.saddr,
948 			.daddr = t->parms.iph.daddr,
949 			.fl4_gre_key = t->parms.o_key,
950 		};
951 		struct rtable *rt;
952 
953 		rt = ip_route_output_key(t->net, &fl4);
954 		if (IS_ERR(rt))
955 			return -EADDRNOTAVAIL;
956 		dev = rt->dst.dev;
957 		ip_rt_put(rt);
958 		if (!__in_dev_get_rtnl(dev))
959 			return -EADDRNOTAVAIL;
960 		t->mlink = dev->ifindex;
961 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
962 	}
963 	return 0;
964 }
965 
966 static int ipgre_close(struct net_device *dev)
967 {
968 	struct ip_tunnel *t = netdev_priv(dev);
969 
970 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
971 		struct in_device *in_dev;
972 		in_dev = inetdev_by_index(t->net, t->mlink);
973 		if (in_dev)
974 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
975 	}
976 	return 0;
977 }
978 #endif
979 
980 static const struct net_device_ops ipgre_netdev_ops = {
981 	.ndo_init		= ipgre_tunnel_init,
982 	.ndo_uninit		= ip_tunnel_uninit,
983 #ifdef CONFIG_NET_IPGRE_BROADCAST
984 	.ndo_open		= ipgre_open,
985 	.ndo_stop		= ipgre_close,
986 #endif
987 	.ndo_start_xmit		= ipgre_xmit,
988 	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
989 	.ndo_change_mtu		= ip_tunnel_change_mtu,
990 	.ndo_get_stats64	= dev_get_tstats64,
991 	.ndo_get_iflink		= ip_tunnel_get_iflink,
992 	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
993 };
994 
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
999 
1000 static void ipgre_tunnel_setup(struct net_device *dev)
1001 {
1002 	dev->netdev_ops		= &ipgre_netdev_ops;
1003 	dev->type		= ARPHRD_IPGRE;
1004 	ip_tunnel_setup(dev, ipgre_net_id);
1005 }
1006 
1007 static void __gre_tunnel_init(struct net_device *dev)
1008 {
1009 	struct ip_tunnel *tunnel;
1010 
1011 	tunnel = netdev_priv(dev);
1012 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
1013 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1014 
1015 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1016 	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
1017 
1018 	dev->features		|= GRE_FEATURES;
1019 	dev->hw_features	|= GRE_FEATURES;
1020 
1021 	/* TCP offload with GRE SEQ is not supported, nor can we support 2
1022 	 * levels of outer headers requiring an update.
1023 	 */
1024 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
1025 		return;
1026 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
1027 	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
1028 		return;
1029 
1030 	dev->features |= NETIF_F_GSO_SOFTWARE;
1031 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1032 
1033 	dev->lltx = true;
1034 }
1035 
1036 static int ipgre_tunnel_init(struct net_device *dev)
1037 {
1038 	struct ip_tunnel *tunnel = netdev_priv(dev);
1039 	struct iphdr *iph = &tunnel->parms.iph;
1040 
1041 	__gre_tunnel_init(dev);
1042 
1043 	__dev_addr_set(dev, &iph->saddr, 4);
1044 	memcpy(dev->broadcast, &iph->daddr, 4);
1045 
1046 	dev->flags		= IFF_NOARP;
1047 	netif_keep_dst(dev);
1048 	dev->addr_len		= 4;
1049 
1050 	if (iph->daddr && !tunnel->collect_md) {
1051 #ifdef CONFIG_NET_IPGRE_BROADCAST
1052 		if (ipv4_is_multicast(iph->daddr)) {
1053 			if (!iph->saddr)
1054 				return -EINVAL;
1055 			dev->flags = IFF_BROADCAST;
1056 			dev->header_ops = &ipgre_header_ops;
1057 			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1058 			dev->needed_headroom = 0;
1059 		}
1060 #endif
1061 	} else if (!tunnel->collect_md) {
1062 		dev->header_ops = &ipgre_header_ops;
1063 		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1064 		dev->needed_headroom = 0;
1065 	}
1066 
1067 	return ip_tunnel_init(dev);
1068 }
1069 
1070 static const struct gre_protocol ipgre_protocol = {
1071 	.handler     = gre_rcv,
1072 	.err_handler = gre_err,
1073 };
1074 
/* pernet .init for the "gre" kind: forwards to ip_tunnel_init_net() with
 * this driver's net id and link ops. The last argument is NULL here,
 * whereas the gretap/erspan variants pass "gretap0"/"erspan0".
 */
static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
1079 
/* pernet .exit_rtnl for the "gre" kind: forwards to ip_tunnel_delete_net()
 * together with the caller-supplied @dev_to_kill list.
 */
static void __net_exit ipgre_exit_rtnl(struct net *net,
				       struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
}
1085 
1086 static struct pernet_operations ipgre_net_ops = {
1087 	.init = ipgre_init_net,
1088 	.exit_rtnl = ipgre_exit_rtnl,
1089 	.id   = &ipgre_net_id,
1090 	.size = sizeof(struct ip_tunnel_net),
1091 };
1092 
1093 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1094 				 struct netlink_ext_ack *extack)
1095 {
1096 	__be16 flags;
1097 
1098 	if (!data)
1099 		return 0;
1100 
1101 	flags = 0;
1102 	if (data[IFLA_GRE_IFLAGS])
1103 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1104 	if (data[IFLA_GRE_OFLAGS])
1105 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1106 	if (flags & (GRE_VERSION|GRE_ROUTING))
1107 		return -EINVAL;
1108 
1109 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1110 	    data[IFLA_GRE_ENCAP_TYPE] &&
1111 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1112 		return -EINVAL;
1113 
1114 	return 0;
1115 }
1116 
1117 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1118 			      struct netlink_ext_ack *extack)
1119 {
1120 	__be32 daddr;
1121 
1122 	if (tb[IFLA_ADDRESS]) {
1123 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1124 			return -EINVAL;
1125 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1126 			return -EADDRNOTAVAIL;
1127 	}
1128 
1129 	if (!data)
1130 		goto out;
1131 
1132 	if (data[IFLA_GRE_REMOTE]) {
1133 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1134 		if (!daddr)
1135 			return -EINVAL;
1136 	}
1137 
1138 out:
1139 	return ipgre_tunnel_validate(tb, data, extack);
1140 }
1141 
1142 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1143 			   struct netlink_ext_ack *extack)
1144 {
1145 	__be16 flags = 0;
1146 	int ret;
1147 
1148 	if (!data)
1149 		return 0;
1150 
1151 	ret = ipgre_tap_validate(tb, data, extack);
1152 	if (ret)
1153 		return ret;
1154 
1155 	if (data[IFLA_GRE_ERSPAN_VER] &&
1156 	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1157 		return 0;
1158 
1159 	/* ERSPAN type II/III should only have GRE sequence and key flag */
1160 	if (data[IFLA_GRE_OFLAGS])
1161 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1162 	if (data[IFLA_GRE_IFLAGS])
1163 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1164 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1165 	    flags != (GRE_SEQ | GRE_KEY))
1166 		return -EINVAL;
1167 
1168 	/* ERSPAN Session ID only has 10-bit. Since we reuse
1169 	 * 32-bit key field as ID, check it's range.
1170 	 */
1171 	if (data[IFLA_GRE_IKEY] &&
1172 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1173 		return -EINVAL;
1174 
1175 	if (data[IFLA_GRE_OKEY] &&
1176 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1177 		return -EINVAL;
1178 
1179 	return 0;
1180 }
1181 
1182 static int ipgre_netlink_parms(struct net_device *dev,
1183 				struct nlattr *data[],
1184 				struct nlattr *tb[],
1185 				struct ip_tunnel_parm_kern *parms,
1186 				__u32 *fwmark)
1187 {
1188 	struct ip_tunnel *t = netdev_priv(dev);
1189 
1190 	memset(parms, 0, sizeof(*parms));
1191 
1192 	parms->iph.protocol = IPPROTO_GRE;
1193 
1194 	if (!data)
1195 		return 0;
1196 
1197 	if (data[IFLA_GRE_LINK])
1198 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1199 
1200 	if (data[IFLA_GRE_IFLAGS])
1201 		gre_flags_to_tnl_flags(parms->i_flags,
1202 				       nla_get_be16(data[IFLA_GRE_IFLAGS]));
1203 
1204 	if (data[IFLA_GRE_OFLAGS])
1205 		gre_flags_to_tnl_flags(parms->o_flags,
1206 				       nla_get_be16(data[IFLA_GRE_OFLAGS]));
1207 
1208 	if (data[IFLA_GRE_IKEY])
1209 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1210 
1211 	if (data[IFLA_GRE_OKEY])
1212 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1213 
1214 	if (data[IFLA_GRE_LOCAL])
1215 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1216 
1217 	if (data[IFLA_GRE_REMOTE])
1218 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1219 
1220 	if (data[IFLA_GRE_TTL])
1221 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1222 
1223 	if (data[IFLA_GRE_TOS])
1224 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1225 
1226 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1227 		if (t->ignore_df)
1228 			return -EINVAL;
1229 		parms->iph.frag_off = htons(IP_DF);
1230 	}
1231 
1232 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1233 		t->collect_md = true;
1234 		if (dev->type == ARPHRD_IPGRE)
1235 			dev->type = ARPHRD_NONE;
1236 	}
1237 
1238 	if (data[IFLA_GRE_IGNORE_DF]) {
1239 		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1240 		  && (parms->iph.frag_off & htons(IP_DF)))
1241 			return -EINVAL;
1242 		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1243 	}
1244 
1245 	if (data[IFLA_GRE_FWMARK])
1246 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1247 
1248 	return 0;
1249 }
1250 
1251 static int erspan_netlink_parms(struct net_device *dev,
1252 				struct nlattr *data[],
1253 				struct nlattr *tb[],
1254 				struct ip_tunnel_parm_kern *parms,
1255 				__u32 *fwmark)
1256 {
1257 	struct ip_tunnel *t = netdev_priv(dev);
1258 	int err;
1259 
1260 	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1261 	if (err)
1262 		return err;
1263 	if (!data)
1264 		return 0;
1265 
1266 	if (data[IFLA_GRE_ERSPAN_VER]) {
1267 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1268 
1269 		if (t->erspan_ver > 2)
1270 			return -EINVAL;
1271 	}
1272 
1273 	if (t->erspan_ver == 1) {
1274 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1275 			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1276 			if (t->index & ~INDEX_MASK)
1277 				return -EINVAL;
1278 		}
1279 	} else if (t->erspan_ver == 2) {
1280 		if (data[IFLA_GRE_ERSPAN_DIR]) {
1281 			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1282 			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1283 				return -EINVAL;
1284 		}
1285 		if (data[IFLA_GRE_ERSPAN_HWID]) {
1286 			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1287 			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1288 				return -EINVAL;
1289 		}
1290 	}
1291 
1292 	return 0;
1293 }
1294 
1295 /* This function returns true when ENCAP attributes are present in the nl msg */
1296 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1297 				      struct ip_tunnel_encap *ipencap)
1298 {
1299 	bool ret = false;
1300 
1301 	memset(ipencap, 0, sizeof(*ipencap));
1302 
1303 	if (!data)
1304 		return ret;
1305 
1306 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1307 		ret = true;
1308 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1309 	}
1310 
1311 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1312 		ret = true;
1313 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1314 	}
1315 
1316 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1317 		ret = true;
1318 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1319 	}
1320 
1321 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1322 		ret = true;
1323 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1324 	}
1325 
1326 	return ret;
1327 }
1328 
1329 static int gre_tap_init(struct net_device *dev)
1330 {
1331 	__gre_tunnel_init(dev);
1332 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1333 	netif_keep_dst(dev);
1334 
1335 	return ip_tunnel_init(dev);
1336 }
1337 
1338 static const struct net_device_ops gre_tap_netdev_ops = {
1339 	.ndo_init		= gre_tap_init,
1340 	.ndo_uninit		= ip_tunnel_uninit,
1341 	.ndo_start_xmit		= gre_tap_xmit,
1342 	.ndo_set_mac_address 	= eth_mac_addr,
1343 	.ndo_validate_addr	= eth_validate_addr,
1344 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1345 	.ndo_get_stats64	= dev_get_tstats64,
1346 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1347 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1348 };
1349 
1350 static int erspan_tunnel_init(struct net_device *dev)
1351 {
1352 	struct ip_tunnel *tunnel = netdev_priv(dev);
1353 
1354 	if (tunnel->erspan_ver == 0)
1355 		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1356 	else
1357 		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1358 
1359 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1360 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1361 		       erspan_hdr_len(tunnel->erspan_ver);
1362 
1363 	dev->features		|= GRE_FEATURES;
1364 	dev->hw_features	|= GRE_FEATURES;
1365 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1366 	netif_keep_dst(dev);
1367 
1368 	return ip_tunnel_init(dev);
1369 }
1370 
1371 static const struct net_device_ops erspan_netdev_ops = {
1372 	.ndo_init		= erspan_tunnel_init,
1373 	.ndo_uninit		= ip_tunnel_uninit,
1374 	.ndo_start_xmit		= erspan_xmit,
1375 	.ndo_set_mac_address	= eth_mac_addr,
1376 	.ndo_validate_addr	= eth_validate_addr,
1377 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1378 	.ndo_get_stats64	= dev_get_tstats64,
1379 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1380 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1381 };
1382 
1383 static void ipgre_tap_setup(struct net_device *dev)
1384 {
1385 	ether_setup(dev);
1386 	dev->max_mtu = 0;
1387 	dev->netdev_ops	= &gre_tap_netdev_ops;
1388 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1389 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1390 	ip_tunnel_setup(dev, gre_tap_net_id);
1391 }
1392 
1393 static int
1394 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1395 {
1396 	struct ip_tunnel_encap ipencap;
1397 
1398 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1399 		struct ip_tunnel *t = netdev_priv(dev);
1400 		int err = ip_tunnel_encap_setup(t, &ipencap);
1401 
1402 		if (err < 0)
1403 			return err;
1404 	}
1405 
1406 	return 0;
1407 }
1408 
1409 static int ipgre_newlink(struct net_device *dev,
1410 			 struct rtnl_newlink_params *params,
1411 			 struct netlink_ext_ack *extack)
1412 {
1413 	struct nlattr **data = params->data;
1414 	struct nlattr **tb = params->tb;
1415 	struct ip_tunnel_parm_kern p;
1416 	__u32 fwmark = 0;
1417 	int err;
1418 
1419 	err = ipgre_newlink_encap_setup(dev, data);
1420 	if (err)
1421 		return err;
1422 
1423 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1424 	if (err < 0)
1425 		return err;
1426 	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1427 				 fwmark);
1428 }
1429 
1430 static int erspan_newlink(struct net_device *dev,
1431 			  struct rtnl_newlink_params *params,
1432 			  struct netlink_ext_ack *extack)
1433 {
1434 	struct nlattr **data = params->data;
1435 	struct nlattr **tb = params->tb;
1436 	struct ip_tunnel_parm_kern p;
1437 	__u32 fwmark = 0;
1438 	int err;
1439 
1440 	err = ipgre_newlink_encap_setup(dev, data);
1441 	if (err)
1442 		return err;
1443 
1444 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1445 	if (err)
1446 		return err;
1447 	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1448 				 fwmark);
1449 }
1450 
1451 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1452 			    struct nlattr *data[],
1453 			    struct netlink_ext_ack *extack)
1454 {
1455 	struct ip_tunnel *t = netdev_priv(dev);
1456 	struct ip_tunnel_parm_kern p;
1457 	__u32 fwmark = t->fwmark;
1458 	int err;
1459 
1460 	err = ipgre_newlink_encap_setup(dev, data);
1461 	if (err)
1462 		return err;
1463 
1464 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1465 	if (err < 0)
1466 		return err;
1467 
1468 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1469 	if (err < 0)
1470 		return err;
1471 
1472 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1473 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1474 
1475 	ipgre_link_update(dev, !tb[IFLA_MTU]);
1476 
1477 	return 0;
1478 }
1479 
1480 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1481 			     struct nlattr *data[],
1482 			     struct netlink_ext_ack *extack)
1483 {
1484 	struct ip_tunnel *t = netdev_priv(dev);
1485 	struct ip_tunnel_parm_kern p;
1486 	__u32 fwmark = t->fwmark;
1487 	int err;
1488 
1489 	err = ipgre_newlink_encap_setup(dev, data);
1490 	if (err)
1491 		return err;
1492 
1493 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1494 	if (err < 0)
1495 		return err;
1496 
1497 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1498 	if (err < 0)
1499 		return err;
1500 
1501 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1502 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1503 
1504 	return 0;
1505 }
1506 
1507 static size_t ipgre_get_size(const struct net_device *dev)
1508 {
1509 	return
1510 		/* IFLA_GRE_LINK */
1511 		nla_total_size(4) +
1512 		/* IFLA_GRE_IFLAGS */
1513 		nla_total_size(2) +
1514 		/* IFLA_GRE_OFLAGS */
1515 		nla_total_size(2) +
1516 		/* IFLA_GRE_IKEY */
1517 		nla_total_size(4) +
1518 		/* IFLA_GRE_OKEY */
1519 		nla_total_size(4) +
1520 		/* IFLA_GRE_LOCAL */
1521 		nla_total_size(4) +
1522 		/* IFLA_GRE_REMOTE */
1523 		nla_total_size(4) +
1524 		/* IFLA_GRE_TTL */
1525 		nla_total_size(1) +
1526 		/* IFLA_GRE_TOS */
1527 		nla_total_size(1) +
1528 		/* IFLA_GRE_PMTUDISC */
1529 		nla_total_size(1) +
1530 		/* IFLA_GRE_ENCAP_TYPE */
1531 		nla_total_size(2) +
1532 		/* IFLA_GRE_ENCAP_FLAGS */
1533 		nla_total_size(2) +
1534 		/* IFLA_GRE_ENCAP_SPORT */
1535 		nla_total_size(2) +
1536 		/* IFLA_GRE_ENCAP_DPORT */
1537 		nla_total_size(2) +
1538 		/* IFLA_GRE_COLLECT_METADATA */
1539 		nla_total_size(0) +
1540 		/* IFLA_GRE_IGNORE_DF */
1541 		nla_total_size(1) +
1542 		/* IFLA_GRE_FWMARK */
1543 		nla_total_size(4) +
1544 		/* IFLA_GRE_ERSPAN_INDEX */
1545 		nla_total_size(4) +
1546 		/* IFLA_GRE_ERSPAN_VER */
1547 		nla_total_size(1) +
1548 		/* IFLA_GRE_ERSPAN_DIR */
1549 		nla_total_size(1) +
1550 		/* IFLA_GRE_ERSPAN_HWID */
1551 		nla_total_size(2) +
1552 		0;
1553 }
1554 
1555 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1556 {
1557 	struct ip_tunnel *t = netdev_priv(dev);
1558 	struct ip_tunnel_parm_kern *p = &t->parms;
1559 	IP_TUNNEL_DECLARE_FLAGS(o_flags);
1560 
1561 	ip_tunnel_flags_copy(o_flags, p->o_flags);
1562 
1563 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1564 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1565 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1566 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1567 			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1568 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1569 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1570 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1571 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1572 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1573 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1574 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1575 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1576 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1577 		goto nla_put_failure;
1578 
1579 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1580 			t->encap.type) ||
1581 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1582 			 t->encap.sport) ||
1583 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1584 			 t->encap.dport) ||
1585 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1586 			t->encap.flags))
1587 		goto nla_put_failure;
1588 
1589 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1590 		goto nla_put_failure;
1591 
1592 	if (t->collect_md) {
1593 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1594 			goto nla_put_failure;
1595 	}
1596 
1597 	return 0;
1598 
1599 nla_put_failure:
1600 	return -EMSGSIZE;
1601 }
1602 
1603 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1604 {
1605 	struct ip_tunnel *t = netdev_priv(dev);
1606 
1607 	if (t->erspan_ver <= 2) {
1608 		if (t->erspan_ver != 0 && !t->collect_md)
1609 			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
1610 
1611 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1612 			goto nla_put_failure;
1613 
1614 		if (t->erspan_ver == 1) {
1615 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1616 				goto nla_put_failure;
1617 		} else if (t->erspan_ver == 2) {
1618 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1619 				goto nla_put_failure;
1620 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1621 				goto nla_put_failure;
1622 		}
1623 	}
1624 
1625 	return ipgre_fill_info(skb, dev);
1626 
1627 nla_put_failure:
1628 	return -EMSGSIZE;
1629 }
1630 
1631 static void erspan_setup(struct net_device *dev)
1632 {
1633 	struct ip_tunnel *t = netdev_priv(dev);
1634 
1635 	ether_setup(dev);
1636 	dev->max_mtu = 0;
1637 	dev->netdev_ops = &erspan_netdev_ops;
1638 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1639 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1640 	ip_tunnel_setup(dev, erspan_net_id);
1641 	t->erspan_ver = 1;
1642 }
1643 
1644 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1645 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1646 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1647 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1648 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1649 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1650 	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1651 	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1652 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1653 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1654 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1655 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1656 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1657 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1658 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1659 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1660 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1661 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1662 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1663 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1664 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1665 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1666 };
1667 
1668 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1669 	.kind		= "gre",
1670 	.maxtype	= IFLA_GRE_MAX,
1671 	.policy		= ipgre_policy,
1672 	.priv_size	= sizeof(struct ip_tunnel),
1673 	.setup		= ipgre_tunnel_setup,
1674 	.validate	= ipgre_tunnel_validate,
1675 	.newlink	= ipgre_newlink,
1676 	.changelink	= ipgre_changelink,
1677 	.dellink	= ip_tunnel_dellink,
1678 	.get_size	= ipgre_get_size,
1679 	.fill_info	= ipgre_fill_info,
1680 	.get_link_net	= ip_tunnel_get_link_net,
1681 };
1682 
1683 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1684 	.kind		= "gretap",
1685 	.maxtype	= IFLA_GRE_MAX,
1686 	.policy		= ipgre_policy,
1687 	.priv_size	= sizeof(struct ip_tunnel),
1688 	.setup		= ipgre_tap_setup,
1689 	.validate	= ipgre_tap_validate,
1690 	.newlink	= ipgre_newlink,
1691 	.changelink	= ipgre_changelink,
1692 	.dellink	= ip_tunnel_dellink,
1693 	.get_size	= ipgre_get_size,
1694 	.fill_info	= ipgre_fill_info,
1695 	.get_link_net	= ip_tunnel_get_link_net,
1696 };
1697 
1698 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1699 	.kind		= "erspan",
1700 	.maxtype	= IFLA_GRE_MAX,
1701 	.policy		= ipgre_policy,
1702 	.priv_size	= sizeof(struct ip_tunnel),
1703 	.setup		= erspan_setup,
1704 	.validate	= erspan_validate,
1705 	.newlink	= erspan_newlink,
1706 	.changelink	= erspan_changelink,
1707 	.dellink	= ip_tunnel_dellink,
1708 	.get_size	= ipgre_get_size,
1709 	.fill_info	= erspan_fill_info,
1710 	.get_link_net	= ip_tunnel_get_link_net,
1711 };
1712 
1713 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1714 					u8 name_assign_type)
1715 {
1716 	struct rtnl_newlink_params params = { .src_net = net };
1717 	struct nlattr *tb[IFLA_MAX + 1];
1718 	struct net_device *dev;
1719 	LIST_HEAD(list_kill);
1720 	struct ip_tunnel *t;
1721 	int err;
1722 
1723 	memset(&tb, 0, sizeof(tb));
1724 	params.tb = tb;
1725 
1726 	dev = rtnl_create_link(net, name, name_assign_type,
1727 			       &ipgre_tap_ops, tb, NULL);
1728 	if (IS_ERR(dev))
1729 		return dev;
1730 
1731 	/* Configure flow based GRE device. */
1732 	t = netdev_priv(dev);
1733 	t->collect_md = true;
1734 
1735 	err = ipgre_newlink(dev, &params, NULL);
1736 	if (err < 0) {
1737 		free_netdev(dev);
1738 		return ERR_PTR(err);
1739 	}
1740 
1741 	/* openvswitch users expect packet sizes to be unrestricted,
1742 	 * so set the largest MTU we can.
1743 	 */
1744 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1745 	if (err)
1746 		goto out;
1747 
1748 	err = rtnl_configure_link(dev, NULL, 0, NULL);
1749 	if (err < 0)
1750 		goto out;
1751 
1752 	return dev;
1753 out:
1754 	ip_tunnel_dellink(dev, &list_kill);
1755 	unregister_netdevice_many(&list_kill);
1756 	return ERR_PTR(err);
1757 }
1758 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1759 
/* pernet .init for the "gretap" kind: forwards to ip_tunnel_init_net()
 * with the gretap net id and link ops, passing "gretap0" as the last
 * argument.
 */
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
1764 
/* pernet .exit_rtnl for the "gretap" kind: forwards to
 * ip_tunnel_delete_net() together with the caller-supplied @dev_to_kill
 * list.
 */
static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
					   struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
}
1770 
1771 static struct pernet_operations ipgre_tap_net_ops = {
1772 	.init = ipgre_tap_init_net,
1773 	.exit_rtnl = ipgre_tap_exit_rtnl,
1774 	.id   = &gre_tap_net_id,
1775 	.size = sizeof(struct ip_tunnel_net),
1776 };
1777 
/* pernet .init for the "erspan" kind: forwards to ip_tunnel_init_net()
 * with the erspan net id and link ops, passing "erspan0" as the last
 * argument.
 */
static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}
1783 
/* pernet .exit_rtnl for the "erspan" kind: forwards to
 * ip_tunnel_delete_net() together with the caller-supplied @dev_to_kill
 * list.
 */
static void __net_exit erspan_exit_rtnl(struct net *net,
					struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
}
1789 
1790 static struct pernet_operations erspan_net_ops = {
1791 	.init = erspan_init_net,
1792 	.exit_rtnl = erspan_exit_rtnl,
1793 	.id   = &erspan_net_id,
1794 	.size = sizeof(struct ip_tunnel_net),
1795 };
1796 
1797 static int __init ipgre_init(void)
1798 {
1799 	int err;
1800 
1801 	pr_info("GRE over IPv4 tunneling driver\n");
1802 
1803 	err = register_pernet_device(&ipgre_net_ops);
1804 	if (err < 0)
1805 		return err;
1806 
1807 	err = register_pernet_device(&ipgre_tap_net_ops);
1808 	if (err < 0)
1809 		goto pnet_tap_failed;
1810 
1811 	err = register_pernet_device(&erspan_net_ops);
1812 	if (err < 0)
1813 		goto pnet_erspan_failed;
1814 
1815 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1816 	if (err < 0) {
1817 		pr_info("%s: can't add protocol\n", __func__);
1818 		goto add_proto_failed;
1819 	}
1820 
1821 	err = rtnl_link_register(&ipgre_link_ops);
1822 	if (err < 0)
1823 		goto rtnl_link_failed;
1824 
1825 	err = rtnl_link_register(&ipgre_tap_ops);
1826 	if (err < 0)
1827 		goto tap_ops_failed;
1828 
1829 	err = rtnl_link_register(&erspan_link_ops);
1830 	if (err < 0)
1831 		goto erspan_link_failed;
1832 
1833 	return 0;
1834 
1835 erspan_link_failed:
1836 	rtnl_link_unregister(&ipgre_tap_ops);
1837 tap_ops_failed:
1838 	rtnl_link_unregister(&ipgre_link_ops);
1839 rtnl_link_failed:
1840 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1841 add_proto_failed:
1842 	unregister_pernet_device(&erspan_net_ops);
1843 pnet_erspan_failed:
1844 	unregister_pernet_device(&ipgre_tap_net_ops);
1845 pnet_tap_failed:
1846 	unregister_pernet_device(&ipgre_net_ops);
1847 	return err;
1848 }
1849 
/* Module exit: undo ipgre_init() by unregistering the three rtnl link
 * kinds, removing the GRE protocol handler and unregistering the three
 * pernet devices.
 */
static void __exit ipgre_fini(void)
{
	/* rtnetlink link kinds */
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	/* receive path */
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	/* per-namespace state */
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}
1860 
1861 module_init(ipgre_init);
1862 module_exit(ipgre_fini);
1863 MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1864 MODULE_LICENSE("GPL");
1865 MODULE_ALIAS_RTNL_LINK("gre");
1866 MODULE_ALIAS_RTNL_LINK("gretap");
1867 MODULE_ALIAS_RTNL_LINK("erspan");
1868 MODULE_ALIAS_NETDEV("gre0");
1869 MODULE_ALIAS_NETDEV("gretap0");
1870 MODULE_ALIAS_NETDEV("erspan0");
1871