xref: /linux/net/ipv4/ip_gre.c (revision daa121128a2d2ac6006159e2c47676e4fcd21eab)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux NET3:	GRE over IP protocol decoder.
4  *
5  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47 
48 /*
49    Problems & solutions
50    --------------------
51 
52    1. The most important issue is detecting local dead loops.
53    They would cause complete host lockup in transmit, which
54    would be "resolved" by stack overflow or, if queueing is enabled,
55    with infinite looping in net_bh.
56 
57    We cannot track such dead loops during route installation,
58    it is infeasible task. The most general solutions would be
59    to keep skb->encapsulation counter (sort of local ttl),
60    and silently drop packet when it expires. It is a good
61    solution, but it supposes maintaining new variable in ALL
62    skb, even if no tunneling is used.
63 
64    Current solution: xmit_recursion breaks dead loops. This is a percpu
65    counter, since when we enter the first ndo_xmit(), cpu migration is
66    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
67 
68    2. Networking dead loops would not kill routers, but would really
69    kill network. IP hop limit plays role of "t->recursion" in this case,
70    if we copy it from packet being encapsulated to upper header.
71    It is very good solution, but it introduces two problems:
72 
73    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74      do not work over tunnels.
75    - traceroute does not work. I planned to relay ICMP from tunnel,
76      so that this problem would be solved and traceroute output
77      would even more informative. This idea appeared to be wrong:
78      only Linux complies to rfc1812 now (yes, guys, Linux is the only
79      true router now :-)), all routers (at least, in neighbourhood of mine)
80      return only 8 bytes of payload. It is the end.
81 
82    Hence, if we want that OSPF worked or traceroute said something reasonable,
83    we should search for another solution.
84 
85    One of them is to parse packet trying to detect inner encapsulation
86    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
88 
89    Current solution: The solution was UNEXPECTEDLY SIMPLE.
90    We force DF flag on tunnels with preconfigured hop limit,
91    that is ALL. :-) Well, it does not remove the problem completely,
92    but exponential growth of network traffic is changed to linear
93    (branches, that exceed pmtu are pruned) and tunnel mtu
94    rapidly degrades to value <68, where looping stops.
95    Yes, it is not good if there exists a router in the loop,
96    which does not force DF, even when encapsulating packets have DF set.
97    But it is not our problem! Nobody could accuse us, we made
98    all that we could make. Even if it is your gated who injected
99    fatal route to network, even if it were you who configured
100    fatal static route: you are innocent. :-)
101 
102    Alexey Kuznetsov.
103  */
104 
/* Module parameter (mode 0644): when true, packets received with a
 * corrupted ECN field are logged. The value is handed to ip_tunnel_rcv()
 * by the receive paths in this file.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Forward declarations of objects that are referenced before their
 * definitions.
 */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

/* Ids passed to net_generic() to obtain the per-namespace
 * struct ip_tunnel_net of each tunnel flavour (gre, gretap, erspan).
 */
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
120 
121 static int ipgre_err(struct sk_buff *skb, u32 info,
122 		     const struct tnl_ptk_info *tpi)
123 {
124 
125 	/* All the routers (except for Linux) return only
126 	   8 bytes of packet payload. It means, that precise relaying of
127 	   ICMP in the real Internet is absolutely infeasible.
128 
129 	   Moreover, Cisco "wise men" put GRE key to the third word
130 	   in GRE header. It makes impossible maintaining even soft
131 	   state for keyed GRE tunnels with enabled checksum. Tell
132 	   them "thank you".
133 
134 	   Well, I wonder, rfc1812 was written by Cisco employee,
135 	   what the hell these idiots break standards established
136 	   by themselves???
137 	   */
138 	struct net *net = dev_net(skb->dev);
139 	struct ip_tunnel_net *itn;
140 	const struct iphdr *iph;
141 	const int type = icmp_hdr(skb)->type;
142 	const int code = icmp_hdr(skb)->code;
143 	unsigned int data_len = 0;
144 	struct ip_tunnel *t;
145 
146 	if (tpi->proto == htons(ETH_P_TEB))
147 		itn = net_generic(net, gre_tap_net_id);
148 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 		 tpi->proto == htons(ETH_P_ERSPAN2))
150 		itn = net_generic(net, erspan_net_id);
151 	else
152 		itn = net_generic(net, ipgre_net_id);
153 
154 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 			     iph->daddr, iph->saddr, tpi->key);
157 
158 	if (!t)
159 		return -ENOENT;
160 
161 	switch (type) {
162 	default:
163 	case ICMP_PARAMETERPROB:
164 		return 0;
165 
166 	case ICMP_DEST_UNREACH:
167 		switch (code) {
168 		case ICMP_SR_FAILED:
169 		case ICMP_PORT_UNREACH:
170 			/* Impossible event. */
171 			return 0;
172 		default:
173 			/* All others are translated to HOST_UNREACH.
174 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
175 			   I believe they are just ether pollution. --ANK
176 			 */
177 			break;
178 		}
179 		break;
180 
181 	case ICMP_TIME_EXCEEDED:
182 		if (code != ICMP_EXC_TTL)
183 			return 0;
184 		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
185 		break;
186 
187 	case ICMP_REDIRECT:
188 		break;
189 	}
190 
191 #if IS_ENABLED(CONFIG_IPV6)
192 	if (tpi->proto == htons(ETH_P_IPV6) &&
193 	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
194 					type, data_len))
195 		return 0;
196 #endif
197 
198 	if (t->parms.iph.daddr == 0 ||
199 	    ipv4_is_multicast(t->parms.iph.daddr))
200 		return 0;
201 
202 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
203 		return 0;
204 
205 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
206 		t->err_count++;
207 	else
208 		t->err_count = 1;
209 	t->err_time = jiffies;
210 
211 	return 0;
212 }
213 
214 static void gre_err(struct sk_buff *skb, u32 info)
215 {
216 	/* All the routers (except for Linux) return only
217 	 * 8 bytes of packet payload. It means, that precise relaying of
218 	 * ICMP in the real Internet is absolutely infeasible.
219 	 *
220 	 * Moreover, Cisco "wise men" put GRE key to the third word
221 	 * in GRE header. It makes impossible maintaining even soft
222 	 * state for keyed
223 	 * GRE tunnels with enabled checksum. Tell them "thank you".
224 	 *
225 	 * Well, I wonder, rfc1812 was written by Cisco employee,
226 	 * what the hell these idiots break standards established
227 	 * by themselves???
228 	 */
229 
230 	const struct iphdr *iph = (struct iphdr *)skb->data;
231 	const int type = icmp_hdr(skb)->type;
232 	const int code = icmp_hdr(skb)->code;
233 	struct tnl_ptk_info tpi;
234 
235 	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
236 			     iph->ihl * 4) < 0)
237 		return;
238 
239 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241 				 skb->dev->ifindex, IPPROTO_GRE);
242 		return;
243 	}
244 	if (type == ICMP_REDIRECT) {
245 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
246 			      IPPROTO_GRE);
247 		return;
248 	}
249 
250 	ipgre_err(skb, info, &tpi);
251 }
252 
/*
 * is_erspan_type1 - tell ERSPAN type I from type II by GRE header length.
 * @gre_hdr_len: length in bytes of the parsed GRE header
 *
 * ERSPAN type I (version 0) and type II (version 1) share protocol 0x88BE;
 * type I carries a 4-byte GRE header whereas type II carries an 8-byte one.
 *
 * Returns true when the header length is that of type I.
 */
static bool is_erspan_type1(int gre_hdr_len)
{
	const int type1_gre_hdr_len = 4;

	return gre_hdr_len == type1_gre_hdr_len;
}
261 
262 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
263 		      int gre_hdr_len)
264 {
265 	struct net *net = dev_net(skb->dev);
266 	struct metadata_dst *tun_dst = NULL;
267 	struct erspan_base_hdr *ershdr;
268 	IP_TUNNEL_DECLARE_FLAGS(flags);
269 	struct ip_tunnel_net *itn;
270 	struct ip_tunnel *tunnel;
271 	const struct iphdr *iph;
272 	struct erspan_md2 *md2;
273 	int ver;
274 	int len;
275 
276 	ip_tunnel_flags_copy(flags, tpi->flags);
277 
278 	itn = net_generic(net, erspan_net_id);
279 	iph = ip_hdr(skb);
280 	if (is_erspan_type1(gre_hdr_len)) {
281 		ver = 0;
282 		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
283 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
284 					  iph->saddr, iph->daddr, 0);
285 	} else {
286 		if (unlikely(!pskb_may_pull(skb,
287 					    gre_hdr_len + sizeof(*ershdr))))
288 			return PACKET_REJECT;
289 
290 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
291 		ver = ershdr->ver;
292 		iph = ip_hdr(skb);
293 		__set_bit(IP_TUNNEL_KEY_BIT, flags);
294 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
295 					  iph->saddr, iph->daddr, tpi->key);
296 	}
297 
298 	if (tunnel) {
299 		if (is_erspan_type1(gre_hdr_len))
300 			len = gre_hdr_len;
301 		else
302 			len = gre_hdr_len + erspan_hdr_len(ver);
303 
304 		if (unlikely(!pskb_may_pull(skb, len)))
305 			return PACKET_REJECT;
306 
307 		if (__iptunnel_pull_header(skb,
308 					   len,
309 					   htons(ETH_P_TEB),
310 					   false, false) < 0)
311 			goto drop;
312 
313 		if (tunnel->collect_md) {
314 			struct erspan_metadata *pkt_md, *md;
315 			struct ip_tunnel_info *info;
316 			unsigned char *gh;
317 			__be64 tun_id;
318 
319 			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
320 			ip_tunnel_flags_copy(flags, tpi->flags);
321 			tun_id = key32_to_tunnel_id(tpi->key);
322 
323 			tun_dst = ip_tun_rx_dst(skb, flags,
324 						tun_id, sizeof(*md));
325 			if (!tun_dst)
326 				return PACKET_REJECT;
327 
328 			/* skb can be uncloned in __iptunnel_pull_header, so
329 			 * old pkt_md is no longer valid and we need to reset
330 			 * it
331 			 */
332 			gh = skb_network_header(skb) +
333 			     skb_network_header_len(skb);
334 			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
335 							    sizeof(*ershdr));
336 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
337 			md->version = ver;
338 			md2 = &md->u.md2;
339 			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
340 						       ERSPAN_V2_MDSIZE);
341 
342 			info = &tun_dst->u.tun_info;
343 			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
344 				  info->key.tun_flags);
345 			info->options_len = sizeof(*md);
346 		}
347 
348 		skb_reset_mac_header(skb);
349 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
350 		return PACKET_RCVD;
351 	}
352 	return PACKET_REJECT;
353 
354 drop:
355 	kfree_skb(skb);
356 	return PACKET_RCVD;
357 }
358 
359 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
360 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
361 {
362 	struct metadata_dst *tun_dst = NULL;
363 	const struct iphdr *iph;
364 	struct ip_tunnel *tunnel;
365 
366 	iph = ip_hdr(skb);
367 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
368 				  iph->saddr, iph->daddr, tpi->key);
369 
370 	if (tunnel) {
371 		const struct iphdr *tnl_params;
372 
373 		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
374 					   raw_proto, false) < 0)
375 			goto drop;
376 
377 		/* Special case for ipgre_header_parse(), which expects the
378 		 * mac_header to point to the outer IP header.
379 		 */
380 		if (tunnel->dev->header_ops == &ipgre_header_ops)
381 			skb_pop_mac_header(skb);
382 		else
383 			skb_reset_mac_header(skb);
384 
385 		tnl_params = &tunnel->parms.iph;
386 		if (tunnel->collect_md || tnl_params->daddr == 0) {
387 			IP_TUNNEL_DECLARE_FLAGS(flags) = { };
388 			__be64 tun_id;
389 
390 			__set_bit(IP_TUNNEL_CSUM_BIT, flags);
391 			__set_bit(IP_TUNNEL_KEY_BIT, flags);
392 			ip_tunnel_flags_and(flags, tpi->flags, flags);
393 
394 			tun_id = key32_to_tunnel_id(tpi->key);
395 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
396 			if (!tun_dst)
397 				return PACKET_REJECT;
398 		}
399 
400 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
401 		return PACKET_RCVD;
402 	}
403 	return PACKET_NEXT;
404 
405 drop:
406 	kfree_skb(skb);
407 	return PACKET_RCVD;
408 }
409 
410 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
411 		     int hdr_len)
412 {
413 	struct net *net = dev_net(skb->dev);
414 	struct ip_tunnel_net *itn;
415 	int res;
416 
417 	if (tpi->proto == htons(ETH_P_TEB))
418 		itn = net_generic(net, gre_tap_net_id);
419 	else
420 		itn = net_generic(net, ipgre_net_id);
421 
422 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
423 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
424 		/* ipgre tunnels in collect metadata mode should receive
425 		 * also ETH_P_TEB traffic.
426 		 */
427 		itn = net_generic(net, ipgre_net_id);
428 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
429 	}
430 	return res;
431 }
432 
433 static int gre_rcv(struct sk_buff *skb)
434 {
435 	struct tnl_ptk_info tpi;
436 	bool csum_err = false;
437 	int hdr_len;
438 
439 #ifdef CONFIG_NET_IPGRE_BROADCAST
440 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
441 		/* Looped back packet, drop it! */
442 		if (rt_is_output_route(skb_rtable(skb)))
443 			goto drop;
444 	}
445 #endif
446 
447 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
448 	if (hdr_len < 0)
449 		goto drop;
450 
451 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
452 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
453 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
454 			return 0;
455 		goto out;
456 	}
457 
458 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
459 		return 0;
460 
461 out:
462 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
463 drop:
464 	kfree_skb(skb);
465 	return 0;
466 }
467 
468 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
469 		       const struct iphdr *tnl_params,
470 		       __be16 proto)
471 {
472 	struct ip_tunnel *tunnel = netdev_priv(dev);
473 	IP_TUNNEL_DECLARE_FLAGS(flags);
474 
475 	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
476 
477 	/* Push GRE header. */
478 	gre_build_header(skb, tunnel->tun_hlen,
479 			 flags, proto, tunnel->parms.o_key,
480 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
481 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
482 
483 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
484 }
485 
486 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
487 {
488 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
489 }
490 
491 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
492 			__be16 proto)
493 {
494 	struct ip_tunnel *tunnel = netdev_priv(dev);
495 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
496 	struct ip_tunnel_info *tun_info;
497 	const struct ip_tunnel_key *key;
498 	int tunnel_hlen;
499 
500 	tun_info = skb_tunnel_info(skb);
501 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
502 		     ip_tunnel_info_af(tun_info) != AF_INET))
503 		goto err_free_skb;
504 
505 	key = &tun_info->key;
506 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
507 
508 	if (skb_cow_head(skb, dev->needed_headroom))
509 		goto err_free_skb;
510 
511 	/* Push Tunnel header. */
512 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
513 					      tunnel->parms.o_flags)))
514 		goto err_free_skb;
515 
516 	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
517 	__set_bit(IP_TUNNEL_KEY_BIT, flags);
518 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
519 	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
520 
521 	gre_build_header(skb, tunnel_hlen, flags, proto,
522 			 tunnel_id_to_key32(tun_info->key.tun_id),
523 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
524 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
525 
526 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
527 
528 	return;
529 
530 err_free_skb:
531 	kfree_skb(skb);
532 	DEV_STATS_INC(dev, tx_dropped);
533 }
534 
535 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
536 {
537 	struct ip_tunnel *tunnel = netdev_priv(dev);
538 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
539 	struct ip_tunnel_info *tun_info;
540 	const struct ip_tunnel_key *key;
541 	struct erspan_metadata *md;
542 	bool truncate = false;
543 	__be16 proto;
544 	int tunnel_hlen;
545 	int version;
546 	int nhoff;
547 
548 	tun_info = skb_tunnel_info(skb);
549 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
550 		     ip_tunnel_info_af(tun_info) != AF_INET))
551 		goto err_free_skb;
552 
553 	key = &tun_info->key;
554 	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
555 		goto err_free_skb;
556 	if (tun_info->options_len < sizeof(*md))
557 		goto err_free_skb;
558 	md = ip_tunnel_info_opts(tun_info);
559 
560 	/* ERSPAN has fixed 8 byte GRE header */
561 	version = md->version;
562 	tunnel_hlen = 8 + erspan_hdr_len(version);
563 
564 	if (skb_cow_head(skb, dev->needed_headroom))
565 		goto err_free_skb;
566 
567 	if (gre_handle_offloads(skb, false))
568 		goto err_free_skb;
569 
570 	if (skb->len > dev->mtu + dev->hard_header_len) {
571 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
572 			goto err_free_skb;
573 		truncate = true;
574 	}
575 
576 	nhoff = skb_network_offset(skb);
577 	if (skb->protocol == htons(ETH_P_IP) &&
578 	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
579 		truncate = true;
580 
581 	if (skb->protocol == htons(ETH_P_IPV6)) {
582 		int thoff;
583 
584 		if (skb_transport_header_was_set(skb))
585 			thoff = skb_transport_offset(skb);
586 		else
587 			thoff = nhoff + sizeof(struct ipv6hdr);
588 		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
589 			truncate = true;
590 	}
591 
592 	if (version == 1) {
593 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
594 				    ntohl(md->u.index), truncate, true);
595 		proto = htons(ETH_P_ERSPAN);
596 	} else if (version == 2) {
597 		erspan_build_header_v2(skb,
598 				       ntohl(tunnel_id_to_key32(key->tun_id)),
599 				       md->u.md2.dir,
600 				       get_hwid(&md->u.md2),
601 				       truncate, true);
602 		proto = htons(ETH_P_ERSPAN2);
603 	} else {
604 		goto err_free_skb;
605 	}
606 
607 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
608 	gre_build_header(skb, 8, flags, proto, 0,
609 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));
610 
611 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
612 
613 	return;
614 
615 err_free_skb:
616 	kfree_skb(skb);
617 	DEV_STATS_INC(dev, tx_dropped);
618 }
619 
620 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
621 {
622 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
623 	const struct ip_tunnel_key *key;
624 	struct rtable *rt;
625 	struct flowi4 fl4;
626 
627 	if (ip_tunnel_info_af(info) != AF_INET)
628 		return -EINVAL;
629 
630 	key = &info->key;
631 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
632 			    tunnel_id_to_key32(key->tun_id),
633 			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
634 			    skb->mark, skb_get_hash(skb), key->flow_flags);
635 	rt = ip_route_output_key(dev_net(dev), &fl4);
636 	if (IS_ERR(rt))
637 		return PTR_ERR(rt);
638 
639 	ip_rt_put(rt);
640 	info->key.u.ipv4.src = fl4.saddr;
641 	return 0;
642 }
643 
644 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
645 			      struct net_device *dev)
646 {
647 	struct ip_tunnel *tunnel = netdev_priv(dev);
648 	const struct iphdr *tnl_params;
649 
650 	if (!pskb_inet_may_pull(skb))
651 		goto free_skb;
652 
653 	if (tunnel->collect_md) {
654 		gre_fb_xmit(skb, dev, skb->protocol);
655 		return NETDEV_TX_OK;
656 	}
657 
658 	if (dev->header_ops) {
659 		int pull_len = tunnel->hlen + sizeof(struct iphdr);
660 
661 		if (skb_cow_head(skb, 0))
662 			goto free_skb;
663 
664 		tnl_params = (const struct iphdr *)skb->data;
665 
666 		if (!pskb_network_may_pull(skb, pull_len))
667 			goto free_skb;
668 
669 		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
670 		skb_pull(skb, pull_len);
671 		skb_reset_mac_header(skb);
672 
673 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
674 		    skb_checksum_start(skb) < skb->data)
675 			goto free_skb;
676 	} else {
677 		if (skb_cow_head(skb, dev->needed_headroom))
678 			goto free_skb;
679 
680 		tnl_params = &tunnel->parms.iph;
681 	}
682 
683 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
684 					      tunnel->parms.o_flags)))
685 		goto free_skb;
686 
687 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
688 	return NETDEV_TX_OK;
689 
690 free_skb:
691 	kfree_skb(skb);
692 	DEV_STATS_INC(dev, tx_dropped);
693 	return NETDEV_TX_OK;
694 }
695 
696 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
697 			       struct net_device *dev)
698 {
699 	struct ip_tunnel *tunnel = netdev_priv(dev);
700 	bool truncate = false;
701 	__be16 proto;
702 
703 	if (!pskb_inet_may_pull(skb))
704 		goto free_skb;
705 
706 	if (tunnel->collect_md) {
707 		erspan_fb_xmit(skb, dev);
708 		return NETDEV_TX_OK;
709 	}
710 
711 	if (gre_handle_offloads(skb, false))
712 		goto free_skb;
713 
714 	if (skb_cow_head(skb, dev->needed_headroom))
715 		goto free_skb;
716 
717 	if (skb->len > dev->mtu + dev->hard_header_len) {
718 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
719 			goto free_skb;
720 		truncate = true;
721 	}
722 
723 	/* Push ERSPAN header */
724 	if (tunnel->erspan_ver == 0) {
725 		proto = htons(ETH_P_ERSPAN);
726 		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
727 	} else if (tunnel->erspan_ver == 1) {
728 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
729 				    tunnel->index,
730 				    truncate, true);
731 		proto = htons(ETH_P_ERSPAN);
732 	} else if (tunnel->erspan_ver == 2) {
733 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
734 				       tunnel->dir, tunnel->hwid,
735 				       truncate, true);
736 		proto = htons(ETH_P_ERSPAN2);
737 	} else {
738 		goto free_skb;
739 	}
740 
741 	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
742 	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
743 	return NETDEV_TX_OK;
744 
745 free_skb:
746 	kfree_skb(skb);
747 	DEV_STATS_INC(dev, tx_dropped);
748 	return NETDEV_TX_OK;
749 }
750 
751 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
752 				struct net_device *dev)
753 {
754 	struct ip_tunnel *tunnel = netdev_priv(dev);
755 
756 	if (!pskb_inet_may_pull(skb))
757 		goto free_skb;
758 
759 	if (tunnel->collect_md) {
760 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
761 		return NETDEV_TX_OK;
762 	}
763 
764 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
765 					      tunnel->parms.o_flags)))
766 		goto free_skb;
767 
768 	if (skb_cow_head(skb, dev->needed_headroom))
769 		goto free_skb;
770 
771 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
772 	return NETDEV_TX_OK;
773 
774 free_skb:
775 	kfree_skb(skb);
776 	DEV_STATS_INC(dev, tx_dropped);
777 	return NETDEV_TX_OK;
778 }
779 
780 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
781 {
782 	struct ip_tunnel *tunnel = netdev_priv(dev);
783 	int len;
784 
785 	len = tunnel->tun_hlen;
786 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
787 	len = tunnel->tun_hlen - len;
788 	tunnel->hlen = tunnel->hlen + len;
789 
790 	if (dev->header_ops)
791 		dev->hard_header_len += len;
792 	else
793 		dev->needed_headroom += len;
794 
795 	if (set_mtu)
796 		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
797 
798 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
799 	    (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
800 	     tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
801 		dev->features &= ~NETIF_F_GSO_SOFTWARE;
802 		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
803 	} else {
804 		dev->features |= NETIF_F_GSO_SOFTWARE;
805 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
806 	}
807 }
808 
809 static int ipgre_tunnel_ctl(struct net_device *dev,
810 			    struct ip_tunnel_parm_kern *p,
811 			    int cmd)
812 {
813 	__be16 i_flags, o_flags;
814 	int err;
815 
816 	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
817 	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
818 		return -EOVERFLOW;
819 
820 	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
821 	o_flags = ip_tunnel_flags_to_be16(p->o_flags);
822 
823 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
824 		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
825 		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
826 		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
827 			return -EINVAL;
828 	}
829 
830 	gre_flags_to_tnl_flags(p->i_flags, i_flags);
831 	gre_flags_to_tnl_flags(p->o_flags, o_flags);
832 
833 	err = ip_tunnel_ctl(dev, p, cmd);
834 	if (err)
835 		return err;
836 
837 	if (cmd == SIOCCHGTUNNEL) {
838 		struct ip_tunnel *t = netdev_priv(dev);
839 
840 		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
841 		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
842 
843 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
844 			ipgre_link_update(dev, true);
845 	}
846 
847 	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
848 	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
849 	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
850 	ip_tunnel_flags_from_be16(p->o_flags, o_flags);
851 
852 	return 0;
853 }
854 
855 /* Nice toy. Unfortunately, useless in real life :-)
856    It allows to construct virtual multiprotocol broadcast "LAN"
857    over the Internet, provided multicast routing is tuned.
858 
859 
860    I have no idea was this bicycle invented before me,
861    so that I had to set ARPHRD_IPGRE to a random value.
862    I have an impression, that Cisco could make something similar,
863    but this feature is apparently missing in IOS<=11.2(8).
864 
865    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
866    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
867 
868    ping -t 255 224.66.66.66
869 
870    If nobody answers, mbone does not work.
871 
872    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
873    ip addr add 10.66.66.<somewhat>/24 dev Universe
874    ifconfig Universe up
875    ifconfig Universe add fe80::<Your_real_addr>/10
876    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
877    ftp 10.66.66.66
878    ...
879    ftp fec0:6666:6666::193.233.7.65
880    ...
881  */
882 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
883 			unsigned short type,
884 			const void *daddr, const void *saddr, unsigned int len)
885 {
886 	struct ip_tunnel *t = netdev_priv(dev);
887 	struct iphdr *iph;
888 	struct gre_base_hdr *greh;
889 
890 	iph = skb_push(skb, t->hlen + sizeof(*iph));
891 	greh = (struct gre_base_hdr *)(iph+1);
892 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
893 	greh->protocol = htons(type);
894 
895 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
896 
897 	/* Set the source hardware address. */
898 	if (saddr)
899 		memcpy(&iph->saddr, saddr, 4);
900 	if (daddr)
901 		memcpy(&iph->daddr, daddr, 4);
902 	if (iph->daddr)
903 		return t->hlen + sizeof(*iph);
904 
905 	return -(t->hlen + sizeof(*iph));
906 }
907 
908 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
909 {
910 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
911 	memcpy(haddr, &iph->saddr, 4);
912 	return 4;
913 }
914 
/* Link-layer header operations for GRE devices that build the outer
 * IP + GRE header themselves: ipgre_header() creates it and
 * ipgre_header_parse() extracts the sender address from it.
 */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
919 
920 #ifdef CONFIG_NET_IPGRE_BROADCAST
921 static int ipgre_open(struct net_device *dev)
922 {
923 	struct ip_tunnel *t = netdev_priv(dev);
924 
925 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
926 		struct flowi4 fl4;
927 		struct rtable *rt;
928 
929 		rt = ip_route_output_gre(t->net, &fl4,
930 					 t->parms.iph.daddr,
931 					 t->parms.iph.saddr,
932 					 t->parms.o_key,
933 					 RT_TOS(t->parms.iph.tos),
934 					 t->parms.link);
935 		if (IS_ERR(rt))
936 			return -EADDRNOTAVAIL;
937 		dev = rt->dst.dev;
938 		ip_rt_put(rt);
939 		if (!__in_dev_get_rtnl(dev))
940 			return -EADDRNOTAVAIL;
941 		t->mlink = dev->ifindex;
942 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
943 	}
944 	return 0;
945 }
946 
947 static int ipgre_close(struct net_device *dev)
948 {
949 	struct ip_tunnel *t = netdev_priv(dev);
950 
951 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
952 		struct in_device *in_dev;
953 		in_dev = inetdev_by_index(t->net, t->mlink);
954 		if (in_dev)
955 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
956 	}
957 	return 0;
958 }
959 #endif
960 
/* Device operations of plain GRE tunnel devices (installed by
 * ipgre_tunnel_setup()). The open/stop hooks exist only with
 * CONFIG_NET_IPGRE_BROADCAST, where they manage multicast group
 * membership for tunnels with a multicast remote address.
 */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};
975 
/* Device feature bits that __gre_tunnel_init() sets on every GRE device,
 * in both dev->features and dev->hw_features.
 */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
980 
981 static void ipgre_tunnel_setup(struct net_device *dev)
982 {
983 	dev->netdev_ops		= &ipgre_netdev_ops;
984 	dev->type		= ARPHRD_IPGRE;
985 	ip_tunnel_setup(dev, ipgre_net_id);
986 }
987 
988 static void __gre_tunnel_init(struct net_device *dev)
989 {
990 	struct ip_tunnel *tunnel;
991 
992 	tunnel = netdev_priv(dev);
993 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
994 	tunnel->parms.iph.protocol = IPPROTO_GRE;
995 
996 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
997 	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
998 
999 	dev->features		|= GRE_FEATURES | NETIF_F_LLTX;
1000 	dev->hw_features	|= GRE_FEATURES;
1001 
1002 	/* TCP offload with GRE SEQ is not supported, nor can we support 2
1003 	 * levels of outer headers requiring an update.
1004 	 */
1005 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
1006 		return;
1007 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
1008 	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
1009 		return;
1010 
1011 	dev->features |= NETIF_F_GSO_SOFTWARE;
1012 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1013 }
1014 
1015 static int ipgre_tunnel_init(struct net_device *dev)
1016 {
1017 	struct ip_tunnel *tunnel = netdev_priv(dev);
1018 	struct iphdr *iph = &tunnel->parms.iph;
1019 
1020 	__gre_tunnel_init(dev);
1021 
1022 	__dev_addr_set(dev, &iph->saddr, 4);
1023 	memcpy(dev->broadcast, &iph->daddr, 4);
1024 
1025 	dev->flags		= IFF_NOARP;
1026 	netif_keep_dst(dev);
1027 	dev->addr_len		= 4;
1028 
1029 	if (iph->daddr && !tunnel->collect_md) {
1030 #ifdef CONFIG_NET_IPGRE_BROADCAST
1031 		if (ipv4_is_multicast(iph->daddr)) {
1032 			if (!iph->saddr)
1033 				return -EINVAL;
1034 			dev->flags = IFF_BROADCAST;
1035 			dev->header_ops = &ipgre_header_ops;
1036 			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1037 			dev->needed_headroom = 0;
1038 		}
1039 #endif
1040 	} else if (!tunnel->collect_md) {
1041 		dev->header_ops = &ipgre_header_ops;
1042 		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1043 		dev->needed_headroom = 0;
1044 	}
1045 
1046 	return ip_tunnel_init(dev);
1047 }
1048 
1049 static const struct gre_protocol ipgre_protocol = {
1050 	.handler     = gre_rcv,
1051 	.err_handler = gre_err,
1052 };
1053 
1054 static int __net_init ipgre_init_net(struct net *net)
1055 {
1056 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1057 }
1058 
1059 static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
1060 					     struct list_head *dev_to_kill)
1061 {
1062 	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
1063 			      dev_to_kill);
1064 }
1065 
1066 static struct pernet_operations ipgre_net_ops = {
1067 	.init = ipgre_init_net,
1068 	.exit_batch_rtnl = ipgre_exit_batch_rtnl,
1069 	.id   = &ipgre_net_id,
1070 	.size = sizeof(struct ip_tunnel_net),
1071 };
1072 
1073 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1074 				 struct netlink_ext_ack *extack)
1075 {
1076 	__be16 flags;
1077 
1078 	if (!data)
1079 		return 0;
1080 
1081 	flags = 0;
1082 	if (data[IFLA_GRE_IFLAGS])
1083 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1084 	if (data[IFLA_GRE_OFLAGS])
1085 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1086 	if (flags & (GRE_VERSION|GRE_ROUTING))
1087 		return -EINVAL;
1088 
1089 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1090 	    data[IFLA_GRE_ENCAP_TYPE] &&
1091 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1092 		return -EINVAL;
1093 
1094 	return 0;
1095 }
1096 
1097 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1098 			      struct netlink_ext_ack *extack)
1099 {
1100 	__be32 daddr;
1101 
1102 	if (tb[IFLA_ADDRESS]) {
1103 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1104 			return -EINVAL;
1105 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1106 			return -EADDRNOTAVAIL;
1107 	}
1108 
1109 	if (!data)
1110 		goto out;
1111 
1112 	if (data[IFLA_GRE_REMOTE]) {
1113 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1114 		if (!daddr)
1115 			return -EINVAL;
1116 	}
1117 
1118 out:
1119 	return ipgre_tunnel_validate(tb, data, extack);
1120 }
1121 
1122 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1123 			   struct netlink_ext_ack *extack)
1124 {
1125 	__be16 flags = 0;
1126 	int ret;
1127 
1128 	if (!data)
1129 		return 0;
1130 
1131 	ret = ipgre_tap_validate(tb, data, extack);
1132 	if (ret)
1133 		return ret;
1134 
1135 	if (data[IFLA_GRE_ERSPAN_VER] &&
1136 	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1137 		return 0;
1138 
1139 	/* ERSPAN type II/III should only have GRE sequence and key flag */
1140 	if (data[IFLA_GRE_OFLAGS])
1141 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1142 	if (data[IFLA_GRE_IFLAGS])
1143 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1144 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1145 	    flags != (GRE_SEQ | GRE_KEY))
1146 		return -EINVAL;
1147 
1148 	/* ERSPAN Session ID only has 10-bit. Since we reuse
1149 	 * 32-bit key field as ID, check it's range.
1150 	 */
1151 	if (data[IFLA_GRE_IKEY] &&
1152 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1153 		return -EINVAL;
1154 
1155 	if (data[IFLA_GRE_OKEY] &&
1156 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1157 		return -EINVAL;
1158 
1159 	return 0;
1160 }
1161 
1162 static int ipgre_netlink_parms(struct net_device *dev,
1163 				struct nlattr *data[],
1164 				struct nlattr *tb[],
1165 				struct ip_tunnel_parm_kern *parms,
1166 				__u32 *fwmark)
1167 {
1168 	struct ip_tunnel *t = netdev_priv(dev);
1169 
1170 	memset(parms, 0, sizeof(*parms));
1171 
1172 	parms->iph.protocol = IPPROTO_GRE;
1173 
1174 	if (!data)
1175 		return 0;
1176 
1177 	if (data[IFLA_GRE_LINK])
1178 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1179 
1180 	if (data[IFLA_GRE_IFLAGS])
1181 		gre_flags_to_tnl_flags(parms->i_flags,
1182 				       nla_get_be16(data[IFLA_GRE_IFLAGS]));
1183 
1184 	if (data[IFLA_GRE_OFLAGS])
1185 		gre_flags_to_tnl_flags(parms->o_flags,
1186 				       nla_get_be16(data[IFLA_GRE_OFLAGS]));
1187 
1188 	if (data[IFLA_GRE_IKEY])
1189 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1190 
1191 	if (data[IFLA_GRE_OKEY])
1192 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1193 
1194 	if (data[IFLA_GRE_LOCAL])
1195 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1196 
1197 	if (data[IFLA_GRE_REMOTE])
1198 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1199 
1200 	if (data[IFLA_GRE_TTL])
1201 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1202 
1203 	if (data[IFLA_GRE_TOS])
1204 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1205 
1206 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1207 		if (t->ignore_df)
1208 			return -EINVAL;
1209 		parms->iph.frag_off = htons(IP_DF);
1210 	}
1211 
1212 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1213 		t->collect_md = true;
1214 		if (dev->type == ARPHRD_IPGRE)
1215 			dev->type = ARPHRD_NONE;
1216 	}
1217 
1218 	if (data[IFLA_GRE_IGNORE_DF]) {
1219 		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1220 		  && (parms->iph.frag_off & htons(IP_DF)))
1221 			return -EINVAL;
1222 		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1223 	}
1224 
1225 	if (data[IFLA_GRE_FWMARK])
1226 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1227 
1228 	return 0;
1229 }
1230 
1231 static int erspan_netlink_parms(struct net_device *dev,
1232 				struct nlattr *data[],
1233 				struct nlattr *tb[],
1234 				struct ip_tunnel_parm_kern *parms,
1235 				__u32 *fwmark)
1236 {
1237 	struct ip_tunnel *t = netdev_priv(dev);
1238 	int err;
1239 
1240 	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1241 	if (err)
1242 		return err;
1243 	if (!data)
1244 		return 0;
1245 
1246 	if (data[IFLA_GRE_ERSPAN_VER]) {
1247 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1248 
1249 		if (t->erspan_ver > 2)
1250 			return -EINVAL;
1251 	}
1252 
1253 	if (t->erspan_ver == 1) {
1254 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1255 			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1256 			if (t->index & ~INDEX_MASK)
1257 				return -EINVAL;
1258 		}
1259 	} else if (t->erspan_ver == 2) {
1260 		if (data[IFLA_GRE_ERSPAN_DIR]) {
1261 			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1262 			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1263 				return -EINVAL;
1264 		}
1265 		if (data[IFLA_GRE_ERSPAN_HWID]) {
1266 			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1267 			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1268 				return -EINVAL;
1269 		}
1270 	}
1271 
1272 	return 0;
1273 }
1274 
1275 /* This function returns true when ENCAP attributes are present in the nl msg */
1276 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1277 				      struct ip_tunnel_encap *ipencap)
1278 {
1279 	bool ret = false;
1280 
1281 	memset(ipencap, 0, sizeof(*ipencap));
1282 
1283 	if (!data)
1284 		return ret;
1285 
1286 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1287 		ret = true;
1288 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1289 	}
1290 
1291 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1292 		ret = true;
1293 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1294 	}
1295 
1296 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1297 		ret = true;
1298 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1299 	}
1300 
1301 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1302 		ret = true;
1303 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1304 	}
1305 
1306 	return ret;
1307 }
1308 
1309 static int gre_tap_init(struct net_device *dev)
1310 {
1311 	__gre_tunnel_init(dev);
1312 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1313 	netif_keep_dst(dev);
1314 
1315 	return ip_tunnel_init(dev);
1316 }
1317 
1318 static const struct net_device_ops gre_tap_netdev_ops = {
1319 	.ndo_init		= gre_tap_init,
1320 	.ndo_uninit		= ip_tunnel_uninit,
1321 	.ndo_start_xmit		= gre_tap_xmit,
1322 	.ndo_set_mac_address 	= eth_mac_addr,
1323 	.ndo_validate_addr	= eth_validate_addr,
1324 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1325 	.ndo_get_stats64	= dev_get_tstats64,
1326 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1327 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1328 };
1329 
1330 static int erspan_tunnel_init(struct net_device *dev)
1331 {
1332 	struct ip_tunnel *tunnel = netdev_priv(dev);
1333 
1334 	if (tunnel->erspan_ver == 0)
1335 		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1336 	else
1337 		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1338 
1339 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1340 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1341 		       erspan_hdr_len(tunnel->erspan_ver);
1342 
1343 	dev->features		|= GRE_FEATURES;
1344 	dev->hw_features	|= GRE_FEATURES;
1345 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1346 	netif_keep_dst(dev);
1347 
1348 	return ip_tunnel_init(dev);
1349 }
1350 
1351 static const struct net_device_ops erspan_netdev_ops = {
1352 	.ndo_init		= erspan_tunnel_init,
1353 	.ndo_uninit		= ip_tunnel_uninit,
1354 	.ndo_start_xmit		= erspan_xmit,
1355 	.ndo_set_mac_address	= eth_mac_addr,
1356 	.ndo_validate_addr	= eth_validate_addr,
1357 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1358 	.ndo_get_stats64	= dev_get_tstats64,
1359 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1360 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1361 };
1362 
1363 static void ipgre_tap_setup(struct net_device *dev)
1364 {
1365 	ether_setup(dev);
1366 	dev->max_mtu = 0;
1367 	dev->netdev_ops	= &gre_tap_netdev_ops;
1368 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1369 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1370 	ip_tunnel_setup(dev, gre_tap_net_id);
1371 }
1372 
1373 static int
1374 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1375 {
1376 	struct ip_tunnel_encap ipencap;
1377 
1378 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1379 		struct ip_tunnel *t = netdev_priv(dev);
1380 		int err = ip_tunnel_encap_setup(t, &ipencap);
1381 
1382 		if (err < 0)
1383 			return err;
1384 	}
1385 
1386 	return 0;
1387 }
1388 
1389 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1390 			 struct nlattr *tb[], struct nlattr *data[],
1391 			 struct netlink_ext_ack *extack)
1392 {
1393 	struct ip_tunnel_parm_kern p;
1394 	__u32 fwmark = 0;
1395 	int err;
1396 
1397 	err = ipgre_newlink_encap_setup(dev, data);
1398 	if (err)
1399 		return err;
1400 
1401 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1402 	if (err < 0)
1403 		return err;
1404 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1405 }
1406 
1407 static int erspan_newlink(struct net *src_net, struct net_device *dev,
1408 			  struct nlattr *tb[], struct nlattr *data[],
1409 			  struct netlink_ext_ack *extack)
1410 {
1411 	struct ip_tunnel_parm_kern p;
1412 	__u32 fwmark = 0;
1413 	int err;
1414 
1415 	err = ipgre_newlink_encap_setup(dev, data);
1416 	if (err)
1417 		return err;
1418 
1419 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1420 	if (err)
1421 		return err;
1422 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1423 }
1424 
1425 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1426 			    struct nlattr *data[],
1427 			    struct netlink_ext_ack *extack)
1428 {
1429 	struct ip_tunnel *t = netdev_priv(dev);
1430 	struct ip_tunnel_parm_kern p;
1431 	__u32 fwmark = t->fwmark;
1432 	int err;
1433 
1434 	err = ipgre_newlink_encap_setup(dev, data);
1435 	if (err)
1436 		return err;
1437 
1438 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1439 	if (err < 0)
1440 		return err;
1441 
1442 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1443 	if (err < 0)
1444 		return err;
1445 
1446 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1447 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1448 
1449 	ipgre_link_update(dev, !tb[IFLA_MTU]);
1450 
1451 	return 0;
1452 }
1453 
1454 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1455 			     struct nlattr *data[],
1456 			     struct netlink_ext_ack *extack)
1457 {
1458 	struct ip_tunnel *t = netdev_priv(dev);
1459 	struct ip_tunnel_parm_kern p;
1460 	__u32 fwmark = t->fwmark;
1461 	int err;
1462 
1463 	err = ipgre_newlink_encap_setup(dev, data);
1464 	if (err)
1465 		return err;
1466 
1467 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1468 	if (err < 0)
1469 		return err;
1470 
1471 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1472 	if (err < 0)
1473 		return err;
1474 
1475 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1476 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1477 
1478 	return 0;
1479 }
1480 
1481 static size_t ipgre_get_size(const struct net_device *dev)
1482 {
1483 	return
1484 		/* IFLA_GRE_LINK */
1485 		nla_total_size(4) +
1486 		/* IFLA_GRE_IFLAGS */
1487 		nla_total_size(2) +
1488 		/* IFLA_GRE_OFLAGS */
1489 		nla_total_size(2) +
1490 		/* IFLA_GRE_IKEY */
1491 		nla_total_size(4) +
1492 		/* IFLA_GRE_OKEY */
1493 		nla_total_size(4) +
1494 		/* IFLA_GRE_LOCAL */
1495 		nla_total_size(4) +
1496 		/* IFLA_GRE_REMOTE */
1497 		nla_total_size(4) +
1498 		/* IFLA_GRE_TTL */
1499 		nla_total_size(1) +
1500 		/* IFLA_GRE_TOS */
1501 		nla_total_size(1) +
1502 		/* IFLA_GRE_PMTUDISC */
1503 		nla_total_size(1) +
1504 		/* IFLA_GRE_ENCAP_TYPE */
1505 		nla_total_size(2) +
1506 		/* IFLA_GRE_ENCAP_FLAGS */
1507 		nla_total_size(2) +
1508 		/* IFLA_GRE_ENCAP_SPORT */
1509 		nla_total_size(2) +
1510 		/* IFLA_GRE_ENCAP_DPORT */
1511 		nla_total_size(2) +
1512 		/* IFLA_GRE_COLLECT_METADATA */
1513 		nla_total_size(0) +
1514 		/* IFLA_GRE_IGNORE_DF */
1515 		nla_total_size(1) +
1516 		/* IFLA_GRE_FWMARK */
1517 		nla_total_size(4) +
1518 		/* IFLA_GRE_ERSPAN_INDEX */
1519 		nla_total_size(4) +
1520 		/* IFLA_GRE_ERSPAN_VER */
1521 		nla_total_size(1) +
1522 		/* IFLA_GRE_ERSPAN_DIR */
1523 		nla_total_size(1) +
1524 		/* IFLA_GRE_ERSPAN_HWID */
1525 		nla_total_size(2) +
1526 		0;
1527 }
1528 
1529 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1530 {
1531 	struct ip_tunnel *t = netdev_priv(dev);
1532 	struct ip_tunnel_parm_kern *p = &t->parms;
1533 	IP_TUNNEL_DECLARE_FLAGS(o_flags);
1534 
1535 	ip_tunnel_flags_copy(o_flags, p->o_flags);
1536 
1537 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1538 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1539 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1540 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1541 			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1542 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1543 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1544 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1545 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1546 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1547 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1548 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1549 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1550 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1551 		goto nla_put_failure;
1552 
1553 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1554 			t->encap.type) ||
1555 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1556 			 t->encap.sport) ||
1557 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1558 			 t->encap.dport) ||
1559 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1560 			t->encap.flags))
1561 		goto nla_put_failure;
1562 
1563 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1564 		goto nla_put_failure;
1565 
1566 	if (t->collect_md) {
1567 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1568 			goto nla_put_failure;
1569 	}
1570 
1571 	return 0;
1572 
1573 nla_put_failure:
1574 	return -EMSGSIZE;
1575 }
1576 
1577 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1578 {
1579 	struct ip_tunnel *t = netdev_priv(dev);
1580 
1581 	if (t->erspan_ver <= 2) {
1582 		if (t->erspan_ver != 0 && !t->collect_md)
1583 			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
1584 
1585 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1586 			goto nla_put_failure;
1587 
1588 		if (t->erspan_ver == 1) {
1589 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1590 				goto nla_put_failure;
1591 		} else if (t->erspan_ver == 2) {
1592 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1593 				goto nla_put_failure;
1594 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1595 				goto nla_put_failure;
1596 		}
1597 	}
1598 
1599 	return ipgre_fill_info(skb, dev);
1600 
1601 nla_put_failure:
1602 	return -EMSGSIZE;
1603 }
1604 
1605 static void erspan_setup(struct net_device *dev)
1606 {
1607 	struct ip_tunnel *t = netdev_priv(dev);
1608 
1609 	ether_setup(dev);
1610 	dev->max_mtu = 0;
1611 	dev->netdev_ops = &erspan_netdev_ops;
1612 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1613 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1614 	ip_tunnel_setup(dev, erspan_net_id);
1615 	t->erspan_ver = 1;
1616 }
1617 
1618 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1619 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1620 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1621 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1622 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1623 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1624 	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1625 	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1626 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1627 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1628 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1629 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1630 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1631 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1632 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1633 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1634 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1635 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1636 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1637 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1638 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1639 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1640 };
1641 
1642 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1643 	.kind		= "gre",
1644 	.maxtype	= IFLA_GRE_MAX,
1645 	.policy		= ipgre_policy,
1646 	.priv_size	= sizeof(struct ip_tunnel),
1647 	.setup		= ipgre_tunnel_setup,
1648 	.validate	= ipgre_tunnel_validate,
1649 	.newlink	= ipgre_newlink,
1650 	.changelink	= ipgre_changelink,
1651 	.dellink	= ip_tunnel_dellink,
1652 	.get_size	= ipgre_get_size,
1653 	.fill_info	= ipgre_fill_info,
1654 	.get_link_net	= ip_tunnel_get_link_net,
1655 };
1656 
1657 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1658 	.kind		= "gretap",
1659 	.maxtype	= IFLA_GRE_MAX,
1660 	.policy		= ipgre_policy,
1661 	.priv_size	= sizeof(struct ip_tunnel),
1662 	.setup		= ipgre_tap_setup,
1663 	.validate	= ipgre_tap_validate,
1664 	.newlink	= ipgre_newlink,
1665 	.changelink	= ipgre_changelink,
1666 	.dellink	= ip_tunnel_dellink,
1667 	.get_size	= ipgre_get_size,
1668 	.fill_info	= ipgre_fill_info,
1669 	.get_link_net	= ip_tunnel_get_link_net,
1670 };
1671 
1672 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1673 	.kind		= "erspan",
1674 	.maxtype	= IFLA_GRE_MAX,
1675 	.policy		= ipgre_policy,
1676 	.priv_size	= sizeof(struct ip_tunnel),
1677 	.setup		= erspan_setup,
1678 	.validate	= erspan_validate,
1679 	.newlink	= erspan_newlink,
1680 	.changelink	= erspan_changelink,
1681 	.dellink	= ip_tunnel_dellink,
1682 	.get_size	= ipgre_get_size,
1683 	.fill_info	= erspan_fill_info,
1684 	.get_link_net	= ip_tunnel_get_link_net,
1685 };
1686 
1687 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1688 					u8 name_assign_type)
1689 {
1690 	struct nlattr *tb[IFLA_MAX + 1];
1691 	struct net_device *dev;
1692 	LIST_HEAD(list_kill);
1693 	struct ip_tunnel *t;
1694 	int err;
1695 
1696 	memset(&tb, 0, sizeof(tb));
1697 
1698 	dev = rtnl_create_link(net, name, name_assign_type,
1699 			       &ipgre_tap_ops, tb, NULL);
1700 	if (IS_ERR(dev))
1701 		return dev;
1702 
1703 	/* Configure flow based GRE device. */
1704 	t = netdev_priv(dev);
1705 	t->collect_md = true;
1706 
1707 	err = ipgre_newlink(net, dev, tb, NULL, NULL);
1708 	if (err < 0) {
1709 		free_netdev(dev);
1710 		return ERR_PTR(err);
1711 	}
1712 
1713 	/* openvswitch users expect packet sizes to be unrestricted,
1714 	 * so set the largest MTU we can.
1715 	 */
1716 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1717 	if (err)
1718 		goto out;
1719 
1720 	err = rtnl_configure_link(dev, NULL, 0, NULL);
1721 	if (err < 0)
1722 		goto out;
1723 
1724 	return dev;
1725 out:
1726 	ip_tunnel_dellink(dev, &list_kill);
1727 	unregister_netdevice_many(&list_kill);
1728 	return ERR_PTR(err);
1729 }
1730 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1731 
1732 static int __net_init ipgre_tap_init_net(struct net *net)
1733 {
1734 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1735 }
1736 
1737 static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
1738 						 struct list_head *dev_to_kill)
1739 {
1740 	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
1741 			      dev_to_kill);
1742 }
1743 
1744 static struct pernet_operations ipgre_tap_net_ops = {
1745 	.init = ipgre_tap_init_net,
1746 	.exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
1747 	.id   = &gre_tap_net_id,
1748 	.size = sizeof(struct ip_tunnel_net),
1749 };
1750 
1751 static int __net_init erspan_init_net(struct net *net)
1752 {
1753 	return ip_tunnel_init_net(net, erspan_net_id,
1754 				  &erspan_link_ops, "erspan0");
1755 }
1756 
1757 static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
1758 					      struct list_head *dev_to_kill)
1759 {
1760 	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
1761 			      dev_to_kill);
1762 }
1763 
1764 static struct pernet_operations erspan_net_ops = {
1765 	.init = erspan_init_net,
1766 	.exit_batch_rtnl = erspan_exit_batch_rtnl,
1767 	.id   = &erspan_net_id,
1768 	.size = sizeof(struct ip_tunnel_net),
1769 };
1770 
1771 static int __init ipgre_init(void)
1772 {
1773 	int err;
1774 
1775 	pr_info("GRE over IPv4 tunneling driver\n");
1776 
1777 	err = register_pernet_device(&ipgre_net_ops);
1778 	if (err < 0)
1779 		return err;
1780 
1781 	err = register_pernet_device(&ipgre_tap_net_ops);
1782 	if (err < 0)
1783 		goto pnet_tap_failed;
1784 
1785 	err = register_pernet_device(&erspan_net_ops);
1786 	if (err < 0)
1787 		goto pnet_erspan_failed;
1788 
1789 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1790 	if (err < 0) {
1791 		pr_info("%s: can't add protocol\n", __func__);
1792 		goto add_proto_failed;
1793 	}
1794 
1795 	err = rtnl_link_register(&ipgre_link_ops);
1796 	if (err < 0)
1797 		goto rtnl_link_failed;
1798 
1799 	err = rtnl_link_register(&ipgre_tap_ops);
1800 	if (err < 0)
1801 		goto tap_ops_failed;
1802 
1803 	err = rtnl_link_register(&erspan_link_ops);
1804 	if (err < 0)
1805 		goto erspan_link_failed;
1806 
1807 	return 0;
1808 
1809 erspan_link_failed:
1810 	rtnl_link_unregister(&ipgre_tap_ops);
1811 tap_ops_failed:
1812 	rtnl_link_unregister(&ipgre_link_ops);
1813 rtnl_link_failed:
1814 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1815 add_proto_failed:
1816 	unregister_pernet_device(&erspan_net_ops);
1817 pnet_erspan_failed:
1818 	unregister_pernet_device(&ipgre_tap_net_ops);
1819 pnet_tap_failed:
1820 	unregister_pernet_device(&ipgre_net_ops);
1821 	return err;
1822 }
1823 
1824 static void __exit ipgre_fini(void)
1825 {
1826 	rtnl_link_unregister(&ipgre_tap_ops);
1827 	rtnl_link_unregister(&ipgre_link_ops);
1828 	rtnl_link_unregister(&erspan_link_ops);
1829 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1830 	unregister_pernet_device(&ipgre_tap_net_ops);
1831 	unregister_pernet_device(&ipgre_net_ops);
1832 	unregister_pernet_device(&erspan_net_ops);
1833 }
1834 
1835 module_init(ipgre_init);
1836 module_exit(ipgre_fini);
1837 MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1838 MODULE_LICENSE("GPL");
1839 MODULE_ALIAS_RTNL_LINK("gre");
1840 MODULE_ALIAS_RTNL_LINK("gretap");
1841 MODULE_ALIAS_RTNL_LINK("erspan");
1842 MODULE_ALIAS_NETDEV("gre0");
1843 MODULE_ALIAS_NETDEV("gretap0");
1844 MODULE_ALIAS_NETDEV("erspan0");
1845