xref: /linux/net/ipv4/ip_gre.c (revision 28efb0046512e8a13ed9f9bdf0d68d10bbfbe9cf)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
51 #include <net/erspan.h>
52 
53 /*
54    Problems & solutions
55    --------------------
56 
57    1. The most important issue is detecting local dead loops.
58    They would cause complete host lockup in transmit, which
59    would be "resolved" by stack overflow or, if queueing is enabled,
60    by infinite looping in net_bh.
61 
62    We cannot track such dead loops during route installation;
63    it is an infeasible task. The most general solution would be
64    to keep an skb->encapsulation counter (a sort of local ttl)
65    and silently drop the packet when it expires. It is a good
66    solution, but it requires maintaining a new variable in ALL
67    skbs, even when no tunneling is used.
68 
69    Current solution: xmit_recursion breaks dead loops. This is a percpu
70    counter, since once we enter the first ndo_xmit(), cpu migration is
71    forbidden. We force an exit if this counter reaches RECURSION_LIMIT.
72 
73    2. Networking dead loops would not kill routers, but would really
74    kill the network. The IP hop limit plays the role of "t->recursion" here,
75    if we copy it from the packet being encapsulated to the outer header.
76    It is a very good solution, but it introduces two problems:
77 
78    - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
79      do not work over tunnels.
80    - traceroute does not work. I planned to relay ICMP from the tunnel,
81      so that this problem would be solved and traceroute output
82      would be even more informative. This idea appeared to be wrong:
83      only Linux complies with rfc1812 now (yes, guys, Linux is the only
84      true router now :-)); all routers (at least, in my neighbourhood)
85      return only 8 bytes of payload. It is the end.
86 
87    Hence, if we want OSPF to work or traceroute to say something reasonable,
88    we should search for another solution.
89 
90    One of them is to parse the packet, trying to detect inner encapsulation
91    made by our node. It is difficult or even impossible, especially
92    taking fragmentation into account. In short, ttl is no solution at all.
93 
94    Current solution: The solution was UNEXPECTEDLY SIMPLE.
95    We force the DF flag on tunnels with a preconfigured hop limit,
96    that is ALL. :-) Well, it does not remove the problem completely,
97    but exponential growth of network traffic is changed to linear
98    (branches that exceed the pmtu are pruned) and the tunnel mtu
99    rapidly degrades to a value <68, where looping stops.
100    Yes, it is not good if there exists a router in the loop
101    which does not force DF, even when the encapsulated packets have DF set.
102    But it is not our problem! Nobody could accuse us; we did
103    all that we could. Even if it was your gated that injected
104    the fatal route into the network, even if it was you who configured
105    the fatal static route: you are innocent. :-)
106 
107    Alexey Kuznetsov.
108  */
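/* Editor's note: a minimal sketch of the percpu recursion guard described
 * above, with illustrative names; the real counter lives in the core
 * transmit path (net/core/dev.c), not in this file.
 */
#if 0
static DEFINE_PER_CPU(unsigned int, example_xmit_recursion);
#define EXAMPLE_RECURSION_LIMIT	8

static bool example_xmit_recursion_ok(void)
{
	/* ndo_start_xmit() runs with BHs disabled, so the task cannot
	 * migrate between the read and the increment below.
	 */
	if (__this_cpu_read(example_xmit_recursion) > EXAMPLE_RECURSION_LIMIT)
		return false;	/* suspected dead loop: caller drops the skb */
	__this_cpu_inc(example_xmit_recursion);
	return true;
}

static void example_xmit_recursion_done(void)
{
	__this_cpu_dec(example_xmit_recursion);
}
#endif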
109 
110 static bool log_ecn_error = true;
111 module_param(log_ecn_error, bool, 0644);
112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
113 
114 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
115 static int ipgre_tunnel_init(struct net_device *dev);
116 static void erspan_build_header(struct sk_buff *skb,
117 				__be32 id, u32 index, bool truncate);
118 
119 static unsigned int ipgre_net_id __read_mostly;
120 static unsigned int gre_tap_net_id __read_mostly;
121 static unsigned int erspan_net_id __read_mostly;
122 
123 static void ipgre_err(struct sk_buff *skb, u32 info,
124 		      const struct tnl_ptk_info *tpi)
125 {
126 
127 	/* All the routers (except for Linux) return only
128 	   8 bytes of packet payload. It means that precise relaying of
129 	   ICMP in the real Internet is absolutely infeasible.
130 
131 	   Moreover, Cisco "wise men" put the GRE key in the third word
132 	   of the GRE header. This makes it impossible to maintain even
133 	   soft state for keyed GRE tunnels with checksums enabled.
134 	   Tell them "thank you".
135 
136 	   Well, I wonder: rfc1812 was written by a Cisco employee, so
137 	   why the hell do these idiots break standards established
138 	   by themselves???
139 	   */
140 	struct net *net = dev_net(skb->dev);
141 	struct ip_tunnel_net *itn;
142 	const struct iphdr *iph;
143 	const int type = icmp_hdr(skb)->type;
144 	const int code = icmp_hdr(skb)->code;
145 	unsigned int data_len = 0;
146 	struct ip_tunnel *t;
147 
148 	switch (type) {
149 	default:
150 	case ICMP_PARAMETERPROB:
151 		return;
152 
153 	case ICMP_DEST_UNREACH:
154 		switch (code) {
155 		case ICMP_SR_FAILED:
156 		case ICMP_PORT_UNREACH:
157 			/* Impossible event. */
158 			return;
159 		default:
160 			/* All others are translated to HOST_UNREACH.
161 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
162 			   I believe they are just ether pollution. --ANK
163 			 */
164 			break;
165 		}
166 		break;
167 
168 	case ICMP_TIME_EXCEEDED:
169 		if (code != ICMP_EXC_TTL)
170 			return;
171 		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
172 		break;
173 
174 	case ICMP_REDIRECT:
175 		break;
176 	}
177 
178 	if (tpi->proto == htons(ETH_P_TEB))
179 		itn = net_generic(net, gre_tap_net_id);
180 	else
181 		itn = net_generic(net, ipgre_net_id);
182 
183 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
184 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
185 			     iph->daddr, iph->saddr, tpi->key);
186 
187 	if (!t)
188 		return;
189 
190 #if IS_ENABLED(CONFIG_IPV6)
191 	if (tpi->proto == htons(ETH_P_IPV6) &&
192 	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
193 					type, data_len))
194 		return;
195 #endif
196 
197 	if (t->parms.iph.daddr == 0 ||
198 	    ipv4_is_multicast(t->parms.iph.daddr))
199 		return;
200 
201 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
202 		return;
203 
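	/* Editor's note: the err_time/err_count recorded below throttle how
	 * eagerly the transmit path reports link failures; see the
	 * tunnel->err_count handling in ip_tunnel_xmit().
	 */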
204 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
205 		t->err_count++;
206 	else
207 		t->err_count = 1;
208 	t->err_time = jiffies;
209 }
210 
211 static void gre_err(struct sk_buff *skb, u32 info)
212 {
213 	/* All the routers (except for Linux) return only
214 	 * 8 bytes of packet payload. It means that precise relaying of
215 	 * ICMP in the real Internet is absolutely infeasible.
216 	 *
217 	 * Moreover, Cisco "wise men" put the GRE key in the third word
218 	 * of the GRE header. This makes it impossible to maintain even
219 	 * soft state for keyed GRE tunnels with checksums enabled.
220 	 * Tell them "thank you".
221 	 *
222 	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
223 	 * why the hell do these idiots break standards established
224 	 * by themselves???
225 	 */
226 
227 	const struct iphdr *iph = (struct iphdr *)skb->data;
228 	const int type = icmp_hdr(skb)->type;
229 	const int code = icmp_hdr(skb)->code;
230 	struct tnl_ptk_info tpi;
231 	bool csum_err = false;
232 
233 	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
234 			     iph->ihl * 4) < 0) {
235 		if (!csum_err)		/* bail unless it is a csum error */
236 			return;
237 	}
238 
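	/* Editor's note: PMTU updates and redirects are applied to the
	 * outer route before any per-tunnel lookup, since both act on the
	 * routing cache rather than on tunnel state.
	 */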
239 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
240 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
241 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
242 		return;
243 	}
244 	if (type == ICMP_REDIRECT) {
245 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
246 			      IPPROTO_GRE, 0);
247 		return;
248 	}
249 
250 	ipgre_err(skb, info, &tpi);
251 }
252 
253 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
254 		      int gre_hdr_len)
255 {
256 	struct net *net = dev_net(skb->dev);
257 	struct metadata_dst *tun_dst = NULL;
258 	struct ip_tunnel_net *itn;
259 	struct ip_tunnel *tunnel;
260 	struct erspanhdr *ershdr;
261 	const struct iphdr *iph;
262 	__be32 session_id;
263 	__be32 index;
264 	int len;
265 
266 	itn = net_generic(net, erspan_net_id);
267 	len = gre_hdr_len + sizeof(*ershdr);
268 
269 	if (unlikely(!pskb_may_pull(skb, len)))
270 		return -ENOMEM;
271 
272 	iph = ip_hdr(skb);
273 	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
274 
275 	/* The original GRE header does not have a key field,
276 	 * so use the ERSPAN 10-bit session ID as the key.
277 	 */
278 	session_id = cpu_to_be32(ntohs(ershdr->session_id));
279 	tpi->key = session_id;
280 	index = ershdr->md.index;
281 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
282 				  tpi->flags | TUNNEL_KEY,
283 				  iph->saddr, iph->daddr, tpi->key);
284 
285 	if (tunnel) {
286 		if (__iptunnel_pull_header(skb,
287 					   gre_hdr_len + sizeof(*ershdr),
288 					   htons(ETH_P_TEB),
289 					   false, false) < 0)
290 			goto drop;
291 
292 		if (tunnel->collect_md) {
293 			struct ip_tunnel_info *info;
294 			struct erspan_metadata *md;
295 			__be64 tun_id;
296 			__be16 flags;
297 
298 			tpi->flags |= TUNNEL_KEY;
299 			flags = tpi->flags;
300 			tun_id = key32_to_tunnel_id(tpi->key);
301 
302 			tun_dst = ip_tun_rx_dst(skb, flags,
303 						tun_id, sizeof(*md));
304 			if (!tun_dst)
305 				return PACKET_REJECT;
306 
307 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
308 			if (!md)
309 				return PACKET_REJECT;
310 
311 			md->index = index;
312 			info = &tun_dst->u.tun_info;
313 			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
314 			info->options_len = sizeof(*md);
315 		} else {
316 			tunnel->index = ntohl(index);
317 		}
318 
319 		skb_reset_mac_header(skb);
320 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
321 		return PACKET_RCVD;
322 	}
323 drop:
324 	kfree_skb(skb);
325 	return PACKET_RCVD;
326 }
327 
328 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
329 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
330 {
331 	struct metadata_dst *tun_dst = NULL;
332 	const struct iphdr *iph;
333 	struct ip_tunnel *tunnel;
334 
335 	iph = ip_hdr(skb);
336 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
337 				  iph->saddr, iph->daddr, tpi->key);
338 
339 	if (tunnel) {
340 		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
341 					   raw_proto, false) < 0)
342 			goto drop;
343 
344 		if (tunnel->dev->type != ARPHRD_NONE)
345 			skb_pop_mac_header(skb);
346 		else
347 			skb_reset_mac_header(skb);
348 		if (tunnel->collect_md) {
349 			__be16 flags;
350 			__be64 tun_id;
351 
352 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
353 			tun_id = key32_to_tunnel_id(tpi->key);
354 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
355 			if (!tun_dst)
356 				return PACKET_REJECT;
357 		}
358 
359 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
360 		return PACKET_RCVD;
361 	}
362 	return PACKET_NEXT;
363 
364 drop:
365 	kfree_skb(skb);
366 	return PACKET_RCVD;
367 }
368 
369 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
370 		     int hdr_len)
371 {
372 	struct net *net = dev_net(skb->dev);
373 	struct ip_tunnel_net *itn;
374 	int res;
375 
376 	if (tpi->proto == htons(ETH_P_TEB))
377 		itn = net_generic(net, gre_tap_net_id);
378 	else
379 		itn = net_generic(net, ipgre_net_id);
380 
381 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
382 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
383 		/* ipgre tunnels in collect-metadata mode should also
384 		 * receive ETH_P_TEB traffic.
385 		 */
386 		itn = net_generic(net, ipgre_net_id);
387 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
388 	}
389 	return res;
390 }
391 
392 static int gre_rcv(struct sk_buff *skb)
393 {
394 	struct tnl_ptk_info tpi;
395 	bool csum_err = false;
396 	int hdr_len;
397 
398 #ifdef CONFIG_NET_IPGRE_BROADCAST
399 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
400 		/* Looped back packet, drop it! */
401 		if (rt_is_output_route(skb_rtable(skb)))
402 			goto drop;
403 	}
404 #endif
405 
406 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
407 	if (hdr_len < 0)
408 		goto drop;
409 
410 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
411 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
412 			return 0;
413 	}
414 
415 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
416 		return 0;
417 
418 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
419 drop:
420 	kfree_skb(skb);
421 	return 0;
422 }
423 
424 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
425 		       const struct iphdr *tnl_params,
426 		       __be16 proto)
427 {
428 	struct ip_tunnel *tunnel = netdev_priv(dev);
429 
430 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
431 		tunnel->o_seqno++;
432 
433 	/* Push GRE header. */
434 	gre_build_header(skb, tunnel->tun_hlen,
435 			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
436 			 htonl(tunnel->o_seqno));
437 
438 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
439 }
440 
441 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
442 {
443 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
444 }
445 
446 static struct rtable *gre_get_rt(struct sk_buff *skb,
447 				 struct net_device *dev,
448 				 struct flowi4 *fl,
449 				 const struct ip_tunnel_key *key)
450 {
451 	struct net *net = dev_net(dev);
452 
453 	memset(fl, 0, sizeof(*fl));
454 	fl->daddr = key->u.ipv4.dst;
455 	fl->saddr = key->u.ipv4.src;
456 	fl->flowi4_tos = RT_TOS(key->tos);
457 	fl->flowi4_mark = skb->mark;
458 	fl->flowi4_proto = IPPROTO_GRE;
459 
460 	return ip_route_output_key(net, fl);
461 }
462 
463 static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
464 				      struct net_device *dev,
465 				      struct flowi4 *fl,
466 				      int tunnel_hlen)
467 {
468 	struct ip_tunnel_info *tun_info;
469 	const struct ip_tunnel_key *key;
470 	struct rtable *rt = NULL;
471 	int min_headroom;
472 	bool use_cache;
473 	int err;
474 
475 	tun_info = skb_tunnel_info(skb);
476 	key = &tun_info->key;
477 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
478 
479 	if (use_cache)
480 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
481 	if (!rt) {
482 		rt = gre_get_rt(skb, dev, fl, key);
483 		if (IS_ERR(rt))
484 			goto err_free_skb;
485 		if (use_cache)
486 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
487 					  fl->saddr);
488 	}
489 
490 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
491 			+ tunnel_hlen + sizeof(struct iphdr);
492 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
493 		int head_delta = SKB_DATA_ALIGN(min_headroom -
494 						skb_headroom(skb) +
495 						16);
496 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
497 				       0, GFP_ATOMIC);
498 		if (unlikely(err))
499 			goto err_free_rt;
500 	}
501 	return rt;
502 
503 err_free_rt:
504 	ip_rt_put(rt);
505 err_free_skb:
506 	kfree_skb(skb);
507 	dev->stats.tx_dropped++;
508 	return NULL;
509 }
510 
511 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
512 			__be16 proto)
513 {
514 	struct ip_tunnel_info *tun_info;
515 	const struct ip_tunnel_key *key;
516 	struct rtable *rt = NULL;
517 	struct flowi4 fl;
518 	int tunnel_hlen;
519 	__be16 df, flags;
520 
521 	tun_info = skb_tunnel_info(skb);
522 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
523 		     ip_tunnel_info_af(tun_info) != AF_INET))
524 		goto err_free_skb;
525 
526 	key = &tun_info->key;
527 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
528 
529 	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
530 	if (!rt)
531 		return;
532 
533 	/* Push Tunnel header. */
534 	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
535 		goto err_free_rt;
536 
537 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
538 	gre_build_header(skb, tunnel_hlen, flags, proto,
539 			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
540 
541 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
542 
543 	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
544 		      key->tos, key->ttl, df, false);
545 	return;
546 
547 err_free_rt:
548 	ip_rt_put(rt);
549 err_free_skb:
550 	kfree_skb(skb);
551 	dev->stats.tx_dropped++;
552 }
553 
554 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
555 			   __be16 proto)
556 {
557 	struct ip_tunnel *tunnel = netdev_priv(dev);
558 	struct ip_tunnel_info *tun_info;
559 	const struct ip_tunnel_key *key;
560 	struct erspan_metadata *md;
561 	struct rtable *rt = NULL;
562 	bool truncate = false;
563 	struct flowi4 fl;
564 	int tunnel_hlen;
565 	__be16 df;
566 
567 	tun_info = skb_tunnel_info(skb);
568 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
569 		     ip_tunnel_info_af(tun_info) != AF_INET))
570 		goto err_free_skb;
571 
572 	key = &tun_info->key;
573 
574 	/* ERSPAN has a fixed 8-byte GRE header */
575 	tunnel_hlen = 8 + sizeof(struct erspanhdr);
576 
577 	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
578 	if (!rt)
579 		return;
580 
581 	if (gre_handle_offloads(skb, false))
582 		goto err_free_rt;
583 
584 	if (skb->len > dev->mtu) {
585 		pskb_trim(skb, dev->mtu);
586 		truncate = true;
587 	}
588 
589 	md = ip_tunnel_info_opts(tun_info);
590 	if (!md)
591 		goto err_free_rt;
592 
593 	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
594 			    ntohl(md->index), truncate);
595 
596 	gre_build_header(skb, 8, TUNNEL_SEQ,
597 			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
598 
599 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
600 
601 	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
602 		      key->tos, key->ttl, df, false);
603 	return;
604 
605 err_free_rt:
606 	ip_rt_put(rt);
607 err_free_skb:
608 	kfree_skb(skb);
609 	dev->stats.tx_dropped++;
610 }
611 
612 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
613 {
614 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
615 	struct rtable *rt;
616 	struct flowi4 fl4;
617 
618 	if (ip_tunnel_info_af(info) != AF_INET)
619 		return -EINVAL;
620 
621 	rt = gre_get_rt(skb, dev, &fl4, &info->key);
622 	if (IS_ERR(rt))
623 		return PTR_ERR(rt);
624 
625 	ip_rt_put(rt);
626 	info->key.u.ipv4.src = fl4.saddr;
627 	return 0;
628 }
629 
630 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
631 			      struct net_device *dev)
632 {
633 	struct ip_tunnel *tunnel = netdev_priv(dev);
634 	const struct iphdr *tnl_params;
635 
636 	if (tunnel->collect_md) {
637 		gre_fb_xmit(skb, dev, skb->protocol);
638 		return NETDEV_TX_OK;
639 	}
640 
641 	if (dev->header_ops) {
642 		/* Need space for new headers */
643 		if (skb_cow_head(skb, dev->needed_headroom -
644 				      (tunnel->hlen + sizeof(struct iphdr))))
645 			goto free_skb;
646 
647 		tnl_params = (const struct iphdr *)skb->data;
648 
649 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
650 		 * to the GRE header.
651 		 */
652 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
653 		skb_reset_mac_header(skb);
654 	} else {
655 		if (skb_cow_head(skb, dev->needed_headroom))
656 			goto free_skb;
657 
658 		tnl_params = &tunnel->parms.iph;
659 	}
660 
661 	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
662 		goto free_skb;
663 
664 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
665 	return NETDEV_TX_OK;
666 
667 free_skb:
668 	kfree_skb(skb);
669 	dev->stats.tx_dropped++;
670 	return NETDEV_TX_OK;
671 }
672 
673 static inline u8 tos_to_cos(u8 tos)
674 {
675 	u8 dscp, cos;
676 
677 	dscp = tos >> 2;
678 	cos = dscp >> 3;
679 	return cos;
680 }
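/* Editor's note: a worked example of the mapping above: tos 0xb8
 * (DSCP EF) gives dscp = 0xb8 >> 2 = 46 and cos = 46 >> 3 = 5, i.e. the
 * top three DSCP bits become the 802.1p-style class of service.
 */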
681 
682 static void erspan_build_header(struct sk_buff *skb,
683 				__be32 id, u32 index, bool truncate)
684 {
685 	struct iphdr *iphdr = ip_hdr(skb);
686 	struct ethhdr *eth = eth_hdr(skb);
687 	enum erspan_encap_type enc_type;
688 	struct erspanhdr *ershdr;
689 	struct qtag_prefix {
690 		__be16 eth_type;
691 		__be16 tci;
692 	} *qp;
693 	u16 vlan_tci = 0;
694 
695 	enc_type = ERSPAN_ENCAP_NOVLAN;
696 
697 	/* If the mirrored packet has a vlan tag, extract the tci and
698 	 * preserve the vlan header in the mirrored frame.
699 	 */
700 	if (eth->h_proto == htons(ETH_P_8021Q)) {
701 		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
702 		vlan_tci = ntohs(qp->tci);
703 		enc_type = ERSPAN_ENCAP_INFRAME;
704 	}
705 
706 	skb_push(skb, sizeof(*ershdr));
707 	ershdr = (struct erspanhdr *)skb->data;
708 	memset(ershdr, 0, sizeof(*ershdr));
709 
710 	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
711 				 (ERSPAN_VERSION << VER_OFFSET));
712 	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
713 			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
714 			   (enc_type << EN_OFFSET & EN_MASK) |
715 			   ((truncate << T_OFFSET) & T_MASK));
716 	ershdr->md.index = htonl(index & INDEX_MASK);
717 }
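/* Editor's note: the resulting ERSPAN type II header, as assembled above
 * (field widths per the ERSPAN draft; a sketch, not authoritative):
 *
 *   |ver=1 (4)|   vlan (12)   |cos(3)|en(2)|t|  session id (10)  |
 *   |      reserved (12)      |            index (20)            |
 */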
718 
719 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
720 			       struct net_device *dev)
721 {
722 	struct ip_tunnel *tunnel = netdev_priv(dev);
723 	bool truncate = false;
724 
725 	if (tunnel->collect_md) {
726 		erspan_fb_xmit(skb, dev, skb->protocol);
727 		return NETDEV_TX_OK;
728 	}
729 
730 	if (gre_handle_offloads(skb, false))
731 		goto free_skb;
732 
733 	if (skb_cow_head(skb, dev->needed_headroom))
734 		goto free_skb;
735 
736 	if (skb->len > dev->mtu) {
737 		pskb_trim(skb, dev->mtu);
738 		truncate = true;
739 	}
740 
741 	/* Push ERSPAN header */
742 	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
743 	tunnel->parms.o_flags &= ~TUNNEL_KEY;
744 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
745 	return NETDEV_TX_OK;
746 
747 free_skb:
748 	kfree_skb(skb);
749 	dev->stats.tx_dropped++;
750 	return NETDEV_TX_OK;
751 }
752 
753 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
754 				struct net_device *dev)
755 {
756 	struct ip_tunnel *tunnel = netdev_priv(dev);
757 
758 	if (tunnel->collect_md) {
759 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
760 		return NETDEV_TX_OK;
761 	}
762 
763 	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
764 		goto free_skb;
765 
766 	if (skb_cow_head(skb, dev->needed_headroom))
767 		goto free_skb;
768 
769 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
770 	return NETDEV_TX_OK;
771 
772 free_skb:
773 	kfree_skb(skb);
774 	dev->stats.tx_dropped++;
775 	return NETDEV_TX_OK;
776 }
777 
778 static int ipgre_tunnel_ioctl(struct net_device *dev,
779 			      struct ifreq *ifr, int cmd)
780 {
781 	int err;
782 	struct ip_tunnel_parm p;
783 
784 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
785 		return -EFAULT;
786 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
787 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
788 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
789 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
790 			return -EINVAL;
791 	}
792 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
793 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
794 
795 	err = ip_tunnel_ioctl(dev, &p, cmd);
796 	if (err)
797 		return err;
798 
799 	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
800 	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
801 
802 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
803 		return -EFAULT;
804 	return 0;
805 }
806 
807 /* Nice toy. Unfortunately, useless in real life :-)
808    It allows one to construct a virtual multiprotocol broadcast "LAN"
809    over the Internet, provided multicast routing is tuned.
810 
811 
812    I have no idea whether this bicycle was invented before me,
813    so I had to set ARPHRD_IPGRE to a random value.
814    I have the impression that Cisco could make something similar,
815    but this feature is apparently missing in IOS<=11.2(8).
816 
817    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
818    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
819 
820    ping -t 255 224.66.66.66
821 
822    If nobody answers, mbone does not work.
823 
824    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
825    ip addr add 10.66.66.<somewhat>/24 dev Universe
826    ifconfig Universe up
827    ifconfig Universe add fe80::<Your_real_addr>/10
828    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
829    ftp 10.66.66.66
830    ...
831    ftp fec0:6666:6666::193.233.7.65
832    ...
833  */
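/* Editor's note: on current systems the ifconfig steps in the example
 * above map to iproute2; an illustrative equivalent:
 *
 *   ip link set Universe up
 *   ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */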
834 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
835 			unsigned short type,
836 			const void *daddr, const void *saddr, unsigned int len)
837 {
838 	struct ip_tunnel *t = netdev_priv(dev);
839 	struct iphdr *iph;
840 	struct gre_base_hdr *greh;
841 
842 	iph = skb_push(skb, t->hlen + sizeof(*iph));
843 	greh = (struct gre_base_hdr *)(iph+1);
844 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
845 	greh->protocol = htons(type);
846 
847 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
848 
849 	/* Set the source hardware address. */
850 	if (saddr)
851 		memcpy(&iph->saddr, saddr, 4);
852 	if (daddr)
853 		memcpy(&iph->daddr, daddr, 4);
854 	if (iph->daddr)
855 		return t->hlen + sizeof(*iph);
856 
857 	return -(t->hlen + sizeof(*iph));
858 }
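/* Editor's note: returning the negated header length follows the
 * header_ops->create() convention (cf. eth_header()): the header was
 * pushed, but the destination address is not yet resolved.
 */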
859 
860 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
861 {
862 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
863 	memcpy(haddr, &iph->saddr, 4);
864 	return 4;
865 }
866 
867 static const struct header_ops ipgre_header_ops = {
868 	.create	= ipgre_header,
869 	.parse	= ipgre_header_parse,
870 };
871 
872 #ifdef CONFIG_NET_IPGRE_BROADCAST
873 static int ipgre_open(struct net_device *dev)
874 {
875 	struct ip_tunnel *t = netdev_priv(dev);
876 
877 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
878 		struct flowi4 fl4;
879 		struct rtable *rt;
880 
881 		rt = ip_route_output_gre(t->net, &fl4,
882 					 t->parms.iph.daddr,
883 					 t->parms.iph.saddr,
884 					 t->parms.o_key,
885 					 RT_TOS(t->parms.iph.tos),
886 					 t->parms.link);
887 		if (IS_ERR(rt))
888 			return -EADDRNOTAVAIL;
889 		dev = rt->dst.dev;
890 		ip_rt_put(rt);
891 		if (!__in_dev_get_rtnl(dev))
892 			return -EADDRNOTAVAIL;
893 		t->mlink = dev->ifindex;
894 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
895 	}
896 	return 0;
897 }
898 
899 static int ipgre_close(struct net_device *dev)
900 {
901 	struct ip_tunnel *t = netdev_priv(dev);
902 
903 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
904 		struct in_device *in_dev;
905 		in_dev = inetdev_by_index(t->net, t->mlink);
906 		if (in_dev)
907 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
908 	}
909 	return 0;
910 }
911 #endif
912 
913 static const struct net_device_ops ipgre_netdev_ops = {
914 	.ndo_init		= ipgre_tunnel_init,
915 	.ndo_uninit		= ip_tunnel_uninit,
916 #ifdef CONFIG_NET_IPGRE_BROADCAST
917 	.ndo_open		= ipgre_open,
918 	.ndo_stop		= ipgre_close,
919 #endif
920 	.ndo_start_xmit		= ipgre_xmit,
921 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
922 	.ndo_change_mtu		= ip_tunnel_change_mtu,
923 	.ndo_get_stats64	= ip_tunnel_get_stats64,
924 	.ndo_get_iflink		= ip_tunnel_get_iflink,
925 };
926 
927 #define GRE_FEATURES (NETIF_F_SG |		\
928 		      NETIF_F_FRAGLIST |	\
929 		      NETIF_F_HIGHDMA |		\
930 		      NETIF_F_HW_CSUM)
931 
932 static void ipgre_tunnel_setup(struct net_device *dev)
933 {
934 	dev->netdev_ops		= &ipgre_netdev_ops;
935 	dev->type		= ARPHRD_IPGRE;
936 	ip_tunnel_setup(dev, ipgre_net_id);
937 }
938 
939 static void __gre_tunnel_init(struct net_device *dev)
940 {
941 	struct ip_tunnel *tunnel;
942 	int t_hlen;
943 
944 	tunnel = netdev_priv(dev);
945 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
946 	tunnel->parms.iph.protocol = IPPROTO_GRE;
947 
948 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
949 
950 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
951 
952 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
953 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
954 
955 	dev->features		|= GRE_FEATURES;
956 	dev->hw_features	|= GRE_FEATURES;
957 
958 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
959 		/* TCP offload with GRE SEQ is not supported, nor
960 		 * can we support 2 levels of outer headers requiring
961 		 * an update.
962 		 */
963 		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
964 		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
965 			dev->features    |= NETIF_F_GSO_SOFTWARE;
966 			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
967 		}
968 
969 		/* Can use a lockless transmit, unless we generate
970 		 * output sequences
971 		 */
972 		dev->features |= NETIF_F_LLTX;
973 	}
974 }
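/* Editor's note: for a tunnel with no GRE options, gre_calc_hlen() gives
 * a 4-byte GRE header, so t_hlen = 4 + 20 = 24 in __gre_tunnel_init()
 * and the initial dev->mtu is 1500 - 24 - 4 = 1472; later tunnel setup
 * may adjust it.
 */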
975 
976 static int ipgre_tunnel_init(struct net_device *dev)
977 {
978 	struct ip_tunnel *tunnel = netdev_priv(dev);
979 	struct iphdr *iph = &tunnel->parms.iph;
980 
981 	__gre_tunnel_init(dev);
982 
983 	memcpy(dev->dev_addr, &iph->saddr, 4);
984 	memcpy(dev->broadcast, &iph->daddr, 4);
985 
986 	dev->flags		= IFF_NOARP;
987 	netif_keep_dst(dev);
988 	dev->addr_len		= 4;
989 
990 	if (iph->daddr && !tunnel->collect_md) {
991 #ifdef CONFIG_NET_IPGRE_BROADCAST
992 		if (ipv4_is_multicast(iph->daddr)) {
993 			if (!iph->saddr)
994 				return -EINVAL;
995 			dev->flags = IFF_BROADCAST;
996 			dev->header_ops = &ipgre_header_ops;
997 		}
998 #endif
999 	} else if (!tunnel->collect_md) {
1000 		dev->header_ops = &ipgre_header_ops;
1001 	}
1002 
1003 	return ip_tunnel_init(dev);
1004 }
1005 
1006 static const struct gre_protocol ipgre_protocol = {
1007 	.handler     = gre_rcv,
1008 	.err_handler = gre_err,
1009 };
1010 
1011 static int __net_init ipgre_init_net(struct net *net)
1012 {
1013 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1014 }
1015 
1016 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1017 {
1018 	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1019 }
1020 
1021 static struct pernet_operations ipgre_net_ops = {
1022 	.init = ipgre_init_net,
1023 	.exit_batch = ipgre_exit_batch_net,
1024 	.id   = &ipgre_net_id,
1025 	.size = sizeof(struct ip_tunnel_net),
1026 };
1027 
1028 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1029 				 struct netlink_ext_ack *extack)
1030 {
1031 	__be16 flags;
1032 
1033 	if (!data)
1034 		return 0;
1035 
1036 	flags = 0;
1037 	if (data[IFLA_GRE_IFLAGS])
1038 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1039 	if (data[IFLA_GRE_OFLAGS])
1040 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1041 	if (flags & (GRE_VERSION|GRE_ROUTING))
1042 		return -EINVAL;
1043 
1044 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1045 	    data[IFLA_GRE_ENCAP_TYPE] &&
1046 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1047 		return -EINVAL;
1048 
1049 	return 0;
1050 }
1051 
1052 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1053 			      struct netlink_ext_ack *extack)
1054 {
1055 	__be32 daddr;
1056 
1057 	if (tb[IFLA_ADDRESS]) {
1058 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1059 			return -EINVAL;
1060 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1061 			return -EADDRNOTAVAIL;
1062 	}
1063 
1064 	if (!data)
1065 		goto out;
1066 
1067 	if (data[IFLA_GRE_REMOTE]) {
1068 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1069 		if (!daddr)
1070 			return -EINVAL;
1071 	}
1072 
1073 out:
1074 	return ipgre_tunnel_validate(tb, data, extack);
1075 }
1076 
1077 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1078 			   struct netlink_ext_ack *extack)
1079 {
1080 	__be16 flags = 0;
1081 	int ret;
1082 
1083 	if (!data)
1084 		return 0;
1085 
1086 	ret = ipgre_tap_validate(tb, data, extack);
1087 	if (ret)
1088 		return ret;
1089 
1090 	/* ERSPAN should only have the GRE sequence and key flags */
1091 	if (data[IFLA_GRE_OFLAGS])
1092 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1093 	if (data[IFLA_GRE_IFLAGS])
1094 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1095 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1096 	    flags != (GRE_SEQ | GRE_KEY))
1097 		return -EINVAL;
1098 
1099 	/* The ERSPAN session ID is only 10 bits wide. Since we reuse
1100 	 * the 32-bit key field as the ID, check its range.
1101 	 */
1102 	if (data[IFLA_GRE_IKEY] &&
1103 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1104 		return -EINVAL;
1105 
1106 	if (data[IFLA_GRE_OKEY] &&
1107 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1108 		return -EINVAL;
1109 
1110 	return 0;
1111 }
1112 
1113 static int ipgre_netlink_parms(struct net_device *dev,
1114 				struct nlattr *data[],
1115 				struct nlattr *tb[],
1116 				struct ip_tunnel_parm *parms,
1117 				__u32 *fwmark)
1118 {
1119 	struct ip_tunnel *t = netdev_priv(dev);
1120 
1121 	memset(parms, 0, sizeof(*parms));
1122 
1123 	parms->iph.protocol = IPPROTO_GRE;
1124 
1125 	if (!data)
1126 		return 0;
1127 
1128 	if (data[IFLA_GRE_LINK])
1129 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1130 
1131 	if (data[IFLA_GRE_IFLAGS])
1132 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1133 
1134 	if (data[IFLA_GRE_OFLAGS])
1135 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1136 
1137 	if (data[IFLA_GRE_IKEY])
1138 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1139 
1140 	if (data[IFLA_GRE_OKEY])
1141 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1142 
1143 	if (data[IFLA_GRE_LOCAL])
1144 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1145 
1146 	if (data[IFLA_GRE_REMOTE])
1147 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1148 
1149 	if (data[IFLA_GRE_TTL])
1150 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1151 
1152 	if (data[IFLA_GRE_TOS])
1153 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1154 
1155 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1156 		if (t->ignore_df)
1157 			return -EINVAL;
1158 		parms->iph.frag_off = htons(IP_DF);
1159 	}
1160 
1161 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1162 		t->collect_md = true;
1163 		if (dev->type == ARPHRD_IPGRE)
1164 			dev->type = ARPHRD_NONE;
1165 	}
1166 
1167 	if (data[IFLA_GRE_IGNORE_DF]) {
1168 		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1169 		  && (parms->iph.frag_off & htons(IP_DF)))
1170 			return -EINVAL;
1171 		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1172 	}
1173 
1174 	if (data[IFLA_GRE_FWMARK])
1175 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1176 
1177 	if (data[IFLA_GRE_ERSPAN_INDEX]) {
1178 		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1179 
1180 		if (t->index & ~INDEX_MASK)
1181 			return -EINVAL;
1182 	}
1183 
1184 	return 0;
1185 }
1186 
1187 /* This function returns true when ENCAP attributes are present in the nl msg */
1188 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1189 				      struct ip_tunnel_encap *ipencap)
1190 {
1191 	bool ret = false;
1192 
1193 	memset(ipencap, 0, sizeof(*ipencap));
1194 
1195 	if (!data)
1196 		return ret;
1197 
1198 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1199 		ret = true;
1200 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1201 	}
1202 
1203 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1204 		ret = true;
1205 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1206 	}
1207 
1208 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1209 		ret = true;
1210 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1211 	}
1212 
1213 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1214 		ret = true;
1215 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1216 	}
1217 
1218 	return ret;
1219 }
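/* Editor's note: these attributes select an additional outer UDP
 * encapsulation (e.g. FOU/GUE) for the GRE packets; when none are
 * present, ipgre_newlink()/ipgre_changelink() skip encap setup.
 */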
1220 
1221 static int gre_tap_init(struct net_device *dev)
1222 {
1223 	__gre_tunnel_init(dev);
1224 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1225 
1226 	return ip_tunnel_init(dev);
1227 }
1228 
1229 static const struct net_device_ops gre_tap_netdev_ops = {
1230 	.ndo_init		= gre_tap_init,
1231 	.ndo_uninit		= ip_tunnel_uninit,
1232 	.ndo_start_xmit		= gre_tap_xmit,
1233 	.ndo_set_mac_address 	= eth_mac_addr,
1234 	.ndo_validate_addr	= eth_validate_addr,
1235 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1236 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1237 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1238 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1239 };
1240 
1241 static int erspan_tunnel_init(struct net_device *dev)
1242 {
1243 	struct ip_tunnel *tunnel = netdev_priv(dev);
1244 	int t_hlen;
1245 
1246 	tunnel->tun_hlen = 8;
1247 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1248 	t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
1249 
1250 	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
1251 	dev->mtu = ETH_DATA_LEN - t_hlen - 4;
1252 	dev->features		|= GRE_FEATURES;
1253 	dev->hw_features	|= GRE_FEATURES;
1254 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1255 
1256 	return ip_tunnel_init(dev);
1257 }
1258 
1259 static const struct net_device_ops erspan_netdev_ops = {
1260 	.ndo_init		= erspan_tunnel_init,
1261 	.ndo_uninit		= ip_tunnel_uninit,
1262 	.ndo_start_xmit		= erspan_xmit,
1263 	.ndo_set_mac_address	= eth_mac_addr,
1264 	.ndo_validate_addr	= eth_validate_addr,
1265 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1266 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1267 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1268 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1269 };
1270 
1271 static void ipgre_tap_setup(struct net_device *dev)
1272 {
1273 	ether_setup(dev);
1274 	dev->netdev_ops	= &gre_tap_netdev_ops;
1275 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1276 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1277 	ip_tunnel_setup(dev, gre_tap_net_id);
1278 }
1279 
1280 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1281 			 struct nlattr *tb[], struct nlattr *data[],
1282 			 struct netlink_ext_ack *extack)
1283 {
1284 	struct ip_tunnel_parm p;
1285 	struct ip_tunnel_encap ipencap;
1286 	__u32 fwmark = 0;
1287 	int err;
1288 
1289 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1290 		struct ip_tunnel *t = netdev_priv(dev);
1291 		err = ip_tunnel_encap_setup(t, &ipencap);
1292 
1293 		if (err < 0)
1294 			return err;
1295 	}
1296 
1297 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1298 	if (err < 0)
1299 		return err;
1300 	return ip_tunnel_newlink(dev, tb, &p, fwmark);
1301 }
1302 
1303 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1304 			    struct nlattr *data[],
1305 			    struct netlink_ext_ack *extack)
1306 {
1307 	struct ip_tunnel *t = netdev_priv(dev);
1308 	struct ip_tunnel_parm p;
1309 	struct ip_tunnel_encap ipencap;
1310 	__u32 fwmark = t->fwmark;
1311 	int err;
1312 
1313 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1314 		err = ip_tunnel_encap_setup(t, &ipencap);
1315 
1316 		if (err < 0)
1317 			return err;
1318 	}
1319 
1320 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1321 	if (err < 0)
1322 		return err;
1323 	return ip_tunnel_changelink(dev, tb, &p, fwmark);
1324 }
1325 
1326 static size_t ipgre_get_size(const struct net_device *dev)
1327 {
1328 	return
1329 		/* IFLA_GRE_LINK */
1330 		nla_total_size(4) +
1331 		/* IFLA_GRE_IFLAGS */
1332 		nla_total_size(2) +
1333 		/* IFLA_GRE_OFLAGS */
1334 		nla_total_size(2) +
1335 		/* IFLA_GRE_IKEY */
1336 		nla_total_size(4) +
1337 		/* IFLA_GRE_OKEY */
1338 		nla_total_size(4) +
1339 		/* IFLA_GRE_LOCAL */
1340 		nla_total_size(4) +
1341 		/* IFLA_GRE_REMOTE */
1342 		nla_total_size(4) +
1343 		/* IFLA_GRE_TTL */
1344 		nla_total_size(1) +
1345 		/* IFLA_GRE_TOS */
1346 		nla_total_size(1) +
1347 		/* IFLA_GRE_PMTUDISC */
1348 		nla_total_size(1) +
1349 		/* IFLA_GRE_ENCAP_TYPE */
1350 		nla_total_size(2) +
1351 		/* IFLA_GRE_ENCAP_FLAGS */
1352 		nla_total_size(2) +
1353 		/* IFLA_GRE_ENCAP_SPORT */
1354 		nla_total_size(2) +
1355 		/* IFLA_GRE_ENCAP_DPORT */
1356 		nla_total_size(2) +
1357 		/* IFLA_GRE_COLLECT_METADATA */
1358 		nla_total_size(0) +
1359 		/* IFLA_GRE_IGNORE_DF */
1360 		nla_total_size(1) +
1361 		/* IFLA_GRE_FWMARK */
1362 		nla_total_size(4) +
1363 		/* IFLA_GRE_ERSPAN_INDEX */
1364 		nla_total_size(4) +
1365 		0;
1366 }
1367 
1368 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1369 {
1370 	struct ip_tunnel *t = netdev_priv(dev);
1371 	struct ip_tunnel_parm *p = &t->parms;
1372 
1373 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1374 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1375 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1376 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1377 			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1378 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1379 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1380 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1381 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1382 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1383 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1384 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1385 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1386 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1387 		goto nla_put_failure;
1388 
1389 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1390 			t->encap.type) ||
1391 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1392 			 t->encap.sport) ||
1393 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1394 			 t->encap.dport) ||
1395 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1396 			t->encap.flags))
1397 		goto nla_put_failure;
1398 
1399 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1400 		goto nla_put_failure;
1401 
1402 	if (t->collect_md) {
1403 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1404 			goto nla_put_failure;
1405 	}
1406 
1407 	if (t->index)
1408 		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1409 			goto nla_put_failure;
1410 
1411 	return 0;
1412 
1413 nla_put_failure:
1414 	return -EMSGSIZE;
1415 }
1416 
1417 static void erspan_setup(struct net_device *dev)
1418 {
1419 	ether_setup(dev);
1420 	dev->netdev_ops = &erspan_netdev_ops;
1421 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1422 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1423 	ip_tunnel_setup(dev, erspan_net_id);
1424 }
1425 
1426 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1427 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1428 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1429 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1430 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1431 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1432 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1433 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1434 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1435 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1436 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1437 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1438 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1439 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1440 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1441 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1442 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1443 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1444 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1445 };
1446 
1447 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1448 	.kind		= "gre",
1449 	.maxtype	= IFLA_GRE_MAX,
1450 	.policy		= ipgre_policy,
1451 	.priv_size	= sizeof(struct ip_tunnel),
1452 	.setup		= ipgre_tunnel_setup,
1453 	.validate	= ipgre_tunnel_validate,
1454 	.newlink	= ipgre_newlink,
1455 	.changelink	= ipgre_changelink,
1456 	.dellink	= ip_tunnel_dellink,
1457 	.get_size	= ipgre_get_size,
1458 	.fill_info	= ipgre_fill_info,
1459 	.get_link_net	= ip_tunnel_get_link_net,
1460 };
1461 
1462 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1463 	.kind		= "gretap",
1464 	.maxtype	= IFLA_GRE_MAX,
1465 	.policy		= ipgre_policy,
1466 	.priv_size	= sizeof(struct ip_tunnel),
1467 	.setup		= ipgre_tap_setup,
1468 	.validate	= ipgre_tap_validate,
1469 	.newlink	= ipgre_newlink,
1470 	.changelink	= ipgre_changelink,
1471 	.dellink	= ip_tunnel_dellink,
1472 	.get_size	= ipgre_get_size,
1473 	.fill_info	= ipgre_fill_info,
1474 	.get_link_net	= ip_tunnel_get_link_net,
1475 };
1476 
1477 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1478 	.kind		= "erspan",
1479 	.maxtype	= IFLA_GRE_MAX,
1480 	.policy		= ipgre_policy,
1481 	.priv_size	= sizeof(struct ip_tunnel),
1482 	.setup		= erspan_setup,
1483 	.validate	= erspan_validate,
1484 	.newlink	= ipgre_newlink,
1485 	.changelink	= ipgre_changelink,
1486 	.dellink	= ip_tunnel_dellink,
1487 	.get_size	= ipgre_get_size,
1488 	.fill_info	= ipgre_fill_info,
1489 	.get_link_net	= ip_tunnel_get_link_net,
1490 };
1491 
1492 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1493 					u8 name_assign_type)
1494 {
1495 	struct nlattr *tb[IFLA_MAX + 1];
1496 	struct net_device *dev;
1497 	LIST_HEAD(list_kill);
1498 	struct ip_tunnel *t;
1499 	int err;
1500 
1501 	memset(&tb, 0, sizeof(tb));
1502 
1503 	dev = rtnl_create_link(net, name, name_assign_type,
1504 			       &ipgre_tap_ops, tb);
1505 	if (IS_ERR(dev))
1506 		return dev;
1507 
1508 	/* Configure a flow-based GRE device. */
1509 	t = netdev_priv(dev);
1510 	t->collect_md = true;
1511 
1512 	err = ipgre_newlink(net, dev, tb, NULL, NULL);
1513 	if (err < 0) {
1514 		free_netdev(dev);
1515 		return ERR_PTR(err);
1516 	}
1517 
1518 	/* openvswitch users expect packet sizes to be unrestricted,
1519 	 * so set the largest MTU we can.
1520 	 */
1521 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1522 	if (err)
1523 		goto out;
1524 
1525 	err = rtnl_configure_link(dev, NULL);
1526 	if (err < 0)
1527 		goto out;
1528 
1529 	return dev;
1530 out:
1531 	ip_tunnel_dellink(dev, &list_kill);
1532 	unregister_netdevice_many(&list_kill);
1533 	return ERR_PTR(err);
1534 }
1535 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1536 
1537 static int __net_init ipgre_tap_init_net(struct net *net)
1538 {
1539 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1540 }
1541 
1542 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1543 {
1544 	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1545 }
1546 
1547 static struct pernet_operations ipgre_tap_net_ops = {
1548 	.init = ipgre_tap_init_net,
1549 	.exit_batch = ipgre_tap_exit_batch_net,
1550 	.id   = &gre_tap_net_id,
1551 	.size = sizeof(struct ip_tunnel_net),
1552 };
1553 
1554 static int __net_init erspan_init_net(struct net *net)
1555 {
1556 	return ip_tunnel_init_net(net, erspan_net_id,
1557 				  &erspan_link_ops, "erspan0");
1558 }
1559 
1560 static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1561 {
1562 	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1563 }
1564 
1565 static struct pernet_operations erspan_net_ops = {
1566 	.init = erspan_init_net,
1567 	.exit_batch = erspan_exit_batch_net,
1568 	.id   = &erspan_net_id,
1569 	.size = sizeof(struct ip_tunnel_net),
1570 };
1571 
1572 static int __init ipgre_init(void)
1573 {
1574 	int err;
1575 
1576 	pr_info("GRE over IPv4 tunneling driver\n");
1577 
1578 	err = register_pernet_device(&ipgre_net_ops);
1579 	if (err < 0)
1580 		return err;
1581 
1582 	err = register_pernet_device(&ipgre_tap_net_ops);
1583 	if (err < 0)
1584 		goto pnet_tap_failed;
1585 
1586 	err = register_pernet_device(&erspan_net_ops);
1587 	if (err < 0)
1588 		goto pnet_erspan_failed;
1589 
1590 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1591 	if (err < 0) {
1592 		pr_info("%s: can't add protocol\n", __func__);
1593 		goto add_proto_failed;
1594 	}
1595 
1596 	err = rtnl_link_register(&ipgre_link_ops);
1597 	if (err < 0)
1598 		goto rtnl_link_failed;
1599 
1600 	err = rtnl_link_register(&ipgre_tap_ops);
1601 	if (err < 0)
1602 		goto tap_ops_failed;
1603 
1604 	err = rtnl_link_register(&erspan_link_ops);
1605 	if (err < 0)
1606 		goto erspan_link_failed;
1607 
1608 	return 0;
1609 
1610 erspan_link_failed:
1611 	rtnl_link_unregister(&ipgre_tap_ops);
1612 tap_ops_failed:
1613 	rtnl_link_unregister(&ipgre_link_ops);
1614 rtnl_link_failed:
1615 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1616 add_proto_failed:
1617 	unregister_pernet_device(&erspan_net_ops);
1618 pnet_erspan_failed:
1619 	unregister_pernet_device(&ipgre_tap_net_ops);
1620 pnet_tap_failed:
1621 	unregister_pernet_device(&ipgre_net_ops);
1622 	return err;
1623 }
1624 
1625 static void __exit ipgre_fini(void)
1626 {
1627 	rtnl_link_unregister(&ipgre_tap_ops);
1628 	rtnl_link_unregister(&ipgre_link_ops);
1629 	rtnl_link_unregister(&erspan_link_ops);
1630 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1631 	unregister_pernet_device(&ipgre_tap_net_ops);
1632 	unregister_pernet_device(&ipgre_net_ops);
1633 	unregister_pernet_device(&erspan_net_ops);
1634 }
1635 
1636 module_init(ipgre_init);
1637 module_exit(ipgre_fini);
1638 MODULE_LICENSE("GPL");
1639 MODULE_ALIAS_RTNL_LINK("gre");
1640 MODULE_ALIAS_RTNL_LINK("gretap");
1641 MODULE_ALIAS_RTNL_LINK("erspan");
1642 MODULE_ALIAS_NETDEV("gre0");
1643 MODULE_ALIAS_NETDEV("gretap0");
1644 MODULE_ALIAS_NETDEV("erspan0");
1645