/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in EVERY
   skb, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter; once we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two
   problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea turned out to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least, in my
     neighbourhood) return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node. That is difficult or even impossible,
   especially taking fragmentation into account. In short, ttl is not a
   solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   and that is ALL. :-) Well, it does not remove the problem completely,
   but the exponential growth of network traffic becomes linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there is a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But that is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

static int ip_gre_calc_hlen(__be16 o_flags)
{
	int addend = 4;

	if (o_flags & TUNNEL_CSUM)
		addend += 4;
	if (o_flags & TUNNEL_KEY)
		addend += 4;
	if (o_flags & TUNNEL_SEQ)
		addend += 4;
	return addend;
}

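/* Worked example for ip_gre_calc_hlen() (illustration only, not used by
 * the code): the base GRE header is one 32-bit word of flags plus the
 * protocol, and each optional field appends one more 32-bit word, so
 *
 *	ip_gre_calc_hlen(0)				=  4
 *	ip_gre_calc_hlen(TUNNEL_KEY)			=  8
 *	ip_gre_calc_hlen(TUNNEL_CSUM | TUNNEL_KEY)	= 12
 *	ip_gre_calc_hlen(TUNNEL_CSUM | TUNNEL_KEY |
 *			 TUNNEL_SEQ)			= 16
 *
 * which matches the rfc2890 layout:
 *
 *	+-------------------+-------------------+
 *	|  flags / version  |     protocol      |
 *	+-------------------+-------------------+
 *	|     checksum      |     reserved      |  (if TUNNEL_CSUM)
 *	+-------------------+-------------------+
 *	|                  key                  |  (if TUNNEL_KEY)
 *	+---------------------------------------+
 *	|            sequence number            |  (if TUNNEL_SEQ)
 *	+---------------------------------------+
 */
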
static __be16 gre_flags_to_tnl_flags(__be16 flags)
{
	__be16 tflags = 0;

	if (flags & GRE_CSUM)
		tflags |= TUNNEL_CSUM;
	if (flags & GRE_ROUTING)
		tflags |= TUNNEL_ROUTING;
	if (flags & GRE_KEY)
		tflags |= TUNNEL_KEY;
	if (flags & GRE_SEQ)
		tflags |= TUNNEL_SEQ;
	if (flags & GRE_STRICT)
		tflags |= TUNNEL_STRICT;
	if (flags & GRE_REC)
		tflags |= TUNNEL_REC;
	if (flags & GRE_VERSION)
		tflags |= TUNNEL_VERSION;

	return tflags;
}

static __be16 tnl_flags_to_gre_flags(__be16 tflags)
{
	__be16 flags = 0;

	if (tflags & TUNNEL_CSUM)
		flags |= GRE_CSUM;
	if (tflags & TUNNEL_ROUTING)
		flags |= GRE_ROUTING;
	if (tflags & TUNNEL_KEY)
		flags |= GRE_KEY;
	if (tflags & TUNNEL_SEQ)
		flags |= GRE_SEQ;
	if (tflags & TUNNEL_STRICT)
		flags |= GRE_STRICT;
	if (tflags & TUNNEL_REC)
		flags |= GRE_REC;
	if (tflags & TUNNEL_VERSION)
		flags |= GRE_VERSION;

	return flags;
}

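/* For orientation: GRE_* are the on-the-wire bits from the first 16-bit
 * word of the GRE header (GRE_CSUM is its top bit), while TUNNEL_* are
 * the stack-internal flags shared by all ip_tunnel users. The two
 * helpers above translate bit-for-bit between the representations, e.g.
 *
 *	gre_flags_to_tnl_flags(GRE_CSUM | GRE_KEY)
 *		== (TUNNEL_CSUM | TUNNEL_KEY)
 */
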
/* Fills in tpi and returns header length to be pulled. */
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
			    bool *csum_err)
{
	const struct gre_base_hdr *greh;
	__be32 *options;
	int hdr_len;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
		return -EINVAL;

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
		return -EINVAL;

	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
	hdr_len = ip_gre_calc_hlen(tpi->flags);

	if (!pskb_may_pull(skb, hdr_len))
		return -EINVAL;

	greh = (struct gre_base_hdr *)skb_transport_header(skb);
	tpi->proto = greh->protocol;

	options = (__be32 *)(greh + 1);
	if (greh->flags & GRE_CSUM) {
		if (skb_checksum_simple_validate(skb)) {
			*csum_err = true;
			return -EINVAL;
		}

		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
					 null_compute_pseudo);
		options++;
	}

	if (greh->flags & GRE_KEY) {
		tpi->key = *options;
		options++;
	} else {
		tpi->key = 0;
	}
	if (unlikely(greh->flags & GRE_SEQ)) {
		tpi->seq = *options;
		options++;
	} else {
		tpi->seq = 0;
	}
	/* WCCP version 1 and 2 protocol decoding.
	 * - Change the protocol to IP
	 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
	 */
	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
		tpi->proto = htons(ETH_P_IP);
		if ((*(u8 *)options & 0xF0) != 0x40) {
			hdr_len += 4;
			if (!pskb_may_pull(skb, hdr_len))
				return -EINVAL;
		}
	}
	return hdr_len;
}

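/* Example walk through parse_gre_header(), on a hypothetical keyed,
 * unsequenced frame whose transport header starts with
 *
 *	20 00 08 00 00 00 00 2a
 *	^flags ^proto       ^key
 *
 * GRE_KEY (0x2000) is the only flag set, so hdr_len = 8,
 * tpi->proto = htons(ETH_P_IP), tpi->key = htonl(0x2a) and
 * tpi->seq = 0; the options pointer skips no checksum word and
 * consumes exactly one key word.
 */
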
static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)
{
	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. This means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco's "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even soft
	   state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee;
	   why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return;

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}

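/* Layout of the ICMP error handled above, for orientation (the inner
 * headers are the ones this host originally transmitted):
 *
 *	outer IPv4 | ICMP type/code | inner IPv4 | >= 8 bytes of GRE
 *	                              ^ icmp_hdr(skb) + 1
 *
 * With only 8 quoted bytes, just the GRE flags/protocol word and one
 * optional word are visible, which is why a key stored in the third
 * word (behind a checksum) may already be cut off.
 */
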
static void gre_err(struct sk_buff *skb, u32 info)
{
	/* See the comment in ipgre_err() above about truncated ICMP
	 * payloads and Cisco's placement of the GRE key.
	 */

	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
		if (!csum_err)	/* tolerate csum errors; bail on anything else */
			return;
	}

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
			      IPPROTO_GRE, 0);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static __be64 key_to_tunnel_id(__be32 key)
{
#ifdef __BIG_ENDIAN
	return (__force __be64)((__force u32)key);
#else
	return (__force __be64)((__force u64)key << 32);
#endif
}

/* Returns the least-significant 32 bits of a __be64. */
static __be32 tunnel_id_to_key(__be64 x)
{
#ifdef __BIG_ENDIAN
	return (__force __be32)x;
#else
	return (__force __be32)((__force u64)x >> 32);
#endif
}

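/* Sanity check for the two converters above: a GRE key occupies the
 * low-order 32 bits of the 64-bit tunnel id with its network byte order
 * preserved, so on either endianness
 *
 *	key_to_tunnel_id(htonl(42))	  == cpu_to_be64(42)
 *	tunnel_id_to_key(cpu_to_be64(42)) == htonl(42)
 *
 * and the round trip through both helpers is the identity.
 */
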
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		skb_pop_mac_header(skb);
		if (tunnel->collect_md) {
			__be16 flags;
			__be64 tun_id;

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = parse_gre_header(skb, &tpi, &csum_err);
	if (hdr_len < 0)
		goto drop;
	if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0)
		goto drop;

	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
		return 0;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static __sum16 gre_checksum(struct sk_buff *skb)
{
	__wsum csum;

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		csum = lco_csum(skb);
	else
		csum = skb_checksum(skb, 0, skb->len, 0);
	return csum_fold(csum);
}

static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
			 __be16 proto, __be32 key, __be32 seq)
{
	struct gre_base_hdr *greh;

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	greh = (struct gre_base_hdr *)skb->data;
	greh->flags = tnl_flags_to_gre_flags(flags);
	greh->protocol = proto;

	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

		if (flags & TUNNEL_SEQ) {
			*ptr = seq;
			ptr--;
		}
		if (flags & TUNNEL_KEY) {
			*ptr = key;
			ptr--;
		}
		if (flags & TUNNEL_CSUM &&
		    !(skb_shinfo(skb)->gso_type &
		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
			*ptr = 0;
			*(__sum16 *)ptr = gre_checksum(skb);
		}
	}
}

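/* build_header() fills the optional words back to front. With
 * TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ, hdr_len is 16 and ptr starts
 * at the last 32-bit word:
 *
 *	offset  0: flags | protocol
 *	offset  4: checksum (zeroed, then computed over the whole payload)
 *	offset  8: key
 *	offset 12: sequence number
 *
 * Each store is followed by ptr--, and the checksum is written last,
 * once every other word is in place.
 */
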
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)
		tunnel->o_seqno++;

	/* Push GRE header. */
	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));

	skb_set_inner_protocol(skb, proto);
	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
					   bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, &fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl.saddr);
	}

	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}

	/* Push Tunnel header. */
	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
	if (IS_ERR(skb)) {
		skb = NULL;
		goto err_free_rt;
	}

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	build_header(skb, tunnel_hlen, flags, proto,
		     tunnel_id_to_key(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	rt = gre_get_rt(skb, dev, &fl4, &info->key);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to gre header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
	if (IS_ERR(skb))
		goto out;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
out:
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)
{
	int err;
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}
	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);
	if (err)
		return err;

	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return 0;
}

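/* For reference, a minimal userspace sketch (not part of this file) of
 * how the ioctl above is reached; this is essentially what iproute2's
 * "ip tunnel add" does:
 *
 *	struct ip_tunnel_parm p = { };
 *	struct ifreq ifr = { };
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "gre1");
 *	p.iph.version = 4;
 *	p.iph.ihl = 5;
 *	p.iph.protocol = IPPROTO_GRE;
 *	p.iph.saddr = inet_addr("10.0.0.1");
 *	p.iph.daddr = inet_addr("10.0.0.2");
 *	strcpy(ifr.ifr_name, "gre0");
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */
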
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   which is why I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(t->net, &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	int t_hlen;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
		}

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
	}
}

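/* MTU arithmetic from __gre_tunnel_init(), worked out for a plain GRE
 * tunnel with no secondary encapsulation: tun_hlen = 4 and
 * t_hlen = 4 + 20 = 24, so
 *
 *	dev->mtu = 1500 - 24 - 4 = 1472
 *
 * while a keyed tunnel (tun_hlen = 8) gets 1468. The trailing 4 bytes
 * are a fixed reserve this driver has always subtracted.
 */
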
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data);
}

static void ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->collect_md = true;
	}
}

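/* For reference, the iproute2 command
 *
 *	ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 \
 *		ttl 64 key 42 csum
 *
 * arrives here with IFLA_GRE_LOCAL/REMOTE/TTL set accordingly,
 * IFLA_GRE_IKEY == IFLA_GRE_OKEY == htonl(42), and GRE_KEY | GRE_CSUM
 * set in both IFLA_GRE_IFLAGS and IFLA_GRE_OFLAGS.
 */
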
/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(dev, data, tb, &p);
	return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	ipgre_netlink_parms(dev, data, tb, &p);
	return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL);
	if (err < 0)
		goto out;

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	return dev;
out:
	free_netdev(dev);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

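/* Sketch of a caller creating a flow-based gretap device, modelled on
 * the openvswitch usage; names below are illustrative, not from this
 * file. rtnl_lock must be held, as for any rtnl_create_link() caller:
 *
 *	struct net_device *dev;
 *
 *	rtnl_lock();
 *	dev = gretap_fb_dev_create(net, "gretap%d", NET_NAME_ENUM);
 *	if (IS_ERR(dev)) {
 *		rtnl_unlock();
 *		return PTR_ERR(dev);
 *	}
 *	dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */
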
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	return 0;

tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");