xref: /linux/net/ipv4/ip_gre.c (revision 258e4bfcbdaa6d128c391e6e25f03d54dee4f226)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/if_vlan.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ip_tunnels.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50 #include <net/dst_metadata.h>
51 
52 #if IS_ENABLED(CONFIG_IPV6)
53 #include <net/ipv6.h>
54 #include <net/ip6_fib.h>
55 #include <net/ip6_route.h>
56 #endif
57 
58 /*
59    Problems & solutions
60    --------------------
61 
62    1. The most important issue is detecting local dead loops.
63    They would cause complete host lockup in transmit, which
64    would be "resolved" by stack overflow or, if queueing is enabled,
65    with infinite looping in net_bh.
66 
67    We cannot track such dead loops during route installation,
68    it is infeasible task. The most general solutions would be
69    to keep skb->encapsulation counter (sort of local ttl),
70    and silently drop packet when it expires. It is a good
71    solution, but it supposes maintaining new variable in ALL
72    skb, even if no tunneling is used.
73 
74    Current solution: xmit_recursion breaks dead loops. This is a percpu
75    counter, since when we enter the first ndo_xmit(), cpu migration is
76    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
77 
78    2. Networking dead loops would not kill routers, but would really
79    kill network. IP hop limit plays role of "t->recursion" in this case,
80    if we copy it from packet being encapsulated to upper header.
81    It is very good solution, but it introduces two problems:
82 
83    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
84      do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea turned out to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all other routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.
91 
92    Hence, if we want that OSPF worked or traceroute said something reasonable,
93    we should search for another solution.
94 
   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. In short, ttl is not a solution at all.
98 
99    Current solution: The solution was UNEXPECTEDLY SIMPLE.
100    We force DF flag on tunnels with preconfigured hop limit,
101    that is ALL. :-) Well, it does not remove the problem completely,
102    but exponential growth of network traffic is changed to linear
103    (branches, that exceed pmtu are pruned) and tunnel mtu
104    rapidly degrades to value <68, where looping stops.
105    Yes, it is not good if there exists a router in the loop,
106    which does not force DF, even when encapsulating packets have DF set.
107    But it is not our problem! Nobody could accuse us, we made
108    all that we could make. Even if it is your gated who injected
109    fatal route to network, even if it were you who configured
110    fatal static route: you are innocent. :-)
111 
112    Alexey Kuznetsov.
113  */
114 
/* Module parameter (mode 0644).  Its value is handed to ip_tunnel_rcv()
 * on the receive path.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Forward declarations: both are referenced before they are defined. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

/* Per-network-namespace ids passed to net_generic() to find the
 * ip_tunnel_net of the plain GRE devices and of the Ethernet-over-GRE
 * (ETH_P_TEB) tap devices respectively.
 */
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
124 
125 static int ip_gre_calc_hlen(__be16 o_flags)
126 {
127 	int addend = 4;
128 
129 	if (o_flags & TUNNEL_CSUM)
130 		addend += 4;
131 	if (o_flags & TUNNEL_KEY)
132 		addend += 4;
133 	if (o_flags & TUNNEL_SEQ)
134 		addend += 4;
135 	return addend;
136 }
137 
138 static __be16 gre_flags_to_tnl_flags(__be16 flags)
139 {
140 	__be16 tflags = 0;
141 
142 	if (flags & GRE_CSUM)
143 		tflags |= TUNNEL_CSUM;
144 	if (flags & GRE_ROUTING)
145 		tflags |= TUNNEL_ROUTING;
146 	if (flags & GRE_KEY)
147 		tflags |= TUNNEL_KEY;
148 	if (flags & GRE_SEQ)
149 		tflags |= TUNNEL_SEQ;
150 	if (flags & GRE_STRICT)
151 		tflags |= TUNNEL_STRICT;
152 	if (flags & GRE_REC)
153 		tflags |= TUNNEL_REC;
154 	if (flags & GRE_VERSION)
155 		tflags |= TUNNEL_VERSION;
156 
157 	return tflags;
158 }
159 
160 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
161 {
162 	__be16 flags = 0;
163 
164 	if (tflags & TUNNEL_CSUM)
165 		flags |= GRE_CSUM;
166 	if (tflags & TUNNEL_ROUTING)
167 		flags |= GRE_ROUTING;
168 	if (tflags & TUNNEL_KEY)
169 		flags |= GRE_KEY;
170 	if (tflags & TUNNEL_SEQ)
171 		flags |= GRE_SEQ;
172 	if (tflags & TUNNEL_STRICT)
173 		flags |= GRE_STRICT;
174 	if (tflags & TUNNEL_REC)
175 		flags |= GRE_REC;
176 	if (tflags & TUNNEL_VERSION)
177 		flags |= GRE_VERSION;
178 
179 	return flags;
180 }
181 
182 /* Fills in tpi and returns header length to be pulled. */
183 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
184 			    bool *csum_err)
185 {
186 	const struct gre_base_hdr *greh;
187 	__be32 *options;
188 	int hdr_len;
189 
190 	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
191 		return -EINVAL;
192 
193 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
194 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
195 		return -EINVAL;
196 
197 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
198 	hdr_len = ip_gre_calc_hlen(tpi->flags);
199 
200 	if (!pskb_may_pull(skb, hdr_len))
201 		return -EINVAL;
202 
203 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
204 	tpi->proto = greh->protocol;
205 
206 	options = (__be32 *)(greh + 1);
207 	if (greh->flags & GRE_CSUM) {
208 		if (skb_checksum_simple_validate(skb)) {
209 			*csum_err = true;
210 			return -EINVAL;
211 		}
212 
213 		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
214 					 null_compute_pseudo);
215 		options++;
216 	}
217 
218 	if (greh->flags & GRE_KEY) {
219 		tpi->key = *options;
220 		options++;
221 	} else {
222 		tpi->key = 0;
223 	}
224 	if (unlikely(greh->flags & GRE_SEQ)) {
225 		tpi->seq = *options;
226 		options++;
227 	} else {
228 		tpi->seq = 0;
229 	}
230 	/* WCCP version 1 and 2 protocol decoding.
231 	 * - Change protocol to IP
232 	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
233 	 */
234 	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
235 		tpi->proto = htons(ETH_P_IP);
236 		if ((*(u8 *)options & 0xF0) != 0x40) {
237 			hdr_len += 4;
238 			if (!pskb_may_pull(skb, hdr_len))
239 				return -EINVAL;
240 		}
241 	}
242 	return hdr_len;
243 }
244 
245 static void ipgre_err(struct sk_buff *skb, u32 info,
246 		      const struct tnl_ptk_info *tpi)
247 {
248 
249 	/* All the routers (except for Linux) return only
250 	   8 bytes of packet payload. It means, that precise relaying of
251 	   ICMP in the real Internet is absolutely infeasible.
252 
253 	   Moreover, Cisco "wise men" put GRE key to the third word
254 	   in GRE header. It makes impossible maintaining even soft
255 	   state for keyed GRE tunnels with enabled checksum. Tell
256 	   them "thank you".
257 
258 	   Well, I wonder, rfc1812 was written by Cisco employee,
259 	   what the hell these idiots break standards established
260 	   by themselves???
261 	   */
262 	struct net *net = dev_net(skb->dev);
263 	struct ip_tunnel_net *itn;
264 	const struct iphdr *iph;
265 	const int type = icmp_hdr(skb)->type;
266 	const int code = icmp_hdr(skb)->code;
267 	struct ip_tunnel *t;
268 
269 	switch (type) {
270 	default:
271 	case ICMP_PARAMETERPROB:
272 		return;
273 
274 	case ICMP_DEST_UNREACH:
275 		switch (code) {
276 		case ICMP_SR_FAILED:
277 		case ICMP_PORT_UNREACH:
278 			/* Impossible event. */
279 			return;
280 		default:
281 			/* All others are translated to HOST_UNREACH.
282 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
283 			   I believe they are just ether pollution. --ANK
284 			 */
285 			break;
286 		}
287 		break;
288 
289 	case ICMP_TIME_EXCEEDED:
290 		if (code != ICMP_EXC_TTL)
291 			return;
292 		break;
293 
294 	case ICMP_REDIRECT:
295 		break;
296 	}
297 
298 	if (tpi->proto == htons(ETH_P_TEB))
299 		itn = net_generic(net, gre_tap_net_id);
300 	else
301 		itn = net_generic(net, ipgre_net_id);
302 
303 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
304 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
305 			     iph->daddr, iph->saddr, tpi->key);
306 
307 	if (!t)
308 		return;
309 
310 	if (t->parms.iph.daddr == 0 ||
311 	    ipv4_is_multicast(t->parms.iph.daddr))
312 		return;
313 
314 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
315 		return;
316 
317 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
318 		t->err_count++;
319 	else
320 		t->err_count = 1;
321 	t->err_time = jiffies;
322 }
323 
324 static void gre_err(struct sk_buff *skb, u32 info)
325 {
326 	/* All the routers (except for Linux) return only
327 	 * 8 bytes of packet payload. It means, that precise relaying of
328 	 * ICMP in the real Internet is absolutely infeasible.
329 	 *
330 	 * Moreover, Cisco "wise men" put GRE key to the third word
331 	 * in GRE header. It makes impossible maintaining even soft
332 	 * state for keyed
333 	 * GRE tunnels with enabled checksum. Tell them "thank you".
334 	 *
335 	 * Well, I wonder, rfc1812 was written by Cisco employee,
336 	 * what the hell these idiots break standards established
337 	 * by themselves???
338 	 */
339 
340 	const int type = icmp_hdr(skb)->type;
341 	const int code = icmp_hdr(skb)->code;
342 	struct tnl_ptk_info tpi;
343 	bool csum_err = false;
344 
345 	if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
346 		if (!csum_err)		/* ignore csum errors. */
347 			return;
348 	}
349 
350 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
351 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
352 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
353 		return;
354 	}
355 	if (type == ICMP_REDIRECT) {
356 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
357 			      IPPROTO_GRE, 0);
358 		return;
359 	}
360 
361 	ipgre_err(skb, info, &tpi);
362 }
363 
364 static __be64 key_to_tunnel_id(__be32 key)
365 {
366 #ifdef __BIG_ENDIAN
367 	return (__force __be64)((__force u32)key);
368 #else
369 	return (__force __be64)((__force u64)key << 32);
370 #endif
371 }
372 
373 /* Returns the least-significant 32 bits of a __be64. */
374 static __be32 tunnel_id_to_key(__be64 x)
375 {
376 #ifdef __BIG_ENDIAN
377 	return (__force __be32)x;
378 #else
379 	return (__force __be32)((__force u64)x >> 32);
380 #endif
381 }
382 
383 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
384 {
385 	struct net *net = dev_net(skb->dev);
386 	struct metadata_dst *tun_dst = NULL;
387 	struct ip_tunnel_net *itn;
388 	const struct iphdr *iph;
389 	struct ip_tunnel *tunnel;
390 
391 	if (tpi->proto == htons(ETH_P_TEB))
392 		itn = net_generic(net, gre_tap_net_id);
393 	else
394 		itn = net_generic(net, ipgre_net_id);
395 
396 	iph = ip_hdr(skb);
397 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
398 				  iph->saddr, iph->daddr, tpi->key);
399 
400 	if (tunnel) {
401 		if (tunnel->dev->type != ARPHRD_NONE)
402 			skb_pop_mac_header(skb);
403 		else
404 			skb_reset_mac_header(skb);
405 		if (tunnel->collect_md) {
406 			__be16 flags;
407 			__be64 tun_id;
408 
409 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
410 			tun_id = key_to_tunnel_id(tpi->key);
411 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
412 			if (!tun_dst)
413 				return PACKET_REJECT;
414 		}
415 
416 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
417 		return PACKET_RCVD;
418 	}
419 	return PACKET_REJECT;
420 }
421 
422 static int gre_rcv(struct sk_buff *skb)
423 {
424 	struct tnl_ptk_info tpi;
425 	bool csum_err = false;
426 	int hdr_len;
427 
428 #ifdef CONFIG_NET_IPGRE_BROADCAST
429 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
430 		/* Looped back packet, drop it! */
431 		if (rt_is_output_route(skb_rtable(skb)))
432 			goto drop;
433 	}
434 #endif
435 
436 	hdr_len = parse_gre_header(skb, &tpi, &csum_err);
437 	if (hdr_len < 0)
438 		goto drop;
439 	if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0)
440 		goto drop;
441 
442 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
443 		return 0;
444 
445 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
446 drop:
447 	kfree_skb(skb);
448 	return 0;
449 }
450 
451 static __sum16 gre_checksum(struct sk_buff *skb)
452 {
453 	__wsum csum;
454 
455 	if (skb->ip_summed == CHECKSUM_PARTIAL)
456 		csum = lco_csum(skb);
457 	else
458 		csum = skb_checksum(skb, 0, skb->len, 0);
459 	return csum_fold(csum);
460 }
461 
462 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
463 			 __be16 proto, __be32 key, __be32 seq)
464 {
465 	struct gre_base_hdr *greh;
466 
467 	skb_push(skb, hdr_len);
468 
469 	skb_reset_transport_header(skb);
470 	greh = (struct gre_base_hdr *)skb->data;
471 	greh->flags = tnl_flags_to_gre_flags(flags);
472 	greh->protocol = proto;
473 
474 	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
475 		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
476 
477 		if (flags & TUNNEL_SEQ) {
478 			*ptr = seq;
479 			ptr--;
480 		}
481 		if (flags & TUNNEL_KEY) {
482 			*ptr = key;
483 			ptr--;
484 		}
485 		if (flags & TUNNEL_CSUM &&
486 		    !(skb_shinfo(skb)->gso_type &
487 		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
488 			*ptr = 0;
489 			*(__sum16 *)ptr = gre_checksum(skb);
490 		}
491 	}
492 }
493 
494 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
495 		       const struct iphdr *tnl_params,
496 		       __be16 proto)
497 {
498 	struct ip_tunnel *tunnel = netdev_priv(dev);
499 
500 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
501 		tunnel->o_seqno++;
502 
503 	/* Push GRE header. */
504 	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
505 		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
506 
507 	skb_set_inner_protocol(skb, proto);
508 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
509 }
510 
511 static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
512 					   bool csum)
513 {
514 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
515 }
516 
517 static struct rtable *gre_get_rt(struct sk_buff *skb,
518 				 struct net_device *dev,
519 				 struct flowi4 *fl,
520 				 const struct ip_tunnel_key *key)
521 {
522 	struct net *net = dev_net(dev);
523 
524 	memset(fl, 0, sizeof(*fl));
525 	fl->daddr = key->u.ipv4.dst;
526 	fl->saddr = key->u.ipv4.src;
527 	fl->flowi4_tos = RT_TOS(key->tos);
528 	fl->flowi4_mark = skb->mark;
529 	fl->flowi4_proto = IPPROTO_GRE;
530 
531 	return ip_route_output_key(net, fl);
532 }
533 
534 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
535 			__be16 proto)
536 {
537 	struct ip_tunnel_info *tun_info;
538 	const struct ip_tunnel_key *key;
539 	struct rtable *rt = NULL;
540 	struct flowi4 fl;
541 	int min_headroom;
542 	int tunnel_hlen;
543 	__be16 df, flags;
544 	bool use_cache;
545 	int err;
546 
547 	tun_info = skb_tunnel_info(skb);
548 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
549 		     ip_tunnel_info_af(tun_info) != AF_INET))
550 		goto err_free_skb;
551 
552 	key = &tun_info->key;
553 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
554 	if (use_cache)
555 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
556 	if (!rt) {
557 		rt = gre_get_rt(skb, dev, &fl, key);
558 		if (IS_ERR(rt))
559 				goto err_free_skb;
560 		if (use_cache)
561 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
562 					  fl.saddr);
563 	}
564 
565 	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
566 
567 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
568 			+ tunnel_hlen + sizeof(struct iphdr);
569 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
570 		int head_delta = SKB_DATA_ALIGN(min_headroom -
571 						skb_headroom(skb) +
572 						16);
573 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
574 				       0, GFP_ATOMIC);
575 		if (unlikely(err))
576 			goto err_free_rt;
577 	}
578 
579 	/* Push Tunnel header. */
580 	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
581 	if (IS_ERR(skb)) {
582 		skb = NULL;
583 		goto err_free_rt;
584 	}
585 
586 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
587 	build_header(skb, tunnel_hlen, flags, proto,
588 		     tunnel_id_to_key(tun_info->key.tun_id), 0);
589 
590 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
591 
592 	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
593 		      key->tos, key->ttl, df, false);
594 	return;
595 
596 err_free_rt:
597 	ip_rt_put(rt);
598 err_free_skb:
599 	kfree_skb(skb);
600 	dev->stats.tx_dropped++;
601 }
602 
603 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
604 {
605 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
606 	struct rtable *rt;
607 	struct flowi4 fl4;
608 
609 	if (ip_tunnel_info_af(info) != AF_INET)
610 		return -EINVAL;
611 
612 	rt = gre_get_rt(skb, dev, &fl4, &info->key);
613 	if (IS_ERR(rt))
614 		return PTR_ERR(rt);
615 
616 	ip_rt_put(rt);
617 	info->key.u.ipv4.src = fl4.saddr;
618 	return 0;
619 }
620 
621 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
622 			      struct net_device *dev)
623 {
624 	struct ip_tunnel *tunnel = netdev_priv(dev);
625 	const struct iphdr *tnl_params;
626 
627 	if (tunnel->collect_md) {
628 		gre_fb_xmit(skb, dev, skb->protocol);
629 		return NETDEV_TX_OK;
630 	}
631 
632 	if (dev->header_ops) {
633 		/* Need space for new headers */
634 		if (skb_cow_head(skb, dev->needed_headroom -
635 				      (tunnel->hlen + sizeof(struct iphdr))))
636 			goto free_skb;
637 
638 		tnl_params = (const struct iphdr *)skb->data;
639 
640 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
641 		 * to gre header.
642 		 */
643 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
644 		skb_reset_mac_header(skb);
645 	} else {
646 		if (skb_cow_head(skb, dev->needed_headroom))
647 			goto free_skb;
648 
649 		tnl_params = &tunnel->parms.iph;
650 	}
651 
652 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
653 	if (IS_ERR(skb))
654 		goto out;
655 
656 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
657 	return NETDEV_TX_OK;
658 
659 free_skb:
660 	kfree_skb(skb);
661 out:
662 	dev->stats.tx_dropped++;
663 	return NETDEV_TX_OK;
664 }
665 
666 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
667 				struct net_device *dev)
668 {
669 	struct ip_tunnel *tunnel = netdev_priv(dev);
670 
671 	if (tunnel->collect_md) {
672 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
673 		return NETDEV_TX_OK;
674 	}
675 
676 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
677 	if (IS_ERR(skb))
678 		goto out;
679 
680 	if (skb_cow_head(skb, dev->needed_headroom))
681 		goto free_skb;
682 
683 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
684 	return NETDEV_TX_OK;
685 
686 free_skb:
687 	kfree_skb(skb);
688 out:
689 	dev->stats.tx_dropped++;
690 	return NETDEV_TX_OK;
691 }
692 
693 static int ipgre_tunnel_ioctl(struct net_device *dev,
694 			      struct ifreq *ifr, int cmd)
695 {
696 	int err;
697 	struct ip_tunnel_parm p;
698 
699 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
700 		return -EFAULT;
701 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
702 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
703 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
704 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
705 			return -EINVAL;
706 	}
707 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
708 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
709 
710 	err = ip_tunnel_ioctl(dev, &p, cmd);
711 	if (err)
712 		return err;
713 
714 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
715 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
716 
717 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
718 		return -EFAULT;
719 	return 0;
720 }
721 
722 /* Nice toy. Unfortunately, useless in real life :-)
723    It allows to construct virtual multiprotocol broadcast "LAN"
724    over the Internet, provided multicast routing is tuned.
725 
726 
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
731 
732    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
733    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
734 
735    ping -t 255 224.66.66.66
736 
737    If nobody answers, mbone does not work.
738 
739    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
740    ip addr add 10.66.66.<somewhat>/24 dev Universe
741    ifconfig Universe up
742    ifconfig Universe add fe80::<Your_real_addr>/10
743    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
744    ftp 10.66.66.66
745    ...
746    ftp fec0:6666:6666::193.233.7.65
747    ...
748  */
749 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
750 			unsigned short type,
751 			const void *daddr, const void *saddr, unsigned int len)
752 {
753 	struct ip_tunnel *t = netdev_priv(dev);
754 	struct iphdr *iph;
755 	struct gre_base_hdr *greh;
756 
757 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
758 	greh = (struct gre_base_hdr *)(iph+1);
759 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
760 	greh->protocol = htons(type);
761 
762 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
763 
764 	/* Set the source hardware address. */
765 	if (saddr)
766 		memcpy(&iph->saddr, saddr, 4);
767 	if (daddr)
768 		memcpy(&iph->daddr, daddr, 4);
769 	if (iph->daddr)
770 		return t->hlen + sizeof(*iph);
771 
772 	return -(t->hlen + sizeof(*iph));
773 }
774 
775 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
776 {
777 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
778 	memcpy(haddr, &iph->saddr, 4);
779 	return 4;
780 }
781 
/* Link-layer header operations installed by ipgre_tunnel_init() on
 * tunnels without a fixed remote address and (with
 * CONFIG_NET_IPGRE_BROADCAST) on multicast tunnels.
 */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
786 
787 #ifdef CONFIG_NET_IPGRE_BROADCAST
788 static int ipgre_open(struct net_device *dev)
789 {
790 	struct ip_tunnel *t = netdev_priv(dev);
791 
792 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
793 		struct flowi4 fl4;
794 		struct rtable *rt;
795 
796 		rt = ip_route_output_gre(t->net, &fl4,
797 					 t->parms.iph.daddr,
798 					 t->parms.iph.saddr,
799 					 t->parms.o_key,
800 					 RT_TOS(t->parms.iph.tos),
801 					 t->parms.link);
802 		if (IS_ERR(rt))
803 			return -EADDRNOTAVAIL;
804 		dev = rt->dst.dev;
805 		ip_rt_put(rt);
806 		if (!__in_dev_get_rtnl(dev))
807 			return -EADDRNOTAVAIL;
808 		t->mlink = dev->ifindex;
809 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
810 	}
811 	return 0;
812 }
813 
814 static int ipgre_close(struct net_device *dev)
815 {
816 	struct ip_tunnel *t = netdev_priv(dev);
817 
818 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
819 		struct in_device *in_dev;
820 		in_dev = inetdev_by_index(t->net, t->mlink);
821 		if (in_dev)
822 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
823 	}
824 	return 0;
825 }
826 #endif
827 
/* net_device operations for plain GRE tunnel devices (installed by
 * ipgre_tunnel_setup()).  The open/stop hooks exist only when
 * CONFIG_NET_IPGRE_BROADCAST is enabled.
 */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};
841 
/* Feature bits that __gre_tunnel_init() sets in both dev->features and
 * dev->hw_features of every GRE device.
 */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
846 
847 static void ipgre_tunnel_setup(struct net_device *dev)
848 {
849 	dev->netdev_ops		= &ipgre_netdev_ops;
850 	dev->type		= ARPHRD_IPGRE;
851 	ip_tunnel_setup(dev, ipgre_net_id);
852 }
853 
854 static void __gre_tunnel_init(struct net_device *dev)
855 {
856 	struct ip_tunnel *tunnel;
857 	int t_hlen;
858 
859 	tunnel = netdev_priv(dev);
860 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
861 	tunnel->parms.iph.protocol = IPPROTO_GRE;
862 
863 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
864 
865 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
866 
867 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
868 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
869 
870 	dev->features		|= GRE_FEATURES;
871 	dev->hw_features	|= GRE_FEATURES;
872 
873 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
874 		/* TCP offload with GRE SEQ is not supported, nor
875 		 * can we support 2 levels of outer headers requiring
876 		 * an update.
877 		 */
878 		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
879 		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
880 			dev->features    |= NETIF_F_GSO_SOFTWARE;
881 			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
882 		}
883 
884 		/* Can use a lockless transmit, unless we generate
885 		 * output sequences
886 		 */
887 		dev->features |= NETIF_F_LLTX;
888 	}
889 }
890 
891 static int ipgre_tunnel_init(struct net_device *dev)
892 {
893 	struct ip_tunnel *tunnel = netdev_priv(dev);
894 	struct iphdr *iph = &tunnel->parms.iph;
895 
896 	__gre_tunnel_init(dev);
897 
898 	memcpy(dev->dev_addr, &iph->saddr, 4);
899 	memcpy(dev->broadcast, &iph->daddr, 4);
900 
901 	dev->flags		= IFF_NOARP;
902 	netif_keep_dst(dev);
903 	dev->addr_len		= 4;
904 
905 	if (iph->daddr && !tunnel->collect_md) {
906 #ifdef CONFIG_NET_IPGRE_BROADCAST
907 		if (ipv4_is_multicast(iph->daddr)) {
908 			if (!iph->saddr)
909 				return -EINVAL;
910 			dev->flags = IFF_BROADCAST;
911 			dev->header_ops = &ipgre_header_ops;
912 		}
913 #endif
914 	} else if (!tunnel->collect_md) {
915 		dev->header_ops = &ipgre_header_ops;
916 	}
917 
918 	return ip_tunnel_init(dev);
919 }
920 
/* GRE protocol hooks: gre_rcv() handles received GRE packets and
 * gre_err() handles ICMP errors.
 */
static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};
925 
926 static int __net_init ipgre_init_net(struct net *net)
927 {
928 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
929 }
930 
931 static void __net_exit ipgre_exit_net(struct net *net)
932 {
933 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
934 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
935 }
936 
/* Per-network-namespace operations for plain GRE; each namespace gets a
 * struct ip_tunnel_net reachable through ipgre_net_id.
 */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
943 
944 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
945 {
946 	__be16 flags;
947 
948 	if (!data)
949 		return 0;
950 
951 	flags = 0;
952 	if (data[IFLA_GRE_IFLAGS])
953 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
954 	if (data[IFLA_GRE_OFLAGS])
955 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
956 	if (flags & (GRE_VERSION|GRE_ROUTING))
957 		return -EINVAL;
958 
959 	if (data[IFLA_GRE_COLLECT_METADATA] &&
960 	    data[IFLA_GRE_ENCAP_TYPE] &&
961 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
962 		return -EINVAL;
963 
964 	return 0;
965 }
966 
967 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
968 {
969 	__be32 daddr;
970 
971 	if (tb[IFLA_ADDRESS]) {
972 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
973 			return -EINVAL;
974 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
975 			return -EADDRNOTAVAIL;
976 	}
977 
978 	if (!data)
979 		goto out;
980 
981 	if (data[IFLA_GRE_REMOTE]) {
982 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
983 		if (!daddr)
984 			return -EINVAL;
985 	}
986 
987 out:
988 	return ipgre_tunnel_validate(tb, data);
989 }
990 
991 static void ipgre_netlink_parms(struct net_device *dev,
992 				struct nlattr *data[],
993 				struct nlattr *tb[],
994 				struct ip_tunnel_parm *parms)
995 {
996 	memset(parms, 0, sizeof(*parms));
997 
998 	parms->iph.protocol = IPPROTO_GRE;
999 
1000 	if (!data)
1001 		return;
1002 
1003 	if (data[IFLA_GRE_LINK])
1004 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1005 
1006 	if (data[IFLA_GRE_IFLAGS])
1007 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1008 
1009 	if (data[IFLA_GRE_OFLAGS])
1010 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1011 
1012 	if (data[IFLA_GRE_IKEY])
1013 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1014 
1015 	if (data[IFLA_GRE_OKEY])
1016 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1017 
1018 	if (data[IFLA_GRE_LOCAL])
1019 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1020 
1021 	if (data[IFLA_GRE_REMOTE])
1022 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1023 
1024 	if (data[IFLA_GRE_TTL])
1025 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1026 
1027 	if (data[IFLA_GRE_TOS])
1028 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1029 
1030 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1031 		parms->iph.frag_off = htons(IP_DF);
1032 
1033 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1034 		struct ip_tunnel *t = netdev_priv(dev);
1035 
1036 		t->collect_md = true;
1037 		if (dev->type == ARPHRD_IPGRE)
1038 			dev->type = ARPHRD_NONE;
1039 	}
1040 }
1041 
1042 /* This function returns true when ENCAP attributes are present in the nl msg */
1043 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1044 				      struct ip_tunnel_encap *ipencap)
1045 {
1046 	bool ret = false;
1047 
1048 	memset(ipencap, 0, sizeof(*ipencap));
1049 
1050 	if (!data)
1051 		return ret;
1052 
1053 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1054 		ret = true;
1055 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1056 	}
1057 
1058 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1059 		ret = true;
1060 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1061 	}
1062 
1063 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1064 		ret = true;
1065 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1066 	}
1067 
1068 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1069 		ret = true;
1070 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1071 	}
1072 
1073 	return ret;
1074 }
1075 
1076 static int gre_tap_init(struct net_device *dev)
1077 {
1078 	__gre_tunnel_init(dev);
1079 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1080 
1081 	return ip_tunnel_init(dev);
1082 }
1083 
1084 static const struct net_device_ops gre_tap_netdev_ops = {
1085 	.ndo_init		= gre_tap_init,
1086 	.ndo_uninit		= ip_tunnel_uninit,
1087 	.ndo_start_xmit		= gre_tap_xmit,
1088 	.ndo_set_mac_address 	= eth_mac_addr,
1089 	.ndo_validate_addr	= eth_validate_addr,
1090 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1091 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1092 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1093 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1094 };
1095 
1096 static void ipgre_tap_setup(struct net_device *dev)
1097 {
1098 	ether_setup(dev);
1099 	dev->netdev_ops	= &gre_tap_netdev_ops;
1100 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1101 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1102 	ip_tunnel_setup(dev, gre_tap_net_id);
1103 }
1104 
1105 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1106 			 struct nlattr *tb[], struct nlattr *data[])
1107 {
1108 	struct ip_tunnel_parm p;
1109 	struct ip_tunnel_encap ipencap;
1110 
1111 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1112 		struct ip_tunnel *t = netdev_priv(dev);
1113 		int err = ip_tunnel_encap_setup(t, &ipencap);
1114 
1115 		if (err < 0)
1116 			return err;
1117 	}
1118 
1119 	ipgre_netlink_parms(dev, data, tb, &p);
1120 	return ip_tunnel_newlink(dev, tb, &p);
1121 }
1122 
1123 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1124 			    struct nlattr *data[])
1125 {
1126 	struct ip_tunnel_parm p;
1127 	struct ip_tunnel_encap ipencap;
1128 
1129 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1130 		struct ip_tunnel *t = netdev_priv(dev);
1131 		int err = ip_tunnel_encap_setup(t, &ipencap);
1132 
1133 		if (err < 0)
1134 			return err;
1135 	}
1136 
1137 	ipgre_netlink_parms(dev, data, tb, &p);
1138 	return ip_tunnel_changelink(dev, tb, &p);
1139 }
1140 
1141 static size_t ipgre_get_size(const struct net_device *dev)
1142 {
1143 	return
1144 		/* IFLA_GRE_LINK */
1145 		nla_total_size(4) +
1146 		/* IFLA_GRE_IFLAGS */
1147 		nla_total_size(2) +
1148 		/* IFLA_GRE_OFLAGS */
1149 		nla_total_size(2) +
1150 		/* IFLA_GRE_IKEY */
1151 		nla_total_size(4) +
1152 		/* IFLA_GRE_OKEY */
1153 		nla_total_size(4) +
1154 		/* IFLA_GRE_LOCAL */
1155 		nla_total_size(4) +
1156 		/* IFLA_GRE_REMOTE */
1157 		nla_total_size(4) +
1158 		/* IFLA_GRE_TTL */
1159 		nla_total_size(1) +
1160 		/* IFLA_GRE_TOS */
1161 		nla_total_size(1) +
1162 		/* IFLA_GRE_PMTUDISC */
1163 		nla_total_size(1) +
1164 		/* IFLA_GRE_ENCAP_TYPE */
1165 		nla_total_size(2) +
1166 		/* IFLA_GRE_ENCAP_FLAGS */
1167 		nla_total_size(2) +
1168 		/* IFLA_GRE_ENCAP_SPORT */
1169 		nla_total_size(2) +
1170 		/* IFLA_GRE_ENCAP_DPORT */
1171 		nla_total_size(2) +
1172 		/* IFLA_GRE_COLLECT_METADATA */
1173 		nla_total_size(0) +
1174 		0;
1175 }
1176 
1177 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1178 {
1179 	struct ip_tunnel *t = netdev_priv(dev);
1180 	struct ip_tunnel_parm *p = &t->parms;
1181 
1182 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1183 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1184 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1185 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1186 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1187 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1188 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1189 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1190 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1191 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1192 		       !!(p->iph.frag_off & htons(IP_DF))))
1193 		goto nla_put_failure;
1194 
1195 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1196 			t->encap.type) ||
1197 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1198 			 t->encap.sport) ||
1199 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1200 			 t->encap.dport) ||
1201 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1202 			t->encap.flags))
1203 		goto nla_put_failure;
1204 
1205 	if (t->collect_md) {
1206 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1207 			goto nla_put_failure;
1208 	}
1209 
1210 	return 0;
1211 
1212 nla_put_failure:
1213 	return -EMSGSIZE;
1214 }
1215 
1216 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1217 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1218 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1219 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1220 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1221 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1222 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1223 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1224 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1225 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1226 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1227 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1228 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1229 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1230 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1231 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1232 };
1233 
1234 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1235 	.kind		= "gre",
1236 	.maxtype	= IFLA_GRE_MAX,
1237 	.policy		= ipgre_policy,
1238 	.priv_size	= sizeof(struct ip_tunnel),
1239 	.setup		= ipgre_tunnel_setup,
1240 	.validate	= ipgre_tunnel_validate,
1241 	.newlink	= ipgre_newlink,
1242 	.changelink	= ipgre_changelink,
1243 	.dellink	= ip_tunnel_dellink,
1244 	.get_size	= ipgre_get_size,
1245 	.fill_info	= ipgre_fill_info,
1246 	.get_link_net	= ip_tunnel_get_link_net,
1247 };
1248 
1249 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1250 	.kind		= "gretap",
1251 	.maxtype	= IFLA_GRE_MAX,
1252 	.policy		= ipgre_policy,
1253 	.priv_size	= sizeof(struct ip_tunnel),
1254 	.setup		= ipgre_tap_setup,
1255 	.validate	= ipgre_tap_validate,
1256 	.newlink	= ipgre_newlink,
1257 	.changelink	= ipgre_changelink,
1258 	.dellink	= ip_tunnel_dellink,
1259 	.get_size	= ipgre_get_size,
1260 	.fill_info	= ipgre_fill_info,
1261 	.get_link_net	= ip_tunnel_get_link_net,
1262 };
1263 
1264 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1265 					u8 name_assign_type)
1266 {
1267 	struct nlattr *tb[IFLA_MAX + 1];
1268 	struct net_device *dev;
1269 	struct ip_tunnel *t;
1270 	int err;
1271 
1272 	memset(&tb, 0, sizeof(tb));
1273 
1274 	dev = rtnl_create_link(net, name, name_assign_type,
1275 			       &ipgre_tap_ops, tb);
1276 	if (IS_ERR(dev))
1277 		return dev;
1278 
1279 	/* Configure flow based GRE device. */
1280 	t = netdev_priv(dev);
1281 	t->collect_md = true;
1282 
1283 	err = ipgre_newlink(net, dev, tb, NULL);
1284 	if (err < 0)
1285 		goto out;
1286 
1287 	/* openvswitch users expect packet sizes to be unrestricted,
1288 	 * so set the largest MTU we can.
1289 	 */
1290 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1291 	if (err)
1292 		goto out;
1293 
1294 	return dev;
1295 out:
1296 	free_netdev(dev);
1297 	return ERR_PTR(err);
1298 }
1299 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1300 
1301 static int __net_init ipgre_tap_init_net(struct net *net)
1302 {
1303 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1304 }
1305 
1306 static void __net_exit ipgre_tap_exit_net(struct net *net)
1307 {
1308 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1309 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1310 }
1311 
1312 static struct pernet_operations ipgre_tap_net_ops = {
1313 	.init = ipgre_tap_init_net,
1314 	.exit = ipgre_tap_exit_net,
1315 	.id   = &gre_tap_net_id,
1316 	.size = sizeof(struct ip_tunnel_net),
1317 };
1318 
1319 static int __init ipgre_init(void)
1320 {
1321 	int err;
1322 
1323 	pr_info("GRE over IPv4 tunneling driver\n");
1324 
1325 	err = register_pernet_device(&ipgre_net_ops);
1326 	if (err < 0)
1327 		return err;
1328 
1329 	err = register_pernet_device(&ipgre_tap_net_ops);
1330 	if (err < 0)
1331 		goto pnet_tap_faied;
1332 
1333 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1334 	if (err < 0) {
1335 		pr_info("%s: can't add protocol\n", __func__);
1336 		goto add_proto_failed;
1337 	}
1338 
1339 	err = rtnl_link_register(&ipgre_link_ops);
1340 	if (err < 0)
1341 		goto rtnl_link_failed;
1342 
1343 	err = rtnl_link_register(&ipgre_tap_ops);
1344 	if (err < 0)
1345 		goto tap_ops_failed;
1346 
1347 	return 0;
1348 
1349 tap_ops_failed:
1350 	rtnl_link_unregister(&ipgre_link_ops);
1351 rtnl_link_failed:
1352 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1353 add_proto_failed:
1354 	unregister_pernet_device(&ipgre_tap_net_ops);
1355 pnet_tap_faied:
1356 	unregister_pernet_device(&ipgre_net_ops);
1357 	return err;
1358 }
1359 
1360 static void __exit ipgre_fini(void)
1361 {
1362 	rtnl_link_unregister(&ipgre_tap_ops);
1363 	rtnl_link_unregister(&ipgre_link_ops);
1364 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1365 	unregister_pernet_device(&ipgre_tap_net_ops);
1366 	unregister_pernet_device(&ipgre_net_ops);
1367 }
1368 
1369 module_init(ipgre_init);
1370 module_exit(ipgre_fini);
1371 MODULE_LICENSE("GPL");
1372 MODULE_ALIAS_RTNL_LINK("gre");
1373 MODULE_ALIAS_RTNL_LINK("gretap");
1374 MODULE_ALIAS_NETDEV("gre0");
1375 MODULE_ALIAS_NETDEV("gretap0");
1376