xref: /linux/net/ipv4/ip_gre.c (revision f02e58f91a121ec909efad06b0a7aa806e1f7a84)
1 /*
2  *	Linux NET3:	GRE over IP protocol decoder.
3  *
4  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *	This program is free software; you can redistribute it and/or
7  *	modify it under the terms of the GNU General Public License
8  *	as published by the Free Software Foundation; either version
9  *	2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/if_vlan.h>
29 #include <linux/init.h>
30 #include <linux/in6.h>
31 #include <linux/inetdevice.h>
32 #include <linux/igmp.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/etherdevice.h>
35 #include <linux/if_ether.h>
36 
37 #include <net/sock.h>
38 #include <net/ip.h>
39 #include <net/icmp.h>
40 #include <net/protocol.h>
41 #include <net/ip_tunnels.h>
42 #include <net/arp.h>
43 #include <net/checksum.h>
44 #include <net/dsfield.h>
45 #include <net/inet_ecn.h>
46 #include <net/xfrm.h>
47 #include <net/net_namespace.h>
48 #include <net/netns/generic.h>
49 #include <net/rtnetlink.h>
50 #include <net/gre.h>
51 #include <net/dst_metadata.h>
52 
53 #if IS_ENABLED(CONFIG_IPV6)
54 #include <net/ipv6.h>
55 #include <net/ip6_fib.h>
56 #include <net/ip6_route.h>
57 #endif
58 
59 /*
60    Problems & solutions
61    --------------------
62 
63    1. The most important issue is detecting local dead loops.
64    They would cause complete host lockup in transmit, which
65    would be "resolved" by stack overflow or, if queueing is enabled,
66    with infinite looping in net_bh.
67 
68    We cannot track such dead loops during route installation,
69    it is infeasible task. The most general solutions would be
70    to keep skb->encapsulation counter (sort of local ttl),
71    and silently drop packet when it expires. It is a good
72    solution, but it supposes maintaining new variable in ALL
73    skb, even if no tunneling is used.
74 
75    Current solution: xmit_recursion breaks dead loops. This is a percpu
76    counter, since when we enter the first ndo_xmit(), cpu migration is
77    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
78 
79    2. Networking dead loops would not kill routers, but would really
80    kill network. IP hop limit plays role of "t->recursion" in this case,
81    if we copy it from packet being encapsulated to upper header.
82    It is very good solution, but it introduces two problems:
83 
84    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
85      do not work over tunnels.
86    - traceroute does not work. I planned to relay ICMP from tunnel,
87      so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
89      only Linux complies to rfc1812 now (yes, guys, Linux is the only
90      true router now :-)), all routers (at least, in neighbourhood of mine)
91      return only 8 bytes of payload. It is the end.
92 
93    Hence, if we want that OSPF worked or traceroute said something reasonable,
94    we should search for another solution.
95 
96    One of them is to parse packet trying to detect inner encapsulation
97    made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.
99 
100    Current solution: The solution was UNEXPECTEDLY SIMPLE.
101    We force DF flag on tunnels with preconfigured hop limit,
102    that is ALL. :-) Well, it does not remove the problem completely,
103    but exponential growth of network traffic is changed to linear
104    (branches, that exceed pmtu are pruned) and tunnel mtu
105    rapidly degrades to value <68, where looping stops.
106    Yes, it is not good if there exists a router in the loop,
107    which does not force DF, even when encapsulating packets have DF set.
108    But it is not our problem! Nobody could accuse us, we made
109    all that we could make. Even if it is your gated who injected
110    fatal route to network, even if it were you who configured
111    fatal static route: you are innocent. :-)
112 
113    Alexey Kuznetsov.
114  */
115 
116 static bool log_ecn_error = true;
117 module_param(log_ecn_error, bool, 0644);
118 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
119 
120 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
121 static int ipgre_tunnel_init(struct net_device *dev);
122 
123 static int ipgre_net_id __read_mostly;
124 static int gre_tap_net_id __read_mostly;
125 
126 static int ip_gre_calc_hlen(__be16 o_flags)
127 {
128 	int addend = 4;
129 
130 	if (o_flags & TUNNEL_CSUM)
131 		addend += 4;
132 	if (o_flags & TUNNEL_KEY)
133 		addend += 4;
134 	if (o_flags & TUNNEL_SEQ)
135 		addend += 4;
136 	return addend;
137 }
138 
139 static __be16 gre_flags_to_tnl_flags(__be16 flags)
140 {
141 	__be16 tflags = 0;
142 
143 	if (flags & GRE_CSUM)
144 		tflags |= TUNNEL_CSUM;
145 	if (flags & GRE_ROUTING)
146 		tflags |= TUNNEL_ROUTING;
147 	if (flags & GRE_KEY)
148 		tflags |= TUNNEL_KEY;
149 	if (flags & GRE_SEQ)
150 		tflags |= TUNNEL_SEQ;
151 	if (flags & GRE_STRICT)
152 		tflags |= TUNNEL_STRICT;
153 	if (flags & GRE_REC)
154 		tflags |= TUNNEL_REC;
155 	if (flags & GRE_VERSION)
156 		tflags |= TUNNEL_VERSION;
157 
158 	return tflags;
159 }
160 
161 static __be16 tnl_flags_to_gre_flags(__be16 tflags)
162 {
163 	__be16 flags = 0;
164 
165 	if (tflags & TUNNEL_CSUM)
166 		flags |= GRE_CSUM;
167 	if (tflags & TUNNEL_ROUTING)
168 		flags |= GRE_ROUTING;
169 	if (tflags & TUNNEL_KEY)
170 		flags |= GRE_KEY;
171 	if (tflags & TUNNEL_SEQ)
172 		flags |= GRE_SEQ;
173 	if (tflags & TUNNEL_STRICT)
174 		flags |= GRE_STRICT;
175 	if (tflags & TUNNEL_REC)
176 		flags |= GRE_REC;
177 	if (tflags & TUNNEL_VERSION)
178 		flags |= GRE_VERSION;
179 
180 	return flags;
181 }
182 
183 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
184 			    bool *csum_err)
185 {
186 	const struct gre_base_hdr *greh;
187 	__be32 *options;
188 	int hdr_len;
189 
190 	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
191 		return -EINVAL;
192 
193 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
194 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
195 		return -EINVAL;
196 
197 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
198 	hdr_len = ip_gre_calc_hlen(tpi->flags);
199 
200 	if (!pskb_may_pull(skb, hdr_len))
201 		return -EINVAL;
202 
203 	greh = (struct gre_base_hdr *)skb_transport_header(skb);
204 	tpi->proto = greh->protocol;
205 
206 	options = (__be32 *)(greh + 1);
207 	if (greh->flags & GRE_CSUM) {
208 		if (skb_checksum_simple_validate(skb)) {
209 			*csum_err = true;
210 			return -EINVAL;
211 		}
212 
213 		skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
214 					 null_compute_pseudo);
215 		options++;
216 	}
217 
218 	if (greh->flags & GRE_KEY) {
219 		tpi->key = *options;
220 		options++;
221 	} else {
222 		tpi->key = 0;
223 	}
224 	if (unlikely(greh->flags & GRE_SEQ)) {
225 		tpi->seq = *options;
226 		options++;
227 	} else {
228 		tpi->seq = 0;
229 	}
230 	/* WCCP version 1 and 2 protocol decoding.
231 	 * - Change protocol to IP
232 	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
233 	 */
234 	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
235 		tpi->proto = htons(ETH_P_IP);
236 		if ((*(u8 *)options & 0xF0) != 0x40) {
237 			hdr_len += 4;
238 			if (!pskb_may_pull(skb, hdr_len))
239 				return -EINVAL;
240 		}
241 	}
242 	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
243 }
244 
245 static void ipgre_err(struct sk_buff *skb, u32 info,
246 		      const struct tnl_ptk_info *tpi)
247 {
248 
249 	/* All the routers (except for Linux) return only
250 	   8 bytes of packet payload. It means, that precise relaying of
251 	   ICMP in the real Internet is absolutely infeasible.
252 
253 	   Moreover, Cisco "wise men" put GRE key to the third word
254 	   in GRE header. It makes impossible maintaining even soft
255 	   state for keyed GRE tunnels with enabled checksum. Tell
256 	   them "thank you".
257 
258 	   Well, I wonder, rfc1812 was written by Cisco employee,
259 	   what the hell these idiots break standards established
260 	   by themselves???
261 	   */
262 	struct net *net = dev_net(skb->dev);
263 	struct ip_tunnel_net *itn;
264 	const struct iphdr *iph;
265 	const int type = icmp_hdr(skb)->type;
266 	const int code = icmp_hdr(skb)->code;
267 	struct ip_tunnel *t;
268 
269 	switch (type) {
270 	default:
271 	case ICMP_PARAMETERPROB:
272 		return;
273 
274 	case ICMP_DEST_UNREACH:
275 		switch (code) {
276 		case ICMP_SR_FAILED:
277 		case ICMP_PORT_UNREACH:
278 			/* Impossible event. */
279 			return;
280 		default:
281 			/* All others are translated to HOST_UNREACH.
282 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
283 			   I believe they are just ether pollution. --ANK
284 			 */
285 			break;
286 		}
287 		break;
288 
289 	case ICMP_TIME_EXCEEDED:
290 		if (code != ICMP_EXC_TTL)
291 			return;
292 		break;
293 
294 	case ICMP_REDIRECT:
295 		break;
296 	}
297 
298 	if (tpi->proto == htons(ETH_P_TEB))
299 		itn = net_generic(net, gre_tap_net_id);
300 	else
301 		itn = net_generic(net, ipgre_net_id);
302 
303 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
304 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
305 			     iph->daddr, iph->saddr, tpi->key);
306 
307 	if (!t)
308 		return;
309 
310 	if (t->parms.iph.daddr == 0 ||
311 	    ipv4_is_multicast(t->parms.iph.daddr))
312 		return;
313 
314 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
315 		return;
316 
317 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
318 		t->err_count++;
319 	else
320 		t->err_count = 1;
321 	t->err_time = jiffies;
322 }
323 
324 static void gre_err(struct sk_buff *skb, u32 info)
325 {
326 	/* All the routers (except for Linux) return only
327 	 * 8 bytes of packet payload. It means, that precise relaying of
328 	 * ICMP in the real Internet is absolutely infeasible.
329 	 *
330 	 * Moreover, Cisco "wise men" put GRE key to the third word
331 	 * in GRE header. It makes impossible maintaining even soft
332 	 * state for keyed
333 	 * GRE tunnels with enabled checksum. Tell them "thank you".
334 	 *
335 	 * Well, I wonder, rfc1812 was written by Cisco employee,
336 	 * what the hell these idiots break standards established
337 	 * by themselves???
338 	 */
339 
340 	const int type = icmp_hdr(skb)->type;
341 	const int code = icmp_hdr(skb)->code;
342 	struct tnl_ptk_info tpi;
343 	bool csum_err = false;
344 
345 	if (parse_gre_header(skb, &tpi, &csum_err)) {
346 		if (!csum_err)		/* ignore csum errors. */
347 			return;
348 	}
349 
350 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
351 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
352 				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
353 		return;
354 	}
355 	if (type == ICMP_REDIRECT) {
356 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
357 			      IPPROTO_GRE, 0);
358 		return;
359 	}
360 
361 	ipgre_err(skb, info, &tpi);
362 }
363 
364 static __be64 key_to_tunnel_id(__be32 key)
365 {
366 #ifdef __BIG_ENDIAN
367 	return (__force __be64)((__force u32)key);
368 #else
369 	return (__force __be64)((__force u64)key << 32);
370 #endif
371 }
372 
373 /* Returns the least-significant 32 bits of a __be64. */
374 static __be32 tunnel_id_to_key(__be64 x)
375 {
376 #ifdef __BIG_ENDIAN
377 	return (__force __be32)x;
378 #else
379 	return (__force __be32)((__force u64)x >> 32);
380 #endif
381 }
382 
383 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
384 {
385 	struct net *net = dev_net(skb->dev);
386 	struct metadata_dst *tun_dst = NULL;
387 	struct ip_tunnel_net *itn;
388 	const struct iphdr *iph;
389 	struct ip_tunnel *tunnel;
390 
391 	if (tpi->proto == htons(ETH_P_TEB))
392 		itn = net_generic(net, gre_tap_net_id);
393 	else
394 		itn = net_generic(net, ipgre_net_id);
395 
396 	iph = ip_hdr(skb);
397 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
398 				  iph->saddr, iph->daddr, tpi->key);
399 
400 	if (tunnel) {
401 		skb_pop_mac_header(skb);
402 		if (tunnel->collect_md) {
403 			struct ip_tunnel_info *info;
404 
405 			tun_dst = metadata_dst_alloc(0, GFP_ATOMIC);
406 			if (!tun_dst)
407 				return PACKET_REJECT;
408 
409 			info = &tun_dst->u.tun_info;
410 			info->key.ipv4_src = iph->saddr;
411 			info->key.ipv4_dst = iph->daddr;
412 			info->key.ipv4_tos = iph->tos;
413 			info->key.ipv4_ttl = iph->ttl;
414 
415 			info->mode = IP_TUNNEL_INFO_RX;
416 			info->key.tun_flags = tpi->flags &
417 					      (TUNNEL_CSUM | TUNNEL_KEY);
418 			info->key.tun_id = key_to_tunnel_id(tpi->key);
419 
420 			info->key.tp_src = 0;
421 			info->key.tp_dst = 0;
422 		}
423 
424 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
425 		return PACKET_RCVD;
426 	}
427 	return PACKET_REJECT;
428 }
429 
430 static int gre_rcv(struct sk_buff *skb)
431 {
432 	struct tnl_ptk_info tpi;
433 	bool csum_err = false;
434 
435 #ifdef CONFIG_NET_IPGRE_BROADCAST
436 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
437 		/* Looped back packet, drop it! */
438 		if (rt_is_output_route(skb_rtable(skb)))
439 			goto drop;
440 	}
441 #endif
442 
443 	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
444 		goto drop;
445 
446 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
447 		return 0;
448 
449 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
450 drop:
451 	kfree_skb(skb);
452 	return 0;
453 }
454 
455 static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
456 			 __be16 proto, __be32 key, __be32 seq)
457 {
458 	struct gre_base_hdr *greh;
459 
460 	skb_push(skb, hdr_len);
461 
462 	skb_reset_transport_header(skb);
463 	greh = (struct gre_base_hdr *)skb->data;
464 	greh->flags = tnl_flags_to_gre_flags(flags);
465 	greh->protocol = proto;
466 
467 	if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
468 		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
469 
470 		if (flags & TUNNEL_SEQ) {
471 			*ptr = seq;
472 			ptr--;
473 		}
474 		if (flags & TUNNEL_KEY) {
475 			*ptr = key;
476 			ptr--;
477 		}
478 		if (flags & TUNNEL_CSUM &&
479 		    !(skb_shinfo(skb)->gso_type &
480 		      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
481 			*ptr = 0;
482 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
483 								 skb->len, 0));
484 		}
485 	}
486 }
487 
488 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
489 		       const struct iphdr *tnl_params,
490 		       __be16 proto)
491 {
492 	struct ip_tunnel *tunnel = netdev_priv(dev);
493 
494 	if (tunnel->parms.o_flags & TUNNEL_SEQ)
495 		tunnel->o_seqno++;
496 
497 	/* Push GRE header. */
498 	build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
499 		     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
500 
501 	skb_set_inner_protocol(skb, proto);
502 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
503 }
504 
505 static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
506 					   bool csum)
507 {
508 	return iptunnel_handle_offloads(skb, csum,
509 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
510 }
511 
512 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
513 {
514 	struct ip_tunnel_info *tun_info;
515 	struct net *net = dev_net(dev);
516 	const struct ip_tunnel_key *key;
517 	struct flowi4 fl;
518 	struct rtable *rt;
519 	int min_headroom;
520 	int tunnel_hlen;
521 	__be16 df, flags;
522 	int err;
523 
524 	tun_info = skb_tunnel_info(skb, AF_INET);
525 	if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
526 		goto err_free_skb;
527 
528 	key = &tun_info->key;
529 	memset(&fl, 0, sizeof(fl));
530 	fl.daddr = key->ipv4_dst;
531 	fl.saddr = key->ipv4_src;
532 	fl.flowi4_tos = RT_TOS(key->ipv4_tos);
533 	fl.flowi4_mark = skb->mark;
534 	fl.flowi4_proto = IPPROTO_GRE;
535 
536 	rt = ip_route_output_key(net, &fl);
537 	if (IS_ERR(rt))
538 		goto err_free_skb;
539 
540 	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
541 
542 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
543 			+ tunnel_hlen + sizeof(struct iphdr);
544 	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
545 		int head_delta = SKB_DATA_ALIGN(min_headroom -
546 						skb_headroom(skb) +
547 						16);
548 		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
549 				       0, GFP_ATOMIC);
550 		if (unlikely(err))
551 			goto err_free_rt;
552 	}
553 
554 	/* Push Tunnel header. */
555 	skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
556 	if (IS_ERR(skb)) {
557 		skb = NULL;
558 		goto err_free_rt;
559 	}
560 
561 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
562 	build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
563 		     tunnel_id_to_key(tun_info->key.tun_id), 0);
564 
565 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
566 	err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
567 			    key->ipv4_dst, IPPROTO_GRE,
568 			    key->ipv4_tos, key->ipv4_ttl, df, false);
569 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
570 	return;
571 
572 err_free_rt:
573 	ip_rt_put(rt);
574 err_free_skb:
575 	kfree_skb(skb);
576 	dev->stats.tx_dropped++;
577 }
578 
579 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
580 			      struct net_device *dev)
581 {
582 	struct ip_tunnel *tunnel = netdev_priv(dev);
583 	const struct iphdr *tnl_params;
584 
585 	if (tunnel->collect_md) {
586 		gre_fb_xmit(skb, dev);
587 		return NETDEV_TX_OK;
588 	}
589 
590 	if (dev->header_ops) {
591 		/* Need space for new headers */
592 		if (skb_cow_head(skb, dev->needed_headroom -
593 				      (tunnel->hlen + sizeof(struct iphdr))))
594 			goto free_skb;
595 
596 		tnl_params = (const struct iphdr *)skb->data;
597 
598 		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
599 		 * to gre header.
600 		 */
601 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
602 		skb_reset_mac_header(skb);
603 	} else {
604 		if (skb_cow_head(skb, dev->needed_headroom))
605 			goto free_skb;
606 
607 		tnl_params = &tunnel->parms.iph;
608 	}
609 
610 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
611 	if (IS_ERR(skb))
612 		goto out;
613 
614 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
615 	return NETDEV_TX_OK;
616 
617 free_skb:
618 	kfree_skb(skb);
619 out:
620 	dev->stats.tx_dropped++;
621 	return NETDEV_TX_OK;
622 }
623 
624 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
625 				struct net_device *dev)
626 {
627 	struct ip_tunnel *tunnel = netdev_priv(dev);
628 
629 	if (tunnel->collect_md) {
630 		gre_fb_xmit(skb, dev);
631 		return NETDEV_TX_OK;
632 	}
633 
634 	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
635 	if (IS_ERR(skb))
636 		goto out;
637 
638 	if (skb_cow_head(skb, dev->needed_headroom))
639 		goto free_skb;
640 
641 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
642 	return NETDEV_TX_OK;
643 
644 free_skb:
645 	kfree_skb(skb);
646 out:
647 	dev->stats.tx_dropped++;
648 	return NETDEV_TX_OK;
649 }
650 
651 static int ipgre_tunnel_ioctl(struct net_device *dev,
652 			      struct ifreq *ifr, int cmd)
653 {
654 	int err;
655 	struct ip_tunnel_parm p;
656 
657 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
658 		return -EFAULT;
659 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
660 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
661 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
662 		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
663 			return -EINVAL;
664 	}
665 	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
666 	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
667 
668 	err = ip_tunnel_ioctl(dev, &p, cmd);
669 	if (err)
670 		return err;
671 
672 	p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
673 	p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
674 
675 	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
676 		return -EFAULT;
677 	return 0;
678 }
679 
680 /* Nice toy. Unfortunately, useless in real life :-)
681    It allows to construct virtual multiprotocol broadcast "LAN"
682    over the Internet, provided multicast routing is tuned.
683 
684 
685    I have no idea was this bicycle invented before me,
686    so that I had to set ARPHRD_IPGRE to a random value.
687    I have an impression, that Cisco could make something similar,
688    but this feature is apparently missing in IOS<=11.2(8).
689 
690    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
691    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
692 
693    ping -t 255 224.66.66.66
694 
695    If nobody answers, mbone does not work.
696 
697    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
698    ip addr add 10.66.66.<somewhat>/24 dev Universe
699    ifconfig Universe up
700    ifconfig Universe add fe80::<Your_real_addr>/10
701    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
702    ftp 10.66.66.66
703    ...
704    ftp fec0:6666:6666::193.233.7.65
705    ...
706  */
707 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
708 			unsigned short type,
709 			const void *daddr, const void *saddr, unsigned int len)
710 {
711 	struct ip_tunnel *t = netdev_priv(dev);
712 	struct iphdr *iph;
713 	struct gre_base_hdr *greh;
714 
715 	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
716 	greh = (struct gre_base_hdr *)(iph+1);
717 	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
718 	greh->protocol = htons(type);
719 
720 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
721 
722 	/* Set the source hardware address. */
723 	if (saddr)
724 		memcpy(&iph->saddr, saddr, 4);
725 	if (daddr)
726 		memcpy(&iph->daddr, daddr, 4);
727 	if (iph->daddr)
728 		return t->hlen + sizeof(*iph);
729 
730 	return -(t->hlen + sizeof(*iph));
731 }
732 
733 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
734 {
735 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
736 	memcpy(haddr, &iph->saddr, 4);
737 	return 4;
738 }
739 
740 static const struct header_ops ipgre_header_ops = {
741 	.create	= ipgre_header,
742 	.parse	= ipgre_header_parse,
743 };
744 
745 #ifdef CONFIG_NET_IPGRE_BROADCAST
746 static int ipgre_open(struct net_device *dev)
747 {
748 	struct ip_tunnel *t = netdev_priv(dev);
749 
750 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
751 		struct flowi4 fl4;
752 		struct rtable *rt;
753 
754 		rt = ip_route_output_gre(t->net, &fl4,
755 					 t->parms.iph.daddr,
756 					 t->parms.iph.saddr,
757 					 t->parms.o_key,
758 					 RT_TOS(t->parms.iph.tos),
759 					 t->parms.link);
760 		if (IS_ERR(rt))
761 			return -EADDRNOTAVAIL;
762 		dev = rt->dst.dev;
763 		ip_rt_put(rt);
764 		if (!__in_dev_get_rtnl(dev))
765 			return -EADDRNOTAVAIL;
766 		t->mlink = dev->ifindex;
767 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
768 	}
769 	return 0;
770 }
771 
772 static int ipgre_close(struct net_device *dev)
773 {
774 	struct ip_tunnel *t = netdev_priv(dev);
775 
776 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
777 		struct in_device *in_dev;
778 		in_dev = inetdev_by_index(t->net, t->mlink);
779 		if (in_dev)
780 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
781 	}
782 	return 0;
783 }
784 #endif
785 
786 static const struct net_device_ops ipgre_netdev_ops = {
787 	.ndo_init		= ipgre_tunnel_init,
788 	.ndo_uninit		= ip_tunnel_uninit,
789 #ifdef CONFIG_NET_IPGRE_BROADCAST
790 	.ndo_open		= ipgre_open,
791 	.ndo_stop		= ipgre_close,
792 #endif
793 	.ndo_start_xmit		= ipgre_xmit,
794 	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
795 	.ndo_change_mtu		= ip_tunnel_change_mtu,
796 	.ndo_get_stats64	= ip_tunnel_get_stats64,
797 	.ndo_get_iflink		= ip_tunnel_get_iflink,
798 };
799 
/* Feature bits that __gre_tunnel_init() sets on every GRE device. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
804 
805 static void ipgre_tunnel_setup(struct net_device *dev)
806 {
807 	dev->netdev_ops		= &ipgre_netdev_ops;
808 	dev->type		= ARPHRD_IPGRE;
809 	ip_tunnel_setup(dev, ipgre_net_id);
810 }
811 
812 static void __gre_tunnel_init(struct net_device *dev)
813 {
814 	struct ip_tunnel *tunnel;
815 	int t_hlen;
816 
817 	tunnel = netdev_priv(dev);
818 	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
819 	tunnel->parms.iph.protocol = IPPROTO_GRE;
820 
821 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
822 
823 	t_hlen = tunnel->hlen + sizeof(struct iphdr);
824 
825 	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
826 	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
827 
828 	dev->features		|= GRE_FEATURES;
829 	dev->hw_features	|= GRE_FEATURES;
830 
831 	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
832 		/* TCP offload with GRE SEQ is not supported. */
833 		dev->features    |= NETIF_F_GSO_SOFTWARE;
834 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
835 		/* Can use a lockless transmit, unless we generate
836 		 * output sequences
837 		 */
838 		dev->features |= NETIF_F_LLTX;
839 	}
840 }
841 
842 static int ipgre_tunnel_init(struct net_device *dev)
843 {
844 	struct ip_tunnel *tunnel = netdev_priv(dev);
845 	struct iphdr *iph = &tunnel->parms.iph;
846 
847 	__gre_tunnel_init(dev);
848 
849 	memcpy(dev->dev_addr, &iph->saddr, 4);
850 	memcpy(dev->broadcast, &iph->daddr, 4);
851 
852 	dev->flags		= IFF_NOARP;
853 	netif_keep_dst(dev);
854 	dev->addr_len		= 4;
855 
856 	if (iph->daddr) {
857 #ifdef CONFIG_NET_IPGRE_BROADCAST
858 		if (ipv4_is_multicast(iph->daddr)) {
859 			if (!iph->saddr)
860 				return -EINVAL;
861 			dev->flags = IFF_BROADCAST;
862 			dev->header_ops = &ipgre_header_ops;
863 		}
864 #endif
865 	} else
866 		dev->header_ops = &ipgre_header_ops;
867 
868 	return ip_tunnel_init(dev);
869 }
870 
871 static const struct gre_protocol ipgre_protocol = {
872 	.handler     = gre_rcv,
873 	.err_handler = gre_err,
874 };
875 
876 static int __net_init ipgre_init_net(struct net *net)
877 {
878 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
879 }
880 
881 static void __net_exit ipgre_exit_net(struct net *net)
882 {
883 	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
884 	ip_tunnel_delete_net(itn, &ipgre_link_ops);
885 }
886 
887 static struct pernet_operations ipgre_net_ops = {
888 	.init = ipgre_init_net,
889 	.exit = ipgre_exit_net,
890 	.id   = &ipgre_net_id,
891 	.size = sizeof(struct ip_tunnel_net),
892 };
893 
894 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
895 {
896 	__be16 flags;
897 
898 	if (!data)
899 		return 0;
900 
901 	flags = 0;
902 	if (data[IFLA_GRE_IFLAGS])
903 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
904 	if (data[IFLA_GRE_OFLAGS])
905 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
906 	if (flags & (GRE_VERSION|GRE_ROUTING))
907 		return -EINVAL;
908 
909 	return 0;
910 }
911 
912 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
913 {
914 	__be32 daddr;
915 
916 	if (tb[IFLA_ADDRESS]) {
917 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
918 			return -EINVAL;
919 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
920 			return -EADDRNOTAVAIL;
921 	}
922 
923 	if (!data)
924 		goto out;
925 
926 	if (data[IFLA_GRE_REMOTE]) {
927 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
928 		if (!daddr)
929 			return -EINVAL;
930 	}
931 
932 out:
933 	return ipgre_tunnel_validate(tb, data);
934 }
935 
936 static void ipgre_netlink_parms(struct net_device *dev,
937 				struct nlattr *data[],
938 				struct nlattr *tb[],
939 				struct ip_tunnel_parm *parms)
940 {
941 	memset(parms, 0, sizeof(*parms));
942 
943 	parms->iph.protocol = IPPROTO_GRE;
944 
945 	if (!data)
946 		return;
947 
948 	if (data[IFLA_GRE_LINK])
949 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
950 
951 	if (data[IFLA_GRE_IFLAGS])
952 		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
953 
954 	if (data[IFLA_GRE_OFLAGS])
955 		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
956 
957 	if (data[IFLA_GRE_IKEY])
958 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
959 
960 	if (data[IFLA_GRE_OKEY])
961 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
962 
963 	if (data[IFLA_GRE_LOCAL])
964 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
965 
966 	if (data[IFLA_GRE_REMOTE])
967 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
968 
969 	if (data[IFLA_GRE_TTL])
970 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
971 
972 	if (data[IFLA_GRE_TOS])
973 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
974 
975 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
976 		parms->iph.frag_off = htons(IP_DF);
977 
978 	if (data[IFLA_GRE_COLLECT_METADATA]) {
979 		struct ip_tunnel *t = netdev_priv(dev);
980 
981 		t->collect_md = true;
982 	}
983 }
984 
985 /* This function returns true when ENCAP attributes are present in the nl msg */
986 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
987 				      struct ip_tunnel_encap *ipencap)
988 {
989 	bool ret = false;
990 
991 	memset(ipencap, 0, sizeof(*ipencap));
992 
993 	if (!data)
994 		return ret;
995 
996 	if (data[IFLA_GRE_ENCAP_TYPE]) {
997 		ret = true;
998 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
999 	}
1000 
1001 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1002 		ret = true;
1003 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1004 	}
1005 
1006 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1007 		ret = true;
1008 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1009 	}
1010 
1011 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1012 		ret = true;
1013 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1014 	}
1015 
1016 	return ret;
1017 }
1018 
1019 static int gre_tap_init(struct net_device *dev)
1020 {
1021 	__gre_tunnel_init(dev);
1022 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1023 
1024 	return ip_tunnel_init(dev);
1025 }
1026 
1027 static const struct net_device_ops gre_tap_netdev_ops = {
1028 	.ndo_init		= gre_tap_init,
1029 	.ndo_uninit		= ip_tunnel_uninit,
1030 	.ndo_start_xmit		= gre_tap_xmit,
1031 	.ndo_set_mac_address 	= eth_mac_addr,
1032 	.ndo_validate_addr	= eth_validate_addr,
1033 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1034 	.ndo_get_stats64	= ip_tunnel_get_stats64,
1035 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1036 };
1037 
1038 static void ipgre_tap_setup(struct net_device *dev)
1039 {
1040 	ether_setup(dev);
1041 	dev->netdev_ops		= &gre_tap_netdev_ops;
1042 	dev->priv_flags 	|= IFF_LIVE_ADDR_CHANGE;
1043 	ip_tunnel_setup(dev, gre_tap_net_id);
1044 }
1045 
1046 static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1047 			 struct nlattr *tb[], struct nlattr *data[])
1048 {
1049 	struct ip_tunnel_parm p;
1050 	struct ip_tunnel_encap ipencap;
1051 
1052 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1053 		struct ip_tunnel *t = netdev_priv(dev);
1054 		int err = ip_tunnel_encap_setup(t, &ipencap);
1055 
1056 		if (err < 0)
1057 			return err;
1058 	}
1059 
1060 	ipgre_netlink_parms(dev, data, tb, &p);
1061 	return ip_tunnel_newlink(dev, tb, &p);
1062 }
1063 
1064 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1065 			    struct nlattr *data[])
1066 {
1067 	struct ip_tunnel_parm p;
1068 	struct ip_tunnel_encap ipencap;
1069 
1070 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1071 		struct ip_tunnel *t = netdev_priv(dev);
1072 		int err = ip_tunnel_encap_setup(t, &ipencap);
1073 
1074 		if (err < 0)
1075 			return err;
1076 	}
1077 
1078 	ipgre_netlink_parms(dev, data, tb, &p);
1079 	return ip_tunnel_changelink(dev, tb, &p);
1080 }
1081 
1082 static size_t ipgre_get_size(const struct net_device *dev)
1083 {
1084 	return
1085 		/* IFLA_GRE_LINK */
1086 		nla_total_size(4) +
1087 		/* IFLA_GRE_IFLAGS */
1088 		nla_total_size(2) +
1089 		/* IFLA_GRE_OFLAGS */
1090 		nla_total_size(2) +
1091 		/* IFLA_GRE_IKEY */
1092 		nla_total_size(4) +
1093 		/* IFLA_GRE_OKEY */
1094 		nla_total_size(4) +
1095 		/* IFLA_GRE_LOCAL */
1096 		nla_total_size(4) +
1097 		/* IFLA_GRE_REMOTE */
1098 		nla_total_size(4) +
1099 		/* IFLA_GRE_TTL */
1100 		nla_total_size(1) +
1101 		/* IFLA_GRE_TOS */
1102 		nla_total_size(1) +
1103 		/* IFLA_GRE_PMTUDISC */
1104 		nla_total_size(1) +
1105 		/* IFLA_GRE_ENCAP_TYPE */
1106 		nla_total_size(2) +
1107 		/* IFLA_GRE_ENCAP_FLAGS */
1108 		nla_total_size(2) +
1109 		/* IFLA_GRE_ENCAP_SPORT */
1110 		nla_total_size(2) +
1111 		/* IFLA_GRE_ENCAP_DPORT */
1112 		nla_total_size(2) +
1113 		/* IFLA_GRE_COLLECT_METADATA */
1114 		nla_total_size(0) +
1115 		0;
1116 }
1117 
1118 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1119 {
1120 	struct ip_tunnel *t = netdev_priv(dev);
1121 	struct ip_tunnel_parm *p = &t->parms;
1122 
1123 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1124 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1125 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1126 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1127 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1128 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1129 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1130 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1131 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1132 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1133 		       !!(p->iph.frag_off & htons(IP_DF))))
1134 		goto nla_put_failure;
1135 
1136 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1137 			t->encap.type) ||
1138 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1139 			 t->encap.sport) ||
1140 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1141 			 t->encap.dport) ||
1142 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1143 			t->encap.flags))
1144 		goto nla_put_failure;
1145 
1146 	if (t->collect_md) {
1147 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1148 			goto nla_put_failure;
1149 	}
1150 
1151 	return 0;
1152 
1153 nla_put_failure:
1154 	return -EMSGSIZE;
1155 }
1156 
1157 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1158 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1159 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1160 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1161 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1162 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1163 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1164 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1165 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1166 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1167 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1168 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1169 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1170 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1171 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1172 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1173 };
1174 
1175 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1176 	.kind		= "gre",
1177 	.maxtype	= IFLA_GRE_MAX,
1178 	.policy		= ipgre_policy,
1179 	.priv_size	= sizeof(struct ip_tunnel),
1180 	.setup		= ipgre_tunnel_setup,
1181 	.validate	= ipgre_tunnel_validate,
1182 	.newlink	= ipgre_newlink,
1183 	.changelink	= ipgre_changelink,
1184 	.dellink	= ip_tunnel_dellink,
1185 	.get_size	= ipgre_get_size,
1186 	.fill_info	= ipgre_fill_info,
1187 	.get_link_net	= ip_tunnel_get_link_net,
1188 };
1189 
1190 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1191 	.kind		= "gretap",
1192 	.maxtype	= IFLA_GRE_MAX,
1193 	.policy		= ipgre_policy,
1194 	.priv_size	= sizeof(struct ip_tunnel),
1195 	.setup		= ipgre_tap_setup,
1196 	.validate	= ipgre_tap_validate,
1197 	.newlink	= ipgre_newlink,
1198 	.changelink	= ipgre_changelink,
1199 	.dellink	= ip_tunnel_dellink,
1200 	.get_size	= ipgre_get_size,
1201 	.fill_info	= ipgre_fill_info,
1202 	.get_link_net	= ip_tunnel_get_link_net,
1203 };
1204 
1205 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1206 					u8 name_assign_type)
1207 {
1208 	struct nlattr *tb[IFLA_MAX + 1];
1209 	struct net_device *dev;
1210 	struct ip_tunnel *t;
1211 	int err;
1212 
1213 	memset(&tb, 0, sizeof(tb));
1214 
1215 	dev = rtnl_create_link(net, name, name_assign_type,
1216 			       &ipgre_tap_ops, tb);
1217 	if (IS_ERR(dev))
1218 		return dev;
1219 
1220 	/* Configure flow based GRE device. */
1221 	t = netdev_priv(dev);
1222 	t->collect_md = true;
1223 
1224 	err = ipgre_newlink(net, dev, tb, NULL);
1225 	if (err < 0)
1226 		goto out;
1227 	return dev;
1228 out:
1229 	free_netdev(dev);
1230 	return ERR_PTR(err);
1231 }
1232 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1233 
1234 static int __net_init ipgre_tap_init_net(struct net *net)
1235 {
1236 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1237 }
1238 
1239 static void __net_exit ipgre_tap_exit_net(struct net *net)
1240 {
1241 	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
1242 	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
1243 }
1244 
1245 static struct pernet_operations ipgre_tap_net_ops = {
1246 	.init = ipgre_tap_init_net,
1247 	.exit = ipgre_tap_exit_net,
1248 	.id   = &gre_tap_net_id,
1249 	.size = sizeof(struct ip_tunnel_net),
1250 };
1251 
1252 static int __init ipgre_init(void)
1253 {
1254 	int err;
1255 
1256 	pr_info("GRE over IPv4 tunneling driver\n");
1257 
1258 	err = register_pernet_device(&ipgre_net_ops);
1259 	if (err < 0)
1260 		return err;
1261 
1262 	err = register_pernet_device(&ipgre_tap_net_ops);
1263 	if (err < 0)
1264 		goto pnet_tap_faied;
1265 
1266 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1267 	if (err < 0) {
1268 		pr_info("%s: can't add protocol\n", __func__);
1269 		goto add_proto_failed;
1270 	}
1271 
1272 	err = rtnl_link_register(&ipgre_link_ops);
1273 	if (err < 0)
1274 		goto rtnl_link_failed;
1275 
1276 	err = rtnl_link_register(&ipgre_tap_ops);
1277 	if (err < 0)
1278 		goto tap_ops_failed;
1279 
1280 	return 0;
1281 
1282 tap_ops_failed:
1283 	rtnl_link_unregister(&ipgre_link_ops);
1284 rtnl_link_failed:
1285 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1286 add_proto_failed:
1287 	unregister_pernet_device(&ipgre_tap_net_ops);
1288 pnet_tap_faied:
1289 	unregister_pernet_device(&ipgre_net_ops);
1290 	return err;
1291 }
1292 
1293 static void __exit ipgre_fini(void)
1294 {
1295 	rtnl_link_unregister(&ipgre_tap_ops);
1296 	rtnl_link_unregister(&ipgre_link_ops);
1297 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1298 	unregister_pernet_device(&ipgre_tap_net_ops);
1299 	unregister_pernet_device(&ipgre_net_ops);
1300 }
1301 
1302 module_init(ipgre_init);
1303 module_exit(ipgre_fini);
1304 MODULE_LICENSE("GPL");
1305 MODULE_ALIAS_RTNL_LINK("gre");
1306 MODULE_ALIAS_RTNL_LINK("gretap");
1307 MODULE_ALIAS_NETDEV("gre0");
1308 MODULE_ALIAS_NETDEV("gretap0");
1309