xref: /linux/net/ipv4/ip_gre.c (revision 1a9239bb4253f9076b5b4b2a1a4e8d7defd77a95)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Linux NET3:	GRE over IP protocol decoder.
4  *
5  *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6  */
7 
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9 
10 #include <linux/capability.h>
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/uaccess.h>
16 #include <linux/skbuff.h>
17 #include <linux/netdevice.h>
18 #include <linux/in.h>
19 #include <linux/tcp.h>
20 #include <linux/udp.h>
21 #include <linux/if_arp.h>
22 #include <linux/if_vlan.h>
23 #include <linux/init.h>
24 #include <linux/in6.h>
25 #include <linux/inetdevice.h>
26 #include <linux/igmp.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/etherdevice.h>
29 #include <linux/if_ether.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/gre.h>
45 #include <net/dst_metadata.h>
46 #include <net/erspan.h>
47 #include <net/inet_dscp.h>
48 
49 /*
50    Problems & solutions
51    --------------------
52 
53    1. The most important issue is detecting local dead loops.
54    They would cause a complete host lockup in transmit, which
55    would be "resolved" by a stack overflow or, if queueing is enabled,
56    by infinite looping in net_bh.
57 
58    We cannot track such dead loops during route installation;
59    it is an infeasible task. The most general solution would be
60    to keep an skb->encapsulation counter (a sort of local ttl)
61    and silently drop the packet when it expires. It is a good
62    solution, but it requires maintaining a new variable in ALL
63    skbs, even if no tunneling is used.
64 
65    Current solution: xmit_recursion breaks dead loops. This is a percpu
66    counter; since cpu migration is forbidden once we enter the first
67    ndo_xmit(), it needs no locking. We force an exit if this counter reaches RECURSION_LIMIT.
68 
69    2. Networking dead loops would not kill routers, but would really
70    kill the network. The IP hop limit plays the role of "t->recursion"
71    in this case, if we copy it from the packet being encapsulated to the
72    upper header. It is a very good solution, but it introduces two problems:
73 
74    - Routing protocols using packets with ttl=1 (OSPF, RIP2)
75      do not work over tunnels.
76    - traceroute does not work. I planned to relay ICMP from the tunnel,
77      so that this problem would be solved and traceroute output
78      would be even more informative. This idea turned out to be wrong:
79      only Linux complies with rfc1812 now (yes, guys, Linux is the only
80      true router now :-)); all routers (at least in my neighbourhood)
81      return only 8 bytes of payload. That is the end of it.
82 
83    Hence, if we want OSPF to work or traceroute to say something reasonable,
84    we should search for another solution.
85 
86    One of them is to parse the packet, trying to detect inner encapsulation
87    made by our node. That is difficult or even impossible, especially
88    taking fragmentation into account. In short, ttl is no solution at all.
89 
90    Current solution: The solution was UNEXPECTEDLY SIMPLE.
91    We force the DF flag on tunnels with a preconfigured hop limit,
92    and that is ALL. :-) Well, it does not remove the problem completely,
93    but the exponential growth of network traffic is changed to linear
94    (branches that exceed the pmtu are pruned) and the tunnel mtu
95    rapidly degrades to a value <68, where looping stops.
96    Yes, it is not good if there is a router in the loop
97    that does not force DF, even when the encapsulating packets have DF set.
98    But it is not our problem! Nobody could accuse us; we did
99    all that we could. Even if it was your gated that injected
100    the fatal route into the network, even if it was you who configured
101    the fatal static route: you are innocent. :-)
102 
103    Alexey Kuznetsov.
104  */
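/* A minimal sketch of the xmit_recursion guard described above. The real
 * kernel keeps this counter in softnet_data behind helper functions; the
 * names below (xmit_recursion, RECURSION_LIMIT, guarded_xmit) are
 * illustrative only, not the actual API.
 */
#define RECURSION_LIMIT 8
static DEFINE_PER_CPU(unsigned int, xmit_recursion);

static netdev_tx_t guarded_xmit(struct sk_buff *skb, struct net_device *dev)
{
	netdev_tx_t ret;

	/* We stay on one cpu for the whole ndo_start_xmit() call chain,
	 * so the percpu counter is effectively private here.
	 */
	if (unlikely(__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)) {
		kfree_skb(skb);		/* local dead loop: drop the packet */
		return NETDEV_TX_OK;
	}

	__this_cpu_inc(xmit_recursion);
	ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
	__this_cpu_dec(xmit_recursion);

	return ret;
}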
105 
106 static bool log_ecn_error = true;
107 module_param(log_ecn_error, bool, 0644);
108 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
109 
110 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
111 static const struct header_ops ipgre_header_ops;
112 
113 static int ipgre_tunnel_init(struct net_device *dev);
114 static void erspan_build_header(struct sk_buff *skb,
115 				u32 id, u32 index,
116 				bool truncate, bool is_ipv4);
117 
118 static unsigned int ipgre_net_id __read_mostly;
119 static unsigned int gre_tap_net_id __read_mostly;
120 static unsigned int erspan_net_id __read_mostly;
121 
122 static int ipgre_err(struct sk_buff *skb, u32 info,
123 		     const struct tnl_ptk_info *tpi)
124 {
125 
126 	/* All the routers (except for Linux) return only
127 	   8 bytes of packet payload. It means that precise relaying of
128 	   ICMP in the real Internet is absolutely infeasible.
129 
130 	   Moreover, Cisco "wise men" put the GRE key in the third word
131 	   of the GRE header. That makes it impossible to maintain even
132 	   soft state for keyed GRE tunnels with checksums enabled. Tell
133 	   them "thank you".
134 
135 	   Well, I wonder: rfc1812 was written by a Cisco employee, so
136 	   why the hell do these idiots break standards established
137 	   by themselves???
138 	   */
139 	struct net *net = dev_net(skb->dev);
140 	struct ip_tunnel_net *itn;
141 	const struct iphdr *iph;
142 	const int type = icmp_hdr(skb)->type;
143 	const int code = icmp_hdr(skb)->code;
144 	struct ip_tunnel *t;
145 
146 	if (tpi->proto == htons(ETH_P_TEB))
147 		itn = net_generic(net, gre_tap_net_id);
148 	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
149 		 tpi->proto == htons(ETH_P_ERSPAN2))
150 		itn = net_generic(net, erspan_net_id);
151 	else
152 		itn = net_generic(net, ipgre_net_id);
153 
154 	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
155 	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
156 			     iph->daddr, iph->saddr, tpi->key);
157 
158 	if (!t)
159 		return -ENOENT;
160 
161 	switch (type) {
162 	default:
163 	case ICMP_PARAMETERPROB:
164 		return 0;
165 
166 	case ICMP_DEST_UNREACH:
167 		switch (code) {
168 		case ICMP_SR_FAILED:
169 		case ICMP_PORT_UNREACH:
170 			/* Impossible event. */
171 			return 0;
172 		default:
173 			/* All others are translated to HOST_UNREACH.
174 			   rfc2003 contains "deep thoughts" about NET_UNREACH;
175 			   I believe they are just ether pollution. --ANK
176 			 */
177 			break;
178 		}
179 		break;
180 
181 	case ICMP_TIME_EXCEEDED:
182 		if (code != ICMP_EXC_TTL)
183 			return 0;
184 		break;
185 
186 	case ICMP_REDIRECT:
187 		break;
188 	}
189 
190 #if IS_ENABLED(CONFIG_IPV6)
191 	if (tpi->proto == htons(ETH_P_IPV6)) {
192 		unsigned int data_len = 0;
193 
194 		if (type == ICMP_TIME_EXCEEDED)
195 			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
196 
197 		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
198 						type, data_len))
199 			return 0;
200 	}
201 #endif
202 
203 	if (t->parms.iph.daddr == 0 ||
204 	    ipv4_is_multicast(t->parms.iph.daddr))
205 		return 0;
206 
207 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
208 		return 0;
209 
210 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
211 		t->err_count++;
212 	else
213 		t->err_count = 1;
214 	t->err_time = jiffies;
215 
216 	return 0;
217 }
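/* Layout sketch for the complaint above: with both the C (checksum) and
 * K (key) bits set, the key is the third 32-bit word of the GRE header,
 * i.e. it starts at offset 8 -- exactly where the 8 quoted ICMP payload
 * bytes end. The struct mirrors the rfc2890 layout and is illustrative,
 * not a definition used by this file.
 */
struct gre_keyed_csum_hdr {
	__be16 flags;		/* word 0: GRE_CSUM | GRE_KEY set */
	__be16 protocol;
	__be16 csum;		/* word 1: checksum ... */
	__be16 reserved;	/* ... plus reserved half-word */
	__be32 key;		/* word 2: key, at offset 8 */
};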
218 
219 static void gre_err(struct sk_buff *skb, u32 info)
220 {
221 	/* All the routers (except for Linux) return only
222 	 * 8 bytes of packet payload. It means that precise relaying of
223 	 * ICMP in the real Internet is absolutely infeasible.
224 	 *
225 	 * Moreover, Cisco "wise men" put the GRE key in the third word
226 	 * of the GRE header. That makes it impossible to maintain even
227 	 * soft state for keyed GRE tunnels with checksums enabled.
228 	 * Tell them "thank you".
229 	 *
230 	 * Well, I wonder: rfc1812 was written by a Cisco employee, so
231 	 * why the hell do these idiots break standards established
232 	 * by themselves???
233 	 */
234 
235 	const struct iphdr *iph = (struct iphdr *)skb->data;
236 	const int type = icmp_hdr(skb)->type;
237 	const int code = icmp_hdr(skb)->code;
238 	struct tnl_ptk_info tpi;
239 
240 	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
241 			     iph->ihl * 4) < 0)
242 		return;
243 
244 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
245 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
246 				 skb->dev->ifindex, IPPROTO_GRE);
247 		return;
248 	}
249 	if (type == ICMP_REDIRECT) {
250 		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
251 			      IPPROTO_GRE);
252 		return;
253 	}
254 
255 	ipgre_err(skb, info, &tpi);
256 }
257 
258 static bool is_erspan_type1(int gre_hdr_len)
259 {
260 	/* Both ERSPAN type I (version 0) and type II (version 1) use
261 	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
262 	 * while type II has an 8-byte one.
263 	 */
264 	return gre_hdr_len == 4;
265 }
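/* Worked example for the test above, following the rfc2890 layout:
 *
 *	base header (flags + protocol)		4 bytes -> type I total:  4
 *	+ sequence number (S bit, type II)	4 bytes -> type II total: 8
 *
 * so the GRE header length alone disambiguates the two ERSPAN types.
 */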
266 
267 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
268 		      int gre_hdr_len)
269 {
270 	struct net *net = dev_net(skb->dev);
271 	struct metadata_dst *tun_dst = NULL;
272 	struct erspan_base_hdr *ershdr;
273 	IP_TUNNEL_DECLARE_FLAGS(flags);
274 	struct ip_tunnel_net *itn;
275 	struct ip_tunnel *tunnel;
276 	const struct iphdr *iph;
277 	struct erspan_md2 *md2;
278 	int ver;
279 	int len;
280 
281 	ip_tunnel_flags_copy(flags, tpi->flags);
282 
283 	itn = net_generic(net, erspan_net_id);
284 	iph = ip_hdr(skb);
285 	if (is_erspan_type1(gre_hdr_len)) {
286 		ver = 0;
287 		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
288 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
289 					  iph->saddr, iph->daddr, 0);
290 	} else {
291 		if (unlikely(!pskb_may_pull(skb,
292 					    gre_hdr_len + sizeof(*ershdr))))
293 			return PACKET_REJECT;
294 
295 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
296 		ver = ershdr->ver;
297 		iph = ip_hdr(skb);
298 		__set_bit(IP_TUNNEL_KEY_BIT, flags);
299 		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
300 					  iph->saddr, iph->daddr, tpi->key);
301 	}
302 
303 	if (tunnel) {
304 		if (is_erspan_type1(gre_hdr_len))
305 			len = gre_hdr_len;
306 		else
307 			len = gre_hdr_len + erspan_hdr_len(ver);
308 
309 		if (unlikely(!pskb_may_pull(skb, len)))
310 			return PACKET_REJECT;
311 
312 		if (__iptunnel_pull_header(skb,
313 					   len,
314 					   htons(ETH_P_TEB),
315 					   false, false) < 0)
316 			goto drop;
317 
318 		if (tunnel->collect_md) {
319 			struct erspan_metadata *pkt_md, *md;
320 			struct ip_tunnel_info *info;
321 			unsigned char *gh;
322 			__be64 tun_id;
323 
324 			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
325 			ip_tunnel_flags_copy(flags, tpi->flags);
326 			tun_id = key32_to_tunnel_id(tpi->key);
327 
328 			tun_dst = ip_tun_rx_dst(skb, flags,
329 						tun_id, sizeof(*md));
330 			if (!tun_dst)
331 				return PACKET_REJECT;
332 
333 			/* skb can be uncloned in __iptunnel_pull_header, so
334 			 * old pkt_md is no longer valid and we need to reset
335 			 * it
336 			 */
337 			gh = skb_network_header(skb) +
338 			     skb_network_header_len(skb);
339 			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
340 							    sizeof(*ershdr));
341 			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
342 			md->version = ver;
343 			md2 = &md->u.md2;
344 			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
345 						       ERSPAN_V2_MDSIZE);
346 
347 			info = &tun_dst->u.tun_info;
348 			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
349 				  info->key.tun_flags);
350 			info->options_len = sizeof(*md);
351 		}
352 
353 		skb_reset_mac_header(skb);
354 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
355 		return PACKET_RCVD;
356 	}
357 	return PACKET_REJECT;
358 
359 drop:
360 	kfree_skb(skb);
361 	return PACKET_RCVD;
362 }
363 
364 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
365 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
366 {
367 	struct metadata_dst *tun_dst = NULL;
368 	const struct iphdr *iph;
369 	struct ip_tunnel *tunnel;
370 
371 	iph = ip_hdr(skb);
372 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
373 				  iph->saddr, iph->daddr, tpi->key);
374 
375 	if (tunnel) {
376 		const struct iphdr *tnl_params;
377 
378 		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
379 					   raw_proto, false) < 0)
380 			goto drop;
381 
382 		/* Special case for ipgre_header_parse(), which expects the
383 		 * mac_header to point to the outer IP header.
384 		 */
385 		if (tunnel->dev->header_ops == &ipgre_header_ops)
386 			skb_pop_mac_header(skb);
387 		else
388 			skb_reset_mac_header(skb);
389 
390 		tnl_params = &tunnel->parms.iph;
391 		if (tunnel->collect_md || tnl_params->daddr == 0) {
392 			IP_TUNNEL_DECLARE_FLAGS(flags) = { };
393 			__be64 tun_id;
394 
395 			__set_bit(IP_TUNNEL_CSUM_BIT, flags);
396 			__set_bit(IP_TUNNEL_KEY_BIT, flags);
397 			ip_tunnel_flags_and(flags, tpi->flags, flags);
398 
399 			tun_id = key32_to_tunnel_id(tpi->key);
400 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
401 			if (!tun_dst)
402 				return PACKET_REJECT;
403 		}
404 
405 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
406 		return PACKET_RCVD;
407 	}
408 	return PACKET_NEXT;
409 
410 drop:
411 	kfree_skb(skb);
412 	return PACKET_RCVD;
413 }
414 
415 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
416 		     int hdr_len)
417 {
418 	struct net *net = dev_net(skb->dev);
419 	struct ip_tunnel_net *itn;
420 	int res;
421 
422 	if (tpi->proto == htons(ETH_P_TEB))
423 		itn = net_generic(net, gre_tap_net_id);
424 	else
425 		itn = net_generic(net, ipgre_net_id);
426 
427 	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
428 	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
429 		/* ipgre tunnels in collect-metadata mode should also
430 		 * receive ETH_P_TEB traffic.
431 		 */
432 		itn = net_generic(net, ipgre_net_id);
433 		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
434 	}
435 	return res;
436 }
437 
438 static int gre_rcv(struct sk_buff *skb)
439 {
440 	struct tnl_ptk_info tpi;
441 	bool csum_err = false;
442 	int hdr_len;
443 
444 #ifdef CONFIG_NET_IPGRE_BROADCAST
445 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
446 		/* Looped back packet, drop it! */
447 		if (rt_is_output_route(skb_rtable(skb)))
448 			goto drop;
449 	}
450 #endif
451 
452 	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
453 	if (hdr_len < 0)
454 		goto drop;
455 
456 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
457 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
458 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
459 			return 0;
460 		goto out;
461 	}
462 
463 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
464 		return 0;
465 
466 out:
467 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
468 drop:
469 	kfree_skb(skb);
470 	return 0;
471 }
472 
473 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
474 		       const struct iphdr *tnl_params,
475 		       __be16 proto)
476 {
477 	struct ip_tunnel *tunnel = netdev_priv(dev);
478 	IP_TUNNEL_DECLARE_FLAGS(flags);
479 
480 	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
481 
482 	/* Push GRE header. */
483 	gre_build_header(skb, tunnel->tun_hlen,
484 			 flags, proto, tunnel->parms.o_key,
485 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
486 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
487 
488 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
489 }
490 
491 static int gre_handle_offloads(struct sk_buff *skb, bool csum)
492 {
493 	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
494 }
495 
496 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
497 			__be16 proto)
498 {
499 	struct ip_tunnel *tunnel = netdev_priv(dev);
500 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
501 	struct ip_tunnel_info *tun_info;
502 	const struct ip_tunnel_key *key;
503 	int tunnel_hlen;
504 
505 	tun_info = skb_tunnel_info(skb);
506 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
507 		     ip_tunnel_info_af(tun_info) != AF_INET))
508 		goto err_free_skb;
509 
510 	key = &tun_info->key;
511 	tunnel_hlen = gre_calc_hlen(key->tun_flags);
512 
513 	if (skb_cow_head(skb, dev->needed_headroom))
514 		goto err_free_skb;
515 
516 	/* Push Tunnel header. */
517 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
518 					      tunnel->parms.o_flags)))
519 		goto err_free_skb;
520 
521 	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
522 	__set_bit(IP_TUNNEL_KEY_BIT, flags);
523 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
524 	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
525 
526 	gre_build_header(skb, tunnel_hlen, flags, proto,
527 			 tunnel_id_to_key32(tun_info->key.tun_id),
528 			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
529 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
530 
531 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
532 
533 	return;
534 
535 err_free_skb:
536 	kfree_skb(skb);
537 	DEV_STATS_INC(dev, tx_dropped);
538 }
539 
540 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
541 {
542 	struct ip_tunnel *tunnel = netdev_priv(dev);
543 	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
544 	struct ip_tunnel_info *tun_info;
545 	const struct ip_tunnel_key *key;
546 	struct erspan_metadata *md;
547 	bool truncate = false;
548 	__be16 proto;
549 	int tunnel_hlen;
550 	int version;
551 	int nhoff;
552 
553 	tun_info = skb_tunnel_info(skb);
554 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
555 		     ip_tunnel_info_af(tun_info) != AF_INET))
556 		goto err_free_skb;
557 
558 	key = &tun_info->key;
559 	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
560 		goto err_free_skb;
561 	if (tun_info->options_len < sizeof(*md))
562 		goto err_free_skb;
563 	md = ip_tunnel_info_opts(tun_info);
564 
565 	/* ERSPAN has a fixed 8-byte GRE header */
566 	version = md->version;
567 	tunnel_hlen = 8 + erspan_hdr_len(version);
568 
569 	if (skb_cow_head(skb, dev->needed_headroom))
570 		goto err_free_skb;
571 
572 	if (gre_handle_offloads(skb, false))
573 		goto err_free_skb;
574 
575 	if (skb->len > dev->mtu + dev->hard_header_len) {
576 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
577 			goto err_free_skb;
578 		truncate = true;
579 	}
580 
581 	nhoff = skb_network_offset(skb);
582 	if (skb->protocol == htons(ETH_P_IP) &&
583 	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
584 		truncate = true;
585 
586 	if (skb->protocol == htons(ETH_P_IPV6)) {
587 		int thoff;
588 
589 		if (skb_transport_header_was_set(skb))
590 			thoff = skb_transport_offset(skb);
591 		else
592 			thoff = nhoff + sizeof(struct ipv6hdr);
593 		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
594 			truncate = true;
595 	}
596 
597 	if (version == 1) {
598 		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
599 				    ntohl(md->u.index), truncate, true);
600 		proto = htons(ETH_P_ERSPAN);
601 	} else if (version == 2) {
602 		erspan_build_header_v2(skb,
603 				       ntohl(tunnel_id_to_key32(key->tun_id)),
604 				       md->u.md2.dir,
605 				       get_hwid(&md->u.md2),
606 				       truncate, true);
607 		proto = htons(ETH_P_ERSPAN2);
608 	} else {
609 		goto err_free_skb;
610 	}
611 
612 	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
613 	gre_build_header(skb, 8, flags, proto, 0,
614 			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));
615 
616 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
617 
618 	return;
619 
620 err_free_skb:
621 	kfree_skb(skb);
622 	DEV_STATS_INC(dev, tx_dropped);
623 }
624 
625 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
626 {
627 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
628 	const struct ip_tunnel_key *key;
629 	struct rtable *rt;
630 	struct flowi4 fl4;
631 
632 	if (ip_tunnel_info_af(info) != AF_INET)
633 		return -EINVAL;
634 
635 	key = &info->key;
636 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
637 			    tunnel_id_to_key32(key->tun_id),
638 			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
639 			    skb->mark, skb_get_hash(skb), key->flow_flags);
640 	rt = ip_route_output_key(dev_net(dev), &fl4);
641 	if (IS_ERR(rt))
642 		return PTR_ERR(rt);
643 
644 	ip_rt_put(rt);
645 	info->key.u.ipv4.src = fl4.saddr;
646 	return 0;
647 }
648 
649 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
650 			      struct net_device *dev)
651 {
652 	struct ip_tunnel *tunnel = netdev_priv(dev);
653 	const struct iphdr *tnl_params;
654 
655 	if (!pskb_inet_may_pull(skb))
656 		goto free_skb;
657 
658 	if (tunnel->collect_md) {
659 		gre_fb_xmit(skb, dev, skb->protocol);
660 		return NETDEV_TX_OK;
661 	}
662 
663 	if (dev->header_ops) {
664 		int pull_len = tunnel->hlen + sizeof(struct iphdr);
665 
666 		if (skb_cow_head(skb, 0))
667 			goto free_skb;
668 
669 		if (!pskb_may_pull(skb, pull_len))
670 			goto free_skb;
671 
672 		tnl_params = (const struct iphdr *)skb->data;
673 
674 		/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
675 		skb_pull(skb, pull_len);
676 		skb_reset_mac_header(skb);
677 
678 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
679 		    skb_checksum_start(skb) < skb->data)
680 			goto free_skb;
681 	} else {
682 		if (skb_cow_head(skb, dev->needed_headroom))
683 			goto free_skb;
684 
685 		tnl_params = &tunnel->parms.iph;
686 	}
687 
688 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
689 					      tunnel->parms.o_flags)))
690 		goto free_skb;
691 
692 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
693 	return NETDEV_TX_OK;
694 
695 free_skb:
696 	kfree_skb(skb);
697 	DEV_STATS_INC(dev, tx_dropped);
698 	return NETDEV_TX_OK;
699 }
700 
701 static netdev_tx_t erspan_xmit(struct sk_buff *skb,
702 			       struct net_device *dev)
703 {
704 	struct ip_tunnel *tunnel = netdev_priv(dev);
705 	bool truncate = false;
706 	__be16 proto;
707 
708 	if (!pskb_inet_may_pull(skb))
709 		goto free_skb;
710 
711 	if (tunnel->collect_md) {
712 		erspan_fb_xmit(skb, dev);
713 		return NETDEV_TX_OK;
714 	}
715 
716 	if (gre_handle_offloads(skb, false))
717 		goto free_skb;
718 
719 	if (skb_cow_head(skb, dev->needed_headroom))
720 		goto free_skb;
721 
722 	if (skb->len > dev->mtu + dev->hard_header_len) {
723 		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
724 			goto free_skb;
725 		truncate = true;
726 	}
727 
728 	/* Push ERSPAN header */
729 	if (tunnel->erspan_ver == 0) {
730 		proto = htons(ETH_P_ERSPAN);
731 		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
732 	} else if (tunnel->erspan_ver == 1) {
733 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
734 				    tunnel->index,
735 				    truncate, true);
736 		proto = htons(ETH_P_ERSPAN);
737 	} else if (tunnel->erspan_ver == 2) {
738 		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
739 				       tunnel->dir, tunnel->hwid,
740 				       truncate, true);
741 		proto = htons(ETH_P_ERSPAN2);
742 	} else {
743 		goto free_skb;
744 	}
745 
746 	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
747 	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
748 	return NETDEV_TX_OK;
749 
750 free_skb:
751 	kfree_skb(skb);
752 	DEV_STATS_INC(dev, tx_dropped);
753 	return NETDEV_TX_OK;
754 }
755 
756 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
757 				struct net_device *dev)
758 {
759 	struct ip_tunnel *tunnel = netdev_priv(dev);
760 
761 	if (!pskb_inet_may_pull(skb))
762 		goto free_skb;
763 
764 	if (tunnel->collect_md) {
765 		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
766 		return NETDEV_TX_OK;
767 	}
768 
769 	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
770 					      tunnel->parms.o_flags)))
771 		goto free_skb;
772 
773 	if (skb_cow_head(skb, dev->needed_headroom))
774 		goto free_skb;
775 
776 	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
777 	return NETDEV_TX_OK;
778 
779 free_skb:
780 	kfree_skb(skb);
781 	DEV_STATS_INC(dev, tx_dropped);
782 	return NETDEV_TX_OK;
783 }
784 
785 static void ipgre_link_update(struct net_device *dev, bool set_mtu)
786 {
787 	struct ip_tunnel *tunnel = netdev_priv(dev);
788 	int len;
789 
790 	len = tunnel->tun_hlen;
791 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
792 	len = tunnel->tun_hlen - len;
793 	tunnel->hlen = tunnel->hlen + len;
794 
795 	if (dev->header_ops)
796 		dev->hard_header_len += len;
797 	else
798 		dev->needed_headroom += len;
799 
800 	if (set_mtu)
801 		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
802 
803 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
804 	    (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
805 	     tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
806 		dev->features &= ~NETIF_F_GSO_SOFTWARE;
807 		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
808 	} else {
809 		dev->features |= NETIF_F_GSO_SOFTWARE;
810 		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
811 	}
812 }
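/* Worked example for the delta computed above: enabling both key and
 * checksum on a previously bare tunnel grows gre_calc_hlen() from 4 to
 * 12 bytes, so len == 8, hlen and the headroom grow by 8, and e.g. a
 * 1476-byte mtu shrinks to 1468 (never below the 68-byte floor).
 */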
813 
814 static int ipgre_tunnel_ctl(struct net_device *dev,
815 			    struct ip_tunnel_parm_kern *p,
816 			    int cmd)
817 {
818 	__be16 i_flags, o_flags;
819 	int err;
820 
821 	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
822 	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
823 		return -EOVERFLOW;
824 
825 	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
826 	o_flags = ip_tunnel_flags_to_be16(p->o_flags);
827 
828 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
829 		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
830 		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
831 		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
832 			return -EINVAL;
833 	}
834 
835 	gre_flags_to_tnl_flags(p->i_flags, i_flags);
836 	gre_flags_to_tnl_flags(p->o_flags, o_flags);
837 
838 	err = ip_tunnel_ctl(dev, p, cmd);
839 	if (err)
840 		return err;
841 
842 	if (cmd == SIOCCHGTUNNEL) {
843 		struct ip_tunnel *t = netdev_priv(dev);
844 
845 		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
846 		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
847 
848 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
849 			ipgre_link_update(dev, true);
850 	}
851 
852 	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
853 	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
854 	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
855 	ip_tunnel_flags_from_be16(p->o_flags, o_flags);
856 
857 	return 0;
858 }
859 
860 /* Nice toy. Unfortunately, useless in real life :-)
861    It allows constructing a virtual multiprotocol broadcast "LAN"
862    over the Internet, provided multicast routing is tuned.
863 
864 
865    I have no idea whether this bicycle was invented before me,
866    so I had to set ARPHRD_IPGRE to a random value.
867    I have the impression that Cisco could have made something similar,
868    but this feature is apparently missing in IOS<=11.2(8).
869 
870    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
871    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
872 
873    ping -t 255 224.66.66.66
874 
875    If nobody answers, mbone does not work.
876 
877    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
878    ip addr add 10.66.66.<somewhat>/24 dev Universe
879    ifconfig Universe up
880    ifconfig Universe add fe80::<Your_real_addr>/10
881    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
882    ftp 10.66.66.66
883    ...
884    ftp fec0:6666:6666::193.233.7.65
885    ...
886  */
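/* For reference, a present-day iproute2-only version of the recipe
 * above (same example addresses; a sketch, not verified here):
 *
 *   ip tunnel add Universe mode gre remote 224.66.66.66 \
 *		local <Your_real_addr> ttl 255
 *   ip addr add 10.66.66.<somewhat>/24 dev Universe
 *   ip link set Universe up
 *   ip addr add fe80::<Your_real_addr>/10 dev Universe
 *   ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */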
887 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
888 			unsigned short type,
889 			const void *daddr, const void *saddr, unsigned int len)
890 {
891 	struct ip_tunnel *t = netdev_priv(dev);
892 	struct iphdr *iph;
893 	struct gre_base_hdr *greh;
894 
895 	iph = skb_push(skb, t->hlen + sizeof(*iph));
896 	greh = (struct gre_base_hdr *)(iph+1);
897 	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
898 	greh->protocol = htons(type);
899 
900 	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
901 
902 	/* Set the source hardware address. */
903 	if (saddr)
904 		memcpy(&iph->saddr, saddr, 4);
905 	if (daddr)
906 		memcpy(&iph->daddr, daddr, 4);
907 	if (iph->daddr)
908 		return t->hlen + sizeof(*iph);
909 
910 	return -(t->hlen + sizeof(*iph));
911 }
912 
913 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
914 {
915 	const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
916 	memcpy(haddr, &iph->saddr, 4);
917 	return 4;
918 }
919 
920 static const struct header_ops ipgre_header_ops = {
921 	.create	= ipgre_header,
922 	.parse	= ipgre_header_parse,
923 };
924 
925 #ifdef CONFIG_NET_IPGRE_BROADCAST
926 static int ipgre_open(struct net_device *dev)
927 {
928 	struct ip_tunnel *t = netdev_priv(dev);
929 
930 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
931 		struct flowi4 fl4 = {
932 			.flowi4_oif = t->parms.link,
933 			.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(&t->parms.iph)),
934 			.flowi4_scope = RT_SCOPE_UNIVERSE,
935 			.flowi4_proto = IPPROTO_GRE,
936 			.saddr = t->parms.iph.saddr,
937 			.daddr = t->parms.iph.daddr,
938 			.fl4_gre_key = t->parms.o_key,
939 		};
940 		struct rtable *rt;
941 
942 		rt = ip_route_output_key(t->net, &fl4);
943 		if (IS_ERR(rt))
944 			return -EADDRNOTAVAIL;
945 		dev = rt->dst.dev;
946 		ip_rt_put(rt);
947 		if (!__in_dev_get_rtnl(dev))
948 			return -EADDRNOTAVAIL;
949 		t->mlink = dev->ifindex;
950 		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
951 	}
952 	return 0;
953 }
954 
955 static int ipgre_close(struct net_device *dev)
956 {
957 	struct ip_tunnel *t = netdev_priv(dev);
958 
959 	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
960 		struct in_device *in_dev;
961 		in_dev = inetdev_by_index(t->net, t->mlink);
962 		if (in_dev)
963 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
964 	}
965 	return 0;
966 }
967 #endif
968 
969 static const struct net_device_ops ipgre_netdev_ops = {
970 	.ndo_init		= ipgre_tunnel_init,
971 	.ndo_uninit		= ip_tunnel_uninit,
972 #ifdef CONFIG_NET_IPGRE_BROADCAST
973 	.ndo_open		= ipgre_open,
974 	.ndo_stop		= ipgre_close,
975 #endif
976 	.ndo_start_xmit		= ipgre_xmit,
977 	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
978 	.ndo_change_mtu		= ip_tunnel_change_mtu,
979 	.ndo_get_stats64	= dev_get_tstats64,
980 	.ndo_get_iflink		= ip_tunnel_get_iflink,
981 	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
982 };
983 
984 #define GRE_FEATURES (NETIF_F_SG |		\
985 		      NETIF_F_FRAGLIST |	\
986 		      NETIF_F_HIGHDMA |		\
987 		      NETIF_F_HW_CSUM)
988 
989 static void ipgre_tunnel_setup(struct net_device *dev)
990 {
991 	dev->netdev_ops		= &ipgre_netdev_ops;
992 	dev->type		= ARPHRD_IPGRE;
993 	ip_tunnel_setup(dev, ipgre_net_id);
994 }
995 
996 static void __gre_tunnel_init(struct net_device *dev)
997 {
998 	struct ip_tunnel *tunnel;
999 
1000 	tunnel = netdev_priv(dev);
1001 	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
1002 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1003 
1004 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
1005 	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
1006 
1007 	dev->features		|= GRE_FEATURES;
1008 	dev->hw_features	|= GRE_FEATURES;
1009 
1010 	/* TCP offload with GRE SEQ is not supported, nor can we support 2
1011 	 * levels of outer headers requiring an update.
1012 	 */
1013 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
1014 		return;
1015 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
1016 	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
1017 		return;
1018 
1019 	dev->features |= NETIF_F_GSO_SOFTWARE;
1020 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1021 
1022 	dev->lltx = true;
1023 }
1024 
1025 static int ipgre_tunnel_init(struct net_device *dev)
1026 {
1027 	struct ip_tunnel *tunnel = netdev_priv(dev);
1028 	struct iphdr *iph = &tunnel->parms.iph;
1029 
1030 	__gre_tunnel_init(dev);
1031 
1032 	__dev_addr_set(dev, &iph->saddr, 4);
1033 	memcpy(dev->broadcast, &iph->daddr, 4);
1034 
1035 	dev->flags		= IFF_NOARP;
1036 	netif_keep_dst(dev);
1037 	dev->addr_len		= 4;
1038 
1039 	if (iph->daddr && !tunnel->collect_md) {
1040 #ifdef CONFIG_NET_IPGRE_BROADCAST
1041 		if (ipv4_is_multicast(iph->daddr)) {
1042 			if (!iph->saddr)
1043 				return -EINVAL;
1044 			dev->flags = IFF_BROADCAST;
1045 			dev->header_ops = &ipgre_header_ops;
1046 			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1047 			dev->needed_headroom = 0;
1048 		}
1049 #endif
1050 	} else if (!tunnel->collect_md) {
1051 		dev->header_ops = &ipgre_header_ops;
1052 		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
1053 		dev->needed_headroom = 0;
1054 	}
1055 
1056 	return ip_tunnel_init(dev);
1057 }
1058 
1059 static const struct gre_protocol ipgre_protocol = {
1060 	.handler     = gre_rcv,
1061 	.err_handler = gre_err,
1062 };
1063 
1064 static int __net_init ipgre_init_net(struct net *net)
1065 {
1066 	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1067 }
1068 
1069 static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
1070 					     struct list_head *dev_to_kill)
1071 {
1072 	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
1073 			      dev_to_kill);
1074 }
1075 
1076 static struct pernet_operations ipgre_net_ops = {
1077 	.init = ipgre_init_net,
1078 	.exit_batch_rtnl = ipgre_exit_batch_rtnl,
1079 	.id   = &ipgre_net_id,
1080 	.size = sizeof(struct ip_tunnel_net),
1081 };
1082 
1083 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1084 				 struct netlink_ext_ack *extack)
1085 {
1086 	__be16 flags;
1087 
1088 	if (!data)
1089 		return 0;
1090 
1091 	flags = 0;
1092 	if (data[IFLA_GRE_IFLAGS])
1093 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1094 	if (data[IFLA_GRE_OFLAGS])
1095 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1096 	if (flags & (GRE_VERSION|GRE_ROUTING))
1097 		return -EINVAL;
1098 
1099 	if (data[IFLA_GRE_COLLECT_METADATA] &&
1100 	    data[IFLA_GRE_ENCAP_TYPE] &&
1101 	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1102 		return -EINVAL;
1103 
1104 	return 0;
1105 }
1106 
1107 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1108 			      struct netlink_ext_ack *extack)
1109 {
1110 	__be32 daddr;
1111 
1112 	if (tb[IFLA_ADDRESS]) {
1113 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1114 			return -EINVAL;
1115 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1116 			return -EADDRNOTAVAIL;
1117 	}
1118 
1119 	if (!data)
1120 		goto out;
1121 
1122 	if (data[IFLA_GRE_REMOTE]) {
1123 		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1124 		if (!daddr)
1125 			return -EINVAL;
1126 	}
1127 
1128 out:
1129 	return ipgre_tunnel_validate(tb, data, extack);
1130 }
1131 
1132 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1133 			   struct netlink_ext_ack *extack)
1134 {
1135 	__be16 flags = 0;
1136 	int ret;
1137 
1138 	if (!data)
1139 		return 0;
1140 
1141 	ret = ipgre_tap_validate(tb, data, extack);
1142 	if (ret)
1143 		return ret;
1144 
1145 	if (data[IFLA_GRE_ERSPAN_VER] &&
1146 	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
1147 		return 0;
1148 
1149 	/* ERSPAN type II/III should only have the GRE sequence and key flags */
1150 	if (data[IFLA_GRE_OFLAGS])
1151 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1152 	if (data[IFLA_GRE_IFLAGS])
1153 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1154 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
1155 	    flags != (GRE_SEQ | GRE_KEY))
1156 		return -EINVAL;
1157 
1158 	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
1159 	 * 32-bit key field as the ID, check its range.
1160 	 */
1161 	if (data[IFLA_GRE_IKEY] &&
1162 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1163 		return -EINVAL;
1164 
1165 	if (data[IFLA_GRE_OKEY] &&
1166 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1167 		return -EINVAL;
1168 
1169 	return 0;
1170 }
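/* Worked example for the mask checks above, assuming ID_MASK == 0x03ff
 * (10 bits): IFLA_GRE_OKEY = htonl(0x03ff) passes, while htonl(0x0400)
 * has a bit outside ID_MASK and is rejected with -EINVAL.
 */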
1171 
1172 static int ipgre_netlink_parms(struct net_device *dev,
1173 				struct nlattr *data[],
1174 				struct nlattr *tb[],
1175 				struct ip_tunnel_parm_kern *parms,
1176 				__u32 *fwmark)
1177 {
1178 	struct ip_tunnel *t = netdev_priv(dev);
1179 
1180 	memset(parms, 0, sizeof(*parms));
1181 
1182 	parms->iph.protocol = IPPROTO_GRE;
1183 
1184 	if (!data)
1185 		return 0;
1186 
1187 	if (data[IFLA_GRE_LINK])
1188 		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1189 
1190 	if (data[IFLA_GRE_IFLAGS])
1191 		gre_flags_to_tnl_flags(parms->i_flags,
1192 				       nla_get_be16(data[IFLA_GRE_IFLAGS]));
1193 
1194 	if (data[IFLA_GRE_OFLAGS])
1195 		gre_flags_to_tnl_flags(parms->o_flags,
1196 				       nla_get_be16(data[IFLA_GRE_OFLAGS]));
1197 
1198 	if (data[IFLA_GRE_IKEY])
1199 		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1200 
1201 	if (data[IFLA_GRE_OKEY])
1202 		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1203 
1204 	if (data[IFLA_GRE_LOCAL])
1205 		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1206 
1207 	if (data[IFLA_GRE_REMOTE])
1208 		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1209 
1210 	if (data[IFLA_GRE_TTL])
1211 		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1212 
1213 	if (data[IFLA_GRE_TOS])
1214 		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1215 
1216 	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1217 		if (t->ignore_df)
1218 			return -EINVAL;
1219 		parms->iph.frag_off = htons(IP_DF);
1220 	}
1221 
1222 	if (data[IFLA_GRE_COLLECT_METADATA]) {
1223 		t->collect_md = true;
1224 		if (dev->type == ARPHRD_IPGRE)
1225 			dev->type = ARPHRD_NONE;
1226 	}
1227 
1228 	if (data[IFLA_GRE_IGNORE_DF]) {
1229 		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1230 		  && (parms->iph.frag_off & htons(IP_DF)))
1231 			return -EINVAL;
1232 		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1233 	}
1234 
1235 	if (data[IFLA_GRE_FWMARK])
1236 		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1237 
1238 	return 0;
1239 }
1240 
1241 static int erspan_netlink_parms(struct net_device *dev,
1242 				struct nlattr *data[],
1243 				struct nlattr *tb[],
1244 				struct ip_tunnel_parm_kern *parms,
1245 				__u32 *fwmark)
1246 {
1247 	struct ip_tunnel *t = netdev_priv(dev);
1248 	int err;
1249 
1250 	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
1251 	if (err)
1252 		return err;
1253 	if (!data)
1254 		return 0;
1255 
1256 	if (data[IFLA_GRE_ERSPAN_VER]) {
1257 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1258 
1259 		if (t->erspan_ver > 2)
1260 			return -EINVAL;
1261 	}
1262 
1263 	if (t->erspan_ver == 1) {
1264 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
1265 			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1266 			if (t->index & ~INDEX_MASK)
1267 				return -EINVAL;
1268 		}
1269 	} else if (t->erspan_ver == 2) {
1270 		if (data[IFLA_GRE_ERSPAN_DIR]) {
1271 			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1272 			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1273 				return -EINVAL;
1274 		}
1275 		if (data[IFLA_GRE_ERSPAN_HWID]) {
1276 			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1277 			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1278 				return -EINVAL;
1279 		}
1280 	}
1281 
1282 	return 0;
1283 }
1284 
1285 /* This function returns true when ENCAP attributes are present in the nl msg */
1286 static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1287 				      struct ip_tunnel_encap *ipencap)
1288 {
1289 	bool ret = false;
1290 
1291 	memset(ipencap, 0, sizeof(*ipencap));
1292 
1293 	if (!data)
1294 		return ret;
1295 
1296 	if (data[IFLA_GRE_ENCAP_TYPE]) {
1297 		ret = true;
1298 		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1299 	}
1300 
1301 	if (data[IFLA_GRE_ENCAP_FLAGS]) {
1302 		ret = true;
1303 		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1304 	}
1305 
1306 	if (data[IFLA_GRE_ENCAP_SPORT]) {
1307 		ret = true;
1308 		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1309 	}
1310 
1311 	if (data[IFLA_GRE_ENCAP_DPORT]) {
1312 		ret = true;
1313 		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1314 	}
1315 
1316 	return ret;
1317 }
1318 
1319 static int gre_tap_init(struct net_device *dev)
1320 {
1321 	__gre_tunnel_init(dev);
1322 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1323 	netif_keep_dst(dev);
1324 
1325 	return ip_tunnel_init(dev);
1326 }
1327 
1328 static const struct net_device_ops gre_tap_netdev_ops = {
1329 	.ndo_init		= gre_tap_init,
1330 	.ndo_uninit		= ip_tunnel_uninit,
1331 	.ndo_start_xmit		= gre_tap_xmit,
1332 	.ndo_set_mac_address 	= eth_mac_addr,
1333 	.ndo_validate_addr	= eth_validate_addr,
1334 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1335 	.ndo_get_stats64	= dev_get_tstats64,
1336 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1337 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1338 };
1339 
1340 static int erspan_tunnel_init(struct net_device *dev)
1341 {
1342 	struct ip_tunnel *tunnel = netdev_priv(dev);
1343 
1344 	if (tunnel->erspan_ver == 0)
1345 		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
1346 	else
1347 		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
1348 
1349 	tunnel->parms.iph.protocol = IPPROTO_GRE;
1350 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1351 		       erspan_hdr_len(tunnel->erspan_ver);
1352 
1353 	dev->features		|= GRE_FEATURES;
1354 	dev->hw_features	|= GRE_FEATURES;
1355 	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
1356 	netif_keep_dst(dev);
1357 
1358 	return ip_tunnel_init(dev);
1359 }
1360 
1361 static const struct net_device_ops erspan_netdev_ops = {
1362 	.ndo_init		= erspan_tunnel_init,
1363 	.ndo_uninit		= ip_tunnel_uninit,
1364 	.ndo_start_xmit		= erspan_xmit,
1365 	.ndo_set_mac_address	= eth_mac_addr,
1366 	.ndo_validate_addr	= eth_validate_addr,
1367 	.ndo_change_mtu		= ip_tunnel_change_mtu,
1368 	.ndo_get_stats64	= dev_get_tstats64,
1369 	.ndo_get_iflink		= ip_tunnel_get_iflink,
1370 	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
1371 };
1372 
1373 static void ipgre_tap_setup(struct net_device *dev)
1374 {
1375 	ether_setup(dev);
1376 	dev->max_mtu = 0;
1377 	dev->netdev_ops	= &gre_tap_netdev_ops;
1378 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1379 	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
1380 	ip_tunnel_setup(dev, gre_tap_net_id);
1381 }
1382 
1383 static int
1384 ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
1385 {
1386 	struct ip_tunnel_encap ipencap;
1387 
1388 	if (ipgre_netlink_encap_parms(data, &ipencap)) {
1389 		struct ip_tunnel *t = netdev_priv(dev);
1390 		int err = ip_tunnel_encap_setup(t, &ipencap);
1391 
1392 		if (err < 0)
1393 			return err;
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 static int ipgre_newlink(struct net_device *dev,
1400 			 struct rtnl_newlink_params *params,
1401 			 struct netlink_ext_ack *extack)
1402 {
1403 	struct nlattr **data = params->data;
1404 	struct nlattr **tb = params->tb;
1405 	struct ip_tunnel_parm_kern p;
1406 	__u32 fwmark = 0;
1407 	int err;
1408 
1409 	err = ipgre_newlink_encap_setup(dev, data);
1410 	if (err)
1411 		return err;
1412 
1413 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1414 	if (err < 0)
1415 		return err;
1416 	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1417 				 fwmark);
1418 }
1419 
1420 static int erspan_newlink(struct net_device *dev,
1421 			  struct rtnl_newlink_params *params,
1422 			  struct netlink_ext_ack *extack)
1423 {
1424 	struct nlattr **data = params->data;
1425 	struct nlattr **tb = params->tb;
1426 	struct ip_tunnel_parm_kern p;
1427 	__u32 fwmark = 0;
1428 	int err;
1429 
1430 	err = ipgre_newlink_encap_setup(dev, data);
1431 	if (err)
1432 		return err;
1433 
1434 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1435 	if (err)
1436 		return err;
1437 	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
1438 				 fwmark);
1439 }
1440 
1441 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1442 			    struct nlattr *data[],
1443 			    struct netlink_ext_ack *extack)
1444 {
1445 	struct ip_tunnel *t = netdev_priv(dev);
1446 	struct ip_tunnel_parm_kern p;
1447 	__u32 fwmark = t->fwmark;
1448 	int err;
1449 
1450 	err = ipgre_newlink_encap_setup(dev, data);
1451 	if (err)
1452 		return err;
1453 
1454 	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1455 	if (err < 0)
1456 		return err;
1457 
1458 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1459 	if (err < 0)
1460 		return err;
1461 
1462 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1463 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1464 
1465 	ipgre_link_update(dev, !tb[IFLA_MTU]);
1466 
1467 	return 0;
1468 }
1469 
1470 static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
1471 			     struct nlattr *data[],
1472 			     struct netlink_ext_ack *extack)
1473 {
1474 	struct ip_tunnel *t = netdev_priv(dev);
1475 	struct ip_tunnel_parm_kern p;
1476 	__u32 fwmark = t->fwmark;
1477 	int err;
1478 
1479 	err = ipgre_newlink_encap_setup(dev, data);
1480 	if (err)
1481 		return err;
1482 
1483 	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
1484 	if (err < 0)
1485 		return err;
1486 
1487 	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1488 	if (err < 0)
1489 		return err;
1490 
1491 	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
1492 	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
1493 
1494 	return 0;
1495 }
1496 
1497 static size_t ipgre_get_size(const struct net_device *dev)
1498 {
1499 	return
1500 		/* IFLA_GRE_LINK */
1501 		nla_total_size(4) +
1502 		/* IFLA_GRE_IFLAGS */
1503 		nla_total_size(2) +
1504 		/* IFLA_GRE_OFLAGS */
1505 		nla_total_size(2) +
1506 		/* IFLA_GRE_IKEY */
1507 		nla_total_size(4) +
1508 		/* IFLA_GRE_OKEY */
1509 		nla_total_size(4) +
1510 		/* IFLA_GRE_LOCAL */
1511 		nla_total_size(4) +
1512 		/* IFLA_GRE_REMOTE */
1513 		nla_total_size(4) +
1514 		/* IFLA_GRE_TTL */
1515 		nla_total_size(1) +
1516 		/* IFLA_GRE_TOS */
1517 		nla_total_size(1) +
1518 		/* IFLA_GRE_PMTUDISC */
1519 		nla_total_size(1) +
1520 		/* IFLA_GRE_ENCAP_TYPE */
1521 		nla_total_size(2) +
1522 		/* IFLA_GRE_ENCAP_FLAGS */
1523 		nla_total_size(2) +
1524 		/* IFLA_GRE_ENCAP_SPORT */
1525 		nla_total_size(2) +
1526 		/* IFLA_GRE_ENCAP_DPORT */
1527 		nla_total_size(2) +
1528 		/* IFLA_GRE_COLLECT_METADATA */
1529 		nla_total_size(0) +
1530 		/* IFLA_GRE_IGNORE_DF */
1531 		nla_total_size(1) +
1532 		/* IFLA_GRE_FWMARK */
1533 		nla_total_size(4) +
1534 		/* IFLA_GRE_ERSPAN_INDEX */
1535 		nla_total_size(4) +
1536 		/* IFLA_GRE_ERSPAN_VER */
1537 		nla_total_size(1) +
1538 		/* IFLA_GRE_ERSPAN_DIR */
1539 		nla_total_size(1) +
1540 		/* IFLA_GRE_ERSPAN_HWID */
1541 		nla_total_size(2) +
1542 		0;
1543 }
1544 
1545 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1546 {
1547 	struct ip_tunnel *t = netdev_priv(dev);
1548 	struct ip_tunnel_parm_kern *p = &t->parms;
1549 	IP_TUNNEL_DECLARE_FLAGS(o_flags);
1550 
1551 	ip_tunnel_flags_copy(o_flags, p->o_flags);
1552 
1553 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1554 	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
1555 			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1556 	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
1557 			 gre_tnl_flags_to_gre_flags(o_flags)) ||
1558 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1559 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1560 	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1561 	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1562 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1563 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1564 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1565 		       !!(p->iph.frag_off & htons(IP_DF))) ||
1566 	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1567 		goto nla_put_failure;
1568 
1569 	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1570 			t->encap.type) ||
1571 	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1572 			 t->encap.sport) ||
1573 	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1574 			 t->encap.dport) ||
1575 	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1576 			t->encap.flags))
1577 		goto nla_put_failure;
1578 
1579 	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1580 		goto nla_put_failure;
1581 
1582 	if (t->collect_md) {
1583 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1584 			goto nla_put_failure;
1585 	}
1586 
1587 	return 0;
1588 
1589 nla_put_failure:
1590 	return -EMSGSIZE;
1591 }
1592 
1593 static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1594 {
1595 	struct ip_tunnel *t = netdev_priv(dev);
1596 
1597 	if (t->erspan_ver <= 2) {
1598 		if (t->erspan_ver != 0 && !t->collect_md)
1599 			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
1600 
1601 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1602 			goto nla_put_failure;
1603 
1604 		if (t->erspan_ver == 1) {
1605 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1606 				goto nla_put_failure;
1607 		} else if (t->erspan_ver == 2) {
1608 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1609 				goto nla_put_failure;
1610 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1611 				goto nla_put_failure;
1612 		}
1613 	}
1614 
1615 	return ipgre_fill_info(skb, dev);
1616 
1617 nla_put_failure:
1618 	return -EMSGSIZE;
1619 }
1620 
1621 static void erspan_setup(struct net_device *dev)
1622 {
1623 	struct ip_tunnel *t = netdev_priv(dev);
1624 
1625 	ether_setup(dev);
1626 	dev->max_mtu = 0;
1627 	dev->netdev_ops = &erspan_netdev_ops;
1628 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1629 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1630 	ip_tunnel_setup(dev, erspan_net_id);
1631 	t->erspan_ver = 1;
1632 }
1633 
1634 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1635 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1636 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1637 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1638 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1639 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1640 	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
1641 	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
1642 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1643 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1644 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
1645 	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
1646 	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
1647 	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
1648 	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
1649 	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
1650 	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
1651 	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
1652 	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
1653 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
1654 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
1655 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
1656 };
1657 
1658 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1659 	.kind		= "gre",
1660 	.maxtype	= IFLA_GRE_MAX,
1661 	.policy		= ipgre_policy,
1662 	.priv_size	= sizeof(struct ip_tunnel),
1663 	.setup		= ipgre_tunnel_setup,
1664 	.validate	= ipgre_tunnel_validate,
1665 	.newlink	= ipgre_newlink,
1666 	.changelink	= ipgre_changelink,
1667 	.dellink	= ip_tunnel_dellink,
1668 	.get_size	= ipgre_get_size,
1669 	.fill_info	= ipgre_fill_info,
1670 	.get_link_net	= ip_tunnel_get_link_net,
1671 };
1672 
1673 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1674 	.kind		= "gretap",
1675 	.maxtype	= IFLA_GRE_MAX,
1676 	.policy		= ipgre_policy,
1677 	.priv_size	= sizeof(struct ip_tunnel),
1678 	.setup		= ipgre_tap_setup,
1679 	.validate	= ipgre_tap_validate,
1680 	.newlink	= ipgre_newlink,
1681 	.changelink	= ipgre_changelink,
1682 	.dellink	= ip_tunnel_dellink,
1683 	.get_size	= ipgre_get_size,
1684 	.fill_info	= ipgre_fill_info,
1685 	.get_link_net	= ip_tunnel_get_link_net,
1686 };
1687 
1688 static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1689 	.kind		= "erspan",
1690 	.maxtype	= IFLA_GRE_MAX,
1691 	.policy		= ipgre_policy,
1692 	.priv_size	= sizeof(struct ip_tunnel),
1693 	.setup		= erspan_setup,
1694 	.validate	= erspan_validate,
1695 	.newlink	= erspan_newlink,
1696 	.changelink	= erspan_changelink,
1697 	.dellink	= ip_tunnel_dellink,
1698 	.get_size	= ipgre_get_size,
1699 	.fill_info	= erspan_fill_info,
1700 	.get_link_net	= ip_tunnel_get_link_net,
1701 };
1702 
1703 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1704 					u8 name_assign_type)
1705 {
1706 	struct rtnl_newlink_params params = { .src_net = net };
1707 	struct nlattr *tb[IFLA_MAX + 1];
1708 	struct net_device *dev;
1709 	LIST_HEAD(list_kill);
1710 	struct ip_tunnel *t;
1711 	int err;
1712 
1713 	memset(&tb, 0, sizeof(tb));
1714 	params.tb = tb;
1715 
1716 	dev = rtnl_create_link(net, name, name_assign_type,
1717 			       &ipgre_tap_ops, tb, NULL);
1718 	if (IS_ERR(dev))
1719 		return dev;
1720 
1721 	/* Configure flow based GRE device. */
1722 	t = netdev_priv(dev);
1723 	t->collect_md = true;
1724 
1725 	err = ipgre_newlink(dev, &params, NULL);
1726 	if (err < 0) {
1727 		free_netdev(dev);
1728 		return ERR_PTR(err);
1729 	}
1730 
1731 	/* openvswitch users expect packet sizes to be unrestricted,
1732 	 * so set the largest MTU we can.
1733 	 */
1734 	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1735 	if (err)
1736 		goto out;
1737 
1738 	err = rtnl_configure_link(dev, NULL, 0, NULL);
1739 	if (err < 0)
1740 		goto out;
1741 
1742 	return dev;
1743 out:
1744 	ip_tunnel_dellink(dev, &list_kill);
1745 	unregister_netdevice_many(&list_kill);
1746 	return ERR_PTR(err);
1747 }
1748 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
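/* A hypothetical caller of the exported helper above, modelled on how
 * the openvswitch vport code uses it (under RTNL); the function name and
 * the "gretap%d" template are illustrative only:
 */
static struct net_device *example_flow_gre(struct net *net)
{
	struct net_device *dev;

	rtnl_lock();
	dev = gretap_fb_dev_create(net, "gretap%d", NET_NAME_ENUM);
	rtnl_unlock();

	/* On success the device is registered with collect_md set, so
	 * every transmitted skb must carry dst tunnel metadata.
	 */
	return dev;
}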
1749 
1750 static int __net_init ipgre_tap_init_net(struct net *net)
1751 {
1752 	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1753 }
1754 
1755 static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
1756 						 struct list_head *dev_to_kill)
1757 {
1758 	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
1759 			      dev_to_kill);
1760 }
1761 
1762 static struct pernet_operations ipgre_tap_net_ops = {
1763 	.init = ipgre_tap_init_net,
1764 	.exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
1765 	.id   = &gre_tap_net_id,
1766 	.size = sizeof(struct ip_tunnel_net),
1767 };
1768 
1769 static int __net_init erspan_init_net(struct net *net)
1770 {
1771 	return ip_tunnel_init_net(net, erspan_net_id,
1772 				  &erspan_link_ops, "erspan0");
1773 }
1774 
1775 static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
1776 					      struct list_head *dev_to_kill)
1777 {
1778 	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
1779 			      dev_to_kill);
1780 }
1781 
1782 static struct pernet_operations erspan_net_ops = {
1783 	.init = erspan_init_net,
1784 	.exit_batch_rtnl = erspan_exit_batch_rtnl,
1785 	.id   = &erspan_net_id,
1786 	.size = sizeof(struct ip_tunnel_net),
1787 };
1788 
1789 static int __init ipgre_init(void)
1790 {
1791 	int err;
1792 
1793 	pr_info("GRE over IPv4 tunneling driver\n");
1794 
1795 	err = register_pernet_device(&ipgre_net_ops);
1796 	if (err < 0)
1797 		return err;
1798 
1799 	err = register_pernet_device(&ipgre_tap_net_ops);
1800 	if (err < 0)
1801 		goto pnet_tap_failed;
1802 
1803 	err = register_pernet_device(&erspan_net_ops);
1804 	if (err < 0)
1805 		goto pnet_erspan_failed;
1806 
1807 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1808 	if (err < 0) {
1809 		pr_info("%s: can't add protocol\n", __func__);
1810 		goto add_proto_failed;
1811 	}
1812 
1813 	err = rtnl_link_register(&ipgre_link_ops);
1814 	if (err < 0)
1815 		goto rtnl_link_failed;
1816 
1817 	err = rtnl_link_register(&ipgre_tap_ops);
1818 	if (err < 0)
1819 		goto tap_ops_failed;
1820 
1821 	err = rtnl_link_register(&erspan_link_ops);
1822 	if (err < 0)
1823 		goto erspan_link_failed;
1824 
1825 	return 0;
1826 
1827 erspan_link_failed:
1828 	rtnl_link_unregister(&ipgre_tap_ops);
1829 tap_ops_failed:
1830 	rtnl_link_unregister(&ipgre_link_ops);
1831 rtnl_link_failed:
1832 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1833 add_proto_failed:
1834 	unregister_pernet_device(&erspan_net_ops);
1835 pnet_erspan_failed:
1836 	unregister_pernet_device(&ipgre_tap_net_ops);
1837 pnet_tap_failed:
1838 	unregister_pernet_device(&ipgre_net_ops);
1839 	return err;
1840 }
1841 
1842 static void __exit ipgre_fini(void)
1843 {
1844 	rtnl_link_unregister(&ipgre_tap_ops);
1845 	rtnl_link_unregister(&ipgre_link_ops);
1846 	rtnl_link_unregister(&erspan_link_ops);
1847 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1848 	unregister_pernet_device(&ipgre_tap_net_ops);
1849 	unregister_pernet_device(&ipgre_net_ops);
1850 	unregister_pernet_device(&erspan_net_ops);
1851 }
1852 
1853 module_init(ipgre_init);
1854 module_exit(ipgre_fini);
1855 MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
1856 MODULE_LICENSE("GPL");
1857 MODULE_ALIAS_RTNL_LINK("gre");
1858 MODULE_ALIAS_RTNL_LINK("gretap");
1859 MODULE_ALIAS_RTNL_LINK("erspan");
1860 MODULE_ALIAS_NETDEV("gre0");
1861 MODULE_ALIAS_NETDEV("gretap0");
1862 MODULE_ALIAS_NETDEV("erspan0");
1863