// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/flow.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set. But it is not our problem! Nobody could accuse us; we
   made all that we could make. Even if it was your gated that injected
   the fatal route to the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */
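
/* A minimal sketch of the xmit_recursion idea described above, for
 * illustration only. The names follow the dev_xmit_recursion() helpers
 * found in the core networking headers at the time of writing; exact
 * names and limits vary across kernel versions:
 *
 *	if (dev_xmit_recursion()) {	// per-CPU depth limit reached
 *		kfree_skb(skb);		// break the local dead loop
 *		return NET_XMIT_DROP;
 *	}
 *	dev_xmit_recursion_inc();
 *	ret = xmit(skb);		// may re-enter via a nested tunnel
 *	dev_xmit_recursion_dec();
 */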

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static const struct header_ops ipgre_header_ops;

static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				u32 id, u32 index,
				bool truncate, bool is_ipv4);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;

static int ipgre_err(struct sk_buff *skb, u32 info,
		     const struct tnl_ptk_info *tpi)
{

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even
	   soft state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so why the hell do these idiots break standards established
	   by themselves???
	   */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
		 tpi->proto == htons(ETH_P_ERSPAN2))
		itn = net_generic(net, erspan_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

	if (!t)
		return -ENOENT;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH;
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;

	case ICMP_REDIRECT:
		break;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6)) {
		unsigned int data_len = 0;

		if (type == ICMP_TIME_EXCEEDED)
			data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */

		if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
						type, data_len))
			return 0;
	}
#endif

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return 0;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return 0;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;

	return 0;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even
	 * soft state for keyed GRE tunnels with checksums enabled.
	 * Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves???
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;

	if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
			     iph->ihl * 4) < 0)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, IPPROTO_GRE);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
			      IPPROTO_GRE);
		return;
	}

	ipgre_err(skb, info, &tpi);
}

static bool is_erspan_type1(int gre_hdr_len)
{
	/* Both ERSPAN type I (version 0) and type II (version 1) use
	 * protocol 0x88BE, but type I has only a 4-byte GRE header,
	 * while type II has an 8-byte one.
	 */
	return gre_hdr_len == 4;
}
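
/* For reference, a sketch of the GRE header layout this file parses and
 * builds (per RFC 2784/2890; the bit positions below are from the RFCs,
 * not from this file):
 *
 *	 0                   1                   2                   3
 *	|C| |K|S|  Reserved0      | Ver |        Protocol Type        |
 *	|      Checksum (optional)     |      Reserved1 (optional)    |
 *	|                        Key (optional)                       |
 *	|                  Sequence Number (optional)                 |
 *
 * The 4-byte base grows by 4 bytes per optional field. This is why
 * ERSPAN type I (no options) sees a 4-byte GRE header while type II
 * (sequence numbers enabled) sees an 8-byte one, and why the ERSPAN
 * v1 resp. v2 encapsulation then adds 8 resp. 12 more bytes on top,
 * as computed by erspan_hdr_len().
 */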

static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct erspan_base_hdr *ershdr;
	IP_TUNNEL_DECLARE_FLAGS(flags);
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	struct erspan_md2 *md2;
	int ver;
	int len;

	ip_tunnel_flags_copy(flags, tpi->flags);

	itn = net_generic(net, erspan_net_id);
	iph = ip_hdr(skb);
	if (is_erspan_type1(gre_hdr_len)) {
		ver = 0;
		__set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
					  iph->saddr, iph->daddr, 0);
	} else {
		if (unlikely(!pskb_may_pull(skb,
					    gre_hdr_len + sizeof(*ershdr))))
			return PACKET_REJECT;

		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
		ver = ershdr->ver;
		iph = ip_hdr(skb);
		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
					  iph->saddr, iph->daddr, tpi->key);
	}

	if (tunnel) {
		if (is_erspan_type1(gre_hdr_len))
			len = gre_hdr_len;
		else
			len = gre_hdr_len + erspan_hdr_len(ver);

		if (unlikely(!pskb_may_pull(skb, len)))
			return PACKET_REJECT;

		if (__iptunnel_pull_header(skb,
					   len,
					   htons(ETH_P_TEB),
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct erspan_metadata *pkt_md, *md;
			struct ip_tunnel_info *info;
			unsigned char *gh;
			__be64 tun_id;

			__set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
			ip_tunnel_flags_copy(flags, tpi->flags);
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			/* MUST set options_len before referencing the options. */
			info = &tun_dst->u.tun_info;
			info->options_len = sizeof(*md);

			/* skb can be uncloned in __iptunnel_pull_header(), so
			 * the old pkt_md is no longer valid and we need to
			 * recompute it.
			 */
			gh = skb_network_header(skb) +
			     skb_network_header_len(skb);
			pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
							    sizeof(*ershdr));
			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			md->version = ver;
			md2 = &md->u.md2;
			memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
						       ERSPAN_V2_MDSIZE);

			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
				  info->key.tun_flags);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_REJECT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	iph = ip_hdr(skb);
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

	if (tunnel) {
		const struct iphdr *tnl_params;

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)
			goto drop;

		/* Special case for ipgre_header_parse(), which expects the
		 * mac_header to point to the outer IP header.
		 */
		if (tunnel->dev->header_ops == &ipgre_header_ops)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);

		tnl_params = &tunnel->parms.iph;
		if (tunnel->collect_md || tnl_params->daddr == 0) {
			IP_TUNNEL_DECLARE_FLAGS(flags) = { };
			__be64 tun_id;

			__set_bit(IP_TUNNEL_CSUM_BIT, flags);
			__set_bit(IP_TUNNEL_KEY_BIT, flags);
			ip_tunnel_flags_and(flags, tpi->flags, flags);

			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
			if (!tun_dst)
				return PACKET_REJECT;
		}

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
	}
	return PACKET_NEXT;

drop:
	kfree_skb(skb);
	return PACKET_RCVD;
}

static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)
{
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	int res;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
	}
	return res;
}

static int gre_rcv(struct sk_buff *skb)
{
	struct tnl_ptk_info tpi;
	bool csum_err = false;
	int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
	}
#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
	if (hdr_len < 0)
		goto drop;

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
		     tpi.proto == htons(ETH_P_ERSPAN2))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
			return 0;
		goto out;
	}

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
		return 0;

out:
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
	kfree_skb(skb);
	return 0;
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags);

	ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 flags, proto, tunnel->parms.o_key,
			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	int tunnel_hlen;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto err_free_skb;

	__set_bit(IP_TUNNEL_CSUM_BIT, flags);
	__set_bit(IP_TUNNEL_KEY_BIT, flags);
	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
	ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);

	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id),
			 test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	IP_TUNNEL_DECLARE_FLAGS(flags) = { };
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	bool truncate = false;
	__be16 proto;
	int tunnel_hlen;
	int version;
	int nhoff;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
		goto err_free_skb;
	if (tun_info->options_len < sizeof(*md))
		goto err_free_skb;
	md = ip_tunnel_info_opts(tun_info);

	/* ERSPAN has a fixed 8-byte GRE header. */
	version = md->version;
	tunnel_hlen = 8 + erspan_hdr_len(version);

	if (skb_cow_head(skb, dev->needed_headroom))
		goto err_free_skb;

	if (gre_handle_offloads(skb, false))
		goto err_free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto err_free_skb;
		truncate = true;
	}

	nhoff = skb_network_offset(skb);
	if (skb->protocol == htons(ETH_P_IP) &&
	    (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
		truncate = true;

	if (skb->protocol == htons(ETH_P_IPV6)) {
		int thoff;

		if (skb_transport_header_was_set(skb))
			thoff = skb_transport_offset(skb);
		else
			thoff = nhoff + sizeof(struct ipv6hdr);
		if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
			truncate = true;
	}

	if (version == 1) {
		erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
				    ntohl(md->u.index), truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (version == 2) {
		erspan_build_header_v2(skb,
				       ntohl(tunnel_id_to_key32(key->tun_id)),
				       md->u.md2.dir,
				       get_hwid(&md->u.md2),
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto err_free_skb;
	}

	__set_bit(IP_TUNNEL_SEQ_BIT, flags);
	gre_build_header(skb, 8, flags, proto, 0,
			 htonl(atomic_fetch_inc(&tunnel->o_seqno)));

	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);

	return;

err_free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	const struct ip_tunnel_key *key;
	struct rtable *rt;
	struct flowi4 fl4;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	key = &info->key;
	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id),
			    key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
			    skb->mark, skb_get_hash(skb), key->flow_flags);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);
	info->key.u.ipv4.src = fl4.saddr;
	return 0;
}
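
/* For illustration (assuming a standard iproute2): devices created with
 * the "external" keyword run in collect_md mode and take their per-packet
 * tunnel parameters from attached metadata (e.g. set by OVS or eBPF)
 * rather than from the device configuration:
 *
 *	ip link add gre1 type gretap external
 */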

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (dev->header_ops) {
		int pull_len = tunnel->hlen + sizeof(struct iphdr);

		if (skb_cow_head(skb, 0))
			goto free_skb;

		if (!pskb_may_pull(skb, pull_len))
			goto free_skb;

		tnl_params = (const struct iphdr *)skb->data;

		/* ip_tunnel_xmit() needs skb->data pointing to the GRE header. */
		skb_pull(skb, pull_len);
		skb_reset_mac_header(skb);

		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_start(skb) < skb->data)
			goto free_skb;
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))
			goto free_skb;

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto free_skb;

	__gre_xmit(skb, dev, tnl_params, skb->protocol);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;
	__be16 proto;

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	if (skb->len > dev->mtu + dev->hard_header_len) {
		if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
			goto free_skb;
		truncate = true;
	}

	/* Push the ERSPAN header. */
	if (tunnel->erspan_ver == 0) {
		proto = htons(ETH_P_ERSPAN);
		__clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
	} else if (tunnel->erspan_ver == 1) {
		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
				    tunnel->index,
				    truncate, true);
		proto = htons(ETH_P_ERSPAN);
	} else if (tunnel->erspan_ver == 2) {
		erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
				       tunnel->dir, tunnel->hwid,
				       truncate, true);
		proto = htons(ETH_P_ERSPAN2);
	} else {
		goto free_skb;
	}

	__clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
	__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (!pskb_inet_may_pull(skb))
		goto free_skb;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
					      tunnel->parms.o_flags)))
		goto free_skb;

	if (skb_cow_head(skb, dev->needed_headroom))
		goto free_skb;

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
	return NETDEV_TX_OK;

free_skb:
	kfree_skb(skb);
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int len;

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	if (dev->header_ops)
		dev->hard_header_len += len;
	else
		dev->needed_headroom += len;

	if (set_mtu)
		WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));

	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
	    (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
	     tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
		dev->features &= ~NETIF_F_GSO_SOFTWARE;
		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
	} else {
		dev->features |= NETIF_F_GSO_SOFTWARE;
		dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	}
}

static int ipgre_tunnel_ctl(struct net_device *dev,
			    struct ip_tunnel_parm_kern *p,
			    int cmd)
{
	__be16 i_flags, o_flags;
	int err;

	if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
	    !ip_tunnel_flags_is_be16_compat(p->o_flags))
		return -EOVERFLOW;

	i_flags = ip_tunnel_flags_to_be16(p->i_flags);
	o_flags = ip_tunnel_flags_to_be16(p->o_flags);

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
		    ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
			return -EINVAL;
	}

	gre_flags_to_tnl_flags(p->i_flags, i_flags);
	gre_flags_to_tnl_flags(p->o_flags, o_flags);

	err = ip_tunnel_ctl(dev, p, cmd);
	if (err)
		return err;

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
		ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);
	}

	i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
	ip_tunnel_flags_from_be16(p->i_flags, i_flags);
	o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
	ip_tunnel_flags_from_be16(p->o_flags, o_flags);

	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
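
/* For what it's worth, the ifconfig lines above in (assumed) modern
 * iproute2 form, with the same placeholder addresses:
 *
 *	ip link set Universe up
 *	ip addr add fe80::<Your_real_addr>/10 dev Universe
 *	ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 */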
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4 = {
			.flowi4_oif = t->parms.link,
			.flowi4_dscp = ip4h_dscp(&t->parms.iph),
			.flowi4_scope = RT_SCOPE_UNIVERSE,
			.flowi4_proto = IPPROTO_GRE,
			.saddr = t->parms.iph.saddr,
			.daddr = t->parms.iph.daddr,
			.fl4_gre_key = t->parms.o_key,
		};
		struct rtable *rt;

		rt = ip_route_output_key(t->net, &fl4);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_siocdevprivate	= ip_tunnel_siocdevprivate,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
};

#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}

static void __gre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
	dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	/* TCP offload with GRE SEQ is not supported, nor can we support 2
	 * levels of outer headers requiring an update.
	 */
	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
		return;
	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
	    tunnel->encap.type != TUNNEL_ENCAP_NONE)
		return;

	dev->features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

	dev->lltx = true;
}

static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	__dev_addr_set(dev, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);
	dev->addr_len		= 4;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
			dev->hard_header_len = tunnel->hlen + sizeof(*iph);
			dev->needed_headroom = 0;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
		dev->hard_header_len = tunnel->hlen + sizeof(*iph);
		dev->needed_headroom = 0;
	}

	return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
	.handler     = gre_rcv,
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_rtnl(struct net *net,
				       struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_rtnl = ipgre_exit_rtnl,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)
{
	__be16 flags;

	if (!data)
		return 0;

	flags = 0;
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
		return -EINVAL;

	return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	__be32 daddr;

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}

	if (!data)
		goto out;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!daddr)
			return -EINVAL;
	}

out:
	return ipgre_tunnel_validate(tb, data, extack);
}

static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	__be16 flags = 0;
	int ret;

	if (!data)
		return 0;

	ret = ipgre_tap_validate(tb, data, extack);
	if (ret)
		return ret;

	if (data[IFLA_GRE_ERSPAN_VER] &&
	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
		return 0;

	/* ERSPAN type II/III should only have the GRE sequence and key flags. */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* The ERSPAN Session ID only has 10 bits. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
		return -EINVAL;

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
		return -EINVAL;

	return 0;
}

static int ipgre_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return 0;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		gre_flags_to_tnl_flags(parms->i_flags,
				       nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		gre_flags_to_tnl_flags(parms->o_flags,
				       nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
		if (t->ignore_df)
			return -EINVAL;
		parms->iph.frag_off = htons(IP_DF);
	}

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;
	}

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) &&
		    (parms->iph.frag_off & htons(IP_DF)))
			return -EINVAL;
		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
	}

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	return 0;
}

static int erspan_netlink_parms(struct net_device *dev,
				struct nlattr *data[],
				struct nlattr *tb[],
				struct ip_tunnel_parm_kern *parms,
				__u32 *fwmark)
{
	struct ip_tunnel *t = netdev_priv(dev);
	int err;

	err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
	if (err)
		return err;
	if (!data)
		return 0;

	if (data[IFLA_GRE_ERSPAN_VER]) {
		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);

		if (t->erspan_ver > 2)
			return -EINVAL;
	}

	if (t->erspan_ver == 1) {
		if (data[IFLA_GRE_ERSPAN_INDEX]) {
			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
			if (t->index & ~INDEX_MASK)
				return -EINVAL;
		}
	} else if (t->erspan_ver == 2) {
		if (data[IFLA_GRE_ERSPAN_DIR]) {
			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
				return -EINVAL;
		}
		if (data[IFLA_GRE_ERSPAN_HWID]) {
			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
				return -EINVAL;
		}
	}

	return 0;
}

/* This function returns true when ENCAP attributes are present in the nl msg. */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)
{
	bool ret = false;

	memset(ipencap, 0, sizeof(*ipencap));

	if (!data)
		return ret;

	if (data[IFLA_GRE_ENCAP_TYPE]) {
		ret = true;
		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
	}

	if (data[IFLA_GRE_ENCAP_FLAGS]) {
		ret = true;
		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
	}

	if (data[IFLA_GRE_ENCAP_SPORT]) {
		ret = true;
		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
	}

	if (data[IFLA_GRE_ENCAP_DPORT]) {
		ret = true;
		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
	}

	return ret;
}

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static int erspan_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->erspan_ver == 0)
		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
	else
		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */

	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       erspan_hdr_len(tunnel->erspan_ver);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= dev_get_tstats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops	= &gre_tap_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
}

static int
ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
{
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);
		int err = ip_tunnel_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}

	return 0;
}

static int ipgre_newlink(struct net_device *dev,
			 struct rtnl_newlink_params *params,
			 struct netlink_ext_ack *extack)
{
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;
	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
				 fwmark);
}

static int erspan_newlink(struct net_device *dev,
			  struct rtnl_newlink_params *params,
			  struct netlink_ext_ack *extack)
{
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = 0;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err)
		return err;
	return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
				 fwmark);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = t->fwmark;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);

	ipgre_link_update(dev, !tb[IFLA_MTU]);

	return 0;
}

static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern p;
	__u32 fwmark = t->fwmark;
	int err;

	err = ipgre_newlink_encap_setup(dev, data);
	if (err)
		return err;

	err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
	if (err < 0)
		return err;

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);
	if (err < 0)
		return err;

	ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
	ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);

	return 0;
}

static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		/* IFLA_GRE_ENCAP_TYPE */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_GRE_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_GRE_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_GRE_IGNORE_DF */
		nla_total_size(1) +
		/* IFLA_GRE_FWMARK */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_INDEX */
		nla_total_size(4) +
		/* IFLA_GRE_ERSPAN_VER */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_DIR */
		nla_total_size(1) +
		/* IFLA_GRE_ERSPAN_HWID */
		nla_total_size(2) +
		0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm_kern *p = &t->parms;
	IP_TUNNEL_DECLARE_FLAGS(o_flags);

	ip_tunnel_flags_copy(o_flags, p->o_flags);

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
			t->encap.type) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
			 t->encap.sport) ||
	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
			 t->encap.dport) ||
	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
			t->encap.flags))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (t->erspan_ver <= 2) {
		if (t->erspan_ver != 0 && !t->collect_md)
			__set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);

		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
			goto nla_put_failure;

		if (t->erspan_ver == 1) {
			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
				goto nla_put_failure;
		} else if (t->erspan_ver == 2) {
			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
				goto nla_put_failure;
			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
				goto nla_put_failure;
		}
	}

	return ipgre_fill_info(skb, dev);

nla_put_failure:
	return -EMSGSIZE;
}

static void erspan_setup(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ether_setup(dev);
	dev->max_mtu = 0;
	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
	t->erspan_ver = 1;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = sizeof_field(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
};
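
/* For illustration, typical iproute2 invocations (assuming a standard
 * iproute2; exact keywords may vary by version) that exercise the
 * attributes in the policy table above:
 *
 *	ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 \
 *		ttl 64 key 42
 *	ip link add tap1 type gretap remote 10.0.0.2 csum
 *	ip link add er1 type erspan remote 10.0.0.2 seq key 10 \
 *		erspan_ver 1 erspan 123
 */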

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.kind		= "erspan",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= erspan_newlink,
	.changelink	= erspan_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= erspan_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)
{
	struct rtnl_newlink_params params = { .src_net = net };
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;
	int err;

	memset(&tb, 0, sizeof(tb));
	params.tb = tb;

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(dev, &params, NULL);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
	if (err)
		goto out;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto out;

	return dev;
out:
	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
					   struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_rtnl = ipgre_tap_exit_rtnl,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_rtnl(struct net *net,
					struct list_head *dev_to_kill)
{
	ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_rtnl = erspan_exit_rtnl,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __init ipgre_init(void)
{
	int err;

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);
	if (err < 0)
		return err;

	err = register_pernet_device(&ipgre_tap_net_ops);
	if (err < 0)
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
	if (err < 0)
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
	if (err < 0) {
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;
	}

	err = rtnl_link_register(&ipgre_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
	if (err < 0)
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
	if (err < 0)
		goto erspan_link_failed;

	return 0;

erspan_link_failed:
	rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
	rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
	unregister_pernet_device(&erspan_net_ops);
pnet_erspan_failed:
	unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
	unregister_pernet_device(&ipgre_net_ops);
	return err;
}

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");