xref: /linux/net/ipv4/ip_tunnel.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/netdev_lock.h>
44 #include <net/rtnetlink.h>
45 #include <net/udp.h>
46 #include <net/dst_metadata.h>
47 #include <net/inet_dscp.h>
48 
49 #if IS_ENABLED(CONFIG_IPV6)
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #endif
54 
55 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
56 {
57 	return hash_32((__force u32)key ^ (__force u32)remote,
58 			 IP_TNL_HASH_BITS);
59 }
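/* Editor's sketch (not part of the original file): the bucket index depends
 * only on the key and the remote address, both of which a receiver can read
 * straight from the packet, e.g. with hypothetical values:
 *
 *	__be32 key = htonl(42), remote = htonl(0xc0a80001);
 *	unsigned int h = ip_tunnel_hash(key, remote);
 *
 * h is always < IP_TNL_HASH_SIZE, i.e. < (1 << IP_TNL_HASH_BITS).
 */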
60 
61 static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
62 				const unsigned long *flags, __be32 key)
63 {
64 	if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
65 		return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
66 
67 	return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
68 }
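/* Hedged summary of the rule above: a keyless packet matches only keyless
 * tunnels, and a keyed packet matches only tunnels carrying the same key:
 *
 *	packet keyless + tunnel keyless  ->  match
 *	packet keyless + tunnel keyed    ->  no match
 *	packet keyed   + tunnel keyless  ->  no match
 *	packet keyed   + tunnel keyed    ->  match iff p->i_key == key
 */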
69 
70 /* Fallback tunnel: no source, no destination, no key, no options
71 
72    Tunnel hash table:
73    We require an exact key match, i.e. if a key is present in the packet
74    it will match only a tunnel with the same key; if it is not present,
75    it will match only a keyless tunnel.
76 
77    All keyless packets that do not match a configured keyless tunnel
78    will match the fallback tunnel.
79    Given src, dst and key, find the appropriate tunnel for input.
80 */
81 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
82 				   int link, const unsigned long *flags,
83 				   __be32 remote, __be32 local,
84 				   __be32 key)
85 {
86 	struct ip_tunnel *t, *cand = NULL;
87 	struct hlist_head *head;
88 	struct net_device *ndev;
89 	unsigned int hash;
90 
91 	hash = ip_tunnel_hash(key, remote);
92 	head = &itn->tunnels[hash];
93 
94 	hlist_for_each_entry_rcu(t, head, hash_node) {
95 		if (local != t->parms.iph.saddr ||
96 		    remote != t->parms.iph.daddr ||
97 		    !(t->dev->flags & IFF_UP))
98 			continue;
99 
100 		if (!ip_tunnel_key_match(&t->parms, flags, key))
101 			continue;
102 
103 		if (READ_ONCE(t->parms.link) == link)
104 			return t;
105 		cand = t;
106 	}
107 
108 	hlist_for_each_entry_rcu(t, head, hash_node) {
109 		if (remote != t->parms.iph.daddr ||
110 		    t->parms.iph.saddr != 0 ||
111 		    !(t->dev->flags & IFF_UP))
112 			continue;
113 
114 		if (!ip_tunnel_key_match(&t->parms, flags, key))
115 			continue;
116 
117 		if (READ_ONCE(t->parms.link) == link)
118 			return t;
119 		if (!cand)
120 			cand = t;
121 	}
122 
123 	hash = ip_tunnel_hash(key, 0);
124 	head = &itn->tunnels[hash];
125 
126 	hlist_for_each_entry_rcu(t, head, hash_node) {
127 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
128 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
129 			continue;
130 
131 		if (!(t->dev->flags & IFF_UP))
132 			continue;
133 
134 		if (!ip_tunnel_key_match(&t->parms, flags, key))
135 			continue;
136 
137 		if (READ_ONCE(t->parms.link) == link)
138 			return t;
139 		if (!cand)
140 			cand = t;
141 	}
142 
143 	hlist_for_each_entry_rcu(t, head, hash_node) {
144 		if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
145 		     t->parms.i_key != key) ||
146 		    t->parms.iph.saddr != 0 ||
147 		    t->parms.iph.daddr != 0 ||
148 		    !(t->dev->flags & IFF_UP))
149 			continue;
150 
151 		if (READ_ONCE(t->parms.link) == link)
152 			return t;
153 		if (!cand)
154 			cand = t;
155 	}
156 
157 	if (cand)
158 		return cand;
159 
160 	t = rcu_dereference(itn->collect_md_tun);
161 	if (t && t->dev->flags & IFF_UP)
162 		return t;
163 
164 	ndev = READ_ONCE(itn->fb_tunnel_dev);
165 	if (ndev && ndev->flags & IFF_UP)
166 		return netdev_priv(ndev);
167 
168 	return NULL;
169 }
170 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
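/* Editor's usage sketch, assuming a GRE-style receive path that has already
 * parsed the outer header into iph/tpi:
 *
 *	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *			     iph->saddr, iph->daddr, tpi->key);
 *
 * Candidates are tried from most to least specific: saddr+daddr+key,
 * daddr+key, local-address or multicast plus key, key-only wildcard, then
 * the collect_md device and finally the per-netns fallback device.  A link
 * mismatch only demotes a tunnel to "candidate"; it does not reject it.
 */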
171 
172 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
173 				    struct ip_tunnel_parm_kern *parms)
174 {
175 	unsigned int h;
176 	__be32 remote;
177 	__be32 i_key = parms->i_key;
178 
179 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
180 		remote = parms->iph.daddr;
181 	else
182 		remote = 0;
183 
184 	if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
185 	    test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
186 		i_key = 0;
187 
188 	h = ip_tunnel_hash(i_key, remote);
189 	return &itn->tunnels[h];
190 }
191 
192 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
193 {
194 	struct hlist_head *head = ip_bucket(itn, &t->parms);
195 
196 	if (t->collect_md)
197 		rcu_assign_pointer(itn->collect_md_tun, t);
198 	hlist_add_head_rcu(&t->hash_node, head);
199 }
200 
201 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
202 {
203 	if (t->collect_md)
204 		rcu_assign_pointer(itn->collect_md_tun, NULL);
205 	hlist_del_init_rcu(&t->hash_node);
206 }
207 
208 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
209 					struct ip_tunnel_parm_kern *parms,
210 					int type)
211 {
212 	__be32 remote = parms->iph.daddr;
213 	__be32 local = parms->iph.saddr;
214 	IP_TUNNEL_DECLARE_FLAGS(flags);
215 	__be32 key = parms->i_key;
216 	int link = parms->link;
217 	struct ip_tunnel *t = NULL;
218 	struct hlist_head *head = ip_bucket(itn, parms);
219 
220 	ip_tunnel_flags_copy(flags, parms->i_flags);
221 
222 	hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
223 		if (local == t->parms.iph.saddr &&
224 		    remote == t->parms.iph.daddr &&
225 		    link == READ_ONCE(t->parms.link) &&
226 		    type == t->dev->type &&
227 		    ip_tunnel_key_match(&t->parms, flags, key))
228 			break;
229 	}
230 	return t;
231 }
232 
233 static struct net_device *__ip_tunnel_create(struct net *net,
234 					     const struct rtnl_link_ops *ops,
235 					     struct ip_tunnel_parm_kern *parms)
236 {
237 	int err;
238 	struct ip_tunnel *tunnel;
239 	struct net_device *dev;
240 	char name[IFNAMSIZ];
241 
242 	err = -E2BIG;
243 	if (parms->name[0]) {
244 		if (!dev_valid_name(parms->name))
245 			goto failed;
246 		strscpy(name, parms->name);
247 	} else {
248 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
249 			goto failed;
250 		strscpy(name, ops->kind);
251 		strcat(name, "%d");
252 	}
253 
254 	ASSERT_RTNL();
255 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
256 	if (!dev) {
257 		err = -ENOMEM;
258 		goto failed;
259 	}
260 	dev_net_set(dev, net);
261 
262 	dev->rtnl_link_ops = ops;
263 
264 	tunnel = netdev_priv(dev);
265 	tunnel->parms = *parms;
266 	tunnel->net = net;
267 
268 	err = register_netdevice(dev);
269 	if (err)
270 		goto failed_free;
271 
272 	return dev;
273 
274 failed_free:
275 	free_netdev(dev);
276 failed:
277 	return ERR_PTR(err);
278 }
279 
280 static int ip_tunnel_bind_dev(struct net_device *dev)
281 {
282 	struct net_device *tdev = NULL;
283 	struct ip_tunnel *tunnel = netdev_priv(dev);
284 	const struct iphdr *iph;
285 	int hlen = LL_MAX_HEADER;
286 	int mtu = ETH_DATA_LEN;
287 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
288 
289 	iph = &tunnel->parms.iph;
290 
291 	/* Guess output device to choose reasonable mtu and needed_headroom */
292 	if (iph->daddr) {
293 		struct flowi4 fl4;
294 		struct rtable *rt;
295 
296 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
297 				    iph->saddr, tunnel->parms.o_key,
298 				    iph->tos & INET_DSCP_MASK, tunnel->net,
299 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
300 		rt = ip_route_output_key(tunnel->net, &fl4);
301 
302 		if (!IS_ERR(rt)) {
303 			tdev = rt->dst.dev;
304 			ip_rt_put(rt);
305 		}
306 		if (dev->type != ARPHRD_ETHER)
307 			dev->flags |= IFF_POINTOPOINT;
308 
309 		dst_cache_reset(&tunnel->dst_cache);
310 	}
311 
312 	if (!tdev && tunnel->parms.link)
313 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
314 
315 	if (tdev) {
316 		hlen = tdev->hard_header_len + tdev->needed_headroom;
317 		mtu = min(tdev->mtu, IP_MAX_MTU);
318 	}
319 
320 	dev->needed_headroom = t_hlen + hlen;
321 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
322 
323 	if (mtu < IPV4_MIN_MTU)
324 		mtu = IPV4_MIN_MTU;
325 
326 	return mtu;
327 }
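/* Worked example (editor's note): for a keyed GRE device (ARPHRD_IPGRE,
 * tun_hlen = 8, an assumption for illustration) routed over an Ethernet
 * link with mtu 1500, t_hlen is 8 + 20 = 28, so the suggested mtu becomes
 * 1500 - 28 = 1472; the result is never allowed to drop below
 * IPV4_MIN_MTU (68).
 */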
328 
329 static struct ip_tunnel *ip_tunnel_create(struct net *net,
330 					  struct ip_tunnel_net *itn,
331 					  struct ip_tunnel_parm_kern *parms)
332 {
333 	struct ip_tunnel *nt;
334 	struct net_device *dev;
335 	int t_hlen;
336 	int mtu;
337 	int err;
338 
339 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
340 	if (IS_ERR(dev))
341 		return ERR_CAST(dev);
342 
343 	mtu = ip_tunnel_bind_dev(dev);
344 	err = dev_set_mtu(dev, mtu);
345 	if (err)
346 		goto err_dev_set_mtu;
347 
348 	nt = netdev_priv(dev);
349 	t_hlen = nt->hlen + sizeof(struct iphdr);
350 	dev->min_mtu = ETH_MIN_MTU;
351 	dev->max_mtu = IP_MAX_MTU - t_hlen;
352 	if (dev->type == ARPHRD_ETHER)
353 		dev->max_mtu -= dev->hard_header_len;
354 
355 	ip_tunnel_add(itn, nt);
356 	return nt;
357 
358 err_dev_set_mtu:
359 	unregister_netdevice(dev);
360 	return ERR_PTR(err);
361 }
362 
363 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
364 {
365 	const struct iphdr *iph = ip_hdr(skb);
366 	const struct udphdr *udph;
367 
368 	if (iph->protocol != IPPROTO_UDP)
369 		return;
370 
371 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
372 	info->encap.sport = udph->source;
373 	info->encap.dport = udph->dest;
374 }
375 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
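/* Hedged usage sketch (the calling context is assumed): a collect_md
 * receive path that wants the sender's outer UDP ports reflected on the
 * reply can record them in the tunnel metadata before handing the skb up:
 *
 *	ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info);
 */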
376 
377 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
378 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
379 		  bool log_ecn_error)
380 {
381 	const struct iphdr *iph = ip_hdr(skb);
382 	int nh, err;
383 
384 #ifdef CONFIG_NET_IPGRE_BROADCAST
385 	if (ipv4_is_multicast(iph->daddr)) {
386 		DEV_STATS_INC(tunnel->dev, multicast);
387 		skb->pkt_type = PACKET_BROADCAST;
388 	}
389 #endif
390 
391 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
392 	    test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
393 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
394 		DEV_STATS_INC(tunnel->dev, rx_errors);
395 		goto drop;
396 	}
397 
398 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
399 		if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
400 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
401 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
402 			DEV_STATS_INC(tunnel->dev, rx_errors);
403 			goto drop;
404 		}
405 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
406 	}
407 
408 	/* Save offset of outer header relative to skb->head,
409 	 * because we are going to reset the network header to the inner header
410 	 * and might change skb->head.
411 	 */
412 	nh = skb_network_header(skb) - skb->head;
413 
414 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
415 
416 	if (!pskb_inet_may_pull(skb)) {
417 		DEV_STATS_INC(tunnel->dev, rx_length_errors);
418 		DEV_STATS_INC(tunnel->dev, rx_errors);
419 		goto drop;
420 	}
421 	iph = (struct iphdr *)(skb->head + nh);
422 
423 	err = IP_ECN_decapsulate(iph, skb);
424 	if (unlikely(err)) {
425 		if (log_ecn_error)
426 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
427 					&iph->saddr, iph->tos);
428 		if (err > 1) {
429 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
430 			DEV_STATS_INC(tunnel->dev, rx_errors);
431 			goto drop;
432 		}
433 	}
434 
435 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
436 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
437 
438 	if (tunnel->dev->type == ARPHRD_ETHER) {
439 		skb->protocol = eth_type_trans(skb, tunnel->dev);
440 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
441 	} else {
442 		skb->dev = tunnel->dev;
443 	}
444 
445 	if (tun_dst)
446 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
447 
448 	gro_cells_receive(&tunnel->gro_cells, skb);
449 	return 0;
450 
451 drop:
452 	if (tun_dst)
453 		dst_release((struct dst_entry *)tun_dst);
454 	kfree_skb(skb);
455 	return 0;
456 }
457 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
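/* Hedged usage sketch: a decap handler typically resolves the tunnel and
 * hands the packet over (GRE-like pseudo-path; names assumed):
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 */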
458 
459 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
460 			    unsigned int num)
461 {
462 	if (num >= MAX_IPTUN_ENCAP_OPS)
463 		return -ERANGE;
464 
465 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
466 			&iptun_encaps[num],
467 			NULL, ops) ? 0 : -1;
468 }
469 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
470 
471 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
472 			    unsigned int num)
473 {
474 	int ret;
475 
476 	if (num >= MAX_IPTUN_ENCAP_OPS)
477 		return -ERANGE;
478 
479 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
480 		       &iptun_encaps[num],
481 		       ops, NULL) == ops) ? 0 : -1;
482 
483 	synchronize_net();
484 
485 	return ret;
486 }
487 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
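/* Example registration (editor's sketch modelled on the FOU module; fields
 * abridged, names are the caller's own):
 *
 *	static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 *		.encap_hlen	= fou_encap_hlen,
 *		.build_header	= fou_build_header,
 *	};
 *
 *	int err = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 *	...
 *	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 */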
488 
489 int ip_tunnel_encap_setup(struct ip_tunnel *t,
490 			  struct ip_tunnel_encap *ipencap)
491 {
492 	int hlen;
493 
494 	memset(&t->encap, 0, sizeof(t->encap));
495 
496 	hlen = ip_encap_hlen(ipencap);
497 	if (hlen < 0)
498 		return hlen;
499 
500 	t->encap.type = ipencap->type;
501 	t->encap.sport = ipencap->sport;
502 	t->encap.dport = ipencap->dport;
503 	t->encap.flags = ipencap->flags;
504 
505 	t->encap_hlen = hlen;
506 	t->hlen = t->encap_hlen + t->tun_hlen;
507 
508 	return 0;
509 }
510 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
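/* Usage sketch (assumption: parameters parsed from netlink, as the GRE/VTI
 * drivers do; the port below is purely illustrative):
 *
 *	struct ip_tunnel_encap ipencap = {
 *		.type	= TUNNEL_ENCAP_FOU,
 *		.dport	= htons(5555),
 *	};
 *	int err = ip_tunnel_encap_setup(t, &ipencap);
 *
 * On success t->hlen grows by ip_encap_hlen(&ipencap), so later headroom
 * and MTU math accounts for the extra UDP encapsulation.
 */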
511 
512 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
513 			    struct rtable *rt, __be16 df,
514 			    const struct iphdr *inner_iph,
515 			    int tunnel_hlen, __be32 dst, bool md)
516 {
517 	struct ip_tunnel *tunnel = netdev_priv(dev);
518 	int pkt_size;
519 	int mtu;
520 
521 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
522 	pkt_size = skb->len - tunnel_hlen;
523 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
524 
525 	if (df) {
526 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
527 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
528 	} else {
529 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
530 	}
531 
532 	if (skb_valid_dst(skb))
533 		skb_dst_update_pmtu_no_confirm(skb, mtu);
534 
535 	if (skb->protocol == htons(ETH_P_IP)) {
536 		if (!skb_is_gso(skb) &&
537 		    (inner_iph->frag_off & htons(IP_DF)) &&
538 		    mtu < pkt_size) {
539 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
540 			return -E2BIG;
541 		}
542 	}
543 #if IS_ENABLED(CONFIG_IPV6)
544 	else if (skb->protocol == htons(ETH_P_IPV6)) {
545 		struct rt6_info *rt6;
546 		__be32 daddr;
547 
548 		rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
549 					   NULL;
550 		daddr = md ? dst : tunnel->parms.iph.daddr;
551 
552 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
553 			   mtu >= IPV6_MIN_MTU) {
554 			if ((daddr && !ipv4_is_multicast(daddr)) ||
555 			    rt6->rt6i_dst.plen == 128) {
556 				rt6->rt6i_flags |= RTF_MODIFIED;
557 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
558 			}
559 		}
560 
561 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
562 					mtu < pkt_size) {
563 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
564 			return -E2BIG;
565 		}
566 	}
567 #endif
568 	return 0;
569 }
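/* Worked example (editor's note): plain IPIP (tunnel_hlen = 0) over a
 * 1500-byte route MTU leaves 1500 - 20 = 1480 bytes for the inner packet
 * when DF is set; a 1500-byte DF-marked inner IPv4 packet therefore gets
 * icmp_ndo_send(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, mtu = 1480) and the
 * function returns -E2BIG.
 */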
570 
571 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
572 {
573 	/* we must cap headroom to some upper limit, else pskb_expand_head
574 	 * will overflow header offsets in skb_headers_offset_update().
575 	 */
576 	static const unsigned int max_allowed = 512;
577 
578 	if (headroom > max_allowed)
579 		headroom = max_allowed;
580 
581 	if (headroom > READ_ONCE(dev->needed_headroom))
582 		WRITE_ONCE(dev->needed_headroom, headroom);
583 }
584 
585 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
586 		       u8 proto, int tunnel_hlen)
587 {
588 	struct ip_tunnel *tunnel = netdev_priv(dev);
589 	u32 headroom = sizeof(struct iphdr);
590 	struct ip_tunnel_info *tun_info;
591 	const struct ip_tunnel_key *key;
592 	const struct iphdr *inner_iph;
593 	struct rtable *rt = NULL;
594 	struct flowi4 fl4;
595 	__be16 df = 0;
596 	u8 tos, ttl;
597 	bool use_cache;
598 
599 	tun_info = skb_tunnel_info(skb);
600 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
601 		     ip_tunnel_info_af(tun_info) != AF_INET))
602 		goto tx_error;
603 	key = &tun_info->key;
604 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
605 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
606 	tos = key->tos;
607 	if (tos == 1) {
608 		if (skb->protocol == htons(ETH_P_IP))
609 			tos = inner_iph->tos;
610 		else if (skb->protocol == htons(ETH_P_IPV6))
611 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
612 	}
613 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
614 			    tunnel_id_to_key32(key->tun_id),
615 			    tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark,
616 			    skb_get_hash(skb), key->flow_flags);
617 
618 	if (!tunnel_hlen)
619 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
620 
621 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
622 		goto tx_error;
623 
624 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
625 	if (use_cache)
626 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
627 	if (!rt) {
628 		rt = ip_route_output_key(tunnel->net, &fl4);
629 		if (IS_ERR(rt)) {
630 			DEV_STATS_INC(dev, tx_carrier_errors);
631 			goto tx_error;
632 		}
633 		if (use_cache)
634 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
635 					  fl4.saddr);
636 	}
637 	if (rt->dst.dev == dev) {
638 		ip_rt_put(rt);
639 		DEV_STATS_INC(dev, collisions);
640 		goto tx_error;
641 	}
642 
643 	if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
644 		df = htons(IP_DF);
645 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
646 			    key->u.ipv4.dst, true)) {
647 		ip_rt_put(rt);
648 		goto tx_error;
649 	}
650 
651 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
652 	ttl = key->ttl;
653 	if (ttl == 0) {
654 		if (skb->protocol == htons(ETH_P_IP))
655 			ttl = inner_iph->ttl;
656 		else if (skb->protocol == htons(ETH_P_IPV6))
657 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
658 		else
659 			ttl = ip4_dst_hoplimit(&rt->dst);
660 	}
661 
662 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
663 	if (skb_cow_head(skb, headroom)) {
664 		ip_rt_put(rt);
665 		goto tx_dropped;
666 	}
667 
668 	ip_tunnel_adj_headroom(dev, headroom);
669 
670 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
671 		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
672 	return;
673 tx_error:
674 	DEV_STATS_INC(dev, tx_errors);
675 	goto kfree;
676 tx_dropped:
677 	DEV_STATS_INC(dev, tx_dropped);
678 kfree:
679 	kfree_skb(skb);
680 }
681 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
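/* Hedged usage sketch: a collect_md device's ndo_start_xmit commonly ends
 * with (IPIP-like; the protocol number depends on the tunnel type):
 *
 *	ip_md_tunnel_xmit(skb, dev, IPPROTO_IPIP, 0);
 *	return NETDEV_TX_OK;
 */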
682 
683 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
684 		    const struct iphdr *tnl_params, u8 protocol)
685 {
686 	struct ip_tunnel *tunnel = netdev_priv(dev);
687 	struct ip_tunnel_info *tun_info = NULL;
688 	const struct iphdr *inner_iph;
689 	unsigned int max_headroom;	/* The extra header space needed */
690 	struct rtable *rt = NULL;		/* Route to the other host */
691 	__be16 payload_protocol;
692 	bool use_cache = false;
693 	struct flowi4 fl4;
694 	bool md = false;
695 	bool connected;
696 	u8 tos, ttl;
697 	__be32 dst;
698 	__be16 df;
699 
700 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
701 	connected = (tunnel->parms.iph.daddr != 0);
702 	payload_protocol = skb_protocol(skb, true);
703 
704 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
705 
706 	dst = tnl_params->daddr;
707 	if (dst == 0) {
708 		/* NBMA tunnel */
709 
710 		if (!skb_dst(skb)) {
711 			DEV_STATS_INC(dev, tx_fifo_errors);
712 			goto tx_error;
713 		}
714 
715 		tun_info = skb_tunnel_info(skb);
716 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
717 		    ip_tunnel_info_af(tun_info) == AF_INET &&
718 		    tun_info->key.u.ipv4.dst) {
719 			dst = tun_info->key.u.ipv4.dst;
720 			md = true;
721 			connected = true;
722 		} else if (payload_protocol == htons(ETH_P_IP)) {
723 			rt = skb_rtable(skb);
724 			dst = rt_nexthop(rt, inner_iph->daddr);
725 		}
726 #if IS_ENABLED(CONFIG_IPV6)
727 		else if (payload_protocol == htons(ETH_P_IPV6)) {
728 			const struct in6_addr *addr6;
729 			struct neighbour *neigh;
730 			bool do_tx_error_icmp;
731 			int addr_type;
732 
733 			neigh = dst_neigh_lookup(skb_dst(skb),
734 						 &ipv6_hdr(skb)->daddr);
735 			if (!neigh)
736 				goto tx_error;
737 
738 			addr6 = (const struct in6_addr *)&neigh->primary_key;
739 			addr_type = ipv6_addr_type(addr6);
740 
741 			if (addr_type == IPV6_ADDR_ANY) {
742 				addr6 = &ipv6_hdr(skb)->daddr;
743 				addr_type = ipv6_addr_type(addr6);
744 			}
745 
746 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
747 				do_tx_error_icmp = true;
748 			else {
749 				do_tx_error_icmp = false;
750 				dst = addr6->s6_addr32[3];
751 			}
752 			neigh_release(neigh);
753 			if (do_tx_error_icmp)
754 				goto tx_error_icmp;
755 		}
756 #endif
757 		else
758 			goto tx_error;
759 
760 		if (!md)
761 			connected = false;
762 	}
763 
764 	tos = tnl_params->tos;
765 	if (tos & 0x1) {
766 		tos &= ~0x1;
767 		if (payload_protocol == htons(ETH_P_IP)) {
768 			tos = inner_iph->tos;
769 			connected = false;
770 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
771 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
772 			connected = false;
773 		}
774 	}
775 
776 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
777 			    tunnel->parms.o_key, tos & INET_DSCP_MASK,
778 			    tunnel->net, READ_ONCE(tunnel->parms.link),
779 			    tunnel->fwmark, skb_get_hash(skb), 0);
780 
781 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
782 		goto tx_error;
783 
784 	if (connected && md) {
785 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
786 		if (use_cache)
787 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
788 					       &fl4.saddr);
789 	} else {
790 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
791 						&fl4.saddr) : NULL;
792 	}
793 
794 	if (!rt) {
795 		rt = ip_route_output_key(tunnel->net, &fl4);
796 
797 		if (IS_ERR(rt)) {
798 			DEV_STATS_INC(dev, tx_carrier_errors);
799 			goto tx_error;
800 		}
801 		if (use_cache)
802 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
803 					  fl4.saddr);
804 		else if (!md && connected)
805 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
806 					  fl4.saddr);
807 	}
808 
809 	if (rt->dst.dev == dev) {
810 		ip_rt_put(rt);
811 		DEV_STATS_INC(dev, collisions);
812 		goto tx_error;
813 	}
814 
815 	df = tnl_params->frag_off;
816 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
817 		df |= (inner_iph->frag_off & htons(IP_DF));
818 
819 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
820 		ip_rt_put(rt);
821 		goto tx_error;
822 	}
823 
824 	if (tunnel->err_count > 0) {
825 		if (time_before(jiffies,
826 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
827 			tunnel->err_count--;
828 
829 			dst_link_failure(skb);
830 		} else
831 			tunnel->err_count = 0;
832 	}
833 
834 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
835 	ttl = tnl_params->ttl;
836 	if (ttl == 0) {
837 		if (payload_protocol == htons(ETH_P_IP))
838 			ttl = inner_iph->ttl;
839 #if IS_ENABLED(CONFIG_IPV6)
840 		else if (payload_protocol == htons(ETH_P_IPV6))
841 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
842 #endif
843 		else
844 			ttl = ip4_dst_hoplimit(&rt->dst);
845 	}
846 
847 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
848 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
849 
850 	if (skb_cow_head(skb, max_headroom)) {
851 		ip_rt_put(rt);
852 		DEV_STATS_INC(dev, tx_dropped);
853 		kfree_skb(skb);
854 		return;
855 	}
856 
857 	ip_tunnel_adj_headroom(dev, max_headroom);
858 
859 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
860 		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
861 	return;
862 
863 #if IS_ENABLED(CONFIG_IPV6)
864 tx_error_icmp:
865 	dst_link_failure(skb);
866 #endif
867 tx_error:
868 	DEV_STATS_INC(dev, tx_errors);
869 	kfree_skb(skb);
870 }
871 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
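/* Hedged usage sketch: the classic (non-metadata) path passes the tunnel's
 * own outer header template, as an IPIP-style driver would:
 *
 *	const struct iphdr *tiph = &tunnel->parms.iph;
 *
 *	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 *	return NETDEV_TX_OK;
 */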
872 
873 static void ip_tunnel_update(struct ip_tunnel_net *itn,
874 			     struct ip_tunnel *t,
875 			     struct net_device *dev,
876 			     struct ip_tunnel_parm_kern *p,
877 			     bool set_mtu,
878 			     __u32 fwmark)
879 {
880 	ip_tunnel_del(itn, t);
881 	t->parms.iph.saddr = p->iph.saddr;
882 	t->parms.iph.daddr = p->iph.daddr;
883 	t->parms.i_key = p->i_key;
884 	t->parms.o_key = p->o_key;
885 	if (dev->type != ARPHRD_ETHER) {
886 		__dev_addr_set(dev, &p->iph.saddr, 4);
887 		memcpy(dev->broadcast, &p->iph.daddr, 4);
888 	}
889 	ip_tunnel_add(itn, t);
890 
891 	t->parms.iph.ttl = p->iph.ttl;
892 	t->parms.iph.tos = p->iph.tos;
893 	t->parms.iph.frag_off = p->iph.frag_off;
894 
895 	if (t->parms.link != p->link || t->fwmark != fwmark) {
896 		int mtu;
897 
898 		WRITE_ONCE(t->parms.link, p->link);
899 		t->fwmark = fwmark;
900 		mtu = ip_tunnel_bind_dev(dev);
901 		if (set_mtu)
902 			WRITE_ONCE(dev->mtu, mtu);
903 	}
904 	dst_cache_reset(&t->dst_cache);
905 	netdev_state_change(dev);
906 }
907 
908 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
909 		  int cmd)
910 {
911 	int err = 0;
912 	struct ip_tunnel *t = netdev_priv(dev);
913 	struct net *net = t->net;
914 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
915 
916 	switch (cmd) {
917 	case SIOCGETTUNNEL:
918 		if (dev == itn->fb_tunnel_dev) {
919 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
920 			if (!t)
921 				t = netdev_priv(dev);
922 		}
923 		memcpy(p, &t->parms, sizeof(*p));
924 		break;
925 
926 	case SIOCADDTUNNEL:
927 	case SIOCCHGTUNNEL:
928 		err = -EPERM;
929 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
930 			goto done;
931 		if (p->iph.ttl)
932 			p->iph.frag_off |= htons(IP_DF);
933 		if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
934 			if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
935 				p->i_key = 0;
936 			if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
937 				p->o_key = 0;
938 		}
939 
940 		t = ip_tunnel_find(itn, p, itn->type);
941 
942 		if (cmd == SIOCADDTUNNEL) {
943 			if (!t) {
944 				t = ip_tunnel_create(net, itn, p);
945 				err = PTR_ERR_OR_ZERO(t);
946 				break;
947 			}
948 
949 			err = -EEXIST;
950 			break;
951 		}
952 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
953 			if (t) {
954 				if (t->dev != dev) {
955 					err = -EEXIST;
956 					break;
957 				}
958 			} else {
959 				unsigned int nflags = 0;
960 
961 				if (ipv4_is_multicast(p->iph.daddr))
962 					nflags = IFF_BROADCAST;
963 				else if (p->iph.daddr)
964 					nflags = IFF_POINTOPOINT;
965 
966 				if ((dev->flags ^ nflags) & (IFF_POINTOPOINT | IFF_BROADCAST)) {
967 					err = -EINVAL;
968 					break;
969 				}
970 
971 				t = netdev_priv(dev);
972 			}
973 		}
974 
975 		if (t) {
976 			err = 0;
977 			ip_tunnel_update(itn, t, dev, p, true, 0);
978 		} else {
979 			err = -ENOENT;
980 		}
981 		break;
982 
983 	case SIOCDELTUNNEL:
984 		err = -EPERM;
985 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
986 			goto done;
987 
988 		if (dev == itn->fb_tunnel_dev) {
989 			err = -ENOENT;
990 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
991 			if (!t)
992 				goto done;
993 			err = -EPERM;
994 			if (t == netdev_priv(itn->fb_tunnel_dev))
995 				goto done;
996 			dev = t->dev;
997 		}
998 		unregister_netdevice(dev);
999 		err = 0;
1000 		break;
1001 
1002 	default:
1003 		err = -EINVAL;
1004 	}
1005 
1006 done:
1007 	return err;
1008 }
1009 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
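/* Userspace view (editor's sketch): these commands arrive through the
 * classic tunnel ioctls on any AF_INET socket, e.g. reading back the
 * parameters of "tunl0" (error handling omitted):
 *
 *	struct ip_tunnel_parm p = { 0 };
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(socket(AF_INET, SOCK_DGRAM, 0), SIOCGETTUNNEL, &ifr);
 */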
1010 
1011 bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
1012 			      const void __user *data)
1013 {
1014 	struct ip_tunnel_parm p;
1015 
1016 	if (copy_from_user(&p, data, sizeof(p)))
1017 		return false;
1018 
1019 	strscpy(kp->name, p.name);
1020 	kp->link = p.link;
1021 	ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
1022 	ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
1023 	kp->i_key = p.i_key;
1024 	kp->o_key = p.o_key;
1025 	memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
1026 
1027 	return true;
1028 }
1029 EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
1030 
1031 bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
1032 {
1033 	struct ip_tunnel_parm p;
1034 
1035 	if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
1036 	    !ip_tunnel_flags_is_be16_compat(kp->o_flags))
1037 		return false;
1038 
1039 	memset(&p, 0, sizeof(p));
1040 
1041 	strscpy(p.name, kp->name);
1042 	p.link = kp->link;
1043 	p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
1044 	p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
1045 	p.i_key = kp->i_key;
1046 	p.o_key = kp->o_key;
1047 	memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
1048 
1049 	return !copy_to_user(data, &p, sizeof(p));
1050 }
1051 EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
1052 
1053 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1054 			     void __user *data, int cmd)
1055 {
1056 	struct ip_tunnel_parm_kern p;
1057 	int err;
1058 
1059 	if (!ip_tunnel_parm_from_user(&p, data))
1060 		return -EFAULT;
1061 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1062 	if (!err && !ip_tunnel_parm_to_user(data, &p))
1063 		return -EFAULT;
1064 	return err;
1065 }
1066 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1067 
1068 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1069 {
1070 	struct ip_tunnel *tunnel = netdev_priv(dev);
1071 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1072 	int max_mtu = IP_MAX_MTU - t_hlen;
1073 
1074 	if (dev->type == ARPHRD_ETHER)
1075 		max_mtu -= dev->hard_header_len;
1076 
1077 	if (new_mtu < ETH_MIN_MTU)
1078 		return -EINVAL;
1079 
1080 	if (new_mtu > max_mtu) {
1081 		if (strict)
1082 			return -EINVAL;
1083 
1084 		new_mtu = max_mtu;
1085 	}
1086 
1087 	WRITE_ONCE(dev->mtu, new_mtu);
1088 	return 0;
1089 }
1090 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1091 
1092 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1093 {
1094 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1095 }
1096 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
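/* Worked example (editor's note): for a plain GRE device (hlen = 4,
 * non-Ethernet) max_mtu = 65535 - (4 + 20) = 65511; a request for 70000
 * with strict == false is silently clamped to 65511, while strict == true
 * would return -EINVAL.
 */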
1097 
1098 static void ip_tunnel_dev_free(struct net_device *dev)
1099 {
1100 	struct ip_tunnel *tunnel = netdev_priv(dev);
1101 
1102 	gro_cells_destroy(&tunnel->gro_cells);
1103 	dst_cache_destroy(&tunnel->dst_cache);
1104 }
1105 
1106 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1107 {
1108 	struct ip_tunnel *tunnel = netdev_priv(dev);
1109 	struct ip_tunnel_net *itn;
1110 
1111 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1112 
1113 	if (itn->fb_tunnel_dev != dev) {
1114 		ip_tunnel_del(itn, netdev_priv(dev));
1115 		unregister_netdevice_queue(dev, head);
1116 	}
1117 }
1118 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1119 
1120 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1121 {
1122 	struct ip_tunnel *tunnel = netdev_priv(dev);
1123 
1124 	return READ_ONCE(tunnel->net);
1125 }
1126 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1127 
1128 int ip_tunnel_get_iflink(const struct net_device *dev)
1129 {
1130 	const struct ip_tunnel *tunnel = netdev_priv(dev);
1131 
1132 	return READ_ONCE(tunnel->parms.link);
1133 }
1134 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1135 
1136 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1137 				  struct rtnl_link_ops *ops, char *devname)
1138 {
1139 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1140 	struct ip_tunnel_parm_kern parms;
1141 	unsigned int i;
1142 
1143 	itn->rtnl_link_ops = ops;
1144 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1145 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1146 
1147 	if (!ops || !net_has_fallback_tunnels(net)) {
1148 		struct ip_tunnel_net *it_init_net;
1149 
1150 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1151 		itn->type = it_init_net->type;
1152 		itn->fb_tunnel_dev = NULL;
1153 		return 0;
1154 	}
1155 
1156 	memset(&parms, 0, sizeof(parms));
1157 	if (devname)
1158 		strscpy(parms.name, devname, IFNAMSIZ);
1159 
1160 	rtnl_lock();
1161 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1162 	/* FB netdevice is special: we have one, and only one per netns.
1163 	 * Allowing it to be moved to another netns is clearly unsafe.
1164 	 */
1165 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1166 		itn->fb_tunnel_dev->netns_immutable = true;
1167 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1168 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1169 		itn->type = itn->fb_tunnel_dev->type;
1170 	}
1171 	rtnl_unlock();
1172 
1173 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1174 }
1175 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
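/* Usage sketch (mirrors how existing tunnel drivers wire their pernet ops;
 * names assumed):
 *
 *	static int __net_init ipip_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, ipip_net_id,
 *					  &ipip_link_ops, "tunl0");
 *	}
 */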
1176 
1177 void ip_tunnel_delete_net(struct net *net, unsigned int id,
1178 			  struct rtnl_link_ops *ops,
1179 			  struct list_head *head)
1180 {
1181 	struct ip_tunnel_net *itn = net_generic(net, id);
1182 	struct net_device *dev, *aux;
1183 	int h;
1184 
1185 	ASSERT_RTNL_NET(net);
1186 
1187 	for_each_netdev_safe(net, dev, aux)
1188 		if (dev->rtnl_link_ops == ops)
1189 			unregister_netdevice_queue(dev, head);
1190 
1191 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1192 		struct ip_tunnel *t;
1193 		struct hlist_node *n;
1194 		struct hlist_head *thead = &itn->tunnels[h];
1195 
1196 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1197 			/* If dev is in the same netns, it has already
1198 			 * been added to the list by the previous loop.
1199 			 */
1200 			if (!net_eq(dev_net(t->dev), net))
1201 				unregister_netdevice_queue(t->dev, head);
1202 	}
1203 }
1204 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1205 
1206 int ip_tunnel_newlink(struct net *net, struct net_device *dev,
1207 		      struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
1208 		      __u32 fwmark)
1209 {
1210 	struct ip_tunnel *nt;
1211 	struct ip_tunnel_net *itn;
1212 	int mtu;
1213 	int err;
1214 
1215 	nt = netdev_priv(dev);
1216 	itn = net_generic(net, nt->ip_tnl_net_id);
1217 
1218 	if (nt->collect_md) {
1219 		if (rtnl_dereference(itn->collect_md_tun))
1220 			return -EEXIST;
1221 	} else {
1222 		if (ip_tunnel_find(itn, p, dev->type))
1223 			return -EEXIST;
1224 	}
1225 
1226 	nt->net = net;
1227 	nt->parms = *p;
1228 	nt->fwmark = fwmark;
1229 	err = register_netdevice(dev);
1230 	if (err)
1231 		goto err_register_netdevice;
1232 
1233 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1234 		eth_hw_addr_random(dev);
1235 
1236 	mtu = ip_tunnel_bind_dev(dev);
1237 	if (tb[IFLA_MTU]) {
1238 		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1239 
1240 		if (dev->type == ARPHRD_ETHER)
1241 			max -= dev->hard_header_len;
1242 
1243 		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1244 	}
1245 
1246 	err = dev_set_mtu(dev, mtu);
1247 	if (err)
1248 		goto err_dev_set_mtu;
1249 
1250 	ip_tunnel_add(itn, nt);
1251 	return 0;
1252 
1253 err_dev_set_mtu:
1254 	unregister_netdevice(dev);
1255 err_register_netdevice:
1256 	return err;
1257 }
1258 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1259 
1260 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1261 			 struct ip_tunnel_parm_kern *p, __u32 fwmark)
1262 {
1263 	struct ip_tunnel *t;
1264 	struct ip_tunnel *tunnel = netdev_priv(dev);
1265 	struct net *net = tunnel->net;
1266 	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1267 
1268 	if (dev == itn->fb_tunnel_dev)
1269 		return -EINVAL;
1270 
1271 	t = ip_tunnel_find(itn, p, dev->type);
1272 
1273 	if (t) {
1274 		if (t->dev != dev)
1275 			return -EEXIST;
1276 	} else {
1277 		t = tunnel;
1278 
1279 		if (dev->type != ARPHRD_ETHER) {
1280 			unsigned int nflags = 0;
1281 
1282 			if (ipv4_is_multicast(p->iph.daddr))
1283 				nflags = IFF_BROADCAST;
1284 			else if (p->iph.daddr)
1285 				nflags = IFF_POINTOPOINT;
1286 
1287 			if ((dev->flags ^ nflags) &
1288 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1289 				return -EINVAL;
1290 		}
1291 	}
1292 
1293 	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1294 	return 0;
1295 }
1296 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1297 
1298 int ip_tunnel_init(struct net_device *dev)
1299 {
1300 	struct ip_tunnel *tunnel = netdev_priv(dev);
1301 	struct iphdr *iph = &tunnel->parms.iph;
1302 	int err;
1303 
1304 	dev->needs_free_netdev = true;
1305 	dev->priv_destructor = ip_tunnel_dev_free;
1306 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
1307 
1308 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1309 	if (err)
1310 		return err;
1311 
1312 	err = gro_cells_init(&tunnel->gro_cells, dev);
1313 	if (err) {
1314 		dst_cache_destroy(&tunnel->dst_cache);
1315 		return err;
1316 	}
1317 
1318 	tunnel->dev = dev;
1319 	strscpy(tunnel->parms.name, dev->name);
1320 	iph->version		= 4;
1321 	iph->ihl		= 5;
1322 
1323 	if (tunnel->collect_md)
1324 		netif_keep_dst(dev);
1325 	netdev_lockdep_set_classes(dev);
1326 	return 0;
1327 }
1328 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1329 
1330 void ip_tunnel_uninit(struct net_device *dev)
1331 {
1332 	struct ip_tunnel *tunnel = netdev_priv(dev);
1333 	struct net *net = tunnel->net;
1334 	struct ip_tunnel_net *itn;
1335 
1336 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1337 	ip_tunnel_del(itn, netdev_priv(dev));
1338 	if (itn->fb_tunnel_dev == dev)
1339 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1340 
1341 	dst_cache_reset(&tunnel->dst_cache);
1342 }
1343 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1344 
1345 /* Do the least required initialization; the rest is done in the tunnel_init call */
1346 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1347 {
1348 	struct ip_tunnel *tunnel = netdev_priv(dev);
1349 	tunnel->ip_tnl_net_id = net_id;
1350 }
1351 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1352 
1353 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1354 MODULE_LICENSE("GPL");
1355