xref: /linux/net/ipv4/ip_tunnel.c (revision ae22a94997b8a03dcb3c922857c203246711f9d4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55 	return hash_32((__force u32)key ^ (__force u32)remote,
56 			 IP_TNL_HASH_BITS);
57 }
58 
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
72 /* Fallback tunnel: no source, no destination, no key, no options
73 
74    Tunnel hash table:
75    We require exact key match i.e. if a key is present in packet
76    it will match only tunnel with the same key; if it is not present,
77    it will match only keyless tunnel.
78 
79    All keysless packets, if not matched configured keyless tunnels
80    will match fallback tunnel.
81    Given src, dst and key, find appropriate for input tunnel.
82 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 				   int link, __be16 flags,
85 				   __be32 remote, __be32 local,
86 				   __be32 key)
87 {
88 	struct ip_tunnel *t, *cand = NULL;
89 	struct hlist_head *head;
90 	struct net_device *ndev;
91 	unsigned int hash;
92 
93 	hash = ip_tunnel_hash(key, remote);
94 	head = &itn->tunnels[hash];
95 
96 	hlist_for_each_entry_rcu(t, head, hash_node) {
97 		if (local != t->parms.iph.saddr ||
98 		    remote != t->parms.iph.daddr ||
99 		    !(t->dev->flags & IFF_UP))
100 			continue;
101 
102 		if (!ip_tunnel_key_match(&t->parms, flags, key))
103 			continue;
104 
105 		if (READ_ONCE(t->parms.link) == link)
106 			return t;
107 		cand = t;
108 	}
109 
110 	hlist_for_each_entry_rcu(t, head, hash_node) {
111 		if (remote != t->parms.iph.daddr ||
112 		    t->parms.iph.saddr != 0 ||
113 		    !(t->dev->flags & IFF_UP))
114 			continue;
115 
116 		if (!ip_tunnel_key_match(&t->parms, flags, key))
117 			continue;
118 
119 		if (READ_ONCE(t->parms.link) == link)
120 			return t;
121 		if (!cand)
122 			cand = t;
123 	}
124 
125 	hash = ip_tunnel_hash(key, 0);
126 	head = &itn->tunnels[hash];
127 
128 	hlist_for_each_entry_rcu(t, head, hash_node) {
129 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
130 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
131 			continue;
132 
133 		if (!(t->dev->flags & IFF_UP))
134 			continue;
135 
136 		if (!ip_tunnel_key_match(&t->parms, flags, key))
137 			continue;
138 
139 		if (READ_ONCE(t->parms.link) == link)
140 			return t;
141 		if (!cand)
142 			cand = t;
143 	}
144 
145 	hlist_for_each_entry_rcu(t, head, hash_node) {
146 		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
147 		    t->parms.iph.saddr != 0 ||
148 		    t->parms.iph.daddr != 0 ||
149 		    !(t->dev->flags & IFF_UP))
150 			continue;
151 
152 		if (READ_ONCE(t->parms.link) == link)
153 			return t;
154 		if (!cand)
155 			cand = t;
156 	}
157 
158 	if (cand)
159 		return cand;
160 
161 	t = rcu_dereference(itn->collect_md_tun);
162 	if (t && t->dev->flags & IFF_UP)
163 		return t;
164 
165 	ndev = READ_ONCE(itn->fb_tunnel_dev);
166 	if (ndev && ndev->flags & IFF_UP)
167 		return netdev_priv(ndev);
168 
169 	return NULL;
170 }
171 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
172 
173 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
174 				    struct ip_tunnel_parm *parms)
175 {
176 	unsigned int h;
177 	__be32 remote;
178 	__be32 i_key = parms->i_key;
179 
180 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
181 		remote = parms->iph.daddr;
182 	else
183 		remote = 0;
184 
185 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
186 		i_key = 0;
187 
188 	h = ip_tunnel_hash(i_key, remote);
189 	return &itn->tunnels[h];
190 }
191 
192 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
193 {
194 	struct hlist_head *head = ip_bucket(itn, &t->parms);
195 
196 	if (t->collect_md)
197 		rcu_assign_pointer(itn->collect_md_tun, t);
198 	hlist_add_head_rcu(&t->hash_node, head);
199 }
200 
201 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
202 {
203 	if (t->collect_md)
204 		rcu_assign_pointer(itn->collect_md_tun, NULL);
205 	hlist_del_init_rcu(&t->hash_node);
206 }
207 
208 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
209 					struct ip_tunnel_parm *parms,
210 					int type)
211 {
212 	__be32 remote = parms->iph.daddr;
213 	__be32 local = parms->iph.saddr;
214 	__be32 key = parms->i_key;
215 	__be16 flags = parms->i_flags;
216 	int link = parms->link;
217 	struct ip_tunnel *t = NULL;
218 	struct hlist_head *head = ip_bucket(itn, parms);
219 
220 	hlist_for_each_entry_rcu(t, head, hash_node) {
221 		if (local == t->parms.iph.saddr &&
222 		    remote == t->parms.iph.daddr &&
223 		    link == READ_ONCE(t->parms.link) &&
224 		    type == t->dev->type &&
225 		    ip_tunnel_key_match(&t->parms, flags, key))
226 			break;
227 	}
228 	return t;
229 }
230 
231 static struct net_device *__ip_tunnel_create(struct net *net,
232 					     const struct rtnl_link_ops *ops,
233 					     struct ip_tunnel_parm *parms)
234 {
235 	int err;
236 	struct ip_tunnel *tunnel;
237 	struct net_device *dev;
238 	char name[IFNAMSIZ];
239 
240 	err = -E2BIG;
241 	if (parms->name[0]) {
242 		if (!dev_valid_name(parms->name))
243 			goto failed;
244 		strscpy(name, parms->name, IFNAMSIZ);
245 	} else {
246 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
247 			goto failed;
248 		strcpy(name, ops->kind);
249 		strcat(name, "%d");
250 	}
251 
252 	ASSERT_RTNL();
253 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
254 	if (!dev) {
255 		err = -ENOMEM;
256 		goto failed;
257 	}
258 	dev_net_set(dev, net);
259 
260 	dev->rtnl_link_ops = ops;
261 
262 	tunnel = netdev_priv(dev);
263 	tunnel->parms = *parms;
264 	tunnel->net = net;
265 
266 	err = register_netdevice(dev);
267 	if (err)
268 		goto failed_free;
269 
270 	return dev;
271 
272 failed_free:
273 	free_netdev(dev);
274 failed:
275 	return ERR_PTR(err);
276 }
277 
278 static int ip_tunnel_bind_dev(struct net_device *dev)
279 {
280 	struct net_device *tdev = NULL;
281 	struct ip_tunnel *tunnel = netdev_priv(dev);
282 	const struct iphdr *iph;
283 	int hlen = LL_MAX_HEADER;
284 	int mtu = ETH_DATA_LEN;
285 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
286 
287 	iph = &tunnel->parms.iph;
288 
289 	/* Guess output device to choose reasonable mtu and needed_headroom */
290 	if (iph->daddr) {
291 		struct flowi4 fl4;
292 		struct rtable *rt;
293 
294 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
295 				    iph->saddr, tunnel->parms.o_key,
296 				    RT_TOS(iph->tos), dev_net(dev),
297 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
298 		rt = ip_route_output_key(tunnel->net, &fl4);
299 
300 		if (!IS_ERR(rt)) {
301 			tdev = rt->dst.dev;
302 			ip_rt_put(rt);
303 		}
304 		if (dev->type != ARPHRD_ETHER)
305 			dev->flags |= IFF_POINTOPOINT;
306 
307 		dst_cache_reset(&tunnel->dst_cache);
308 	}
309 
310 	if (!tdev && tunnel->parms.link)
311 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
312 
313 	if (tdev) {
314 		hlen = tdev->hard_header_len + tdev->needed_headroom;
315 		mtu = min(tdev->mtu, IP_MAX_MTU);
316 	}
317 
318 	dev->needed_headroom = t_hlen + hlen;
319 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
320 
321 	if (mtu < IPV4_MIN_MTU)
322 		mtu = IPV4_MIN_MTU;
323 
324 	return mtu;
325 }
326 
327 static struct ip_tunnel *ip_tunnel_create(struct net *net,
328 					  struct ip_tunnel_net *itn,
329 					  struct ip_tunnel_parm *parms)
330 {
331 	struct ip_tunnel *nt;
332 	struct net_device *dev;
333 	int t_hlen;
334 	int mtu;
335 	int err;
336 
337 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
338 	if (IS_ERR(dev))
339 		return ERR_CAST(dev);
340 
341 	mtu = ip_tunnel_bind_dev(dev);
342 	err = dev_set_mtu(dev, mtu);
343 	if (err)
344 		goto err_dev_set_mtu;
345 
346 	nt = netdev_priv(dev);
347 	t_hlen = nt->hlen + sizeof(struct iphdr);
348 	dev->min_mtu = ETH_MIN_MTU;
349 	dev->max_mtu = IP_MAX_MTU - t_hlen;
350 	if (dev->type == ARPHRD_ETHER)
351 		dev->max_mtu -= dev->hard_header_len;
352 
353 	ip_tunnel_add(itn, nt);
354 	return nt;
355 
356 err_dev_set_mtu:
357 	unregister_netdevice(dev);
358 	return ERR_PTR(err);
359 }
360 
361 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
362 {
363 	const struct iphdr *iph = ip_hdr(skb);
364 	const struct udphdr *udph;
365 
366 	if (iph->protocol != IPPROTO_UDP)
367 		return;
368 
369 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
370 	info->encap.sport = udph->source;
371 	info->encap.dport = udph->dest;
372 }
373 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
374 
375 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
376 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377 		  bool log_ecn_error)
378 {
379 	const struct iphdr *iph = ip_hdr(skb);
380 	int nh, err;
381 
382 #ifdef CONFIG_NET_IPGRE_BROADCAST
383 	if (ipv4_is_multicast(iph->daddr)) {
384 		DEV_STATS_INC(tunnel->dev, multicast);
385 		skb->pkt_type = PACKET_BROADCAST;
386 	}
387 #endif
388 
389 	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
390 	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
391 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
392 		DEV_STATS_INC(tunnel->dev, rx_errors);
393 		goto drop;
394 	}
395 
396 	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
397 		if (!(tpi->flags&TUNNEL_SEQ) ||
398 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
399 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
400 			DEV_STATS_INC(tunnel->dev, rx_errors);
401 			goto drop;
402 		}
403 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
404 	}
405 
406 	/* Save offset of outer header relative to skb->head,
407 	 * because we are going to reset the network header to the inner header
408 	 * and might change skb->head.
409 	 */
410 	nh = skb_network_header(skb) - skb->head;
411 
412 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
413 
414 	if (!pskb_inet_may_pull(skb)) {
415 		DEV_STATS_INC(tunnel->dev, rx_length_errors);
416 		DEV_STATS_INC(tunnel->dev, rx_errors);
417 		goto drop;
418 	}
419 	iph = (struct iphdr *)(skb->head + nh);
420 
421 	err = IP_ECN_decapsulate(iph, skb);
422 	if (unlikely(err)) {
423 		if (log_ecn_error)
424 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
425 					&iph->saddr, iph->tos);
426 		if (err > 1) {
427 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
428 			DEV_STATS_INC(tunnel->dev, rx_errors);
429 			goto drop;
430 		}
431 	}
432 
433 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
434 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
435 
436 	if (tunnel->dev->type == ARPHRD_ETHER) {
437 		skb->protocol = eth_type_trans(skb, tunnel->dev);
438 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
439 	} else {
440 		skb->dev = tunnel->dev;
441 	}
442 
443 	if (tun_dst)
444 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
445 
446 	gro_cells_receive(&tunnel->gro_cells, skb);
447 	return 0;
448 
449 drop:
450 	if (tun_dst)
451 		dst_release((struct dst_entry *)tun_dst);
452 	kfree_skb(skb);
453 	return 0;
454 }
455 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
456 
457 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
458 			    unsigned int num)
459 {
460 	if (num >= MAX_IPTUN_ENCAP_OPS)
461 		return -ERANGE;
462 
463 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
464 			&iptun_encaps[num],
465 			NULL, ops) ? 0 : -1;
466 }
467 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
468 
469 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
470 			    unsigned int num)
471 {
472 	int ret;
473 
474 	if (num >= MAX_IPTUN_ENCAP_OPS)
475 		return -ERANGE;
476 
477 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
478 		       &iptun_encaps[num],
479 		       ops, NULL) == ops) ? 0 : -1;
480 
481 	synchronize_net();
482 
483 	return ret;
484 }
485 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
486 
487 int ip_tunnel_encap_setup(struct ip_tunnel *t,
488 			  struct ip_tunnel_encap *ipencap)
489 {
490 	int hlen;
491 
492 	memset(&t->encap, 0, sizeof(t->encap));
493 
494 	hlen = ip_encap_hlen(ipencap);
495 	if (hlen < 0)
496 		return hlen;
497 
498 	t->encap.type = ipencap->type;
499 	t->encap.sport = ipencap->sport;
500 	t->encap.dport = ipencap->dport;
501 	t->encap.flags = ipencap->flags;
502 
503 	t->encap_hlen = hlen;
504 	t->hlen = t->encap_hlen + t->tun_hlen;
505 
506 	return 0;
507 }
508 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
509 
510 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
511 			    struct rtable *rt, __be16 df,
512 			    const struct iphdr *inner_iph,
513 			    int tunnel_hlen, __be32 dst, bool md)
514 {
515 	struct ip_tunnel *tunnel = netdev_priv(dev);
516 	int pkt_size;
517 	int mtu;
518 
519 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
520 	pkt_size = skb->len - tunnel_hlen;
521 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
522 
523 	if (df) {
524 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
525 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
526 	} else {
527 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
528 	}
529 
530 	if (skb_valid_dst(skb))
531 		skb_dst_update_pmtu_no_confirm(skb, mtu);
532 
533 	if (skb->protocol == htons(ETH_P_IP)) {
534 		if (!skb_is_gso(skb) &&
535 		    (inner_iph->frag_off & htons(IP_DF)) &&
536 		    mtu < pkt_size) {
537 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
538 			return -E2BIG;
539 		}
540 	}
541 #if IS_ENABLED(CONFIG_IPV6)
542 	else if (skb->protocol == htons(ETH_P_IPV6)) {
543 		struct rt6_info *rt6;
544 		__be32 daddr;
545 
546 		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
547 					   NULL;
548 		daddr = md ? dst : tunnel->parms.iph.daddr;
549 
550 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
551 			   mtu >= IPV6_MIN_MTU) {
552 			if ((daddr && !ipv4_is_multicast(daddr)) ||
553 			    rt6->rt6i_dst.plen == 128) {
554 				rt6->rt6i_flags |= RTF_MODIFIED;
555 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
556 			}
557 		}
558 
559 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
560 					mtu < pkt_size) {
561 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
562 			return -E2BIG;
563 		}
564 	}
565 #endif
566 	return 0;
567 }
568 
569 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
570 {
571 	/* we must cap headroom to some upperlimit, else pskb_expand_head
572 	 * will overflow header offsets in skb_headers_offset_update().
573 	 */
574 	static const unsigned int max_allowed = 512;
575 
576 	if (headroom > max_allowed)
577 		headroom = max_allowed;
578 
579 	if (headroom > READ_ONCE(dev->needed_headroom))
580 		WRITE_ONCE(dev->needed_headroom, headroom);
581 }
582 
/* Transmit @skb through tunnel device @dev using the per-packet tunnel
 * metadata attached to the skb (skb_tunnel_info()) instead of the
 * tunnel's configured parameters.
 *
 * @skb:         packet to send; always consumed (handed to iptunnel_xmit()
 *               or freed)
 * @dev:         tunnel device
 * @proto:       IP protocol number used for the flow lookup and the outer
 *               header; passed by pointer to ip_tunnel_encap(), which may
 *               therefore replace it
 * @tunnel_hlen: tunnel header length; when 0 it is derived from the
 *               metadata's encap settings via ip_encap_hlen()
 *
 * Failures are accounted on @dev as tx_errors (plus tx_carrier_errors or
 * collisions where applicable) or as tx_dropped when headroom cannot be
 * made available.
 */
583 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
584 		       u8 proto, int tunnel_hlen)
585 {
586 	struct ip_tunnel *tunnel = netdev_priv(dev);
587 	u32 headroom = sizeof(struct iphdr);
588 	struct ip_tunnel_info *tun_info;
589 	const struct ip_tunnel_key *key;
590 	const struct iphdr *inner_iph;
591 	struct rtable *rt = NULL;
592 	struct flowi4 fl4;
593 	__be16 df = 0;
594 	u8 tos, ttl;
595 	bool use_cache;
596 
	/* Only IPv4 transmit metadata is acceptable here. */
597 	tun_info = skb_tunnel_info(skb);
598 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
599 		     ip_tunnel_info_af(tun_info) != AF_INET))
600 		goto tx_error;
601 	key = &tun_info->key;
602 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
603 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* A key TOS of exactly 1 means: take the TOS / DS field from the
	 * inner IPv4 or IPv6 header instead.
	 */
604 	tos = key->tos;
605 	if (tos == 1) {
606 		if (skb->protocol == htons(ETH_P_IP))
607 			tos = inner_iph->tos;
608 		else if (skb->protocol == htons(ETH_P_IPV6))
609 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
610 	}
611 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
612 			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
613 			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
614 			    key->flow_flags);
615 
616 	if (!tunnel_hlen)
617 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
618 
619 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
620 		goto tx_error;
621 
	/* Try the route cached in the metadata before doing a lookup; a
	 * freshly looked-up route is stored back when the cache is usable.
	 */
622 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
623 	if (use_cache)
624 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
625 	if (!rt) {
626 		rt = ip_route_output_key(tunnel->net, &fl4);
627 		if (IS_ERR(rt)) {
628 			DEV_STATS_INC(dev, tx_carrier_errors);
629 			goto tx_error;
630 		}
631 		if (use_cache)
632 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
633 					  fl4.saddr);
634 	}
	/* The route leads back into this very device: refuse to loop. */
635 	if (rt->dst.dev == dev) {
636 		ip_rt_put(rt);
637 		DEV_STATS_INC(dev, collisions);
638 		goto tx_error;
639 	}
640 
641 	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
642 		df = htons(IP_DF);
643 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
644 			    key->u.ipv4.dst, true)) {
645 		ip_rt_put(rt);
646 		goto tx_error;
647 	}
648 
649 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* A key TTL of 0 means: inherit from the inner header, or use the
	 * route's hop limit for non-IP payloads.
	 */
650 	ttl = key->ttl;
651 	if (ttl == 0) {
652 		if (skb->protocol == htons(ETH_P_IP))
653 			ttl = inner_iph->ttl;
654 		else if (skb->protocol == htons(ETH_P_IPV6))
655 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
656 		else
657 			ttl = ip4_dst_hoplimit(&rt->dst);
658 	}
659 
	/* headroom started at sizeof(struct iphdr); add what the output
	 * device and the route need in front of it.
	 */
660 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
661 	if (skb_cow_head(skb, headroom)) {
662 		ip_rt_put(rt);
663 		goto tx_dropped;
664 	}
665 
666 	ip_tunnel_adj_headroom(dev, headroom);
667 
668 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
669 		      df, !net_eq(tunnel->net, dev_net(dev)));
670 	return;
671 tx_error:
672 	DEV_STATS_INC(dev, tx_errors);
673 	goto kfree;
674 tx_dropped:
675 	DEV_STATS_INC(dev, tx_dropped);
676 kfree:
677 	kfree_skb(skb);
678 }
679 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
680 
/* Transmit @skb through tunnel device @dev using the outer-header template
 * @tnl_params.
 *
 * @skb:        packet to send; always consumed (handed to iptunnel_xmit()
 *              or freed)
 * @dev:        tunnel device
 * @tnl_params: template for the outer IPv4 header (saddr, daddr, tos, ttl,
 *              frag_off are read)
 * @protocol:   IP protocol number for the outer header; passed by pointer
 *              to ip_tunnel_encap(), which may therefore replace it
 *
 * When the template has no destination (NBMA tunnel) the destination is
 * derived per packet.  Failures are accounted on @dev as tx_errors (plus a
 * more specific counter where applicable) or tx_dropped.
 */
681 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
682 		    const struct iphdr *tnl_params, u8 protocol)
683 {
684 	struct ip_tunnel *tunnel = netdev_priv(dev);
685 	struct ip_tunnel_info *tun_info = NULL;
686 	const struct iphdr *inner_iph;
687 	unsigned int max_headroom;	/* The extra header space needed */
688 	struct rtable *rt = NULL;		/* Route to the other host */
689 	__be16 payload_protocol;
690 	bool use_cache = false;
691 	struct flowi4 fl4;
692 	bool md = false;
693 	bool connected;
694 	u8 tos, ttl;
695 	__be32 dst;
696 	__be16 df;
697 
698 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" gates the use of a cached route further down; it is
	 * cleared whenever the route depends on the individual packet.
	 */
699 	connected = (tunnel->parms.iph.daddr != 0);
700 	payload_protocol = skb_protocol(skb, true);
701 
702 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703 
704 	dst = tnl_params->daddr;
705 	if (dst == 0) {
706 		/* NBMA tunnel */
707 
708 		if (!skb_dst(skb)) {
709 			DEV_STATS_INC(dev, tx_fifo_errors);
710 			goto tx_error;
711 		}
712 
		/* Destination sources, in order of preference: IPv4 transmit
		 * metadata on the skb, the next hop of the inner IPv4 route,
		 * or an IPv4-compatible IPv6 neighbour / destination address.
		 */
713 		tun_info = skb_tunnel_info(skb);
714 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
715 		    ip_tunnel_info_af(tun_info) == AF_INET &&
716 		    tun_info->key.u.ipv4.dst) {
717 			dst = tun_info->key.u.ipv4.dst;
718 			md = true;
719 			connected = true;
720 		} else if (payload_protocol == htons(ETH_P_IP)) {
721 			rt = skb_rtable(skb);
722 			dst = rt_nexthop(rt, inner_iph->daddr);
723 		}
724 #if IS_ENABLED(CONFIG_IPV6)
725 		else if (payload_protocol == htons(ETH_P_IPV6)) {
726 			const struct in6_addr *addr6;
727 			struct neighbour *neigh;
728 			bool do_tx_error_icmp;
729 			int addr_type;
730 
731 			neigh = dst_neigh_lookup(skb_dst(skb),
732 						 &ipv6_hdr(skb)->daddr);
733 			if (!neigh)
734 				goto tx_error;
735 
736 			addr6 = (const struct in6_addr *)&neigh->primary_key;
737 			addr_type = ipv6_addr_type(addr6);
738 
			/* Unspecified neighbour address: fall back to the
			 * packet's own IPv6 destination.
			 */
739 			if (addr_type == IPV6_ADDR_ANY) {
740 				addr6 = &ipv6_hdr(skb)->daddr;
741 				addr_type = ipv6_addr_type(addr6);
742 			}
743 
			/* The error decision is latched in a flag so that the
			 * neighbour reference is released before jumping out.
			 */
744 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
745 				do_tx_error_icmp = true;
746 			else {
747 				do_tx_error_icmp = false;
748 				dst = addr6->s6_addr32[3];
749 			}
750 			neigh_release(neigh);
751 			if (do_tx_error_icmp)
752 				goto tx_error_icmp;
753 		}
754 #endif
755 		else
756 			goto tx_error;
757 
758 		if (!md)
759 			connected = false;
760 	}
761 
	/* Low bit set in the template TOS: inherit the TOS / DS field from
	 * an inner IPv4 or IPv6 header.  The flow then varies per packet, so
	 * the cached route is not used.
	 */
762 	tos = tnl_params->tos;
763 	if (tos & 0x1) {
764 		tos &= ~0x1;
765 		if (payload_protocol == htons(ETH_P_IP)) {
766 			tos = inner_iph->tos;
767 			connected = false;
768 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
769 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
770 			connected = false;
771 		}
772 	}
773 
774 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
775 			    tunnel->parms.o_key, RT_TOS(tos),
776 			    dev_net(dev), READ_ONCE(tunnel->parms.link),
777 			    tunnel->fwmark, skb_get_hash(skb), 0);
778 
779 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
780 		goto tx_error;
781 
	/* Route cache selection: the metadata's cache for metadata-driven
	 * packets, the tunnel's own cache for connected tunnels, none
	 * otherwise (rt is reset to NULL in that case).
	 */
782 	if (connected && md) {
783 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
784 		if (use_cache)
785 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
786 					       &fl4.saddr);
787 	} else {
788 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
789 						&fl4.saddr) : NULL;
790 	}
791 
792 	if (!rt) {
793 		rt = ip_route_output_key(tunnel->net, &fl4);
794 
795 		if (IS_ERR(rt)) {
796 			DEV_STATS_INC(dev, tx_carrier_errors);
797 			goto tx_error;
798 		}
799 		if (use_cache)
800 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
801 					  fl4.saddr);
802 		else if (!md && connected)
803 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
804 					  fl4.saddr);
805 	}
806 
	/* The route leads back into this very device: refuse to loop. */
807 	if (rt->dst.dev == dev) {
808 		ip_rt_put(rt);
809 		DEV_STATS_INC(dev, collisions);
810 		goto tx_error;
811 	}
812 
	/* Outer DF comes from the template, plus the inner IPv4 DF bit
	 * unless the tunnel is configured to ignore it.
	 */
813 	df = tnl_params->frag_off;
814 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
815 		df |= (inner_iph->frag_off & htons(IP_DF));
816 
817 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
818 		ip_rt_put(rt);
819 		goto tx_error;
820 	}
821 
	/* While errors are pending and still recent (within
	 * IPTUNNEL_ERR_TIMEO of err_time), consume one per packet and
	 * report a link failure; once they are stale, forget them.
	 */
822 	if (tunnel->err_count > 0) {
823 		if (time_before(jiffies,
824 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
825 			tunnel->err_count--;
826 
827 			dst_link_failure(skb);
828 		} else
829 			tunnel->err_count = 0;
830 	}
831 
832 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* A template TTL of 0 means: inherit from the inner header, or use
	 * the route's hop limit for other payloads.
	 */
833 	ttl = tnl_params->ttl;
834 	if (ttl == 0) {
835 		if (payload_protocol == htons(ETH_P_IP))
836 			ttl = inner_iph->ttl;
837 #if IS_ENABLED(CONFIG_IPV6)
838 		else if (payload_protocol == htons(ETH_P_IPV6))
839 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
840 #endif
841 		else
842 			ttl = ip4_dst_hoplimit(&rt->dst);
843 	}
844 
845 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
846 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
847 
848 	if (skb_cow_head(skb, max_headroom)) {
849 		ip_rt_put(rt);
850 		DEV_STATS_INC(dev, tx_dropped);
851 		kfree_skb(skb);
852 		return;
853 	}
854 
855 	ip_tunnel_adj_headroom(dev, max_headroom);
856 
857 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
858 		      df, !net_eq(tunnel->net, dev_net(dev)));
859 	return;
860 
861 #if IS_ENABLED(CONFIG_IPV6)
862 tx_error_icmp:
863 	dst_link_failure(skb);
864 #endif
865 tx_error:
866 	DEV_STATS_INC(dev, tx_errors);
867 	kfree_skb(skb);
868 }
869 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
870 
871 static void ip_tunnel_update(struct ip_tunnel_net *itn,
872 			     struct ip_tunnel *t,
873 			     struct net_device *dev,
874 			     struct ip_tunnel_parm *p,
875 			     bool set_mtu,
876 			     __u32 fwmark)
877 {
878 	ip_tunnel_del(itn, t);
879 	t->parms.iph.saddr = p->iph.saddr;
880 	t->parms.iph.daddr = p->iph.daddr;
881 	t->parms.i_key = p->i_key;
882 	t->parms.o_key = p->o_key;
883 	if (dev->type != ARPHRD_ETHER) {
884 		__dev_addr_set(dev, &p->iph.saddr, 4);
885 		memcpy(dev->broadcast, &p->iph.daddr, 4);
886 	}
887 	ip_tunnel_add(itn, t);
888 
889 	t->parms.iph.ttl = p->iph.ttl;
890 	t->parms.iph.tos = p->iph.tos;
891 	t->parms.iph.frag_off = p->iph.frag_off;
892 
893 	if (t->parms.link != p->link || t->fwmark != fwmark) {
894 		int mtu;
895 
896 		WRITE_ONCE(t->parms.link, p->link);
897 		t->fwmark = fwmark;
898 		mtu = ip_tunnel_bind_dev(dev);
899 		if (set_mtu)
900 			dev->mtu = mtu;
901 	}
902 	dst_cache_reset(&t->dst_cache);
903 	netdev_state_change(dev);
904 }
905 
906 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
907 {
908 	int err = 0;
909 	struct ip_tunnel *t = netdev_priv(dev);
910 	struct net *net = t->net;
911 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
912 
913 	switch (cmd) {
914 	case SIOCGETTUNNEL:
915 		if (dev == itn->fb_tunnel_dev) {
916 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
917 			if (!t)
918 				t = netdev_priv(dev);
919 		}
920 		memcpy(p, &t->parms, sizeof(*p));
921 		break;
922 
923 	case SIOCADDTUNNEL:
924 	case SIOCCHGTUNNEL:
925 		err = -EPERM;
926 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
927 			goto done;
928 		if (p->iph.ttl)
929 			p->iph.frag_off |= htons(IP_DF);
930 		if (!(p->i_flags & VTI_ISVTI)) {
931 			if (!(p->i_flags & TUNNEL_KEY))
932 				p->i_key = 0;
933 			if (!(p->o_flags & TUNNEL_KEY))
934 				p->o_key = 0;
935 		}
936 
937 		t = ip_tunnel_find(itn, p, itn->type);
938 
939 		if (cmd == SIOCADDTUNNEL) {
940 			if (!t) {
941 				t = ip_tunnel_create(net, itn, p);
942 				err = PTR_ERR_OR_ZERO(t);
943 				break;
944 			}
945 
946 			err = -EEXIST;
947 			break;
948 		}
949 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
950 			if (t) {
951 				if (t->dev != dev) {
952 					err = -EEXIST;
953 					break;
954 				}
955 			} else {
956 				unsigned int nflags = 0;
957 
958 				if (ipv4_is_multicast(p->iph.daddr))
959 					nflags = IFF_BROADCAST;
960 				else if (p->iph.daddr)
961 					nflags = IFF_POINTOPOINT;
962 
963 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
964 					err = -EINVAL;
965 					break;
966 				}
967 
968 				t = netdev_priv(dev);
969 			}
970 		}
971 
972 		if (t) {
973 			err = 0;
974 			ip_tunnel_update(itn, t, dev, p, true, 0);
975 		} else {
976 			err = -ENOENT;
977 		}
978 		break;
979 
980 	case SIOCDELTUNNEL:
981 		err = -EPERM;
982 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
983 			goto done;
984 
985 		if (dev == itn->fb_tunnel_dev) {
986 			err = -ENOENT;
987 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
988 			if (!t)
989 				goto done;
990 			err = -EPERM;
991 			if (t == netdev_priv(itn->fb_tunnel_dev))
992 				goto done;
993 			dev = t->dev;
994 		}
995 		unregister_netdevice(dev);
996 		err = 0;
997 		break;
998 
999 	default:
1000 		err = -EINVAL;
1001 	}
1002 
1003 done:
1004 	return err;
1005 }
1006 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
1007 
1008 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1009 			     void __user *data, int cmd)
1010 {
1011 	struct ip_tunnel_parm p;
1012 	int err;
1013 
1014 	if (copy_from_user(&p, data, sizeof(p)))
1015 		return -EFAULT;
1016 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1017 	if (!err && copy_to_user(data, &p, sizeof(p)))
1018 		return -EFAULT;
1019 	return err;
1020 }
1021 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1022 
1023 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1024 {
1025 	struct ip_tunnel *tunnel = netdev_priv(dev);
1026 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1027 	int max_mtu = IP_MAX_MTU - t_hlen;
1028 
1029 	if (dev->type == ARPHRD_ETHER)
1030 		max_mtu -= dev->hard_header_len;
1031 
1032 	if (new_mtu < ETH_MIN_MTU)
1033 		return -EINVAL;
1034 
1035 	if (new_mtu > max_mtu) {
1036 		if (strict)
1037 			return -EINVAL;
1038 
1039 		new_mtu = max_mtu;
1040 	}
1041 
1042 	dev->mtu = new_mtu;
1043 	return 0;
1044 }
1045 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1046 
1047 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1048 {
1049 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1050 }
1051 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1052 
1053 static void ip_tunnel_dev_free(struct net_device *dev)
1054 {
1055 	struct ip_tunnel *tunnel = netdev_priv(dev);
1056 
1057 	gro_cells_destroy(&tunnel->gro_cells);
1058 	dst_cache_destroy(&tunnel->dst_cache);
1059 	free_percpu(dev->tstats);
1060 }
1061 
1062 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1063 {
1064 	struct ip_tunnel *tunnel = netdev_priv(dev);
1065 	struct ip_tunnel_net *itn;
1066 
1067 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1068 
1069 	if (itn->fb_tunnel_dev != dev) {
1070 		ip_tunnel_del(itn, netdev_priv(dev));
1071 		unregister_netdevice_queue(dev, head);
1072 	}
1073 }
1074 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1075 
1076 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1077 {
1078 	struct ip_tunnel *tunnel = netdev_priv(dev);
1079 
1080 	return tunnel->net;
1081 }
1082 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1083 
1084 int ip_tunnel_get_iflink(const struct net_device *dev)
1085 {
1086 	const struct ip_tunnel *tunnel = netdev_priv(dev);
1087 
1088 	return READ_ONCE(tunnel->parms.link);
1089 }
1090 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1091 
1092 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1093 				  struct rtnl_link_ops *ops, char *devname)
1094 {
1095 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1096 	struct ip_tunnel_parm parms;
1097 	unsigned int i;
1098 
1099 	itn->rtnl_link_ops = ops;
1100 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1101 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1102 
1103 	if (!ops || !net_has_fallback_tunnels(net)) {
1104 		struct ip_tunnel_net *it_init_net;
1105 
1106 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1107 		itn->type = it_init_net->type;
1108 		itn->fb_tunnel_dev = NULL;
1109 		return 0;
1110 	}
1111 
1112 	memset(&parms, 0, sizeof(parms));
1113 	if (devname)
1114 		strscpy(parms.name, devname, IFNAMSIZ);
1115 
1116 	rtnl_lock();
1117 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1118 	/* FB netdevice is special: we have one, and only one per netns.
1119 	 * Allowing to move it to another netns is clearly unsafe.
1120 	 */
1121 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1122 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1123 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1124 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1125 		itn->type = itn->fb_tunnel_dev->type;
1126 	}
1127 	rtnl_unlock();
1128 
1129 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1130 }
1131 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1132 
1133 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1134 			      struct list_head *head,
1135 			      struct rtnl_link_ops *ops)
1136 {
1137 	struct net_device *dev, *aux;
1138 	int h;
1139 
1140 	for_each_netdev_safe(net, dev, aux)
1141 		if (dev->rtnl_link_ops == ops)
1142 			unregister_netdevice_queue(dev, head);
1143 
1144 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1145 		struct ip_tunnel *t;
1146 		struct hlist_node *n;
1147 		struct hlist_head *thead = &itn->tunnels[h];
1148 
1149 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1150 			/* If dev is in the same netns, it has already
1151 			 * been added to the list by the previous loop.
1152 			 */
1153 			if (!net_eq(dev_net(t->dev), net))
1154 				unregister_netdevice_queue(t->dev, head);
1155 	}
1156 }
1157 
1158 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1159 			   struct rtnl_link_ops *ops,
1160 			   struct list_head *dev_to_kill)
1161 {
1162 	struct ip_tunnel_net *itn;
1163 	struct net *net;
1164 
1165 	ASSERT_RTNL();
1166 	list_for_each_entry(net, net_list, exit_list) {
1167 		itn = net_generic(net, id);
1168 		ip_tunnel_destroy(net, itn, dev_to_kill, ops);
1169 	}
1170 }
1171 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1172 
1173 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1174 		      struct ip_tunnel_parm *p, __u32 fwmark)
1175 {
1176 	struct ip_tunnel *nt;
1177 	struct net *net = dev_net(dev);
1178 	struct ip_tunnel_net *itn;
1179 	int mtu;
1180 	int err;
1181 
1182 	nt = netdev_priv(dev);
1183 	itn = net_generic(net, nt->ip_tnl_net_id);
1184 
1185 	if (nt->collect_md) {
1186 		if (rtnl_dereference(itn->collect_md_tun))
1187 			return -EEXIST;
1188 	} else {
1189 		if (ip_tunnel_find(itn, p, dev->type))
1190 			return -EEXIST;
1191 	}
1192 
1193 	nt->net = net;
1194 	nt->parms = *p;
1195 	nt->fwmark = fwmark;
1196 	err = register_netdevice(dev);
1197 	if (err)
1198 		goto err_register_netdevice;
1199 
1200 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1201 		eth_hw_addr_random(dev);
1202 
1203 	mtu = ip_tunnel_bind_dev(dev);
1204 	if (tb[IFLA_MTU]) {
1205 		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1206 
1207 		if (dev->type == ARPHRD_ETHER)
1208 			max -= dev->hard_header_len;
1209 
1210 		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1211 	}
1212 
1213 	err = dev_set_mtu(dev, mtu);
1214 	if (err)
1215 		goto err_dev_set_mtu;
1216 
1217 	ip_tunnel_add(itn, nt);
1218 	return 0;
1219 
1220 err_dev_set_mtu:
1221 	unregister_netdevice(dev);
1222 err_register_netdevice:
1223 	return err;
1224 }
1225 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1226 
1227 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1228 			 struct ip_tunnel_parm *p, __u32 fwmark)
1229 {
1230 	struct ip_tunnel *t;
1231 	struct ip_tunnel *tunnel = netdev_priv(dev);
1232 	struct net *net = tunnel->net;
1233 	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1234 
1235 	if (dev == itn->fb_tunnel_dev)
1236 		return -EINVAL;
1237 
1238 	t = ip_tunnel_find(itn, p, dev->type);
1239 
1240 	if (t) {
1241 		if (t->dev != dev)
1242 			return -EEXIST;
1243 	} else {
1244 		t = tunnel;
1245 
1246 		if (dev->type != ARPHRD_ETHER) {
1247 			unsigned int nflags = 0;
1248 
1249 			if (ipv4_is_multicast(p->iph.daddr))
1250 				nflags = IFF_BROADCAST;
1251 			else if (p->iph.daddr)
1252 				nflags = IFF_POINTOPOINT;
1253 
1254 			if ((dev->flags ^ nflags) &
1255 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1256 				return -EINVAL;
1257 		}
1258 	}
1259 
1260 	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1261 	return 0;
1262 }
1263 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1264 
1265 int ip_tunnel_init(struct net_device *dev)
1266 {
1267 	struct ip_tunnel *tunnel = netdev_priv(dev);
1268 	struct iphdr *iph = &tunnel->parms.iph;
1269 	int err;
1270 
1271 	dev->needs_free_netdev = true;
1272 	dev->priv_destructor = ip_tunnel_dev_free;
1273 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1274 	if (!dev->tstats)
1275 		return -ENOMEM;
1276 
1277 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1278 	if (err) {
1279 		free_percpu(dev->tstats);
1280 		return err;
1281 	}
1282 
1283 	err = gro_cells_init(&tunnel->gro_cells, dev);
1284 	if (err) {
1285 		dst_cache_destroy(&tunnel->dst_cache);
1286 		free_percpu(dev->tstats);
1287 		return err;
1288 	}
1289 
1290 	tunnel->dev = dev;
1291 	tunnel->net = dev_net(dev);
1292 	strcpy(tunnel->parms.name, dev->name);
1293 	iph->version		= 4;
1294 	iph->ihl		= 5;
1295 
1296 	if (tunnel->collect_md)
1297 		netif_keep_dst(dev);
1298 	netdev_lockdep_set_classes(dev);
1299 	return 0;
1300 }
1301 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1302 
1303 void ip_tunnel_uninit(struct net_device *dev)
1304 {
1305 	struct ip_tunnel *tunnel = netdev_priv(dev);
1306 	struct net *net = tunnel->net;
1307 	struct ip_tunnel_net *itn;
1308 
1309 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1310 	ip_tunnel_del(itn, netdev_priv(dev));
1311 	if (itn->fb_tunnel_dev == dev)
1312 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1313 
1314 	dst_cache_reset(&tunnel->dst_cache);
1315 }
1316 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1317 
1318 /* Do least required initialization, rest of init is done in tunnel_init call */
1319 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1320 {
1321 	struct ip_tunnel *tunnel = netdev_priv(dev);
1322 	tunnel->ip_tnl_net_id = net_id;
1323 }
1324 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1325 
1326 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1327 MODULE_LICENSE("GPL");
1328