/* xref: /linux/net/ipv4/ip_tunnel.c (revision c7170e7672e52cf38f5979416d20b9133a10726e) */
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

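/* Hash a tunnel by the 32-bit i_key XORed with the outer remote
 * address, folded into IP_TNL_HASH_BITS bits.
 */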
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}

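/* A packet matches a tunnel only if both sides agree on keying: a
 * keyed tunnel requires TUNNEL_KEY with the same i_key, a keyless
 * tunnel requires TUNNEL_KEY to be absent.
 */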
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only keyless tunnels.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate input tunnel.
*/
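/* Matching proceeds from most to least specific: (local, remote, key,
 * link), then (remote, key), then (local address or multicast, key),
 * then key only, before falling back to a collect_md tunnel and
 * finally the per-netns fallback device. A protocol receive handler
 * would typically call it along these lines (illustrative sketch
 * only; the surrounding variable names are hypothetical):
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 */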
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

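/* Select the hash bucket a tunnel with these parameters lives in.
 * Multicast destinations hash as "no remote", and VTI tunnels
 * without TUNNEL_KEY ignore i_key, mirroring the lookup rules above.
 */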
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

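/* Find an existing tunnel that exactly matches the given parameters
 * (addresses, link, device type and keying). Unlike ip_tunnel_lookup()
 * this is a configuration-time helper and returns no fallback.
 */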
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == READ_ONCE(t->parms.link) &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strscpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

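/* Probe the route toward the tunnel destination to guess the
 * underlying device, then derive from it the MTU this tunnel device
 * should use and the headroom it needs. Returns the suggested MTU.
 */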
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), dev_net(dev),
				    tunnel->parms.link, tunnel->fwmark, 0, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

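/* For metadata-based tunnels received over UDP, record the outer UDP
 * ports in the tunnel info (presumably so the reverse path can mirror
 * them).
 */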
void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct udphdr *udph;

	if (iph->protocol != IPPROTO_UDP)
		return;

	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
	info->encap.sport = udph->source;
	info->encap.dport = udph->dest;
}
EXPORT_SYMBOL(ip_tunnel_md_udp_encap);

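/* Common receive path for decapsulated packets: validate checksum and
 * sequence-number expectations against the tunnel configuration, undo
 * outer ECN marking, update statistics and hand the inner packet to
 * GRO. Consumes the skb (and tun_dst) even on error, hence it always
 * returns 0.
 */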
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		DEV_STATS_INC(tunnel->dev, multicast);
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

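/* Encapsulation handlers (e.g. FOU/GUE) register themselves in the
 * iptun_encaps[] slot matching their type. cmpxchg() keeps slot
 * updates atomic, and del_ops waits for readers via synchronize_net()
 * before the ops may be freed.
 */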
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

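/* Check whether the packet fits the path MTU once tunnel overhead is
 * accounted for, update the cached route MTU, and send the matching
 * ICMP/ICMPv6 "too big" error toward the sender when it does not fit.
 * Returns -E2BIG in that case, 0 otherwise.
 */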
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph,
			    int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

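/* Transmit path for metadata-mode (collect_md) tunnels: the outer
 * header parameters (addresses, key, TOS, TTL, DF) come from the
 * per-packet struct ip_tunnel_info rather than from the device
 * configuration, with TOS/TTL inherited from the inner packet when
 * left unset.
 */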
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
			    key->flow_flags);

	if (!tunnel_hlen)
		tunnel_hlen = ip_encap_hlen(&tun_info->encap);

	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);

	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	goto kfree;
tx_dropped:
	DEV_STATS_INC(dev, tx_dropped);
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

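/* Transmit path for classically configured tunnels. For NBMA-style
 * tunnels (no fixed destination) the outer destination is recovered
 * from the packet's own metadata, route or neighbour entry. The
 * routing result is cached for connected tunnels, PMTU is enforced,
 * and the packet is handed to iptunnel_xmit() for encapsulation.
 */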
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;		/* Route to the other host */
	__be16 payload_protocol;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);
	payload_protocol = skb_protocol(skb, true);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			DEV_STATS_INC(dev, tx_fifo_errors);
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (payload_protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (payload_protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (payload_protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos),
			    dev_net(dev), READ_ONCE(tunnel->parms.link),
			    tunnel->fwmark, skb_get_hash(skb), 0);

	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						&fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, max_headroom);

	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, tx_dropped);
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

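/* Apply a new parameter set to an existing tunnel: re-hash it under
 * the new addresses and keys, refresh the link-layer addresses,
 * rebind the underlying device when the link or fwmark changed, and
 * invalidate the cached route.
 */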
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		WRITE_ONCE(t->parms.link, p->link);
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

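/* Backend for the legacy SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/
 * SIOCDELTUNNEL ioctls, shared by tunnel drivers via ndo_tunnel_ctl.
 * Add, change and delete require CAP_NET_ADMIN in the tunnel's user
 * namespace.
 */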
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

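/* Validate an MTU change against the room the tunnel headers leave
 * within IP_MAX_MTU. In strict mode an oversized request fails with
 * -EINVAL; otherwise it is silently capped to the maximum.
 */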
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	const struct ip_tunnel *tunnel = netdev_priv(dev);

	return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

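/* Per-netns setup: initialize the tunnel hash table and, unless
 * fallback tunnels are disabled for this namespace, create the
 * fallback device.
 */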
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strscpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The FB netdevice is special: there is one, and only one, per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops,
			   struct list_head *dev_to_kill)
{
	struct ip_tunnel_net *itn;
	struct net *net;

	ASSERT_RTNL();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, dev_to_kill, ops);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

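/* rtnetlink backend for creating a tunnel: reject duplicates
 * (including a second collect_md device per netns), register the
 * netdev, then bind it to the underlay and fix up the MTU before
 * hashing it in.
 */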
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

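/* Common ndo_init for tunnel devices: allocate per-CPU stats, the
 * destination cache and GRO cells, and prime the outer IPv4 header
 * template (version 4, ihl 5).
 */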
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	netdev_lockdep_set_classes(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in the tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");