xref: /linux/net/ipv4/ip_tunnel.c (revision a9f80df4f51440303d063b55bb98720857693821)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55 	return hash_32((__force u32)key ^ (__force u32)remote,
56 			 IP_TNL_HASH_BITS);
57 }
58 
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
72 /* Fallback tunnel: no source, no destination, no key, no options
73 
74    Tunnel hash table:
75    We require exact key match i.e. if a key is present in packet
76    it will match only tunnel with the same key; if it is not present,
77    it will match only keyless tunnel.
78 
79    All keysless packets, if not matched configured keyless tunnels
80    will match fallback tunnel.
81    Given src, dst and key, find appropriate for input tunnel.
82 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 				   int link, __be16 flags,
85 				   __be32 remote, __be32 local,
86 				   __be32 key)
87 {
88 	struct ip_tunnel *t, *cand = NULL;
89 	struct hlist_head *head;
90 	struct net_device *ndev;
91 	unsigned int hash;
92 
93 	hash = ip_tunnel_hash(key, remote);
94 	head = &itn->tunnels[hash];
95 
96 	hlist_for_each_entry_rcu(t, head, hash_node) {
97 		if (local != t->parms.iph.saddr ||
98 		    remote != t->parms.iph.daddr ||
99 		    !(t->dev->flags & IFF_UP))
100 			continue;
101 
102 		if (!ip_tunnel_key_match(&t->parms, flags, key))
103 			continue;
104 
105 		if (t->parms.link == link)
106 			return t;
107 		else
108 			cand = t;
109 	}
110 
111 	hlist_for_each_entry_rcu(t, head, hash_node) {
112 		if (remote != t->parms.iph.daddr ||
113 		    t->parms.iph.saddr != 0 ||
114 		    !(t->dev->flags & IFF_UP))
115 			continue;
116 
117 		if (!ip_tunnel_key_match(&t->parms, flags, key))
118 			continue;
119 
120 		if (t->parms.link == link)
121 			return t;
122 		else if (!cand)
123 			cand = t;
124 	}
125 
126 	hash = ip_tunnel_hash(key, 0);
127 	head = &itn->tunnels[hash];
128 
129 	hlist_for_each_entry_rcu(t, head, hash_node) {
130 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
131 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
132 			continue;
133 
134 		if (!(t->dev->flags & IFF_UP))
135 			continue;
136 
137 		if (!ip_tunnel_key_match(&t->parms, flags, key))
138 			continue;
139 
140 		if (t->parms.link == link)
141 			return t;
142 		else if (!cand)
143 			cand = t;
144 	}
145 
146 	hlist_for_each_entry_rcu(t, head, hash_node) {
147 		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
148 		    t->parms.iph.saddr != 0 ||
149 		    t->parms.iph.daddr != 0 ||
150 		    !(t->dev->flags & IFF_UP))
151 			continue;
152 
153 		if (t->parms.link == link)
154 			return t;
155 		else if (!cand)
156 			cand = t;
157 	}
158 
159 	if (cand)
160 		return cand;
161 
162 	t = rcu_dereference(itn->collect_md_tun);
163 	if (t && t->dev->flags & IFF_UP)
164 		return t;
165 
166 	ndev = READ_ONCE(itn->fb_tunnel_dev);
167 	if (ndev && ndev->flags & IFF_UP)
168 		return netdev_priv(ndev);
169 
170 	return NULL;
171 }
172 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173 
174 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175 				    struct ip_tunnel_parm *parms)
176 {
177 	unsigned int h;
178 	__be32 remote;
179 	__be32 i_key = parms->i_key;
180 
181 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182 		remote = parms->iph.daddr;
183 	else
184 		remote = 0;
185 
186 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187 		i_key = 0;
188 
189 	h = ip_tunnel_hash(i_key, remote);
190 	return &itn->tunnels[h];
191 }
192 
193 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194 {
195 	struct hlist_head *head = ip_bucket(itn, &t->parms);
196 
197 	if (t->collect_md)
198 		rcu_assign_pointer(itn->collect_md_tun, t);
199 	hlist_add_head_rcu(&t->hash_node, head);
200 }
201 
202 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203 {
204 	if (t->collect_md)
205 		rcu_assign_pointer(itn->collect_md_tun, NULL);
206 	hlist_del_init_rcu(&t->hash_node);
207 }
208 
209 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210 					struct ip_tunnel_parm *parms,
211 					int type)
212 {
213 	__be32 remote = parms->iph.daddr;
214 	__be32 local = parms->iph.saddr;
215 	__be32 key = parms->i_key;
216 	__be16 flags = parms->i_flags;
217 	int link = parms->link;
218 	struct ip_tunnel *t = NULL;
219 	struct hlist_head *head = ip_bucket(itn, parms);
220 
221 	hlist_for_each_entry_rcu(t, head, hash_node) {
222 		if (local == t->parms.iph.saddr &&
223 		    remote == t->parms.iph.daddr &&
224 		    link == t->parms.link &&
225 		    type == t->dev->type &&
226 		    ip_tunnel_key_match(&t->parms, flags, key))
227 			break;
228 	}
229 	return t;
230 }
231 
232 static struct net_device *__ip_tunnel_create(struct net *net,
233 					     const struct rtnl_link_ops *ops,
234 					     struct ip_tunnel_parm *parms)
235 {
236 	int err;
237 	struct ip_tunnel *tunnel;
238 	struct net_device *dev;
239 	char name[IFNAMSIZ];
240 
241 	err = -E2BIG;
242 	if (parms->name[0]) {
243 		if (!dev_valid_name(parms->name))
244 			goto failed;
245 		strscpy(name, parms->name, IFNAMSIZ);
246 	} else {
247 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
248 			goto failed;
249 		strcpy(name, ops->kind);
250 		strcat(name, "%d");
251 	}
252 
253 	ASSERT_RTNL();
254 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255 	if (!dev) {
256 		err = -ENOMEM;
257 		goto failed;
258 	}
259 	dev_net_set(dev, net);
260 
261 	dev->rtnl_link_ops = ops;
262 
263 	tunnel = netdev_priv(dev);
264 	tunnel->parms = *parms;
265 	tunnel->net = net;
266 
267 	err = register_netdevice(dev);
268 	if (err)
269 		goto failed_free;
270 
271 	return dev;
272 
273 failed_free:
274 	free_netdev(dev);
275 failed:
276 	return ERR_PTR(err);
277 }
278 
279 static int ip_tunnel_bind_dev(struct net_device *dev)
280 {
281 	struct net_device *tdev = NULL;
282 	struct ip_tunnel *tunnel = netdev_priv(dev);
283 	const struct iphdr *iph;
284 	int hlen = LL_MAX_HEADER;
285 	int mtu = ETH_DATA_LEN;
286 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287 
288 	iph = &tunnel->parms.iph;
289 
290 	/* Guess output device to choose reasonable mtu and needed_headroom */
291 	if (iph->daddr) {
292 		struct flowi4 fl4;
293 		struct rtable *rt;
294 
295 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296 				    iph->saddr, tunnel->parms.o_key,
297 				    RT_TOS(iph->tos), dev_net(dev),
298 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
299 		rt = ip_route_output_key(tunnel->net, &fl4);
300 
301 		if (!IS_ERR(rt)) {
302 			tdev = rt->dst.dev;
303 			ip_rt_put(rt);
304 		}
305 		if (dev->type != ARPHRD_ETHER)
306 			dev->flags |= IFF_POINTOPOINT;
307 
308 		dst_cache_reset(&tunnel->dst_cache);
309 	}
310 
311 	if (!tdev && tunnel->parms.link)
312 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313 
314 	if (tdev) {
315 		hlen = tdev->hard_header_len + tdev->needed_headroom;
316 		mtu = min(tdev->mtu, IP_MAX_MTU);
317 	}
318 
319 	dev->needed_headroom = t_hlen + hlen;
320 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321 
322 	if (mtu < IPV4_MIN_MTU)
323 		mtu = IPV4_MIN_MTU;
324 
325 	return mtu;
326 }
327 
328 static struct ip_tunnel *ip_tunnel_create(struct net *net,
329 					  struct ip_tunnel_net *itn,
330 					  struct ip_tunnel_parm *parms)
331 {
332 	struct ip_tunnel *nt;
333 	struct net_device *dev;
334 	int t_hlen;
335 	int mtu;
336 	int err;
337 
338 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339 	if (IS_ERR(dev))
340 		return ERR_CAST(dev);
341 
342 	mtu = ip_tunnel_bind_dev(dev);
343 	err = dev_set_mtu(dev, mtu);
344 	if (err)
345 		goto err_dev_set_mtu;
346 
347 	nt = netdev_priv(dev);
348 	t_hlen = nt->hlen + sizeof(struct iphdr);
349 	dev->min_mtu = ETH_MIN_MTU;
350 	dev->max_mtu = IP_MAX_MTU - t_hlen;
351 	if (dev->type == ARPHRD_ETHER)
352 		dev->max_mtu -= dev->hard_header_len;
353 
354 	ip_tunnel_add(itn, nt);
355 	return nt;
356 
357 err_dev_set_mtu:
358 	unregister_netdevice(dev);
359 	return ERR_PTR(err);
360 }
361 
362 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
363 {
364 	const struct iphdr *iph = ip_hdr(skb);
365 	const struct udphdr *udph;
366 
367 	if (iph->protocol != IPPROTO_UDP)
368 		return;
369 
370 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
371 	info->encap.sport = udph->source;
372 	info->encap.dport = udph->dest;
373 }
374 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
375 
376 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
377 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
378 		  bool log_ecn_error)
379 {
380 	const struct iphdr *iph = ip_hdr(skb);
381 	int err;
382 
383 #ifdef CONFIG_NET_IPGRE_BROADCAST
384 	if (ipv4_is_multicast(iph->daddr)) {
385 		DEV_STATS_INC(tunnel->dev, multicast);
386 		skb->pkt_type = PACKET_BROADCAST;
387 	}
388 #endif
389 
390 	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
391 	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
392 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
393 		DEV_STATS_INC(tunnel->dev, rx_errors);
394 		goto drop;
395 	}
396 
397 	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
398 		if (!(tpi->flags&TUNNEL_SEQ) ||
399 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
400 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
401 			DEV_STATS_INC(tunnel->dev, rx_errors);
402 			goto drop;
403 		}
404 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
405 	}
406 
407 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
408 
409 	err = IP_ECN_decapsulate(iph, skb);
410 	if (unlikely(err)) {
411 		if (log_ecn_error)
412 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
413 					&iph->saddr, iph->tos);
414 		if (err > 1) {
415 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
416 			DEV_STATS_INC(tunnel->dev, rx_errors);
417 			goto drop;
418 		}
419 	}
420 
421 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
422 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
423 
424 	if (tunnel->dev->type == ARPHRD_ETHER) {
425 		skb->protocol = eth_type_trans(skb, tunnel->dev);
426 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
427 	} else {
428 		skb->dev = tunnel->dev;
429 	}
430 
431 	if (tun_dst)
432 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
433 
434 	gro_cells_receive(&tunnel->gro_cells, skb);
435 	return 0;
436 
437 drop:
438 	if (tun_dst)
439 		dst_release((struct dst_entry *)tun_dst);
440 	kfree_skb(skb);
441 	return 0;
442 }
443 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
444 
445 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
446 			    unsigned int num)
447 {
448 	if (num >= MAX_IPTUN_ENCAP_OPS)
449 		return -ERANGE;
450 
451 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
452 			&iptun_encaps[num],
453 			NULL, ops) ? 0 : -1;
454 }
455 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
456 
457 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
458 			    unsigned int num)
459 {
460 	int ret;
461 
462 	if (num >= MAX_IPTUN_ENCAP_OPS)
463 		return -ERANGE;
464 
465 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
466 		       &iptun_encaps[num],
467 		       ops, NULL) == ops) ? 0 : -1;
468 
469 	synchronize_net();
470 
471 	return ret;
472 }
473 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
474 
475 int ip_tunnel_encap_setup(struct ip_tunnel *t,
476 			  struct ip_tunnel_encap *ipencap)
477 {
478 	int hlen;
479 
480 	memset(&t->encap, 0, sizeof(t->encap));
481 
482 	hlen = ip_encap_hlen(ipencap);
483 	if (hlen < 0)
484 		return hlen;
485 
486 	t->encap.type = ipencap->type;
487 	t->encap.sport = ipencap->sport;
488 	t->encap.dport = ipencap->dport;
489 	t->encap.flags = ipencap->flags;
490 
491 	t->encap_hlen = hlen;
492 	t->hlen = t->encap_hlen + t->tun_hlen;
493 
494 	return 0;
495 }
496 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
497 
498 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
499 			    struct rtable *rt, __be16 df,
500 			    const struct iphdr *inner_iph,
501 			    int tunnel_hlen, __be32 dst, bool md)
502 {
503 	struct ip_tunnel *tunnel = netdev_priv(dev);
504 	int pkt_size;
505 	int mtu;
506 
507 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
508 	pkt_size = skb->len - tunnel_hlen;
509 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
510 
511 	if (df) {
512 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
513 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
514 	} else {
515 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
516 	}
517 
518 	if (skb_valid_dst(skb))
519 		skb_dst_update_pmtu_no_confirm(skb, mtu);
520 
521 	if (skb->protocol == htons(ETH_P_IP)) {
522 		if (!skb_is_gso(skb) &&
523 		    (inner_iph->frag_off & htons(IP_DF)) &&
524 		    mtu < pkt_size) {
525 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
526 			return -E2BIG;
527 		}
528 	}
529 #if IS_ENABLED(CONFIG_IPV6)
530 	else if (skb->protocol == htons(ETH_P_IPV6)) {
531 		struct rt6_info *rt6;
532 		__be32 daddr;
533 
534 		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
535 					   NULL;
536 		daddr = md ? dst : tunnel->parms.iph.daddr;
537 
538 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
539 			   mtu >= IPV6_MIN_MTU) {
540 			if ((daddr && !ipv4_is_multicast(daddr)) ||
541 			    rt6->rt6i_dst.plen == 128) {
542 				rt6->rt6i_flags |= RTF_MODIFIED;
543 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
544 			}
545 		}
546 
547 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
548 					mtu < pkt_size) {
549 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
550 			return -E2BIG;
551 		}
552 	}
553 #endif
554 	return 0;
555 }
556 
557 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
558 		       u8 proto, int tunnel_hlen)
559 {
560 	struct ip_tunnel *tunnel = netdev_priv(dev);
561 	u32 headroom = sizeof(struct iphdr);
562 	struct ip_tunnel_info *tun_info;
563 	const struct ip_tunnel_key *key;
564 	const struct iphdr *inner_iph;
565 	struct rtable *rt = NULL;
566 	struct flowi4 fl4;
567 	__be16 df = 0;
568 	u8 tos, ttl;
569 	bool use_cache;
570 
571 	tun_info = skb_tunnel_info(skb);
572 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
573 		     ip_tunnel_info_af(tun_info) != AF_INET))
574 		goto tx_error;
575 	key = &tun_info->key;
576 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
578 	tos = key->tos;
579 	if (tos == 1) {
580 		if (skb->protocol == htons(ETH_P_IP))
581 			tos = inner_iph->tos;
582 		else if (skb->protocol == htons(ETH_P_IPV6))
583 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
584 	}
585 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
586 			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
587 			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
588 			    key->flow_flags);
589 
590 	if (!tunnel_hlen)
591 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
592 
593 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
594 		goto tx_error;
595 
596 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
597 	if (use_cache)
598 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
599 	if (!rt) {
600 		rt = ip_route_output_key(tunnel->net, &fl4);
601 		if (IS_ERR(rt)) {
602 			DEV_STATS_INC(dev, tx_carrier_errors);
603 			goto tx_error;
604 		}
605 		if (use_cache)
606 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
607 					  fl4.saddr);
608 	}
609 	if (rt->dst.dev == dev) {
610 		ip_rt_put(rt);
611 		DEV_STATS_INC(dev, collisions);
612 		goto tx_error;
613 	}
614 
615 	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
616 		df = htons(IP_DF);
617 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
618 			    key->u.ipv4.dst, true)) {
619 		ip_rt_put(rt);
620 		goto tx_error;
621 	}
622 
623 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
624 	ttl = key->ttl;
625 	if (ttl == 0) {
626 		if (skb->protocol == htons(ETH_P_IP))
627 			ttl = inner_iph->ttl;
628 		else if (skb->protocol == htons(ETH_P_IPV6))
629 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
630 		else
631 			ttl = ip4_dst_hoplimit(&rt->dst);
632 	}
633 
634 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
635 	if (headroom > READ_ONCE(dev->needed_headroom))
636 		WRITE_ONCE(dev->needed_headroom, headroom);
637 
638 	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
639 		ip_rt_put(rt);
640 		goto tx_dropped;
641 	}
642 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
643 		      df, !net_eq(tunnel->net, dev_net(dev)));
644 	return;
645 tx_error:
646 	DEV_STATS_INC(dev, tx_errors);
647 	goto kfree;
648 tx_dropped:
649 	DEV_STATS_INC(dev, tx_dropped);
650 kfree:
651 	kfree_skb(skb);
652 }
653 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
654 
655 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
656 		    const struct iphdr *tnl_params, u8 protocol)
657 {
658 	struct ip_tunnel *tunnel = netdev_priv(dev);
659 	struct ip_tunnel_info *tun_info = NULL;
660 	const struct iphdr *inner_iph;
661 	unsigned int max_headroom;	/* The extra header space needed */
662 	struct rtable *rt = NULL;		/* Route to the other host */
663 	__be16 payload_protocol;
664 	bool use_cache = false;
665 	struct flowi4 fl4;
666 	bool md = false;
667 	bool connected;
668 	u8 tos, ttl;
669 	__be32 dst;
670 	__be16 df;
671 
672 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
673 	connected = (tunnel->parms.iph.daddr != 0);
674 	payload_protocol = skb_protocol(skb, true);
675 
676 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
677 
678 	dst = tnl_params->daddr;
679 	if (dst == 0) {
680 		/* NBMA tunnel */
681 
682 		if (!skb_dst(skb)) {
683 			DEV_STATS_INC(dev, tx_fifo_errors);
684 			goto tx_error;
685 		}
686 
687 		tun_info = skb_tunnel_info(skb);
688 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
689 		    ip_tunnel_info_af(tun_info) == AF_INET &&
690 		    tun_info->key.u.ipv4.dst) {
691 			dst = tun_info->key.u.ipv4.dst;
692 			md = true;
693 			connected = true;
694 		} else if (payload_protocol == htons(ETH_P_IP)) {
695 			rt = skb_rtable(skb);
696 			dst = rt_nexthop(rt, inner_iph->daddr);
697 		}
698 #if IS_ENABLED(CONFIG_IPV6)
699 		else if (payload_protocol == htons(ETH_P_IPV6)) {
700 			const struct in6_addr *addr6;
701 			struct neighbour *neigh;
702 			bool do_tx_error_icmp;
703 			int addr_type;
704 
705 			neigh = dst_neigh_lookup(skb_dst(skb),
706 						 &ipv6_hdr(skb)->daddr);
707 			if (!neigh)
708 				goto tx_error;
709 
710 			addr6 = (const struct in6_addr *)&neigh->primary_key;
711 			addr_type = ipv6_addr_type(addr6);
712 
713 			if (addr_type == IPV6_ADDR_ANY) {
714 				addr6 = &ipv6_hdr(skb)->daddr;
715 				addr_type = ipv6_addr_type(addr6);
716 			}
717 
718 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
719 				do_tx_error_icmp = true;
720 			else {
721 				do_tx_error_icmp = false;
722 				dst = addr6->s6_addr32[3];
723 			}
724 			neigh_release(neigh);
725 			if (do_tx_error_icmp)
726 				goto tx_error_icmp;
727 		}
728 #endif
729 		else
730 			goto tx_error;
731 
732 		if (!md)
733 			connected = false;
734 	}
735 
736 	tos = tnl_params->tos;
737 	if (tos & 0x1) {
738 		tos &= ~0x1;
739 		if (payload_protocol == htons(ETH_P_IP)) {
740 			tos = inner_iph->tos;
741 			connected = false;
742 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
743 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
744 			connected = false;
745 		}
746 	}
747 
748 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
749 			    tunnel->parms.o_key, RT_TOS(tos),
750 			    dev_net(dev), tunnel->parms.link,
751 			    tunnel->fwmark, skb_get_hash(skb), 0);
752 
753 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
754 		goto tx_error;
755 
756 	if (connected && md) {
757 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
758 		if (use_cache)
759 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
760 					       &fl4.saddr);
761 	} else {
762 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
763 						&fl4.saddr) : NULL;
764 	}
765 
766 	if (!rt) {
767 		rt = ip_route_output_key(tunnel->net, &fl4);
768 
769 		if (IS_ERR(rt)) {
770 			DEV_STATS_INC(dev, tx_carrier_errors);
771 			goto tx_error;
772 		}
773 		if (use_cache)
774 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
775 					  fl4.saddr);
776 		else if (!md && connected)
777 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
778 					  fl4.saddr);
779 	}
780 
781 	if (rt->dst.dev == dev) {
782 		ip_rt_put(rt);
783 		DEV_STATS_INC(dev, collisions);
784 		goto tx_error;
785 	}
786 
787 	df = tnl_params->frag_off;
788 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
789 		df |= (inner_iph->frag_off & htons(IP_DF));
790 
791 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
792 		ip_rt_put(rt);
793 		goto tx_error;
794 	}
795 
796 	if (tunnel->err_count > 0) {
797 		if (time_before(jiffies,
798 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
799 			tunnel->err_count--;
800 
801 			dst_link_failure(skb);
802 		} else
803 			tunnel->err_count = 0;
804 	}
805 
806 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
807 	ttl = tnl_params->ttl;
808 	if (ttl == 0) {
809 		if (payload_protocol == htons(ETH_P_IP))
810 			ttl = inner_iph->ttl;
811 #if IS_ENABLED(CONFIG_IPV6)
812 		else if (payload_protocol == htons(ETH_P_IPV6))
813 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
814 #endif
815 		else
816 			ttl = ip4_dst_hoplimit(&rt->dst);
817 	}
818 
819 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
820 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
821 	if (max_headroom > READ_ONCE(dev->needed_headroom))
822 		WRITE_ONCE(dev->needed_headroom, max_headroom);
823 
824 	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
825 		ip_rt_put(rt);
826 		DEV_STATS_INC(dev, tx_dropped);
827 		kfree_skb(skb);
828 		return;
829 	}
830 
831 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
832 		      df, !net_eq(tunnel->net, dev_net(dev)));
833 	return;
834 
835 #if IS_ENABLED(CONFIG_IPV6)
836 tx_error_icmp:
837 	dst_link_failure(skb);
838 #endif
839 tx_error:
840 	DEV_STATS_INC(dev, tx_errors);
841 	kfree_skb(skb);
842 }
843 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
844 
845 static void ip_tunnel_update(struct ip_tunnel_net *itn,
846 			     struct ip_tunnel *t,
847 			     struct net_device *dev,
848 			     struct ip_tunnel_parm *p,
849 			     bool set_mtu,
850 			     __u32 fwmark)
851 {
852 	ip_tunnel_del(itn, t);
853 	t->parms.iph.saddr = p->iph.saddr;
854 	t->parms.iph.daddr = p->iph.daddr;
855 	t->parms.i_key = p->i_key;
856 	t->parms.o_key = p->o_key;
857 	if (dev->type != ARPHRD_ETHER) {
858 		__dev_addr_set(dev, &p->iph.saddr, 4);
859 		memcpy(dev->broadcast, &p->iph.daddr, 4);
860 	}
861 	ip_tunnel_add(itn, t);
862 
863 	t->parms.iph.ttl = p->iph.ttl;
864 	t->parms.iph.tos = p->iph.tos;
865 	t->parms.iph.frag_off = p->iph.frag_off;
866 
867 	if (t->parms.link != p->link || t->fwmark != fwmark) {
868 		int mtu;
869 
870 		t->parms.link = p->link;
871 		t->fwmark = fwmark;
872 		mtu = ip_tunnel_bind_dev(dev);
873 		if (set_mtu)
874 			dev->mtu = mtu;
875 	}
876 	dst_cache_reset(&t->dst_cache);
877 	netdev_state_change(dev);
878 }
879 
880 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
881 {
882 	int err = 0;
883 	struct ip_tunnel *t = netdev_priv(dev);
884 	struct net *net = t->net;
885 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
886 
887 	switch (cmd) {
888 	case SIOCGETTUNNEL:
889 		if (dev == itn->fb_tunnel_dev) {
890 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
891 			if (!t)
892 				t = netdev_priv(dev);
893 		}
894 		memcpy(p, &t->parms, sizeof(*p));
895 		break;
896 
897 	case SIOCADDTUNNEL:
898 	case SIOCCHGTUNNEL:
899 		err = -EPERM;
900 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
901 			goto done;
902 		if (p->iph.ttl)
903 			p->iph.frag_off |= htons(IP_DF);
904 		if (!(p->i_flags & VTI_ISVTI)) {
905 			if (!(p->i_flags & TUNNEL_KEY))
906 				p->i_key = 0;
907 			if (!(p->o_flags & TUNNEL_KEY))
908 				p->o_key = 0;
909 		}
910 
911 		t = ip_tunnel_find(itn, p, itn->type);
912 
913 		if (cmd == SIOCADDTUNNEL) {
914 			if (!t) {
915 				t = ip_tunnel_create(net, itn, p);
916 				err = PTR_ERR_OR_ZERO(t);
917 				break;
918 			}
919 
920 			err = -EEXIST;
921 			break;
922 		}
923 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
924 			if (t) {
925 				if (t->dev != dev) {
926 					err = -EEXIST;
927 					break;
928 				}
929 			} else {
930 				unsigned int nflags = 0;
931 
932 				if (ipv4_is_multicast(p->iph.daddr))
933 					nflags = IFF_BROADCAST;
934 				else if (p->iph.daddr)
935 					nflags = IFF_POINTOPOINT;
936 
937 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
938 					err = -EINVAL;
939 					break;
940 				}
941 
942 				t = netdev_priv(dev);
943 			}
944 		}
945 
946 		if (t) {
947 			err = 0;
948 			ip_tunnel_update(itn, t, dev, p, true, 0);
949 		} else {
950 			err = -ENOENT;
951 		}
952 		break;
953 
954 	case SIOCDELTUNNEL:
955 		err = -EPERM;
956 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
957 			goto done;
958 
959 		if (dev == itn->fb_tunnel_dev) {
960 			err = -ENOENT;
961 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
962 			if (!t)
963 				goto done;
964 			err = -EPERM;
965 			if (t == netdev_priv(itn->fb_tunnel_dev))
966 				goto done;
967 			dev = t->dev;
968 		}
969 		unregister_netdevice(dev);
970 		err = 0;
971 		break;
972 
973 	default:
974 		err = -EINVAL;
975 	}
976 
977 done:
978 	return err;
979 }
980 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
981 
982 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
983 			     void __user *data, int cmd)
984 {
985 	struct ip_tunnel_parm p;
986 	int err;
987 
988 	if (copy_from_user(&p, data, sizeof(p)))
989 		return -EFAULT;
990 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
991 	if (!err && copy_to_user(data, &p, sizeof(p)))
992 		return -EFAULT;
993 	return err;
994 }
995 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
996 
997 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
998 {
999 	struct ip_tunnel *tunnel = netdev_priv(dev);
1000 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1001 	int max_mtu = IP_MAX_MTU - t_hlen;
1002 
1003 	if (dev->type == ARPHRD_ETHER)
1004 		max_mtu -= dev->hard_header_len;
1005 
1006 	if (new_mtu < ETH_MIN_MTU)
1007 		return -EINVAL;
1008 
1009 	if (new_mtu > max_mtu) {
1010 		if (strict)
1011 			return -EINVAL;
1012 
1013 		new_mtu = max_mtu;
1014 	}
1015 
1016 	dev->mtu = new_mtu;
1017 	return 0;
1018 }
1019 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1020 
1021 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1022 {
1023 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1024 }
1025 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1026 
1027 static void ip_tunnel_dev_free(struct net_device *dev)
1028 {
1029 	struct ip_tunnel *tunnel = netdev_priv(dev);
1030 
1031 	gro_cells_destroy(&tunnel->gro_cells);
1032 	dst_cache_destroy(&tunnel->dst_cache);
1033 	free_percpu(dev->tstats);
1034 }
1035 
1036 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1037 {
1038 	struct ip_tunnel *tunnel = netdev_priv(dev);
1039 	struct ip_tunnel_net *itn;
1040 
1041 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1042 
1043 	if (itn->fb_tunnel_dev != dev) {
1044 		ip_tunnel_del(itn, netdev_priv(dev));
1045 		unregister_netdevice_queue(dev, head);
1046 	}
1047 }
1048 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1049 
1050 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1051 {
1052 	struct ip_tunnel *tunnel = netdev_priv(dev);
1053 
1054 	return tunnel->net;
1055 }
1056 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1057 
1058 int ip_tunnel_get_iflink(const struct net_device *dev)
1059 {
1060 	struct ip_tunnel *tunnel = netdev_priv(dev);
1061 
1062 	return tunnel->parms.link;
1063 }
1064 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1065 
1066 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1067 				  struct rtnl_link_ops *ops, char *devname)
1068 {
1069 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1070 	struct ip_tunnel_parm parms;
1071 	unsigned int i;
1072 
1073 	itn->rtnl_link_ops = ops;
1074 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1075 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1076 
1077 	if (!ops || !net_has_fallback_tunnels(net)) {
1078 		struct ip_tunnel_net *it_init_net;
1079 
1080 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1081 		itn->type = it_init_net->type;
1082 		itn->fb_tunnel_dev = NULL;
1083 		return 0;
1084 	}
1085 
1086 	memset(&parms, 0, sizeof(parms));
1087 	if (devname)
1088 		strscpy(parms.name, devname, IFNAMSIZ);
1089 
1090 	rtnl_lock();
1091 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1092 	/* FB netdevice is special: we have one, and only one per netns.
1093 	 * Allowing to move it to another netns is clearly unsafe.
1094 	 */
1095 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1096 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1097 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1098 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1099 		itn->type = itn->fb_tunnel_dev->type;
1100 	}
1101 	rtnl_unlock();
1102 
1103 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1104 }
1105 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1106 
1107 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1108 			      struct list_head *head,
1109 			      struct rtnl_link_ops *ops)
1110 {
1111 	struct net_device *dev, *aux;
1112 	int h;
1113 
1114 	for_each_netdev_safe(net, dev, aux)
1115 		if (dev->rtnl_link_ops == ops)
1116 			unregister_netdevice_queue(dev, head);
1117 
1118 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1119 		struct ip_tunnel *t;
1120 		struct hlist_node *n;
1121 		struct hlist_head *thead = &itn->tunnels[h];
1122 
1123 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1124 			/* If dev is in the same netns, it has already
1125 			 * been added to the list by the previous loop.
1126 			 */
1127 			if (!net_eq(dev_net(t->dev), net))
1128 				unregister_netdevice_queue(t->dev, head);
1129 	}
1130 }
1131 
1132 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1133 			   struct rtnl_link_ops *ops)
1134 {
1135 	struct ip_tunnel_net *itn;
1136 	struct net *net;
1137 	LIST_HEAD(list);
1138 
1139 	rtnl_lock();
1140 	list_for_each_entry(net, net_list, exit_list) {
1141 		itn = net_generic(net, id);
1142 		ip_tunnel_destroy(net, itn, &list, ops);
1143 	}
1144 	unregister_netdevice_many(&list);
1145 	rtnl_unlock();
1146 }
1147 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1148 
1149 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1150 		      struct ip_tunnel_parm *p, __u32 fwmark)
1151 {
1152 	struct ip_tunnel *nt;
1153 	struct net *net = dev_net(dev);
1154 	struct ip_tunnel_net *itn;
1155 	int mtu;
1156 	int err;
1157 
1158 	nt = netdev_priv(dev);
1159 	itn = net_generic(net, nt->ip_tnl_net_id);
1160 
1161 	if (nt->collect_md) {
1162 		if (rtnl_dereference(itn->collect_md_tun))
1163 			return -EEXIST;
1164 	} else {
1165 		if (ip_tunnel_find(itn, p, dev->type))
1166 			return -EEXIST;
1167 	}
1168 
1169 	nt->net = net;
1170 	nt->parms = *p;
1171 	nt->fwmark = fwmark;
1172 	err = register_netdevice(dev);
1173 	if (err)
1174 		goto err_register_netdevice;
1175 
1176 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1177 		eth_hw_addr_random(dev);
1178 
1179 	mtu = ip_tunnel_bind_dev(dev);
1180 	if (tb[IFLA_MTU]) {
1181 		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1182 
1183 		if (dev->type == ARPHRD_ETHER)
1184 			max -= dev->hard_header_len;
1185 
1186 		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1187 	}
1188 
1189 	err = dev_set_mtu(dev, mtu);
1190 	if (err)
1191 		goto err_dev_set_mtu;
1192 
1193 	ip_tunnel_add(itn, nt);
1194 	return 0;
1195 
1196 err_dev_set_mtu:
1197 	unregister_netdevice(dev);
1198 err_register_netdevice:
1199 	return err;
1200 }
1201 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1202 
1203 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1204 			 struct ip_tunnel_parm *p, __u32 fwmark)
1205 {
1206 	struct ip_tunnel *t;
1207 	struct ip_tunnel *tunnel = netdev_priv(dev);
1208 	struct net *net = tunnel->net;
1209 	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1210 
1211 	if (dev == itn->fb_tunnel_dev)
1212 		return -EINVAL;
1213 
1214 	t = ip_tunnel_find(itn, p, dev->type);
1215 
1216 	if (t) {
1217 		if (t->dev != dev)
1218 			return -EEXIST;
1219 	} else {
1220 		t = tunnel;
1221 
1222 		if (dev->type != ARPHRD_ETHER) {
1223 			unsigned int nflags = 0;
1224 
1225 			if (ipv4_is_multicast(p->iph.daddr))
1226 				nflags = IFF_BROADCAST;
1227 			else if (p->iph.daddr)
1228 				nflags = IFF_POINTOPOINT;
1229 
1230 			if ((dev->flags ^ nflags) &
1231 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1232 				return -EINVAL;
1233 		}
1234 	}
1235 
1236 	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1237 	return 0;
1238 }
1239 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1240 
1241 int ip_tunnel_init(struct net_device *dev)
1242 {
1243 	struct ip_tunnel *tunnel = netdev_priv(dev);
1244 	struct iphdr *iph = &tunnel->parms.iph;
1245 	int err;
1246 
1247 	dev->needs_free_netdev = true;
1248 	dev->priv_destructor = ip_tunnel_dev_free;
1249 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1250 	if (!dev->tstats)
1251 		return -ENOMEM;
1252 
1253 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1254 	if (err) {
1255 		free_percpu(dev->tstats);
1256 		return err;
1257 	}
1258 
1259 	err = gro_cells_init(&tunnel->gro_cells, dev);
1260 	if (err) {
1261 		dst_cache_destroy(&tunnel->dst_cache);
1262 		free_percpu(dev->tstats);
1263 		return err;
1264 	}
1265 
1266 	tunnel->dev = dev;
1267 	tunnel->net = dev_net(dev);
1268 	strcpy(tunnel->parms.name, dev->name);
1269 	iph->version		= 4;
1270 	iph->ihl		= 5;
1271 
1272 	if (tunnel->collect_md)
1273 		netif_keep_dst(dev);
1274 	return 0;
1275 }
1276 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1277 
1278 void ip_tunnel_uninit(struct net_device *dev)
1279 {
1280 	struct ip_tunnel *tunnel = netdev_priv(dev);
1281 	struct net *net = tunnel->net;
1282 	struct ip_tunnel_net *itn;
1283 
1284 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1285 	ip_tunnel_del(itn, netdev_priv(dev));
1286 	if (itn->fb_tunnel_dev == dev)
1287 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1288 
1289 	dst_cache_reset(&tunnel->dst_cache);
1290 }
1291 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1292 
1293 /* Do least required initialization, rest of init is done in tunnel_init call */
1294 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1295 {
1296 	struct ip_tunnel *tunnel = netdev_priv(dev);
1297 	tunnel->ip_tnl_net_id = net_id;
1298 }
1299 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1300 
1301 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1302 MODULE_LICENSE("GPL");
1303