xref: /linux/net/ipv4/ip_tunnel.c (revision 9112fc0109fc0037ac3b8b633a169e78b4e23ca1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46 
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52 
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55 	return hash_32((__force u32)key ^ (__force u32)remote,
56 			 IP_TNL_HASH_BITS);
57 }
58 
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 				__be16 flags, __be32 key)
61 {
62 	if (p->i_flags & TUNNEL_KEY) {
63 		if (flags & TUNNEL_KEY)
64 			return key == p->i_key;
65 		else
66 			/* key expected, none present */
67 			return false;
68 	} else
69 		return !(flags & TUNNEL_KEY);
70 }
71 
72 /* Fallback tunnel: no source, no destination, no key, no options
73 
74    Tunnel hash table:
75    We require exact key match i.e. if a key is present in packet
76    it will match only tunnel with the same key; if it is not present,
77    it will match only keyless tunnel.
78 
79    All keysless packets, if not matched configured keyless tunnels
80    will match fallback tunnel.
81    Given src, dst and key, find appropriate for input tunnel.
82 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 				   int link, __be16 flags,
85 				   __be32 remote, __be32 local,
86 				   __be32 key)
87 {
88 	struct ip_tunnel *t, *cand = NULL;
89 	struct hlist_head *head;
90 	struct net_device *ndev;
91 	unsigned int hash;
92 
93 	hash = ip_tunnel_hash(key, remote);
94 	head = &itn->tunnels[hash];
95 
96 	hlist_for_each_entry_rcu(t, head, hash_node) {
97 		if (local != t->parms.iph.saddr ||
98 		    remote != t->parms.iph.daddr ||
99 		    !(t->dev->flags & IFF_UP))
100 			continue;
101 
102 		if (!ip_tunnel_key_match(&t->parms, flags, key))
103 			continue;
104 
105 		if (READ_ONCE(t->parms.link) == link)
106 			return t;
107 		cand = t;
108 	}
109 
110 	hlist_for_each_entry_rcu(t, head, hash_node) {
111 		if (remote != t->parms.iph.daddr ||
112 		    t->parms.iph.saddr != 0 ||
113 		    !(t->dev->flags & IFF_UP))
114 			continue;
115 
116 		if (!ip_tunnel_key_match(&t->parms, flags, key))
117 			continue;
118 
119 		if (READ_ONCE(t->parms.link) == link)
120 			return t;
121 		if (!cand)
122 			cand = t;
123 	}
124 
125 	hash = ip_tunnel_hash(key, 0);
126 	head = &itn->tunnels[hash];
127 
128 	hlist_for_each_entry_rcu(t, head, hash_node) {
129 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
130 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
131 			continue;
132 
133 		if (!(t->dev->flags & IFF_UP))
134 			continue;
135 
136 		if (!ip_tunnel_key_match(&t->parms, flags, key))
137 			continue;
138 
139 		if (READ_ONCE(t->parms.link) == link)
140 			return t;
141 		if (!cand)
142 			cand = t;
143 	}
144 
145 	hlist_for_each_entry_rcu(t, head, hash_node) {
146 		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
147 		    t->parms.iph.saddr != 0 ||
148 		    t->parms.iph.daddr != 0 ||
149 		    !(t->dev->flags & IFF_UP))
150 			continue;
151 
152 		if (READ_ONCE(t->parms.link) == link)
153 			return t;
154 		if (!cand)
155 			cand = t;
156 	}
157 
158 	if (cand)
159 		return cand;
160 
161 	t = rcu_dereference(itn->collect_md_tun);
162 	if (t && t->dev->flags & IFF_UP)
163 		return t;
164 
165 	ndev = READ_ONCE(itn->fb_tunnel_dev);
166 	if (ndev && ndev->flags & IFF_UP)
167 		return netdev_priv(ndev);
168 
169 	return NULL;
170 }
171 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
172 
173 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
174 				    struct ip_tunnel_parm *parms)
175 {
176 	unsigned int h;
177 	__be32 remote;
178 	__be32 i_key = parms->i_key;
179 
180 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
181 		remote = parms->iph.daddr;
182 	else
183 		remote = 0;
184 
185 	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
186 		i_key = 0;
187 
188 	h = ip_tunnel_hash(i_key, remote);
189 	return &itn->tunnels[h];
190 }
191 
192 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
193 {
194 	struct hlist_head *head = ip_bucket(itn, &t->parms);
195 
196 	if (t->collect_md)
197 		rcu_assign_pointer(itn->collect_md_tun, t);
198 	hlist_add_head_rcu(&t->hash_node, head);
199 }
200 
201 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
202 {
203 	if (t->collect_md)
204 		rcu_assign_pointer(itn->collect_md_tun, NULL);
205 	hlist_del_init_rcu(&t->hash_node);
206 }
207 
208 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
209 					struct ip_tunnel_parm *parms,
210 					int type)
211 {
212 	__be32 remote = parms->iph.daddr;
213 	__be32 local = parms->iph.saddr;
214 	__be32 key = parms->i_key;
215 	__be16 flags = parms->i_flags;
216 	int link = parms->link;
217 	struct ip_tunnel *t = NULL;
218 	struct hlist_head *head = ip_bucket(itn, parms);
219 
220 	hlist_for_each_entry_rcu(t, head, hash_node) {
221 		if (local == t->parms.iph.saddr &&
222 		    remote == t->parms.iph.daddr &&
223 		    link == READ_ONCE(t->parms.link) &&
224 		    type == t->dev->type &&
225 		    ip_tunnel_key_match(&t->parms, flags, key))
226 			break;
227 	}
228 	return t;
229 }
230 
231 static struct net_device *__ip_tunnel_create(struct net *net,
232 					     const struct rtnl_link_ops *ops,
233 					     struct ip_tunnel_parm *parms)
234 {
235 	int err;
236 	struct ip_tunnel *tunnel;
237 	struct net_device *dev;
238 	char name[IFNAMSIZ];
239 
240 	err = -E2BIG;
241 	if (parms->name[0]) {
242 		if (!dev_valid_name(parms->name))
243 			goto failed;
244 		strscpy(name, parms->name, IFNAMSIZ);
245 	} else {
246 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
247 			goto failed;
248 		strcpy(name, ops->kind);
249 		strcat(name, "%d");
250 	}
251 
252 	ASSERT_RTNL();
253 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
254 	if (!dev) {
255 		err = -ENOMEM;
256 		goto failed;
257 	}
258 	dev_net_set(dev, net);
259 
260 	dev->rtnl_link_ops = ops;
261 
262 	tunnel = netdev_priv(dev);
263 	tunnel->parms = *parms;
264 	tunnel->net = net;
265 
266 	err = register_netdevice(dev);
267 	if (err)
268 		goto failed_free;
269 
270 	return dev;
271 
272 failed_free:
273 	free_netdev(dev);
274 failed:
275 	return ERR_PTR(err);
276 }
277 
278 static int ip_tunnel_bind_dev(struct net_device *dev)
279 {
280 	struct net_device *tdev = NULL;
281 	struct ip_tunnel *tunnel = netdev_priv(dev);
282 	const struct iphdr *iph;
283 	int hlen = LL_MAX_HEADER;
284 	int mtu = ETH_DATA_LEN;
285 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
286 
287 	iph = &tunnel->parms.iph;
288 
289 	/* Guess output device to choose reasonable mtu and needed_headroom */
290 	if (iph->daddr) {
291 		struct flowi4 fl4;
292 		struct rtable *rt;
293 
294 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
295 				    iph->saddr, tunnel->parms.o_key,
296 				    RT_TOS(iph->tos), dev_net(dev),
297 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
298 		rt = ip_route_output_key(tunnel->net, &fl4);
299 
300 		if (!IS_ERR(rt)) {
301 			tdev = rt->dst.dev;
302 			ip_rt_put(rt);
303 		}
304 		if (dev->type != ARPHRD_ETHER)
305 			dev->flags |= IFF_POINTOPOINT;
306 
307 		dst_cache_reset(&tunnel->dst_cache);
308 	}
309 
310 	if (!tdev && tunnel->parms.link)
311 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
312 
313 	if (tdev) {
314 		hlen = tdev->hard_header_len + tdev->needed_headroom;
315 		mtu = min(tdev->mtu, IP_MAX_MTU);
316 	}
317 
318 	dev->needed_headroom = t_hlen + hlen;
319 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
320 
321 	if (mtu < IPV4_MIN_MTU)
322 		mtu = IPV4_MIN_MTU;
323 
324 	return mtu;
325 }
326 
327 static struct ip_tunnel *ip_tunnel_create(struct net *net,
328 					  struct ip_tunnel_net *itn,
329 					  struct ip_tunnel_parm *parms)
330 {
331 	struct ip_tunnel *nt;
332 	struct net_device *dev;
333 	int t_hlen;
334 	int mtu;
335 	int err;
336 
337 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
338 	if (IS_ERR(dev))
339 		return ERR_CAST(dev);
340 
341 	mtu = ip_tunnel_bind_dev(dev);
342 	err = dev_set_mtu(dev, mtu);
343 	if (err)
344 		goto err_dev_set_mtu;
345 
346 	nt = netdev_priv(dev);
347 	t_hlen = nt->hlen + sizeof(struct iphdr);
348 	dev->min_mtu = ETH_MIN_MTU;
349 	dev->max_mtu = IP_MAX_MTU - t_hlen;
350 	if (dev->type == ARPHRD_ETHER)
351 		dev->max_mtu -= dev->hard_header_len;
352 
353 	ip_tunnel_add(itn, nt);
354 	return nt;
355 
356 err_dev_set_mtu:
357 	unregister_netdevice(dev);
358 	return ERR_PTR(err);
359 }
360 
361 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
362 {
363 	const struct iphdr *iph = ip_hdr(skb);
364 	const struct udphdr *udph;
365 
366 	if (iph->protocol != IPPROTO_UDP)
367 		return;
368 
369 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
370 	info->encap.sport = udph->source;
371 	info->encap.dport = udph->dest;
372 }
373 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
374 
375 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
376 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
377 		  bool log_ecn_error)
378 {
379 	const struct iphdr *iph = ip_hdr(skb);
380 	int err;
381 
382 #ifdef CONFIG_NET_IPGRE_BROADCAST
383 	if (ipv4_is_multicast(iph->daddr)) {
384 		DEV_STATS_INC(tunnel->dev, multicast);
385 		skb->pkt_type = PACKET_BROADCAST;
386 	}
387 #endif
388 
389 	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
390 	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
391 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
392 		DEV_STATS_INC(tunnel->dev, rx_errors);
393 		goto drop;
394 	}
395 
396 	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
397 		if (!(tpi->flags&TUNNEL_SEQ) ||
398 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
399 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
400 			DEV_STATS_INC(tunnel->dev, rx_errors);
401 			goto drop;
402 		}
403 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
404 	}
405 
406 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
407 
408 	err = IP_ECN_decapsulate(iph, skb);
409 	if (unlikely(err)) {
410 		if (log_ecn_error)
411 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
412 					&iph->saddr, iph->tos);
413 		if (err > 1) {
414 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
415 			DEV_STATS_INC(tunnel->dev, rx_errors);
416 			goto drop;
417 		}
418 	}
419 
420 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
421 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
422 
423 	if (tunnel->dev->type == ARPHRD_ETHER) {
424 		skb->protocol = eth_type_trans(skb, tunnel->dev);
425 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
426 	} else {
427 		skb->dev = tunnel->dev;
428 	}
429 
430 	if (tun_dst)
431 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
432 
433 	gro_cells_receive(&tunnel->gro_cells, skb);
434 	return 0;
435 
436 drop:
437 	if (tun_dst)
438 		dst_release((struct dst_entry *)tun_dst);
439 	kfree_skb(skb);
440 	return 0;
441 }
442 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
443 
444 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
445 			    unsigned int num)
446 {
447 	if (num >= MAX_IPTUN_ENCAP_OPS)
448 		return -ERANGE;
449 
450 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
451 			&iptun_encaps[num],
452 			NULL, ops) ? 0 : -1;
453 }
454 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
455 
456 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
457 			    unsigned int num)
458 {
459 	int ret;
460 
461 	if (num >= MAX_IPTUN_ENCAP_OPS)
462 		return -ERANGE;
463 
464 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
465 		       &iptun_encaps[num],
466 		       ops, NULL) == ops) ? 0 : -1;
467 
468 	synchronize_net();
469 
470 	return ret;
471 }
472 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
473 
474 int ip_tunnel_encap_setup(struct ip_tunnel *t,
475 			  struct ip_tunnel_encap *ipencap)
476 {
477 	int hlen;
478 
479 	memset(&t->encap, 0, sizeof(t->encap));
480 
481 	hlen = ip_encap_hlen(ipencap);
482 	if (hlen < 0)
483 		return hlen;
484 
485 	t->encap.type = ipencap->type;
486 	t->encap.sport = ipencap->sport;
487 	t->encap.dport = ipencap->dport;
488 	t->encap.flags = ipencap->flags;
489 
490 	t->encap_hlen = hlen;
491 	t->hlen = t->encap_hlen + t->tun_hlen;
492 
493 	return 0;
494 }
495 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
496 
497 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
498 			    struct rtable *rt, __be16 df,
499 			    const struct iphdr *inner_iph,
500 			    int tunnel_hlen, __be32 dst, bool md)
501 {
502 	struct ip_tunnel *tunnel = netdev_priv(dev);
503 	int pkt_size;
504 	int mtu;
505 
506 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
507 	pkt_size = skb->len - tunnel_hlen;
508 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
509 
510 	if (df) {
511 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
512 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
513 	} else {
514 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
515 	}
516 
517 	if (skb_valid_dst(skb))
518 		skb_dst_update_pmtu_no_confirm(skb, mtu);
519 
520 	if (skb->protocol == htons(ETH_P_IP)) {
521 		if (!skb_is_gso(skb) &&
522 		    (inner_iph->frag_off & htons(IP_DF)) &&
523 		    mtu < pkt_size) {
524 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
525 			return -E2BIG;
526 		}
527 	}
528 #if IS_ENABLED(CONFIG_IPV6)
529 	else if (skb->protocol == htons(ETH_P_IPV6)) {
530 		struct rt6_info *rt6;
531 		__be32 daddr;
532 
533 		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
534 					   NULL;
535 		daddr = md ? dst : tunnel->parms.iph.daddr;
536 
537 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
538 			   mtu >= IPV6_MIN_MTU) {
539 			if ((daddr && !ipv4_is_multicast(daddr)) ||
540 			    rt6->rt6i_dst.plen == 128) {
541 				rt6->rt6i_flags |= RTF_MODIFIED;
542 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
543 			}
544 		}
545 
546 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
547 					mtu < pkt_size) {
548 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
549 			return -E2BIG;
550 		}
551 	}
552 #endif
553 	return 0;
554 }
555 
556 static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
557 {
558 	/* we must cap headroom to some upperlimit, else pskb_expand_head
559 	 * will overflow header offsets in skb_headers_offset_update().
560 	 */
561 	static const unsigned int max_allowed = 512;
562 
563 	if (headroom > max_allowed)
564 		headroom = max_allowed;
565 
566 	if (headroom > READ_ONCE(dev->needed_headroom))
567 		WRITE_ONCE(dev->needed_headroom, headroom);
568 }
569 
570 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
571 		       u8 proto, int tunnel_hlen)
572 {
573 	struct ip_tunnel *tunnel = netdev_priv(dev);
574 	u32 headroom = sizeof(struct iphdr);
575 	struct ip_tunnel_info *tun_info;
576 	const struct ip_tunnel_key *key;
577 	const struct iphdr *inner_iph;
578 	struct rtable *rt = NULL;
579 	struct flowi4 fl4;
580 	__be16 df = 0;
581 	u8 tos, ttl;
582 	bool use_cache;
583 
584 	tun_info = skb_tunnel_info(skb);
585 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
586 		     ip_tunnel_info_af(tun_info) != AF_INET))
587 		goto tx_error;
588 	key = &tun_info->key;
589 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
590 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
591 	tos = key->tos;
592 	if (tos == 1) {
593 		if (skb->protocol == htons(ETH_P_IP))
594 			tos = inner_iph->tos;
595 		else if (skb->protocol == htons(ETH_P_IPV6))
596 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
597 	}
598 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
599 			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
600 			    dev_net(dev), 0, skb->mark, skb_get_hash(skb),
601 			    key->flow_flags);
602 
603 	if (!tunnel_hlen)
604 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
605 
606 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
607 		goto tx_error;
608 
609 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
610 	if (use_cache)
611 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
612 	if (!rt) {
613 		rt = ip_route_output_key(tunnel->net, &fl4);
614 		if (IS_ERR(rt)) {
615 			DEV_STATS_INC(dev, tx_carrier_errors);
616 			goto tx_error;
617 		}
618 		if (use_cache)
619 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
620 					  fl4.saddr);
621 	}
622 	if (rt->dst.dev == dev) {
623 		ip_rt_put(rt);
624 		DEV_STATS_INC(dev, collisions);
625 		goto tx_error;
626 	}
627 
628 	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
629 		df = htons(IP_DF);
630 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
631 			    key->u.ipv4.dst, true)) {
632 		ip_rt_put(rt);
633 		goto tx_error;
634 	}
635 
636 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
637 	ttl = key->ttl;
638 	if (ttl == 0) {
639 		if (skb->protocol == htons(ETH_P_IP))
640 			ttl = inner_iph->ttl;
641 		else if (skb->protocol == htons(ETH_P_IPV6))
642 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
643 		else
644 			ttl = ip4_dst_hoplimit(&rt->dst);
645 	}
646 
647 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
648 	if (skb_cow_head(skb, headroom)) {
649 		ip_rt_put(rt);
650 		goto tx_dropped;
651 	}
652 
653 	ip_tunnel_adj_headroom(dev, headroom);
654 
655 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
656 		      df, !net_eq(tunnel->net, dev_net(dev)));
657 	return;
658 tx_error:
659 	DEV_STATS_INC(dev, tx_errors);
660 	goto kfree;
661 tx_dropped:
662 	DEV_STATS_INC(dev, tx_dropped);
663 kfree:
664 	kfree_skb(skb);
665 }
666 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
667 
668 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
669 		    const struct iphdr *tnl_params, u8 protocol)
670 {
671 	struct ip_tunnel *tunnel = netdev_priv(dev);
672 	struct ip_tunnel_info *tun_info = NULL;
673 	const struct iphdr *inner_iph;
674 	unsigned int max_headroom;	/* The extra header space needed */
675 	struct rtable *rt = NULL;		/* Route to the other host */
676 	__be16 payload_protocol;
677 	bool use_cache = false;
678 	struct flowi4 fl4;
679 	bool md = false;
680 	bool connected;
681 	u8 tos, ttl;
682 	__be32 dst;
683 	__be16 df;
684 
685 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
686 	connected = (tunnel->parms.iph.daddr != 0);
687 	payload_protocol = skb_protocol(skb, true);
688 
689 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
690 
691 	dst = tnl_params->daddr;
692 	if (dst == 0) {
693 		/* NBMA tunnel */
694 
695 		if (!skb_dst(skb)) {
696 			DEV_STATS_INC(dev, tx_fifo_errors);
697 			goto tx_error;
698 		}
699 
700 		tun_info = skb_tunnel_info(skb);
701 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
702 		    ip_tunnel_info_af(tun_info) == AF_INET &&
703 		    tun_info->key.u.ipv4.dst) {
704 			dst = tun_info->key.u.ipv4.dst;
705 			md = true;
706 			connected = true;
707 		} else if (payload_protocol == htons(ETH_P_IP)) {
708 			rt = skb_rtable(skb);
709 			dst = rt_nexthop(rt, inner_iph->daddr);
710 		}
711 #if IS_ENABLED(CONFIG_IPV6)
712 		else if (payload_protocol == htons(ETH_P_IPV6)) {
713 			const struct in6_addr *addr6;
714 			struct neighbour *neigh;
715 			bool do_tx_error_icmp;
716 			int addr_type;
717 
718 			neigh = dst_neigh_lookup(skb_dst(skb),
719 						 &ipv6_hdr(skb)->daddr);
720 			if (!neigh)
721 				goto tx_error;
722 
723 			addr6 = (const struct in6_addr *)&neigh->primary_key;
724 			addr_type = ipv6_addr_type(addr6);
725 
726 			if (addr_type == IPV6_ADDR_ANY) {
727 				addr6 = &ipv6_hdr(skb)->daddr;
728 				addr_type = ipv6_addr_type(addr6);
729 			}
730 
731 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
732 				do_tx_error_icmp = true;
733 			else {
734 				do_tx_error_icmp = false;
735 				dst = addr6->s6_addr32[3];
736 			}
737 			neigh_release(neigh);
738 			if (do_tx_error_icmp)
739 				goto tx_error_icmp;
740 		}
741 #endif
742 		else
743 			goto tx_error;
744 
745 		if (!md)
746 			connected = false;
747 	}
748 
749 	tos = tnl_params->tos;
750 	if (tos & 0x1) {
751 		tos &= ~0x1;
752 		if (payload_protocol == htons(ETH_P_IP)) {
753 			tos = inner_iph->tos;
754 			connected = false;
755 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
756 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
757 			connected = false;
758 		}
759 	}
760 
761 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
762 			    tunnel->parms.o_key, RT_TOS(tos),
763 			    dev_net(dev), READ_ONCE(tunnel->parms.link),
764 			    tunnel->fwmark, skb_get_hash(skb), 0);
765 
766 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
767 		goto tx_error;
768 
769 	if (connected && md) {
770 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
771 		if (use_cache)
772 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
773 					       &fl4.saddr);
774 	} else {
775 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
776 						&fl4.saddr) : NULL;
777 	}
778 
779 	if (!rt) {
780 		rt = ip_route_output_key(tunnel->net, &fl4);
781 
782 		if (IS_ERR(rt)) {
783 			DEV_STATS_INC(dev, tx_carrier_errors);
784 			goto tx_error;
785 		}
786 		if (use_cache)
787 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
788 					  fl4.saddr);
789 		else if (!md && connected)
790 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
791 					  fl4.saddr);
792 	}
793 
794 	if (rt->dst.dev == dev) {
795 		ip_rt_put(rt);
796 		DEV_STATS_INC(dev, collisions);
797 		goto tx_error;
798 	}
799 
800 	df = tnl_params->frag_off;
801 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
802 		df |= (inner_iph->frag_off & htons(IP_DF));
803 
804 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
805 		ip_rt_put(rt);
806 		goto tx_error;
807 	}
808 
809 	if (tunnel->err_count > 0) {
810 		if (time_before(jiffies,
811 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
812 			tunnel->err_count--;
813 
814 			dst_link_failure(skb);
815 		} else
816 			tunnel->err_count = 0;
817 	}
818 
819 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
820 	ttl = tnl_params->ttl;
821 	if (ttl == 0) {
822 		if (payload_protocol == htons(ETH_P_IP))
823 			ttl = inner_iph->ttl;
824 #if IS_ENABLED(CONFIG_IPV6)
825 		else if (payload_protocol == htons(ETH_P_IPV6))
826 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
827 #endif
828 		else
829 			ttl = ip4_dst_hoplimit(&rt->dst);
830 	}
831 
832 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
833 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
834 
835 	if (skb_cow_head(skb, max_headroom)) {
836 		ip_rt_put(rt);
837 		DEV_STATS_INC(dev, tx_dropped);
838 		kfree_skb(skb);
839 		return;
840 	}
841 
842 	ip_tunnel_adj_headroom(dev, max_headroom);
843 
844 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
845 		      df, !net_eq(tunnel->net, dev_net(dev)));
846 	return;
847 
848 #if IS_ENABLED(CONFIG_IPV6)
849 tx_error_icmp:
850 	dst_link_failure(skb);
851 #endif
852 tx_error:
853 	DEV_STATS_INC(dev, tx_errors);
854 	kfree_skb(skb);
855 }
856 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
857 
858 static void ip_tunnel_update(struct ip_tunnel_net *itn,
859 			     struct ip_tunnel *t,
860 			     struct net_device *dev,
861 			     struct ip_tunnel_parm *p,
862 			     bool set_mtu,
863 			     __u32 fwmark)
864 {
865 	ip_tunnel_del(itn, t);
866 	t->parms.iph.saddr = p->iph.saddr;
867 	t->parms.iph.daddr = p->iph.daddr;
868 	t->parms.i_key = p->i_key;
869 	t->parms.o_key = p->o_key;
870 	if (dev->type != ARPHRD_ETHER) {
871 		__dev_addr_set(dev, &p->iph.saddr, 4);
872 		memcpy(dev->broadcast, &p->iph.daddr, 4);
873 	}
874 	ip_tunnel_add(itn, t);
875 
876 	t->parms.iph.ttl = p->iph.ttl;
877 	t->parms.iph.tos = p->iph.tos;
878 	t->parms.iph.frag_off = p->iph.frag_off;
879 
880 	if (t->parms.link != p->link || t->fwmark != fwmark) {
881 		int mtu;
882 
883 		WRITE_ONCE(t->parms.link, p->link);
884 		t->fwmark = fwmark;
885 		mtu = ip_tunnel_bind_dev(dev);
886 		if (set_mtu)
887 			dev->mtu = mtu;
888 	}
889 	dst_cache_reset(&t->dst_cache);
890 	netdev_state_change(dev);
891 }
892 
893 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
894 {
895 	int err = 0;
896 	struct ip_tunnel *t = netdev_priv(dev);
897 	struct net *net = t->net;
898 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
899 
900 	switch (cmd) {
901 	case SIOCGETTUNNEL:
902 		if (dev == itn->fb_tunnel_dev) {
903 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
904 			if (!t)
905 				t = netdev_priv(dev);
906 		}
907 		memcpy(p, &t->parms, sizeof(*p));
908 		break;
909 
910 	case SIOCADDTUNNEL:
911 	case SIOCCHGTUNNEL:
912 		err = -EPERM;
913 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
914 			goto done;
915 		if (p->iph.ttl)
916 			p->iph.frag_off |= htons(IP_DF);
917 		if (!(p->i_flags & VTI_ISVTI)) {
918 			if (!(p->i_flags & TUNNEL_KEY))
919 				p->i_key = 0;
920 			if (!(p->o_flags & TUNNEL_KEY))
921 				p->o_key = 0;
922 		}
923 
924 		t = ip_tunnel_find(itn, p, itn->type);
925 
926 		if (cmd == SIOCADDTUNNEL) {
927 			if (!t) {
928 				t = ip_tunnel_create(net, itn, p);
929 				err = PTR_ERR_OR_ZERO(t);
930 				break;
931 			}
932 
933 			err = -EEXIST;
934 			break;
935 		}
936 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
937 			if (t) {
938 				if (t->dev != dev) {
939 					err = -EEXIST;
940 					break;
941 				}
942 			} else {
943 				unsigned int nflags = 0;
944 
945 				if (ipv4_is_multicast(p->iph.daddr))
946 					nflags = IFF_BROADCAST;
947 				else if (p->iph.daddr)
948 					nflags = IFF_POINTOPOINT;
949 
950 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
951 					err = -EINVAL;
952 					break;
953 				}
954 
955 				t = netdev_priv(dev);
956 			}
957 		}
958 
959 		if (t) {
960 			err = 0;
961 			ip_tunnel_update(itn, t, dev, p, true, 0);
962 		} else {
963 			err = -ENOENT;
964 		}
965 		break;
966 
967 	case SIOCDELTUNNEL:
968 		err = -EPERM;
969 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
970 			goto done;
971 
972 		if (dev == itn->fb_tunnel_dev) {
973 			err = -ENOENT;
974 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
975 			if (!t)
976 				goto done;
977 			err = -EPERM;
978 			if (t == netdev_priv(itn->fb_tunnel_dev))
979 				goto done;
980 			dev = t->dev;
981 		}
982 		unregister_netdevice(dev);
983 		err = 0;
984 		break;
985 
986 	default:
987 		err = -EINVAL;
988 	}
989 
990 done:
991 	return err;
992 }
993 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
994 
995 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
996 			     void __user *data, int cmd)
997 {
998 	struct ip_tunnel_parm p;
999 	int err;
1000 
1001 	if (copy_from_user(&p, data, sizeof(p)))
1002 		return -EFAULT;
1003 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1004 	if (!err && copy_to_user(data, &p, sizeof(p)))
1005 		return -EFAULT;
1006 	return err;
1007 }
1008 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1009 
1010 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1011 {
1012 	struct ip_tunnel *tunnel = netdev_priv(dev);
1013 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1014 	int max_mtu = IP_MAX_MTU - t_hlen;
1015 
1016 	if (dev->type == ARPHRD_ETHER)
1017 		max_mtu -= dev->hard_header_len;
1018 
1019 	if (new_mtu < ETH_MIN_MTU)
1020 		return -EINVAL;
1021 
1022 	if (new_mtu > max_mtu) {
1023 		if (strict)
1024 			return -EINVAL;
1025 
1026 		new_mtu = max_mtu;
1027 	}
1028 
1029 	dev->mtu = new_mtu;
1030 	return 0;
1031 }
1032 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1033 
1034 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1035 {
1036 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1037 }
1038 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1039 
1040 static void ip_tunnel_dev_free(struct net_device *dev)
1041 {
1042 	struct ip_tunnel *tunnel = netdev_priv(dev);
1043 
1044 	gro_cells_destroy(&tunnel->gro_cells);
1045 	dst_cache_destroy(&tunnel->dst_cache);
1046 	free_percpu(dev->tstats);
1047 }
1048 
1049 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1050 {
1051 	struct ip_tunnel *tunnel = netdev_priv(dev);
1052 	struct ip_tunnel_net *itn;
1053 
1054 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1055 
1056 	if (itn->fb_tunnel_dev != dev) {
1057 		ip_tunnel_del(itn, netdev_priv(dev));
1058 		unregister_netdevice_queue(dev, head);
1059 	}
1060 }
1061 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1062 
1063 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1064 {
1065 	struct ip_tunnel *tunnel = netdev_priv(dev);
1066 
1067 	return tunnel->net;
1068 }
1069 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1070 
1071 int ip_tunnel_get_iflink(const struct net_device *dev)
1072 {
1073 	const struct ip_tunnel *tunnel = netdev_priv(dev);
1074 
1075 	return READ_ONCE(tunnel->parms.link);
1076 }
1077 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1078 
1079 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1080 				  struct rtnl_link_ops *ops, char *devname)
1081 {
1082 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1083 	struct ip_tunnel_parm parms;
1084 	unsigned int i;
1085 
1086 	itn->rtnl_link_ops = ops;
1087 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1088 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1089 
1090 	if (!ops || !net_has_fallback_tunnels(net)) {
1091 		struct ip_tunnel_net *it_init_net;
1092 
1093 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1094 		itn->type = it_init_net->type;
1095 		itn->fb_tunnel_dev = NULL;
1096 		return 0;
1097 	}
1098 
1099 	memset(&parms, 0, sizeof(parms));
1100 	if (devname)
1101 		strscpy(parms.name, devname, IFNAMSIZ);
1102 
1103 	rtnl_lock();
1104 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1105 	/* FB netdevice is special: we have one, and only one per netns.
1106 	 * Allowing to move it to another netns is clearly unsafe.
1107 	 */
1108 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1109 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1110 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1111 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1112 		itn->type = itn->fb_tunnel_dev->type;
1113 	}
1114 	rtnl_unlock();
1115 
1116 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1117 }
1118 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1119 
1120 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1121 			      struct list_head *head,
1122 			      struct rtnl_link_ops *ops)
1123 {
1124 	struct net_device *dev, *aux;
1125 	int h;
1126 
1127 	for_each_netdev_safe(net, dev, aux)
1128 		if (dev->rtnl_link_ops == ops)
1129 			unregister_netdevice_queue(dev, head);
1130 
1131 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1132 		struct ip_tunnel *t;
1133 		struct hlist_node *n;
1134 		struct hlist_head *thead = &itn->tunnels[h];
1135 
1136 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1137 			/* If dev is in the same netns, it has already
1138 			 * been added to the list by the previous loop.
1139 			 */
1140 			if (!net_eq(dev_net(t->dev), net))
1141 				unregister_netdevice_queue(t->dev, head);
1142 	}
1143 }
1144 
1145 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1146 			   struct rtnl_link_ops *ops,
1147 			   struct list_head *dev_to_kill)
1148 {
1149 	struct ip_tunnel_net *itn;
1150 	struct net *net;
1151 
1152 	ASSERT_RTNL();
1153 	list_for_each_entry(net, net_list, exit_list) {
1154 		itn = net_generic(net, id);
1155 		ip_tunnel_destroy(net, itn, dev_to_kill, ops);
1156 	}
1157 }
1158 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1159 
1160 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1161 		      struct ip_tunnel_parm *p, __u32 fwmark)
1162 {
1163 	struct ip_tunnel *nt;
1164 	struct net *net = dev_net(dev);
1165 	struct ip_tunnel_net *itn;
1166 	int mtu;
1167 	int err;
1168 
1169 	nt = netdev_priv(dev);
1170 	itn = net_generic(net, nt->ip_tnl_net_id);
1171 
1172 	if (nt->collect_md) {
1173 		if (rtnl_dereference(itn->collect_md_tun))
1174 			return -EEXIST;
1175 	} else {
1176 		if (ip_tunnel_find(itn, p, dev->type))
1177 			return -EEXIST;
1178 	}
1179 
1180 	nt->net = net;
1181 	nt->parms = *p;
1182 	nt->fwmark = fwmark;
1183 	err = register_netdevice(dev);
1184 	if (err)
1185 		goto err_register_netdevice;
1186 
1187 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1188 		eth_hw_addr_random(dev);
1189 
1190 	mtu = ip_tunnel_bind_dev(dev);
1191 	if (tb[IFLA_MTU]) {
1192 		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1193 
1194 		if (dev->type == ARPHRD_ETHER)
1195 			max -= dev->hard_header_len;
1196 
1197 		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1198 	}
1199 
1200 	err = dev_set_mtu(dev, mtu);
1201 	if (err)
1202 		goto err_dev_set_mtu;
1203 
1204 	ip_tunnel_add(itn, nt);
1205 	return 0;
1206 
1207 err_dev_set_mtu:
1208 	unregister_netdevice(dev);
1209 err_register_netdevice:
1210 	return err;
1211 }
1212 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1213 
1214 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1215 			 struct ip_tunnel_parm *p, __u32 fwmark)
1216 {
1217 	struct ip_tunnel *t;
1218 	struct ip_tunnel *tunnel = netdev_priv(dev);
1219 	struct net *net = tunnel->net;
1220 	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1221 
1222 	if (dev == itn->fb_tunnel_dev)
1223 		return -EINVAL;
1224 
1225 	t = ip_tunnel_find(itn, p, dev->type);
1226 
1227 	if (t) {
1228 		if (t->dev != dev)
1229 			return -EEXIST;
1230 	} else {
1231 		t = tunnel;
1232 
1233 		if (dev->type != ARPHRD_ETHER) {
1234 			unsigned int nflags = 0;
1235 
1236 			if (ipv4_is_multicast(p->iph.daddr))
1237 				nflags = IFF_BROADCAST;
1238 			else if (p->iph.daddr)
1239 				nflags = IFF_POINTOPOINT;
1240 
1241 			if ((dev->flags ^ nflags) &
1242 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1243 				return -EINVAL;
1244 		}
1245 	}
1246 
1247 	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1248 	return 0;
1249 }
1250 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1251 
1252 int ip_tunnel_init(struct net_device *dev)
1253 {
1254 	struct ip_tunnel *tunnel = netdev_priv(dev);
1255 	struct iphdr *iph = &tunnel->parms.iph;
1256 	int err;
1257 
1258 	dev->needs_free_netdev = true;
1259 	dev->priv_destructor = ip_tunnel_dev_free;
1260 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1261 	if (!dev->tstats)
1262 		return -ENOMEM;
1263 
1264 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1265 	if (err) {
1266 		free_percpu(dev->tstats);
1267 		return err;
1268 	}
1269 
1270 	err = gro_cells_init(&tunnel->gro_cells, dev);
1271 	if (err) {
1272 		dst_cache_destroy(&tunnel->dst_cache);
1273 		free_percpu(dev->tstats);
1274 		return err;
1275 	}
1276 
1277 	tunnel->dev = dev;
1278 	tunnel->net = dev_net(dev);
1279 	strcpy(tunnel->parms.name, dev->name);
1280 	iph->version		= 4;
1281 	iph->ihl		= 5;
1282 
1283 	if (tunnel->collect_md)
1284 		netif_keep_dst(dev);
1285 	netdev_lockdep_set_classes(dev);
1286 	return 0;
1287 }
1288 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1289 
1290 void ip_tunnel_uninit(struct net_device *dev)
1291 {
1292 	struct ip_tunnel *tunnel = netdev_priv(dev);
1293 	struct net *net = tunnel->net;
1294 	struct ip_tunnel_net *itn;
1295 
1296 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1297 	ip_tunnel_del(itn, netdev_priv(dev));
1298 	if (itn->fb_tunnel_dev == dev)
1299 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1300 
1301 	dst_cache_reset(&tunnel->dst_cache);
1302 }
1303 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1304 
1305 /* Do least required initialization, rest of init is done in tunnel_init call */
1306 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1307 {
1308 	struct ip_tunnel *tunnel = netdev_priv(dev);
1309 	tunnel->ip_tnl_net_id = net_id;
1310 }
1311 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1312 
1313 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1314 MODULE_LICENSE("GPL");
1315