xref: /linux/net/ipv4/ip_tunnel.c (revision 80a7e3507d86051e7c3c9438a4f1b4858d263622)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Nicira, Inc.
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30 
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/netdev_lock.h>
44 #include <net/rtnetlink.h>
45 #include <net/udp.h>
46 #include <net/dst_metadata.h>
47 #include <net/inet_dscp.h>
48 
49 #if IS_ENABLED(CONFIG_IPV6)
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #endif
54 
55 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
56 {
57 	return hash_32((__force u32)key ^ (__force u32)remote,
58 			 IP_TNL_HASH_BITS);
59 }
60 
61 static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
62 				const unsigned long *flags, __be32 key)
63 {
64 	if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
65 		return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
66 
67 	return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
68 }
69 
/* Fallback tunnel: no source, no destination, no key, no options.

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
81 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
82 				   int link, const unsigned long *flags,
83 				   __be32 remote, __be32 local,
84 				   __be32 key)
85 {
86 	struct ip_tunnel *t, *cand = NULL;
87 	struct hlist_head *head;
88 	struct net_device *ndev;
89 	unsigned int hash;
90 
91 	hash = ip_tunnel_hash(key, remote);
92 	head = &itn->tunnels[hash];
93 
94 	hlist_for_each_entry_rcu(t, head, hash_node) {
95 		if (local != t->parms.iph.saddr ||
96 		    remote != t->parms.iph.daddr ||
97 		    !(t->dev->flags & IFF_UP))
98 			continue;
99 
100 		if (!ip_tunnel_key_match(&t->parms, flags, key))
101 			continue;
102 
103 		if (READ_ONCE(t->parms.link) == link)
104 			return t;
105 		cand = t;
106 	}
107 
108 	hlist_for_each_entry_rcu(t, head, hash_node) {
109 		if (remote != t->parms.iph.daddr ||
110 		    t->parms.iph.saddr != 0 ||
111 		    !(t->dev->flags & IFF_UP))
112 			continue;
113 
114 		if (!ip_tunnel_key_match(&t->parms, flags, key))
115 			continue;
116 
117 		if (READ_ONCE(t->parms.link) == link)
118 			return t;
119 		if (!cand)
120 			cand = t;
121 	}
122 
123 	hash = ip_tunnel_hash(key, 0);
124 	head = &itn->tunnels[hash];
125 
126 	hlist_for_each_entry_rcu(t, head, hash_node) {
127 		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
128 		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
129 			continue;
130 
131 		if (!(t->dev->flags & IFF_UP))
132 			continue;
133 
134 		if (!ip_tunnel_key_match(&t->parms, flags, key))
135 			continue;
136 
137 		if (READ_ONCE(t->parms.link) == link)
138 			return t;
139 		if (!cand)
140 			cand = t;
141 	}
142 
143 	hlist_for_each_entry_rcu(t, head, hash_node) {
144 		if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
145 		     t->parms.i_key != key) ||
146 		    t->parms.iph.saddr != 0 ||
147 		    t->parms.iph.daddr != 0 ||
148 		    !(t->dev->flags & IFF_UP))
149 			continue;
150 
151 		if (READ_ONCE(t->parms.link) == link)
152 			return t;
153 		if (!cand)
154 			cand = t;
155 	}
156 
157 	if (cand)
158 		return cand;
159 
160 	t = rcu_dereference(itn->collect_md_tun);
161 	if (t && t->dev->flags & IFF_UP)
162 		return t;
163 
164 	ndev = READ_ONCE(itn->fb_tunnel_dev);
165 	if (ndev && ndev->flags & IFF_UP)
166 		return netdev_priv(ndev);
167 
168 	return NULL;
169 }
170 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
171 
172 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
173 				    struct ip_tunnel_parm_kern *parms)
174 {
175 	unsigned int h;
176 	__be32 remote;
177 	__be32 i_key = parms->i_key;
178 
179 	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
180 		remote = parms->iph.daddr;
181 	else
182 		remote = 0;
183 
184 	if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
185 	    test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
186 		i_key = 0;
187 
188 	h = ip_tunnel_hash(i_key, remote);
189 	return &itn->tunnels[h];
190 }
191 
192 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
193 {
194 	struct hlist_head *head = ip_bucket(itn, &t->parms);
195 
196 	if (t->collect_md)
197 		rcu_assign_pointer(itn->collect_md_tun, t);
198 	hlist_add_head_rcu(&t->hash_node, head);
199 }
200 
201 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
202 {
203 	if (t->collect_md)
204 		rcu_assign_pointer(itn->collect_md_tun, NULL);
205 	hlist_del_init_rcu(&t->hash_node);
206 }
207 
208 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
209 					struct ip_tunnel_parm_kern *parms,
210 					int type)
211 {
212 	__be32 remote = parms->iph.daddr;
213 	__be32 local = parms->iph.saddr;
214 	IP_TUNNEL_DECLARE_FLAGS(flags);
215 	__be32 key = parms->i_key;
216 	int link = parms->link;
217 	struct ip_tunnel *t = NULL;
218 	struct hlist_head *head = ip_bucket(itn, parms);
219 
220 	ip_tunnel_flags_copy(flags, parms->i_flags);
221 
222 	hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
223 		if (local == t->parms.iph.saddr &&
224 		    remote == t->parms.iph.daddr &&
225 		    link == READ_ONCE(t->parms.link) &&
226 		    type == t->dev->type &&
227 		    ip_tunnel_key_match(&t->parms, flags, key))
228 			break;
229 	}
230 	return t;
231 }
232 
233 static struct net_device *__ip_tunnel_create(struct net *net,
234 					     const struct rtnl_link_ops *ops,
235 					     struct ip_tunnel_parm_kern *parms)
236 {
237 	int err;
238 	struct ip_tunnel *tunnel;
239 	struct net_device *dev;
240 	char name[IFNAMSIZ];
241 
242 	err = -E2BIG;
243 	if (parms->name[0]) {
244 		if (!dev_valid_name(parms->name))
245 			goto failed;
246 		strscpy(name, parms->name);
247 	} else {
248 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
249 			goto failed;
250 		strscpy(name, ops->kind);
251 		strcat(name, "%d");
252 	}
253 
254 	ASSERT_RTNL();
255 	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
256 	if (!dev) {
257 		err = -ENOMEM;
258 		goto failed;
259 	}
260 	dev_net_set(dev, net);
261 
262 	dev->rtnl_link_ops = ops;
263 
264 	tunnel = netdev_priv(dev);
265 	tunnel->parms = *parms;
266 	tunnel->net = net;
267 
268 	err = register_netdevice(dev);
269 	if (err)
270 		goto failed_free;
271 
272 	return dev;
273 
274 failed_free:
275 	free_netdev(dev);
276 failed:
277 	return ERR_PTR(err);
278 }
279 
280 static int ip_tunnel_bind_dev(struct net_device *dev)
281 {
282 	struct net_device *tdev = NULL;
283 	struct ip_tunnel *tunnel = netdev_priv(dev);
284 	const struct iphdr *iph;
285 	int hlen = LL_MAX_HEADER;
286 	int mtu = ETH_DATA_LEN;
287 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
288 
289 	iph = &tunnel->parms.iph;
290 
291 	/* Guess output device to choose reasonable mtu and needed_headroom */
292 	if (iph->daddr) {
293 		struct flowi4 fl4;
294 		struct rtable *rt;
295 
296 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
297 				    iph->saddr, tunnel->parms.o_key,
298 				    iph->tos & INET_DSCP_MASK, tunnel->net,
299 				    tunnel->parms.link, tunnel->fwmark, 0, 0);
300 		rt = ip_route_output_key(tunnel->net, &fl4);
301 
302 		if (!IS_ERR(rt)) {
303 			tdev = rt->dst.dev;
304 			ip_rt_put(rt);
305 		}
306 		if (dev->type != ARPHRD_ETHER)
307 			dev->flags |= IFF_POINTOPOINT;
308 
309 		dst_cache_reset(&tunnel->dst_cache);
310 	}
311 
312 	if (!tdev && tunnel->parms.link)
313 		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
314 
315 	if (tdev) {
316 		hlen = tdev->hard_header_len + tdev->needed_headroom;
317 		mtu = min(tdev->mtu, IP_MAX_MTU);
318 	}
319 
320 	dev->needed_headroom = t_hlen + hlen;
321 	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
322 
323 	if (mtu < IPV4_MIN_MTU)
324 		mtu = IPV4_MIN_MTU;
325 
326 	return mtu;
327 }
328 
329 static struct ip_tunnel *ip_tunnel_create(struct net *net,
330 					  struct ip_tunnel_net *itn,
331 					  struct ip_tunnel_parm_kern *parms)
332 {
333 	struct ip_tunnel *nt;
334 	struct net_device *dev;
335 	int t_hlen;
336 	int mtu;
337 	int err;
338 
339 	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
340 	if (IS_ERR(dev))
341 		return ERR_CAST(dev);
342 
343 	mtu = ip_tunnel_bind_dev(dev);
344 	err = dev_set_mtu(dev, mtu);
345 	if (err)
346 		goto err_dev_set_mtu;
347 
348 	nt = netdev_priv(dev);
349 	t_hlen = nt->hlen + sizeof(struct iphdr);
350 	dev->min_mtu = ETH_MIN_MTU;
351 	dev->max_mtu = IP_MAX_MTU - t_hlen;
352 	if (dev->type == ARPHRD_ETHER)
353 		dev->max_mtu -= dev->hard_header_len;
354 
355 	ip_tunnel_add(itn, nt);
356 	return nt;
357 
358 err_dev_set_mtu:
359 	unregister_netdevice(dev);
360 	return ERR_PTR(err);
361 }
362 
363 void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
364 {
365 	const struct iphdr *iph = ip_hdr(skb);
366 	const struct udphdr *udph;
367 
368 	if (iph->protocol != IPPROTO_UDP)
369 		return;
370 
371 	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
372 	info->encap.sport = udph->source;
373 	info->encap.dport = udph->dest;
374 }
375 EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
376 
377 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
378 		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
379 		  bool log_ecn_error)
380 {
381 	const struct iphdr *iph = ip_hdr(skb);
382 	int nh, err;
383 
384 #ifdef CONFIG_NET_IPGRE_BROADCAST
385 	if (ipv4_is_multicast(iph->daddr)) {
386 		DEV_STATS_INC(tunnel->dev, multicast);
387 		skb->pkt_type = PACKET_BROADCAST;
388 	}
389 #endif
390 
391 	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
392 	    test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
393 		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
394 		DEV_STATS_INC(tunnel->dev, rx_errors);
395 		goto drop;
396 	}
397 
398 	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
399 		if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
400 		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
401 			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
402 			DEV_STATS_INC(tunnel->dev, rx_errors);
403 			goto drop;
404 		}
405 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
406 	}
407 
408 	/* Save offset of outer header relative to skb->head,
409 	 * because we are going to reset the network header to the inner header
410 	 * and might change skb->head.
411 	 */
412 	nh = skb_network_header(skb) - skb->head;
413 
414 	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
415 
416 	if (!pskb_inet_may_pull(skb)) {
417 		DEV_STATS_INC(tunnel->dev, rx_length_errors);
418 		DEV_STATS_INC(tunnel->dev, rx_errors);
419 		goto drop;
420 	}
421 	iph = (struct iphdr *)(skb->head + nh);
422 
423 	err = IP_ECN_decapsulate(iph, skb);
424 	if (unlikely(err)) {
425 		if (log_ecn_error)
426 			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
427 					&iph->saddr, iph->tos);
428 		if (err > 1) {
429 			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
430 			DEV_STATS_INC(tunnel->dev, rx_errors);
431 			goto drop;
432 		}
433 	}
434 
435 	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
436 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
437 
438 	if (tunnel->dev->type == ARPHRD_ETHER) {
439 		skb->protocol = eth_type_trans(skb, tunnel->dev);
440 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
441 	} else {
442 		skb->dev = tunnel->dev;
443 	}
444 
445 	if (tun_dst)
446 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
447 
448 	gro_cells_receive(&tunnel->gro_cells, skb);
449 	return 0;
450 
451 drop:
452 	if (tun_dst)
453 		dst_release((struct dst_entry *)tun_dst);
454 	kfree_skb(skb);
455 	return 0;
456 }
457 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
458 
459 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
460 			    unsigned int num)
461 {
462 	if (num >= MAX_IPTUN_ENCAP_OPS)
463 		return -ERANGE;
464 
465 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
466 			&iptun_encaps[num],
467 			NULL, ops) ? 0 : -1;
468 }
469 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
470 
471 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
472 			    unsigned int num)
473 {
474 	int ret;
475 
476 	if (num >= MAX_IPTUN_ENCAP_OPS)
477 		return -ERANGE;
478 
479 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
480 		       &iptun_encaps[num],
481 		       ops, NULL) == ops) ? 0 : -1;
482 
483 	synchronize_net();
484 
485 	return ret;
486 }
487 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
488 
489 int ip_tunnel_encap_setup(struct ip_tunnel *t,
490 			  struct ip_tunnel_encap *ipencap)
491 {
492 	int hlen;
493 
494 	memset(&t->encap, 0, sizeof(t->encap));
495 
496 	hlen = ip_encap_hlen(ipencap);
497 	if (hlen < 0)
498 		return hlen;
499 
500 	t->encap.type = ipencap->type;
501 	t->encap.sport = ipencap->sport;
502 	t->encap.dport = ipencap->dport;
503 	t->encap.flags = ipencap->flags;
504 
505 	t->encap_hlen = hlen;
506 	t->hlen = t->encap_hlen + t->tun_hlen;
507 
508 	return 0;
509 }
510 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
511 
512 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
513 			    struct rtable *rt, __be16 df,
514 			    const struct iphdr *inner_iph,
515 			    int tunnel_hlen, __be32 dst, bool md)
516 {
517 	struct ip_tunnel *tunnel = netdev_priv(dev);
518 	int pkt_size;
519 	int mtu;
520 
521 	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
522 	pkt_size = skb->len - tunnel_hlen;
523 	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
524 
525 	if (df) {
526 		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
527 		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
528 	} else {
529 		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
530 	}
531 
532 	if (skb_valid_dst(skb))
533 		skb_dst_update_pmtu_no_confirm(skb, mtu);
534 
535 	if (skb->protocol == htons(ETH_P_IP)) {
536 		if (!skb_is_gso(skb) &&
537 		    (inner_iph->frag_off & htons(IP_DF)) &&
538 		    mtu < pkt_size) {
539 			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
540 			return -E2BIG;
541 		}
542 	}
543 #if IS_ENABLED(CONFIG_IPV6)
544 	else if (skb->protocol == htons(ETH_P_IPV6)) {
545 		struct rt6_info *rt6;
546 		__be32 daddr;
547 
548 		rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
549 					   NULL;
550 		daddr = md ? dst : tunnel->parms.iph.daddr;
551 
552 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
553 			   mtu >= IPV6_MIN_MTU) {
554 			if ((daddr && !ipv4_is_multicast(daddr)) ||
555 			    rt6->rt6i_dst.plen == 128) {
556 				rt6->rt6i_flags |= RTF_MODIFIED;
557 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
558 			}
559 		}
560 
561 		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
562 					mtu < pkt_size) {
563 			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
564 			return -E2BIG;
565 		}
566 	}
567 #endif
568 	return 0;
569 }
570 
571 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
572 		       u8 proto, int tunnel_hlen)
573 {
574 	struct ip_tunnel *tunnel = netdev_priv(dev);
575 	u32 headroom = sizeof(struct iphdr);
576 	struct ip_tunnel_info *tun_info;
577 	const struct ip_tunnel_key *key;
578 	const struct iphdr *inner_iph;
579 	struct rtable *rt = NULL;
580 	struct flowi4 fl4;
581 	__be16 df = 0;
582 	u8 tos, ttl;
583 	bool use_cache;
584 
585 	tun_info = skb_tunnel_info(skb);
586 	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
587 		     ip_tunnel_info_af(tun_info) != AF_INET))
588 		goto tx_error;
589 	key = &tun_info->key;
590 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
591 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
592 	tos = key->tos;
593 	if (tos == 1) {
594 		if (skb->protocol == htons(ETH_P_IP))
595 			tos = inner_iph->tos;
596 		else if (skb->protocol == htons(ETH_P_IPV6))
597 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
598 	}
599 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
600 			    tunnel_id_to_key32(key->tun_id),
601 			    tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark,
602 			    skb_get_hash(skb), key->flow_flags);
603 
604 	if (!tunnel_hlen)
605 		tunnel_hlen = ip_encap_hlen(&tun_info->encap);
606 
607 	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
608 		goto tx_error;
609 
610 	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
611 	if (use_cache)
612 		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
613 	if (!rt) {
614 		rt = ip_route_output_key(tunnel->net, &fl4);
615 		if (IS_ERR(rt)) {
616 			DEV_STATS_INC(dev, tx_carrier_errors);
617 			goto tx_error;
618 		}
619 		if (use_cache)
620 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
621 					  fl4.saddr);
622 	}
623 	if (rt->dst.dev == dev) {
624 		ip_rt_put(rt);
625 		DEV_STATS_INC(dev, collisions);
626 		goto tx_error;
627 	}
628 
629 	if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
630 		df = htons(IP_DF);
631 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
632 			    key->u.ipv4.dst, true)) {
633 		ip_rt_put(rt);
634 		goto tx_error;
635 	}
636 
637 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
638 	ttl = key->ttl;
639 	if (ttl == 0) {
640 		if (skb->protocol == htons(ETH_P_IP))
641 			ttl = inner_iph->ttl;
642 		else if (skb->protocol == htons(ETH_P_IPV6))
643 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
644 		else
645 			ttl = ip4_dst_hoplimit(&rt->dst);
646 	}
647 
648 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
649 	if (skb_cow_head(skb, headroom)) {
650 		ip_rt_put(rt);
651 		goto tx_dropped;
652 	}
653 
654 	ip_tunnel_adj_headroom(dev, headroom);
655 
656 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
657 		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
658 	return;
659 tx_error:
660 	DEV_STATS_INC(dev, tx_errors);
661 	goto kfree;
662 tx_dropped:
663 	DEV_STATS_INC(dev, tx_dropped);
664 kfree:
665 	kfree_skb(skb);
666 }
667 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
668 
669 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
670 		    const struct iphdr *tnl_params, u8 protocol)
671 {
672 	struct ip_tunnel *tunnel = netdev_priv(dev);
673 	struct ip_tunnel_info *tun_info = NULL;
674 	const struct iphdr *inner_iph;
675 	unsigned int max_headroom;	/* The extra header space needed */
676 	struct rtable *rt = NULL;		/* Route to the other host */
677 	__be16 payload_protocol;
678 	bool use_cache = false;
679 	struct flowi4 fl4;
680 	bool md = false;
681 	bool connected;
682 	int err_count;
683 	u8 tos, ttl;
684 	__be32 dst;
685 	__be16 df;
686 
687 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
688 	connected = (tunnel->parms.iph.daddr != 0);
689 	payload_protocol = skb_protocol(skb, true);
690 
691 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
692 
693 	dst = tnl_params->daddr;
694 	if (dst == 0) {
695 		/* NBMA tunnel */
696 
697 		if (!skb_dst(skb)) {
698 			DEV_STATS_INC(dev, tx_fifo_errors);
699 			goto tx_error;
700 		}
701 
702 		tun_info = skb_tunnel_info(skb);
703 		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
704 		    ip_tunnel_info_af(tun_info) == AF_INET &&
705 		    tun_info->key.u.ipv4.dst) {
706 			dst = tun_info->key.u.ipv4.dst;
707 			md = true;
708 			connected = true;
709 		} else if (payload_protocol == htons(ETH_P_IP)) {
710 			rt = skb_rtable(skb);
711 			dst = rt_nexthop(rt, inner_iph->daddr);
712 		}
713 #if IS_ENABLED(CONFIG_IPV6)
714 		else if (payload_protocol == htons(ETH_P_IPV6)) {
715 			const struct in6_addr *addr6;
716 			struct neighbour *neigh;
717 			bool do_tx_error_icmp;
718 			int addr_type;
719 
720 			neigh = dst_neigh_lookup(skb_dst(skb),
721 						 &ipv6_hdr(skb)->daddr);
722 			if (!neigh)
723 				goto tx_error;
724 
725 			addr6 = (const struct in6_addr *)&neigh->primary_key;
726 			addr_type = ipv6_addr_type(addr6);
727 
728 			if (addr_type == IPV6_ADDR_ANY) {
729 				addr6 = &ipv6_hdr(skb)->daddr;
730 				addr_type = ipv6_addr_type(addr6);
731 			}
732 
733 			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
734 				do_tx_error_icmp = true;
735 			else {
736 				do_tx_error_icmp = false;
737 				dst = addr6->s6_addr32[3];
738 			}
739 			neigh_release(neigh);
740 			if (do_tx_error_icmp)
741 				goto tx_error_icmp;
742 		}
743 #endif
744 		else
745 			goto tx_error;
746 
747 		if (!md)
748 			connected = false;
749 	}
750 
751 	tos = tnl_params->tos;
752 	if (tos & 0x1) {
753 		tos &= ~0x1;
754 		if (payload_protocol == htons(ETH_P_IP)) {
755 			tos = inner_iph->tos;
756 			connected = false;
757 		} else if (payload_protocol == htons(ETH_P_IPV6)) {
758 			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
759 			connected = false;
760 		}
761 	}
762 
763 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
764 			    tunnel->parms.o_key, tos & INET_DSCP_MASK,
765 			    tunnel->net, READ_ONCE(tunnel->parms.link),
766 			    tunnel->fwmark, skb_get_hash(skb), 0);
767 
768 	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
769 		goto tx_error;
770 
771 	if (connected && md) {
772 		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
773 		if (use_cache)
774 			rt = dst_cache_get_ip4(&tun_info->dst_cache,
775 					       &fl4.saddr);
776 	} else {
777 		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
778 						&fl4.saddr) : NULL;
779 	}
780 
781 	if (!rt) {
782 		rt = ip_route_output_key(tunnel->net, &fl4);
783 
784 		if (IS_ERR(rt)) {
785 			DEV_STATS_INC(dev, tx_carrier_errors);
786 			goto tx_error;
787 		}
788 		if (use_cache)
789 			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
790 					  fl4.saddr);
791 		else if (!md && connected)
792 			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
793 					  fl4.saddr);
794 	}
795 
796 	if (rt->dst.dev == dev) {
797 		ip_rt_put(rt);
798 		DEV_STATS_INC(dev, collisions);
799 		goto tx_error;
800 	}
801 
802 	df = tnl_params->frag_off;
803 	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
804 		df |= (inner_iph->frag_off & htons(IP_DF));
805 
806 	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
807 		ip_rt_put(rt);
808 		goto tx_error;
809 	}
810 
811 	err_count = READ_ONCE(tunnel->err_count);
812 	if (err_count > 0) {
813 		if (time_before(jiffies,
814 				READ_ONCE(tunnel->err_time) + IPTUNNEL_ERR_TIMEO)) {
815 			WRITE_ONCE(tunnel->err_count, err_count - 1);
816 
817 			dst_link_failure(skb);
818 		} else {
819 			WRITE_ONCE(tunnel->err_count, 0);
820 		}
821 	}
822 
823 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
824 	ttl = tnl_params->ttl;
825 	if (ttl == 0) {
826 		if (payload_protocol == htons(ETH_P_IP))
827 			ttl = inner_iph->ttl;
828 #if IS_ENABLED(CONFIG_IPV6)
829 		else if (payload_protocol == htons(ETH_P_IPV6))
830 			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
831 #endif
832 		else
833 			ttl = ip4_dst_hoplimit(&rt->dst);
834 	}
835 
836 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
837 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
838 
839 	if (skb_cow_head(skb, max_headroom)) {
840 		ip_rt_put(rt);
841 		DEV_STATS_INC(dev, tx_dropped);
842 		kfree_skb(skb);
843 		return;
844 	}
845 
846 	ip_tunnel_adj_headroom(dev, max_headroom);
847 
848 	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
849 		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
850 	return;
851 
852 #if IS_ENABLED(CONFIG_IPV6)
853 tx_error_icmp:
854 	dst_link_failure(skb);
855 #endif
856 tx_error:
857 	DEV_STATS_INC(dev, tx_errors);
858 	kfree_skb(skb);
859 }
860 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
861 
862 static void ip_tunnel_update(struct ip_tunnel_net *itn,
863 			     struct ip_tunnel *t,
864 			     struct net_device *dev,
865 			     struct ip_tunnel_parm_kern *p,
866 			     bool set_mtu,
867 			     __u32 fwmark)
868 {
869 	ip_tunnel_del(itn, t);
870 	t->parms.iph.saddr = p->iph.saddr;
871 	t->parms.iph.daddr = p->iph.daddr;
872 	t->parms.i_key = p->i_key;
873 	t->parms.o_key = p->o_key;
874 	if (dev->type != ARPHRD_ETHER) {
875 		__dev_addr_set(dev, &p->iph.saddr, 4);
876 		memcpy(dev->broadcast, &p->iph.daddr, 4);
877 	}
878 	ip_tunnel_add(itn, t);
879 
880 	t->parms.iph.ttl = p->iph.ttl;
881 	t->parms.iph.tos = p->iph.tos;
882 	t->parms.iph.frag_off = p->iph.frag_off;
883 
884 	if (t->parms.link != p->link || t->fwmark != fwmark) {
885 		int mtu;
886 
887 		WRITE_ONCE(t->parms.link, p->link);
888 		t->fwmark = fwmark;
889 		mtu = ip_tunnel_bind_dev(dev);
890 		if (set_mtu)
891 			WRITE_ONCE(dev->mtu, mtu);
892 	}
893 	dst_cache_reset(&t->dst_cache);
894 	netdev_state_change(dev);
895 }
896 
897 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
898 		  int cmd)
899 {
900 	int err = 0;
901 	struct ip_tunnel *t = netdev_priv(dev);
902 	struct net *net = t->net;
903 	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
904 
905 	switch (cmd) {
906 	case SIOCGETTUNNEL:
907 		if (dev == itn->fb_tunnel_dev) {
908 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
909 			if (!t)
910 				t = netdev_priv(dev);
911 		}
912 		memcpy(p, &t->parms, sizeof(*p));
913 		break;
914 
915 	case SIOCADDTUNNEL:
916 	case SIOCCHGTUNNEL:
917 		err = -EPERM;
918 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
919 			goto done;
920 		if (p->iph.ttl)
921 			p->iph.frag_off |= htons(IP_DF);
922 		if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
923 			if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
924 				p->i_key = 0;
925 			if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
926 				p->o_key = 0;
927 		}
928 
929 		t = ip_tunnel_find(itn, p, itn->type);
930 
931 		if (cmd == SIOCADDTUNNEL) {
932 			if (!t) {
933 				t = ip_tunnel_create(net, itn, p);
934 				err = PTR_ERR_OR_ZERO(t);
935 				break;
936 			}
937 
938 			err = -EEXIST;
939 			break;
940 		}
941 		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
942 			if (t) {
943 				if (t->dev != dev) {
944 					err = -EEXIST;
945 					break;
946 				}
947 			} else {
948 				unsigned int nflags = 0;
949 
950 				if (ipv4_is_multicast(p->iph.daddr))
951 					nflags = IFF_BROADCAST;
952 				else if (p->iph.daddr)
953 					nflags = IFF_POINTOPOINT;
954 
955 				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
956 					err = -EINVAL;
957 					break;
958 				}
959 
960 				t = netdev_priv(dev);
961 			}
962 		}
963 
964 		if (t) {
965 			err = 0;
966 			ip_tunnel_update(itn, t, dev, p, true, 0);
967 		} else {
968 			err = -ENOENT;
969 		}
970 		break;
971 
972 	case SIOCDELTUNNEL:
973 		err = -EPERM;
974 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
975 			goto done;
976 
977 		if (dev == itn->fb_tunnel_dev) {
978 			err = -ENOENT;
979 			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
980 			if (!t)
981 				goto done;
982 			err = -EPERM;
983 			if (t == netdev_priv(itn->fb_tunnel_dev))
984 				goto done;
985 			dev = t->dev;
986 		}
987 		unregister_netdevice(dev);
988 		err = 0;
989 		break;
990 
991 	default:
992 		err = -EINVAL;
993 	}
994 
995 done:
996 	return err;
997 }
998 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
999 
1000 bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
1001 			      const void __user *data)
1002 {
1003 	struct ip_tunnel_parm p;
1004 
1005 	if (copy_from_user(&p, data, sizeof(p)))
1006 		return false;
1007 
1008 	strscpy(kp->name, p.name);
1009 	kp->link = p.link;
1010 	ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
1011 	ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
1012 	kp->i_key = p.i_key;
1013 	kp->o_key = p.o_key;
1014 	memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
1015 
1016 	return true;
1017 }
1018 EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
1019 
1020 bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
1021 {
1022 	struct ip_tunnel_parm p;
1023 
1024 	if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
1025 	    !ip_tunnel_flags_is_be16_compat(kp->o_flags))
1026 		return false;
1027 
1028 	memset(&p, 0, sizeof(p));
1029 
1030 	strscpy(p.name, kp->name);
1031 	p.link = kp->link;
1032 	p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
1033 	p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
1034 	p.i_key = kp->i_key;
1035 	p.o_key = kp->o_key;
1036 	memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
1037 
1038 	return !copy_to_user(data, &p, sizeof(p));
1039 }
1040 EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
1041 
1042 int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1043 			     void __user *data, int cmd)
1044 {
1045 	struct ip_tunnel_parm_kern p;
1046 	int err;
1047 
1048 	if (!ip_tunnel_parm_from_user(&p, data))
1049 		return -EFAULT;
1050 	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
1051 	if (!err && !ip_tunnel_parm_to_user(data, &p))
1052 		return -EFAULT;
1053 	return err;
1054 }
1055 EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
1056 
1057 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
1058 {
1059 	struct ip_tunnel *tunnel = netdev_priv(dev);
1060 	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1061 	int max_mtu = IP_MAX_MTU - t_hlen;
1062 
1063 	if (dev->type == ARPHRD_ETHER)
1064 		max_mtu -= dev->hard_header_len;
1065 
1066 	if (new_mtu < ETH_MIN_MTU)
1067 		return -EINVAL;
1068 
1069 	if (new_mtu > max_mtu) {
1070 		if (strict)
1071 			return -EINVAL;
1072 
1073 		new_mtu = max_mtu;
1074 	}
1075 
1076 	WRITE_ONCE(dev->mtu, new_mtu);
1077 	return 0;
1078 }
1079 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
1080 
1081 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1082 {
1083 	return __ip_tunnel_change_mtu(dev, new_mtu, true);
1084 }
1085 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1086 
1087 static void ip_tunnel_dev_free(struct net_device *dev)
1088 {
1089 	struct ip_tunnel *tunnel = netdev_priv(dev);
1090 
1091 	gro_cells_destroy(&tunnel->gro_cells);
1092 	dst_cache_destroy(&tunnel->dst_cache);
1093 }
1094 
1095 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1096 {
1097 	struct ip_tunnel *tunnel = netdev_priv(dev);
1098 	struct ip_tunnel_net *itn;
1099 
1100 	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1101 
1102 	if (itn->fb_tunnel_dev != dev) {
1103 		ip_tunnel_del(itn, netdev_priv(dev));
1104 		unregister_netdevice_queue(dev, head);
1105 	}
1106 }
1107 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1108 
1109 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1110 {
1111 	struct ip_tunnel *tunnel = netdev_priv(dev);
1112 
1113 	return READ_ONCE(tunnel->net);
1114 }
1115 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1116 
1117 int ip_tunnel_get_iflink(const struct net_device *dev)
1118 {
1119 	const struct ip_tunnel *tunnel = netdev_priv(dev);
1120 
1121 	return READ_ONCE(tunnel->parms.link);
1122 }
1123 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1124 
1125 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1126 				  struct rtnl_link_ops *ops, char *devname)
1127 {
1128 	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1129 	struct ip_tunnel_parm_kern parms;
1130 	unsigned int i;
1131 
1132 	itn->rtnl_link_ops = ops;
1133 	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1134 		INIT_HLIST_HEAD(&itn->tunnels[i]);
1135 
1136 	if (!ops || !net_has_fallback_tunnels(net)) {
1137 		struct ip_tunnel_net *it_init_net;
1138 
1139 		it_init_net = net_generic(&init_net, ip_tnl_net_id);
1140 		itn->type = it_init_net->type;
1141 		itn->fb_tunnel_dev = NULL;
1142 		return 0;
1143 	}
1144 
1145 	memset(&parms, 0, sizeof(parms));
1146 	if (devname)
1147 		strscpy(parms.name, devname, IFNAMSIZ);
1148 
1149 	rtnl_lock();
1150 	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1151 	/* FB netdevice is special: we have one, and only one per netns.
1152 	 * Allowing to move it to another netns is clearly unsafe.
1153 	 */
1154 	if (!IS_ERR(itn->fb_tunnel_dev)) {
1155 		itn->fb_tunnel_dev->netns_immutable = true;
1156 		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1157 		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1158 		itn->type = itn->fb_tunnel_dev->type;
1159 	}
1160 	rtnl_unlock();
1161 
1162 	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1163 }
1164 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1165 
1166 void ip_tunnel_delete_net(struct net *net, unsigned int id,
1167 			  struct rtnl_link_ops *ops,
1168 			  struct list_head *head)
1169 {
1170 	struct ip_tunnel_net *itn = net_generic(net, id);
1171 	struct net_device *dev, *aux;
1172 	int h;
1173 
1174 	ASSERT_RTNL_NET(net);
1175 
1176 	for_each_netdev_safe(net, dev, aux)
1177 		if (dev->rtnl_link_ops == ops)
1178 			unregister_netdevice_queue(dev, head);
1179 
1180 	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1181 		struct ip_tunnel *t;
1182 		struct hlist_node *n;
1183 		struct hlist_head *thead = &itn->tunnels[h];
1184 
1185 		hlist_for_each_entry_safe(t, n, thead, hash_node)
1186 			/* If dev is in the same netns, it has already
1187 			 * been added to the list by the previous loop.
1188 			 */
1189 			if (!net_eq(dev_net(t->dev), net))
1190 				unregister_netdevice_queue(t->dev, head);
1191 	}
1192 }
1193 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1194 
1195 int ip_tunnel_newlink(struct net *net, struct net_device *dev,
1196 		      struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
1197 		      __u32 fwmark)
1198 {
1199 	struct ip_tunnel *nt;
1200 	struct ip_tunnel_net *itn;
1201 	int mtu;
1202 	int err;
1203 
1204 	nt = netdev_priv(dev);
1205 	itn = net_generic(net, nt->ip_tnl_net_id);
1206 
1207 	if (nt->collect_md) {
1208 		if (rtnl_dereference(itn->collect_md_tun))
1209 			return -EEXIST;
1210 	} else {
1211 		if (ip_tunnel_find(itn, p, dev->type))
1212 			return -EEXIST;
1213 	}
1214 
1215 	nt->net = net;
1216 	nt->parms = *p;
1217 	nt->fwmark = fwmark;
1218 	err = register_netdevice(dev);
1219 	if (err)
1220 		goto err_register_netdevice;
1221 
1222 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1223 		eth_hw_addr_random(dev);
1224 
1225 	mtu = ip_tunnel_bind_dev(dev);
1226 	if (tb[IFLA_MTU]) {
1227 		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
1228 
1229 		if (dev->type == ARPHRD_ETHER)
1230 			max -= dev->hard_header_len;
1231 
1232 		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
1233 	}
1234 
1235 	err = dev_set_mtu(dev, mtu);
1236 	if (err)
1237 		goto err_dev_set_mtu;
1238 
1239 	ip_tunnel_add(itn, nt);
1240 	return 0;
1241 
1242 err_dev_set_mtu:
1243 	unregister_netdevice(dev);
1244 err_register_netdevice:
1245 	return err;
1246 }
1247 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1248 
1249 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1250 			 struct ip_tunnel_parm_kern *p, __u32 fwmark)
1251 {
1252 	struct ip_tunnel *t;
1253 	struct ip_tunnel *tunnel = netdev_priv(dev);
1254 	struct net *net = tunnel->net;
1255 	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1256 
1257 	if (dev == itn->fb_tunnel_dev)
1258 		return -EINVAL;
1259 
1260 	t = ip_tunnel_find(itn, p, dev->type);
1261 
1262 	if (t) {
1263 		if (t->dev != dev)
1264 			return -EEXIST;
1265 	} else {
1266 		t = tunnel;
1267 
1268 		if (dev->type != ARPHRD_ETHER) {
1269 			unsigned int nflags = 0;
1270 
1271 			if (ipv4_is_multicast(p->iph.daddr))
1272 				nflags = IFF_BROADCAST;
1273 			else if (p->iph.daddr)
1274 				nflags = IFF_POINTOPOINT;
1275 
1276 			if ((dev->flags ^ nflags) &
1277 			    (IFF_POINTOPOINT | IFF_BROADCAST))
1278 				return -EINVAL;
1279 		}
1280 	}
1281 
1282 	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1283 	return 0;
1284 }
1285 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1286 
1287 int __ip_tunnel_init(struct net_device *dev)
1288 {
1289 	struct ip_tunnel *tunnel = netdev_priv(dev);
1290 	struct iphdr *iph = &tunnel->parms.iph;
1291 	int err;
1292 
1293 	dev->needs_free_netdev = true;
1294 	dev->priv_destructor = ip_tunnel_dev_free;
1295 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
1296 
1297 	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1298 	if (err)
1299 		return err;
1300 
1301 	err = gro_cells_init(&tunnel->gro_cells, dev);
1302 	if (err) {
1303 		dst_cache_destroy(&tunnel->dst_cache);
1304 		return err;
1305 	}
1306 
1307 	tunnel->dev = dev;
1308 	strscpy(tunnel->parms.name, dev->name);
1309 	iph->version		= 4;
1310 	iph->ihl		= 5;
1311 
1312 	if (tunnel->collect_md)
1313 		netif_keep_dst(dev);
1314 	return 0;
1315 }
1316 EXPORT_SYMBOL_GPL(__ip_tunnel_init);
1317 
1318 void ip_tunnel_uninit(struct net_device *dev)
1319 {
1320 	struct ip_tunnel *tunnel = netdev_priv(dev);
1321 	struct net *net = tunnel->net;
1322 	struct ip_tunnel_net *itn;
1323 
1324 	itn = net_generic(net, tunnel->ip_tnl_net_id);
1325 	ip_tunnel_del(itn, netdev_priv(dev));
1326 	if (itn->fb_tunnel_dev == dev)
1327 		WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1328 
1329 	dst_cache_reset(&tunnel->dst_cache);
1330 }
1331 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1332 
1333 /* Do least required initialization, rest of init is done in tunnel_init call */
1334 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1335 {
1336 	struct ip_tunnel *tunnel = netdev_priv(dev);
1337 	tunnel->ip_tnl_net_id = net_id;
1338 }
1339 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1340 
1341 MODULE_DESCRIPTION("IPv4 tunnel implementation library");
1342 MODULE_LICENSE("GPL");
1343