// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netdev_lock.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>
#include <net/inet_dscp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

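/* Note: hash_32() folds the XOR of key and peer address down to
 * IP_TNL_HASH_BITS bits, so each (key, daddr) pair selects one of the
 * per-netns buckets that the lookup helpers below walk.
 */
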
static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
				const unsigned long *flags, __be32 key)
{
	if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
		return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);

	return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, const unsigned long *flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
		     t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (READ_ONCE(t->parms.link) == link)
			return t;
		if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

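/* Hedged usage sketch (not from this file): a tunnel receive path would
 * typically parse the outer header into flags/key and look the tunnel up
 * under RCU, roughly:
 *
 *	rcu_read_lock();
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		...
 *	rcu_read_unlock();
 *
 * How flags and key are parsed out of the packet depends on the caller,
 * e.g. the GRE receive path.
 */
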
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm_kern *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
	    test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

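/* ip_tunnel_add()/ip_tunnel_del() only link and unlink the tunnel in the
 * per-netns hash table; readers walk the chains under RCU, so a deleted
 * entry may still be observed by lookups until a grace period elapses.
 */
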
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm_kern *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	IP_TUNNEL_DECLARE_FLAGS(flags);
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	ip_tunnel_flags_copy(flags, parms->i_flags);

	hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == READ_ONCE(t->parms.link) &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm_kern *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strscpy(name, parms->name);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strscpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    iph->tos & INET_DSCP_MASK, tunnel->net,
				    tunnel->parms.link, tunnel->fwmark, 0, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

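/* Worked example (assumed numbers): for a GRE tunnel with a 4-byte key,
 * tunnel->hlen is 8, so t_hlen = 8 + 20 = 28 bytes of encapsulation.
 * Bound to a 1500-byte Ethernet underlay, ip_tunnel_bind_dev() returns
 * mtu = 1500 - 28 = 1472 for a non-ARPHRD_ETHER tunnel device.
 */
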
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm_kern *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct udphdr *udph;

	if (iph->protocol != IPPROTO_UDP)
		return;

	udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
	info->encap.sport = udph->source;
	info->encap.dport = udph->dest;
}
EXPORT_SYMBOL(ip_tunnel_md_udp_encap);

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int nh, err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		DEV_STATS_INC(tunnel->dev, multicast);
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
	    test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
		if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(tunnel->dev, rx_length_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}
	iph = (struct iphdr *)(skb->head + nh);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

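/* Hedged usage sketch: protocol handlers (e.g. the GRE or IPIP receive
 * paths) call ip_tunnel_rcv() once they have resolved the tunnel and
 * filled in a struct tnl_ptk_info, roughly:
 *
 *	tunnel = ip_tunnel_lookup(itn, ...);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst,
 *				     log_ecn_error);
 *
 * The exact call site and error handling depend on the protocol module.
 */
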
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

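/* Hedged registration sketch: an encapsulation module (FOU-style, for
 * illustration; the "my_" names are hypothetical) registers a slot at
 * module init and removes it on exit, e.g.:
 *
 *	static const struct ip_tunnel_encap_ops my_encap_ops = {
 *		.encap_hlen	= my_encap_hlen,
 *		.build_header	= my_build_header,
 *	};
 *
 *	err = ip_tunnel_encap_add_ops(&my_encap_ops, TUNNEL_ENCAP_FOU);
 *	...
 *	ip_tunnel_encap_del_ops(&my_encap_ops, TUNNEL_ENCAP_FOU);
 *
 * The cmpxchg() calls above make both operations atomic with respect to
 * concurrent registration attempts for the same slot.
 */
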
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

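/* Worked example (assumed numbers): with an 8-byte tunnel header, a
 * 1500-byte route MTU and DF set, the allowed inner packet is
 * 1500 - (20 + 8) = 1472 bytes; a larger non-GSO IPv4 packet carrying
 * IP_DF triggers ICMP_FRAG_NEEDED advertising mtu = 1472.
 */
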
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id),
			    tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark,
			    skb_get_hash(skb), key->flow_flags);

	if (!tunnel_hlen)
		tunnel_hlen = ip_encap_hlen(&tun_info->encap);

	if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	ip_tunnel_adj_headroom(dev, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
	return;
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	goto kfree;
tx_dropped:
	DEV_STATS_INC(dev, tx_dropped);
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

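/* ip_md_tunnel_xmit() above is the transmit path for collect_md
 * (externally controlled, e.g. LWT/eBPF) mode: destination, key, TOS
 * and TTL come from the per-skb metadata dst rather than from the
 * device's own parms, which ip_tunnel_xmit() below uses instead.
 */
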
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;	/* Route to the other host */
	__be16 payload_protocol;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);
	payload_protocol = skb_protocol(skb, true);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			DEV_STATS_INC(dev, tx_fifo_errors);
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (payload_protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (payload_protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (payload_protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, tos & INET_DSCP_MASK,
			    tunnel->net, READ_ONCE(tunnel->parms.link),
			    tunnel->fwmark, skb_get_hash(skb), 0);

	if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (payload_protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);

	if (skb_cow_head(skb, max_headroom)) {
		ip_rt_put(rt);
		DEV_STATS_INC(dev, tx_dropped);
		kfree_skb(skb);
		return;
	}

	ip_tunnel_adj_headroom(dev, max_headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)), 0);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm_kern *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		__dev_addr_set(dev, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		WRITE_ONCE(t->parms.link, p->link);
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			WRITE_ONCE(dev->mtu, mtu);
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
		  int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
			if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
				p->i_key = 0;
			if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags ^ nflags) &
				    (IFF_POINTOPOINT | IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
			      const void __user *data)
{
	struct ip_tunnel_parm p;

	if (copy_from_user(&p, data, sizeof(p)))
		return false;

	strscpy(kp->name, p.name);
	kp->link = p.link;
	ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
	ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
	kp->i_key = p.i_key;
	kp->o_key = p.o_key;
	memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));

	return true;
}
EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);

bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
{
	struct ip_tunnel_parm p;

	if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
	    !ip_tunnel_flags_is_be16_compat(kp->o_flags))
		return false;

	memset(&p, 0, sizeof(p));

	strscpy(p.name, kp->name);
	p.link = kp->link;
	p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
	p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
	p.i_key = kp->i_key;
	p.o_key = kp->o_key;
	memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));

	return !copy_to_user(data, &p, sizeof(p));
}
EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);

int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm_kern p;
	int err;

	if (!ip_tunnel_parm_from_user(&p, data))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && !ip_tunnel_parm_to_user(data, &p))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

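/* Worked example (assumed numbers): with t_hlen = 28 (8-byte tunnel
 * header plus the outer IPv4 header), max_mtu = 65535 - 28 = 65507; a
 * request above that is rejected when strict, or clamped otherwise.
 */
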
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	const struct ip_tunnel *tunnel = netdev_priv(dev);

	return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm_kern parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strscpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The FB netdevice is special: there is one, and only one, per netns.
	 * Allowing it to be moved to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->netns_immutable = true;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

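/* Hedged usage sketch: a tunnel module typically wires this up through
 * pernet ops, along the lines of (names illustrative):
 *
 *	static int __net_init my_tunnel_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, my_net_id, &my_link_ops,
 *					  "tunl0");
 *	}
 *
 * devname selects the fallback device's name ("tunl0", "gre0", ...);
 * passing NULL lets __ip_tunnel_create() derive one from ops->kind.
 */
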
void ip_tunnel_delete_net(struct net *net, unsigned int id,
			  struct rtnl_link_ops *ops,
			  struct list_head *head)
{
	struct ip_tunnel_net *itn = net_generic(net, id);
	struct net_device *dev, *aux;
	int h;

	ASSERT_RTNL_NET(net);

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net *net, struct net_device *dev,
		      struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
		      __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err)
		return err;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		return err;
	}

	tunnel->dev = dev;
	strscpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	netdev_lockdep_set_classes(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization here; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");