1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * GENEVE: Generic Network Virtualization Encapsulation
4 *
5 * Copyright (c) 2015 Red Hat, Inc.
6 */
7
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
10 #include <linux/ethtool.h>
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/etherdevice.h>
14 #include <linux/hash.h>
15 #include <net/ipv6_stubs.h>
16 #include <net/dst_metadata.h>
17 #include <net/gro_cells.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
20 #include <net/gro.h>
21 #include <net/netdev_lock.h>
22 #include <net/protocol.h>
23
24 #define GENEVE_NETDEV_VER "0.6"
25
26 #define GENEVE_N_VID (1u << 24)
27 #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
28
29 #define VNI_HASH_BITS 10
30 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
31
32 static bool log_ecn_error = true;
33 module_param(log_ecn_error, bool, 0644);
34 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
35
36 #define GENEVE_VER 0
37 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
38 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
39 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
40
41 #define GENEVE_OPT_NETDEV_CLASS 0x100
42 #define GENEVE_OPT_GRO_HINT_SIZE 8
43 #define GENEVE_OPT_GRO_HINT_TYPE 1
44 #define GENEVE_OPT_GRO_HINT_LEN 1
45
46 struct geneve_opt_gro_hint {
47 u8 inner_proto_id:2,
48 nested_is_v6:1;
49 u8 nested_nh_offset;
50 u8 nested_tp_offset;
51 u8 nested_hdr_len;
52 };
53
54 struct geneve_skb_cb {
55 unsigned int gro_hint_len;
56 struct geneve_opt_gro_hint gro_hint;
57 };
58
59 #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0]))
60
61 /* per-network namespace private data for this module */
62 struct geneve_net {
63 struct list_head geneve_list;
64 /* sock_list is protected by rtnl lock */
65 struct list_head sock_list;
66 };
67
68 static unsigned int geneve_net_id;
69
70 struct geneve_dev_node {
71 struct hlist_node hlist;
72 struct geneve_dev *geneve;
73 };
74
75 struct geneve_config {
76 bool collect_md;
77 bool use_udp6_rx_checksums;
78 bool ttl_inherit;
79 bool gro_hint;
80 enum ifla_geneve_df df;
81 bool inner_proto_inherit;
82 u16 port_min;
83 u16 port_max;
84
85 /* Must be last --ends in a flexible-array member. */
86 struct ip_tunnel_info info;
87 };
88
89 /* Pseudo network device */
90 struct geneve_dev {
91 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
92 #if IS_ENABLED(CONFIG_IPV6)
93 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
94 #endif
95 struct net *net; /* netns for packet i/o */
96 struct net_device *dev; /* netdev for geneve tunnel */
97 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
98 #if IS_ENABLED(CONFIG_IPV6)
99 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
100 #endif
101 struct list_head next; /* geneve's per namespace list */
102 struct gro_cells gro_cells;
103 struct geneve_config cfg;
104 };
105
106 struct geneve_sock {
107 bool collect_md;
108 bool gro_hint;
109 struct list_head list;
110 struct socket *sock;
111 struct rcu_head rcu;
112 int refcnt;
113 struct hlist_head vni_list[VNI_HASH_SIZE];
114 };
115
116 static const __be16 proto_id_map[] = { htons(ETH_P_TEB),
117 htons(ETH_P_IPV6),
118 htons(ETH_P_IP) };
119
proto_to_id(__be16 proto)120 static int proto_to_id(__be16 proto)
121 {
122 int i;
123
124 for (i = 0; i < ARRAY_SIZE(proto_id_map); i++)
125 if (proto_id_map[i] == proto)
126 return i;
127
128 return -1;
129 }
130
geneve_net_vni_hash(u8 vni[3])131 static inline __u32 geneve_net_vni_hash(u8 vni[3])
132 {
133 __u32 vnid;
134
135 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
136 return hash_32(vnid, VNI_HASH_BITS);
137 }
138
vni_to_tunnel_id(const __u8 * vni)139 static __be64 vni_to_tunnel_id(const __u8 *vni)
140 {
141 #ifdef __BIG_ENDIAN
142 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
143 #else
144 return (__force __be64)(((__force u64)vni[0] << 40) |
145 ((__force u64)vni[1] << 48) |
146 ((__force u64)vni[2] << 56));
147 #endif
148 }
149
150 /* Convert 64 bit tunnel ID to 24 bit VNI. */
tunnel_id_to_vni(__be64 tun_id,__u8 * vni)151 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
152 {
153 #ifdef __BIG_ENDIAN
154 vni[0] = (__force __u8)(tun_id >> 16);
155 vni[1] = (__force __u8)(tun_id >> 8);
156 vni[2] = (__force __u8)tun_id;
157 #else
158 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
159 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
160 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
161 #endif
162 }
163
eq_tun_id_and_vni(u8 * tun_id,u8 * vni)164 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
165 {
166 return !memcmp(vni, &tun_id[5], 3);
167 }
168
geneve_get_sk_family(struct geneve_sock * gs)169 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
170 {
171 return gs->sock->sk->sk_family;
172 }
173
/*
 * Find the device bound to @gs whose configured tunnel id matches @vni and
 * whose configured IPv4 destination equals @addr. Returns NULL if none.
 */
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *bucket = &gs->vni_list[geneve_net_vni_hash(vni)];
	struct geneve_dev_node *node;

	hlist_for_each_entry_rcu(node, bucket, hlist) {
		struct geneve_dev *geneve = node->geneve;

		if (!eq_tun_id_and_vni((u8 *)&geneve->cfg.info.key.tun_id, vni))
			continue;
		if (geneve->cfg.info.key.u.ipv4.dst != addr)
			continue;

		return geneve;
	}
	return NULL;
}
191
192 #if IS_ENABLED(CONFIG_IPV6)
geneve6_lookup(struct geneve_sock * gs,struct in6_addr addr6,u8 vni[])193 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
194 struct in6_addr addr6, u8 vni[])
195 {
196 struct hlist_head *vni_list_head;
197 struct geneve_dev_node *node;
198 __u32 hash;
199
200 /* Find the device for this VNI */
201 hash = geneve_net_vni_hash(vni);
202 vni_list_head = &gs->vni_list[hash];
203 hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
204 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
205 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
206 return node->geneve;
207 }
208 return NULL;
209 }
210 #endif
211
geneve_hdr(const struct sk_buff * skb)212 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
213 {
214 return (struct genevehdr *)(udp_hdr(skb) + 1);
215 }
216
geneve_lookup_skb(struct geneve_sock * gs,struct sk_buff * skb)217 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
218 struct sk_buff *skb)
219 {
220 static u8 zero_vni[3];
221 u8 *vni;
222
223 if (geneve_get_sk_family(gs) == AF_INET) {
224 struct iphdr *iph;
225 __be32 addr;
226
227 iph = ip_hdr(skb); /* outer IP header... */
228
229 if (gs->collect_md) {
230 vni = zero_vni;
231 addr = 0;
232 } else {
233 vni = geneve_hdr(skb)->vni;
234 addr = iph->saddr;
235 }
236
237 return geneve_lookup(gs, addr, vni);
238 #if IS_ENABLED(CONFIG_IPV6)
239 } else if (geneve_get_sk_family(gs) == AF_INET6) {
240 static struct in6_addr zero_addr6;
241 struct ipv6hdr *ip6h;
242 struct in6_addr addr6;
243
244 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
245
246 if (gs->collect_md) {
247 vni = zero_vni;
248 addr6 = zero_addr6;
249 } else {
250 vni = geneve_hdr(skb)->vni;
251 addr6 = ip6h->saddr;
252 }
253
254 return geneve6_lookup(gs, addr6, vni);
255 #endif
256 }
257 return NULL;
258 }
259
260 /* geneve receive/decap routine */
geneve_rx(struct geneve_dev * geneve,struct geneve_sock * gs,struct sk_buff * skb,const struct genevehdr * gnvh)261 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
262 struct sk_buff *skb, const struct genevehdr *gnvh)
263 {
264 struct metadata_dst *tun_dst = NULL;
265 unsigned int len;
266 int nh, err = 0;
267 void *oiph;
268
269 if (ip_tunnel_collect_metadata() || gs->collect_md) {
270 IP_TUNNEL_DECLARE_FLAGS(flags) = { };
271
272 __set_bit(IP_TUNNEL_KEY_BIT, flags);
273 __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
274 __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);
275
276 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
277 vni_to_tunnel_id(gnvh->vni),
278 gnvh->opt_len * 4);
279 if (!tun_dst) {
280 dev_dstats_rx_dropped(geneve->dev);
281 goto drop;
282 }
283 /* Update tunnel dst according to Geneve options. */
284 ip_tunnel_flags_zero(flags);
285 __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
286 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
287 gnvh->options, gnvh->opt_len * 4,
288 flags);
289 } else {
290 /* Drop packets w/ critical options,
291 * since we don't support any...
292 */
293 if (gnvh->critical) {
294 DEV_STATS_INC(geneve->dev, rx_frame_errors);
295 DEV_STATS_INC(geneve->dev, rx_errors);
296 goto drop;
297 }
298 }
299
300 if (tun_dst)
301 skb_dst_set(skb, &tun_dst->dst);
302
303 if (gnvh->proto_type == htons(ETH_P_TEB)) {
304 skb_reset_mac_header(skb);
305 skb->protocol = eth_type_trans(skb, geneve->dev);
306 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
307
308 /* Ignore packet loops (and multicast echo) */
309 if (ether_addr_equal(eth_hdr(skb)->h_source,
310 geneve->dev->dev_addr)) {
311 DEV_STATS_INC(geneve->dev, rx_errors);
312 goto drop;
313 }
314 } else {
315 skb_reset_mac_header(skb);
316 skb->dev = geneve->dev;
317 skb->pkt_type = PACKET_HOST;
318 }
319
320 /* Save offset of outer header relative to skb->head,
321 * because we are going to reset the network header to the inner header
322 * and might change skb->head.
323 */
324 nh = skb_network_header(skb) - skb->head;
325
326 skb_reset_network_header(skb);
327
328 if (!pskb_inet_may_pull(skb)) {
329 DEV_STATS_INC(geneve->dev, rx_length_errors);
330 DEV_STATS_INC(geneve->dev, rx_errors);
331 goto drop;
332 }
333
334 /* Get the outer header. */
335 oiph = skb->head + nh;
336
337 if (geneve_get_sk_family(gs) == AF_INET)
338 err = IP_ECN_decapsulate(oiph, skb);
339 #if IS_ENABLED(CONFIG_IPV6)
340 else
341 err = IP6_ECN_decapsulate(oiph, skb);
342 #endif
343
344 if (unlikely(err)) {
345 if (log_ecn_error) {
346 if (geneve_get_sk_family(gs) == AF_INET)
347 net_info_ratelimited("non-ECT from %pI4 "
348 "with TOS=%#x\n",
349 &((struct iphdr *)oiph)->saddr,
350 ((struct iphdr *)oiph)->tos);
351 #if IS_ENABLED(CONFIG_IPV6)
352 else
353 net_info_ratelimited("non-ECT from %pI6\n",
354 &((struct ipv6hdr *)oiph)->saddr);
355 #endif
356 }
357 if (err > 1) {
358 DEV_STATS_INC(geneve->dev, rx_frame_errors);
359 DEV_STATS_INC(geneve->dev, rx_errors);
360 goto drop;
361 }
362 }
363
364 /* Skip the additional GRO stage when hints are in use. */
365 len = skb->len;
366 if (skb->encapsulation)
367 err = netif_rx(skb);
368 else
369 err = gro_cells_receive(&geneve->gro_cells, skb);
370 if (likely(err == NET_RX_SUCCESS))
371 dev_dstats_rx_add(geneve->dev, len);
372
373 return;
374 drop:
375 /* Consume bad packet */
376 kfree_skb(skb);
377 }
378
379 /* Setup stats when device is created */
geneve_init(struct net_device * dev)380 static int geneve_init(struct net_device *dev)
381 {
382 struct geneve_dev *geneve = netdev_priv(dev);
383 int err;
384
385 err = gro_cells_init(&geneve->gro_cells, dev);
386 if (err)
387 return err;
388
389 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
390 if (err) {
391 gro_cells_destroy(&geneve->gro_cells);
392 return err;
393 }
394 netdev_lockdep_set_classes(dev);
395 return 0;
396 }
397
geneve_uninit(struct net_device * dev)398 static void geneve_uninit(struct net_device *dev)
399 {
400 struct geneve_dev *geneve = netdev_priv(dev);
401
402 dst_cache_destroy(&geneve->cfg.info.dst_cache);
403 gro_cells_destroy(&geneve->gro_cells);
404 }
405
geneve_hlen(const struct genevehdr * gh)406 static int geneve_hlen(const struct genevehdr *gh)
407 {
408 return sizeof(*gh) + gh->opt_len * 4;
409 }
410
411 /*
412 * Look for GRO hint in the genenve options; if not found or does not pass basic
413 * sanitization return 0, otherwise the offset WRT the geneve hdr start.
414 */
415 static unsigned int
geneve_opt_gro_hint_off(const struct genevehdr * gh,__be16 * type,unsigned int * gh_len)416 geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type,
417 unsigned int *gh_len)
418 {
419 struct geneve_opt *opt = (void *)(gh + 1);
420 unsigned int id, opt_len = gh->opt_len;
421 struct geneve_opt_gro_hint *gro_hint;
422
423 while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) {
424 if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) &&
425 opt->type == GENEVE_OPT_GRO_HINT_TYPE &&
426 opt->length == GENEVE_OPT_GRO_HINT_LEN)
427 goto found;
428
429 /* check for bad opt len */
430 if (opt->length + 1 >= opt_len)
431 return 0;
432
433 /* next opt */
434 opt_len -= opt->length + 1;
435 opt = ((void *)opt) + ((opt->length + 1) << 2);
436 }
437 return 0;
438
439 found:
440 gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data;
441
442 /*
443 * Sanitize the hinted hdrs: the nested transport is UDP and must fit
444 * the overall hinted hdr size.
445 */
446 if (gro_hint->nested_tp_offset + sizeof(struct udphdr) >
447 gro_hint->nested_hdr_len)
448 return 0;
449
450 if (gro_hint->nested_nh_offset +
451 (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) :
452 sizeof(struct iphdr)) >
453 gro_hint->nested_tp_offset)
454 return 0;
455
456 /* Allow only supported L2. */
457 id = gro_hint->inner_proto_id;
458 if (id >= ARRAY_SIZE(proto_id_map))
459 return 0;
460
461 *type = proto_id_map[id];
462 *gh_len += gro_hint->nested_hdr_len;
463
464 return (void *)gro_hint - (void *)gh;
465 }
466
/* Turn a hint offset, relative to the geneve hdr start, into a pointer. */
static const struct geneve_opt_gro_hint *
geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off)
{
	const void *base = gh;

	return base + hint_off;
}
472
473 static unsigned int
geneve_sk_gro_hint_off(const struct sock * sk,const struct genevehdr * gh,__be16 * type,unsigned int * gh_len)474 geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh,
475 __be16 *type, unsigned int *gh_len)
476 {
477 const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk);
478
479 if (!gs || !gs->gro_hint)
480 return 0;
481 return geneve_opt_gro_hint_off(gh, type, gh_len);
482 }
483
484 /* Validate the packet headers pointed by data WRT the provided hint */
485 static bool
geneve_opt_gro_hint_validate(void * data,const struct geneve_opt_gro_hint * gro_hint)486 geneve_opt_gro_hint_validate(void *data,
487 const struct geneve_opt_gro_hint *gro_hint)
488 {
489 void *nested_nh = data + gro_hint->nested_nh_offset;
490 struct iphdr *iph;
491
492 if (gro_hint->nested_is_v6) {
493 struct ipv6hdr *ipv6h = nested_nh;
494 struct ipv6_opt_hdr *opth;
495 int offset, len;
496
497 if (ipv6h->nexthdr == IPPROTO_UDP)
498 return true;
499
500 offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset;
501 while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) {
502 opth = data + offset;
503
504 len = ipv6_optlen(opth);
505 if (len + offset > gro_hint->nested_tp_offset)
506 return false;
507 if (opth->nexthdr == IPPROTO_UDP)
508 return true;
509
510 offset += len;
511 }
512 return false;
513 }
514
515 iph = nested_nh;
516 if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) ||
517 iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5))
518 return false;
519
520 return true;
521 }
522
523 /*
524 * Validate the skb headers following the specified geneve hdr vs the
525 * provided hint, including nested L4 checksum.
526 * The caller already ensured that the relevant amount of data is available
527 * in the linear part.
528 */
529 static bool
geneve_opt_gro_hint_validate_csum(const struct sk_buff * skb,const struct genevehdr * gh,const struct geneve_opt_gro_hint * gro_hint)530 geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb,
531 const struct genevehdr *gh,
532 const struct geneve_opt_gro_hint *gro_hint)
533 {
534 unsigned int plen, gh_len = geneve_hlen(gh);
535 void *nested = (void *)gh + gh_len;
536 struct udphdr *nested_uh;
537 unsigned int nested_len;
538 struct ipv6hdr *ipv6h;
539 struct iphdr *iph;
540 __wsum csum, psum;
541
542 if (!geneve_opt_gro_hint_validate(nested, gro_hint))
543 return false;
544
545 /* Use GRO hints with nested csum only if the outer header has csum. */
546 nested_uh = nested + gro_hint->nested_tp_offset;
547 if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL)
548 return true;
549
550 if (!NAPI_GRO_CB(skb)->csum_valid)
551 return false;
552
553 /* Compute the complete checksum up to the nested transport. */
554 plen = gh_len + gro_hint->nested_tp_offset;
555 csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0));
556 nested_len = skb_gro_len(skb) - plen;
557
558 /* Compute the nested pseudo header csum. */
559 ipv6h = nested + gro_hint->nested_nh_offset;
560 iph = (struct iphdr *)ipv6h;
561 psum = gro_hint->nested_is_v6 ?
562 ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
563 nested_len, IPPROTO_UDP, 0)) :
564 csum_tcpudp_nofold(iph->saddr, iph->daddr,
565 nested_len, IPPROTO_UDP, 0);
566
567 return !csum_fold(csum_add(psum, csum));
568 }
569
geneve_post_decap_hint(const struct sock * sk,struct sk_buff * skb,unsigned int gh_len,struct genevehdr ** geneveh)570 static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb,
571 unsigned int gh_len,
572 struct genevehdr **geneveh)
573 {
574 const struct geneve_opt_gro_hint *gro_hint;
575 unsigned int len, total_len, hint_off;
576 struct ipv6hdr *ipv6h;
577 struct iphdr *iph;
578 struct udphdr *uh;
579 __be16 p;
580
581 hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len);
582 if (!hint_off)
583 return 0;
584
585 if (!skb_is_gso(skb))
586 return 0;
587
588 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off);
589 if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len)))
590 return -ENOMEM;
591
592 *geneveh = geneve_hdr(skb);
593 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off);
594
595 /*
596 * Validate hints from untrusted source before accessing
597 * the headers; csum will be checked later by the nested
598 * protocol rx path.
599 */
600 if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY &&
601 !geneve_opt_gro_hint_validate(skb->data, gro_hint)))
602 return -EINVAL;
603
604 ipv6h = (void *)skb->data + gro_hint->nested_nh_offset;
605 iph = (struct iphdr *)ipv6h;
606 total_len = skb->len - gro_hint->nested_nh_offset;
607 if (total_len > GRO_LEGACY_MAX_SIZE)
608 return -E2BIG;
609
610 /*
611 * After stripping the outer encap, the packet still carries a
612 * tunnel encapsulation: the nested one.
613 */
614 skb->encapsulation = 1;
615
616 /* GSO expect a valid transpor header, move it to the current one. */
617 skb_set_transport_header(skb, gro_hint->nested_tp_offset);
618
619 /* Adjust the nested IP{6} hdr to actual GSO len. */
620 if (gro_hint->nested_is_v6) {
621 ipv6h->payload_len = htons(total_len - sizeof(*ipv6h));
622 } else {
623 __be16 old_len = iph->tot_len;
624
625 iph->tot_len = htons(total_len);
626
627 /* For IPv4 additionally adjust the nested csum. */
628 csum_replace2(&iph->check, old_len, iph->tot_len);
629 ip_send_check(iph);
630 }
631
632 /* Adjust the nested UDP header len and checksum. */
633 uh = udp_hdr(skb);
634 uh->len = htons(skb->len - gro_hint->nested_tp_offset);
635 if (uh->check) {
636 len = skb->len - gro_hint->nested_nh_offset;
637 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
638 if (gro_hint->nested_is_v6)
639 uh->check = ~udp_v6_check(len, &ipv6h->saddr,
640 &ipv6h->daddr, 0);
641 else
642 uh->check = ~udp_v4_check(len, iph->saddr,
643 iph->daddr, 0);
644 } else {
645 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
646 }
647 return 0;
648 }
649
650 /* Callback from net/ipv4/udp.c to receive packets */
geneve_udp_encap_recv(struct sock * sk,struct sk_buff * skb)651 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
652 {
653 struct genevehdr *geneveh;
654 struct geneve_dev *geneve;
655 struct geneve_sock *gs;
656 __be16 inner_proto;
657 int opts_len;
658
659 /* Need UDP and Geneve header to be present */
660 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
661 goto drop;
662
663 /* Return packets with reserved bits set */
664 geneveh = geneve_hdr(skb);
665 if (unlikely(geneveh->ver != GENEVE_VER))
666 goto drop;
667
668 gs = rcu_dereference_sk_user_data(sk);
669 if (!gs)
670 goto drop;
671
672 geneve = geneve_lookup_skb(gs, skb);
673 if (!geneve)
674 goto drop;
675
676 inner_proto = geneveh->proto_type;
677
678 if (unlikely((!geneve->cfg.inner_proto_inherit &&
679 inner_proto != htons(ETH_P_TEB)))) {
680 dev_dstats_rx_dropped(geneve->dev);
681 goto drop;
682 }
683
684 opts_len = geneveh->opt_len * 4;
685 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
686 !net_eq(geneve->net, dev_net(geneve->dev)))) {
687 dev_dstats_rx_dropped(geneve->dev);
688 goto drop;
689 }
690
691 /*
692 * After hint processing, the transport header points to the inner one
693 * and we can't use anymore on geneve_hdr().
694 */
695 geneveh = geneve_hdr(skb);
696 if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) +
697 opts_len, &geneveh)) {
698 DEV_STATS_INC(geneve->dev, rx_errors);
699 goto drop;
700 }
701
702 geneve_rx(geneve, gs, skb, geneveh);
703 return 0;
704
705 drop:
706 /* Consume bad packet */
707 kfree_skb(skb);
708 return 0;
709 }
710
711 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
geneve_udp_encap_err_lookup(struct sock * sk,struct sk_buff * skb)712 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
713 {
714 struct genevehdr *geneveh;
715 struct geneve_sock *gs;
716 u8 zero_vni[3] = { 0 };
717 u8 *vni = zero_vni;
718
719 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
720 return -EINVAL;
721
722 geneveh = geneve_hdr(skb);
723 if (geneveh->ver != GENEVE_VER)
724 return -EINVAL;
725
726 if (geneveh->proto_type != htons(ETH_P_TEB))
727 return -EINVAL;
728
729 gs = rcu_dereference_sk_user_data(sk);
730 if (!gs)
731 return -ENOENT;
732
733 if (geneve_get_sk_family(gs) == AF_INET) {
734 struct iphdr *iph = ip_hdr(skb);
735 __be32 addr4 = 0;
736
737 if (!gs->collect_md) {
738 vni = geneve_hdr(skb)->vni;
739 addr4 = iph->daddr;
740 }
741
742 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
743 }
744
745 #if IS_ENABLED(CONFIG_IPV6)
746 if (geneve_get_sk_family(gs) == AF_INET6) {
747 struct ipv6hdr *ip6h = ipv6_hdr(skb);
748 struct in6_addr addr6;
749
750 memset(&addr6, 0, sizeof(struct in6_addr));
751
752 if (!gs->collect_md) {
753 vni = geneve_hdr(skb)->vni;
754 addr6 = ip6h->daddr;
755 }
756
757 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
758 }
759 #endif
760
761 return -EPFNOSUPPORT;
762 }
763
/*
 * Open a kernel UDP socket bound to @port in @net, IPv6-only or IPv4
 * wildcard depending on @ipv6, and allow GSO on it.
 * Returns the socket or an ERR_PTR() carrying the creation error.
 */
static struct socket *geneve_create_sock(struct net *net, bool ipv6,
					 __be16 port, bool ipv6_rx_csum)
{
	struct udp_port_cfg cfg;
	struct socket *sock;
	int err;

	memset(&cfg, 0, sizeof(cfg));
	cfg.local_udp_port = port;

	if (!ipv6) {
		cfg.family = AF_INET;
		cfg.local_ip.s_addr = htonl(INADDR_ANY);
	} else {
		cfg.family = AF_INET6;
		cfg.ipv6_v6only = 1;
		cfg.use_udp6_rx_checksums = ipv6_rx_csum;
	}

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg, &sock);
	if (err < 0)
		return ERR_PTR(err);

	udp_allow_gso(sock->sk);
	return sock;
}
792
geneve_hdr_match(struct sk_buff * skb,const struct genevehdr * gh,const struct genevehdr * gh2,unsigned int hint_off)793 static bool geneve_hdr_match(struct sk_buff *skb,
794 const struct genevehdr *gh,
795 const struct genevehdr *gh2,
796 unsigned int hint_off)
797 {
798 const struct geneve_opt_gro_hint *gro_hint;
799 void *nested, *nested2, *nh, *nh2;
800 struct udphdr *udp, *udp2;
801 unsigned int gh_len;
802
803 /* Match the geneve hdr and options */
804 if (gh->opt_len != gh2->opt_len)
805 return false;
806
807 gh_len = geneve_hlen(gh);
808 if (memcmp(gh, gh2, gh_len))
809 return false;
810
811 if (!hint_off)
812 return true;
813
814 /*
815 * When gro is present consider the nested headers as part
816 * of the geneve options
817 */
818 nested = (void *)gh + gh_len;
819 nested2 = (void *)gh2 + gh_len;
820 gro_hint = geneve_opt_gro_hint(gh, hint_off);
821 if (!memcmp(nested, nested2, gro_hint->nested_hdr_len))
822 return true;
823
824 /*
825 * The nested headers differ; the packets can still belong to
826 * the same flow when IPs/proto/ports match; if so flushing is
827 * required.
828 */
829 nh = nested + gro_hint->nested_nh_offset;
830 nh2 = nested2 + gro_hint->nested_nh_offset;
831 if (gro_hint->nested_is_v6) {
832 struct ipv6hdr *iph = nh, *iph2 = nh2;
833 unsigned int nested_nlen;
834 __be32 first_word;
835
836 first_word = *(__be32 *)iph ^ *(__be32 *)iph2;
837 if ((first_word & htonl(0xF00FFFFF)) ||
838 !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
839 !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
840 iph->nexthdr != iph2->nexthdr)
841 return false;
842
843 nested_nlen = gro_hint->nested_tp_offset -
844 gro_hint->nested_nh_offset;
845 if (nested_nlen > sizeof(struct ipv6hdr) &&
846 (memcmp(iph + 1, iph2 + 1,
847 nested_nlen - sizeof(struct ipv6hdr))))
848 return false;
849 } else {
850 struct iphdr *iph = nh, *iph2 = nh2;
851
852 if ((iph->protocol ^ iph2->protocol) |
853 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
854 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr))
855 return false;
856 }
857
858 udp = nested + gro_hint->nested_tp_offset;
859 udp2 = nested2 + gro_hint->nested_tp_offset;
860 if (udp->source != udp2->source || udp->dest != udp2->dest ||
861 udp->check != udp2->check)
862 return false;
863
864 NAPI_GRO_CB(skb)->flush = 1;
865 return true;
866 }
867
geneve_gro_receive(struct sock * sk,struct list_head * head,struct sk_buff * skb)868 static struct sk_buff *geneve_gro_receive(struct sock *sk,
869 struct list_head *head,
870 struct sk_buff *skb)
871 {
872 unsigned int hlen, gh_len, off_gnv, hint_off;
873 const struct geneve_opt_gro_hint *gro_hint;
874 const struct packet_offload *ptype;
875 struct genevehdr *gh, *gh2;
876 struct sk_buff *pp = NULL;
877 struct sk_buff *p;
878 int flush = 1;
879 __be16 type;
880
881 off_gnv = skb_gro_offset(skb);
882 hlen = off_gnv + sizeof(*gh);
883 gh = skb_gro_header(skb, hlen, off_gnv);
884 if (unlikely(!gh))
885 goto out;
886
887 if (gh->ver != GENEVE_VER || gh->oam)
888 goto out;
889 gh_len = geneve_hlen(gh);
890 type = gh->proto_type;
891
892 hlen = off_gnv + gh_len;
893 if (!skb_gro_may_pull(skb, hlen)) {
894 gh = skb_gro_header_slow(skb, hlen, off_gnv);
895 if (unlikely(!gh))
896 goto out;
897 }
898
899 /* The GRO hint/nested hdr could use a different ethernet type. */
900 hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len);
901 if (hint_off) {
902
903 /*
904 * If the hint is present, and nested hdr validation fails, do
905 * not attempt plain GRO: it will ignore inner hdrs and cause
906 * OoO.
907 */
908 gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv);
909 if (unlikely(!gh))
910 goto out;
911
912 gro_hint = geneve_opt_gro_hint(gh, hint_off);
913 if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint))
914 goto out;
915 }
916
917 list_for_each_entry(p, head, list) {
918 if (!NAPI_GRO_CB(p)->same_flow)
919 continue;
920
921 gh2 = (struct genevehdr *)(p->data + off_gnv);
922 if (!geneve_hdr_match(skb, gh, gh2, hint_off)) {
923 NAPI_GRO_CB(p)->same_flow = 0;
924 continue;
925 }
926 }
927
928 skb_gro_pull(skb, gh_len);
929 skb_gro_postpull_rcsum(skb, gh, gh_len);
930 if (likely(type == htons(ETH_P_TEB)))
931 return call_gro_receive(eth_gro_receive, head, skb);
932
933 ptype = gro_find_receive_by_type(type);
934 if (!ptype)
935 goto out;
936
937 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
938 flush = 0;
939
940 out:
941 skb_gro_flush_final(skb, pp, flush);
942
943 return pp;
944 }
945
geneve_gro_complete(struct sock * sk,struct sk_buff * skb,int nhoff)946 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
947 int nhoff)
948 {
949 struct genevehdr *gh;
950 struct packet_offload *ptype;
951 __be16 type;
952 int gh_len;
953 int err = -ENOSYS;
954
955 gh = (struct genevehdr *)(skb->data + nhoff);
956 gh_len = geneve_hlen(gh);
957 type = gh->proto_type;
958 geneve_opt_gro_hint_off(gh, &type, &gh_len);
959
960 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
961 if (likely(type == htons(ETH_P_TEB)))
962 return eth_gro_complete(skb, nhoff + gh_len);
963
964 ptype = gro_find_complete_by_type(type);
965 if (ptype)
966 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
967
968 skb_set_inner_mac_header(skb, nhoff + gh_len);
969
970 return err;
971 }
972
973 /* Create new listen socket if needed */
geneve_socket_create(struct net * net,__be16 port,bool ipv6,bool ipv6_rx_csum)974 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
975 bool ipv6, bool ipv6_rx_csum)
976 {
977 struct geneve_net *gn = net_generic(net, geneve_net_id);
978 struct geneve_sock *gs;
979 struct socket *sock;
980 struct udp_tunnel_sock_cfg tunnel_cfg;
981 int h;
982
983 gs = kzalloc_obj(*gs);
984 if (!gs)
985 return ERR_PTR(-ENOMEM);
986
987 sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
988 if (IS_ERR(sock)) {
989 kfree(gs);
990 return ERR_CAST(sock);
991 }
992
993 gs->sock = sock;
994 gs->refcnt = 1;
995 for (h = 0; h < VNI_HASH_SIZE; ++h)
996 INIT_HLIST_HEAD(&gs->vni_list[h]);
997
998 /* Initialize the geneve udp offloads structure */
999 udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
1000
1001 /* Mark socket as an encapsulation socket */
1002 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
1003 tunnel_cfg.sk_user_data = gs;
1004 tunnel_cfg.encap_type = 1;
1005 tunnel_cfg.gro_receive = geneve_gro_receive;
1006 tunnel_cfg.gro_complete = geneve_gro_complete;
1007 tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
1008 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
1009 tunnel_cfg.encap_destroy = NULL;
1010 setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
1011 list_add(&gs->list, &gn->sock_list);
1012 return gs;
1013 }
1014
__geneve_sock_release(struct geneve_sock * gs)1015 static void __geneve_sock_release(struct geneve_sock *gs)
1016 {
1017 if (!gs || --gs->refcnt)
1018 return;
1019
1020 list_del(&gs->list);
1021 udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
1022 udp_tunnel_sock_release(gs->sock);
1023 kfree_rcu(gs, rcu);
1024 }
1025
geneve_sock_release(struct geneve_dev * geneve)1026 static void geneve_sock_release(struct geneve_dev *geneve)
1027 {
1028 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
1029 #if IS_ENABLED(CONFIG_IPV6)
1030 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
1031
1032 rcu_assign_pointer(geneve->sock6, NULL);
1033 #endif
1034
1035 rcu_assign_pointer(geneve->sock4, NULL);
1036 synchronize_net();
1037
1038 __geneve_sock_release(gs4);
1039 #if IS_ENABLED(CONFIG_IPV6)
1040 __geneve_sock_release(gs6);
1041 #endif
1042 }
1043
/*
 * Look in the per-netns socket list for a socket bound to @dst_port with
 * the given @family and the same @gro_hint setting. Returns NULL if none.
 */
static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
					    sa_family_t family,
					    __be16 dst_port,
					    bool gro_hint)
{
	struct geneve_sock *gs;

	list_for_each_entry(gs, &gn->sock_list, list) {
		if (inet_sk(gs->sock->sk)->inet_sport != dst_port)
			continue;
		if (geneve_get_sk_family(gs) != family)
			continue;
		if (gs->gro_hint != gro_hint)
			continue;

		return gs;
	}
	return NULL;
}
1060
geneve_sock_add(struct geneve_dev * geneve,bool ipv6)1061 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
1062 {
1063 struct net *net = geneve->net;
1064 struct geneve_net *gn = net_generic(net, geneve_net_id);
1065 bool gro_hint = geneve->cfg.gro_hint;
1066 struct geneve_dev_node *node;
1067 struct geneve_sock *gs;
1068 __u8 vni[3];
1069 __u32 hash;
1070
1071 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET,
1072 geneve->cfg.info.key.tp_dst, gro_hint);
1073 if (gs) {
1074 gs->refcnt++;
1075 goto out;
1076 }
1077
1078 gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
1079 geneve->cfg.use_udp6_rx_checksums);
1080 if (IS_ERR(gs))
1081 return PTR_ERR(gs);
1082
1083 out:
1084 gs->collect_md = geneve->cfg.collect_md;
1085 gs->gro_hint = gro_hint;
1086 #if IS_ENABLED(CONFIG_IPV6)
1087 if (ipv6) {
1088 rcu_assign_pointer(geneve->sock6, gs);
1089 node = &geneve->hlist6;
1090 } else
1091 #endif
1092 {
1093 rcu_assign_pointer(geneve->sock4, gs);
1094 node = &geneve->hlist4;
1095 }
1096 node->geneve = geneve;
1097
1098 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
1099 hash = geneve_net_vni_hash(vni);
1100 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
1101 return 0;
1102 }
1103
geneve_open(struct net_device * dev)1104 static int geneve_open(struct net_device *dev)
1105 {
1106 struct geneve_dev *geneve = netdev_priv(dev);
1107 bool metadata = geneve->cfg.collect_md;
1108 bool ipv4, ipv6;
1109 int ret = 0;
1110
1111 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
1112 ipv4 = !ipv6 || metadata;
1113 #if IS_ENABLED(CONFIG_IPV6)
1114 if (ipv6) {
1115 ret = geneve_sock_add(geneve, true);
1116 if (ret < 0 && ret != -EAFNOSUPPORT)
1117 ipv4 = false;
1118 }
1119 #endif
1120 if (ipv4)
1121 ret = geneve_sock_add(geneve, false);
1122 if (ret < 0)
1123 geneve_sock_release(geneve);
1124
1125 return ret;
1126 }
1127
geneve_stop(struct net_device * dev)1128 static int geneve_stop(struct net_device *dev)
1129 {
1130 struct geneve_dev *geneve = netdev_priv(dev);
1131
1132 hlist_del_init_rcu(&geneve->hlist4.hlist);
1133 #if IS_ENABLED(CONFIG_IPV6)
1134 hlist_del_init_rcu(&geneve->hlist6.hlist);
1135 #endif
1136 geneve_sock_release(geneve);
1137 return 0;
1138 }
1139
/*
 * Fill in the GENEVE header at @geneveh from the tunnel metadata in @info,
 * using @inner_proto as the protocol type of the payload.
 *
 * The option length is taken from @info->options_len (expressed in 4-byte
 * units in the header); the options themselves are copied only when the
 * GENEVE option flag is set in the tunnel key.
 */
static void geneve_build_header(struct genevehdr *geneveh,
				const struct ip_tunnel_info *info,
				__be16 inner_proto)
{
	const struct ip_tunnel_key *key = &info->key;

	geneveh->ver = GENEVE_VER;
	geneveh->opt_len = info->options_len / 4;

	/* Flag bits mirror the corresponding tunnel flags. */
	geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, key->tun_flags);
	geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_flags);

	geneveh->rsvd1 = 0;
	geneveh->rsvd2 = 0;

	geneveh->proto_type = inner_proto;
	tunnel_id_to_vni(key->tun_id, geneveh->vni);

	if (!test_bit(IP_TUNNEL_GENEVE_OPT_BIT, key->tun_flags))
		return;

	ip_tunnel_info_opts_get(geneveh->options, info);
}
1157
geneve_build_gro_hint_opt(const struct geneve_dev * geneve,struct sk_buff * skb)1158 static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve,
1159 struct sk_buff *skb)
1160 {
1161 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb);
1162 struct geneve_opt_gro_hint *hint;
1163 unsigned int nhlen;
1164 bool nested_is_v6;
1165 int id;
1166
1167 BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb));
1168 cb->gro_hint_len = 0;
1169
1170 /* Try to add the GRO hint only in case of double encap. */
1171 if (!geneve->cfg.gro_hint || !skb->encapsulation)
1172 return 0;
1173
1174 /*
1175 * The nested headers must fit the geneve opt len fields and the
1176 * nested encap must carry a nested transport (UDP) header.
1177 */
1178 nhlen = skb_inner_mac_header(skb) - skb->data;
1179 if (nhlen > 255 || !skb_transport_header_was_set(skb) ||
1180 skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
1181 (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen))
1182 return 0;
1183
1184 id = proto_to_id(skb->inner_protocol);
1185 if (id < 0)
1186 return 0;
1187
1188 nested_is_v6 = skb->protocol == htons(ETH_P_IPV6);
1189 if (nested_is_v6) {
1190 int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
1191 u8 proto = ipv6_hdr(skb)->nexthdr;
1192 __be16 foff;
1193
1194 if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 ||
1195 proto != IPPROTO_UDP)
1196 return 0;
1197 } else {
1198 if (ip_hdr(skb)->protocol != IPPROTO_UDP)
1199 return 0;
1200 }
1201
1202 hint = &cb->gro_hint;
1203 memset(hint, 0, sizeof(*hint));
1204 hint->inner_proto_id = id;
1205 hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6);
1206 hint->nested_nh_offset = skb_network_offset(skb);
1207 hint->nested_tp_offset = skb_transport_offset(skb);
1208 hint->nested_hdr_len = nhlen;
1209 cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE;
1210 return GENEVE_OPT_GRO_HINT_SIZE;
1211 }
1212
geneve_put_gro_hint_opt(struct genevehdr * gnvh,int opt_size,const struct geneve_opt_gro_hint * hint)1213 static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size,
1214 const struct geneve_opt_gro_hint *hint)
1215 {
1216 struct geneve_opt *gro_opt;
1217
1218 /* geneve_build_header() did not took in account the GRO hint. */
1219 gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2;
1220
1221 gro_opt = (void *)(gnvh + 1) + opt_size;
1222 memset(gro_opt, 0, sizeof(*gro_opt));
1223
1224 gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS);
1225 gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE;
1226 gro_opt->length = GENEVE_OPT_GRO_HINT_LEN;
1227 memcpy(gro_opt + 1, hint, sizeof(*hint));
1228 }
1229
geneve_build_skb(struct dst_entry * dst,struct sk_buff * skb,const struct ip_tunnel_info * info,const struct geneve_dev * geneve,int ip_hdr_len)1230 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
1231 const struct ip_tunnel_info *info,
1232 const struct geneve_dev *geneve, int ip_hdr_len)
1233 {
1234 bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
1235 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
1236 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
1237 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb);
1238 struct genevehdr *gnvh;
1239 __be16 inner_proto;
1240 bool double_encap;
1241 int min_headroom;
1242 int opt_size;
1243 int err;
1244
1245 skb_reset_mac_header(skb);
1246 skb_scrub_packet(skb, xnet);
1247
1248 opt_size = info->options_len + cb->gro_hint_len;
1249 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
1250 GENEVE_BASE_HLEN + opt_size + ip_hdr_len;
1251 err = skb_cow_head(skb, min_headroom);
1252 if (unlikely(err))
1253 goto free_dst;
1254
1255 double_encap = udp_tunnel_handle_partial(skb);
1256 err = udp_tunnel_handle_offloads(skb, udp_sum);
1257 if (err)
1258 goto free_dst;
1259
1260 gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size);
1261 inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
1262 geneve_build_header(gnvh, info, inner_proto);
1263
1264 if (cb->gro_hint_len)
1265 geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint);
1266
1267 udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto);
1268 return 0;
1269
1270 free_dst:
1271 dst_release(dst);
1272 return err;
1273 }
1274
geneve_get_dsfield(struct sk_buff * skb,struct net_device * dev,const struct ip_tunnel_info * info,bool * use_cache)1275 static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
1276 const struct ip_tunnel_info *info,
1277 bool *use_cache)
1278 {
1279 struct geneve_dev *geneve = netdev_priv(dev);
1280 u8 dsfield;
1281
1282 dsfield = info->key.tos;
1283 if (dsfield == 1 && !geneve->cfg.collect_md) {
1284 dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
1285 *use_cache = false;
1286 }
1287
1288 return dsfield;
1289 }
1290
geneve_xmit_skb(struct sk_buff * skb,struct net_device * dev,struct geneve_dev * geneve,const struct ip_tunnel_info * info)1291 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
1292 struct geneve_dev *geneve,
1293 const struct ip_tunnel_info *info)
1294 {
1295 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
1296 const struct ip_tunnel_key *key = &info->key;
1297 struct rtable *rt;
1298 bool use_cache;
1299 __u8 tos, ttl;
1300 __be16 df = 0;
1301 __be32 saddr;
1302 __be16 sport;
1303 int err;
1304
1305 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit))
1306 return -EINVAL;
1307
1308 if (!gs4)
1309 return -EIO;
1310
1311 use_cache = ip_tunnel_dst_cache_usable(skb, info);
1312 tos = geneve_get_dsfield(skb, dev, info, &use_cache);
1313 sport = udp_flow_src_port(geneve->net, skb,
1314 geneve->cfg.port_min,
1315 geneve->cfg.port_max, true);
1316
1317 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
1318 &info->key,
1319 sport, geneve->cfg.info.key.tp_dst, tos,
1320 use_cache ?
1321 (struct dst_cache *)&info->dst_cache : NULL);
1322 if (IS_ERR(rt))
1323 return PTR_ERR(rt);
1324
1325 err = skb_tunnel_check_pmtu(skb, &rt->dst,
1326 GENEVE_IPV4_HLEN + info->options_len +
1327 geneve_build_gro_hint_opt(geneve, skb),
1328 netif_is_any_bridge_port(dev));
1329 if (err < 0) {
1330 dst_release(&rt->dst);
1331 return err;
1332 } else if (err) {
1333 struct ip_tunnel_info *info;
1334
1335 info = skb_tunnel_info(skb);
1336 if (info) {
1337 struct ip_tunnel_info *unclone;
1338
1339 unclone = skb_tunnel_info_unclone(skb);
1340 if (unlikely(!unclone)) {
1341 dst_release(&rt->dst);
1342 return -ENOMEM;
1343 }
1344
1345 unclone->key.u.ipv4.dst = saddr;
1346 unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
1347 }
1348
1349 if (!pskb_may_pull(skb, ETH_HLEN)) {
1350 dst_release(&rt->dst);
1351 return -EINVAL;
1352 }
1353
1354 skb->protocol = eth_type_trans(skb, geneve->dev);
1355 __netif_rx(skb);
1356 dst_release(&rt->dst);
1357 return -EMSGSIZE;
1358 }
1359
1360 tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
1361 if (geneve->cfg.collect_md) {
1362 ttl = key->ttl;
1363
1364 df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
1365 htons(IP_DF) : 0;
1366 } else {
1367 if (geneve->cfg.ttl_inherit)
1368 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
1369 else
1370 ttl = key->ttl;
1371 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
1372
1373 if (geneve->cfg.df == GENEVE_DF_SET) {
1374 df = htons(IP_DF);
1375 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
1376 struct ethhdr *eth = skb_eth_hdr(skb);
1377
1378 if (ntohs(eth->h_proto) == ETH_P_IPV6) {
1379 df = htons(IP_DF);
1380 } else if (ntohs(eth->h_proto) == ETH_P_IP) {
1381 struct iphdr *iph = ip_hdr(skb);
1382
1383 if (iph->frag_off & htons(IP_DF))
1384 df = htons(IP_DF);
1385 }
1386 }
1387 }
1388
1389 err = geneve_build_skb(&rt->dst, skb, info, geneve,
1390 sizeof(struct iphdr));
1391 if (unlikely(err))
1392 return err;
1393
1394 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
1395 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
1396 !net_eq(geneve->net, dev_net(geneve->dev)),
1397 !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
1398 0);
1399 return 0;
1400 }
1401
1402 #if IS_ENABLED(CONFIG_IPV6)
geneve6_xmit_skb(struct sk_buff * skb,struct net_device * dev,struct geneve_dev * geneve,const struct ip_tunnel_info * info)1403 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
1404 struct geneve_dev *geneve,
1405 const struct ip_tunnel_info *info)
1406 {
1407 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1408 const struct ip_tunnel_key *key = &info->key;
1409 struct dst_entry *dst = NULL;
1410 struct in6_addr saddr;
1411 bool use_cache;
1412 __u8 prio, ttl;
1413 __be16 sport;
1414 int err;
1415
1416 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit))
1417 return -EINVAL;
1418
1419 if (!gs6)
1420 return -EIO;
1421
1422 use_cache = ip_tunnel_dst_cache_usable(skb, info);
1423 prio = geneve_get_dsfield(skb, dev, info, &use_cache);
1424 sport = udp_flow_src_port(geneve->net, skb,
1425 geneve->cfg.port_min,
1426 geneve->cfg.port_max, true);
1427
1428 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
1429 &saddr, key, sport,
1430 geneve->cfg.info.key.tp_dst, prio,
1431 use_cache ?
1432 (struct dst_cache *)&info->dst_cache : NULL);
1433 if (IS_ERR(dst))
1434 return PTR_ERR(dst);
1435
1436 err = skb_tunnel_check_pmtu(skb, dst,
1437 GENEVE_IPV6_HLEN + info->options_len +
1438 geneve_build_gro_hint_opt(geneve, skb),
1439 netif_is_any_bridge_port(dev));
1440 if (err < 0) {
1441 dst_release(dst);
1442 return err;
1443 } else if (err) {
1444 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1445
1446 if (info) {
1447 struct ip_tunnel_info *unclone;
1448
1449 unclone = skb_tunnel_info_unclone(skb);
1450 if (unlikely(!unclone)) {
1451 dst_release(dst);
1452 return -ENOMEM;
1453 }
1454
1455 unclone->key.u.ipv6.dst = saddr;
1456 unclone->key.u.ipv6.src = info->key.u.ipv6.dst;
1457 }
1458
1459 if (!pskb_may_pull(skb, ETH_HLEN)) {
1460 dst_release(dst);
1461 return -EINVAL;
1462 }
1463
1464 skb->protocol = eth_type_trans(skb, geneve->dev);
1465 __netif_rx(skb);
1466 dst_release(dst);
1467 return -EMSGSIZE;
1468 }
1469
1470 prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb);
1471 if (geneve->cfg.collect_md) {
1472 ttl = key->ttl;
1473 } else {
1474 if (geneve->cfg.ttl_inherit)
1475 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
1476 else
1477 ttl = key->ttl;
1478 ttl = ttl ? : ip6_dst_hoplimit(dst);
1479 }
1480 err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr));
1481 if (unlikely(err))
1482 return err;
1483
1484 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
1485 &saddr, &key->u.ipv6.dst, prio, ttl,
1486 info->key.label, sport, geneve->cfg.info.key.tp_dst,
1487 !test_bit(IP_TUNNEL_CSUM_BIT,
1488 info->key.tun_flags),
1489 0);
1490 return 0;
1491 }
1492 #endif
1493
geneve_xmit(struct sk_buff * skb,struct net_device * dev)1494 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
1495 {
1496 struct geneve_dev *geneve = netdev_priv(dev);
1497 struct ip_tunnel_info *info = NULL;
1498 int err;
1499
1500 if (geneve->cfg.collect_md) {
1501 info = skb_tunnel_info(skb);
1502 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
1503 netdev_dbg(dev, "no tunnel metadata\n");
1504 dev_kfree_skb(skb);
1505 dev_dstats_tx_dropped(dev);
1506 return NETDEV_TX_OK;
1507 }
1508 } else {
1509 info = &geneve->cfg.info;
1510 }
1511
1512 rcu_read_lock();
1513 #if IS_ENABLED(CONFIG_IPV6)
1514 if (info->mode & IP_TUNNEL_INFO_IPV6)
1515 err = geneve6_xmit_skb(skb, dev, geneve, info);
1516 else
1517 #endif
1518 err = geneve_xmit_skb(skb, dev, geneve, info);
1519 rcu_read_unlock();
1520
1521 if (likely(!err))
1522 return NETDEV_TX_OK;
1523
1524 if (err != -EMSGSIZE)
1525 dev_kfree_skb(skb);
1526
1527 if (err == -ELOOP)
1528 DEV_STATS_INC(dev, collisions);
1529 else if (err == -ENETUNREACH)
1530 DEV_STATS_INC(dev, tx_carrier_errors);
1531
1532 DEV_STATS_INC(dev, tx_errors);
1533 return NETDEV_TX_OK;
1534 }
1535
geneve_change_mtu(struct net_device * dev,int new_mtu)1536 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
1537 {
1538 if (new_mtu > dev->max_mtu)
1539 new_mtu = dev->max_mtu;
1540 else if (new_mtu < dev->min_mtu)
1541 new_mtu = dev->min_mtu;
1542
1543 WRITE_ONCE(dev->mtu, new_mtu);
1544 return 0;
1545 }
1546
geneve_fill_metadata_dst(struct net_device * dev,struct sk_buff * skb)1547 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
1548 {
1549 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1550 struct geneve_dev *geneve = netdev_priv(dev);
1551 __be16 sport;
1552
1553 if (ip_tunnel_info_af(info) == AF_INET) {
1554 struct rtable *rt;
1555 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
1556 bool use_cache;
1557 __be32 saddr;
1558 u8 tos;
1559
1560 if (!gs4)
1561 return -EIO;
1562
1563 use_cache = ip_tunnel_dst_cache_usable(skb, info);
1564 tos = geneve_get_dsfield(skb, dev, info, &use_cache);
1565 sport = udp_flow_src_port(geneve->net, skb,
1566 geneve->cfg.port_min,
1567 geneve->cfg.port_max, true);
1568
1569 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
1570 &info->key,
1571 sport, geneve->cfg.info.key.tp_dst,
1572 tos,
1573 use_cache ? &info->dst_cache : NULL);
1574 if (IS_ERR(rt))
1575 return PTR_ERR(rt);
1576
1577 ip_rt_put(rt);
1578 info->key.u.ipv4.src = saddr;
1579 #if IS_ENABLED(CONFIG_IPV6)
1580 } else if (ip_tunnel_info_af(info) == AF_INET6) {
1581 struct dst_entry *dst;
1582 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
1583 struct in6_addr saddr;
1584 bool use_cache;
1585 u8 prio;
1586
1587 if (!gs6)
1588 return -EIO;
1589
1590 use_cache = ip_tunnel_dst_cache_usable(skb, info);
1591 prio = geneve_get_dsfield(skb, dev, info, &use_cache);
1592 sport = udp_flow_src_port(geneve->net, skb,
1593 geneve->cfg.port_min,
1594 geneve->cfg.port_max, true);
1595
1596 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
1597 &saddr, &info->key, sport,
1598 geneve->cfg.info.key.tp_dst, prio,
1599 use_cache ? &info->dst_cache : NULL);
1600 if (IS_ERR(dst))
1601 return PTR_ERR(dst);
1602
1603 dst_release(dst);
1604 info->key.u.ipv6.src = saddr;
1605 #endif
1606 } else {
1607 return -EINVAL;
1608 }
1609
1610 info->key.tp_src = sport;
1611 info->key.tp_dst = geneve->cfg.info.key.tp_dst;
1612 return 0;
1613 }
1614
1615 static const struct net_device_ops geneve_netdev_ops = {
1616 .ndo_init = geneve_init,
1617 .ndo_uninit = geneve_uninit,
1618 .ndo_open = geneve_open,
1619 .ndo_stop = geneve_stop,
1620 .ndo_start_xmit = geneve_xmit,
1621 .ndo_change_mtu = geneve_change_mtu,
1622 .ndo_validate_addr = eth_validate_addr,
1623 .ndo_set_mac_address = eth_mac_addr,
1624 .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
1625 };
1626
geneve_get_drvinfo(struct net_device * dev,struct ethtool_drvinfo * drvinfo)1627 static void geneve_get_drvinfo(struct net_device *dev,
1628 struct ethtool_drvinfo *drvinfo)
1629 {
1630 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
1631 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
1632 }
1633
1634 static const struct ethtool_ops geneve_ethtool_ops = {
1635 .get_drvinfo = geneve_get_drvinfo,
1636 .get_link = ethtool_op_get_link,
1637 };
1638
1639 /* Info for udev, that this is a virtual tunnel endpoint */
1640 static const struct device_type geneve_type = {
1641 .name = "geneve",
1642 };
1643
1644 /* Calls the ndo_udp_tunnel_add of the caller in order to
1645 * supply the listening GENEVE udp ports. Callers are expected
1646 * to implement the ndo_udp_tunnel_add.
1647 */
geneve_offload_rx_ports(struct net_device * dev,bool push)1648 static void geneve_offload_rx_ports(struct net_device *dev, bool push)
1649 {
1650 struct net *net = dev_net(dev);
1651 struct geneve_net *gn = net_generic(net, geneve_net_id);
1652 struct geneve_sock *gs;
1653
1654 ASSERT_RTNL();
1655
1656 list_for_each_entry(gs, &gn->sock_list, list) {
1657 if (push) {
1658 udp_tunnel_push_rx_port(dev, gs->sock,
1659 UDP_TUNNEL_TYPE_GENEVE);
1660 } else {
1661 udp_tunnel_drop_rx_port(dev, gs->sock,
1662 UDP_TUNNEL_TYPE_GENEVE);
1663 }
1664 }
1665 }
1666
1667 /* Initialize the device structure. */
geneve_setup(struct net_device * dev)1668 static void geneve_setup(struct net_device *dev)
1669 {
1670 ether_setup(dev);
1671
1672 dev->netdev_ops = &geneve_netdev_ops;
1673 dev->ethtool_ops = &geneve_ethtool_ops;
1674 dev->needs_free_netdev = true;
1675
1676 SET_NETDEV_DEVTYPE(dev, &geneve_type);
1677
1678 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1679 dev->features |= NETIF_F_RXCSUM;
1680 dev->features |= NETIF_F_GSO_SOFTWARE;
1681
1682 /* Partial features are disabled by default. */
1683 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
1684 dev->hw_features |= NETIF_F_RXCSUM;
1685 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1686 dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES;
1687 dev->hw_features |= NETIF_F_GSO_PARTIAL;
1688
1689 dev->hw_enc_features = dev->hw_features;
1690 dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES;
1691 dev->mangleid_features = NETIF_F_GSO_PARTIAL;
1692
1693 dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
1694 /* MTU range: 68 - (something less than 65535) */
1695 dev->min_mtu = ETH_MIN_MTU;
1696 /* The max_mtu calculation does not take account of GENEVE
1697 * options, to avoid excluding potentially valid
1698 * configurations. This will be further reduced by IPvX hdr size.
1699 */
1700 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
1701
1702 netif_keep_dst(dev);
1703 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1704 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
1705 dev->lltx = true;
1706 eth_hw_addr_random(dev);
1707 }
1708
1709 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
1710 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
1711 [IFLA_GENEVE_ID] = { .type = NLA_U32 },
1712 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
1713 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
1714 [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
1715 [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
1716 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
1717 [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
1718 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
1719 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
1720 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
1721 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
1722 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
1723 [IFLA_GENEVE_DF] = { .type = NLA_U8 },
1724 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
1725 [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
1726 [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG },
1727 };
1728
geneve_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)1729 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
1730 struct netlink_ext_ack *extack)
1731 {
1732 if (tb[IFLA_ADDRESS]) {
1733 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1734 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1735 "Provided link layer address is not Ethernet");
1736 return -EINVAL;
1737 }
1738
1739 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1740 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
1741 "Provided Ethernet address is not unicast");
1742 return -EADDRNOTAVAIL;
1743 }
1744 }
1745
1746 if (!data) {
1747 NL_SET_ERR_MSG(extack,
1748 "Not enough attributes provided to perform the operation");
1749 return -EINVAL;
1750 }
1751
1752 if (data[IFLA_GENEVE_ID]) {
1753 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1754
1755 if (vni >= GENEVE_N_VID) {
1756 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
1757 "Geneve ID must be lower than 16777216");
1758 return -ERANGE;
1759 }
1760 }
1761
1762 if (data[IFLA_GENEVE_DF]) {
1763 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
1764
1765 if (df < 0 || df > GENEVE_DF_MAX) {
1766 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
1767 "Invalid DF attribute");
1768 return -EINVAL;
1769 }
1770 }
1771
1772 if (data[IFLA_GENEVE_PORT_RANGE]) {
1773 const struct ifla_geneve_port_range *p;
1774
1775 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
1776 if (ntohs(p->high) < ntohs(p->low)) {
1777 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE],
1778 "Invalid source port range");
1779 return -EINVAL;
1780 }
1781 }
1782
1783 return 0;
1784 }
1785
/*
 * Search the devices of @gn for one whose tunnel id, destination port and
 * endpoint addresses all match @info.
 *
 * As a side result, *@tun_on_same_port reports whether any device uses the
 * same destination port, and *@tun_collect_md carries the collect_md
 * setting of the last such device on the list.
 *
 * Returns the last fully matching device, or NULL.
 */
static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
					  const struct ip_tunnel_info *info,
					  bool *tun_on_same_port,
					  bool *tun_collect_md)
{
	struct geneve_dev *cur, *match = NULL;

	*tun_on_same_port = false;
	*tun_collect_md = false;

	list_for_each_entry(cur, &gn->geneve_list, next) {
		const struct ip_tunnel_key *key = &cur->cfg.info.key;

		/* Devices on another port are of no interest at all. */
		if (info->key.tp_dst != key->tp_dst)
			continue;

		*tun_collect_md = cur->cfg.collect_md;
		*tun_on_same_port = true;

		if (info->key.tun_id != key->tun_id)
			continue;
		if (memcmp(&info->key.u, &key->u, sizeof(info->key.u)))
			continue;

		match = cur;
	}

	return match;
}
1807
is_tnl_info_zero(const struct ip_tunnel_info * info)1808 static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
1809 {
1810 return !(info->key.tun_id || info->key.tos ||
1811 !ip_tunnel_flags_empty(info->key.tun_flags) ||
1812 info->key.ttl || info->key.label || info->key.tp_src ||
1813 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
1814 }
1815
geneve_dst_addr_equal(struct ip_tunnel_info * a,struct ip_tunnel_info * b)1816 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
1817 struct ip_tunnel_info *b)
1818 {
1819 if (ip_tunnel_info_af(a) == AF_INET)
1820 return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
1821 else
1822 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
1823 }
1824
geneve_configure(struct net * net,struct net_device * dev,struct netlink_ext_ack * extack,const struct geneve_config * cfg)1825 static int geneve_configure(struct net *net, struct net_device *dev,
1826 struct netlink_ext_ack *extack,
1827 const struct geneve_config *cfg)
1828 {
1829 struct geneve_net *gn = net_generic(net, geneve_net_id);
1830 struct geneve_dev *t, *geneve = netdev_priv(dev);
1831 const struct ip_tunnel_info *info = &cfg->info;
1832 bool tun_collect_md, tun_on_same_port;
1833 int err, encap_len;
1834
1835 if (cfg->collect_md && !is_tnl_info_zero(info)) {
1836 NL_SET_ERR_MSG(extack,
1837 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
1838 return -EINVAL;
1839 }
1840
1841 geneve->net = net;
1842 geneve->dev = dev;
1843
1844 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
1845 if (t)
1846 return -EBUSY;
1847
1848 /* make enough headroom for basic scenario */
1849 encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
1850 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
1851 encap_len += sizeof(struct iphdr);
1852 dev->max_mtu -= sizeof(struct iphdr);
1853 } else {
1854 encap_len += sizeof(struct ipv6hdr);
1855 dev->max_mtu -= sizeof(struct ipv6hdr);
1856 }
1857 dev->needed_headroom = encap_len + ETH_HLEN;
1858
1859 if (cfg->collect_md) {
1860 if (tun_on_same_port) {
1861 NL_SET_ERR_MSG(extack,
1862 "There can be only one externally controlled device on a destination port");
1863 return -EPERM;
1864 }
1865 } else {
1866 if (tun_collect_md) {
1867 NL_SET_ERR_MSG(extack,
1868 "There already exists an externally controlled device on this destination port");
1869 return -EPERM;
1870 }
1871 }
1872
1873 dst_cache_reset(&geneve->cfg.info.dst_cache);
1874 memcpy(&geneve->cfg, cfg, sizeof(*cfg));
1875
1876 if (geneve->cfg.inner_proto_inherit) {
1877 dev->header_ops = NULL;
1878 dev->type = ARPHRD_NONE;
1879 dev->hard_header_len = 0;
1880 dev->addr_len = 0;
1881 dev->flags = IFF_POINTOPOINT | IFF_NOARP;
1882 }
1883
1884 err = register_netdevice(dev);
1885 if (err)
1886 return err;
1887
1888 list_add(&geneve->next, &gn->geneve_list);
1889 return 0;
1890 }
1891
/*
 * Reset @info to all zeroes and set its UDP destination port to
 * @dst_port, which is given in host byte order.
 */
static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
{
	memset(info, 0, sizeof(struct ip_tunnel_info));

	info->key.tp_dst = htons(dst_port);
}
1897
geneve_nl2info(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack,struct geneve_config * cfg,bool changelink)1898 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
1899 struct netlink_ext_ack *extack,
1900 struct geneve_config *cfg, bool changelink)
1901 {
1902 struct ip_tunnel_info *info = &cfg->info;
1903 int attrtype;
1904
1905 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
1906 NL_SET_ERR_MSG(extack,
1907 "Cannot specify both IPv4 and IPv6 Remote addresses");
1908 return -EINVAL;
1909 }
1910
1911 if (data[IFLA_GENEVE_REMOTE]) {
1912 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
1913 attrtype = IFLA_GENEVE_REMOTE;
1914 goto change_notsup;
1915 }
1916
1917 info->key.u.ipv4.dst =
1918 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
1919
1920 if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
1921 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
1922 "Remote IPv4 address cannot be Multicast");
1923 return -EINVAL;
1924 }
1925 }
1926
1927 if (data[IFLA_GENEVE_REMOTE6]) {
1928 #if IS_ENABLED(CONFIG_IPV6)
1929 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
1930 attrtype = IFLA_GENEVE_REMOTE6;
1931 goto change_notsup;
1932 }
1933
1934 info->mode = IP_TUNNEL_INFO_IPV6;
1935 info->key.u.ipv6.dst =
1936 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
1937
1938 if (ipv6_addr_type(&info->key.u.ipv6.dst) &
1939 IPV6_ADDR_LINKLOCAL) {
1940 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1941 "Remote IPv6 address cannot be link-local");
1942 return -EINVAL;
1943 }
1944 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
1945 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1946 "Remote IPv6 address cannot be Multicast");
1947 return -EINVAL;
1948 }
1949 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
1950 cfg->use_udp6_rx_checksums = true;
1951 #else
1952 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
1953 "IPv6 support not enabled in the kernel");
1954 return -EPFNOSUPPORT;
1955 #endif
1956 }
1957
1958 if (data[IFLA_GENEVE_ID]) {
1959 __u32 vni;
1960 __u8 tvni[3];
1961 __be64 tunid;
1962
1963 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
1964 tvni[0] = (vni & 0x00ff0000) >> 16;
1965 tvni[1] = (vni & 0x0000ff00) >> 8;
1966 tvni[2] = vni & 0x000000ff;
1967
1968 tunid = vni_to_tunnel_id(tvni);
1969 if (changelink && (tunid != info->key.tun_id)) {
1970 attrtype = IFLA_GENEVE_ID;
1971 goto change_notsup;
1972 }
1973 info->key.tun_id = tunid;
1974 }
1975
1976 if (data[IFLA_GENEVE_TTL_INHERIT]) {
1977 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
1978 cfg->ttl_inherit = true;
1979 else
1980 cfg->ttl_inherit = false;
1981 } else if (data[IFLA_GENEVE_TTL]) {
1982 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
1983 cfg->ttl_inherit = false;
1984 }
1985
1986 if (data[IFLA_GENEVE_TOS])
1987 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
1988
1989 if (data[IFLA_GENEVE_DF])
1990 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
1991
1992 if (data[IFLA_GENEVE_LABEL]) {
1993 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
1994 IPV6_FLOWLABEL_MASK;
1995 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
1996 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
1997 "Label attribute only applies for IPv6 Geneve devices");
1998 return -EINVAL;
1999 }
2000 }
2001
2002 if (data[IFLA_GENEVE_PORT]) {
2003 if (changelink) {
2004 attrtype = IFLA_GENEVE_PORT;
2005 goto change_notsup;
2006 }
2007 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
2008 }
2009
2010 if (data[IFLA_GENEVE_PORT_RANGE]) {
2011 const struct ifla_geneve_port_range *p;
2012
2013 if (changelink) {
2014 attrtype = IFLA_GENEVE_PORT_RANGE;
2015 goto change_notsup;
2016 }
2017 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
2018 cfg->port_min = ntohs(p->low);
2019 cfg->port_max = ntohs(p->high);
2020 }
2021
2022 if (data[IFLA_GENEVE_COLLECT_METADATA]) {
2023 if (changelink) {
2024 attrtype = IFLA_GENEVE_COLLECT_METADATA;
2025 goto change_notsup;
2026 }
2027 cfg->collect_md = true;
2028 }
2029
2030 if (data[IFLA_GENEVE_UDP_CSUM]) {
2031 if (changelink) {
2032 attrtype = IFLA_GENEVE_UDP_CSUM;
2033 goto change_notsup;
2034 }
2035 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
2036 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
2037 }
2038
2039 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
2040 #if IS_ENABLED(CONFIG_IPV6)
2041 if (changelink) {
2042 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
2043 goto change_notsup;
2044 }
2045 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
2046 __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
2047 #else
2048 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
2049 "IPv6 support not enabled in the kernel");
2050 return -EPFNOSUPPORT;
2051 #endif
2052 }
2053
2054 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
2055 #if IS_ENABLED(CONFIG_IPV6)
2056 if (changelink) {
2057 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
2058 goto change_notsup;
2059 }
2060 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
2061 cfg->use_udp6_rx_checksums = false;
2062 #else
2063 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
2064 "IPv6 support not enabled in the kernel");
2065 return -EPFNOSUPPORT;
2066 #endif
2067 }
2068
2069 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
2070 if (changelink) {
2071 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
2072 goto change_notsup;
2073 }
2074 cfg->inner_proto_inherit = true;
2075 }
2076
2077 if (data[IFLA_GENEVE_GRO_HINT]) {
2078 if (changelink) {
2079 attrtype = IFLA_GENEVE_GRO_HINT;
2080 goto change_notsup;
2081 }
2082 cfg->gro_hint = true;
2083 }
2084
2085 return 0;
2086 change_notsup:
2087 NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
2088 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported");
2089 return -EOPNOTSUPP;
2090 }
2091
geneve_link_config(struct net_device * dev,struct ip_tunnel_info * info,struct nlattr * tb[])2092 static void geneve_link_config(struct net_device *dev,
2093 struct ip_tunnel_info *info, struct nlattr *tb[])
2094 {
2095 struct geneve_dev *geneve = netdev_priv(dev);
2096 int ldev_mtu = 0;
2097
2098 if (tb[IFLA_MTU]) {
2099 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
2100 return;
2101 }
2102
2103 switch (ip_tunnel_info_af(info)) {
2104 case AF_INET: {
2105 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
2106 struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
2107
2108 if (!IS_ERR(rt) && rt->dst.dev) {
2109 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
2110 ip_rt_put(rt);
2111 }
2112 break;
2113 }
2114 #if IS_ENABLED(CONFIG_IPV6)
2115 case AF_INET6: {
2116 struct rt6_info *rt;
2117
2118 if (!__in6_dev_get(dev))
2119 break;
2120
2121 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
2122 NULL, 0);
2123
2124 if (rt && rt->dst.dev)
2125 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
2126 ip6_rt_put(rt);
2127 break;
2128 }
2129 #endif
2130 }
2131
2132 if (ldev_mtu <= 0)
2133 return;
2134
2135 geneve_change_mtu(dev, ldev_mtu - info->options_len);
2136 }
2137
geneve_newlink(struct net_device * dev,struct rtnl_newlink_params * params,struct netlink_ext_ack * extack)2138 static int geneve_newlink(struct net_device *dev,
2139 struct rtnl_newlink_params *params,
2140 struct netlink_ext_ack *extack)
2141 {
2142 struct net *link_net = rtnl_newlink_link_net(params);
2143 struct nlattr **data = params->data;
2144 struct nlattr **tb = params->tb;
2145 struct geneve_config cfg = {
2146 .df = GENEVE_DF_UNSET,
2147 .use_udp6_rx_checksums = false,
2148 .ttl_inherit = false,
2149 .collect_md = false,
2150 .port_min = 1,
2151 .port_max = USHRT_MAX,
2152 };
2153 int err;
2154
2155 init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
2156 err = geneve_nl2info(tb, data, extack, &cfg, false);
2157 if (err)
2158 return err;
2159
2160 err = geneve_configure(link_net, dev, extack, &cfg);
2161 if (err)
2162 return err;
2163
2164 geneve_link_config(dev, &cfg.info, tb);
2165
2166 return 0;
2167 }
2168
2169 /* Quiesces the geneve device data path for both TX and RX.
2170 *
2171 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
2172 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
2173 * to complete for the existing set of in-flight packets to be transmitted,
2174 * then we would have quiesced the transmit data path. All the future packets
2175 * will get dropped until we unquiesce the data path.
2176 *
2177 * On receive geneve dereference the geneve_sock stashed in the socket. So,
2178 * if we set that to NULL under RCU and wait for synchronize_net() to
2179 * complete, then we would have quiesced the receive data path.
2180 */
geneve_quiesce(struct geneve_dev * geneve,struct geneve_sock ** gs4,struct geneve_sock ** gs6)2181 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
2182 struct geneve_sock **gs6)
2183 {
2184 *gs4 = rtnl_dereference(geneve->sock4);
2185 rcu_assign_pointer(geneve->sock4, NULL);
2186 if (*gs4)
2187 rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
2188 #if IS_ENABLED(CONFIG_IPV6)
2189 *gs6 = rtnl_dereference(geneve->sock6);
2190 rcu_assign_pointer(geneve->sock6, NULL);
2191 if (*gs6)
2192 rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
2193 #else
2194 *gs6 = NULL;
2195 #endif
2196 synchronize_net();
2197 }
2198
2199 /* Resumes the geneve device data path for both TX and RX. */
geneve_unquiesce(struct geneve_dev * geneve,struct geneve_sock * gs4,struct geneve_sock __maybe_unused * gs6)2200 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
2201 struct geneve_sock __maybe_unused *gs6)
2202 {
2203 rcu_assign_pointer(geneve->sock4, gs4);
2204 if (gs4)
2205 rcu_assign_sk_user_data(gs4->sock->sk, gs4);
2206 #if IS_ENABLED(CONFIG_IPV6)
2207 rcu_assign_pointer(geneve->sock6, gs6);
2208 if (gs6)
2209 rcu_assign_sk_user_data(gs6->sock->sk, gs6);
2210 #endif
2211 synchronize_net();
2212 }
2213
geneve_changelink(struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)2214 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
2215 struct nlattr *data[],
2216 struct netlink_ext_ack *extack)
2217 {
2218 struct geneve_dev *geneve = netdev_priv(dev);
2219 struct geneve_sock *gs4, *gs6;
2220 struct geneve_config cfg;
2221 int err;
2222
2223 /* If the geneve device is configured for metadata (or externally
2224 * controlled, for example, OVS), then nothing can be changed.
2225 */
2226 if (geneve->cfg.collect_md)
2227 return -EOPNOTSUPP;
2228
2229 /* Start with the existing info. */
2230 memcpy(&cfg, &geneve->cfg, sizeof(cfg));
2231 err = geneve_nl2info(tb, data, extack, &cfg, true);
2232 if (err)
2233 return err;
2234
2235 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
2236 dst_cache_reset(&cfg.info.dst_cache);
2237 geneve_link_config(dev, &cfg.info, tb);
2238 }
2239
2240 geneve_quiesce(geneve, &gs4, &gs6);
2241 memcpy(&geneve->cfg, &cfg, sizeof(cfg));
2242 geneve_unquiesce(geneve, gs4, gs6);
2243
2244 return 0;
2245 }
2246
geneve_dellink(struct net_device * dev,struct list_head * head)2247 static void geneve_dellink(struct net_device *dev, struct list_head *head)
2248 {
2249 struct geneve_dev *geneve = netdev_priv(dev);
2250
2251 list_del(&geneve->next);
2252 unregister_netdevice_queue(dev, head);
2253 }
2254
geneve_get_size(const struct net_device * dev)2255 static size_t geneve_get_size(const struct net_device *dev)
2256 {
2257 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
2258 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
2259 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
2260 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
2261 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
2262 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
2263 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
2264 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
2265 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
2266 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
2267 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
2268 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
2269 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
2270 nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
2271 nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */
2272 0;
2273 }
2274
geneve_fill_info(struct sk_buff * skb,const struct net_device * dev)2275 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
2276 {
2277 struct geneve_dev *geneve = netdev_priv(dev);
2278 struct ip_tunnel_info *info = &geneve->cfg.info;
2279 bool ttl_inherit = geneve->cfg.ttl_inherit;
2280 bool metadata = geneve->cfg.collect_md;
2281 struct ifla_geneve_port_range ports = {
2282 .low = htons(geneve->cfg.port_min),
2283 .high = htons(geneve->cfg.port_max),
2284 };
2285 __u8 tmp_vni[3];
2286 __u32 vni;
2287
2288 tunnel_id_to_vni(info->key.tun_id, tmp_vni);
2289 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
2290 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
2291 goto nla_put_failure;
2292
2293 if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
2294 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
2295 info->key.u.ipv4.dst))
2296 goto nla_put_failure;
2297 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
2298 test_bit(IP_TUNNEL_CSUM_BIT,
2299 info->key.tun_flags)))
2300 goto nla_put_failure;
2301
2302 #if IS_ENABLED(CONFIG_IPV6)
2303 } else if (!metadata) {
2304 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
2305 &info->key.u.ipv6.dst))
2306 goto nla_put_failure;
2307 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
2308 !test_bit(IP_TUNNEL_CSUM_BIT,
2309 info->key.tun_flags)))
2310 goto nla_put_failure;
2311 #endif
2312 }
2313
2314 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
2315 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
2316 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
2317 goto nla_put_failure;
2318
2319 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
2320 goto nla_put_failure;
2321
2322 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
2323 goto nla_put_failure;
2324
2325 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
2326 goto nla_put_failure;
2327
2328 #if IS_ENABLED(CONFIG_IPV6)
2329 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
2330 !geneve->cfg.use_udp6_rx_checksums))
2331 goto nla_put_failure;
2332 #endif
2333
2334 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
2335 goto nla_put_failure;
2336
2337 if (geneve->cfg.inner_proto_inherit &&
2338 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
2339 goto nla_put_failure;
2340
2341 if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports))
2342 goto nla_put_failure;
2343
2344 if (geneve->cfg.gro_hint &&
2345 nla_put_flag(skb, IFLA_GENEVE_GRO_HINT))
2346 goto nla_put_failure;
2347
2348 return 0;
2349
2350 nla_put_failure:
2351 return -EMSGSIZE;
2352 }
2353
2354 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
2355 .kind = "geneve",
2356 .maxtype = IFLA_GENEVE_MAX,
2357 .policy = geneve_policy,
2358 .priv_size = sizeof(struct geneve_dev),
2359 .setup = geneve_setup,
2360 .validate = geneve_validate,
2361 .newlink = geneve_newlink,
2362 .changelink = geneve_changelink,
2363 .dellink = geneve_dellink,
2364 .get_size = geneve_get_size,
2365 .fill_info = geneve_fill_info,
2366 };
2367
/* Create and register a collect_md geneve device from inside the kernel.
 *
 * @net:              network namespace for the device
 * @name:             device name handed to rtnl_create_link()
 * @name_assign_type: name assignment type handed to rtnl_create_link()
 * @dst_port:         port handed to init_tnl_info() for the tunnel info
 *
 * Returns the new device, or an ERR_PTR() on failure.  A device that fails
 * before geneve_configure() succeeds is simply freed; later failures go
 * through the regular dellink/unregister path.
 */
struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
					u8 name_assign_type, u16 dst_port)
{
	struct geneve_config cfg = {
		.df			= GENEVE_DF_UNSET,
		.use_udp6_rx_checksums	= true,
		.ttl_inherit		= false,
		.collect_md		= true,
		.port_min		= 1,
		.port_max		= USHRT_MAX,
	};
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(kill_list);
	int ret;

	/* No link attributes are supplied by this in-kernel caller. */
	memset(tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &geneve_link_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	init_tnl_info(&cfg.info, dst_port);

	ret = geneve_configure(net, dev, NULL, &cfg);
	if (ret) {
		free_netdev(dev);
		return ERR_PTR(ret);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	ret = geneve_change_mtu(dev, IP_MAX_MTU);
	if (!ret) {
		ret = rtnl_configure_link(dev, NULL, 0, NULL);
		if (ret >= 0)
			return dev;
	}

	/* Tear the configured device down again. */
	geneve_dellink(dev, &kill_list);
	unregister_netdevice_many(&kill_list);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
2415
geneve_netdevice_event(struct notifier_block * unused,unsigned long event,void * ptr)2416 static int geneve_netdevice_event(struct notifier_block *unused,
2417 unsigned long event, void *ptr)
2418 {
2419 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2420
2421 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
2422 geneve_offload_rx_ports(dev, true);
2423 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
2424 geneve_offload_rx_ports(dev, false);
2425
2426 return NOTIFY_DONE;
2427 }
2428
2429 static struct notifier_block geneve_notifier_block __read_mostly = {
2430 .notifier_call = geneve_netdevice_event,
2431 };
2432
geneve_init_net(struct net * net)2433 static __net_init int geneve_init_net(struct net *net)
2434 {
2435 struct geneve_net *gn = net_generic(net, geneve_net_id);
2436
2437 INIT_LIST_HEAD(&gn->geneve_list);
2438 INIT_LIST_HEAD(&gn->sock_list);
2439 return 0;
2440 }
2441
geneve_exit_rtnl_net(struct net * net,struct list_head * dev_to_kill)2442 static void __net_exit geneve_exit_rtnl_net(struct net *net,
2443 struct list_head *dev_to_kill)
2444 {
2445 struct geneve_net *gn = net_generic(net, geneve_net_id);
2446 struct geneve_dev *geneve, *next;
2447
2448 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
2449 geneve_dellink(geneve->dev, dev_to_kill);
2450 }
2451
geneve_exit_net(struct net * net)2452 static void __net_exit geneve_exit_net(struct net *net)
2453 {
2454 const struct geneve_net *gn = net_generic(net, geneve_net_id);
2455
2456 WARN_ON_ONCE(!list_empty(&gn->sock_list));
2457 }
2458
2459 static struct pernet_operations geneve_net_ops = {
2460 .init = geneve_init_net,
2461 .exit_rtnl = geneve_exit_rtnl_net,
2462 .exit = geneve_exit_net,
2463 .id = &geneve_net_id,
2464 .size = sizeof(struct geneve_net),
2465 };
2466
geneve_init_module(void)2467 static int __init geneve_init_module(void)
2468 {
2469 int rc;
2470
2471 rc = register_pernet_subsys(&geneve_net_ops);
2472 if (rc)
2473 goto out1;
2474
2475 rc = register_netdevice_notifier(&geneve_notifier_block);
2476 if (rc)
2477 goto out2;
2478
2479 rc = rtnl_link_register(&geneve_link_ops);
2480 if (rc)
2481 goto out3;
2482
2483 return 0;
2484 out3:
2485 unregister_netdevice_notifier(&geneve_notifier_block);
2486 out2:
2487 unregister_pernet_subsys(&geneve_net_ops);
2488 out1:
2489 return rc;
2490 }
2491 late_initcall(geneve_init_module);
2492
geneve_cleanup_module(void)2493 static void __exit geneve_cleanup_module(void)
2494 {
2495 rtnl_link_unregister(&geneve_link_ops);
2496 unregister_netdevice_notifier(&geneve_notifier_block);
2497 unregister_pernet_subsys(&geneve_net_ops);
2498 }
2499 module_exit(geneve_cleanup_module);
2500
2501 MODULE_LICENSE("GPL");
2502 MODULE_VERSION(GENEVE_NETDEV_VER);
2503 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
2504 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
2505 MODULE_ALIAS_RTNL_LINK("geneve");
2506