1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GENEVE: Generic Network Virtualization Encapsulation 4 * 5 * Copyright (c) 2015 Red Hat, Inc. 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/ethtool.h> 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/etherdevice.h> 14 #include <linux/hash.h> 15 #include <net/dst_metadata.h> 16 #include <net/gro_cells.h> 17 #include <net/rtnetlink.h> 18 #include <net/geneve.h> 19 #include <net/gro.h> 20 #include <net/netdev_lock.h> 21 #include <net/protocol.h> 22 23 #define GENEVE_NETDEV_VER "0.6" 24 25 #define GENEVE_N_VID (1u << 24) 26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 27 28 #define VNI_HASH_BITS 10 29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 30 31 static bool log_ecn_error = true; 32 module_param(log_ecn_error, bool, 0644); 33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 34 35 #define GENEVE_VER 0 36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) 38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) 39 40 #define GENEVE_OPT_NETDEV_CLASS 0x100 41 #define GENEVE_OPT_GRO_HINT_SIZE 8 42 #define GENEVE_OPT_GRO_HINT_TYPE 1 43 #define GENEVE_OPT_GRO_HINT_LEN 1 44 45 struct geneve_opt_gro_hint { 46 u8 inner_proto_id:2, 47 nested_is_v6:1; 48 u8 nested_nh_offset; 49 u8 nested_tp_offset; 50 u8 nested_hdr_len; 51 }; 52 53 struct geneve_skb_cb { 54 unsigned int gro_hint_len; 55 struct geneve_opt_gro_hint gro_hint; 56 }; 57 58 #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0])) 59 60 /* per-network namespace private data for this module */ 61 struct geneve_net { 62 struct list_head geneve_list; 63 /* sock_list is protected by rtnl lock */ 64 struct list_head sock_list; 65 }; 66 67 static unsigned int geneve_net_id; 68 69 struct geneve_dev_node { 70 struct hlist_node hlist; 71 struct geneve_dev *geneve; 72 }; 73 74 struct geneve_config { 75 bool collect_md; 76 bool dualstack; 77 bool use_udp6_rx_checksums; 78 bool ttl_inherit; 79 bool gro_hint; 80 enum ifla_geneve_df df; 81 bool inner_proto_inherit; 82 u16 port_min; 83 u16 port_max; 84 85 /* Must be last --ends in a flexible-array member. */ 86 struct ip_tunnel_info info; 87 }; 88 89 /* Pseudo network device */ 90 struct geneve_dev { 91 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ 92 #if IS_ENABLED(CONFIG_IPV6) 93 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ 94 #endif 95 struct net *net; /* netns for packet i/o */ 96 struct net_device *dev; /* netdev for geneve tunnel */ 97 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ 98 #if IS_ENABLED(CONFIG_IPV6) 99 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ 100 #endif 101 struct list_head next; /* geneve's per namespace list */ 102 struct gro_cells gro_cells; 103 struct geneve_config cfg; 104 }; 105 106 struct geneve_sock { 107 bool collect_md; 108 bool gro_hint; 109 struct list_head list; 110 struct sock *sk; 111 struct rcu_head rcu; 112 int refcnt; 113 struct hlist_head vni_list[VNI_HASH_SIZE]; 114 }; 115 116 static const __be16 proto_id_map[] = { htons(ETH_P_TEB), 117 htons(ETH_P_IPV6), 118 htons(ETH_P_IP) }; 119 120 static int proto_to_id(__be16 proto) 121 { 122 int i; 123 124 for (i = 0; i < ARRAY_SIZE(proto_id_map); i++) 125 if (proto_id_map[i] == proto) 126 return i; 127 128 return -1; 129 } 130 131 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 132 { 133 __u32 vnid; 134 135 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 136 return hash_32(vnid, VNI_HASH_BITS); 137 } 138 139 static __be64 vni_to_tunnel_id(const __u8 *vni) 140 { 141 #ifdef __BIG_ENDIAN 142 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 143 #else 144 return (__force __be64)(((__force u64)vni[0] << 40) | 145 ((__force u64)vni[1] << 48) | 146 ((__force u64)vni[2] << 56)); 147 #endif 148 } 149 150 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 151 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 152 { 153 #ifdef __BIG_ENDIAN 154 vni[0] = (__force __u8)(tun_id >> 16); 155 vni[1] = (__force __u8)(tun_id >> 8); 156 vni[2] = (__force __u8)tun_id; 157 #else 158 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 159 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 160 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 161 #endif 162 } 163 164 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) 165 { 166 return !memcmp(vni, &tun_id[5], 3); 167 } 168 169 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) 170 { 171 return gs->sk->sk_family; 172 } 173 174 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 175 __be32 addr, u8 vni[]) 176 { 177 struct hlist_head *vni_list_head; 178 struct geneve_dev_node *node; 179 __u32 hash; 180 181 /* Find the device for this VNI */ 182 hash = geneve_net_vni_hash(vni); 183 vni_list_head = &gs->vni_list[hash]; 184 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 185 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 186 addr == node->geneve->cfg.info.key.u.ipv4.dst) 187 return node->geneve; 188 } 189 return NULL; 190 } 191 192 #if IS_ENABLED(CONFIG_IPV6) 193 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 194 struct in6_addr addr6, u8 vni[]) 195 { 196 struct hlist_head *vni_list_head; 197 struct geneve_dev_node *node; 198 __u32 hash; 199 200 /* Find the device for this VNI */ 201 hash = geneve_net_vni_hash(vni); 202 vni_list_head = &gs->vni_list[hash]; 203 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 204 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 205 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) 206 return node->geneve; 207 } 208 return NULL; 209 } 210 #endif 211 212 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 213 { 214 return (struct genevehdr *)(udp_hdr(skb) + 1); 215 } 216 217 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, 218 struct sk_buff *skb) 219 { 220 static u8 zero_vni[3]; 221 u8 *vni; 222 223 if (geneve_get_sk_family(gs) == AF_INET) { 224 struct iphdr *iph; 225 __be32 addr; 226 227 iph = ip_hdr(skb); /* outer IP header... */ 228 229 if (gs->collect_md) { 230 vni = zero_vni; 231 addr = 0; 232 } else { 233 vni = geneve_hdr(skb)->vni; 234 addr = iph->saddr; 235 } 236 237 return geneve_lookup(gs, addr, vni); 238 #if IS_ENABLED(CONFIG_IPV6) 239 } else if (geneve_get_sk_family(gs) == AF_INET6) { 240 static struct in6_addr zero_addr6; 241 struct ipv6hdr *ip6h; 242 struct in6_addr addr6; 243 244 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 245 246 if (gs->collect_md) { 247 vni = zero_vni; 248 addr6 = zero_addr6; 249 } else { 250 vni = geneve_hdr(skb)->vni; 251 addr6 = ip6h->saddr; 252 } 253 254 return geneve6_lookup(gs, addr6, vni); 255 #endif 256 } 257 return NULL; 258 } 259 260 /* geneve receive/decap routine */ 261 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, 262 struct sk_buff *skb, const struct genevehdr *gnvh) 263 { 264 struct metadata_dst *tun_dst = NULL; 265 unsigned int len; 266 int nh, err = 0; 267 void *oiph; 268 269 if (ip_tunnel_collect_metadata() || gs->collect_md) { 270 IP_TUNNEL_DECLARE_FLAGS(flags) = { }; 271 272 __set_bit(IP_TUNNEL_KEY_BIT, flags); 273 __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); 274 __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); 275 276 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, 277 vni_to_tunnel_id(gnvh->vni), 278 gnvh->opt_len * 4); 279 if (!tun_dst) { 280 dev_dstats_rx_dropped(geneve->dev); 281 goto drop; 282 } 283 /* Update tunnel dst according to Geneve options. */ 284 ip_tunnel_flags_zero(flags); 285 __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); 286 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 287 gnvh->options, gnvh->opt_len * 4, 288 flags); 289 } else { 290 /* Drop packets w/ critical options, 291 * since we don't support any... 292 */ 293 if (gnvh->critical) { 294 DEV_STATS_INC(geneve->dev, rx_frame_errors); 295 DEV_STATS_INC(geneve->dev, rx_errors); 296 goto drop; 297 } 298 } 299 300 if (tun_dst) 301 skb_dst_set(skb, &tun_dst->dst); 302 303 if (gnvh->proto_type == htons(ETH_P_TEB)) { 304 skb_reset_mac_header(skb); 305 skb->protocol = eth_type_trans(skb, geneve->dev); 306 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 307 308 /* Ignore packet loops (and multicast echo) */ 309 if (ether_addr_equal(eth_hdr(skb)->h_source, 310 geneve->dev->dev_addr)) { 311 DEV_STATS_INC(geneve->dev, rx_errors); 312 goto drop; 313 } 314 } else { 315 skb_reset_mac_header(skb); 316 skb->dev = geneve->dev; 317 skb->pkt_type = PACKET_HOST; 318 } 319 320 /* Save offset of outer header relative to skb->head, 321 * because we are going to reset the network header to the inner header 322 * and might change skb->head. 323 */ 324 nh = skb_network_header(skb) - skb->head; 325 326 skb_reset_network_header(skb); 327 328 if (!pskb_inet_may_pull(skb)) { 329 DEV_STATS_INC(geneve->dev, rx_length_errors); 330 DEV_STATS_INC(geneve->dev, rx_errors); 331 goto drop; 332 } 333 334 /* Get the outer header. */ 335 oiph = skb->head + nh; 336 337 if (geneve_get_sk_family(gs) == AF_INET) 338 err = IP_ECN_decapsulate(oiph, skb); 339 #if IS_ENABLED(CONFIG_IPV6) 340 else 341 err = IP6_ECN_decapsulate(oiph, skb); 342 #endif 343 344 if (unlikely(err)) { 345 if (log_ecn_error) { 346 if (geneve_get_sk_family(gs) == AF_INET) 347 net_info_ratelimited("non-ECT from %pI4 " 348 "with TOS=%#x\n", 349 &((struct iphdr *)oiph)->saddr, 350 ((struct iphdr *)oiph)->tos); 351 #if IS_ENABLED(CONFIG_IPV6) 352 else 353 net_info_ratelimited("non-ECT from %pI6\n", 354 &((struct ipv6hdr *)oiph)->saddr); 355 #endif 356 } 357 if (err > 1) { 358 DEV_STATS_INC(geneve->dev, rx_frame_errors); 359 DEV_STATS_INC(geneve->dev, rx_errors); 360 goto drop; 361 } 362 } 363 364 /* Skip the additional GRO stage when hints are in use. */ 365 len = skb->len; 366 if (skb->encapsulation) 367 err = netif_rx(skb); 368 else 369 err = gro_cells_receive(&geneve->gro_cells, skb); 370 if (likely(err == NET_RX_SUCCESS)) 371 dev_dstats_rx_add(geneve->dev, len); 372 373 return; 374 drop: 375 /* Consume bad packet */ 376 kfree_skb(skb); 377 } 378 379 /* Setup stats when device is created */ 380 static int geneve_init(struct net_device *dev) 381 { 382 struct geneve_dev *geneve = netdev_priv(dev); 383 int err; 384 385 err = gro_cells_init(&geneve->gro_cells, dev); 386 if (err) 387 return err; 388 389 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); 390 if (err) { 391 gro_cells_destroy(&geneve->gro_cells); 392 return err; 393 } 394 netdev_lockdep_set_classes(dev); 395 return 0; 396 } 397 398 static void geneve_uninit(struct net_device *dev) 399 { 400 struct geneve_dev *geneve = netdev_priv(dev); 401 402 dst_cache_destroy(&geneve->cfg.info.dst_cache); 403 gro_cells_destroy(&geneve->gro_cells); 404 } 405 406 static int geneve_hlen(const struct genevehdr *gh) 407 { 408 return sizeof(*gh) + gh->opt_len * 4; 409 } 410 411 /* 412 * Look for GRO hint in the genenve options; if not found or does not pass basic 413 * sanitization return 0, otherwise the offset WRT the geneve hdr start. 414 */ 415 static unsigned int 416 geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type, 417 unsigned int *gh_len) 418 { 419 struct geneve_opt *opt = (void *)(gh + 1); 420 unsigned int id, opt_len = gh->opt_len; 421 struct geneve_opt_gro_hint *gro_hint; 422 423 while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) { 424 if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) && 425 opt->type == GENEVE_OPT_GRO_HINT_TYPE && 426 opt->length == GENEVE_OPT_GRO_HINT_LEN) 427 goto found; 428 429 /* check for bad opt len */ 430 if (opt->length + 1 >= opt_len) 431 return 0; 432 433 /* next opt */ 434 opt_len -= opt->length + 1; 435 opt = ((void *)opt) + ((opt->length + 1) << 2); 436 } 437 return 0; 438 439 found: 440 gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data; 441 442 /* 443 * Sanitize the hinted hdrs: the nested transport is UDP and must fit 444 * the overall hinted hdr size. 445 */ 446 if (gro_hint->nested_tp_offset + sizeof(struct udphdr) > 447 gro_hint->nested_hdr_len) 448 return 0; 449 450 if (gro_hint->nested_nh_offset + 451 (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) : 452 sizeof(struct iphdr)) > 453 gro_hint->nested_tp_offset) 454 return 0; 455 456 /* Allow only supported L2. */ 457 id = gro_hint->inner_proto_id; 458 if (id >= ARRAY_SIZE(proto_id_map)) 459 return 0; 460 461 *type = proto_id_map[id]; 462 *gh_len += gro_hint->nested_hdr_len; 463 464 return (void *)gro_hint - (void *)gh; 465 } 466 467 static const struct geneve_opt_gro_hint * 468 geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off) 469 { 470 return (const struct geneve_opt_gro_hint *)((void *)gh + hint_off); 471 } 472 473 static unsigned int 474 geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh, 475 __be16 *type, unsigned int *gh_len) 476 { 477 const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk); 478 479 if (!gs || !gs->gro_hint) 480 return 0; 481 return geneve_opt_gro_hint_off(gh, type, gh_len); 482 } 483 484 /* Validate the packet headers pointed by data WRT the provided hint */ 485 static bool 486 geneve_opt_gro_hint_validate(void *data, 487 const struct geneve_opt_gro_hint *gro_hint) 488 { 489 void *nested_nh = data + gro_hint->nested_nh_offset; 490 struct iphdr *iph; 491 492 if (gro_hint->nested_is_v6) { 493 struct ipv6hdr *ipv6h = nested_nh; 494 struct ipv6_opt_hdr *opth; 495 int offset, len; 496 497 if (ipv6h->nexthdr == IPPROTO_UDP) 498 return true; 499 500 offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset; 501 while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) { 502 opth = data + offset; 503 504 len = ipv6_optlen(opth); 505 if (len + offset > gro_hint->nested_tp_offset) 506 return false; 507 if (opth->nexthdr == IPPROTO_UDP) 508 return true; 509 510 offset += len; 511 } 512 return false; 513 } 514 515 iph = nested_nh; 516 if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) || 517 iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5)) 518 return false; 519 520 return true; 521 } 522 523 /* 524 * Validate the skb headers following the specified geneve hdr vs the 525 * provided hint, including nested L4 checksum. 526 * The caller already ensured that the relevant amount of data is available 527 * in the linear part. 528 */ 529 static bool 530 geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb, 531 const struct genevehdr *gh, 532 const struct geneve_opt_gro_hint *gro_hint) 533 { 534 unsigned int plen, gh_len = geneve_hlen(gh); 535 void *nested = (void *)gh + gh_len; 536 struct udphdr *nested_uh; 537 unsigned int nested_len; 538 struct ipv6hdr *ipv6h; 539 struct iphdr *iph; 540 __wsum csum, psum; 541 542 if (!geneve_opt_gro_hint_validate(nested, gro_hint)) 543 return false; 544 545 /* Use GRO hints with nested csum only if the outer header has csum. */ 546 nested_uh = nested + gro_hint->nested_tp_offset; 547 if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL) 548 return true; 549 550 if (!NAPI_GRO_CB(skb)->csum_valid) 551 return false; 552 553 /* Compute the complete checksum up to the nested transport. */ 554 plen = gh_len + gro_hint->nested_tp_offset; 555 csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0)); 556 nested_len = skb_gro_len(skb) - plen; 557 558 /* Compute the nested pseudo header csum. */ 559 ipv6h = nested + gro_hint->nested_nh_offset; 560 iph = (struct iphdr *)ipv6h; 561 psum = gro_hint->nested_is_v6 ? 562 ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 563 nested_len, IPPROTO_UDP, 0)) : 564 csum_tcpudp_nofold(iph->saddr, iph->daddr, 565 nested_len, IPPROTO_UDP, 0); 566 567 return !csum_fold(csum_add(psum, csum)); 568 } 569 570 static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, 571 unsigned int gh_len, 572 struct genevehdr **geneveh) 573 { 574 const struct geneve_opt_gro_hint *gro_hint; 575 unsigned int len, total_len, hint_off; 576 struct ipv6hdr *ipv6h; 577 struct iphdr *iph; 578 struct udphdr *uh; 579 __be16 p; 580 581 hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len); 582 if (!hint_off) 583 return 0; 584 585 if (!skb_is_gso(skb)) 586 return 0; 587 588 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 589 if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len))) 590 return -ENOMEM; 591 592 *geneveh = geneve_hdr(skb); 593 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 594 595 /* 596 * Validate hints from untrusted source before accessing 597 * the headers; csum will be checked later by the nested 598 * protocol rx path. 599 */ 600 if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY && 601 !geneve_opt_gro_hint_validate(skb->data, gro_hint))) 602 return -EINVAL; 603 604 ipv6h = (void *)skb->data + gro_hint->nested_nh_offset; 605 iph = (struct iphdr *)ipv6h; 606 total_len = skb->len - gro_hint->nested_nh_offset; 607 if (total_len >= GRO_LEGACY_MAX_SIZE) 608 return -E2BIG; 609 610 /* 611 * After stripping the outer encap, the packet still carries a 612 * tunnel encapsulation: the nested one. 613 */ 614 skb->encapsulation = 1; 615 616 /* GSO expect a valid transpor header, move it to the current one. */ 617 skb_set_transport_header(skb, gro_hint->nested_tp_offset); 618 619 /* Adjust the nested IP{6} hdr to actual GSO len. */ 620 if (gro_hint->nested_is_v6) { 621 ipv6h->payload_len = htons(total_len - sizeof(*ipv6h)); 622 } else { 623 __be16 old_len = iph->tot_len; 624 625 iph->tot_len = htons(total_len); 626 627 /* For IPv4 additionally adjust the nested csum. */ 628 csum_replace2(&iph->check, old_len, iph->tot_len); 629 ip_send_check(iph); 630 } 631 632 /* Adjust the nested UDP header len and checksum. */ 633 uh = udp_hdr(skb); 634 uh->len = htons(skb->len - gro_hint->nested_tp_offset); 635 if (uh->check) { 636 len = skb->len - gro_hint->nested_tp_offset; 637 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; 638 if (gro_hint->nested_is_v6) 639 uh->check = ~udp_v6_check(len, &ipv6h->saddr, 640 &ipv6h->daddr, 0); 641 else 642 uh->check = ~udp_v4_check(len, iph->saddr, 643 iph->daddr, 0); 644 } else { 645 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; 646 } 647 return 0; 648 } 649 650 /* Callback from net/ipv4/udp.c to receive packets */ 651 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 652 { 653 struct genevehdr *geneveh; 654 struct geneve_dev *geneve; 655 struct geneve_sock *gs; 656 __be16 inner_proto; 657 int opts_len; 658 659 /* Need UDP and Geneve header to be present */ 660 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 661 goto drop; 662 663 /* Return packets with reserved bits set */ 664 geneveh = geneve_hdr(skb); 665 if (unlikely(geneveh->ver != GENEVE_VER)) 666 goto drop; 667 668 gs = rcu_dereference_sk_user_data(sk); 669 if (!gs) 670 goto drop; 671 672 geneve = geneve_lookup_skb(gs, skb); 673 if (!geneve) 674 goto drop; 675 676 inner_proto = geneveh->proto_type; 677 678 if (unlikely((!geneve->cfg.inner_proto_inherit && 679 inner_proto != htons(ETH_P_TEB)))) { 680 dev_dstats_rx_dropped(geneve->dev); 681 goto drop; 682 } 683 684 opts_len = geneveh->opt_len * 4; 685 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, 686 !net_eq(geneve->net, dev_net(geneve->dev)))) { 687 dev_dstats_rx_dropped(geneve->dev); 688 goto drop; 689 } 690 691 /* 692 * After hint processing, the transport header points to the inner one 693 * and we can't use anymore on geneve_hdr(). 694 */ 695 geneveh = geneve_hdr(skb); 696 if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) + 697 opts_len, &geneveh)) { 698 DEV_STATS_INC(geneve->dev, rx_errors); 699 goto drop; 700 } 701 702 geneve_rx(geneve, gs, skb, geneveh); 703 return 0; 704 705 drop: 706 /* Consume bad packet */ 707 kfree_skb(skb); 708 return 0; 709 } 710 711 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ 712 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) 713 { 714 struct genevehdr *geneveh; 715 struct geneve_sock *gs; 716 u8 zero_vni[3] = { 0 }; 717 u8 *vni = zero_vni; 718 719 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) 720 return -EINVAL; 721 722 geneveh = geneve_hdr(skb); 723 if (geneveh->ver != GENEVE_VER) 724 return -EINVAL; 725 726 if (geneveh->proto_type != htons(ETH_P_TEB)) 727 return -EINVAL; 728 729 gs = rcu_dereference_sk_user_data(sk); 730 if (!gs) 731 return -ENOENT; 732 733 if (geneve_get_sk_family(gs) == AF_INET) { 734 struct iphdr *iph = ip_hdr(skb); 735 __be32 addr4 = 0; 736 737 if (!gs->collect_md) { 738 vni = geneve_hdr(skb)->vni; 739 addr4 = iph->daddr; 740 } 741 742 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; 743 } 744 745 #if IS_ENABLED(CONFIG_IPV6) 746 if (geneve_get_sk_family(gs) == AF_INET6) { 747 struct ipv6hdr *ip6h = ipv6_hdr(skb); 748 struct in6_addr addr6; 749 750 memset(&addr6, 0, sizeof(struct in6_addr)); 751 752 if (!gs->collect_md) { 753 vni = geneve_hdr(skb)->vni; 754 addr6 = ip6h->daddr; 755 } 756 757 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; 758 } 759 #endif 760 761 return -EPFNOSUPPORT; 762 } 763 764 static struct sock *geneve_create_sock(struct net *net, 765 struct geneve_dev *geneve, bool ipv6) 766 { 767 struct ip_tunnel_info *info = &geneve->cfg.info; 768 struct udp_port_cfg udp_conf; 769 struct socket *sock; 770 int err; 771 772 memset(&udp_conf, 0, sizeof(udp_conf)); 773 774 #if IS_ENABLED(CONFIG_IPV6) 775 if (ipv6) { 776 udp_conf.family = AF_INET6; 777 udp_conf.ipv6_v6only = 1; 778 udp_conf.use_udp6_rx_checksums = geneve->cfg.use_udp6_rx_checksums; 779 udp_conf.local_ip6 = info->key.u.ipv6.src; 780 } else 781 #endif 782 { 783 udp_conf.family = AF_INET; 784 udp_conf.local_ip.s_addr = info->key.u.ipv4.src; 785 } 786 787 udp_conf.local_udp_port = info->key.tp_dst; 788 789 /* Open UDP socket */ 790 err = udp_sock_create(net, &udp_conf, &sock); 791 if (err < 0) 792 return ERR_PTR(err); 793 794 udp_allow_gso(sock->sk); 795 return sock->sk; 796 } 797 798 static bool geneve_hdr_match(struct sk_buff *skb, 799 const struct genevehdr *gh, 800 const struct genevehdr *gh2, 801 unsigned int hint_off) 802 { 803 const struct geneve_opt_gro_hint *gro_hint; 804 void *nested, *nested2, *nh, *nh2; 805 struct udphdr *udp, *udp2; 806 unsigned int gh_len; 807 808 /* Match the geneve hdr and options */ 809 if (gh->opt_len != gh2->opt_len) 810 return false; 811 812 gh_len = geneve_hlen(gh); 813 if (memcmp(gh, gh2, gh_len)) 814 return false; 815 816 if (!hint_off) 817 return true; 818 819 /* 820 * When gro is present consider the nested headers as part 821 * of the geneve options 822 */ 823 nested = (void *)gh + gh_len; 824 nested2 = (void *)gh2 + gh_len; 825 gro_hint = geneve_opt_gro_hint(gh, hint_off); 826 if (!memcmp(nested, nested2, gro_hint->nested_hdr_len)) 827 return true; 828 829 /* 830 * The nested headers differ; the packets can still belong to 831 * the same flow when IPs/proto/ports match; if so flushing is 832 * required. 833 */ 834 nh = nested + gro_hint->nested_nh_offset; 835 nh2 = nested2 + gro_hint->nested_nh_offset; 836 if (gro_hint->nested_is_v6) { 837 struct ipv6hdr *iph = nh, *iph2 = nh2; 838 unsigned int nested_nlen; 839 __be32 first_word; 840 841 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 842 if ((first_word & htonl(0xF00FFFFF)) || 843 !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || 844 !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || 845 iph->nexthdr != iph2->nexthdr) 846 return false; 847 848 nested_nlen = gro_hint->nested_tp_offset - 849 gro_hint->nested_nh_offset; 850 if (nested_nlen > sizeof(struct ipv6hdr) && 851 (memcmp(iph + 1, iph2 + 1, 852 nested_nlen - sizeof(struct ipv6hdr)))) 853 return false; 854 } else { 855 struct iphdr *iph = nh, *iph2 = nh2; 856 857 if ((iph->protocol ^ iph2->protocol) | 858 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | 859 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) 860 return false; 861 } 862 863 udp = nested + gro_hint->nested_tp_offset; 864 udp2 = nested2 + gro_hint->nested_tp_offset; 865 if (udp->source != udp2->source || udp->dest != udp2->dest || 866 udp->check != udp2->check) 867 return false; 868 869 NAPI_GRO_CB(skb)->flush = 1; 870 return true; 871 } 872 873 static struct sk_buff *geneve_gro_receive(struct sock *sk, 874 struct list_head *head, 875 struct sk_buff *skb) 876 { 877 unsigned int hlen, gh_len, off_gnv, hint_off; 878 const struct geneve_opt_gro_hint *gro_hint; 879 const struct packet_offload *ptype; 880 struct genevehdr *gh, *gh2; 881 struct sk_buff *pp = NULL; 882 struct sk_buff *p; 883 int flush = 1; 884 __be16 type; 885 886 off_gnv = skb_gro_offset(skb); 887 hlen = off_gnv + sizeof(*gh); 888 gh = skb_gro_header(skb, hlen, off_gnv); 889 if (unlikely(!gh)) 890 goto out; 891 892 if (gh->ver != GENEVE_VER || gh->oam) 893 goto out; 894 gh_len = geneve_hlen(gh); 895 type = gh->proto_type; 896 897 hlen = off_gnv + gh_len; 898 if (!skb_gro_may_pull(skb, hlen)) { 899 gh = skb_gro_header_slow(skb, hlen, off_gnv); 900 if (unlikely(!gh)) 901 goto out; 902 } 903 904 /* The GRO hint/nested hdr could use a different ethernet type. */ 905 hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); 906 if (hint_off) { 907 908 /* 909 * If the hint is present, and nested hdr validation fails, do 910 * not attempt plain GRO: it will ignore inner hdrs and cause 911 * OoO. 912 */ 913 gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv); 914 if (unlikely(!gh)) 915 goto out; 916 917 gro_hint = geneve_opt_gro_hint(gh, hint_off); 918 if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint)) 919 goto out; 920 } 921 922 list_for_each_entry(p, head, list) { 923 if (!NAPI_GRO_CB(p)->same_flow) 924 continue; 925 926 gh2 = (struct genevehdr *)(p->data + off_gnv); 927 if (!geneve_hdr_match(skb, gh, gh2, hint_off)) { 928 NAPI_GRO_CB(p)->same_flow = 0; 929 continue; 930 } 931 } 932 933 skb_gro_pull(skb, gh_len); 934 skb_gro_postpull_rcsum(skb, gh, gh_len); 935 if (likely(type == htons(ETH_P_TEB))) 936 return call_gro_receive(eth_gro_receive, head, skb); 937 938 ptype = gro_find_receive_by_type(type); 939 if (!ptype) 940 goto out; 941 942 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); 943 flush = 0; 944 945 out: 946 skb_gro_flush_final(skb, pp, flush); 947 948 return pp; 949 } 950 951 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, 952 int nhoff) 953 { 954 struct genevehdr *gh; 955 struct packet_offload *ptype; 956 __be16 type; 957 unsigned int gh_len; 958 int err = -ENOSYS; 959 960 gh = (struct genevehdr *)(skb->data + nhoff); 961 gh_len = geneve_hlen(gh); 962 type = gh->proto_type; 963 geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); 964 965 /* Bail out if we are about to dispatch past the inner network header 966 * gro_receive() validated. An inner VLAN tag only pushes 967 * inner_network_offset out, so use a lower bound. 968 */ 969 if (skb->encapsulation) { 970 unsigned int inner_nh = nhoff + gh_len; 971 972 if (type == htons(ETH_P_TEB)) 973 inner_nh += ETH_HLEN; 974 975 if (unlikely(inner_nh > NAPI_GRO_CB(skb)->inner_network_offset)) 976 return -EINVAL; 977 } 978 979 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ 980 if (likely(type == htons(ETH_P_TEB))) 981 return eth_gro_complete(skb, nhoff + gh_len); 982 983 ptype = gro_find_complete_by_type(type); 984 if (ptype) 985 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 986 987 skb_set_inner_mac_header(skb, nhoff + gh_len); 988 989 return err; 990 } 991 992 /* Create new listen socket if needed */ 993 static struct geneve_sock *geneve_socket_create(struct net *net, 994 struct geneve_dev *geneve, bool ipv6) 995 { 996 struct geneve_net *gn = net_generic(net, geneve_net_id); 997 struct udp_tunnel_sock_cfg tunnel_cfg; 998 struct geneve_sock *gs; 999 struct sock *sk; 1000 int h; 1001 1002 gs = kzalloc_obj(*gs); 1003 if (!gs) 1004 return ERR_PTR(-ENOMEM); 1005 1006 sk = geneve_create_sock(net, geneve, ipv6); 1007 if (IS_ERR(sk)) { 1008 kfree(gs); 1009 return ERR_CAST(sk); 1010 } 1011 1012 gs->sk = sk; 1013 gs->refcnt = 1; 1014 for (h = 0; h < VNI_HASH_SIZE; ++h) 1015 INIT_HLIST_HEAD(&gs->vni_list[h]); 1016 1017 /* Initialize the geneve udp offloads structure */ 1018 udp_tunnel_notify_add_rx_port(sk, UDP_TUNNEL_TYPE_GENEVE); 1019 1020 /* Mark socket as an encapsulation socket */ 1021 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 1022 tunnel_cfg.sk_user_data = gs; 1023 tunnel_cfg.encap_type = 1; 1024 tunnel_cfg.gro_receive = geneve_gro_receive; 1025 tunnel_cfg.gro_complete = geneve_gro_complete; 1026 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 1027 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; 1028 tunnel_cfg.encap_destroy = NULL; 1029 setup_udp_tunnel_sock(net, sk, &tunnel_cfg); 1030 list_add(&gs->list, &gn->sock_list); 1031 return gs; 1032 } 1033 1034 static void __geneve_sock_release(struct geneve_sock *gs) 1035 { 1036 if (!gs || --gs->refcnt) 1037 return; 1038 1039 list_del(&gs->list); 1040 udp_tunnel_notify_del_rx_port(gs->sk, UDP_TUNNEL_TYPE_GENEVE); 1041 udp_tunnel_sock_release(gs->sk); 1042 kfree_rcu(gs, rcu); 1043 } 1044 1045 static void geneve_sock_release(struct geneve_dev *geneve) 1046 { 1047 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); 1048 #if IS_ENABLED(CONFIG_IPV6) 1049 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); 1050 1051 rcu_assign_pointer(geneve->sock6, NULL); 1052 #endif 1053 1054 rcu_assign_pointer(geneve->sock4, NULL); 1055 1056 __geneve_sock_release(gs4); 1057 #if IS_ENABLED(CONFIG_IPV6) 1058 __geneve_sock_release(gs6); 1059 #endif 1060 } 1061 1062 static struct geneve_sock *geneve_find_sock(struct net *net, 1063 struct geneve_dev *geneve, bool ipv6) 1064 { 1065 struct geneve_net *gn = net_generic(net, geneve_net_id); 1066 struct ip_tunnel_info *info = &geneve->cfg.info; 1067 sa_family_t family = ipv6 ? AF_INET6 : AF_INET; 1068 bool gro_hint = geneve->cfg.gro_hint; 1069 __be16 dst_port = info->key.tp_dst; 1070 struct geneve_sock *gs; 1071 1072 list_for_each_entry(gs, &gn->sock_list, list) { 1073 if (inet_sk(gs->sk)->inet_sport != dst_port) 1074 continue; 1075 1076 if (geneve_get_sk_family(gs) != family) 1077 continue; 1078 1079 if (gs->gro_hint != gro_hint) 1080 continue; 1081 1082 if (family == AF_INET && 1083 inet_sk(gs->sk)->inet_saddr != info->key.u.ipv4.src) 1084 continue; 1085 1086 #if IS_ENABLED(CONFIG_IPV6) 1087 if (family == AF_INET6 && 1088 !ipv6_addr_equal(&gs->sk->sk_v6_rcv_saddr, &info->key.u.ipv6.src)) 1089 continue; 1090 #endif 1091 1092 return gs; 1093 } 1094 1095 return NULL; 1096 } 1097 1098 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 1099 { 1100 struct net *net = geneve->net; 1101 struct geneve_dev_node *node; 1102 struct geneve_sock *gs; 1103 __u8 vni[3]; 1104 __u32 hash; 1105 1106 gs = geneve_find_sock(net, geneve, ipv6); 1107 if (gs) { 1108 gs->refcnt++; 1109 goto out; 1110 } 1111 1112 gs = geneve_socket_create(net, geneve, ipv6); 1113 if (IS_ERR(gs)) 1114 return PTR_ERR(gs); 1115 1116 out: 1117 gs->collect_md = geneve->cfg.collect_md; 1118 gs->gro_hint = geneve->cfg.gro_hint; 1119 #if IS_ENABLED(CONFIG_IPV6) 1120 if (ipv6) { 1121 rcu_assign_pointer(geneve->sock6, gs); 1122 node = &geneve->hlist6; 1123 } else 1124 #endif 1125 { 1126 rcu_assign_pointer(geneve->sock4, gs); 1127 node = &geneve->hlist4; 1128 } 1129 node->geneve = geneve; 1130 1131 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); 1132 hash = geneve_net_vni_hash(vni); 1133 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); 1134 return 0; 1135 } 1136 1137 static int geneve_open(struct net_device *dev) 1138 { 1139 struct geneve_dev *geneve = netdev_priv(dev); 1140 bool dualstack = geneve->cfg.dualstack; 1141 bool ipv4, ipv6; 1142 int ret = 0; 1143 1144 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || dualstack; 1145 ipv4 = !ipv6 || dualstack; 1146 #if IS_ENABLED(CONFIG_IPV6) 1147 if (ipv6) { 1148 ret = geneve_sock_add(geneve, true); 1149 if (ret < 0 && ret != -EAFNOSUPPORT) 1150 ipv4 = false; 1151 } 1152 #endif 1153 if (ipv4) 1154 ret = geneve_sock_add(geneve, false); 1155 if (ret < 0) 1156 geneve_sock_release(geneve); 1157 1158 return ret; 1159 } 1160 1161 static int geneve_stop(struct net_device *dev) 1162 { 1163 struct geneve_dev *geneve = netdev_priv(dev); 1164 1165 hlist_del_init_rcu(&geneve->hlist4.hlist); 1166 #if IS_ENABLED(CONFIG_IPV6) 1167 hlist_del_init_rcu(&geneve->hlist6.hlist); 1168 #endif 1169 geneve_sock_release(geneve); 1170 return 0; 1171 } 1172 1173 static void geneve_build_header(struct genevehdr *geneveh, 1174 const struct ip_tunnel_info *info, 1175 __be16 inner_proto) 1176 { 1177 geneveh->ver = GENEVE_VER; 1178 geneveh->opt_len = info->options_len / 4; 1179 geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); 1180 geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, 1181 info->key.tun_flags); 1182 geneveh->rsvd1 = 0; 1183 tunnel_id_to_vni(info->key.tun_id, geneveh->vni); 1184 geneveh->proto_type = inner_proto; 1185 geneveh->rsvd2 = 0; 1186 1187 if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) 1188 ip_tunnel_info_opts_get(geneveh->options, info); 1189 } 1190 1191 static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve, 1192 struct sk_buff *skb) 1193 { 1194 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1195 struct geneve_opt_gro_hint *hint; 1196 unsigned int nhlen; 1197 bool nested_is_v6; 1198 int id; 1199 1200 BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb)); 1201 cb->gro_hint_len = 0; 1202 1203 /* Try to add the GRO hint only in case of double encap. */ 1204 if (!geneve->cfg.gro_hint || !skb->encapsulation) 1205 return 0; 1206 1207 /* 1208 * The nested headers must fit the geneve opt len fields and the 1209 * nested encap must carry a nested transport (UDP) header. 1210 */ 1211 nhlen = skb_inner_mac_header(skb) - skb->data; 1212 if (nhlen > 255 || !skb_transport_header_was_set(skb) || 1213 skb->inner_protocol_type != ENCAP_TYPE_ETHER || 1214 (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen)) 1215 return 0; 1216 1217 id = proto_to_id(skb->inner_protocol); 1218 if (id < 0) 1219 return 0; 1220 1221 nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1222 if (nested_is_v6) { 1223 int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 1224 u8 proto = ipv6_hdr(skb)->nexthdr; 1225 __be16 foff; 1226 1227 if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 || 1228 proto != IPPROTO_UDP) 1229 return 0; 1230 } else { 1231 if (ip_hdr(skb)->protocol != IPPROTO_UDP) 1232 return 0; 1233 } 1234 1235 hint = &cb->gro_hint; 1236 memset(hint, 0, sizeof(*hint)); 1237 hint->inner_proto_id = id; 1238 hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1239 hint->nested_nh_offset = skb_network_offset(skb); 1240 hint->nested_tp_offset = skb_transport_offset(skb); 1241 hint->nested_hdr_len = nhlen; 1242 cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE; 1243 return GENEVE_OPT_GRO_HINT_SIZE; 1244 } 1245 1246 static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, 1247 const struct geneve_opt_gro_hint *hint) 1248 { 1249 struct geneve_opt *gro_opt; 1250 1251 /* geneve_build_header() did not took in account the GRO hint. */ 1252 gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2; 1253 1254 gro_opt = (void *)(gnvh + 1) + opt_size; 1255 memset(gro_opt, 0, sizeof(*gro_opt)); 1256 1257 gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS); 1258 gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE; 1259 gro_opt->length = GENEVE_OPT_GRO_HINT_LEN; 1260 memcpy(gro_opt + 1, hint, sizeof(*hint)); 1261 } 1262 1263 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, 1264 const struct ip_tunnel_info *info, 1265 const struct geneve_dev *geneve, int ip_hdr_len) 1266 { 1267 bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 1268 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; 1269 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1270 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1271 struct genevehdr *gnvh; 1272 __be16 inner_proto; 1273 bool double_encap; 1274 int min_headroom; 1275 int opt_size; 1276 int err; 1277 1278 skb_reset_mac_header(skb); 1279 skb_scrub_packet(skb, xnet); 1280 1281 opt_size = info->options_len + cb->gro_hint_len; 1282 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 1283 GENEVE_BASE_HLEN + opt_size + ip_hdr_len; 1284 err = skb_cow_head(skb, min_headroom); 1285 if (unlikely(err)) 1286 goto free_dst; 1287 1288 double_encap = udp_tunnel_handle_partial(skb); 1289 err = udp_tunnel_handle_offloads(skb, udp_sum); 1290 if (err) 1291 goto free_dst; 1292 1293 gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size); 1294 inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB); 1295 geneve_build_header(gnvh, info, inner_proto); 1296 1297 if (cb->gro_hint_len) 1298 geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint); 1299 1300 udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto); 1301 return 0; 1302 1303 free_dst: 1304 dst_release(dst); 1305 return err; 1306 } 1307 1308 static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, 1309 const struct ip_tunnel_info *info, 1310 bool *use_cache) 1311 { 1312 struct geneve_dev *geneve = netdev_priv(dev); 1313 u8 dsfield; 1314 1315 dsfield = info->key.tos; 1316 if (dsfield == 1 && !geneve->cfg.collect_md) { 1317 dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 1318 *use_cache = false; 1319 } 1320 1321 return dsfield; 1322 } 1323 1324 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1325 struct geneve_dev *geneve, 1326 const struct ip_tunnel_info *info) 1327 { 1328 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1329 const struct ip_tunnel_key *key = &info->key; 1330 struct rtable *rt; 1331 bool use_cache; 1332 __u8 tos, ttl; 1333 __be16 df = 0; 1334 __be32 saddr; 1335 __be16 sport; 1336 int err; 1337 1338 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1339 return -EINVAL; 1340 1341 if (!gs4) 1342 return -EIO; 1343 1344 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1345 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1346 sport = udp_flow_src_port(geneve->net, skb, 1347 geneve->cfg.port_min, 1348 geneve->cfg.port_max, true); 1349 1350 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1351 &info->key, 1352 sport, geneve->cfg.info.key.tp_dst, tos, 1353 use_cache ? 1354 (struct dst_cache *)&info->dst_cache : NULL); 1355 if (IS_ERR(rt)) 1356 return PTR_ERR(rt); 1357 1358 if (geneve->cfg.info.key.u.ipv4.src && 1359 saddr != geneve->cfg.info.key.u.ipv4.src) { 1360 dst_release(&rt->dst); 1361 return -EADDRNOTAVAIL; 1362 } 1363 1364 err = skb_tunnel_check_pmtu(skb, &rt->dst, 1365 GENEVE_IPV4_HLEN + info->options_len + 1366 geneve_build_gro_hint_opt(geneve, skb), 1367 netif_is_any_bridge_port(dev)); 1368 if (err < 0) { 1369 dst_release(&rt->dst); 1370 return err; 1371 } else if (err) { 1372 struct ip_tunnel_info *info; 1373 1374 info = skb_tunnel_info(skb); 1375 if (info) { 1376 struct ip_tunnel_info *unclone; 1377 1378 unclone = skb_tunnel_info_unclone(skb); 1379 if (unlikely(!unclone)) { 1380 dst_release(&rt->dst); 1381 return -ENOMEM; 1382 } 1383 1384 unclone->key.u.ipv4.dst = saddr; 1385 unclone->key.u.ipv4.src = info->key.u.ipv4.dst; 1386 } 1387 1388 if (!pskb_may_pull(skb, ETH_HLEN)) { 1389 dst_release(&rt->dst); 1390 return -EINVAL; 1391 } 1392 1393 skb->protocol = eth_type_trans(skb, geneve->dev); 1394 __netif_rx(skb); 1395 dst_release(&rt->dst); 1396 return -EMSGSIZE; 1397 } 1398 1399 tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); 1400 if (geneve->cfg.collect_md) { 1401 ttl = key->ttl; 1402 1403 df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? 1404 htons(IP_DF) : 0; 1405 } else { 1406 if (geneve->cfg.ttl_inherit) 1407 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1408 else 1409 ttl = key->ttl; 1410 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 1411 1412 if (geneve->cfg.df == GENEVE_DF_SET) { 1413 df = htons(IP_DF); 1414 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { 1415 struct ethhdr *eth = skb_eth_hdr(skb); 1416 1417 if (ntohs(eth->h_proto) == ETH_P_IPV6) { 1418 df = htons(IP_DF); 1419 } else if (ntohs(eth->h_proto) == ETH_P_IP) { 1420 struct iphdr *iph = ip_hdr(skb); 1421 1422 if (iph->frag_off & htons(IP_DF)) 1423 df = htons(IP_DF); 1424 } 1425 } 1426 } 1427 1428 err = geneve_build_skb(&rt->dst, skb, info, geneve, 1429 sizeof(struct iphdr)); 1430 if (unlikely(err)) 1431 return err; 1432 1433 udp_tunnel_xmit_skb(rt, gs4->sk, skb, saddr, info->key.u.ipv4.dst, 1434 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, 1435 !net_eq(geneve->net, dev_net(geneve->dev)), 1436 !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 1437 0); 1438 return 0; 1439 } 1440 1441 #if IS_ENABLED(CONFIG_IPV6) 1442 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1443 struct geneve_dev *geneve, 1444 const struct ip_tunnel_info *info) 1445 { 1446 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1447 const struct ip_tunnel_key *key = &info->key; 1448 struct dst_entry *dst = NULL; 1449 struct in6_addr saddr; 1450 bool use_cache; 1451 __u8 prio, ttl; 1452 __be16 sport; 1453 int err; 1454 1455 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1456 return -EINVAL; 1457 1458 if (!gs6) 1459 return -EIO; 1460 1461 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1462 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1463 sport = udp_flow_src_port(geneve->net, skb, 1464 geneve->cfg.port_min, 1465 geneve->cfg.port_max, true); 1466 1467 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1468 &saddr, key, sport, 1469 geneve->cfg.info.key.tp_dst, prio, 1470 use_cache ? 1471 (struct dst_cache *)&info->dst_cache : NULL); 1472 if (IS_ERR(dst)) 1473 return PTR_ERR(dst); 1474 1475 if (!ipv6_addr_any(&geneve->cfg.info.key.u.ipv6.src) && 1476 !ipv6_addr_equal(&saddr, &geneve->cfg.info.key.u.ipv6.src)) { 1477 dst_release(dst); 1478 return -EADDRNOTAVAIL; 1479 } 1480 1481 err = skb_tunnel_check_pmtu(skb, dst, 1482 GENEVE_IPV6_HLEN + info->options_len + 1483 geneve_build_gro_hint_opt(geneve, skb), 1484 netif_is_any_bridge_port(dev)); 1485 if (err < 0) { 1486 dst_release(dst); 1487 return err; 1488 } else if (err) { 1489 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1490 1491 if (info) { 1492 struct ip_tunnel_info *unclone; 1493 1494 unclone = skb_tunnel_info_unclone(skb); 1495 if (unlikely(!unclone)) { 1496 dst_release(dst); 1497 return -ENOMEM; 1498 } 1499 1500 unclone->key.u.ipv6.dst = saddr; 1501 unclone->key.u.ipv6.src = info->key.u.ipv6.dst; 1502 } 1503 1504 if (!pskb_may_pull(skb, ETH_HLEN)) { 1505 dst_release(dst); 1506 return -EINVAL; 1507 } 1508 1509 skb->protocol = eth_type_trans(skb, geneve->dev); 1510 __netif_rx(skb); 1511 dst_release(dst); 1512 return -EMSGSIZE; 1513 } 1514 1515 prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); 1516 if (geneve->cfg.collect_md) { 1517 ttl = key->ttl; 1518 } else { 1519 if (geneve->cfg.ttl_inherit) 1520 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1521 else 1522 ttl = key->ttl; 1523 ttl = ttl ? : ip6_dst_hoplimit(dst); 1524 } 1525 err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); 1526 if (unlikely(err)) 1527 return err; 1528 1529 udp_tunnel6_xmit_skb(dst, gs6->sk, skb, dev, 1530 &saddr, &key->u.ipv6.dst, prio, ttl, 1531 info->key.label, sport, geneve->cfg.info.key.tp_dst, 1532 !test_bit(IP_TUNNEL_CSUM_BIT, 1533 info->key.tun_flags), 1534 0); 1535 return 0; 1536 } 1537 #endif 1538 1539 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1540 { 1541 struct geneve_dev *geneve = netdev_priv(dev); 1542 struct ip_tunnel_info *info = NULL; 1543 int err; 1544 1545 if (geneve->cfg.collect_md) { 1546 info = skb_tunnel_info(skb); 1547 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1548 netdev_dbg(dev, "no tunnel metadata\n"); 1549 dev_kfree_skb(skb); 1550 dev_dstats_tx_dropped(dev); 1551 return NETDEV_TX_OK; 1552 } 1553 } else { 1554 info = &geneve->cfg.info; 1555 } 1556 1557 rcu_read_lock(); 1558 #if IS_ENABLED(CONFIG_IPV6) 1559 if (info->mode & IP_TUNNEL_INFO_IPV6) 1560 err = geneve6_xmit_skb(skb, dev, geneve, info); 1561 else 1562 #endif 1563 err = geneve_xmit_skb(skb, dev, geneve, info); 1564 rcu_read_unlock(); 1565 1566 if (likely(!err)) 1567 return NETDEV_TX_OK; 1568 1569 if (err != -EMSGSIZE) 1570 dev_kfree_skb(skb); 1571 1572 if (err == -ELOOP) 1573 DEV_STATS_INC(dev, collisions); 1574 else if (err == -ENETUNREACH) 1575 DEV_STATS_INC(dev, tx_carrier_errors); 1576 1577 DEV_STATS_INC(dev, tx_errors); 1578 return NETDEV_TX_OK; 1579 } 1580 1581 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1582 { 1583 if (new_mtu > dev->max_mtu) 1584 new_mtu = dev->max_mtu; 1585 else if (new_mtu < dev->min_mtu) 1586 new_mtu = dev->min_mtu; 1587 1588 WRITE_ONCE(dev->mtu, new_mtu); 1589 return 0; 1590 } 1591 1592 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1593 { 1594 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1595 struct geneve_dev *geneve = netdev_priv(dev); 1596 __be16 sport; 1597 1598 if (ip_tunnel_info_af(info) == AF_INET) { 1599 struct rtable *rt; 1600 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1601 bool use_cache; 1602 __be32 saddr; 1603 u8 tos; 1604 1605 if (!gs4) 1606 return -EIO; 1607 1608 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1609 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1610 sport = udp_flow_src_port(geneve->net, skb, 1611 geneve->cfg.port_min, 1612 geneve->cfg.port_max, true); 1613 1614 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1615 &info->key, 1616 sport, geneve->cfg.info.key.tp_dst, 1617 tos, 1618 use_cache ? &info->dst_cache : NULL); 1619 if (IS_ERR(rt)) 1620 return PTR_ERR(rt); 1621 1622 ip_rt_put(rt); 1623 info->key.u.ipv4.src = saddr; 1624 #if IS_ENABLED(CONFIG_IPV6) 1625 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1626 struct dst_entry *dst; 1627 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1628 struct in6_addr saddr; 1629 bool use_cache; 1630 u8 prio; 1631 1632 if (!gs6) 1633 return -EIO; 1634 1635 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1636 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1637 sport = udp_flow_src_port(geneve->net, skb, 1638 geneve->cfg.port_min, 1639 geneve->cfg.port_max, true); 1640 1641 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1642 &saddr, &info->key, sport, 1643 geneve->cfg.info.key.tp_dst, prio, 1644 use_cache ? &info->dst_cache : NULL); 1645 if (IS_ERR(dst)) 1646 return PTR_ERR(dst); 1647 1648 dst_release(dst); 1649 info->key.u.ipv6.src = saddr; 1650 #endif 1651 } else { 1652 return -EINVAL; 1653 } 1654 1655 info->key.tp_src = sport; 1656 info->key.tp_dst = geneve->cfg.info.key.tp_dst; 1657 return 0; 1658 } 1659 1660 static const struct net_device_ops geneve_netdev_ops = { 1661 .ndo_init = geneve_init, 1662 .ndo_uninit = geneve_uninit, 1663 .ndo_open = geneve_open, 1664 .ndo_stop = geneve_stop, 1665 .ndo_start_xmit = geneve_xmit, 1666 .ndo_change_mtu = geneve_change_mtu, 1667 .ndo_validate_addr = eth_validate_addr, 1668 .ndo_set_mac_address = eth_mac_addr, 1669 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1670 }; 1671 1672 static void geneve_get_drvinfo(struct net_device *dev, 1673 struct ethtool_drvinfo *drvinfo) 1674 { 1675 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1676 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1677 } 1678 1679 static const struct ethtool_ops geneve_ethtool_ops = { 1680 .get_drvinfo = geneve_get_drvinfo, 1681 .get_link = ethtool_op_get_link, 1682 }; 1683 1684 /* Info for udev, that this is a virtual tunnel endpoint */ 1685 static const struct device_type geneve_type = { 1686 .name = "geneve", 1687 }; 1688 1689 /* Calls the ndo_udp_tunnel_add of the caller in order to 1690 * supply the listening GENEVE udp ports. Callers are expected 1691 * to implement the ndo_udp_tunnel_add. 1692 */ 1693 static void geneve_offload_rx_ports(struct net_device *dev, bool push) 1694 { 1695 struct net *net = dev_net(dev); 1696 struct geneve_net *gn = net_generic(net, geneve_net_id); 1697 struct geneve_sock *gs; 1698 1699 ASSERT_RTNL(); 1700 1701 list_for_each_entry(gs, &gn->sock_list, list) { 1702 if (push) { 1703 udp_tunnel_push_rx_port(dev, gs->sk, 1704 UDP_TUNNEL_TYPE_GENEVE); 1705 } else { 1706 udp_tunnel_drop_rx_port(dev, gs->sk, 1707 UDP_TUNNEL_TYPE_GENEVE); 1708 } 1709 } 1710 } 1711 1712 /* Initialize the device structure. */ 1713 static void geneve_setup(struct net_device *dev) 1714 { 1715 ether_setup(dev); 1716 1717 dev->netdev_ops = &geneve_netdev_ops; 1718 dev->ethtool_ops = &geneve_ethtool_ops; 1719 dev->needs_free_netdev = true; 1720 1721 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1722 1723 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1724 dev->features |= NETIF_F_RXCSUM; 1725 dev->features |= NETIF_F_GSO_SOFTWARE; 1726 1727 /* Partial features are disabled by default. */ 1728 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1729 dev->hw_features |= NETIF_F_RXCSUM; 1730 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1731 dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES; 1732 dev->hw_features |= NETIF_F_GSO_PARTIAL; 1733 1734 dev->hw_enc_features = dev->hw_features; 1735 dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES; 1736 dev->mangleid_features = NETIF_F_GSO_PARTIAL; 1737 1738 dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; 1739 /* MTU range: 68 - (something less than 65535) */ 1740 dev->min_mtu = ETH_MIN_MTU; 1741 /* The max_mtu calculation does not take account of GENEVE 1742 * options, to avoid excluding potentially valid 1743 * configurations. This will be further reduced by IPvX hdr size. 1744 */ 1745 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; 1746 1747 netif_keep_dst(dev); 1748 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1749 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1750 dev->lltx = true; 1751 eth_hw_addr_random(dev); 1752 } 1753 1754 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1755 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, 1756 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1757 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1758 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1759 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1760 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1761 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1762 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1763 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1764 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1765 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1766 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1767 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, 1768 [IFLA_GENEVE_DF] = { .type = NLA_U8 }, 1769 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, 1770 [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), 1771 [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG }, 1772 [IFLA_GENEVE_LOCAL] = { .type = NLA_BE32 }, 1773 [IFLA_GENEVE_LOCAL6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 1774 }; 1775 1776 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], 1777 struct netlink_ext_ack *extack) 1778 { 1779 if (tb[IFLA_ADDRESS]) { 1780 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { 1781 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1782 "Provided link layer address is not Ethernet"); 1783 return -EINVAL; 1784 } 1785 1786 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { 1787 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1788 "Provided Ethernet address is not unicast"); 1789 return -EADDRNOTAVAIL; 1790 } 1791 } 1792 1793 if (!data) { 1794 NL_SET_ERR_MSG(extack, 1795 "Not enough attributes provided to perform the operation"); 1796 return -EINVAL; 1797 } 1798 1799 if (data[IFLA_GENEVE_ID]) { 1800 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1801 1802 if (vni >= GENEVE_N_VID) { 1803 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], 1804 "Geneve ID must be lower than 16777216"); 1805 return -ERANGE; 1806 } 1807 } 1808 1809 if (data[IFLA_GENEVE_DF]) { 1810 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); 1811 1812 if (df < 0 || df > GENEVE_DF_MAX) { 1813 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], 1814 "Invalid DF attribute"); 1815 return -EINVAL; 1816 } 1817 } 1818 1819 if (data[IFLA_GENEVE_PORT_RANGE]) { 1820 const struct ifla_geneve_port_range *p; 1821 1822 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 1823 if (ntohs(p->high) < ntohs(p->low)) { 1824 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], 1825 "Invalid source port range"); 1826 return -EINVAL; 1827 } 1828 } 1829 1830 return 0; 1831 } 1832 1833 static bool geneve_saddr_wildcard(const struct ip_tunnel_info *info) 1834 { 1835 if (ip_tunnel_info_af(info) == AF_INET) { 1836 if (!info->key.u.ipv4.src) 1837 return true; 1838 #if IS_ENABLED(CONFIG_IPV6) 1839 } else { 1840 if (ipv6_addr_any(&info->key.u.ipv6.src)) 1841 return true; 1842 #endif 1843 } 1844 1845 return false; 1846 } 1847 1848 static bool geneve_saddr_conflict(const struct ip_tunnel_info *a, 1849 const struct ip_tunnel_info *b) 1850 { 1851 if (ip_tunnel_info_af(a) != ip_tunnel_info_af(b)) 1852 return false; 1853 1854 if (geneve_saddr_wildcard(a) || geneve_saddr_wildcard(b)) 1855 return true; 1856 1857 if (ip_tunnel_info_af(a) == AF_INET) { 1858 if (a->key.u.ipv4.src == b->key.u.ipv4.src) 1859 return true; 1860 #if IS_ENABLED(CONFIG_IPV6) 1861 } else { 1862 if (ipv6_addr_equal(&a->key.u.ipv6.src, &b->key.u.ipv6.src)) 1863 return true; 1864 #endif 1865 } 1866 1867 return false; 1868 } 1869 1870 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1871 const struct geneve_config *cfg, 1872 const struct ip_tunnel_info *info, 1873 bool *tun_on_same_port, 1874 bool *tun_collect_md) 1875 { 1876 struct geneve_dev *geneve, *t = NULL; 1877 1878 *tun_on_same_port = false; 1879 *tun_collect_md = false; 1880 list_for_each_entry(geneve, &gn->geneve_list, next) { 1881 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1882 (cfg->dualstack || geneve->cfg.dualstack || 1883 geneve_saddr_conflict(info, &geneve->cfg.info))) { 1884 *tun_collect_md |= geneve->cfg.collect_md; 1885 *tun_on_same_port = true; 1886 } 1887 if (info->key.tun_id == geneve->cfg.info.key.tun_id && 1888 info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1889 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) 1890 t = geneve; 1891 } 1892 return t; 1893 } 1894 1895 static bool is_tnl_info_zero(const struct ip_tunnel_info *info) 1896 { 1897 return !(info->key.tun_id || info->key.tos || 1898 !ip_tunnel_flags_empty(info->key.tun_flags) || 1899 info->key.ttl || info->key.label || info->key.tp_src || 1900 #if IS_ENABLED(CONFIG_IPV6) 1901 (ip_tunnel_info_af(info) == AF_INET6 && 1902 !ipv6_addr_any(&info->key.u.ipv6.dst)) || 1903 #endif 1904 (ip_tunnel_info_af(info) == AF_INET && 1905 info->key.u.ipv4.dst)); 1906 } 1907 1908 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, 1909 struct ip_tunnel_info *b) 1910 { 1911 if (ip_tunnel_info_af(a) == AF_INET) 1912 return a->key.u.ipv4.dst == b->key.u.ipv4.dst; 1913 else 1914 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); 1915 } 1916 1917 static int geneve_configure(struct net *net, struct net_device *dev, 1918 struct netlink_ext_ack *extack, 1919 const struct geneve_config *cfg) 1920 { 1921 struct geneve_net *gn = net_generic(net, geneve_net_id); 1922 struct geneve_dev *t, *geneve = netdev_priv(dev); 1923 const struct ip_tunnel_info *info = &cfg->info; 1924 bool tun_collect_md, tun_on_same_port; 1925 int err, encap_len; 1926 1927 if (cfg->collect_md && !is_tnl_info_zero(info)) { 1928 NL_SET_ERR_MSG(extack, 1929 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); 1930 return -EINVAL; 1931 } 1932 1933 geneve->net = net; 1934 geneve->dev = dev; 1935 1936 t = geneve_find_dev(gn, cfg, info, &tun_on_same_port, &tun_collect_md); 1937 if (t) 1938 return -EBUSY; 1939 1940 /* make enough headroom for basic scenario */ 1941 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1942 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { 1943 encap_len += sizeof(struct iphdr); 1944 dev->max_mtu -= sizeof(struct iphdr); 1945 } else { 1946 encap_len += sizeof(struct ipv6hdr); 1947 dev->max_mtu -= sizeof(struct ipv6hdr); 1948 } 1949 dev->needed_headroom = encap_len + ETH_HLEN; 1950 1951 if (cfg->collect_md) { 1952 if (tun_on_same_port) { 1953 NL_SET_ERR_MSG(extack, 1954 "There can be only one externally controlled device on a destination port and a source address"); 1955 return -EPERM; 1956 } 1957 } else { 1958 if (tun_collect_md) { 1959 NL_SET_ERR_MSG(extack, 1960 "There already exists an externally controlled device on this destination port and the source address"); 1961 return -EPERM; 1962 } 1963 } 1964 1965 dst_cache_reset(&geneve->cfg.info.dst_cache); 1966 memcpy(&geneve->cfg, cfg, sizeof(*cfg)); 1967 1968 if (geneve->cfg.inner_proto_inherit) { 1969 dev->header_ops = NULL; 1970 dev->type = ARPHRD_NONE; 1971 dev->hard_header_len = 0; 1972 dev->addr_len = 0; 1973 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 1974 } 1975 1976 err = register_netdevice(dev); 1977 if (err) 1978 return err; 1979 1980 list_add(&geneve->next, &gn->geneve_list); 1981 return 0; 1982 } 1983 1984 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) 1985 { 1986 memset(info, 0, sizeof(*info)); 1987 info->key.tp_dst = htons(dst_port); 1988 } 1989 1990 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], 1991 struct netlink_ext_ack *extack, 1992 struct geneve_config *cfg, bool changelink) 1993 { 1994 struct ip_tunnel_info *info = &cfg->info; 1995 int attrtype; 1996 1997 if (data[IFLA_GENEVE_COLLECT_METADATA]) { 1998 if (changelink) { 1999 attrtype = IFLA_GENEVE_COLLECT_METADATA; 2000 goto change_notsup; 2001 } 2002 2003 cfg->collect_md = true; 2004 cfg->dualstack = true; 2005 } 2006 2007 if ((data[IFLA_GENEVE_LOCAL] || data[IFLA_GENEVE_REMOTE]) && 2008 (data[IFLA_GENEVE_LOCAL6] || data[IFLA_GENEVE_REMOTE6])) { 2009 NL_SET_ERR_MSG(extack, 2010 "Cannot specify both IPv4/IPv6 Remote/Local addresses"); 2011 return -EINVAL; 2012 } 2013 2014 if (data[IFLA_GENEVE_REMOTE]) { 2015 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { 2016 attrtype = IFLA_GENEVE_REMOTE; 2017 goto change_notsup; 2018 } 2019 2020 info->key.u.ipv4.dst = 2021 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 2022 2023 if (ipv4_is_multicast(info->key.u.ipv4.dst)) { 2024 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], 2025 "Remote IPv4 address cannot be Multicast"); 2026 return -EINVAL; 2027 } 2028 } 2029 2030 if (data[IFLA_GENEVE_REMOTE6]) { 2031 #if IS_ENABLED(CONFIG_IPV6) 2032 int addr_type; 2033 2034 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { 2035 attrtype = IFLA_GENEVE_REMOTE6; 2036 goto change_notsup; 2037 } 2038 2039 info->mode = IP_TUNNEL_INFO_IPV6; 2040 info->key.u.ipv6.dst = 2041 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 2042 2043 addr_type = ipv6_addr_type(&info->key.u.ipv6.dst); 2044 if (addr_type & IPV6_ADDR_LINKLOCAL) { 2045 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2046 "Remote IPv6 address cannot be link-local"); 2047 return -EINVAL; 2048 } 2049 if (addr_type & IPV6_ADDR_MULTICAST) { 2050 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2051 "Remote IPv6 address cannot be Multicast"); 2052 return -EINVAL; 2053 } 2054 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2055 cfg->use_udp6_rx_checksums = true; 2056 #else 2057 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2058 "IPv6 support not enabled in the kernel"); 2059 return -EPFNOSUPPORT; 2060 #endif 2061 } 2062 2063 if (data[IFLA_GENEVE_LOCAL]) { 2064 if (changelink) { 2065 __be32 src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]); 2066 2067 if (ip_tunnel_info_af(info) == AF_INET6 || 2068 src != info->key.u.ipv4.src) { 2069 attrtype = IFLA_GENEVE_LOCAL; 2070 goto change_notsup; 2071 } 2072 } else { 2073 info->key.u.ipv4.src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]); 2074 2075 if (ipv4_is_multicast(info->key.u.ipv4.src)) { 2076 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL], 2077 "Local IPv4 address cannot be Multicast"); 2078 return -EINVAL; 2079 } 2080 2081 cfg->dualstack = false; 2082 } 2083 } 2084 2085 if (data[IFLA_GENEVE_LOCAL6]) { 2086 #if IS_ENABLED(CONFIG_IPV6) 2087 if (changelink) { 2088 struct in6_addr src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]); 2089 2090 if (ip_tunnel_info_af(info) == AF_INET || 2091 !ipv6_addr_equal(&src, &info->key.u.ipv6.src)) { 2092 attrtype = IFLA_GENEVE_LOCAL6; 2093 goto change_notsup; 2094 } 2095 } else { 2096 int addr_type; 2097 2098 info->mode = IP_TUNNEL_INFO_IPV6; 2099 info->key.u.ipv6.src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]); 2100 2101 addr_type = ipv6_addr_type(&info->key.u.ipv6.src); 2102 if (addr_type & IPV6_ADDR_LINKLOCAL) { 2103 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2104 "Local IPv6 address cannot be link-local"); 2105 return -EINVAL; 2106 } 2107 if (addr_type & IPV6_ADDR_MULTICAST) { 2108 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2109 "Local IPv6 address cannot be Multicast"); 2110 return -EINVAL; 2111 } 2112 2113 cfg->dualstack = false; 2114 } 2115 #else 2116 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2117 "IPv6 support not enabled in the kernel"); 2118 return -EPFNOSUPPORT; 2119 #endif 2120 } 2121 2122 if (data[IFLA_GENEVE_ID]) { 2123 __u32 vni; 2124 __u8 tvni[3]; 2125 __be64 tunid; 2126 2127 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 2128 tvni[0] = (vni & 0x00ff0000) >> 16; 2129 tvni[1] = (vni & 0x0000ff00) >> 8; 2130 tvni[2] = vni & 0x000000ff; 2131 2132 tunid = vni_to_tunnel_id(tvni); 2133 if (changelink && (tunid != info->key.tun_id)) { 2134 attrtype = IFLA_GENEVE_ID; 2135 goto change_notsup; 2136 } 2137 info->key.tun_id = tunid; 2138 } 2139 2140 if (data[IFLA_GENEVE_TTL_INHERIT]) { 2141 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) 2142 cfg->ttl_inherit = true; 2143 else 2144 cfg->ttl_inherit = false; 2145 } else if (data[IFLA_GENEVE_TTL]) { 2146 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 2147 cfg->ttl_inherit = false; 2148 } 2149 2150 if (data[IFLA_GENEVE_TOS]) 2151 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 2152 2153 if (data[IFLA_GENEVE_DF]) 2154 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); 2155 2156 if (data[IFLA_GENEVE_LABEL]) { 2157 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & 2158 IPV6_FLOWLABEL_MASK; 2159 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { 2160 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], 2161 "Label attribute only applies for IPv6 Geneve devices"); 2162 return -EINVAL; 2163 } 2164 } 2165 2166 if (data[IFLA_GENEVE_PORT]) { 2167 if (changelink) { 2168 attrtype = IFLA_GENEVE_PORT; 2169 goto change_notsup; 2170 } 2171 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); 2172 } 2173 2174 if (data[IFLA_GENEVE_PORT_RANGE]) { 2175 const struct ifla_geneve_port_range *p; 2176 2177 if (changelink) { 2178 attrtype = IFLA_GENEVE_PORT_RANGE; 2179 goto change_notsup; 2180 } 2181 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 2182 cfg->port_min = ntohs(p->low); 2183 cfg->port_max = ntohs(p->high); 2184 } 2185 2186 if (data[IFLA_GENEVE_UDP_CSUM]) { 2187 if (changelink) { 2188 attrtype = IFLA_GENEVE_UDP_CSUM; 2189 goto change_notsup; 2190 } 2191 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 2192 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2193 } 2194 2195 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { 2196 #if IS_ENABLED(CONFIG_IPV6) 2197 if (changelink) { 2198 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; 2199 goto change_notsup; 2200 } 2201 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 2202 __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2203 #else 2204 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], 2205 "IPv6 support not enabled in the kernel"); 2206 return -EPFNOSUPPORT; 2207 #endif 2208 } 2209 2210 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { 2211 #if IS_ENABLED(CONFIG_IPV6) 2212 if (changelink) { 2213 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; 2214 goto change_notsup; 2215 } 2216 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 2217 cfg->use_udp6_rx_checksums = false; 2218 #else 2219 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], 2220 "IPv6 support not enabled in the kernel"); 2221 return -EPFNOSUPPORT; 2222 #endif 2223 } 2224 2225 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { 2226 if (changelink) { 2227 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; 2228 goto change_notsup; 2229 } 2230 cfg->inner_proto_inherit = true; 2231 } 2232 2233 if (data[IFLA_GENEVE_GRO_HINT]) { 2234 if (changelink) { 2235 attrtype = IFLA_GENEVE_GRO_HINT; 2236 goto change_notsup; 2237 } 2238 cfg->gro_hint = true; 2239 } 2240 2241 return 0; 2242 change_notsup: 2243 NL_SET_ERR_MSG_ATTR(extack, data[attrtype], 2244 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported"); 2245 return -EOPNOTSUPP; 2246 } 2247 2248 static void geneve_link_config(struct net_device *dev, 2249 struct ip_tunnel_info *info, struct nlattr *tb[]) 2250 { 2251 struct geneve_dev *geneve = netdev_priv(dev); 2252 int ldev_mtu = 0; 2253 2254 if (tb[IFLA_MTU]) { 2255 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); 2256 return; 2257 } 2258 2259 switch (ip_tunnel_info_af(info)) { 2260 case AF_INET: { 2261 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; 2262 struct rtable *rt = ip_route_output_key(geneve->net, &fl4); 2263 2264 if (!IS_ERR(rt) && rt->dst.dev) { 2265 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; 2266 ip_rt_put(rt); 2267 } 2268 break; 2269 } 2270 #if IS_ENABLED(CONFIG_IPV6) 2271 case AF_INET6: { 2272 struct rt6_info *rt; 2273 2274 if (!__in6_dev_get(dev)) 2275 break; 2276 2277 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, 2278 NULL, 0); 2279 2280 if (rt && rt->dst.dev) 2281 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; 2282 ip6_rt_put(rt); 2283 break; 2284 } 2285 #endif 2286 } 2287 2288 if (ldev_mtu <= 0) 2289 return; 2290 2291 geneve_change_mtu(dev, ldev_mtu - info->options_len); 2292 } 2293 2294 static int geneve_newlink(struct net_device *dev, 2295 struct rtnl_newlink_params *params, 2296 struct netlink_ext_ack *extack) 2297 { 2298 struct net *link_net = rtnl_newlink_link_net(params); 2299 struct nlattr **data = params->data; 2300 struct nlattr **tb = params->tb; 2301 struct geneve_config cfg = { 2302 .df = GENEVE_DF_UNSET, 2303 .use_udp6_rx_checksums = false, 2304 .ttl_inherit = false, 2305 .collect_md = false, 2306 .dualstack = false, 2307 .port_min = 1, 2308 .port_max = USHRT_MAX, 2309 }; 2310 int err; 2311 2312 init_tnl_info(&cfg.info, GENEVE_UDP_PORT); 2313 err = geneve_nl2info(tb, data, extack, &cfg, false); 2314 if (err) 2315 return err; 2316 2317 err = geneve_configure(link_net, dev, extack, &cfg); 2318 if (err) 2319 return err; 2320 2321 geneve_link_config(dev, &cfg.info, tb); 2322 2323 return 0; 2324 } 2325 2326 /* Quiesces the geneve device data path for both TX and RX. 2327 * 2328 * On transmit geneve checks for non-NULL geneve_sock before it proceeds. 2329 * So, if we set that socket to NULL under RCU and wait for synchronize_net() 2330 * to complete for the existing set of in-flight packets to be transmitted, 2331 * then we would have quiesced the transmit data path. All the future packets 2332 * will get dropped until we unquiesce the data path. 2333 * 2334 * On receive geneve dereference the geneve_sock stashed in the socket. So, 2335 * if we set that to NULL under RCU and wait for synchronize_net() to 2336 * complete, then we would have quiesced the receive data path. 2337 */ 2338 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, 2339 struct geneve_sock **gs6) 2340 { 2341 *gs4 = rtnl_dereference(geneve->sock4); 2342 rcu_assign_pointer(geneve->sock4, NULL); 2343 if (*gs4) 2344 rcu_assign_sk_user_data((*gs4)->sk, NULL); 2345 #if IS_ENABLED(CONFIG_IPV6) 2346 *gs6 = rtnl_dereference(geneve->sock6); 2347 rcu_assign_pointer(geneve->sock6, NULL); 2348 if (*gs6) 2349 rcu_assign_sk_user_data((*gs6)->sk, NULL); 2350 #else 2351 *gs6 = NULL; 2352 #endif 2353 synchronize_net(); 2354 } 2355 2356 /* Resumes the geneve device data path for both TX and RX. */ 2357 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, 2358 struct geneve_sock __maybe_unused *gs6) 2359 { 2360 rcu_assign_pointer(geneve->sock4, gs4); 2361 if (gs4) 2362 rcu_assign_sk_user_data(gs4->sk, gs4); 2363 #if IS_ENABLED(CONFIG_IPV6) 2364 rcu_assign_pointer(geneve->sock6, gs6); 2365 if (gs6) 2366 rcu_assign_sk_user_data(gs6->sk, gs6); 2367 #endif 2368 } 2369 2370 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], 2371 struct nlattr *data[], 2372 struct netlink_ext_ack *extack) 2373 { 2374 struct geneve_dev *geneve = netdev_priv(dev); 2375 struct geneve_sock *gs4, *gs6; 2376 struct geneve_config cfg; 2377 int err; 2378 2379 /* If the geneve device is configured for metadata (or externally 2380 * controlled, for example, OVS), then nothing can be changed. 2381 */ 2382 if (geneve->cfg.collect_md) 2383 return -EOPNOTSUPP; 2384 2385 /* Start with the existing info. */ 2386 memcpy(&cfg, &geneve->cfg, sizeof(cfg)); 2387 err = geneve_nl2info(tb, data, extack, &cfg, true); 2388 if (err) 2389 return err; 2390 2391 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { 2392 dst_cache_reset(&cfg.info.dst_cache); 2393 geneve_link_config(dev, &cfg.info, tb); 2394 } 2395 2396 geneve_quiesce(geneve, &gs4, &gs6); 2397 memcpy(&geneve->cfg, &cfg, sizeof(cfg)); 2398 geneve_unquiesce(geneve, gs4, gs6); 2399 2400 return 0; 2401 } 2402 2403 static void geneve_dellink(struct net_device *dev, struct list_head *head) 2404 { 2405 struct geneve_dev *geneve = netdev_priv(dev); 2406 2407 list_del(&geneve->next); 2408 unregister_netdevice_queue(dev, head); 2409 } 2410 2411 static size_t geneve_get_size(const struct net_device *dev) 2412 { 2413 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 2414 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 2415 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_LOCAL{6} */ 2416 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 2417 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 2418 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ 2419 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 2420 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 2421 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 2422 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 2423 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 2424 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 2425 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 2426 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ 2427 nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 2428 nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */ 2429 0; 2430 } 2431 2432 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 2433 { 2434 struct geneve_dev *geneve = netdev_priv(dev); 2435 struct ip_tunnel_info *info = &geneve->cfg.info; 2436 bool ttl_inherit = geneve->cfg.ttl_inherit; 2437 bool metadata = geneve->cfg.collect_md; 2438 struct ifla_geneve_port_range ports = { 2439 .low = htons(geneve->cfg.port_min), 2440 .high = htons(geneve->cfg.port_max), 2441 }; 2442 __u8 tmp_vni[3]; 2443 __u32 vni; 2444 2445 tunnel_id_to_vni(info->key.tun_id, tmp_vni); 2446 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; 2447 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 2448 goto nla_put_failure; 2449 2450 if (!metadata && ip_tunnel_info_af(info) == AF_INET) { 2451 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 2452 info->key.u.ipv4.dst)) 2453 goto nla_put_failure; 2454 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 2455 test_bit(IP_TUNNEL_CSUM_BIT, 2456 info->key.tun_flags))) 2457 goto nla_put_failure; 2458 2459 #if IS_ENABLED(CONFIG_IPV6) 2460 } else if (!metadata) { 2461 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 2462 &info->key.u.ipv6.dst)) 2463 goto nla_put_failure; 2464 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 2465 !test_bit(IP_TUNNEL_CSUM_BIT, 2466 info->key.tun_flags))) 2467 goto nla_put_failure; 2468 #endif 2469 } 2470 2471 if (!geneve->cfg.dualstack) { 2472 if (ip_tunnel_info_af(info) == AF_INET) { 2473 if ((info->key.u.ipv4.src || 2474 geneve->cfg.collect_md) && 2475 nla_put_in_addr(skb, IFLA_GENEVE_LOCAL, 2476 info->key.u.ipv4.src)) 2477 goto nla_put_failure; 2478 #if IS_ENABLED(CONFIG_IPV6) 2479 } else { 2480 if ((!ipv6_addr_any(&info->key.u.ipv6.src) || 2481 geneve->cfg.collect_md) && 2482 nla_put_in6_addr(skb, IFLA_GENEVE_LOCAL6, 2483 &info->key.u.ipv6.src)) 2484 goto nla_put_failure; 2485 #endif 2486 } 2487 } 2488 2489 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || 2490 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || 2491 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) 2492 goto nla_put_failure; 2493 2494 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) 2495 goto nla_put_failure; 2496 2497 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) 2498 goto nla_put_failure; 2499 2500 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 2501 goto nla_put_failure; 2502 2503 #if IS_ENABLED(CONFIG_IPV6) 2504 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 2505 !geneve->cfg.use_udp6_rx_checksums)) 2506 goto nla_put_failure; 2507 #endif 2508 2509 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) 2510 goto nla_put_failure; 2511 2512 if (geneve->cfg.inner_proto_inherit && 2513 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) 2514 goto nla_put_failure; 2515 2516 if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) 2517 goto nla_put_failure; 2518 2519 if (geneve->cfg.gro_hint && 2520 nla_put_flag(skb, IFLA_GENEVE_GRO_HINT)) 2521 goto nla_put_failure; 2522 2523 return 0; 2524 2525 nla_put_failure: 2526 return -EMSGSIZE; 2527 } 2528 2529 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 2530 .kind = "geneve", 2531 .maxtype = IFLA_GENEVE_MAX, 2532 .policy = geneve_policy, 2533 .priv_size = sizeof(struct geneve_dev), 2534 .setup = geneve_setup, 2535 .validate = geneve_validate, 2536 .newlink = geneve_newlink, 2537 .changelink = geneve_changelink, 2538 .dellink = geneve_dellink, 2539 .get_size = geneve_get_size, 2540 .fill_info = geneve_fill_info, 2541 }; 2542 2543 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 2544 u8 name_assign_type, u16 dst_port) 2545 { 2546 struct nlattr *tb[IFLA_MAX + 1]; 2547 struct net_device *dev; 2548 LIST_HEAD(list_kill); 2549 int err; 2550 struct geneve_config cfg = { 2551 .df = GENEVE_DF_UNSET, 2552 .use_udp6_rx_checksums = true, 2553 .ttl_inherit = false, 2554 .collect_md = true, 2555 .dualstack = true, 2556 .port_min = 1, 2557 .port_max = USHRT_MAX, 2558 }; 2559 2560 memset(tb, 0, sizeof(tb)); 2561 dev = rtnl_create_link(net, name, name_assign_type, 2562 &geneve_link_ops, tb, NULL); 2563 if (IS_ERR(dev)) 2564 return dev; 2565 2566 init_tnl_info(&cfg.info, dst_port); 2567 err = geneve_configure(net, dev, NULL, &cfg); 2568 if (err) { 2569 free_netdev(dev); 2570 return ERR_PTR(err); 2571 } 2572 2573 /* openvswitch users expect packet sizes to be unrestricted, 2574 * so set the largest MTU we can. 2575 */ 2576 err = geneve_change_mtu(dev, IP_MAX_MTU); 2577 if (err) 2578 goto err; 2579 2580 err = rtnl_configure_link(dev, NULL, 0, NULL); 2581 if (err < 0) 2582 goto err; 2583 2584 return dev; 2585 err: 2586 geneve_dellink(dev, &list_kill); 2587 unregister_netdevice_many(&list_kill); 2588 return ERR_PTR(err); 2589 } 2590 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 2591 2592 static int geneve_netdevice_event(struct notifier_block *unused, 2593 unsigned long event, void *ptr) 2594 { 2595 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2596 2597 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) 2598 geneve_offload_rx_ports(dev, true); 2599 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) 2600 geneve_offload_rx_ports(dev, false); 2601 2602 return NOTIFY_DONE; 2603 } 2604 2605 static struct notifier_block geneve_notifier_block __read_mostly = { 2606 .notifier_call = geneve_netdevice_event, 2607 }; 2608 2609 static __net_init int geneve_init_net(struct net *net) 2610 { 2611 struct geneve_net *gn = net_generic(net, geneve_net_id); 2612 2613 INIT_LIST_HEAD(&gn->geneve_list); 2614 INIT_LIST_HEAD(&gn->sock_list); 2615 return 0; 2616 } 2617 2618 static void __net_exit geneve_exit_rtnl_net(struct net *net, 2619 struct list_head *dev_to_kill) 2620 { 2621 struct geneve_net *gn = net_generic(net, geneve_net_id); 2622 struct geneve_dev *geneve, *next; 2623 2624 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) 2625 geneve_dellink(geneve->dev, dev_to_kill); 2626 } 2627 2628 static void __net_exit geneve_exit_net(struct net *net) 2629 { 2630 const struct geneve_net *gn = net_generic(net, geneve_net_id); 2631 2632 WARN_ON_ONCE(!list_empty(&gn->sock_list)); 2633 } 2634 2635 static struct pernet_operations geneve_net_ops = { 2636 .init = geneve_init_net, 2637 .exit_rtnl = geneve_exit_rtnl_net, 2638 .exit = geneve_exit_net, 2639 .id = &geneve_net_id, 2640 .size = sizeof(struct geneve_net), 2641 }; 2642 2643 static int __init geneve_init_module(void) 2644 { 2645 int rc; 2646 2647 rc = register_pernet_subsys(&geneve_net_ops); 2648 if (rc) 2649 goto out1; 2650 2651 rc = register_netdevice_notifier(&geneve_notifier_block); 2652 if (rc) 2653 goto out2; 2654 2655 rc = rtnl_link_register(&geneve_link_ops); 2656 if (rc) 2657 goto out3; 2658 2659 return 0; 2660 out3: 2661 unregister_netdevice_notifier(&geneve_notifier_block); 2662 out2: 2663 unregister_pernet_subsys(&geneve_net_ops); 2664 out1: 2665 return rc; 2666 } 2667 late_initcall(geneve_init_module); 2668 2669 static void __exit geneve_cleanup_module(void) 2670 { 2671 rtnl_link_unregister(&geneve_link_ops); 2672 unregister_netdevice_notifier(&geneve_notifier_block); 2673 unregister_pernet_subsys(&geneve_net_ops); 2674 } 2675 module_exit(geneve_cleanup_module); 2676 2677 MODULE_LICENSE("GPL"); 2678 MODULE_VERSION(GENEVE_NETDEV_VER); 2679 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 2680 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 2681 MODULE_ALIAS_RTNL_LINK("geneve"); 2682