// SPDX-License-Identifier: GPL-2.0-only
/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/gro.h>
#include <net/netdev_lock.h>
#include <net/protocol.h>

#define GENEVE_NETDEV_VER "0.6"

#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

/* Number of buckets in the per-socket VNI hash table. */
#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

#define GENEVE_VER 0
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)

/*
 * GRO hint option: class/type used to recognise it, total on-wire size in
 * bytes (option header + data) and the value of the option 'length' field
 * (the parser below treats that field as a count of 4-byte words of data).
 */
#define GENEVE_OPT_NETDEV_CLASS		0x100
#define GENEVE_OPT_GRO_HINT_SIZE	8
#define GENEVE_OPT_GRO_HINT_TYPE	1
#define GENEVE_OPT_GRO_HINT_LEN		1

/*
 * Data carried by the GRO hint option. It describes the nested headers
 * following the geneve header: index into proto_id_map for the innermost
 * protocol, whether the nested network header is IPv6, the offsets of the
 * nested network and transport headers and the overall nested header length.
 */
struct geneve_opt_gro_hint {
	u8	inner_proto_id:2,
		nested_is_v6:1;
	u8	nested_nh_offset;
	u8	nested_tp_offset;
	u8	nested_hdr_len;
};

/* Per-skb scratch data kept in skb->cb on the transmit path. */
struct geneve_skb_cb {
	/* 0 when no hint is emitted, GENEVE_OPT_GRO_HINT_SIZE otherwise */
	unsigned int	gro_hint_len;
	struct geneve_opt_gro_hint gro_hint;
};

#define GENEVE_SKB_CB(__skb)	((struct geneve_skb_cb *)&((__skb)->cb[0]))

/* per-network namespace private data for this module */
struct geneve_net {
	struct list_head	geneve_list;
	/* sock_list is protected by rtnl lock */
	struct list_head	sock_list;
};

static unsigned int geneve_net_id;

/* Links a geneve device into one socket's VNI hash table. */
struct geneve_dev_node {
	struct hlist_node hlist;
	struct geneve_dev *geneve;
};

struct geneve_config {
	bool			collect_md;
	bool			dualstack;
	bool			use_udp6_rx_checksums;
	bool			ttl_inherit;
	bool			gro_hint;
	enum ifla_geneve_df	df;
	bool			inner_proto_inherit;
	u16			port_min;
	u16			port_max;

	/* Must be last --ends in a flexible-array member. */
	struct ip_tunnel_info	info;
};

/* Pseudo network device */
struct geneve_dev {
	struct geneve_dev_node hlist4;	/* vni hash table for IPv4 socket */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_dev_node hlist6;	/* vni hash table for IPv6 socket */
#endif
	struct net	   *net;	/* netns for packet i/o */
	struct net_device  *dev;	/* netdev for geneve tunnel */
	struct geneve_sock __rcu *sock4;	/* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock __rcu *sock6;	/* IPv6 socket used for geneve tunnel */
#endif
	struct list_head   next;	/* geneve's per namespace list */
	struct gro_cells   gro_cells;
	struct geneve_config cfg;
};

/* Per UDP socket state; may be shared by several devices (see refcnt). */
struct geneve_sock {
	bool			collect_md;
	bool			gro_hint;
	struct list_head	list;
	struct sock		*sk;
	struct rcu_head		rcu;
	int			refcnt;
	struct hlist_head	vni_list[VNI_HASH_SIZE];
};

/* Protocols that can be encoded in geneve_opt_gro_hint::inner_proto_id. */
static const __be16 proto_id_map[] = { htons(ETH_P_TEB),
				       htons(ETH_P_IPV6),
				       htons(ETH_P_IP) };

/* Return the proto_id_map index of @proto, or -1 if it is not listed. */
static int proto_to_id(__be16 proto)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(proto_id_map); i++)
		if (proto_id_map[i] == proto)
			return i;

	return -1;
}

/* Hash the 24 bit VNI (3 bytes, most significant first) into a bucket index. */
static inline __u32 geneve_net_vni_hash(u8 vni[3])
{
	__u32 vnid;

	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
	return hash_32(vnid, VNI_HASH_BITS);
}

/*
 * Convert 24 bit VNI to 64 bit tunnel ID: the three VNI bytes end up in the
 * last three bytes of the big-endian value.
 */
static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
	return (__force __be64)(((__force u64)vni[0] << 40) |
				((__force u64)vni[1] << 48) |
				((__force u64)vni[2] << 56));
#endif
}

/* Convert 64 bit tunnel ID to 24 bit VNI. */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
#ifdef __BIG_ENDIAN
	vni[0] = (__force __u8)(tun_id >> 16);
	vni[1] = (__force __u8)(tun_id >> 8);
	vni[2] = (__force __u8)tun_id;
#else
	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
#endif
}

/*
 * Compare a 3 byte VNI with the last 3 bytes of an 8 byte tunnel id, given
 * as a byte pointer.
 */
static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
{
	return !memcmp(vni, &tun_id[5], 3);
}

static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
{
	return gs->sk->sk_family;
}

/*
 * Find the device attached to @gs whose configured tunnel id matches @vni
 * and whose configured IPv4 destination equals @addr; NULL if none.
 * The bucket is walked with hlist_for_each_entry_rcu().
 */
static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    addr == node->geneve->cfg.info.key.u.ipv4.dst)
			return node->geneve;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of geneve_lookup(); @addr6 is passed by value. */
static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
					 struct in6_addr addr6, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
			return node->geneve;
	}
	return NULL;
}
#endif

/* The geneve header immediately follows the UDP header. */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}

/*
 * Pick the receiving device for @skb on socket @gs. For collect_md sockets
 * the lookup uses an all-zero VNI and address, otherwise the VNI from the
 * geneve header and the outer source address.
 */
static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
					    struct sk_buff *skb)
{
	static u8 zero_vni[3];
	u8 *vni;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph;
		__be32 addr;

		iph = ip_hdr(skb); /* outer IP header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr = 0;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr = iph->saddr;
		}

		return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (geneve_get_sk_family(gs) == AF_INET6) {
		static struct in6_addr zero_addr6;
		struct ipv6hdr *ip6h;
		struct in6_addr addr6;

		ip6h = ipv6_hdr(skb); /* outer IPv6 header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr6 = zero_addr6;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->saddr;
		}

		return geneve6_lookup(gs, addr6, vni);
#endif
	}
	return NULL;
}

/*
 * geneve receive/decap routine
 *
 * @geneve: receiving device
 * @gs:     socket the packet arrived on
 * @skb:    packet, with the outer headers already pulled by the caller
 * @gnvh:   geneve header of the packet
 *
 * The skb is always consumed: it is either handed to netif_rx() /
 * gro_cells_receive() or freed on the drop path.
 */
static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
		      struct sk_buff *skb, const struct genevehdr *gnvh)
{
	struct metadata_dst *tun_dst = NULL;
	unsigned int len;
	int nh, err = 0;
	void *oiph;

	if (ip_tunnel_collect_metadata() || gs->collect_md) {
		IP_TUNNEL_DECLARE_FLAGS(flags) = { };

		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		__assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
		__assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);

		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
					 vni_to_tunnel_id(gnvh->vni),
					 gnvh->opt_len * 4);
		if (!tun_dst) {
			dev_dstats_rx_dropped(geneve->dev);
			goto drop;
		}
		/* Update tunnel dst according to Geneve options. */
		ip_tunnel_flags_zero(flags);
		__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					gnvh->options, gnvh->opt_len * 4,
					flags);
	} else {
		/* Drop packets w/ critical options,
		 * since we don't support any...
		 */
		if (gnvh->critical) {
			DEV_STATS_INC(geneve->dev, rx_frame_errors);
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	}

	/* Attached before any later drop, so kfree_skb() also releases it. */
	if (tun_dst)
		skb_dst_set(skb, &tun_dst->dst);

	if (gnvh->proto_type == htons(ETH_P_TEB)) {
		skb_reset_mac_header(skb);
		skb->protocol = eth_type_trans(skb, geneve->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

		/* Ignore packet loops (and multicast echo) */
		if (ether_addr_equal(eth_hdr(skb)->h_source,
				     geneve->dev->dev_addr)) {
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	} else {
		skb_reset_mac_header(skb);
		skb->dev = geneve->dev;
		skb->pkt_type = PACKET_HOST;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(geneve->dev, rx_length_errors);
		DEV_STATS_INC(geneve->dev, rx_errors);
		goto drop;
	}

	/* Get the outer header. */
	oiph = skb->head + nh;

	if (geneve_get_sk_family(gs) == AF_INET)
		err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
	else
		err = IP6_ECN_decapsulate(oiph, skb);
#endif

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (geneve_get_sk_family(gs) == AF_INET)
				net_info_ratelimited("non-ECT from %pI4 "
						     "with TOS=%#x\n",
						     &((struct iphdr *)oiph)->saddr,
						     ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
			else
				net_info_ratelimited("non-ECT from %pI6\n",
						     &((struct ipv6hdr *)oiph)->saddr);
#endif
		}
		/* Values above 1 are treated as fatal; others are only logged. */
		if (err > 1) {
			DEV_STATS_INC(geneve->dev, rx_frame_errors);
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	}

	/* Skip the additional GRO stage when hints are in use. */
	len = skb->len;	/* sampled before the skb is handed off */
	if (skb->encapsulation)
		err = netif_rx(skb);
	else
		err = gro_cells_receive(&geneve->gro_cells, skb);
	if (likely(err == NET_RX_SUCCESS))
		dev_dstats_rx_add(geneve->dev, len);

	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}

/*
 * Per-device init: set up the GRO cells and the tunnel dst cache.
 * Returns 0 on success or the error from the failing initializer; the GRO
 * cells are torn down again if the dst cache cannot be initialized.
 */
static int geneve_init(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int err;

	err = gro_cells_init(&geneve->gro_cells, dev);
	if (err)
		return err;

	err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
	if (err) {
		gro_cells_destroy(&geneve->gro_cells);
		return err;
	}
	netdev_lockdep_set_classes(dev);
	return 0;
}

/* Release what geneve_init() set up. */
static void geneve_uninit(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	dst_cache_destroy(&geneve->cfg.info.dst_cache);
	gro_cells_destroy(&geneve->gro_cells);
}

/* Length in bytes of the geneve header including options (opt_len is in 4-byte words). */
static int geneve_hlen(const struct genevehdr *gh)
{
	return sizeof(*gh) + gh->opt_len * 4;
}

/*
 * Look for GRO hint in the geneve options; if not found or does not pass basic
* sanitization return 0, otherwise the offset WRT the geneve hdr start.
 *
 * On success *type is set to the protocol selected by the hint and
 * *gh_len is increased by the hinted nested header length; both are left
 * untouched when 0 is returned.
 */
static unsigned int
geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type,
			unsigned int *gh_len)
{
	struct geneve_opt *opt = (void *)(gh + 1);
	unsigned int id, opt_len = gh->opt_len;
	struct geneve_opt_gro_hint *gro_hint;

	/* opt_len counts 4-byte words; stop when a hint option can't fit. */
	while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) {
		if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) &&
		    opt->type == GENEVE_OPT_GRO_HINT_TYPE &&
		    opt->length == GENEVE_OPT_GRO_HINT_LEN)
			goto found;

		/* check for bad opt len */
		if (opt->length + 1 >= opt_len)
			return 0;

		/* next opt */
		opt_len -= opt->length + 1;
		opt = ((void *)opt) + ((opt->length + 1) << 2);
	}
	return 0;

found:
	gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data;

	/*
	 * Sanitize the hinted hdrs: the nested transport is UDP and must fit
	 * the overall hinted hdr size.
	 */
	if (gro_hint->nested_tp_offset + sizeof(struct udphdr) >
	    gro_hint->nested_hdr_len)
		return 0;

	/* The nested network header must end before the transport header. */
	if (gro_hint->nested_nh_offset +
	    (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) :
				      sizeof(struct iphdr)) >
	    gro_hint->nested_tp_offset)
		return 0;

	/* Allow only supported L2. */
	id = gro_hint->inner_proto_id;
	if (id >= ARRAY_SIZE(proto_id_map))
		return 0;

	*type = proto_id_map[id];
	*gh_len += gro_hint->nested_hdr_len;

	return (void *)gro_hint - (void *)gh;
}

/* Turn an offset returned by geneve_opt_gro_hint_off() back into a pointer. */
static const struct geneve_opt_gro_hint *
geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off)
{
	return (const struct geneve_opt_gro_hint *)((void *)gh + hint_off);
}

/*
 * Same as geneve_opt_gro_hint_off(), but only when the geneve socket attached
 * to @sk has GRO hints enabled; returns 0 otherwise.
 */
static unsigned int
geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh,
		       __be16 *type, unsigned int *gh_len)
{
	const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk);

	if (!gs || !gs->gro_hint)
		return 0;
	return geneve_opt_gro_hint_off(gh, type, gh_len);
}

/*
 * Validate the packet headers pointed by data WRT the provided hint:
 * the nested network header must lead to UDP. For IPv6 that is either the
 * base header's nexthdr or one of the headers walked up to the hinted
 * transport offset; for IPv4 the header must additionally be option-less
 * (first byte 0x45), unfragmented and carry a valid header checksum.
 */
static bool
geneve_opt_gro_hint_validate(void *data,
			     const struct geneve_opt_gro_hint *gro_hint)
{
	void *nested_nh = data + gro_hint->nested_nh_offset;
	struct iphdr *iph;

	if (gro_hint->nested_is_v6) {
		struct ipv6hdr *ipv6h = nested_nh;
		struct ipv6_opt_hdr *opth;
		int offset, len;

		if (ipv6h->nexthdr == IPPROTO_UDP)
			return true;

		offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset;
		while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) {
			opth = data + offset;

			len = ipv6_optlen(opth);
			/* Never walk past the hinted transport header. */
			if (len + offset > gro_hint->nested_tp_offset)
				return false;
			if (opth->nexthdr == IPPROTO_UDP)
				return true;

			offset += len;
		}
		return false;
	}

	iph = nested_nh;
	if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) ||
	    iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5))
		return false;

	return true;
}

/*
 * Validate the skb headers following the specified geneve hdr vs the
 * provided hint, including nested L4 checksum.
526 * The caller already ensured that the relevant amount of data is available 527 * in the linear part. 528 */ 529 static bool 530 geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb, 531 const struct genevehdr *gh, 532 const struct geneve_opt_gro_hint *gro_hint) 533 { 534 unsigned int plen, gh_len = geneve_hlen(gh); 535 void *nested = (void *)gh + gh_len; 536 struct udphdr *nested_uh; 537 unsigned int nested_len; 538 struct ipv6hdr *ipv6h; 539 struct iphdr *iph; 540 __wsum csum, psum; 541 542 if (!geneve_opt_gro_hint_validate(nested, gro_hint)) 543 return false; 544 545 /* Use GRO hints with nested csum only if the outer header has csum. */ 546 nested_uh = nested + gro_hint->nested_tp_offset; 547 if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL) 548 return true; 549 550 if (!NAPI_GRO_CB(skb)->csum_valid) 551 return false; 552 553 /* Compute the complete checksum up to the nested transport. */ 554 plen = gh_len + gro_hint->nested_tp_offset; 555 csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0)); 556 nested_len = skb_gro_len(skb) - plen; 557 558 /* Compute the nested pseudo header csum. */ 559 ipv6h = nested + gro_hint->nested_nh_offset; 560 iph = (struct iphdr *)ipv6h; 561 psum = gro_hint->nested_is_v6 ? 
562 ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 563 nested_len, IPPROTO_UDP, 0)) : 564 csum_tcpudp_nofold(iph->saddr, iph->daddr, 565 nested_len, IPPROTO_UDP, 0); 566 567 return !csum_fold(csum_add(psum, csum)); 568 } 569 570 static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, 571 unsigned int gh_len, 572 struct genevehdr **geneveh) 573 { 574 const struct geneve_opt_gro_hint *gro_hint; 575 unsigned int len, total_len, hint_off; 576 struct ipv6hdr *ipv6h; 577 struct iphdr *iph; 578 struct udphdr *uh; 579 __be16 p; 580 581 hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len); 582 if (!hint_off) 583 return 0; 584 585 if (!skb_is_gso(skb)) 586 return 0; 587 588 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 589 if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len))) 590 return -ENOMEM; 591 592 *geneveh = geneve_hdr(skb); 593 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 594 595 /* 596 * Validate hints from untrusted source before accessing 597 * the headers; csum will be checked later by the nested 598 * protocol rx path. 599 */ 600 if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY && 601 !geneve_opt_gro_hint_validate(skb->data, gro_hint))) 602 return -EINVAL; 603 604 ipv6h = (void *)skb->data + gro_hint->nested_nh_offset; 605 iph = (struct iphdr *)ipv6h; 606 total_len = skb->len - gro_hint->nested_nh_offset; 607 if (total_len >= GRO_LEGACY_MAX_SIZE) 608 return -E2BIG; 609 610 /* 611 * After stripping the outer encap, the packet still carries a 612 * tunnel encapsulation: the nested one. 613 */ 614 skb->encapsulation = 1; 615 616 /* GSO expect a valid transpor header, move it to the current one. */ 617 skb_set_transport_header(skb, gro_hint->nested_tp_offset); 618 619 /* Adjust the nested IP{6} hdr to actual GSO len. 
*/ 620 if (gro_hint->nested_is_v6) { 621 ipv6h->payload_len = htons(total_len - sizeof(*ipv6h)); 622 } else { 623 __be16 old_len = iph->tot_len; 624 625 iph->tot_len = htons(total_len); 626 627 /* For IPv4 additionally adjust the nested csum. */ 628 csum_replace2(&iph->check, old_len, iph->tot_len); 629 ip_send_check(iph); 630 } 631 632 /* Adjust the nested UDP header len and checksum. */ 633 uh = udp_hdr(skb); 634 uh->len = htons(skb->len - gro_hint->nested_tp_offset); 635 if (uh->check) { 636 len = skb->len - gro_hint->nested_tp_offset; 637 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; 638 if (gro_hint->nested_is_v6) 639 uh->check = ~udp_v6_check(len, &ipv6h->saddr, 640 &ipv6h->daddr, 0); 641 else 642 uh->check = ~udp_v4_check(len, iph->saddr, 643 iph->daddr, 0); 644 } else { 645 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; 646 } 647 return 0; 648 } 649 650 /* Callback from net/ipv4/udp.c to receive packets */ 651 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 652 { 653 struct genevehdr *geneveh; 654 struct geneve_dev *geneve; 655 struct geneve_sock *gs; 656 __be16 inner_proto; 657 int opts_len; 658 659 /* Need UDP and Geneve header to be present */ 660 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 661 goto drop; 662 663 /* Return packets with reserved bits set */ 664 geneveh = geneve_hdr(skb); 665 if (unlikely(geneveh->ver != GENEVE_VER)) 666 goto drop; 667 668 gs = rcu_dereference_sk_user_data(sk); 669 if (!gs) 670 goto drop; 671 672 geneve = geneve_lookup_skb(gs, skb); 673 if (!geneve) 674 goto drop; 675 676 inner_proto = geneveh->proto_type; 677 678 if (unlikely((!geneve->cfg.inner_proto_inherit && 679 inner_proto != htons(ETH_P_TEB)))) { 680 dev_dstats_rx_dropped(geneve->dev); 681 goto drop; 682 } 683 684 opts_len = geneveh->opt_len * 4; 685 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, 686 !net_eq(geneve->net, dev_net(geneve->dev)))) { 687 dev_dstats_rx_dropped(geneve->dev); 688 goto drop; 
689 } 690 691 /* 692 * After hint processing, the transport header points to the inner one 693 * and we can't use anymore on geneve_hdr(). 694 */ 695 geneveh = geneve_hdr(skb); 696 if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) + 697 opts_len, &geneveh)) { 698 DEV_STATS_INC(geneve->dev, rx_errors); 699 goto drop; 700 } 701 702 geneve_rx(geneve, gs, skb, geneveh); 703 return 0; 704 705 drop: 706 /* Consume bad packet */ 707 kfree_skb(skb); 708 return 0; 709 } 710 711 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ 712 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) 713 { 714 struct genevehdr *geneveh; 715 struct geneve_sock *gs; 716 u8 zero_vni[3] = { 0 }; 717 u8 *vni = zero_vni; 718 719 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) 720 return -EINVAL; 721 722 geneveh = geneve_hdr(skb); 723 if (geneveh->ver != GENEVE_VER) 724 return -EINVAL; 725 726 if (geneveh->proto_type != htons(ETH_P_TEB)) 727 return -EINVAL; 728 729 gs = rcu_dereference_sk_user_data(sk); 730 if (!gs) 731 return -ENOENT; 732 733 if (geneve_get_sk_family(gs) == AF_INET) { 734 struct iphdr *iph = ip_hdr(skb); 735 __be32 addr4 = 0; 736 737 if (!gs->collect_md) { 738 vni = geneve_hdr(skb)->vni; 739 addr4 = iph->daddr; 740 } 741 742 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; 743 } 744 745 #if IS_ENABLED(CONFIG_IPV6) 746 if (geneve_get_sk_family(gs) == AF_INET6) { 747 struct ipv6hdr *ip6h = ipv6_hdr(skb); 748 struct in6_addr addr6; 749 750 memset(&addr6, 0, sizeof(struct in6_addr)); 751 752 if (!gs->collect_md) { 753 vni = geneve_hdr(skb)->vni; 754 addr6 = ip6h->daddr; 755 } 756 757 return geneve6_lookup(gs, addr6, vni) ? 
0 : -ENOENT; 758 } 759 #endif 760 761 return -EPFNOSUPPORT; 762 } 763 764 static struct sock *geneve_create_sock(struct net *net, 765 struct geneve_dev *geneve, bool ipv6) 766 { 767 struct ip_tunnel_info *info = &geneve->cfg.info; 768 struct udp_port_cfg udp_conf; 769 struct socket *sock; 770 int err; 771 772 memset(&udp_conf, 0, sizeof(udp_conf)); 773 774 #if IS_ENABLED(CONFIG_IPV6) 775 if (ipv6) { 776 udp_conf.family = AF_INET6; 777 udp_conf.ipv6_v6only = 1; 778 udp_conf.use_udp6_rx_checksums = geneve->cfg.use_udp6_rx_checksums; 779 udp_conf.local_ip6 = info->key.u.ipv6.src; 780 } else 781 #endif 782 { 783 udp_conf.family = AF_INET; 784 udp_conf.local_ip.s_addr = info->key.u.ipv4.src; 785 } 786 787 udp_conf.local_udp_port = info->key.tp_dst; 788 789 /* Open UDP socket */ 790 err = udp_sock_create(net, &udp_conf, &sock); 791 if (err < 0) 792 return ERR_PTR(err); 793 794 udp_allow_gso(sock->sk); 795 return sock->sk; 796 } 797 798 static bool geneve_hdr_match(struct sk_buff *skb, 799 const struct genevehdr *gh, 800 const struct genevehdr *gh2, 801 unsigned int hint_off) 802 { 803 const struct geneve_opt_gro_hint *gro_hint; 804 void *nested, *nested2, *nh, *nh2; 805 struct udphdr *udp, *udp2; 806 unsigned int gh_len; 807 808 /* Match the geneve hdr and options */ 809 if (gh->opt_len != gh2->opt_len) 810 return false; 811 812 gh_len = geneve_hlen(gh); 813 if (memcmp(gh, gh2, gh_len)) 814 return false; 815 816 if (!hint_off) 817 return true; 818 819 /* 820 * When gro is present consider the nested headers as part 821 * of the geneve options 822 */ 823 nested = (void *)gh + gh_len; 824 nested2 = (void *)gh2 + gh_len; 825 gro_hint = geneve_opt_gro_hint(gh, hint_off); 826 if (!memcmp(nested, nested2, gro_hint->nested_hdr_len)) 827 return true; 828 829 /* 830 * The nested headers differ; the packets can still belong to 831 * the same flow when IPs/proto/ports match; if so flushing is 832 * required. 
833 */ 834 nh = nested + gro_hint->nested_nh_offset; 835 nh2 = nested2 + gro_hint->nested_nh_offset; 836 if (gro_hint->nested_is_v6) { 837 struct ipv6hdr *iph = nh, *iph2 = nh2; 838 unsigned int nested_nlen; 839 __be32 first_word; 840 841 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 842 if ((first_word & htonl(0xF00FFFFF)) || 843 !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || 844 !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || 845 iph->nexthdr != iph2->nexthdr) 846 return false; 847 848 nested_nlen = gro_hint->nested_tp_offset - 849 gro_hint->nested_nh_offset; 850 if (nested_nlen > sizeof(struct ipv6hdr) && 851 (memcmp(iph + 1, iph2 + 1, 852 nested_nlen - sizeof(struct ipv6hdr)))) 853 return false; 854 } else { 855 struct iphdr *iph = nh, *iph2 = nh2; 856 857 if ((iph->protocol ^ iph2->protocol) | 858 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | 859 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) 860 return false; 861 } 862 863 udp = nested + gro_hint->nested_tp_offset; 864 udp2 = nested2 + gro_hint->nested_tp_offset; 865 if (udp->source != udp2->source || udp->dest != udp2->dest || 866 udp->check != udp2->check) 867 return false; 868 869 NAPI_GRO_CB(skb)->flush = 1; 870 return true; 871 } 872 873 static struct sk_buff *geneve_gro_receive(struct sock *sk, 874 struct list_head *head, 875 struct sk_buff *skb) 876 { 877 unsigned int hlen, gh_len, off_gnv, hint_off; 878 const struct geneve_opt_gro_hint *gro_hint; 879 const struct packet_offload *ptype; 880 struct genevehdr *gh, *gh2; 881 struct sk_buff *pp = NULL; 882 struct sk_buff *p; 883 int flush = 1; 884 __be16 type; 885 886 off_gnv = skb_gro_offset(skb); 887 hlen = off_gnv + sizeof(*gh); 888 gh = skb_gro_header(skb, hlen, off_gnv); 889 if (unlikely(!gh)) 890 goto out; 891 892 if (gh->ver != GENEVE_VER || gh->oam) 893 goto out; 894 gh_len = geneve_hlen(gh); 895 type = gh->proto_type; 896 897 hlen = off_gnv + gh_len; 898 if (!skb_gro_may_pull(skb, hlen)) { 899 gh = skb_gro_header_slow(skb, hlen, 
off_gnv); 900 if (unlikely(!gh)) 901 goto out; 902 } 903 904 /* The GRO hint/nested hdr could use a different ethernet type. */ 905 hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); 906 if (hint_off) { 907 908 /* 909 * If the hint is present, and nested hdr validation fails, do 910 * not attempt plain GRO: it will ignore inner hdrs and cause 911 * OoO. 912 */ 913 gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv); 914 if (unlikely(!gh)) 915 goto out; 916 917 gro_hint = geneve_opt_gro_hint(gh, hint_off); 918 if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint)) 919 goto out; 920 } 921 922 list_for_each_entry(p, head, list) { 923 if (!NAPI_GRO_CB(p)->same_flow) 924 continue; 925 926 gh2 = (struct genevehdr *)(p->data + off_gnv); 927 if (!geneve_hdr_match(skb, gh, gh2, hint_off)) { 928 NAPI_GRO_CB(p)->same_flow = 0; 929 continue; 930 } 931 } 932 933 skb_gro_pull(skb, gh_len); 934 skb_gro_postpull_rcsum(skb, gh, gh_len); 935 if (likely(type == htons(ETH_P_TEB))) 936 return call_gro_receive(eth_gro_receive, head, skb); 937 938 ptype = gro_find_receive_by_type(type); 939 if (!ptype) 940 goto out; 941 942 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); 943 flush = 0; 944 945 out: 946 skb_gro_flush_final(skb, pp, flush); 947 948 return pp; 949 } 950 951 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, 952 int nhoff) 953 { 954 struct genevehdr *gh; 955 struct packet_offload *ptype; 956 __be16 type; 957 int gh_len; 958 int err = -ENOSYS; 959 960 gh = (struct genevehdr *)(skb->data + nhoff); 961 gh_len = geneve_hlen(gh); 962 type = gh->proto_type; 963 geneve_opt_gro_hint_off(gh, &type, &gh_len); 964 965 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ 966 if (likely(type == htons(ETH_P_TEB))) 967 return eth_gro_complete(skb, nhoff + gh_len); 968 969 ptype = gro_find_complete_by_type(type); 970 if (ptype) 971 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 972 973 
skb_set_inner_mac_header(skb, nhoff + gh_len); 974 975 return err; 976 } 977 978 /* Create new listen socket if needed */ 979 static struct geneve_sock *geneve_socket_create(struct net *net, 980 struct geneve_dev *geneve, bool ipv6) 981 { 982 struct geneve_net *gn = net_generic(net, geneve_net_id); 983 struct udp_tunnel_sock_cfg tunnel_cfg; 984 struct geneve_sock *gs; 985 struct sock *sk; 986 int h; 987 988 gs = kzalloc_obj(*gs); 989 if (!gs) 990 return ERR_PTR(-ENOMEM); 991 992 sk = geneve_create_sock(net, geneve, ipv6); 993 if (IS_ERR(sk)) { 994 kfree(gs); 995 return ERR_CAST(sk); 996 } 997 998 gs->sk = sk; 999 gs->refcnt = 1; 1000 for (h = 0; h < VNI_HASH_SIZE; ++h) 1001 INIT_HLIST_HEAD(&gs->vni_list[h]); 1002 1003 /* Initialize the geneve udp offloads structure */ 1004 udp_tunnel_notify_add_rx_port(sk, UDP_TUNNEL_TYPE_GENEVE); 1005 1006 /* Mark socket as an encapsulation socket */ 1007 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 1008 tunnel_cfg.sk_user_data = gs; 1009 tunnel_cfg.encap_type = 1; 1010 tunnel_cfg.gro_receive = geneve_gro_receive; 1011 tunnel_cfg.gro_complete = geneve_gro_complete; 1012 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 1013 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; 1014 tunnel_cfg.encap_destroy = NULL; 1015 setup_udp_tunnel_sock(net, sk, &tunnel_cfg); 1016 list_add(&gs->list, &gn->sock_list); 1017 return gs; 1018 } 1019 1020 static void __geneve_sock_release(struct geneve_sock *gs) 1021 { 1022 if (!gs || --gs->refcnt) 1023 return; 1024 1025 list_del(&gs->list); 1026 udp_tunnel_notify_del_rx_port(gs->sk, UDP_TUNNEL_TYPE_GENEVE); 1027 udp_tunnel_sock_release(gs->sk); 1028 kfree_rcu(gs, rcu); 1029 } 1030 1031 static void geneve_sock_release(struct geneve_dev *geneve) 1032 { 1033 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); 1034 #if IS_ENABLED(CONFIG_IPV6) 1035 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); 1036 1037 rcu_assign_pointer(geneve->sock6, NULL); 1038 #endif 1039 1040 
rcu_assign_pointer(geneve->sock4, NULL); 1041 1042 __geneve_sock_release(gs4); 1043 #if IS_ENABLED(CONFIG_IPV6) 1044 __geneve_sock_release(gs6); 1045 #endif 1046 } 1047 1048 static struct geneve_sock *geneve_find_sock(struct net *net, 1049 struct geneve_dev *geneve, bool ipv6) 1050 { 1051 struct geneve_net *gn = net_generic(net, geneve_net_id); 1052 struct ip_tunnel_info *info = &geneve->cfg.info; 1053 sa_family_t family = ipv6 ? AF_INET6 : AF_INET; 1054 bool gro_hint = geneve->cfg.gro_hint; 1055 __be16 dst_port = info->key.tp_dst; 1056 struct geneve_sock *gs; 1057 1058 list_for_each_entry(gs, &gn->sock_list, list) { 1059 if (inet_sk(gs->sk)->inet_sport != dst_port) 1060 continue; 1061 1062 if (geneve_get_sk_family(gs) != family) 1063 continue; 1064 1065 if (gs->gro_hint != gro_hint) 1066 continue; 1067 1068 if (family == AF_INET && 1069 inet_sk(gs->sk)->inet_saddr != info->key.u.ipv4.src) 1070 continue; 1071 1072 #if IS_ENABLED(CONFIG_IPV6) 1073 if (family == AF_INET6 && 1074 !ipv6_addr_equal(&gs->sk->sk_v6_rcv_saddr, &info->key.u.ipv6.src)) 1075 continue; 1076 #endif 1077 1078 return gs; 1079 } 1080 1081 return NULL; 1082 } 1083 1084 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 1085 { 1086 struct net *net = geneve->net; 1087 struct geneve_dev_node *node; 1088 struct geneve_sock *gs; 1089 __u8 vni[3]; 1090 __u32 hash; 1091 1092 gs = geneve_find_sock(net, geneve, ipv6); 1093 if (gs) { 1094 gs->refcnt++; 1095 goto out; 1096 } 1097 1098 gs = geneve_socket_create(net, geneve, ipv6); 1099 if (IS_ERR(gs)) 1100 return PTR_ERR(gs); 1101 1102 out: 1103 gs->collect_md = geneve->cfg.collect_md; 1104 gs->gro_hint = geneve->cfg.gro_hint; 1105 #if IS_ENABLED(CONFIG_IPV6) 1106 if (ipv6) { 1107 rcu_assign_pointer(geneve->sock6, gs); 1108 node = &geneve->hlist6; 1109 } else 1110 #endif 1111 { 1112 rcu_assign_pointer(geneve->sock4, gs); 1113 node = &geneve->hlist4; 1114 } 1115 node->geneve = geneve; 1116 1117 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); 
1118 hash = geneve_net_vni_hash(vni); 1119 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); 1120 return 0; 1121 } 1122 1123 static int geneve_open(struct net_device *dev) 1124 { 1125 struct geneve_dev *geneve = netdev_priv(dev); 1126 bool dualstack = geneve->cfg.dualstack; 1127 bool ipv4, ipv6; 1128 int ret = 0; 1129 1130 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || dualstack; 1131 ipv4 = !ipv6 || dualstack; 1132 #if IS_ENABLED(CONFIG_IPV6) 1133 if (ipv6) { 1134 ret = geneve_sock_add(geneve, true); 1135 if (ret < 0 && ret != -EAFNOSUPPORT) 1136 ipv4 = false; 1137 } 1138 #endif 1139 if (ipv4) 1140 ret = geneve_sock_add(geneve, false); 1141 if (ret < 0) 1142 geneve_sock_release(geneve); 1143 1144 return ret; 1145 } 1146 1147 static int geneve_stop(struct net_device *dev) 1148 { 1149 struct geneve_dev *geneve = netdev_priv(dev); 1150 1151 hlist_del_init_rcu(&geneve->hlist4.hlist); 1152 #if IS_ENABLED(CONFIG_IPV6) 1153 hlist_del_init_rcu(&geneve->hlist6.hlist); 1154 #endif 1155 geneve_sock_release(geneve); 1156 return 0; 1157 } 1158 1159 static void geneve_build_header(struct genevehdr *geneveh, 1160 const struct ip_tunnel_info *info, 1161 __be16 inner_proto) 1162 { 1163 geneveh->ver = GENEVE_VER; 1164 geneveh->opt_len = info->options_len / 4; 1165 geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); 1166 geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, 1167 info->key.tun_flags); 1168 geneveh->rsvd1 = 0; 1169 tunnel_id_to_vni(info->key.tun_id, geneveh->vni); 1170 geneveh->proto_type = inner_proto; 1171 geneveh->rsvd2 = 0; 1172 1173 if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) 1174 ip_tunnel_info_opts_get(geneveh->options, info); 1175 } 1176 1177 static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve, 1178 struct sk_buff *skb) 1179 { 1180 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1181 struct geneve_opt_gro_hint *hint; 1182 unsigned int nhlen; 1183 bool nested_is_v6; 1184 int id; 1185 1186 
BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb)); 1187 cb->gro_hint_len = 0; 1188 1189 /* Try to add the GRO hint only in case of double encap. */ 1190 if (!geneve->cfg.gro_hint || !skb->encapsulation) 1191 return 0; 1192 1193 /* 1194 * The nested headers must fit the geneve opt len fields and the 1195 * nested encap must carry a nested transport (UDP) header. 1196 */ 1197 nhlen = skb_inner_mac_header(skb) - skb->data; 1198 if (nhlen > 255 || !skb_transport_header_was_set(skb) || 1199 skb->inner_protocol_type != ENCAP_TYPE_ETHER || 1200 (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen)) 1201 return 0; 1202 1203 id = proto_to_id(skb->inner_protocol); 1204 if (id < 0) 1205 return 0; 1206 1207 nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1208 if (nested_is_v6) { 1209 int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 1210 u8 proto = ipv6_hdr(skb)->nexthdr; 1211 __be16 foff; 1212 1213 if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 || 1214 proto != IPPROTO_UDP) 1215 return 0; 1216 } else { 1217 if (ip_hdr(skb)->protocol != IPPROTO_UDP) 1218 return 0; 1219 } 1220 1221 hint = &cb->gro_hint; 1222 memset(hint, 0, sizeof(*hint)); 1223 hint->inner_proto_id = id; 1224 hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1225 hint->nested_nh_offset = skb_network_offset(skb); 1226 hint->nested_tp_offset = skb_transport_offset(skb); 1227 hint->nested_hdr_len = nhlen; 1228 cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE; 1229 return GENEVE_OPT_GRO_HINT_SIZE; 1230 } 1231 1232 static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, 1233 const struct geneve_opt_gro_hint *hint) 1234 { 1235 struct geneve_opt *gro_opt; 1236 1237 /* geneve_build_header() did not took in account the GRO hint. 
*/ 1238 gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2; 1239 1240 gro_opt = (void *)(gnvh + 1) + opt_size; 1241 memset(gro_opt, 0, sizeof(*gro_opt)); 1242 1243 gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS); 1244 gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE; 1245 gro_opt->length = GENEVE_OPT_GRO_HINT_LEN; 1246 memcpy(gro_opt + 1, hint, sizeof(*hint)); 1247 } 1248 1249 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, 1250 const struct ip_tunnel_info *info, 1251 const struct geneve_dev *geneve, int ip_hdr_len) 1252 { 1253 bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 1254 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; 1255 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1256 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1257 struct genevehdr *gnvh; 1258 __be16 inner_proto; 1259 bool double_encap; 1260 int min_headroom; 1261 int opt_size; 1262 int err; 1263 1264 skb_reset_mac_header(skb); 1265 skb_scrub_packet(skb, xnet); 1266 1267 opt_size = info->options_len + cb->gro_hint_len; 1268 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 1269 GENEVE_BASE_HLEN + opt_size + ip_hdr_len; 1270 err = skb_cow_head(skb, min_headroom); 1271 if (unlikely(err)) 1272 goto free_dst; 1273 1274 double_encap = udp_tunnel_handle_partial(skb); 1275 err = udp_tunnel_handle_offloads(skb, udp_sum); 1276 if (err) 1277 goto free_dst; 1278 1279 gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size); 1280 inner_proto = inner_proto_inherit ? 
skb->protocol : htons(ETH_P_TEB); 1281 geneve_build_header(gnvh, info, inner_proto); 1282 1283 if (cb->gro_hint_len) 1284 geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint); 1285 1286 udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto); 1287 return 0; 1288 1289 free_dst: 1290 dst_release(dst); 1291 return err; 1292 } 1293 1294 static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, 1295 const struct ip_tunnel_info *info, 1296 bool *use_cache) 1297 { 1298 struct geneve_dev *geneve = netdev_priv(dev); 1299 u8 dsfield; 1300 1301 dsfield = info->key.tos; 1302 if (dsfield == 1 && !geneve->cfg.collect_md) { 1303 dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 1304 *use_cache = false; 1305 } 1306 1307 return dsfield; 1308 } 1309 1310 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1311 struct geneve_dev *geneve, 1312 const struct ip_tunnel_info *info) 1313 { 1314 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1315 const struct ip_tunnel_key *key = &info->key; 1316 struct rtable *rt; 1317 bool use_cache; 1318 __u8 tos, ttl; 1319 __be16 df = 0; 1320 __be32 saddr; 1321 __be16 sport; 1322 int err; 1323 1324 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1325 return -EINVAL; 1326 1327 if (!gs4) 1328 return -EIO; 1329 1330 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1331 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1332 sport = udp_flow_src_port(geneve->net, skb, 1333 geneve->cfg.port_min, 1334 geneve->cfg.port_max, true); 1335 1336 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1337 &info->key, 1338 sport, geneve->cfg.info.key.tp_dst, tos, 1339 use_cache ? 
1340 (struct dst_cache *)&info->dst_cache : NULL); 1341 if (IS_ERR(rt)) 1342 return PTR_ERR(rt); 1343 1344 if (geneve->cfg.info.key.u.ipv4.src && 1345 saddr != geneve->cfg.info.key.u.ipv4.src) { 1346 dst_release(&rt->dst); 1347 return -EADDRNOTAVAIL; 1348 } 1349 1350 err = skb_tunnel_check_pmtu(skb, &rt->dst, 1351 GENEVE_IPV4_HLEN + info->options_len + 1352 geneve_build_gro_hint_opt(geneve, skb), 1353 netif_is_any_bridge_port(dev)); 1354 if (err < 0) { 1355 dst_release(&rt->dst); 1356 return err; 1357 } else if (err) { 1358 struct ip_tunnel_info *info; 1359 1360 info = skb_tunnel_info(skb); 1361 if (info) { 1362 struct ip_tunnel_info *unclone; 1363 1364 unclone = skb_tunnel_info_unclone(skb); 1365 if (unlikely(!unclone)) { 1366 dst_release(&rt->dst); 1367 return -ENOMEM; 1368 } 1369 1370 unclone->key.u.ipv4.dst = saddr; 1371 unclone->key.u.ipv4.src = info->key.u.ipv4.dst; 1372 } 1373 1374 if (!pskb_may_pull(skb, ETH_HLEN)) { 1375 dst_release(&rt->dst); 1376 return -EINVAL; 1377 } 1378 1379 skb->protocol = eth_type_trans(skb, geneve->dev); 1380 __netif_rx(skb); 1381 dst_release(&rt->dst); 1382 return -EMSGSIZE; 1383 } 1384 1385 tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); 1386 if (geneve->cfg.collect_md) { 1387 ttl = key->ttl; 1388 1389 df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? 1390 htons(IP_DF) : 0; 1391 } else { 1392 if (geneve->cfg.ttl_inherit) 1393 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1394 else 1395 ttl = key->ttl; 1396 ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); 1397 1398 if (geneve->cfg.df == GENEVE_DF_SET) { 1399 df = htons(IP_DF); 1400 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { 1401 struct ethhdr *eth = skb_eth_hdr(skb); 1402 1403 if (ntohs(eth->h_proto) == ETH_P_IPV6) { 1404 df = htons(IP_DF); 1405 } else if (ntohs(eth->h_proto) == ETH_P_IP) { 1406 struct iphdr *iph = ip_hdr(skb); 1407 1408 if (iph->frag_off & htons(IP_DF)) 1409 df = htons(IP_DF); 1410 } 1411 } 1412 } 1413 1414 err = geneve_build_skb(&rt->dst, skb, info, geneve, 1415 sizeof(struct iphdr)); 1416 if (unlikely(err)) 1417 return err; 1418 1419 udp_tunnel_xmit_skb(rt, gs4->sk, skb, saddr, info->key.u.ipv4.dst, 1420 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, 1421 !net_eq(geneve->net, dev_net(geneve->dev)), 1422 !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 1423 0); 1424 return 0; 1425 } 1426 1427 #if IS_ENABLED(CONFIG_IPV6) 1428 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1429 struct geneve_dev *geneve, 1430 const struct ip_tunnel_info *info) 1431 { 1432 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1433 const struct ip_tunnel_key *key = &info->key; 1434 struct dst_entry *dst = NULL; 1435 struct in6_addr saddr; 1436 bool use_cache; 1437 __u8 prio, ttl; 1438 __be16 sport; 1439 int err; 1440 1441 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1442 return -EINVAL; 1443 1444 if (!gs6) 1445 return -EIO; 1446 1447 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1448 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1449 sport = udp_flow_src_port(geneve->net, skb, 1450 geneve->cfg.port_min, 1451 geneve->cfg.port_max, true); 1452 1453 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1454 &saddr, key, sport, 1455 geneve->cfg.info.key.tp_dst, prio, 1456 use_cache ? 
1457 (struct dst_cache *)&info->dst_cache : NULL); 1458 if (IS_ERR(dst)) 1459 return PTR_ERR(dst); 1460 1461 if (!ipv6_addr_any(&geneve->cfg.info.key.u.ipv6.src) && 1462 !ipv6_addr_equal(&saddr, &geneve->cfg.info.key.u.ipv6.src)) { 1463 dst_release(dst); 1464 return -EADDRNOTAVAIL; 1465 } 1466 1467 err = skb_tunnel_check_pmtu(skb, dst, 1468 GENEVE_IPV6_HLEN + info->options_len + 1469 geneve_build_gro_hint_opt(geneve, skb), 1470 netif_is_any_bridge_port(dev)); 1471 if (err < 0) { 1472 dst_release(dst); 1473 return err; 1474 } else if (err) { 1475 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1476 1477 if (info) { 1478 struct ip_tunnel_info *unclone; 1479 1480 unclone = skb_tunnel_info_unclone(skb); 1481 if (unlikely(!unclone)) { 1482 dst_release(dst); 1483 return -ENOMEM; 1484 } 1485 1486 unclone->key.u.ipv6.dst = saddr; 1487 unclone->key.u.ipv6.src = info->key.u.ipv6.dst; 1488 } 1489 1490 if (!pskb_may_pull(skb, ETH_HLEN)) { 1491 dst_release(dst); 1492 return -EINVAL; 1493 } 1494 1495 skb->protocol = eth_type_trans(skb, geneve->dev); 1496 __netif_rx(skb); 1497 dst_release(dst); 1498 return -EMSGSIZE; 1499 } 1500 1501 prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); 1502 if (geneve->cfg.collect_md) { 1503 ttl = key->ttl; 1504 } else { 1505 if (geneve->cfg.ttl_inherit) 1506 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1507 else 1508 ttl = key->ttl; 1509 ttl = ttl ? 
: ip6_dst_hoplimit(dst); 1510 } 1511 err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); 1512 if (unlikely(err)) 1513 return err; 1514 1515 udp_tunnel6_xmit_skb(dst, gs6->sk, skb, dev, 1516 &saddr, &key->u.ipv6.dst, prio, ttl, 1517 info->key.label, sport, geneve->cfg.info.key.tp_dst, 1518 !test_bit(IP_TUNNEL_CSUM_BIT, 1519 info->key.tun_flags), 1520 0); 1521 return 0; 1522 } 1523 #endif 1524 1525 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1526 { 1527 struct geneve_dev *geneve = netdev_priv(dev); 1528 struct ip_tunnel_info *info = NULL; 1529 int err; 1530 1531 if (geneve->cfg.collect_md) { 1532 info = skb_tunnel_info(skb); 1533 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1534 netdev_dbg(dev, "no tunnel metadata\n"); 1535 dev_kfree_skb(skb); 1536 dev_dstats_tx_dropped(dev); 1537 return NETDEV_TX_OK; 1538 } 1539 } else { 1540 info = &geneve->cfg.info; 1541 } 1542 1543 rcu_read_lock(); 1544 #if IS_ENABLED(CONFIG_IPV6) 1545 if (info->mode & IP_TUNNEL_INFO_IPV6) 1546 err = geneve6_xmit_skb(skb, dev, geneve, info); 1547 else 1548 #endif 1549 err = geneve_xmit_skb(skb, dev, geneve, info); 1550 rcu_read_unlock(); 1551 1552 if (likely(!err)) 1553 return NETDEV_TX_OK; 1554 1555 if (err != -EMSGSIZE) 1556 dev_kfree_skb(skb); 1557 1558 if (err == -ELOOP) 1559 DEV_STATS_INC(dev, collisions); 1560 else if (err == -ENETUNREACH) 1561 DEV_STATS_INC(dev, tx_carrier_errors); 1562 1563 DEV_STATS_INC(dev, tx_errors); 1564 return NETDEV_TX_OK; 1565 } 1566 1567 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1568 { 1569 if (new_mtu > dev->max_mtu) 1570 new_mtu = dev->max_mtu; 1571 else if (new_mtu < dev->min_mtu) 1572 new_mtu = dev->min_mtu; 1573 1574 WRITE_ONCE(dev->mtu, new_mtu); 1575 return 0; 1576 } 1577 1578 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1579 { 1580 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1581 struct geneve_dev *geneve = 
netdev_priv(dev); 1582 __be16 sport; 1583 1584 if (ip_tunnel_info_af(info) == AF_INET) { 1585 struct rtable *rt; 1586 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1587 bool use_cache; 1588 __be32 saddr; 1589 u8 tos; 1590 1591 if (!gs4) 1592 return -EIO; 1593 1594 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1595 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1596 sport = udp_flow_src_port(geneve->net, skb, 1597 geneve->cfg.port_min, 1598 geneve->cfg.port_max, true); 1599 1600 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1601 &info->key, 1602 sport, geneve->cfg.info.key.tp_dst, 1603 tos, 1604 use_cache ? &info->dst_cache : NULL); 1605 if (IS_ERR(rt)) 1606 return PTR_ERR(rt); 1607 1608 ip_rt_put(rt); 1609 info->key.u.ipv4.src = saddr; 1610 #if IS_ENABLED(CONFIG_IPV6) 1611 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1612 struct dst_entry *dst; 1613 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1614 struct in6_addr saddr; 1615 bool use_cache; 1616 u8 prio; 1617 1618 if (!gs6) 1619 return -EIO; 1620 1621 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1622 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1623 sport = udp_flow_src_port(geneve->net, skb, 1624 geneve->cfg.port_min, 1625 geneve->cfg.port_max, true); 1626 1627 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1628 &saddr, &info->key, sport, 1629 geneve->cfg.info.key.tp_dst, prio, 1630 use_cache ? 
&info->dst_cache : NULL); 1631 if (IS_ERR(dst)) 1632 return PTR_ERR(dst); 1633 1634 dst_release(dst); 1635 info->key.u.ipv6.src = saddr; 1636 #endif 1637 } else { 1638 return -EINVAL; 1639 } 1640 1641 info->key.tp_src = sport; 1642 info->key.tp_dst = geneve->cfg.info.key.tp_dst; 1643 return 0; 1644 } 1645 1646 static const struct net_device_ops geneve_netdev_ops = { 1647 .ndo_init = geneve_init, 1648 .ndo_uninit = geneve_uninit, 1649 .ndo_open = geneve_open, 1650 .ndo_stop = geneve_stop, 1651 .ndo_start_xmit = geneve_xmit, 1652 .ndo_change_mtu = geneve_change_mtu, 1653 .ndo_validate_addr = eth_validate_addr, 1654 .ndo_set_mac_address = eth_mac_addr, 1655 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1656 }; 1657 1658 static void geneve_get_drvinfo(struct net_device *dev, 1659 struct ethtool_drvinfo *drvinfo) 1660 { 1661 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1662 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1663 } 1664 1665 static const struct ethtool_ops geneve_ethtool_ops = { 1666 .get_drvinfo = geneve_get_drvinfo, 1667 .get_link = ethtool_op_get_link, 1668 }; 1669 1670 /* Info for udev, that this is a virtual tunnel endpoint */ 1671 static const struct device_type geneve_type = { 1672 .name = "geneve", 1673 }; 1674 1675 /* Calls the ndo_udp_tunnel_add of the caller in order to 1676 * supply the listening GENEVE udp ports. Callers are expected 1677 * to implement the ndo_udp_tunnel_add. 
1678 */ 1679 static void geneve_offload_rx_ports(struct net_device *dev, bool push) 1680 { 1681 struct net *net = dev_net(dev); 1682 struct geneve_net *gn = net_generic(net, geneve_net_id); 1683 struct geneve_sock *gs; 1684 1685 ASSERT_RTNL(); 1686 1687 list_for_each_entry(gs, &gn->sock_list, list) { 1688 if (push) { 1689 udp_tunnel_push_rx_port(dev, gs->sk, 1690 UDP_TUNNEL_TYPE_GENEVE); 1691 } else { 1692 udp_tunnel_drop_rx_port(dev, gs->sk, 1693 UDP_TUNNEL_TYPE_GENEVE); 1694 } 1695 } 1696 } 1697 1698 /* Initialize the device structure. */ 1699 static void geneve_setup(struct net_device *dev) 1700 { 1701 ether_setup(dev); 1702 1703 dev->netdev_ops = &geneve_netdev_ops; 1704 dev->ethtool_ops = &geneve_ethtool_ops; 1705 dev->needs_free_netdev = true; 1706 1707 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1708 1709 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1710 dev->features |= NETIF_F_RXCSUM; 1711 dev->features |= NETIF_F_GSO_SOFTWARE; 1712 1713 /* Partial features are disabled by default. */ 1714 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1715 dev->hw_features |= NETIF_F_RXCSUM; 1716 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1717 dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES; 1718 dev->hw_features |= NETIF_F_GSO_PARTIAL; 1719 1720 dev->hw_enc_features = dev->hw_features; 1721 dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES; 1722 dev->mangleid_features = NETIF_F_GSO_PARTIAL; 1723 1724 dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; 1725 /* MTU range: 68 - (something less than 65535) */ 1726 dev->min_mtu = ETH_MIN_MTU; 1727 /* The max_mtu calculation does not take account of GENEVE 1728 * options, to avoid excluding potentially valid 1729 * configurations. This will be further reduced by IPvX hdr size. 
1730 */ 1731 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; 1732 1733 netif_keep_dst(dev); 1734 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1735 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1736 dev->lltx = true; 1737 eth_hw_addr_random(dev); 1738 } 1739 1740 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1741 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, 1742 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1743 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1744 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1745 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1746 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1747 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1748 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1749 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1750 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1751 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1752 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1753 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, 1754 [IFLA_GENEVE_DF] = { .type = NLA_U8 }, 1755 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, 1756 [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), 1757 [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG }, 1758 [IFLA_GENEVE_LOCAL] = { .type = NLA_BE32 }, 1759 [IFLA_GENEVE_LOCAL6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 1760 }; 1761 1762 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], 1763 struct netlink_ext_ack *extack) 1764 { 1765 if (tb[IFLA_ADDRESS]) { 1766 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { 1767 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1768 "Provided link layer address is not Ethernet"); 1769 return -EINVAL; 1770 } 1771 1772 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { 1773 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1774 "Provided Ethernet address is not unicast"); 1775 return 
-EADDRNOTAVAIL; 1776 } 1777 } 1778 1779 if (!data) { 1780 NL_SET_ERR_MSG(extack, 1781 "Not enough attributes provided to perform the operation"); 1782 return -EINVAL; 1783 } 1784 1785 if (data[IFLA_GENEVE_ID]) { 1786 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1787 1788 if (vni >= GENEVE_N_VID) { 1789 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], 1790 "Geneve ID must be lower than 16777216"); 1791 return -ERANGE; 1792 } 1793 } 1794 1795 if (data[IFLA_GENEVE_DF]) { 1796 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); 1797 1798 if (df < 0 || df > GENEVE_DF_MAX) { 1799 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], 1800 "Invalid DF attribute"); 1801 return -EINVAL; 1802 } 1803 } 1804 1805 if (data[IFLA_GENEVE_PORT_RANGE]) { 1806 const struct ifla_geneve_port_range *p; 1807 1808 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 1809 if (ntohs(p->high) < ntohs(p->low)) { 1810 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], 1811 "Invalid source port range"); 1812 return -EINVAL; 1813 } 1814 } 1815 1816 return 0; 1817 } 1818 1819 static bool geneve_saddr_wildcard(const struct ip_tunnel_info *info) 1820 { 1821 if (ip_tunnel_info_af(info) == AF_INET) { 1822 if (!info->key.u.ipv4.src) 1823 return true; 1824 #if IS_ENABLED(CONFIG_IPV6) 1825 } else { 1826 if (ipv6_addr_any(&info->key.u.ipv6.src)) 1827 return true; 1828 #endif 1829 } 1830 1831 return false; 1832 } 1833 1834 static bool geneve_saddr_conflict(const struct ip_tunnel_info *a, 1835 const struct ip_tunnel_info *b) 1836 { 1837 if (ip_tunnel_info_af(a) != ip_tunnel_info_af(b)) 1838 return false; 1839 1840 if (geneve_saddr_wildcard(a) || geneve_saddr_wildcard(b)) 1841 return true; 1842 1843 if (ip_tunnel_info_af(a) == AF_INET) { 1844 if (a->key.u.ipv4.src == b->key.u.ipv4.src) 1845 return true; 1846 #if IS_ENABLED(CONFIG_IPV6) 1847 } else { 1848 if (ipv6_addr_equal(&a->key.u.ipv6.src, &b->key.u.ipv6.src)) 1849 return true; 1850 #endif 1851 } 1852 1853 return false; 1854 } 1855 1856 static struct 
geneve_dev *geneve_find_dev(struct geneve_net *gn, 1857 const struct geneve_config *cfg, 1858 const struct ip_tunnel_info *info, 1859 bool *tun_on_same_port, 1860 bool *tun_collect_md) 1861 { 1862 struct geneve_dev *geneve, *t = NULL; 1863 1864 *tun_on_same_port = false; 1865 *tun_collect_md = false; 1866 list_for_each_entry(geneve, &gn->geneve_list, next) { 1867 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1868 (cfg->dualstack || geneve->cfg.dualstack || 1869 geneve_saddr_conflict(info, &geneve->cfg.info))) { 1870 *tun_collect_md |= geneve->cfg.collect_md; 1871 *tun_on_same_port = true; 1872 } 1873 if (info->key.tun_id == geneve->cfg.info.key.tun_id && 1874 info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1875 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) 1876 t = geneve; 1877 } 1878 return t; 1879 } 1880 1881 static bool is_tnl_info_zero(const struct ip_tunnel_info *info) 1882 { 1883 return !(info->key.tun_id || info->key.tos || 1884 !ip_tunnel_flags_empty(info->key.tun_flags) || 1885 info->key.ttl || info->key.label || info->key.tp_src || 1886 #if IS_ENABLED(CONFIG_IPV6) 1887 (ip_tunnel_info_af(info) == AF_INET6 && 1888 !ipv6_addr_any(&info->key.u.ipv6.dst)) || 1889 #endif 1890 (ip_tunnel_info_af(info) == AF_INET && 1891 info->key.u.ipv4.dst)); 1892 } 1893 1894 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, 1895 struct ip_tunnel_info *b) 1896 { 1897 if (ip_tunnel_info_af(a) == AF_INET) 1898 return a->key.u.ipv4.dst == b->key.u.ipv4.dst; 1899 else 1900 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); 1901 } 1902 1903 static int geneve_configure(struct net *net, struct net_device *dev, 1904 struct netlink_ext_ack *extack, 1905 const struct geneve_config *cfg) 1906 { 1907 struct geneve_net *gn = net_generic(net, geneve_net_id); 1908 struct geneve_dev *t, *geneve = netdev_priv(dev); 1909 const struct ip_tunnel_info *info = &cfg->info; 1910 bool tun_collect_md, tun_on_same_port; 1911 int err, encap_len; 
1912 1913 if (cfg->collect_md && !is_tnl_info_zero(info)) { 1914 NL_SET_ERR_MSG(extack, 1915 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); 1916 return -EINVAL; 1917 } 1918 1919 geneve->net = net; 1920 geneve->dev = dev; 1921 1922 t = geneve_find_dev(gn, cfg, info, &tun_on_same_port, &tun_collect_md); 1923 if (t) 1924 return -EBUSY; 1925 1926 /* make enough headroom for basic scenario */ 1927 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1928 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { 1929 encap_len += sizeof(struct iphdr); 1930 dev->max_mtu -= sizeof(struct iphdr); 1931 } else { 1932 encap_len += sizeof(struct ipv6hdr); 1933 dev->max_mtu -= sizeof(struct ipv6hdr); 1934 } 1935 dev->needed_headroom = encap_len + ETH_HLEN; 1936 1937 if (cfg->collect_md) { 1938 if (tun_on_same_port) { 1939 NL_SET_ERR_MSG(extack, 1940 "There can be only one externally controlled device on a destination port and a source address"); 1941 return -EPERM; 1942 } 1943 } else { 1944 if (tun_collect_md) { 1945 NL_SET_ERR_MSG(extack, 1946 "There already exists an externally controlled device on this destination port and the source address"); 1947 return -EPERM; 1948 } 1949 } 1950 1951 dst_cache_reset(&geneve->cfg.info.dst_cache); 1952 memcpy(&geneve->cfg, cfg, sizeof(*cfg)); 1953 1954 if (geneve->cfg.inner_proto_inherit) { 1955 dev->header_ops = NULL; 1956 dev->type = ARPHRD_NONE; 1957 dev->hard_header_len = 0; 1958 dev->addr_len = 0; 1959 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 1960 } 1961 1962 err = register_netdevice(dev); 1963 if (err) 1964 return err; 1965 1966 list_add(&geneve->next, &gn->geneve_list); 1967 return 0; 1968 } 1969 1970 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) 1971 { 1972 memset(info, 0, sizeof(*info)); 1973 info->key.tp_dst = htons(dst_port); 1974 } 1975 1976 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], 1977 struct netlink_ext_ack *extack, 1978 struct 
geneve_config *cfg, bool changelink) 1979 { 1980 struct ip_tunnel_info *info = &cfg->info; 1981 int attrtype; 1982 1983 if (data[IFLA_GENEVE_COLLECT_METADATA]) { 1984 if (changelink) { 1985 attrtype = IFLA_GENEVE_COLLECT_METADATA; 1986 goto change_notsup; 1987 } 1988 1989 cfg->collect_md = true; 1990 cfg->dualstack = true; 1991 } 1992 1993 if ((data[IFLA_GENEVE_LOCAL] || data[IFLA_GENEVE_REMOTE]) && 1994 (data[IFLA_GENEVE_LOCAL6] || data[IFLA_GENEVE_REMOTE6])) { 1995 NL_SET_ERR_MSG(extack, 1996 "Cannot specify both IPv4/IPv6 Remote/Local addresses"); 1997 return -EINVAL; 1998 } 1999 2000 if (data[IFLA_GENEVE_REMOTE]) { 2001 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { 2002 attrtype = IFLA_GENEVE_REMOTE; 2003 goto change_notsup; 2004 } 2005 2006 info->key.u.ipv4.dst = 2007 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 2008 2009 if (ipv4_is_multicast(info->key.u.ipv4.dst)) { 2010 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], 2011 "Remote IPv4 address cannot be Multicast"); 2012 return -EINVAL; 2013 } 2014 } 2015 2016 if (data[IFLA_GENEVE_REMOTE6]) { 2017 #if IS_ENABLED(CONFIG_IPV6) 2018 int addr_type; 2019 2020 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { 2021 attrtype = IFLA_GENEVE_REMOTE6; 2022 goto change_notsup; 2023 } 2024 2025 info->mode = IP_TUNNEL_INFO_IPV6; 2026 info->key.u.ipv6.dst = 2027 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 2028 2029 addr_type = ipv6_addr_type(&info->key.u.ipv6.dst); 2030 if (addr_type & IPV6_ADDR_LINKLOCAL) { 2031 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2032 "Remote IPv6 address cannot be link-local"); 2033 return -EINVAL; 2034 } 2035 if (addr_type & IPV6_ADDR_MULTICAST) { 2036 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2037 "Remote IPv6 address cannot be Multicast"); 2038 return -EINVAL; 2039 } 2040 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2041 cfg->use_udp6_rx_checksums = true; 2042 #else 2043 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 2044 "IPv6 
support not enabled in the kernel"); 2045 return -EPFNOSUPPORT; 2046 #endif 2047 } 2048 2049 if (data[IFLA_GENEVE_LOCAL]) { 2050 if (changelink) { 2051 __be32 src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]); 2052 2053 if (ip_tunnel_info_af(info) == AF_INET6 || 2054 src != info->key.u.ipv4.src) { 2055 attrtype = IFLA_GENEVE_LOCAL; 2056 goto change_notsup; 2057 } 2058 } else { 2059 info->key.u.ipv4.src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]); 2060 2061 if (ipv4_is_multicast(info->key.u.ipv4.src)) { 2062 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL], 2063 "Local IPv4 address cannot be Multicast"); 2064 return -EINVAL; 2065 } 2066 2067 cfg->dualstack = false; 2068 } 2069 } 2070 2071 if (data[IFLA_GENEVE_LOCAL6]) { 2072 #if IS_ENABLED(CONFIG_IPV6) 2073 if (changelink) { 2074 struct in6_addr src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]); 2075 2076 if (ip_tunnel_info_af(info) == AF_INET || 2077 !ipv6_addr_equal(&src, &info->key.u.ipv6.src)) { 2078 attrtype = IFLA_GENEVE_LOCAL6; 2079 goto change_notsup; 2080 } 2081 } else { 2082 int addr_type; 2083 2084 info->mode = IP_TUNNEL_INFO_IPV6; 2085 info->key.u.ipv6.src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]); 2086 2087 addr_type = ipv6_addr_type(&info->key.u.ipv6.src); 2088 if (addr_type & IPV6_ADDR_LINKLOCAL) { 2089 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2090 "Local IPv6 address cannot be link-local"); 2091 return -EINVAL; 2092 } 2093 if (addr_type & IPV6_ADDR_MULTICAST) { 2094 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2095 "Local IPv6 address cannot be Multicast"); 2096 return -EINVAL; 2097 } 2098 2099 cfg->dualstack = false; 2100 } 2101 #else 2102 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6], 2103 "IPv6 support not enabled in the kernel"); 2104 return -EPFNOSUPPORT; 2105 #endif 2106 } 2107 2108 if (data[IFLA_GENEVE_ID]) { 2109 __u32 vni; 2110 __u8 tvni[3]; 2111 __be64 tunid; 2112 2113 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 2114 tvni[0] = (vni & 0x00ff0000) >> 16; 2115 
tvni[1] = (vni & 0x0000ff00) >> 8; 2116 tvni[2] = vni & 0x000000ff; 2117 2118 tunid = vni_to_tunnel_id(tvni); 2119 if (changelink && (tunid != info->key.tun_id)) { 2120 attrtype = IFLA_GENEVE_ID; 2121 goto change_notsup; 2122 } 2123 info->key.tun_id = tunid; 2124 } 2125 2126 if (data[IFLA_GENEVE_TTL_INHERIT]) { 2127 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) 2128 cfg->ttl_inherit = true; 2129 else 2130 cfg->ttl_inherit = false; 2131 } else if (data[IFLA_GENEVE_TTL]) { 2132 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 2133 cfg->ttl_inherit = false; 2134 } 2135 2136 if (data[IFLA_GENEVE_TOS]) 2137 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 2138 2139 if (data[IFLA_GENEVE_DF]) 2140 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); 2141 2142 if (data[IFLA_GENEVE_LABEL]) { 2143 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & 2144 IPV6_FLOWLABEL_MASK; 2145 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { 2146 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], 2147 "Label attribute only applies for IPv6 Geneve devices"); 2148 return -EINVAL; 2149 } 2150 } 2151 2152 if (data[IFLA_GENEVE_PORT]) { 2153 if (changelink) { 2154 attrtype = IFLA_GENEVE_PORT; 2155 goto change_notsup; 2156 } 2157 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); 2158 } 2159 2160 if (data[IFLA_GENEVE_PORT_RANGE]) { 2161 const struct ifla_geneve_port_range *p; 2162 2163 if (changelink) { 2164 attrtype = IFLA_GENEVE_PORT_RANGE; 2165 goto change_notsup; 2166 } 2167 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 2168 cfg->port_min = ntohs(p->low); 2169 cfg->port_max = ntohs(p->high); 2170 } 2171 2172 if (data[IFLA_GENEVE_UDP_CSUM]) { 2173 if (changelink) { 2174 attrtype = IFLA_GENEVE_UDP_CSUM; 2175 goto change_notsup; 2176 } 2177 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 2178 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2179 } 2180 2181 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { 2182 #if IS_ENABLED(CONFIG_IPV6) 2183 if (changelink) { 2184 attrtype = 
IFLA_GENEVE_UDP_ZERO_CSUM6_TX; 2185 goto change_notsup; 2186 } 2187 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 2188 __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2189 #else 2190 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], 2191 "IPv6 support not enabled in the kernel"); 2192 return -EPFNOSUPPORT; 2193 #endif 2194 } 2195 2196 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { 2197 #if IS_ENABLED(CONFIG_IPV6) 2198 if (changelink) { 2199 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; 2200 goto change_notsup; 2201 } 2202 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 2203 cfg->use_udp6_rx_checksums = false; 2204 #else 2205 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], 2206 "IPv6 support not enabled in the kernel"); 2207 return -EPFNOSUPPORT; 2208 #endif 2209 } 2210 2211 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { 2212 if (changelink) { 2213 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; 2214 goto change_notsup; 2215 } 2216 cfg->inner_proto_inherit = true; 2217 } 2218 2219 if (data[IFLA_GENEVE_GRO_HINT]) { 2220 if (changelink) { 2221 attrtype = IFLA_GENEVE_GRO_HINT; 2222 goto change_notsup; 2223 } 2224 cfg->gro_hint = true; 2225 } 2226 2227 return 0; 2228 change_notsup: 2229 NL_SET_ERR_MSG_ATTR(extack, data[attrtype], 2230 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported"); 2231 return -EOPNOTSUPP; 2232 } 2233 2234 static void geneve_link_config(struct net_device *dev, 2235 struct ip_tunnel_info *info, struct nlattr *tb[]) 2236 { 2237 struct geneve_dev *geneve = netdev_priv(dev); 2238 int ldev_mtu = 0; 2239 2240 if (tb[IFLA_MTU]) { 2241 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); 2242 return; 2243 } 2244 2245 switch (ip_tunnel_info_af(info)) { 2246 case AF_INET: { 2247 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; 2248 struct rtable *rt = ip_route_output_key(geneve->net, &fl4); 2249 2250 if (!IS_ERR(rt) && rt->dst.dev) { 
2251 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; 2252 ip_rt_put(rt); 2253 } 2254 break; 2255 } 2256 #if IS_ENABLED(CONFIG_IPV6) 2257 case AF_INET6: { 2258 struct rt6_info *rt; 2259 2260 if (!__in6_dev_get(dev)) 2261 break; 2262 2263 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, 2264 NULL, 0); 2265 2266 if (rt && rt->dst.dev) 2267 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; 2268 ip6_rt_put(rt); 2269 break; 2270 } 2271 #endif 2272 } 2273 2274 if (ldev_mtu <= 0) 2275 return; 2276 2277 geneve_change_mtu(dev, ldev_mtu - info->options_len); 2278 } 2279 2280 static int geneve_newlink(struct net_device *dev, 2281 struct rtnl_newlink_params *params, 2282 struct netlink_ext_ack *extack) 2283 { 2284 struct net *link_net = rtnl_newlink_link_net(params); 2285 struct nlattr **data = params->data; 2286 struct nlattr **tb = params->tb; 2287 struct geneve_config cfg = { 2288 .df = GENEVE_DF_UNSET, 2289 .use_udp6_rx_checksums = false, 2290 .ttl_inherit = false, 2291 .collect_md = false, 2292 .dualstack = false, 2293 .port_min = 1, 2294 .port_max = USHRT_MAX, 2295 }; 2296 int err; 2297 2298 init_tnl_info(&cfg.info, GENEVE_UDP_PORT); 2299 err = geneve_nl2info(tb, data, extack, &cfg, false); 2300 if (err) 2301 return err; 2302 2303 err = geneve_configure(link_net, dev, extack, &cfg); 2304 if (err) 2305 return err; 2306 2307 geneve_link_config(dev, &cfg.info, tb); 2308 2309 return 0; 2310 } 2311 2312 /* Quiesces the geneve device data path for both TX and RX. 2313 * 2314 * On transmit geneve checks for non-NULL geneve_sock before it proceeds. 2315 * So, if we set that socket to NULL under RCU and wait for synchronize_net() 2316 * to complete for the existing set of in-flight packets to be transmitted, 2317 * then we would have quiesced the transmit data path. All the future packets 2318 * will get dropped until we unquiesce the data path. 2319 * 2320 * On receive geneve dereference the geneve_sock stashed in the socket. 
So, 2321 * if we set that to NULL under RCU and wait for synchronize_net() to 2322 * complete, then we would have quiesced the receive data path. 2323 */ 2324 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, 2325 struct geneve_sock **gs6) 2326 { 2327 *gs4 = rtnl_dereference(geneve->sock4); 2328 rcu_assign_pointer(geneve->sock4, NULL); 2329 if (*gs4) 2330 rcu_assign_sk_user_data((*gs4)->sk, NULL); 2331 #if IS_ENABLED(CONFIG_IPV6) 2332 *gs6 = rtnl_dereference(geneve->sock6); 2333 rcu_assign_pointer(geneve->sock6, NULL); 2334 if (*gs6) 2335 rcu_assign_sk_user_data((*gs6)->sk, NULL); 2336 #else 2337 *gs6 = NULL; 2338 #endif 2339 synchronize_net(); 2340 } 2341 2342 /* Resumes the geneve device data path for both TX and RX. */ 2343 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, 2344 struct geneve_sock __maybe_unused *gs6) 2345 { 2346 rcu_assign_pointer(geneve->sock4, gs4); 2347 if (gs4) 2348 rcu_assign_sk_user_data(gs4->sk, gs4); 2349 #if IS_ENABLED(CONFIG_IPV6) 2350 rcu_assign_pointer(geneve->sock6, gs6); 2351 if (gs6) 2352 rcu_assign_sk_user_data(gs6->sk, gs6); 2353 #endif 2354 } 2355 2356 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], 2357 struct nlattr *data[], 2358 struct netlink_ext_ack *extack) 2359 { 2360 struct geneve_dev *geneve = netdev_priv(dev); 2361 struct geneve_sock *gs4, *gs6; 2362 struct geneve_config cfg; 2363 int err; 2364 2365 /* If the geneve device is configured for metadata (or externally 2366 * controlled, for example, OVS), then nothing can be changed. 2367 */ 2368 if (geneve->cfg.collect_md) 2369 return -EOPNOTSUPP; 2370 2371 /* Start with the existing info. 
*/ 2372 memcpy(&cfg, &geneve->cfg, sizeof(cfg)); 2373 err = geneve_nl2info(tb, data, extack, &cfg, true); 2374 if (err) 2375 return err; 2376 2377 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { 2378 dst_cache_reset(&cfg.info.dst_cache); 2379 geneve_link_config(dev, &cfg.info, tb); 2380 } 2381 2382 geneve_quiesce(geneve, &gs4, &gs6); 2383 memcpy(&geneve->cfg, &cfg, sizeof(cfg)); 2384 geneve_unquiesce(geneve, gs4, gs6); 2385 2386 return 0; 2387 } 2388 2389 static void geneve_dellink(struct net_device *dev, struct list_head *head) 2390 { 2391 struct geneve_dev *geneve = netdev_priv(dev); 2392 2393 list_del(&geneve->next); 2394 unregister_netdevice_queue(dev, head); 2395 } 2396 2397 static size_t geneve_get_size(const struct net_device *dev) 2398 { 2399 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 2400 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 2401 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_LOCAL{6} */ 2402 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 2403 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 2404 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ 2405 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 2406 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 2407 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 2408 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 2409 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 2410 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 2411 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 2412 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ 2413 nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 2414 nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */ 2415 0; 2416 } 2417 2418 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 2419 { 2420 struct geneve_dev *geneve = netdev_priv(dev); 2421 struct 
ip_tunnel_info *info = &geneve->cfg.info; 2422 bool ttl_inherit = geneve->cfg.ttl_inherit; 2423 bool metadata = geneve->cfg.collect_md; 2424 struct ifla_geneve_port_range ports = { 2425 .low = htons(geneve->cfg.port_min), 2426 .high = htons(geneve->cfg.port_max), 2427 }; 2428 __u8 tmp_vni[3]; 2429 __u32 vni; 2430 2431 tunnel_id_to_vni(info->key.tun_id, tmp_vni); 2432 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; 2433 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 2434 goto nla_put_failure; 2435 2436 if (!metadata && ip_tunnel_info_af(info) == AF_INET) { 2437 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 2438 info->key.u.ipv4.dst)) 2439 goto nla_put_failure; 2440 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 2441 test_bit(IP_TUNNEL_CSUM_BIT, 2442 info->key.tun_flags))) 2443 goto nla_put_failure; 2444 2445 #if IS_ENABLED(CONFIG_IPV6) 2446 } else if (!metadata) { 2447 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 2448 &info->key.u.ipv6.dst)) 2449 goto nla_put_failure; 2450 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 2451 !test_bit(IP_TUNNEL_CSUM_BIT, 2452 info->key.tun_flags))) 2453 goto nla_put_failure; 2454 #endif 2455 } 2456 2457 if (!geneve->cfg.dualstack) { 2458 if (ip_tunnel_info_af(info) == AF_INET) { 2459 if ((info->key.u.ipv4.src || 2460 geneve->cfg.collect_md) && 2461 nla_put_in_addr(skb, IFLA_GENEVE_LOCAL, 2462 info->key.u.ipv4.src)) 2463 goto nla_put_failure; 2464 #if IS_ENABLED(CONFIG_IPV6) 2465 } else { 2466 if ((!ipv6_addr_any(&info->key.u.ipv6.src) || 2467 geneve->cfg.collect_md) && 2468 nla_put_in6_addr(skb, IFLA_GENEVE_LOCAL6, 2469 &info->key.u.ipv6.src)) 2470 goto nla_put_failure; 2471 #endif 2472 } 2473 } 2474 2475 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || 2476 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || 2477 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) 2478 goto nla_put_failure; 2479 2480 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) 2481 goto nla_put_failure; 2482 2483 if (nla_put_be16(skb, 
IFLA_GENEVE_PORT, info->key.tp_dst)) 2484 goto nla_put_failure; 2485 2486 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 2487 goto nla_put_failure; 2488 2489 #if IS_ENABLED(CONFIG_IPV6) 2490 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 2491 !geneve->cfg.use_udp6_rx_checksums)) 2492 goto nla_put_failure; 2493 #endif 2494 2495 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) 2496 goto nla_put_failure; 2497 2498 if (geneve->cfg.inner_proto_inherit && 2499 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) 2500 goto nla_put_failure; 2501 2502 if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) 2503 goto nla_put_failure; 2504 2505 if (geneve->cfg.gro_hint && 2506 nla_put_flag(skb, IFLA_GENEVE_GRO_HINT)) 2507 goto nla_put_failure; 2508 2509 return 0; 2510 2511 nla_put_failure: 2512 return -EMSGSIZE; 2513 } 2514 2515 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 2516 .kind = "geneve", 2517 .maxtype = IFLA_GENEVE_MAX, 2518 .policy = geneve_policy, 2519 .priv_size = sizeof(struct geneve_dev), 2520 .setup = geneve_setup, 2521 .validate = geneve_validate, 2522 .newlink = geneve_newlink, 2523 .changelink = geneve_changelink, 2524 .dellink = geneve_dellink, 2525 .get_size = geneve_get_size, 2526 .fill_info = geneve_fill_info, 2527 }; 2528 2529 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 2530 u8 name_assign_type, u16 dst_port) 2531 { 2532 struct nlattr *tb[IFLA_MAX + 1]; 2533 struct net_device *dev; 2534 LIST_HEAD(list_kill); 2535 int err; 2536 struct geneve_config cfg = { 2537 .df = GENEVE_DF_UNSET, 2538 .use_udp6_rx_checksums = true, 2539 .ttl_inherit = false, 2540 .collect_md = true, 2541 .dualstack = true, 2542 .port_min = 1, 2543 .port_max = USHRT_MAX, 2544 }; 2545 2546 memset(tb, 0, sizeof(tb)); 2547 dev = rtnl_create_link(net, name, name_assign_type, 2548 &geneve_link_ops, tb, NULL); 2549 if (IS_ERR(dev)) 2550 return dev; 2551 2552 init_tnl_info(&cfg.info, dst_port); 2553 err = 
geneve_configure(net, dev, NULL, &cfg); 2554 if (err) { 2555 free_netdev(dev); 2556 return ERR_PTR(err); 2557 } 2558 2559 /* openvswitch users expect packet sizes to be unrestricted, 2560 * so set the largest MTU we can. 2561 */ 2562 err = geneve_change_mtu(dev, IP_MAX_MTU); 2563 if (err) 2564 goto err; 2565 2566 err = rtnl_configure_link(dev, NULL, 0, NULL); 2567 if (err < 0) 2568 goto err; 2569 2570 return dev; 2571 err: 2572 geneve_dellink(dev, &list_kill); 2573 unregister_netdevice_many(&list_kill); 2574 return ERR_PTR(err); 2575 } 2576 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 2577 2578 static int geneve_netdevice_event(struct notifier_block *unused, 2579 unsigned long event, void *ptr) 2580 { 2581 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2582 2583 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) 2584 geneve_offload_rx_ports(dev, true); 2585 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) 2586 geneve_offload_rx_ports(dev, false); 2587 2588 return NOTIFY_DONE; 2589 } 2590 2591 static struct notifier_block geneve_notifier_block __read_mostly = { 2592 .notifier_call = geneve_netdevice_event, 2593 }; 2594 2595 static __net_init int geneve_init_net(struct net *net) 2596 { 2597 struct geneve_net *gn = net_generic(net, geneve_net_id); 2598 2599 INIT_LIST_HEAD(&gn->geneve_list); 2600 INIT_LIST_HEAD(&gn->sock_list); 2601 return 0; 2602 } 2603 2604 static void __net_exit geneve_exit_rtnl_net(struct net *net, 2605 struct list_head *dev_to_kill) 2606 { 2607 struct geneve_net *gn = net_generic(net, geneve_net_id); 2608 struct geneve_dev *geneve, *next; 2609 2610 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) 2611 geneve_dellink(geneve->dev, dev_to_kill); 2612 } 2613 2614 static void __net_exit geneve_exit_net(struct net *net) 2615 { 2616 const struct geneve_net *gn = net_generic(net, geneve_net_id); 2617 2618 WARN_ON_ONCE(!list_empty(&gn->sock_list)); 2619 } 2620 2621 static struct pernet_operations geneve_net_ops = { 2622 .init = geneve_init_net, 
2623 .exit_rtnl = geneve_exit_rtnl_net, 2624 .exit = geneve_exit_net, 2625 .id = &geneve_net_id, 2626 .size = sizeof(struct geneve_net), 2627 }; 2628 2629 static int __init geneve_init_module(void) 2630 { 2631 int rc; 2632 2633 rc = register_pernet_subsys(&geneve_net_ops); 2634 if (rc) 2635 goto out1; 2636 2637 rc = register_netdevice_notifier(&geneve_notifier_block); 2638 if (rc) 2639 goto out2; 2640 2641 rc = rtnl_link_register(&geneve_link_ops); 2642 if (rc) 2643 goto out3; 2644 2645 return 0; 2646 out3: 2647 unregister_netdevice_notifier(&geneve_notifier_block); 2648 out2: 2649 unregister_pernet_subsys(&geneve_net_ops); 2650 out1: 2651 return rc; 2652 } 2653 late_initcall(geneve_init_module); 2654 2655 static void __exit geneve_cleanup_module(void) 2656 { 2657 rtnl_link_unregister(&geneve_link_ops); 2658 unregister_netdevice_notifier(&geneve_notifier_block); 2659 unregister_pernet_subsys(&geneve_net_ops); 2660 } 2661 module_exit(geneve_cleanup_module); 2662 2663 MODULE_LICENSE("GPL"); 2664 MODULE_VERSION(GENEVE_NETDEV_VER); 2665 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 2666 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 2667 MODULE_ALIAS_RTNL_LINK("geneve"); 2668