1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GENEVE: Generic Network Virtualization Encapsulation 4 * 5 * Copyright (c) 2015 Red Hat, Inc. 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/ethtool.h> 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/etherdevice.h> 14 #include <linux/hash.h> 15 #include <net/dst_metadata.h> 16 #include <net/gro_cells.h> 17 #include <net/rtnetlink.h> 18 #include <net/geneve.h> 19 #include <net/gro.h> 20 #include <net/netdev_lock.h> 21 #include <net/protocol.h> 22 23 #define GENEVE_NETDEV_VER "0.6" 24 25 #define GENEVE_N_VID (1u << 24) 26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 27 28 #define VNI_HASH_BITS 10 29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 30 31 static bool log_ecn_error = true; 32 module_param(log_ecn_error, bool, 0644); 33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 34 35 #define GENEVE_VER 0 36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) 38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) 39 40 #define GENEVE_OPT_NETDEV_CLASS 0x100 41 #define GENEVE_OPT_GRO_HINT_SIZE 8 42 #define GENEVE_OPT_GRO_HINT_TYPE 1 43 #define GENEVE_OPT_GRO_HINT_LEN 1 44 45 struct geneve_opt_gro_hint { 46 u8 inner_proto_id:2, 47 nested_is_v6:1; 48 u8 nested_nh_offset; 49 u8 nested_tp_offset; 50 u8 nested_hdr_len; 51 }; 52 53 struct geneve_skb_cb { 54 unsigned int gro_hint_len; 55 struct geneve_opt_gro_hint gro_hint; 56 }; 57 58 #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0])) 59 60 /* per-network namespace private data for this module */ 61 struct geneve_net { 62 struct list_head geneve_list; 63 /* sock_list is protected by rtnl lock */ 64 struct list_head sock_list; 65 }; 66 67 static unsigned int geneve_net_id; 68 69 struct geneve_dev_node { 70 struct hlist_node hlist; 71 struct 
geneve_dev *geneve; 72 }; 73 74 struct geneve_config { 75 bool collect_md; 76 bool dualstack; 77 bool use_udp6_rx_checksums; 78 bool ttl_inherit; 79 bool gro_hint; 80 enum ifla_geneve_df df; 81 bool inner_proto_inherit; 82 u16 port_min; 83 u16 port_max; 84 85 /* Must be last --ends in a flexible-array member. */ 86 struct ip_tunnel_info info; 87 }; 88 89 /* Pseudo network device */ 90 struct geneve_dev { 91 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ 92 #if IS_ENABLED(CONFIG_IPV6) 93 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ 94 #endif 95 struct net *net; /* netns for packet i/o */ 96 struct net_device *dev; /* netdev for geneve tunnel */ 97 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ 98 #if IS_ENABLED(CONFIG_IPV6) 99 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ 100 #endif 101 struct list_head next; /* geneve's per namespace list */ 102 struct gro_cells gro_cells; 103 struct geneve_config cfg; 104 }; 105 106 struct geneve_sock { 107 bool collect_md; 108 bool gro_hint; 109 struct list_head list; 110 struct sock *sk; 111 struct rcu_head rcu; 112 int refcnt; 113 struct hlist_head vni_list[VNI_HASH_SIZE]; 114 }; 115 116 static const __be16 proto_id_map[] = { htons(ETH_P_TEB), 117 htons(ETH_P_IPV6), 118 htons(ETH_P_IP) }; 119 120 static int proto_to_id(__be16 proto) 121 { 122 int i; 123 124 for (i = 0; i < ARRAY_SIZE(proto_id_map); i++) 125 if (proto_id_map[i] == proto) 126 return i; 127 128 return -1; 129 } 130 131 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 132 { 133 __u32 vnid; 134 135 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 136 return hash_32(vnid, VNI_HASH_BITS); 137 } 138 139 static __be64 vni_to_tunnel_id(const __u8 *vni) 140 { 141 #ifdef __BIG_ENDIAN 142 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 143 #else 144 return (__force __be64)(((__force u64)vni[0] << 40) | 145 ((__force u64)vni[1] << 48) | 146 ((__force u64)vni[2] << 56)); 147 
#endif 148 } 149 150 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 151 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 152 { 153 #ifdef __BIG_ENDIAN 154 vni[0] = (__force __u8)(tun_id >> 16); 155 vni[1] = (__force __u8)(tun_id >> 8); 156 vni[2] = (__force __u8)tun_id; 157 #else 158 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 159 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 160 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 161 #endif 162 } 163 164 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) 165 { 166 return !memcmp(vni, &tun_id[5], 3); 167 } 168 169 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) 170 { 171 return gs->sk->sk_family; 172 } 173 174 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 175 __be32 addr, u8 vni[]) 176 { 177 struct hlist_head *vni_list_head; 178 struct geneve_dev_node *node; 179 __u32 hash; 180 181 /* Find the device for this VNI */ 182 hash = geneve_net_vni_hash(vni); 183 vni_list_head = &gs->vni_list[hash]; 184 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 185 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 186 addr == node->geneve->cfg.info.key.u.ipv4.dst) 187 return node->geneve; 188 } 189 return NULL; 190 } 191 192 #if IS_ENABLED(CONFIG_IPV6) 193 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 194 struct in6_addr addr6, u8 vni[]) 195 { 196 struct hlist_head *vni_list_head; 197 struct geneve_dev_node *node; 198 __u32 hash; 199 200 /* Find the device for this VNI */ 201 hash = geneve_net_vni_hash(vni); 202 vni_list_head = &gs->vni_list[hash]; 203 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 204 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 205 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) 206 return node->geneve; 207 } 208 return NULL; 209 } 210 #endif 211 212 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 213 { 214 return (struct genevehdr 
*)(udp_hdr(skb) + 1); 215 } 216 217 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, 218 struct sk_buff *skb) 219 { 220 static u8 zero_vni[3]; 221 u8 *vni; 222 223 if (geneve_get_sk_family(gs) == AF_INET) { 224 struct iphdr *iph; 225 __be32 addr; 226 227 iph = ip_hdr(skb); /* outer IP header... */ 228 229 if (gs->collect_md) { 230 vni = zero_vni; 231 addr = 0; 232 } else { 233 vni = geneve_hdr(skb)->vni; 234 addr = iph->saddr; 235 } 236 237 return geneve_lookup(gs, addr, vni); 238 #if IS_ENABLED(CONFIG_IPV6) 239 } else if (geneve_get_sk_family(gs) == AF_INET6) { 240 static struct in6_addr zero_addr6; 241 struct ipv6hdr *ip6h; 242 struct in6_addr addr6; 243 244 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 245 246 if (gs->collect_md) { 247 vni = zero_vni; 248 addr6 = zero_addr6; 249 } else { 250 vni = geneve_hdr(skb)->vni; 251 addr6 = ip6h->saddr; 252 } 253 254 return geneve6_lookup(gs, addr6, vni); 255 #endif 256 } 257 return NULL; 258 } 259 260 /* geneve receive/decap routine */ 261 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, 262 struct sk_buff *skb, const struct genevehdr *gnvh) 263 { 264 struct metadata_dst *tun_dst = NULL; 265 unsigned int len; 266 int nh, err = 0; 267 void *oiph; 268 269 if (ip_tunnel_collect_metadata() || gs->collect_md) { 270 IP_TUNNEL_DECLARE_FLAGS(flags) = { }; 271 272 __set_bit(IP_TUNNEL_KEY_BIT, flags); 273 __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); 274 __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); 275 276 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, 277 vni_to_tunnel_id(gnvh->vni), 278 gnvh->opt_len * 4); 279 if (!tun_dst) { 280 dev_dstats_rx_dropped(geneve->dev); 281 goto drop; 282 } 283 /* Update tunnel dst according to Geneve options. 
*/ 284 ip_tunnel_flags_zero(flags); 285 __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); 286 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 287 gnvh->options, gnvh->opt_len * 4, 288 flags); 289 } else { 290 /* Drop packets w/ critical options, 291 * since we don't support any... 292 */ 293 if (gnvh->critical) { 294 DEV_STATS_INC(geneve->dev, rx_frame_errors); 295 DEV_STATS_INC(geneve->dev, rx_errors); 296 goto drop; 297 } 298 } 299 300 if (tun_dst) 301 skb_dst_set(skb, &tun_dst->dst); 302 303 if (gnvh->proto_type == htons(ETH_P_TEB)) { 304 skb_reset_mac_header(skb); 305 skb->protocol = eth_type_trans(skb, geneve->dev); 306 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 307 308 /* Ignore packet loops (and multicast echo) */ 309 if (ether_addr_equal(eth_hdr(skb)->h_source, 310 geneve->dev->dev_addr)) { 311 DEV_STATS_INC(geneve->dev, rx_errors); 312 goto drop; 313 } 314 } else { 315 skb_reset_mac_header(skb); 316 skb->dev = geneve->dev; 317 skb->pkt_type = PACKET_HOST; 318 } 319 320 /* Save offset of outer header relative to skb->head, 321 * because we are going to reset the network header to the inner header 322 * and might change skb->head. 323 */ 324 nh = skb_network_header(skb) - skb->head; 325 326 skb_reset_network_header(skb); 327 328 if (!pskb_inet_may_pull(skb)) { 329 DEV_STATS_INC(geneve->dev, rx_length_errors); 330 DEV_STATS_INC(geneve->dev, rx_errors); 331 goto drop; 332 } 333 334 /* Get the outer header. 
*/ 335 oiph = skb->head + nh; 336 337 if (geneve_get_sk_family(gs) == AF_INET) 338 err = IP_ECN_decapsulate(oiph, skb); 339 #if IS_ENABLED(CONFIG_IPV6) 340 else 341 err = IP6_ECN_decapsulate(oiph, skb); 342 #endif 343 344 if (unlikely(err)) { 345 if (log_ecn_error) { 346 if (geneve_get_sk_family(gs) == AF_INET) 347 net_info_ratelimited("non-ECT from %pI4 " 348 "with TOS=%#x\n", 349 &((struct iphdr *)oiph)->saddr, 350 ((struct iphdr *)oiph)->tos); 351 #if IS_ENABLED(CONFIG_IPV6) 352 else 353 net_info_ratelimited("non-ECT from %pI6\n", 354 &((struct ipv6hdr *)oiph)->saddr); 355 #endif 356 } 357 if (err > 1) { 358 DEV_STATS_INC(geneve->dev, rx_frame_errors); 359 DEV_STATS_INC(geneve->dev, rx_errors); 360 goto drop; 361 } 362 } 363 364 /* Skip the additional GRO stage when hints are in use. */ 365 len = skb->len; 366 if (skb->encapsulation) 367 err = netif_rx(skb); 368 else 369 err = gro_cells_receive(&geneve->gro_cells, skb); 370 if (likely(err == NET_RX_SUCCESS)) 371 dev_dstats_rx_add(geneve->dev, len); 372 373 return; 374 drop: 375 /* Consume bad packet */ 376 kfree_skb(skb); 377 } 378 379 /* Setup stats when device is created */ 380 static int geneve_init(struct net_device *dev) 381 { 382 struct geneve_dev *geneve = netdev_priv(dev); 383 int err; 384 385 err = gro_cells_init(&geneve->gro_cells, dev); 386 if (err) 387 return err; 388 389 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); 390 if (err) { 391 gro_cells_destroy(&geneve->gro_cells); 392 return err; 393 } 394 netdev_lockdep_set_classes(dev); 395 return 0; 396 } 397 398 static void geneve_uninit(struct net_device *dev) 399 { 400 struct geneve_dev *geneve = netdev_priv(dev); 401 402 dst_cache_destroy(&geneve->cfg.info.dst_cache); 403 gro_cells_destroy(&geneve->gro_cells); 404 } 405 406 static int geneve_hlen(const struct genevehdr *gh) 407 { 408 return sizeof(*gh) + gh->opt_len * 4; 409 } 410 411 /* 412 * Look for GRO hint in the genenve options; if not found or does not pass basic 413 * 
* sanitization return 0, otherwise the offset WRT the geneve hdr start.
 */
static unsigned int
geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type,
			unsigned int *gh_len)
{
	struct geneve_opt *opt = (void *)(gh + 1);
	unsigned int id, opt_len = gh->opt_len;
	struct geneve_opt_gro_hint *gro_hint;

	/* opt_len counts 4 byte words; stop when a hint can no longer fit. */
	while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) {
		unsigned int opt_words;

		if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) &&
		    opt->type == GENEVE_OPT_GRO_HINT_TYPE &&
		    opt->length == GENEVE_OPT_GRO_HINT_LEN)
			goto found;

		/* Option header word plus its payload words. */
		opt_words = opt->length + 1;

		/* check for bad opt len */
		if (opt_words >= opt_len)
			return 0;

		/* next opt */
		opt_len -= opt_words;
		opt = ((void *)opt) + (opt_words << 2);
	}
	return 0;

found:
	gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data;

	/*
	 * Sanitize the hinted hdrs: the nested transport is UDP and must fit
	 * the overall hinted hdr size.
	 */
	if (gro_hint->nested_tp_offset + sizeof(struct udphdr) >
	    gro_hint->nested_hdr_len)
		return 0;

	/* The nested network header must end before the transport one. */
	if (gro_hint->nested_nh_offset +
	    (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) :
				      sizeof(struct iphdr)) >
	    gro_hint->nested_tp_offset)
		return 0;

	/* Allow only supported L2.
*/ 457 id = gro_hint->inner_proto_id; 458 if (id >= ARRAY_SIZE(proto_id_map)) 459 return 0; 460 461 *type = proto_id_map[id]; 462 *gh_len += gro_hint->nested_hdr_len; 463 464 return (void *)gro_hint - (void *)gh; 465 } 466 467 static const struct geneve_opt_gro_hint * 468 geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off) 469 { 470 return (const struct geneve_opt_gro_hint *)((void *)gh + hint_off); 471 } 472 473 static unsigned int 474 geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh, 475 __be16 *type, unsigned int *gh_len) 476 { 477 const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk); 478 479 if (!gs || !gs->gro_hint) 480 return 0; 481 return geneve_opt_gro_hint_off(gh, type, gh_len); 482 } 483 484 /* Validate the packet headers pointed by data WRT the provided hint */ 485 static bool 486 geneve_opt_gro_hint_validate(void *data, 487 const struct geneve_opt_gro_hint *gro_hint) 488 { 489 void *nested_nh = data + gro_hint->nested_nh_offset; 490 struct iphdr *iph; 491 492 if (gro_hint->nested_is_v6) { 493 struct ipv6hdr *ipv6h = nested_nh; 494 struct ipv6_opt_hdr *opth; 495 int offset, len; 496 497 if (ipv6h->nexthdr == IPPROTO_UDP) 498 return true; 499 500 offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset; 501 while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) { 502 opth = data + offset; 503 504 len = ipv6_optlen(opth); 505 if (len + offset > gro_hint->nested_tp_offset) 506 return false; 507 if (opth->nexthdr == IPPROTO_UDP) 508 return true; 509 510 offset += len; 511 } 512 return false; 513 } 514 515 iph = nested_nh; 516 if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) || 517 iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5)) 518 return false; 519 520 return true; 521 } 522 523 /* 524 * Validate the skb headers following the specified geneve hdr vs the 525 * provided hint, including nested L4 checksum. 
526 * The caller already ensured that the relevant amount of data is available 527 * in the linear part. 528 */ 529 static bool 530 geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb, 531 const struct genevehdr *gh, 532 const struct geneve_opt_gro_hint *gro_hint) 533 { 534 unsigned int plen, gh_len = geneve_hlen(gh); 535 void *nested = (void *)gh + gh_len; 536 struct udphdr *nested_uh; 537 unsigned int nested_len; 538 struct ipv6hdr *ipv6h; 539 struct iphdr *iph; 540 __wsum csum, psum; 541 542 if (!geneve_opt_gro_hint_validate(nested, gro_hint)) 543 return false; 544 545 /* Use GRO hints with nested csum only if the outer header has csum. */ 546 nested_uh = nested + gro_hint->nested_tp_offset; 547 if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL) 548 return true; 549 550 if (!NAPI_GRO_CB(skb)->csum_valid) 551 return false; 552 553 /* Compute the complete checksum up to the nested transport. */ 554 plen = gh_len + gro_hint->nested_tp_offset; 555 csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0)); 556 nested_len = skb_gro_len(skb) - plen; 557 558 /* Compute the nested pseudo header csum. */ 559 ipv6h = nested + gro_hint->nested_nh_offset; 560 iph = (struct iphdr *)ipv6h; 561 psum = gro_hint->nested_is_v6 ? 
562 ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 563 nested_len, IPPROTO_UDP, 0)) : 564 csum_tcpudp_nofold(iph->saddr, iph->daddr, 565 nested_len, IPPROTO_UDP, 0); 566 567 return !csum_fold(csum_add(psum, csum)); 568 } 569 570 static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, 571 unsigned int gh_len, 572 struct genevehdr **geneveh) 573 { 574 const struct geneve_opt_gro_hint *gro_hint; 575 unsigned int len, total_len, hint_off; 576 struct ipv6hdr *ipv6h; 577 struct iphdr *iph; 578 struct udphdr *uh; 579 __be16 p; 580 581 hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len); 582 if (!hint_off) 583 return 0; 584 585 if (!skb_is_gso(skb)) 586 return 0; 587 588 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 589 if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len))) 590 return -ENOMEM; 591 592 *geneveh = geneve_hdr(skb); 593 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 594 595 /* 596 * Validate hints from untrusted source before accessing 597 * the headers; csum will be checked later by the nested 598 * protocol rx path. 599 */ 600 if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY && 601 !geneve_opt_gro_hint_validate(skb->data, gro_hint))) 602 return -EINVAL; 603 604 ipv6h = (void *)skb->data + gro_hint->nested_nh_offset; 605 iph = (struct iphdr *)ipv6h; 606 total_len = skb->len - gro_hint->nested_nh_offset; 607 if (total_len > GRO_LEGACY_MAX_SIZE) 608 return -E2BIG; 609 610 /* 611 * After stripping the outer encap, the packet still carries a 612 * tunnel encapsulation: the nested one. 613 */ 614 skb->encapsulation = 1; 615 616 /* GSO expect a valid transpor header, move it to the current one. */ 617 skb_set_transport_header(skb, gro_hint->nested_tp_offset); 618 619 /* Adjust the nested IP{6} hdr to actual GSO len. 
*/ 620 if (gro_hint->nested_is_v6) { 621 ipv6h->payload_len = htons(total_len - sizeof(*ipv6h)); 622 } else { 623 __be16 old_len = iph->tot_len; 624 625 iph->tot_len = htons(total_len); 626 627 /* For IPv4 additionally adjust the nested csum. */ 628 csum_replace2(&iph->check, old_len, iph->tot_len); 629 ip_send_check(iph); 630 } 631 632 /* Adjust the nested UDP header len and checksum. */ 633 uh = udp_hdr(skb); 634 uh->len = htons(skb->len - gro_hint->nested_tp_offset); 635 if (uh->check) { 636 len = skb->len - gro_hint->nested_tp_offset; 637 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; 638 if (gro_hint->nested_is_v6) 639 uh->check = ~udp_v6_check(len, &ipv6h->saddr, 640 &ipv6h->daddr, 0); 641 else 642 uh->check = ~udp_v4_check(len, iph->saddr, 643 iph->daddr, 0); 644 } else { 645 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; 646 } 647 return 0; 648 } 649 650 /* Callback from net/ipv4/udp.c to receive packets */ 651 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 652 { 653 struct genevehdr *geneveh; 654 struct geneve_dev *geneve; 655 struct geneve_sock *gs; 656 __be16 inner_proto; 657 int opts_len; 658 659 /* Need UDP and Geneve header to be present */ 660 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 661 goto drop; 662 663 /* Return packets with reserved bits set */ 664 geneveh = geneve_hdr(skb); 665 if (unlikely(geneveh->ver != GENEVE_VER)) 666 goto drop; 667 668 gs = rcu_dereference_sk_user_data(sk); 669 if (!gs) 670 goto drop; 671 672 geneve = geneve_lookup_skb(gs, skb); 673 if (!geneve) 674 goto drop; 675 676 inner_proto = geneveh->proto_type; 677 678 if (unlikely((!geneve->cfg.inner_proto_inherit && 679 inner_proto != htons(ETH_P_TEB)))) { 680 dev_dstats_rx_dropped(geneve->dev); 681 goto drop; 682 } 683 684 opts_len = geneveh->opt_len * 4; 685 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, 686 !net_eq(geneve->net, dev_net(geneve->dev)))) { 687 dev_dstats_rx_dropped(geneve->dev); 688 goto drop; 
689 } 690 691 /* 692 * After hint processing, the transport header points to the inner one 693 * and we can't use anymore on geneve_hdr(). 694 */ 695 geneveh = geneve_hdr(skb); 696 if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) + 697 opts_len, &geneveh)) { 698 DEV_STATS_INC(geneve->dev, rx_errors); 699 goto drop; 700 } 701 702 geneve_rx(geneve, gs, skb, geneveh); 703 return 0; 704 705 drop: 706 /* Consume bad packet */ 707 kfree_skb(skb); 708 return 0; 709 } 710 711 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ 712 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) 713 { 714 struct genevehdr *geneveh; 715 struct geneve_sock *gs; 716 u8 zero_vni[3] = { 0 }; 717 u8 *vni = zero_vni; 718 719 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) 720 return -EINVAL; 721 722 geneveh = geneve_hdr(skb); 723 if (geneveh->ver != GENEVE_VER) 724 return -EINVAL; 725 726 if (geneveh->proto_type != htons(ETH_P_TEB)) 727 return -EINVAL; 728 729 gs = rcu_dereference_sk_user_data(sk); 730 if (!gs) 731 return -ENOENT; 732 733 if (geneve_get_sk_family(gs) == AF_INET) { 734 struct iphdr *iph = ip_hdr(skb); 735 __be32 addr4 = 0; 736 737 if (!gs->collect_md) { 738 vni = geneve_hdr(skb)->vni; 739 addr4 = iph->daddr; 740 } 741 742 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; 743 } 744 745 #if IS_ENABLED(CONFIG_IPV6) 746 if (geneve_get_sk_family(gs) == AF_INET6) { 747 struct ipv6hdr *ip6h = ipv6_hdr(skb); 748 struct in6_addr addr6; 749 750 memset(&addr6, 0, sizeof(struct in6_addr)); 751 752 if (!gs->collect_md) { 753 vni = geneve_hdr(skb)->vni; 754 addr6 = ip6h->daddr; 755 } 756 757 return geneve6_lookup(gs, addr6, vni) ? 
0 : -ENOENT; 758 } 759 #endif 760 761 return -EPFNOSUPPORT; 762 } 763 764 static struct sock *geneve_create_sock(struct net *net, 765 struct geneve_dev *geneve, bool ipv6) 766 { 767 struct ip_tunnel_info *info = &geneve->cfg.info; 768 struct udp_port_cfg udp_conf; 769 struct socket *sock; 770 int err; 771 772 memset(&udp_conf, 0, sizeof(udp_conf)); 773 774 if (ipv6) { 775 udp_conf.family = AF_INET6; 776 udp_conf.ipv6_v6only = 1; 777 udp_conf.use_udp6_rx_checksums = geneve->cfg.use_udp6_rx_checksums; 778 udp_conf.local_ip6 = info->key.u.ipv6.src; 779 } else { 780 udp_conf.family = AF_INET; 781 udp_conf.local_ip.s_addr = info->key.u.ipv4.src; 782 } 783 784 udp_conf.local_udp_port = info->key.tp_dst; 785 786 /* Open UDP socket */ 787 err = udp_sock_create(net, &udp_conf, &sock); 788 if (err < 0) 789 return ERR_PTR(err); 790 791 udp_allow_gso(sock->sk); 792 return sock->sk; 793 } 794 795 static bool geneve_hdr_match(struct sk_buff *skb, 796 const struct genevehdr *gh, 797 const struct genevehdr *gh2, 798 unsigned int hint_off) 799 { 800 const struct geneve_opt_gro_hint *gro_hint; 801 void *nested, *nested2, *nh, *nh2; 802 struct udphdr *udp, *udp2; 803 unsigned int gh_len; 804 805 /* Match the geneve hdr and options */ 806 if (gh->opt_len != gh2->opt_len) 807 return false; 808 809 gh_len = geneve_hlen(gh); 810 if (memcmp(gh, gh2, gh_len)) 811 return false; 812 813 if (!hint_off) 814 return true; 815 816 /* 817 * When gro is present consider the nested headers as part 818 * of the geneve options 819 */ 820 nested = (void *)gh + gh_len; 821 nested2 = (void *)gh2 + gh_len; 822 gro_hint = geneve_opt_gro_hint(gh, hint_off); 823 if (!memcmp(nested, nested2, gro_hint->nested_hdr_len)) 824 return true; 825 826 /* 827 * The nested headers differ; the packets can still belong to 828 * the same flow when IPs/proto/ports match; if so flushing is 829 * required. 
830 */ 831 nh = nested + gro_hint->nested_nh_offset; 832 nh2 = nested2 + gro_hint->nested_nh_offset; 833 if (gro_hint->nested_is_v6) { 834 struct ipv6hdr *iph = nh, *iph2 = nh2; 835 unsigned int nested_nlen; 836 __be32 first_word; 837 838 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 839 if ((first_word & htonl(0xF00FFFFF)) || 840 !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || 841 !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || 842 iph->nexthdr != iph2->nexthdr) 843 return false; 844 845 nested_nlen = gro_hint->nested_tp_offset - 846 gro_hint->nested_nh_offset; 847 if (nested_nlen > sizeof(struct ipv6hdr) && 848 (memcmp(iph + 1, iph2 + 1, 849 nested_nlen - sizeof(struct ipv6hdr)))) 850 return false; 851 } else { 852 struct iphdr *iph = nh, *iph2 = nh2; 853 854 if ((iph->protocol ^ iph2->protocol) | 855 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | 856 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) 857 return false; 858 } 859 860 udp = nested + gro_hint->nested_tp_offset; 861 udp2 = nested2 + gro_hint->nested_tp_offset; 862 if (udp->source != udp2->source || udp->dest != udp2->dest || 863 udp->check != udp2->check) 864 return false; 865 866 NAPI_GRO_CB(skb)->flush = 1; 867 return true; 868 } 869 870 static struct sk_buff *geneve_gro_receive(struct sock *sk, 871 struct list_head *head, 872 struct sk_buff *skb) 873 { 874 unsigned int hlen, gh_len, off_gnv, hint_off; 875 const struct geneve_opt_gro_hint *gro_hint; 876 const struct packet_offload *ptype; 877 struct genevehdr *gh, *gh2; 878 struct sk_buff *pp = NULL; 879 struct sk_buff *p; 880 int flush = 1; 881 __be16 type; 882 883 off_gnv = skb_gro_offset(skb); 884 hlen = off_gnv + sizeof(*gh); 885 gh = skb_gro_header(skb, hlen, off_gnv); 886 if (unlikely(!gh)) 887 goto out; 888 889 if (gh->ver != GENEVE_VER || gh->oam) 890 goto out; 891 gh_len = geneve_hlen(gh); 892 type = gh->proto_type; 893 894 hlen = off_gnv + gh_len; 895 if (!skb_gro_may_pull(skb, hlen)) { 896 gh = skb_gro_header_slow(skb, hlen, 
off_gnv); 897 if (unlikely(!gh)) 898 goto out; 899 } 900 901 /* The GRO hint/nested hdr could use a different ethernet type. */ 902 hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); 903 if (hint_off) { 904 905 /* 906 * If the hint is present, and nested hdr validation fails, do 907 * not attempt plain GRO: it will ignore inner hdrs and cause 908 * OoO. 909 */ 910 gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv); 911 if (unlikely(!gh)) 912 goto out; 913 914 gro_hint = geneve_opt_gro_hint(gh, hint_off); 915 if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint)) 916 goto out; 917 } 918 919 list_for_each_entry(p, head, list) { 920 if (!NAPI_GRO_CB(p)->same_flow) 921 continue; 922 923 gh2 = (struct genevehdr *)(p->data + off_gnv); 924 if (!geneve_hdr_match(skb, gh, gh2, hint_off)) { 925 NAPI_GRO_CB(p)->same_flow = 0; 926 continue; 927 } 928 } 929 930 skb_gro_pull(skb, gh_len); 931 skb_gro_postpull_rcsum(skb, gh, gh_len); 932 if (likely(type == htons(ETH_P_TEB))) 933 return call_gro_receive(eth_gro_receive, head, skb); 934 935 ptype = gro_find_receive_by_type(type); 936 if (!ptype) 937 goto out; 938 939 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); 940 flush = 0; 941 942 out: 943 skb_gro_flush_final(skb, pp, flush); 944 945 return pp; 946 } 947 948 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, 949 int nhoff) 950 { 951 struct genevehdr *gh; 952 struct packet_offload *ptype; 953 __be16 type; 954 int gh_len; 955 int err = -ENOSYS; 956 957 gh = (struct genevehdr *)(skb->data + nhoff); 958 gh_len = geneve_hlen(gh); 959 type = gh->proto_type; 960 geneve_opt_gro_hint_off(gh, &type, &gh_len); 961 962 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ 963 if (likely(type == htons(ETH_P_TEB))) 964 return eth_gro_complete(skb, nhoff + gh_len); 965 966 ptype = gro_find_complete_by_type(type); 967 if (ptype) 968 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 969 970 
skb_set_inner_mac_header(skb, nhoff + gh_len); 971 972 return err; 973 } 974 975 /* Create new listen socket if needed */ 976 static struct geneve_sock *geneve_socket_create(struct net *net, 977 struct geneve_dev *geneve, bool ipv6) 978 { 979 struct geneve_net *gn = net_generic(net, geneve_net_id); 980 struct udp_tunnel_sock_cfg tunnel_cfg; 981 struct geneve_sock *gs; 982 struct sock *sk; 983 int h; 984 985 gs = kzalloc_obj(*gs); 986 if (!gs) 987 return ERR_PTR(-ENOMEM); 988 989 sk = geneve_create_sock(net, geneve, ipv6); 990 if (IS_ERR(sk)) { 991 kfree(gs); 992 return ERR_CAST(sk); 993 } 994 995 gs->sk = sk; 996 gs->refcnt = 1; 997 for (h = 0; h < VNI_HASH_SIZE; ++h) 998 INIT_HLIST_HEAD(&gs->vni_list[h]); 999 1000 /* Initialize the geneve udp offloads structure */ 1001 udp_tunnel_notify_add_rx_port(sk, UDP_TUNNEL_TYPE_GENEVE); 1002 1003 /* Mark socket as an encapsulation socket */ 1004 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 1005 tunnel_cfg.sk_user_data = gs; 1006 tunnel_cfg.encap_type = 1; 1007 tunnel_cfg.gro_receive = geneve_gro_receive; 1008 tunnel_cfg.gro_complete = geneve_gro_complete; 1009 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 1010 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; 1011 tunnel_cfg.encap_destroy = NULL; 1012 setup_udp_tunnel_sock(net, sk, &tunnel_cfg); 1013 list_add(&gs->list, &gn->sock_list); 1014 return gs; 1015 } 1016 1017 static void __geneve_sock_release(struct geneve_sock *gs) 1018 { 1019 if (!gs || --gs->refcnt) 1020 return; 1021 1022 list_del(&gs->list); 1023 udp_tunnel_notify_del_rx_port(gs->sk, UDP_TUNNEL_TYPE_GENEVE); 1024 udp_tunnel_sock_release(gs->sk); 1025 kfree_rcu(gs, rcu); 1026 } 1027 1028 static void geneve_sock_release(struct geneve_dev *geneve) 1029 { 1030 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); 1031 #if IS_ENABLED(CONFIG_IPV6) 1032 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); 1033 1034 rcu_assign_pointer(geneve->sock6, NULL); 1035 #endif 1036 1037 
rcu_assign_pointer(geneve->sock4, NULL); 1038 1039 __geneve_sock_release(gs4); 1040 #if IS_ENABLED(CONFIG_IPV6) 1041 __geneve_sock_release(gs6); 1042 #endif 1043 } 1044 1045 static struct geneve_sock *geneve_find_sock(struct net *net, 1046 struct geneve_dev *geneve, bool ipv6) 1047 { 1048 struct geneve_net *gn = net_generic(net, geneve_net_id); 1049 struct ip_tunnel_info *info = &geneve->cfg.info; 1050 sa_family_t family = ipv6 ? AF_INET6 : AF_INET; 1051 bool gro_hint = geneve->cfg.gro_hint; 1052 __be16 dst_port = info->key.tp_dst; 1053 struct geneve_sock *gs; 1054 1055 list_for_each_entry(gs, &gn->sock_list, list) { 1056 if (inet_sk(gs->sk)->inet_sport != dst_port) 1057 continue; 1058 1059 if (geneve_get_sk_family(gs) != family) 1060 continue; 1061 1062 if (gs->gro_hint != gro_hint) 1063 continue; 1064 1065 if (family == AF_INET && 1066 inet_sk(gs->sk)->inet_saddr != info->key.u.ipv4.src) 1067 continue; 1068 1069 #if IS_ENABLED(CONFIG_IPV6) 1070 if (family == AF_INET6 && 1071 !ipv6_addr_equal(&gs->sk->sk_v6_rcv_saddr, &info->key.u.ipv6.src)) 1072 continue; 1073 #endif 1074 1075 return gs; 1076 } 1077 1078 return NULL; 1079 } 1080 1081 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 1082 { 1083 struct net *net = geneve->net; 1084 struct geneve_dev_node *node; 1085 struct geneve_sock *gs; 1086 __u8 vni[3]; 1087 __u32 hash; 1088 1089 gs = geneve_find_sock(net, geneve, ipv6); 1090 if (gs) { 1091 gs->refcnt++; 1092 goto out; 1093 } 1094 1095 gs = geneve_socket_create(net, geneve, ipv6); 1096 if (IS_ERR(gs)) 1097 return PTR_ERR(gs); 1098 1099 out: 1100 gs->collect_md = geneve->cfg.collect_md; 1101 gs->gro_hint = geneve->cfg.gro_hint; 1102 #if IS_ENABLED(CONFIG_IPV6) 1103 if (ipv6) { 1104 rcu_assign_pointer(geneve->sock6, gs); 1105 node = &geneve->hlist6; 1106 } else 1107 #endif 1108 { 1109 rcu_assign_pointer(geneve->sock4, gs); 1110 node = &geneve->hlist4; 1111 } 1112 node->geneve = geneve; 1113 1114 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); 
1115 hash = geneve_net_vni_hash(vni); 1116 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); 1117 return 0; 1118 } 1119 1120 static int geneve_open(struct net_device *dev) 1121 { 1122 struct geneve_dev *geneve = netdev_priv(dev); 1123 bool dualstack = geneve->cfg.dualstack; 1124 bool ipv4, ipv6; 1125 int ret = 0; 1126 1127 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || dualstack; 1128 ipv4 = !ipv6 || dualstack; 1129 #if IS_ENABLED(CONFIG_IPV6) 1130 if (ipv6) { 1131 ret = geneve_sock_add(geneve, true); 1132 if (ret < 0 && ret != -EAFNOSUPPORT) 1133 ipv4 = false; 1134 } 1135 #endif 1136 if (ipv4) 1137 ret = geneve_sock_add(geneve, false); 1138 if (ret < 0) 1139 geneve_sock_release(geneve); 1140 1141 return ret; 1142 } 1143 1144 static int geneve_stop(struct net_device *dev) 1145 { 1146 struct geneve_dev *geneve = netdev_priv(dev); 1147 1148 hlist_del_init_rcu(&geneve->hlist4.hlist); 1149 #if IS_ENABLED(CONFIG_IPV6) 1150 hlist_del_init_rcu(&geneve->hlist6.hlist); 1151 #endif 1152 geneve_sock_release(geneve); 1153 return 0; 1154 } 1155 1156 static void geneve_build_header(struct genevehdr *geneveh, 1157 const struct ip_tunnel_info *info, 1158 __be16 inner_proto) 1159 { 1160 geneveh->ver = GENEVE_VER; 1161 geneveh->opt_len = info->options_len / 4; 1162 geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); 1163 geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, 1164 info->key.tun_flags); 1165 geneveh->rsvd1 = 0; 1166 tunnel_id_to_vni(info->key.tun_id, geneveh->vni); 1167 geneveh->proto_type = inner_proto; 1168 geneveh->rsvd2 = 0; 1169 1170 if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) 1171 ip_tunnel_info_opts_get(geneveh->options, info); 1172 } 1173 1174 static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve, 1175 struct sk_buff *skb) 1176 { 1177 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1178 struct geneve_opt_gro_hint *hint; 1179 unsigned int nhlen; 1180 bool nested_is_v6; 1181 int id; 1182 1183 
BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb)); 1184 cb->gro_hint_len = 0; 1185 1186 /* Try to add the GRO hint only in case of double encap. */ 1187 if (!geneve->cfg.gro_hint || !skb->encapsulation) 1188 return 0; 1189 1190 /* 1191 * The nested headers must fit the geneve opt len fields and the 1192 * nested encap must carry a nested transport (UDP) header. 1193 */ 1194 nhlen = skb_inner_mac_header(skb) - skb->data; 1195 if (nhlen > 255 || !skb_transport_header_was_set(skb) || 1196 skb->inner_protocol_type != ENCAP_TYPE_ETHER || 1197 (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen)) 1198 return 0; 1199 1200 id = proto_to_id(skb->inner_protocol); 1201 if (id < 0) 1202 return 0; 1203 1204 nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1205 if (nested_is_v6) { 1206 int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 1207 u8 proto = ipv6_hdr(skb)->nexthdr; 1208 __be16 foff; 1209 1210 if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 || 1211 proto != IPPROTO_UDP) 1212 return 0; 1213 } else { 1214 if (ip_hdr(skb)->protocol != IPPROTO_UDP) 1215 return 0; 1216 } 1217 1218 hint = &cb->gro_hint; 1219 memset(hint, 0, sizeof(*hint)); 1220 hint->inner_proto_id = id; 1221 hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1222 hint->nested_nh_offset = skb_network_offset(skb); 1223 hint->nested_tp_offset = skb_transport_offset(skb); 1224 hint->nested_hdr_len = nhlen; 1225 cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE; 1226 return GENEVE_OPT_GRO_HINT_SIZE; 1227 } 1228 1229 static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, 1230 const struct geneve_opt_gro_hint *hint) 1231 { 1232 struct geneve_opt *gro_opt; 1233 1234 /* geneve_build_header() did not took in account the GRO hint. 
*/ 1235 gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2; 1236 1237 gro_opt = (void *)(gnvh + 1) + opt_size; 1238 memset(gro_opt, 0, sizeof(*gro_opt)); 1239 1240 gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS); 1241 gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE; 1242 gro_opt->length = GENEVE_OPT_GRO_HINT_LEN; 1243 memcpy(gro_opt + 1, hint, sizeof(*hint)); 1244 } 1245 1246 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, 1247 const struct ip_tunnel_info *info, 1248 const struct geneve_dev *geneve, int ip_hdr_len) 1249 { 1250 bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 1251 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; 1252 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1253 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1254 struct genevehdr *gnvh; 1255 __be16 inner_proto; 1256 bool double_encap; 1257 int min_headroom; 1258 int opt_size; 1259 int err; 1260 1261 skb_reset_mac_header(skb); 1262 skb_scrub_packet(skb, xnet); 1263 1264 opt_size = info->options_len + cb->gro_hint_len; 1265 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 1266 GENEVE_BASE_HLEN + opt_size + ip_hdr_len; 1267 err = skb_cow_head(skb, min_headroom); 1268 if (unlikely(err)) 1269 goto free_dst; 1270 1271 double_encap = udp_tunnel_handle_partial(skb); 1272 err = udp_tunnel_handle_offloads(skb, udp_sum); 1273 if (err) 1274 goto free_dst; 1275 1276 gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size); 1277 inner_proto = inner_proto_inherit ? 
skb->protocol : htons(ETH_P_TEB); 1278 geneve_build_header(gnvh, info, inner_proto); 1279 1280 if (cb->gro_hint_len) 1281 geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint); 1282 1283 udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto); 1284 return 0; 1285 1286 free_dst: 1287 dst_release(dst); 1288 return err; 1289 } 1290 1291 static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, 1292 const struct ip_tunnel_info *info, 1293 bool *use_cache) 1294 { 1295 struct geneve_dev *geneve = netdev_priv(dev); 1296 u8 dsfield; 1297 1298 dsfield = info->key.tos; 1299 if (dsfield == 1 && !geneve->cfg.collect_md) { 1300 dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 1301 *use_cache = false; 1302 } 1303 1304 return dsfield; 1305 } 1306 1307 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1308 struct geneve_dev *geneve, 1309 const struct ip_tunnel_info *info) 1310 { 1311 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1312 const struct ip_tunnel_key *key = &info->key; 1313 struct rtable *rt; 1314 bool use_cache; 1315 __u8 tos, ttl; 1316 __be16 df = 0; 1317 __be32 saddr; 1318 __be16 sport; 1319 int err; 1320 1321 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1322 return -EINVAL; 1323 1324 if (!gs4) 1325 return -EIO; 1326 1327 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1328 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1329 sport = udp_flow_src_port(geneve->net, skb, 1330 geneve->cfg.port_min, 1331 geneve->cfg.port_max, true); 1332 1333 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1334 &info->key, 1335 sport, geneve->cfg.info.key.tp_dst, tos, 1336 use_cache ? 
1337 (struct dst_cache *)&info->dst_cache : NULL); 1338 if (IS_ERR(rt)) 1339 return PTR_ERR(rt); 1340 1341 if (geneve->cfg.info.key.u.ipv4.src && 1342 saddr != geneve->cfg.info.key.u.ipv4.src) { 1343 dst_release(&rt->dst); 1344 return -EADDRNOTAVAIL; 1345 } 1346 1347 err = skb_tunnel_check_pmtu(skb, &rt->dst, 1348 GENEVE_IPV4_HLEN + info->options_len + 1349 geneve_build_gro_hint_opt(geneve, skb), 1350 netif_is_any_bridge_port(dev)); 1351 if (err < 0) { 1352 dst_release(&rt->dst); 1353 return err; 1354 } else if (err) { 1355 struct ip_tunnel_info *info; 1356 1357 info = skb_tunnel_info(skb); 1358 if (info) { 1359 struct ip_tunnel_info *unclone; 1360 1361 unclone = skb_tunnel_info_unclone(skb); 1362 if (unlikely(!unclone)) { 1363 dst_release(&rt->dst); 1364 return -ENOMEM; 1365 } 1366 1367 unclone->key.u.ipv4.dst = saddr; 1368 unclone->key.u.ipv4.src = info->key.u.ipv4.dst; 1369 } 1370 1371 if (!pskb_may_pull(skb, ETH_HLEN)) { 1372 dst_release(&rt->dst); 1373 return -EINVAL; 1374 } 1375 1376 skb->protocol = eth_type_trans(skb, geneve->dev); 1377 __netif_rx(skb); 1378 dst_release(&rt->dst); 1379 return -EMSGSIZE; 1380 } 1381 1382 tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); 1383 if (geneve->cfg.collect_md) { 1384 ttl = key->ttl; 1385 1386 df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? 1387 htons(IP_DF) : 0; 1388 } else { 1389 if (geneve->cfg.ttl_inherit) 1390 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1391 else 1392 ttl = key->ttl; 1393 ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); 1394 1395 if (geneve->cfg.df == GENEVE_DF_SET) { 1396 df = htons(IP_DF); 1397 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { 1398 struct ethhdr *eth = skb_eth_hdr(skb); 1399 1400 if (ntohs(eth->h_proto) == ETH_P_IPV6) { 1401 df = htons(IP_DF); 1402 } else if (ntohs(eth->h_proto) == ETH_P_IP) { 1403 struct iphdr *iph = ip_hdr(skb); 1404 1405 if (iph->frag_off & htons(IP_DF)) 1406 df = htons(IP_DF); 1407 } 1408 } 1409 } 1410 1411 err = geneve_build_skb(&rt->dst, skb, info, geneve, 1412 sizeof(struct iphdr)); 1413 if (unlikely(err)) 1414 return err; 1415 1416 udp_tunnel_xmit_skb(rt, gs4->sk, skb, saddr, info->key.u.ipv4.dst, 1417 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, 1418 !net_eq(geneve->net, dev_net(geneve->dev)), 1419 !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 1420 0); 1421 return 0; 1422 } 1423 1424 #if IS_ENABLED(CONFIG_IPV6) 1425 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1426 struct geneve_dev *geneve, 1427 const struct ip_tunnel_info *info) 1428 { 1429 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1430 const struct ip_tunnel_key *key = &info->key; 1431 struct dst_entry *dst = NULL; 1432 struct in6_addr saddr; 1433 bool use_cache; 1434 __u8 prio, ttl; 1435 __be16 sport; 1436 int err; 1437 1438 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1439 return -EINVAL; 1440 1441 if (!gs6) 1442 return -EIO; 1443 1444 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1445 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1446 sport = udp_flow_src_port(geneve->net, skb, 1447 geneve->cfg.port_min, 1448 geneve->cfg.port_max, true); 1449 1450 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1451 &saddr, key, sport, 1452 geneve->cfg.info.key.tp_dst, prio, 1453 use_cache ? 
1454 (struct dst_cache *)&info->dst_cache : NULL); 1455 if (IS_ERR(dst)) 1456 return PTR_ERR(dst); 1457 1458 if (!ipv6_addr_any(&geneve->cfg.info.key.u.ipv6.src) && 1459 !ipv6_addr_equal(&saddr, &geneve->cfg.info.key.u.ipv6.src)) { 1460 dst_release(dst); 1461 return -EADDRNOTAVAIL; 1462 } 1463 1464 err = skb_tunnel_check_pmtu(skb, dst, 1465 GENEVE_IPV6_HLEN + info->options_len + 1466 geneve_build_gro_hint_opt(geneve, skb), 1467 netif_is_any_bridge_port(dev)); 1468 if (err < 0) { 1469 dst_release(dst); 1470 return err; 1471 } else if (err) { 1472 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1473 1474 if (info) { 1475 struct ip_tunnel_info *unclone; 1476 1477 unclone = skb_tunnel_info_unclone(skb); 1478 if (unlikely(!unclone)) { 1479 dst_release(dst); 1480 return -ENOMEM; 1481 } 1482 1483 unclone->key.u.ipv6.dst = saddr; 1484 unclone->key.u.ipv6.src = info->key.u.ipv6.dst; 1485 } 1486 1487 if (!pskb_may_pull(skb, ETH_HLEN)) { 1488 dst_release(dst); 1489 return -EINVAL; 1490 } 1491 1492 skb->protocol = eth_type_trans(skb, geneve->dev); 1493 __netif_rx(skb); 1494 dst_release(dst); 1495 return -EMSGSIZE; 1496 } 1497 1498 prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); 1499 if (geneve->cfg.collect_md) { 1500 ttl = key->ttl; 1501 } else { 1502 if (geneve->cfg.ttl_inherit) 1503 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1504 else 1505 ttl = key->ttl; 1506 ttl = ttl ? 
: ip6_dst_hoplimit(dst); 1507 } 1508 err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); 1509 if (unlikely(err)) 1510 return err; 1511 1512 udp_tunnel6_xmit_skb(dst, gs6->sk, skb, dev, 1513 &saddr, &key->u.ipv6.dst, prio, ttl, 1514 info->key.label, sport, geneve->cfg.info.key.tp_dst, 1515 !test_bit(IP_TUNNEL_CSUM_BIT, 1516 info->key.tun_flags), 1517 0); 1518 return 0; 1519 } 1520 #endif 1521 1522 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1523 { 1524 struct geneve_dev *geneve = netdev_priv(dev); 1525 struct ip_tunnel_info *info = NULL; 1526 int err; 1527 1528 if (geneve->cfg.collect_md) { 1529 info = skb_tunnel_info(skb); 1530 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1531 netdev_dbg(dev, "no tunnel metadata\n"); 1532 dev_kfree_skb(skb); 1533 dev_dstats_tx_dropped(dev); 1534 return NETDEV_TX_OK; 1535 } 1536 } else { 1537 info = &geneve->cfg.info; 1538 } 1539 1540 rcu_read_lock(); 1541 #if IS_ENABLED(CONFIG_IPV6) 1542 if (info->mode & IP_TUNNEL_INFO_IPV6) 1543 err = geneve6_xmit_skb(skb, dev, geneve, info); 1544 else 1545 #endif 1546 err = geneve_xmit_skb(skb, dev, geneve, info); 1547 rcu_read_unlock(); 1548 1549 if (likely(!err)) 1550 return NETDEV_TX_OK; 1551 1552 if (err != -EMSGSIZE) 1553 dev_kfree_skb(skb); 1554 1555 if (err == -ELOOP) 1556 DEV_STATS_INC(dev, collisions); 1557 else if (err == -ENETUNREACH) 1558 DEV_STATS_INC(dev, tx_carrier_errors); 1559 1560 DEV_STATS_INC(dev, tx_errors); 1561 return NETDEV_TX_OK; 1562 } 1563 1564 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1565 { 1566 if (new_mtu > dev->max_mtu) 1567 new_mtu = dev->max_mtu; 1568 else if (new_mtu < dev->min_mtu) 1569 new_mtu = dev->min_mtu; 1570 1571 WRITE_ONCE(dev->mtu, new_mtu); 1572 return 0; 1573 } 1574 1575 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1576 { 1577 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1578 struct geneve_dev *geneve = 
netdev_priv(dev); 1579 __be16 sport; 1580 1581 if (ip_tunnel_info_af(info) == AF_INET) { 1582 struct rtable *rt; 1583 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1584 bool use_cache; 1585 __be32 saddr; 1586 u8 tos; 1587 1588 if (!gs4) 1589 return -EIO; 1590 1591 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1592 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1593 sport = udp_flow_src_port(geneve->net, skb, 1594 geneve->cfg.port_min, 1595 geneve->cfg.port_max, true); 1596 1597 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1598 &info->key, 1599 sport, geneve->cfg.info.key.tp_dst, 1600 tos, 1601 use_cache ? &info->dst_cache : NULL); 1602 if (IS_ERR(rt)) 1603 return PTR_ERR(rt); 1604 1605 ip_rt_put(rt); 1606 info->key.u.ipv4.src = saddr; 1607 #if IS_ENABLED(CONFIG_IPV6) 1608 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1609 struct dst_entry *dst; 1610 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1611 struct in6_addr saddr; 1612 bool use_cache; 1613 u8 prio; 1614 1615 if (!gs6) 1616 return -EIO; 1617 1618 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1619 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1620 sport = udp_flow_src_port(geneve->net, skb, 1621 geneve->cfg.port_min, 1622 geneve->cfg.port_max, true); 1623 1624 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1625 &saddr, &info->key, sport, 1626 geneve->cfg.info.key.tp_dst, prio, 1627 use_cache ? 
&info->dst_cache : NULL); 1628 if (IS_ERR(dst)) 1629 return PTR_ERR(dst); 1630 1631 dst_release(dst); 1632 info->key.u.ipv6.src = saddr; 1633 #endif 1634 } else { 1635 return -EINVAL; 1636 } 1637 1638 info->key.tp_src = sport; 1639 info->key.tp_dst = geneve->cfg.info.key.tp_dst; 1640 return 0; 1641 } 1642 1643 static const struct net_device_ops geneve_netdev_ops = { 1644 .ndo_init = geneve_init, 1645 .ndo_uninit = geneve_uninit, 1646 .ndo_open = geneve_open, 1647 .ndo_stop = geneve_stop, 1648 .ndo_start_xmit = geneve_xmit, 1649 .ndo_change_mtu = geneve_change_mtu, 1650 .ndo_validate_addr = eth_validate_addr, 1651 .ndo_set_mac_address = eth_mac_addr, 1652 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1653 }; 1654 1655 static void geneve_get_drvinfo(struct net_device *dev, 1656 struct ethtool_drvinfo *drvinfo) 1657 { 1658 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1659 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1660 } 1661 1662 static const struct ethtool_ops geneve_ethtool_ops = { 1663 .get_drvinfo = geneve_get_drvinfo, 1664 .get_link = ethtool_op_get_link, 1665 }; 1666 1667 /* Info for udev, that this is a virtual tunnel endpoint */ 1668 static const struct device_type geneve_type = { 1669 .name = "geneve", 1670 }; 1671 1672 /* Calls the ndo_udp_tunnel_add of the caller in order to 1673 * supply the listening GENEVE udp ports. Callers are expected 1674 * to implement the ndo_udp_tunnel_add. 
1675 */ 1676 static void geneve_offload_rx_ports(struct net_device *dev, bool push) 1677 { 1678 struct net *net = dev_net(dev); 1679 struct geneve_net *gn = net_generic(net, geneve_net_id); 1680 struct geneve_sock *gs; 1681 1682 ASSERT_RTNL(); 1683 1684 list_for_each_entry(gs, &gn->sock_list, list) { 1685 if (push) { 1686 udp_tunnel_push_rx_port(dev, gs->sk, 1687 UDP_TUNNEL_TYPE_GENEVE); 1688 } else { 1689 udp_tunnel_drop_rx_port(dev, gs->sk, 1690 UDP_TUNNEL_TYPE_GENEVE); 1691 } 1692 } 1693 } 1694 1695 /* Initialize the device structure. */ 1696 static void geneve_setup(struct net_device *dev) 1697 { 1698 ether_setup(dev); 1699 1700 dev->netdev_ops = &geneve_netdev_ops; 1701 dev->ethtool_ops = &geneve_ethtool_ops; 1702 dev->needs_free_netdev = true; 1703 1704 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1705 1706 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1707 dev->features |= NETIF_F_RXCSUM; 1708 dev->features |= NETIF_F_GSO_SOFTWARE; 1709 1710 /* Partial features are disabled by default. */ 1711 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1712 dev->hw_features |= NETIF_F_RXCSUM; 1713 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1714 dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES; 1715 dev->hw_features |= NETIF_F_GSO_PARTIAL; 1716 1717 dev->hw_enc_features = dev->hw_features; 1718 dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES; 1719 dev->mangleid_features = NETIF_F_GSO_PARTIAL; 1720 1721 dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; 1722 /* MTU range: 68 - (something less than 65535) */ 1723 dev->min_mtu = ETH_MIN_MTU; 1724 /* The max_mtu calculation does not take account of GENEVE 1725 * options, to avoid excluding potentially valid 1726 * configurations. This will be further reduced by IPvX hdr size. 
1727 */ 1728 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; 1729 1730 netif_keep_dst(dev); 1731 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1732 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1733 dev->lltx = true; 1734 eth_hw_addr_random(dev); 1735 } 1736 1737 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1738 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, 1739 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1740 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1741 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1742 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1743 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1744 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1745 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1746 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1747 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1748 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1749 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1750 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, 1751 [IFLA_GENEVE_DF] = { .type = NLA_U8 }, 1752 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, 1753 [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), 1754 [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG }, 1755 [IFLA_GENEVE_LOCAL] = { .type = NLA_BE32 }, 1756 [IFLA_GENEVE_LOCAL6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 1757 }; 1758 1759 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], 1760 struct netlink_ext_ack *extack) 1761 { 1762 if (tb[IFLA_ADDRESS]) { 1763 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { 1764 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1765 "Provided link layer address is not Ethernet"); 1766 return -EINVAL; 1767 } 1768 1769 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { 1770 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1771 "Provided Ethernet address is not unicast"); 1772 return 
-EADDRNOTAVAIL; 1773 } 1774 } 1775 1776 if (!data) { 1777 NL_SET_ERR_MSG(extack, 1778 "Not enough attributes provided to perform the operation"); 1779 return -EINVAL; 1780 } 1781 1782 if (data[IFLA_GENEVE_ID]) { 1783 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1784 1785 if (vni >= GENEVE_N_VID) { 1786 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], 1787 "Geneve ID must be lower than 16777216"); 1788 return -ERANGE; 1789 } 1790 } 1791 1792 if (data[IFLA_GENEVE_DF]) { 1793 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); 1794 1795 if (df < 0 || df > GENEVE_DF_MAX) { 1796 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], 1797 "Invalid DF attribute"); 1798 return -EINVAL; 1799 } 1800 } 1801 1802 if (data[IFLA_GENEVE_PORT_RANGE]) { 1803 const struct ifla_geneve_port_range *p; 1804 1805 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 1806 if (ntohs(p->high) < ntohs(p->low)) { 1807 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], 1808 "Invalid source port range"); 1809 return -EINVAL; 1810 } 1811 } 1812 1813 return 0; 1814 } 1815 1816 static bool geneve_saddr_wildcard(const struct ip_tunnel_info *info) 1817 { 1818 if (ip_tunnel_info_af(info) == AF_INET) { 1819 if (!info->key.u.ipv4.src) 1820 return true; 1821 #if IS_ENABLED(CONFIG_IPV6) 1822 } else { 1823 if (ipv6_addr_any(&info->key.u.ipv6.src)) 1824 return true; 1825 #endif 1826 } 1827 1828 return false; 1829 } 1830 1831 static bool geneve_saddr_conflict(const struct ip_tunnel_info *a, 1832 const struct ip_tunnel_info *b) 1833 { 1834 if (ip_tunnel_info_af(a) != ip_tunnel_info_af(b)) 1835 return false; 1836 1837 if (geneve_saddr_wildcard(a) || geneve_saddr_wildcard(b)) 1838 return true; 1839 1840 if (ip_tunnel_info_af(a) == AF_INET) { 1841 if (a->key.u.ipv4.src == b->key.u.ipv4.src) 1842 return true; 1843 #if IS_ENABLED(CONFIG_IPV6) 1844 } else { 1845 if (ipv6_addr_equal(&a->key.u.ipv6.src, &b->key.u.ipv6.src)) 1846 return true; 1847 #endif 1848 } 1849 1850 return false; 1851 } 1852 1853 static struct 
geneve_dev *geneve_find_dev(struct geneve_net *gn, 1854 const struct geneve_config *cfg, 1855 const struct ip_tunnel_info *info, 1856 bool *tun_on_same_port, 1857 bool *tun_collect_md) 1858 { 1859 struct geneve_dev *geneve, *t = NULL; 1860 1861 *tun_on_same_port = false; 1862 *tun_collect_md = false; 1863 list_for_each_entry(geneve, &gn->geneve_list, next) { 1864 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1865 (cfg->dualstack || geneve->cfg.dualstack || 1866 geneve_saddr_conflict(info, &geneve->cfg.info))) { 1867 *tun_collect_md |= geneve->cfg.collect_md; 1868 *tun_on_same_port = true; 1869 } 1870 if (info->key.tun_id == geneve->cfg.info.key.tun_id && 1871 info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1872 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) 1873 t = geneve; 1874 } 1875 return t; 1876 } 1877 1878 static bool is_tnl_info_zero(const struct ip_tunnel_info *info) 1879 { 1880 return !(info->key.tun_id || info->key.tos || 1881 !ip_tunnel_flags_empty(info->key.tun_flags) || 1882 info->key.ttl || info->key.label || info->key.tp_src || 1883 #if IS_ENABLED(CONFIG_IPV6) 1884 (ip_tunnel_info_af(info) == AF_INET6 && 1885 !ipv6_addr_any(&info->key.u.ipv6.dst)) || 1886 #endif 1887 (ip_tunnel_info_af(info) == AF_INET && 1888 info->key.u.ipv4.dst)); 1889 } 1890 1891 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, 1892 struct ip_tunnel_info *b) 1893 { 1894 if (ip_tunnel_info_af(a) == AF_INET) 1895 return a->key.u.ipv4.dst == b->key.u.ipv4.dst; 1896 else 1897 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); 1898 } 1899 1900 static int geneve_configure(struct net *net, struct net_device *dev, 1901 struct netlink_ext_ack *extack, 1902 const struct geneve_config *cfg) 1903 { 1904 struct geneve_net *gn = net_generic(net, geneve_net_id); 1905 struct geneve_dev *t, *geneve = netdev_priv(dev); 1906 const struct ip_tunnel_info *info = &cfg->info; 1907 bool tun_collect_md, tun_on_same_port; 1908 int err, encap_len; 
1909 1910 if (cfg->collect_md && !is_tnl_info_zero(info)) { 1911 NL_SET_ERR_MSG(extack, 1912 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); 1913 return -EINVAL; 1914 } 1915 1916 geneve->net = net; 1917 geneve->dev = dev; 1918 1919 t = geneve_find_dev(gn, cfg, info, &tun_on_same_port, &tun_collect_md); 1920 if (t) 1921 return -EBUSY; 1922 1923 /* make enough headroom for basic scenario */ 1924 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1925 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { 1926 encap_len += sizeof(struct iphdr); 1927 dev->max_mtu -= sizeof(struct iphdr); 1928 } else { 1929 encap_len += sizeof(struct ipv6hdr); 1930 dev->max_mtu -= sizeof(struct ipv6hdr); 1931 } 1932 dev->needed_headroom = encap_len + ETH_HLEN; 1933 1934 if (cfg->collect_md) { 1935 if (tun_on_same_port) { 1936 NL_SET_ERR_MSG(extack, 1937 "There can be only one externally controlled device on a destination port and a source address"); 1938 return -EPERM; 1939 } 1940 } else { 1941 if (tun_collect_md) { 1942 NL_SET_ERR_MSG(extack, 1943 "There already exists an externally controlled device on this destination port and the source address"); 1944 return -EPERM; 1945 } 1946 } 1947 1948 dst_cache_reset(&geneve->cfg.info.dst_cache); 1949 memcpy(&geneve->cfg, cfg, sizeof(*cfg)); 1950 1951 if (geneve->cfg.inner_proto_inherit) { 1952 dev->header_ops = NULL; 1953 dev->type = ARPHRD_NONE; 1954 dev->hard_header_len = 0; 1955 dev->addr_len = 0; 1956 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 1957 } 1958 1959 err = register_netdevice(dev); 1960 if (err) 1961 return err; 1962 1963 list_add(&geneve->next, &gn->geneve_list); 1964 return 0; 1965 } 1966 1967 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) 1968 { 1969 memset(info, 0, sizeof(*info)); 1970 info->key.tp_dst = htons(dst_port); 1971 } 1972 1973 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], 1974 struct netlink_ext_ack *extack, 1975 struct 
geneve_config *cfg, bool changelink)
{
	struct ip_tunnel_info *info = &cfg->info;
	/* Attribute blamed in the error message at change_notsup. */
	int attrtype;

	/* collect_md cannot be changed; when set it also implies dual-stack. */
	if (data[IFLA_GENEVE_COLLECT_METADATA]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_COLLECT_METADATA;
			goto change_notsup;
		}

		cfg->collect_md = true;
		cfg->dualstack = true;
	}

	/* IPv4 and IPv6 endpoint attributes are mutually exclusive. */
	if ((data[IFLA_GENEVE_LOCAL] || data[IFLA_GENEVE_REMOTE]) &&
	    (data[IFLA_GENEVE_LOCAL6] || data[IFLA_GENEVE_REMOTE6])) {
		NL_SET_ERR_MSG(extack,
			       "Cannot specify both IPv4/IPv6 Remote/Local addresses");
		return -EINVAL;
	}

	if (data[IFLA_GENEVE_REMOTE]) {
		/* The address family of an existing device is fixed. */
		if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
			attrtype = IFLA_GENEVE_REMOTE;
			goto change_notsup;
		}

		info->key.u.ipv4.dst =
			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);

		if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
					    "Remote IPv4 address cannot be Multicast");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_REMOTE6]) {
#if IS_ENABLED(CONFIG_IPV6)
		int addr_type;

		/* The address family of an existing device is fixed. */
		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
			attrtype = IFLA_GENEVE_REMOTE6;
			goto change_notsup;
		}

		info->mode = IP_TUNNEL_INFO_IPV6;
		info->key.u.ipv6.dst =
			nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);

		addr_type = ipv6_addr_type(&info->key.u.ipv6.dst);
		if (addr_type & IPV6_ADDR_LINKLOCAL) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be link-local");
			return -EINVAL;
		}
		if (addr_type & IPV6_ADDR_MULTICAST) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be Multicast");
			return -EINVAL;
		}
		/*
		 * IPv6 remotes start with UDP checksums enabled on TX and RX;
		 * the ZERO_CSUM6 attributes handled later can turn them off.
		 */
		__set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
		cfg->use_udp6_rx_checksums = true;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_LOCAL]) {
		if (changelink) {
			__be32 src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]);

			/* On change, only the already-configured value is accepted. */
			if (ip_tunnel_info_af(info) == AF_INET6 ||
			    src != info->key.u.ipv4.src) {
				attrtype = IFLA_GENEVE_LOCAL;
				goto change_notsup;
			}
		} else {
			info->key.u.ipv4.src = nla_get_in_addr(data[IFLA_GENEVE_LOCAL]);

			if (ipv4_is_multicast(info->key.u.ipv4.src)) {
				NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL],
						    "Local IPv4 address cannot be Multicast");
				return -EINVAL;
			}

			/* An explicit local address disables dual-stack. */
			cfg->dualstack = false;
		}
	}

	if (data[IFLA_GENEVE_LOCAL6]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			struct in6_addr src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]);

			/* On change, only the already-configured value is accepted. */
			if (ip_tunnel_info_af(info) == AF_INET ||
			    !ipv6_addr_equal(&src, &info->key.u.ipv6.src)) {
				attrtype = IFLA_GENEVE_LOCAL6;
				goto change_notsup;
			}
		} else {
			int addr_type;

			info->mode = IP_TUNNEL_INFO_IPV6;
			info->key.u.ipv6.src = nla_get_in6_addr(data[IFLA_GENEVE_LOCAL6]);

			addr_type = ipv6_addr_type(&info->key.u.ipv6.src);
			if (addr_type & IPV6_ADDR_LINKLOCAL) {
				NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6],
						    "Local IPv6 address cannot be link-local");
				return -EINVAL;
			}
			if (addr_type & IPV6_ADDR_MULTICAST) {
				NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6],
						    "Local IPv6 address cannot be Multicast");
				return -EINVAL;
			}

			/* An explicit local address disables dual-stack. */
			cfg->dualstack = false;
		}
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LOCAL6],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_ID]) {
		__u32 vni;
		__u8 tvni[3];
		__be64 tunid;

		/* Split the 24-bit VNI into bytes, most significant first. */
		vni = nla_get_u32(data[IFLA_GENEVE_ID]);
		tvni[0] = (vni & 0x00ff0000) >> 16;
		tvni[1] = (vni & 0x0000ff00) >> 8;
		tvni[2] = vni & 0x000000ff;

		tunid = vni_to_tunnel_id(tvni);
		/* The VNI of an existing device cannot be changed. */
		if (changelink && (tunid != info->key.tun_id)) {
			attrtype = IFLA_GENEVE_ID;
			goto change_notsup;
		}
		info->key.tun_id = tunid;
	}

	/* TTL_INHERIT takes precedence over an explicit TTL attribute. */
	if (data[IFLA_GENEVE_TTL_INHERIT]) {
		if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
			cfg->ttl_inherit = true;
		else
			cfg->ttl_inherit = false;
	} else if (data[IFLA_GENEVE_TTL]) {
		info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
		cfg->ttl_inherit = false;
	}

	if (data[IFLA_GENEVE_TOS])
		info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);

	if (data[IFLA_GENEVE_DF])
		cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);

	if (data[IFLA_GENEVE_LABEL]) {
		info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
				  IPV6_FLOWLABEL_MASK;
		/* A non-zero flow label is only accepted in IPv6 mode. */
		if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
					    "Label attribute only applies for IPv6 Geneve devices");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_PORT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_PORT;
			goto change_notsup;
		}
		info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
	}

	if (data[IFLA_GENEVE_PORT_RANGE]) {
		const struct ifla_geneve_port_range *p;

		if (changelink) {
			attrtype = IFLA_GENEVE_PORT_RANGE;
			goto change_notsup;
		}
		/* Converted with ntohs(): cfg keeps the range in host order. */
		p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
		cfg->port_min = ntohs(p->low);
		cfg->port_max = ntohs(p->high);
	}

	if (data[IFLA_GENEVE_UDP_CSUM]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_CSUM;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
			__set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype =
IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
			goto change_notsup;
		}
		/* A non-zero value disables the UDP checksum on transmit. */
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
			__clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
			goto change_notsup;
		}
		/* A non-zero value clears the use_udp6_rx_checksums setting. */
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
			cfg->use_udp6_rx_checksums = false;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
			goto change_notsup;
		}
		cfg->inner_proto_inherit = true;
	}

	if (data[IFLA_GENEVE_GRO_HINT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_GRO_HINT;
			goto change_notsup;
		}
		cfg->gro_hint = true;
	}

	return 0;
	/* Common exit for attributes that may not be changed; attrtype names the culprit. */
change_notsup:
	NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
			    "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported");
	return -EOPNOTSUPP;
}

/*
 * Set the device MTU: use IFLA_MTU when given, otherwise derive it from the
 * MTU of the device the route to the remote endpoint goes through, minus the
 * encapsulation overhead and the configured options length.
 */
static void geneve_link_config(struct net_device *dev,
			       struct ip_tunnel_info *info, struct nlattr *tb[])
{
	struct geneve_dev *geneve = netdev_priv(dev);
	/* Stays 0 when no usable route is found; the MTU is then left alone. */
	int ldev_mtu = 0;

	if (tb[IFLA_MTU]) {
		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
		return;
	}

	switch (ip_tunnel_info_af(info)) {
	case AF_INET: {
		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);

		if (!IS_ERR(rt) && rt->dst.dev) {
2248 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; 2249 ip_rt_put(rt); 2250 } 2251 break; 2252 } 2253 #if IS_ENABLED(CONFIG_IPV6) 2254 case AF_INET6: { 2255 struct rt6_info *rt; 2256 2257 if (!__in6_dev_get(dev)) 2258 break; 2259 2260 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, 2261 NULL, 0); 2262 2263 if (rt && rt->dst.dev) 2264 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; 2265 ip6_rt_put(rt); 2266 break; 2267 } 2268 #endif 2269 } 2270 2271 if (ldev_mtu <= 0) 2272 return; 2273 2274 geneve_change_mtu(dev, ldev_mtu - info->options_len); 2275 } 2276 2277 static int geneve_newlink(struct net_device *dev, 2278 struct rtnl_newlink_params *params, 2279 struct netlink_ext_ack *extack) 2280 { 2281 struct net *link_net = rtnl_newlink_link_net(params); 2282 struct nlattr **data = params->data; 2283 struct nlattr **tb = params->tb; 2284 struct geneve_config cfg = { 2285 .df = GENEVE_DF_UNSET, 2286 .use_udp6_rx_checksums = false, 2287 .ttl_inherit = false, 2288 .collect_md = false, 2289 .dualstack = false, 2290 .port_min = 1, 2291 .port_max = USHRT_MAX, 2292 }; 2293 int err; 2294 2295 init_tnl_info(&cfg.info, GENEVE_UDP_PORT); 2296 err = geneve_nl2info(tb, data, extack, &cfg, false); 2297 if (err) 2298 return err; 2299 2300 err = geneve_configure(link_net, dev, extack, &cfg); 2301 if (err) 2302 return err; 2303 2304 geneve_link_config(dev, &cfg.info, tb); 2305 2306 return 0; 2307 } 2308 2309 /* Quiesces the geneve device data path for both TX and RX. 2310 * 2311 * On transmit geneve checks for non-NULL geneve_sock before it proceeds. 2312 * So, if we set that socket to NULL under RCU and wait for synchronize_net() 2313 * to complete for the existing set of in-flight packets to be transmitted, 2314 * then we would have quiesced the transmit data path. All the future packets 2315 * will get dropped until we unquiesce the data path. 2316 * 2317 * On receive geneve dereference the geneve_sock stashed in the socket. 
So, 2318 * if we set that to NULL under RCU and wait for synchronize_net() to 2319 * complete, then we would have quiesced the receive data path. 2320 */ 2321 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, 2322 struct geneve_sock **gs6) 2323 { 2324 *gs4 = rtnl_dereference(geneve->sock4); 2325 rcu_assign_pointer(geneve->sock4, NULL); 2326 if (*gs4) 2327 rcu_assign_sk_user_data((*gs4)->sk, NULL); 2328 #if IS_ENABLED(CONFIG_IPV6) 2329 *gs6 = rtnl_dereference(geneve->sock6); 2330 rcu_assign_pointer(geneve->sock6, NULL); 2331 if (*gs6) 2332 rcu_assign_sk_user_data((*gs6)->sk, NULL); 2333 #else 2334 *gs6 = NULL; 2335 #endif 2336 synchronize_net(); 2337 } 2338 2339 /* Resumes the geneve device data path for both TX and RX. */ 2340 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, 2341 struct geneve_sock __maybe_unused *gs6) 2342 { 2343 rcu_assign_pointer(geneve->sock4, gs4); 2344 if (gs4) 2345 rcu_assign_sk_user_data(gs4->sk, gs4); 2346 #if IS_ENABLED(CONFIG_IPV6) 2347 rcu_assign_pointer(geneve->sock6, gs6); 2348 if (gs6) 2349 rcu_assign_sk_user_data(gs6->sk, gs6); 2350 #endif 2351 } 2352 2353 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], 2354 struct nlattr *data[], 2355 struct netlink_ext_ack *extack) 2356 { 2357 struct geneve_dev *geneve = netdev_priv(dev); 2358 struct geneve_sock *gs4, *gs6; 2359 struct geneve_config cfg; 2360 int err; 2361 2362 /* If the geneve device is configured for metadata (or externally 2363 * controlled, for example, OVS), then nothing can be changed. 2364 */ 2365 if (geneve->cfg.collect_md) 2366 return -EOPNOTSUPP; 2367 2368 /* Start with the existing info. 
*/ 2369 memcpy(&cfg, &geneve->cfg, sizeof(cfg)); 2370 err = geneve_nl2info(tb, data, extack, &cfg, true); 2371 if (err) 2372 return err; 2373 2374 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { 2375 dst_cache_reset(&cfg.info.dst_cache); 2376 geneve_link_config(dev, &cfg.info, tb); 2377 } 2378 2379 geneve_quiesce(geneve, &gs4, &gs6); 2380 memcpy(&geneve->cfg, &cfg, sizeof(cfg)); 2381 geneve_unquiesce(geneve, gs4, gs6); 2382 2383 return 0; 2384 } 2385 2386 static void geneve_dellink(struct net_device *dev, struct list_head *head) 2387 { 2388 struct geneve_dev *geneve = netdev_priv(dev); 2389 2390 list_del(&geneve->next); 2391 unregister_netdevice_queue(dev, head); 2392 } 2393 2394 static size_t geneve_get_size(const struct net_device *dev) 2395 { 2396 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 2397 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 2398 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_LOCAL{6} */ 2399 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 2400 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 2401 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ 2402 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 2403 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 2404 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 2405 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 2406 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 2407 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 2408 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 2409 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ 2410 nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 2411 nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */ 2412 0; 2413 } 2414 2415 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 2416 { 2417 struct geneve_dev *geneve = netdev_priv(dev); 2418 struct 
ip_tunnel_info *info = &geneve->cfg.info; 2419 bool ttl_inherit = geneve->cfg.ttl_inherit; 2420 bool metadata = geneve->cfg.collect_md; 2421 struct ifla_geneve_port_range ports = { 2422 .low = htons(geneve->cfg.port_min), 2423 .high = htons(geneve->cfg.port_max), 2424 }; 2425 __u8 tmp_vni[3]; 2426 __u32 vni; 2427 2428 tunnel_id_to_vni(info->key.tun_id, tmp_vni); 2429 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; 2430 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 2431 goto nla_put_failure; 2432 2433 if (!metadata && ip_tunnel_info_af(info) == AF_INET) { 2434 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 2435 info->key.u.ipv4.dst)) 2436 goto nla_put_failure; 2437 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 2438 test_bit(IP_TUNNEL_CSUM_BIT, 2439 info->key.tun_flags))) 2440 goto nla_put_failure; 2441 2442 #if IS_ENABLED(CONFIG_IPV6) 2443 } else if (!metadata) { 2444 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 2445 &info->key.u.ipv6.dst)) 2446 goto nla_put_failure; 2447 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 2448 !test_bit(IP_TUNNEL_CSUM_BIT, 2449 info->key.tun_flags))) 2450 goto nla_put_failure; 2451 #endif 2452 } 2453 2454 if (!geneve->cfg.dualstack) { 2455 if (ip_tunnel_info_af(info) == AF_INET) { 2456 if ((info->key.u.ipv4.src || 2457 geneve->cfg.collect_md) && 2458 nla_put_in_addr(skb, IFLA_GENEVE_LOCAL, 2459 info->key.u.ipv4.src)) 2460 goto nla_put_failure; 2461 #if IS_ENABLED(CONFIG_IPV6) 2462 } else { 2463 if ((!ipv6_addr_any(&info->key.u.ipv6.src) || 2464 geneve->cfg.collect_md) && 2465 nla_put_in6_addr(skb, IFLA_GENEVE_LOCAL6, 2466 &info->key.u.ipv6.src)) 2467 goto nla_put_failure; 2468 #endif 2469 } 2470 } 2471 2472 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || 2473 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || 2474 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) 2475 goto nla_put_failure; 2476 2477 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) 2478 goto nla_put_failure; 2479 2480 if (nla_put_be16(skb, 
IFLA_GENEVE_PORT, info->key.tp_dst)) 2481 goto nla_put_failure; 2482 2483 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 2484 goto nla_put_failure; 2485 2486 #if IS_ENABLED(CONFIG_IPV6) 2487 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 2488 !geneve->cfg.use_udp6_rx_checksums)) 2489 goto nla_put_failure; 2490 #endif 2491 2492 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) 2493 goto nla_put_failure; 2494 2495 if (geneve->cfg.inner_proto_inherit && 2496 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) 2497 goto nla_put_failure; 2498 2499 if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) 2500 goto nla_put_failure; 2501 2502 if (geneve->cfg.gro_hint && 2503 nla_put_flag(skb, IFLA_GENEVE_GRO_HINT)) 2504 goto nla_put_failure; 2505 2506 return 0; 2507 2508 nla_put_failure: 2509 return -EMSGSIZE; 2510 } 2511 2512 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 2513 .kind = "geneve", 2514 .maxtype = IFLA_GENEVE_MAX, 2515 .policy = geneve_policy, 2516 .priv_size = sizeof(struct geneve_dev), 2517 .setup = geneve_setup, 2518 .validate = geneve_validate, 2519 .newlink = geneve_newlink, 2520 .changelink = geneve_changelink, 2521 .dellink = geneve_dellink, 2522 .get_size = geneve_get_size, 2523 .fill_info = geneve_fill_info, 2524 }; 2525 2526 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 2527 u8 name_assign_type, u16 dst_port) 2528 { 2529 struct nlattr *tb[IFLA_MAX + 1]; 2530 struct net_device *dev; 2531 LIST_HEAD(list_kill); 2532 int err; 2533 struct geneve_config cfg = { 2534 .df = GENEVE_DF_UNSET, 2535 .use_udp6_rx_checksums = true, 2536 .ttl_inherit = false, 2537 .collect_md = true, 2538 .dualstack = true, 2539 .port_min = 1, 2540 .port_max = USHRT_MAX, 2541 }; 2542 2543 memset(tb, 0, sizeof(tb)); 2544 dev = rtnl_create_link(net, name, name_assign_type, 2545 &geneve_link_ops, tb, NULL); 2546 if (IS_ERR(dev)) 2547 return dev; 2548 2549 init_tnl_info(&cfg.info, dst_port); 2550 err = 
geneve_configure(net, dev, NULL, &cfg); 2551 if (err) { 2552 free_netdev(dev); 2553 return ERR_PTR(err); 2554 } 2555 2556 /* openvswitch users expect packet sizes to be unrestricted, 2557 * so set the largest MTU we can. 2558 */ 2559 err = geneve_change_mtu(dev, IP_MAX_MTU); 2560 if (err) 2561 goto err; 2562 2563 err = rtnl_configure_link(dev, NULL, 0, NULL); 2564 if (err < 0) 2565 goto err; 2566 2567 return dev; 2568 err: 2569 geneve_dellink(dev, &list_kill); 2570 unregister_netdevice_many(&list_kill); 2571 return ERR_PTR(err); 2572 } 2573 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 2574 2575 static int geneve_netdevice_event(struct notifier_block *unused, 2576 unsigned long event, void *ptr) 2577 { 2578 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2579 2580 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) 2581 geneve_offload_rx_ports(dev, true); 2582 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) 2583 geneve_offload_rx_ports(dev, false); 2584 2585 return NOTIFY_DONE; 2586 } 2587 2588 static struct notifier_block geneve_notifier_block __read_mostly = { 2589 .notifier_call = geneve_netdevice_event, 2590 }; 2591 2592 static __net_init int geneve_init_net(struct net *net) 2593 { 2594 struct geneve_net *gn = net_generic(net, geneve_net_id); 2595 2596 INIT_LIST_HEAD(&gn->geneve_list); 2597 INIT_LIST_HEAD(&gn->sock_list); 2598 return 0; 2599 } 2600 2601 static void __net_exit geneve_exit_rtnl_net(struct net *net, 2602 struct list_head *dev_to_kill) 2603 { 2604 struct geneve_net *gn = net_generic(net, geneve_net_id); 2605 struct geneve_dev *geneve, *next; 2606 2607 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) 2608 geneve_dellink(geneve->dev, dev_to_kill); 2609 } 2610 2611 static void __net_exit geneve_exit_net(struct net *net) 2612 { 2613 const struct geneve_net *gn = net_generic(net, geneve_net_id); 2614 2615 WARN_ON_ONCE(!list_empty(&gn->sock_list)); 2616 } 2617 2618 static struct pernet_operations geneve_net_ops = { 2619 .init = geneve_init_net, 
2620 .exit_rtnl = geneve_exit_rtnl_net, 2621 .exit = geneve_exit_net, 2622 .id = &geneve_net_id, 2623 .size = sizeof(struct geneve_net), 2624 }; 2625 2626 static int __init geneve_init_module(void) 2627 { 2628 int rc; 2629 2630 rc = register_pernet_subsys(&geneve_net_ops); 2631 if (rc) 2632 goto out1; 2633 2634 rc = register_netdevice_notifier(&geneve_notifier_block); 2635 if (rc) 2636 goto out2; 2637 2638 rc = rtnl_link_register(&geneve_link_ops); 2639 if (rc) 2640 goto out3; 2641 2642 return 0; 2643 out3: 2644 unregister_netdevice_notifier(&geneve_notifier_block); 2645 out2: 2646 unregister_pernet_subsys(&geneve_net_ops); 2647 out1: 2648 return rc; 2649 } 2650 late_initcall(geneve_init_module); 2651 2652 static void __exit geneve_cleanup_module(void) 2653 { 2654 rtnl_link_unregister(&geneve_link_ops); 2655 unregister_netdevice_notifier(&geneve_notifier_block); 2656 unregister_pernet_subsys(&geneve_net_ops); 2657 } 2658 module_exit(geneve_cleanup_module); 2659 2660 MODULE_LICENSE("GPL"); 2661 MODULE_VERSION(GENEVE_NETDEV_VER); 2662 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 2663 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 2664 MODULE_ALIAS_RTNL_LINK("geneve"); 2665