1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * GENEVE: Generic Network Virtualization Encapsulation 4 * 5 * Copyright (c) 2015 Red Hat, Inc. 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/ethtool.h> 11 #include <linux/kernel.h> 12 #include <linux/module.h> 13 #include <linux/etherdevice.h> 14 #include <linux/hash.h> 15 #include <net/dst_metadata.h> 16 #include <net/gro_cells.h> 17 #include <net/rtnetlink.h> 18 #include <net/geneve.h> 19 #include <net/gro.h> 20 #include <net/netdev_lock.h> 21 #include <net/protocol.h> 22 23 #define GENEVE_NETDEV_VER "0.6" 24 25 #define GENEVE_N_VID (1u << 24) 26 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 27 28 #define VNI_HASH_BITS 10 29 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 30 31 static bool log_ecn_error = true; 32 module_param(log_ecn_error, bool, 0644); 33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 34 35 #define GENEVE_VER 0 36 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 37 #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) 38 #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) 39 40 #define GENEVE_OPT_NETDEV_CLASS 0x100 41 #define GENEVE_OPT_GRO_HINT_SIZE 8 42 #define GENEVE_OPT_GRO_HINT_TYPE 1 43 #define GENEVE_OPT_GRO_HINT_LEN 1 44 45 struct geneve_opt_gro_hint { 46 u8 inner_proto_id:2, 47 nested_is_v6:1; 48 u8 nested_nh_offset; 49 u8 nested_tp_offset; 50 u8 nested_hdr_len; 51 }; 52 53 struct geneve_skb_cb { 54 unsigned int gro_hint_len; 55 struct geneve_opt_gro_hint gro_hint; 56 }; 57 58 #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0])) 59 60 /* per-network namespace private data for this module */ 61 struct geneve_net { 62 struct list_head geneve_list; 63 /* sock_list is protected by rtnl lock */ 64 struct list_head sock_list; 65 }; 66 67 static unsigned int geneve_net_id; 68 69 struct geneve_dev_node { 70 struct hlist_node hlist; 71 struct geneve_dev *geneve; 72 }; 73 74 struct geneve_config { 75 bool collect_md; 76 bool use_udp6_rx_checksums; 77 bool ttl_inherit; 78 bool gro_hint; 79 enum ifla_geneve_df df; 80 bool inner_proto_inherit; 81 u16 port_min; 82 u16 port_max; 83 84 /* Must be last --ends in a flexible-array member. */ 85 struct ip_tunnel_info info; 86 }; 87 88 /* Pseudo network device */ 89 struct geneve_dev { 90 struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ 91 #if IS_ENABLED(CONFIG_IPV6) 92 struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ 93 #endif 94 struct net *net; /* netns for packet i/o */ 95 struct net_device *dev; /* netdev for geneve tunnel */ 96 struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ 97 #if IS_ENABLED(CONFIG_IPV6) 98 struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ 99 #endif 100 struct list_head next; /* geneve's per namespace list */ 101 struct gro_cells gro_cells; 102 struct geneve_config cfg; 103 }; 104 105 struct geneve_sock { 106 bool collect_md; 107 bool gro_hint; 108 struct list_head list; 109 struct sock *sk; 110 struct rcu_head rcu; 111 int refcnt; 112 struct hlist_head vni_list[VNI_HASH_SIZE]; 113 }; 114 115 static const __be16 proto_id_map[] = { htons(ETH_P_TEB), 116 htons(ETH_P_IPV6), 117 htons(ETH_P_IP) }; 118 119 static int proto_to_id(__be16 proto) 120 { 121 int i; 122 123 for (i = 0; i < ARRAY_SIZE(proto_id_map); i++) 124 if (proto_id_map[i] == proto) 125 return i; 126 127 return -1; 128 } 129 130 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 131 { 132 __u32 vnid; 133 134 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 135 return hash_32(vnid, VNI_HASH_BITS); 136 } 137 138 static __be64 vni_to_tunnel_id(const __u8 *vni) 139 { 140 #ifdef __BIG_ENDIAN 141 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 142 #else 143 return (__force __be64)(((__force u64)vni[0] << 40) | 144 ((__force u64)vni[1] << 48) | 145 ((__force u64)vni[2] << 56)); 146 #endif 147 } 148 149 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 150 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 151 { 152 #ifdef __BIG_ENDIAN 153 vni[0] = (__force __u8)(tun_id >> 16); 154 vni[1] = (__force __u8)(tun_id >> 8); 155 vni[2] = (__force __u8)tun_id; 156 #else 157 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 158 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 159 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 160 #endif 161 } 162 163 static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) 164 { 165 return !memcmp(vni, &tun_id[5], 3); 166 } 167 168 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) 169 { 170 return gs->sk->sk_family; 171 } 172 173 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 174 __be32 addr, u8 vni[]) 175 { 176 struct hlist_head *vni_list_head; 177 struct geneve_dev_node *node; 178 __u32 hash; 179 180 /* Find the device for this VNI */ 181 hash = geneve_net_vni_hash(vni); 182 vni_list_head = &gs->vni_list[hash]; 183 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 184 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 185 addr == node->geneve->cfg.info.key.u.ipv4.dst) 186 return node->geneve; 187 } 188 return NULL; 189 } 190 191 #if IS_ENABLED(CONFIG_IPV6) 192 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 193 struct in6_addr addr6, u8 vni[]) 194 { 195 struct hlist_head *vni_list_head; 196 struct geneve_dev_node *node; 197 __u32 hash; 198 199 /* Find the device for this VNI */ 200 hash = geneve_net_vni_hash(vni); 201 vni_list_head = &gs->vni_list[hash]; 202 hlist_for_each_entry_rcu(node, vni_list_head, hlist) { 203 if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && 204 ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) 205 return node->geneve; 206 } 207 return NULL; 208 } 209 #endif 210 211 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 212 { 213 return (struct genevehdr *)(udp_hdr(skb) + 1); 214 } 215 216 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, 217 struct sk_buff *skb) 218 { 219 static u8 zero_vni[3]; 220 u8 *vni; 221 222 if (geneve_get_sk_family(gs) == AF_INET) { 223 struct iphdr *iph; 224 __be32 addr; 225 226 iph = ip_hdr(skb); /* outer IP header... */ 227 228 if (gs->collect_md) { 229 vni = zero_vni; 230 addr = 0; 231 } else { 232 vni = geneve_hdr(skb)->vni; 233 addr = iph->saddr; 234 } 235 236 return geneve_lookup(gs, addr, vni); 237 #if IS_ENABLED(CONFIG_IPV6) 238 } else if (geneve_get_sk_family(gs) == AF_INET6) { 239 static struct in6_addr zero_addr6; 240 struct ipv6hdr *ip6h; 241 struct in6_addr addr6; 242 243 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 244 245 if (gs->collect_md) { 246 vni = zero_vni; 247 addr6 = zero_addr6; 248 } else { 249 vni = geneve_hdr(skb)->vni; 250 addr6 = ip6h->saddr; 251 } 252 253 return geneve6_lookup(gs, addr6, vni); 254 #endif 255 } 256 return NULL; 257 } 258 259 /* geneve receive/decap routine */ 260 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, 261 struct sk_buff *skb, const struct genevehdr *gnvh) 262 { 263 struct metadata_dst *tun_dst = NULL; 264 unsigned int len; 265 int nh, err = 0; 266 void *oiph; 267 268 if (ip_tunnel_collect_metadata() || gs->collect_md) { 269 IP_TUNNEL_DECLARE_FLAGS(flags) = { }; 270 271 __set_bit(IP_TUNNEL_KEY_BIT, flags); 272 __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); 273 __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); 274 275 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, 276 vni_to_tunnel_id(gnvh->vni), 277 gnvh->opt_len * 4); 278 if (!tun_dst) { 279 dev_dstats_rx_dropped(geneve->dev); 280 goto drop; 281 } 282 /* Update tunnel dst according to Geneve options. */ 283 ip_tunnel_flags_zero(flags); 284 __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); 285 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 286 gnvh->options, gnvh->opt_len * 4, 287 flags); 288 } else { 289 /* Drop packets w/ critical options, 290 * since we don't support any... 291 */ 292 if (gnvh->critical) { 293 DEV_STATS_INC(geneve->dev, rx_frame_errors); 294 DEV_STATS_INC(geneve->dev, rx_errors); 295 goto drop; 296 } 297 } 298 299 if (tun_dst) 300 skb_dst_set(skb, &tun_dst->dst); 301 302 if (gnvh->proto_type == htons(ETH_P_TEB)) { 303 skb_reset_mac_header(skb); 304 skb->protocol = eth_type_trans(skb, geneve->dev); 305 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 306 307 /* Ignore packet loops (and multicast echo) */ 308 if (ether_addr_equal(eth_hdr(skb)->h_source, 309 geneve->dev->dev_addr)) { 310 DEV_STATS_INC(geneve->dev, rx_errors); 311 goto drop; 312 } 313 } else { 314 skb_reset_mac_header(skb); 315 skb->dev = geneve->dev; 316 skb->pkt_type = PACKET_HOST; 317 } 318 319 /* Save offset of outer header relative to skb->head, 320 * because we are going to reset the network header to the inner header 321 * and might change skb->head. 322 */ 323 nh = skb_network_header(skb) - skb->head; 324 325 skb_reset_network_header(skb); 326 327 if (!pskb_inet_may_pull(skb)) { 328 DEV_STATS_INC(geneve->dev, rx_length_errors); 329 DEV_STATS_INC(geneve->dev, rx_errors); 330 goto drop; 331 } 332 333 /* Get the outer header. */ 334 oiph = skb->head + nh; 335 336 if (geneve_get_sk_family(gs) == AF_INET) 337 err = IP_ECN_decapsulate(oiph, skb); 338 #if IS_ENABLED(CONFIG_IPV6) 339 else 340 err = IP6_ECN_decapsulate(oiph, skb); 341 #endif 342 343 if (unlikely(err)) { 344 if (log_ecn_error) { 345 if (geneve_get_sk_family(gs) == AF_INET) 346 net_info_ratelimited("non-ECT from %pI4 " 347 "with TOS=%#x\n", 348 &((struct iphdr *)oiph)->saddr, 349 ((struct iphdr *)oiph)->tos); 350 #if IS_ENABLED(CONFIG_IPV6) 351 else 352 net_info_ratelimited("non-ECT from %pI6\n", 353 &((struct ipv6hdr *)oiph)->saddr); 354 #endif 355 } 356 if (err > 1) { 357 DEV_STATS_INC(geneve->dev, rx_frame_errors); 358 DEV_STATS_INC(geneve->dev, rx_errors); 359 goto drop; 360 } 361 } 362 363 /* Skip the additional GRO stage when hints are in use. */ 364 len = skb->len; 365 if (skb->encapsulation) 366 err = netif_rx(skb); 367 else 368 err = gro_cells_receive(&geneve->gro_cells, skb); 369 if (likely(err == NET_RX_SUCCESS)) 370 dev_dstats_rx_add(geneve->dev, len); 371 372 return; 373 drop: 374 /* Consume bad packet */ 375 kfree_skb(skb); 376 } 377 378 /* Setup stats when device is created */ 379 static int geneve_init(struct net_device *dev) 380 { 381 struct geneve_dev *geneve = netdev_priv(dev); 382 int err; 383 384 err = gro_cells_init(&geneve->gro_cells, dev); 385 if (err) 386 return err; 387 388 err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); 389 if (err) { 390 gro_cells_destroy(&geneve->gro_cells); 391 return err; 392 } 393 netdev_lockdep_set_classes(dev); 394 return 0; 395 } 396 397 static void geneve_uninit(struct net_device *dev) 398 { 399 struct geneve_dev *geneve = netdev_priv(dev); 400 401 dst_cache_destroy(&geneve->cfg.info.dst_cache); 402 gro_cells_destroy(&geneve->gro_cells); 403 } 404 405 static int geneve_hlen(const struct genevehdr *gh) 406 { 407 return sizeof(*gh) + gh->opt_len * 4; 408 } 409 410 /* 411 * Look for GRO hint in the genenve options; if not found or does not pass basic 412 * sanitization return 0, otherwise the offset WRT the geneve hdr start. 413 */ 414 static unsigned int 415 geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type, 416 unsigned int *gh_len) 417 { 418 struct geneve_opt *opt = (void *)(gh + 1); 419 unsigned int id, opt_len = gh->opt_len; 420 struct geneve_opt_gro_hint *gro_hint; 421 422 while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) { 423 if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) && 424 opt->type == GENEVE_OPT_GRO_HINT_TYPE && 425 opt->length == GENEVE_OPT_GRO_HINT_LEN) 426 goto found; 427 428 /* check for bad opt len */ 429 if (opt->length + 1 >= opt_len) 430 return 0; 431 432 /* next opt */ 433 opt_len -= opt->length + 1; 434 opt = ((void *)opt) + ((opt->length + 1) << 2); 435 } 436 return 0; 437 438 found: 439 gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data; 440 441 /* 442 * Sanitize the hinted hdrs: the nested transport is UDP and must fit 443 * the overall hinted hdr size. 444 */ 445 if (gro_hint->nested_tp_offset + sizeof(struct udphdr) > 446 gro_hint->nested_hdr_len) 447 return 0; 448 449 if (gro_hint->nested_nh_offset + 450 (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) : 451 sizeof(struct iphdr)) > 452 gro_hint->nested_tp_offset) 453 return 0; 454 455 /* Allow only supported L2. */ 456 id = gro_hint->inner_proto_id; 457 if (id >= ARRAY_SIZE(proto_id_map)) 458 return 0; 459 460 *type = proto_id_map[id]; 461 *gh_len += gro_hint->nested_hdr_len; 462 463 return (void *)gro_hint - (void *)gh; 464 } 465 466 static const struct geneve_opt_gro_hint * 467 geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off) 468 { 469 return (const struct geneve_opt_gro_hint *)((void *)gh + hint_off); 470 } 471 472 static unsigned int 473 geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh, 474 __be16 *type, unsigned int *gh_len) 475 { 476 const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk); 477 478 if (!gs || !gs->gro_hint) 479 return 0; 480 return geneve_opt_gro_hint_off(gh, type, gh_len); 481 } 482 483 /* Validate the packet headers pointed by data WRT the provided hint */ 484 static bool 485 geneve_opt_gro_hint_validate(void *data, 486 const struct geneve_opt_gro_hint *gro_hint) 487 { 488 void *nested_nh = data + gro_hint->nested_nh_offset; 489 struct iphdr *iph; 490 491 if (gro_hint->nested_is_v6) { 492 struct ipv6hdr *ipv6h = nested_nh; 493 struct ipv6_opt_hdr *opth; 494 int offset, len; 495 496 if (ipv6h->nexthdr == IPPROTO_UDP) 497 return true; 498 499 offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset; 500 while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) { 501 opth = data + offset; 502 503 len = ipv6_optlen(opth); 504 if (len + offset > gro_hint->nested_tp_offset) 505 return false; 506 if (opth->nexthdr == IPPROTO_UDP) 507 return true; 508 509 offset += len; 510 } 511 return false; 512 } 513 514 iph = nested_nh; 515 if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) || 516 iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5)) 517 return false; 518 519 return true; 520 } 521 522 /* 523 * Validate the skb headers following the specified geneve hdr vs the 524 * provided hint, including nested L4 checksum. 525 * The caller already ensured that the relevant amount of data is available 526 * in the linear part. 527 */ 528 static bool 529 geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb, 530 const struct genevehdr *gh, 531 const struct geneve_opt_gro_hint *gro_hint) 532 { 533 unsigned int plen, gh_len = geneve_hlen(gh); 534 void *nested = (void *)gh + gh_len; 535 struct udphdr *nested_uh; 536 unsigned int nested_len; 537 struct ipv6hdr *ipv6h; 538 struct iphdr *iph; 539 __wsum csum, psum; 540 541 if (!geneve_opt_gro_hint_validate(nested, gro_hint)) 542 return false; 543 544 /* Use GRO hints with nested csum only if the outer header has csum. */ 545 nested_uh = nested + gro_hint->nested_tp_offset; 546 if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL) 547 return true; 548 549 if (!NAPI_GRO_CB(skb)->csum_valid) 550 return false; 551 552 /* Compute the complete checksum up to the nested transport. */ 553 plen = gh_len + gro_hint->nested_tp_offset; 554 csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0)); 555 nested_len = skb_gro_len(skb) - plen; 556 557 /* Compute the nested pseudo header csum. */ 558 ipv6h = nested + gro_hint->nested_nh_offset; 559 iph = (struct iphdr *)ipv6h; 560 psum = gro_hint->nested_is_v6 ? 561 ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 562 nested_len, IPPROTO_UDP, 0)) : 563 csum_tcpudp_nofold(iph->saddr, iph->daddr, 564 nested_len, IPPROTO_UDP, 0); 565 566 return !csum_fold(csum_add(psum, csum)); 567 } 568 569 static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb, 570 unsigned int gh_len, 571 struct genevehdr **geneveh) 572 { 573 const struct geneve_opt_gro_hint *gro_hint; 574 unsigned int len, total_len, hint_off; 575 struct ipv6hdr *ipv6h; 576 struct iphdr *iph; 577 struct udphdr *uh; 578 __be16 p; 579 580 hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len); 581 if (!hint_off) 582 return 0; 583 584 if (!skb_is_gso(skb)) 585 return 0; 586 587 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 588 if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len))) 589 return -ENOMEM; 590 591 *geneveh = geneve_hdr(skb); 592 gro_hint = geneve_opt_gro_hint(*geneveh, hint_off); 593 594 /* 595 * Validate hints from untrusted source before accessing 596 * the headers; csum will be checked later by the nested 597 * protocol rx path. 598 */ 599 if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY && 600 !geneve_opt_gro_hint_validate(skb->data, gro_hint))) 601 return -EINVAL; 602 603 ipv6h = (void *)skb->data + gro_hint->nested_nh_offset; 604 iph = (struct iphdr *)ipv6h; 605 total_len = skb->len - gro_hint->nested_nh_offset; 606 if (total_len > GRO_LEGACY_MAX_SIZE) 607 return -E2BIG; 608 609 /* 610 * After stripping the outer encap, the packet still carries a 611 * tunnel encapsulation: the nested one. 612 */ 613 skb->encapsulation = 1; 614 615 /* GSO expect a valid transpor header, move it to the current one. */ 616 skb_set_transport_header(skb, gro_hint->nested_tp_offset); 617 618 /* Adjust the nested IP{6} hdr to actual GSO len. */ 619 if (gro_hint->nested_is_v6) { 620 ipv6h->payload_len = htons(total_len - sizeof(*ipv6h)); 621 } else { 622 __be16 old_len = iph->tot_len; 623 624 iph->tot_len = htons(total_len); 625 626 /* For IPv4 additionally adjust the nested csum. */ 627 csum_replace2(&iph->check, old_len, iph->tot_len); 628 ip_send_check(iph); 629 } 630 631 /* Adjust the nested UDP header len and checksum. */ 632 uh = udp_hdr(skb); 633 uh->len = htons(skb->len - gro_hint->nested_tp_offset); 634 if (uh->check) { 635 len = skb->len - gro_hint->nested_nh_offset; 636 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; 637 if (gro_hint->nested_is_v6) 638 uh->check = ~udp_v6_check(len, &ipv6h->saddr, 639 &ipv6h->daddr, 0); 640 else 641 uh->check = ~udp_v4_check(len, iph->saddr, 642 iph->daddr, 0); 643 } else { 644 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; 645 } 646 return 0; 647 } 648 649 /* Callback from net/ipv4/udp.c to receive packets */ 650 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 651 { 652 struct genevehdr *geneveh; 653 struct geneve_dev *geneve; 654 struct geneve_sock *gs; 655 __be16 inner_proto; 656 int opts_len; 657 658 /* Need UDP and Geneve header to be present */ 659 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 660 goto drop; 661 662 /* Return packets with reserved bits set */ 663 geneveh = geneve_hdr(skb); 664 if (unlikely(geneveh->ver != GENEVE_VER)) 665 goto drop; 666 667 gs = rcu_dereference_sk_user_data(sk); 668 if (!gs) 669 goto drop; 670 671 geneve = geneve_lookup_skb(gs, skb); 672 if (!geneve) 673 goto drop; 674 675 inner_proto = geneveh->proto_type; 676 677 if (unlikely((!geneve->cfg.inner_proto_inherit && 678 inner_proto != htons(ETH_P_TEB)))) { 679 dev_dstats_rx_dropped(geneve->dev); 680 goto drop; 681 } 682 683 opts_len = geneveh->opt_len * 4; 684 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, 685 !net_eq(geneve->net, dev_net(geneve->dev)))) { 686 dev_dstats_rx_dropped(geneve->dev); 687 goto drop; 688 } 689 690 /* 691 * After hint processing, the transport header points to the inner one 692 * and we can't use anymore on geneve_hdr(). 693 */ 694 geneveh = geneve_hdr(skb); 695 if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) + 696 opts_len, &geneveh)) { 697 DEV_STATS_INC(geneve->dev, rx_errors); 698 goto drop; 699 } 700 701 geneve_rx(geneve, gs, skb, geneveh); 702 return 0; 703 704 drop: 705 /* Consume bad packet */ 706 kfree_skb(skb); 707 return 0; 708 } 709 710 /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ 711 static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) 712 { 713 struct genevehdr *geneveh; 714 struct geneve_sock *gs; 715 u8 zero_vni[3] = { 0 }; 716 u8 *vni = zero_vni; 717 718 if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) 719 return -EINVAL; 720 721 geneveh = geneve_hdr(skb); 722 if (geneveh->ver != GENEVE_VER) 723 return -EINVAL; 724 725 if (geneveh->proto_type != htons(ETH_P_TEB)) 726 return -EINVAL; 727 728 gs = rcu_dereference_sk_user_data(sk); 729 if (!gs) 730 return -ENOENT; 731 732 if (geneve_get_sk_family(gs) == AF_INET) { 733 struct iphdr *iph = ip_hdr(skb); 734 __be32 addr4 = 0; 735 736 if (!gs->collect_md) { 737 vni = geneve_hdr(skb)->vni; 738 addr4 = iph->daddr; 739 } 740 741 return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; 742 } 743 744 #if IS_ENABLED(CONFIG_IPV6) 745 if (geneve_get_sk_family(gs) == AF_INET6) { 746 struct ipv6hdr *ip6h = ipv6_hdr(skb); 747 struct in6_addr addr6; 748 749 memset(&addr6, 0, sizeof(struct in6_addr)); 750 751 if (!gs->collect_md) { 752 vni = geneve_hdr(skb)->vni; 753 addr6 = ip6h->daddr; 754 } 755 756 return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; 757 } 758 #endif 759 760 return -EPFNOSUPPORT; 761 } 762 763 static struct sock *geneve_create_sock(struct net *net, bool ipv6, 764 __be16 port, bool ipv6_rx_csum) 765 { 766 struct udp_port_cfg udp_conf; 767 struct socket *sock; 768 int err; 769 770 memset(&udp_conf, 0, sizeof(udp_conf)); 771 772 if (ipv6) { 773 udp_conf.family = AF_INET6; 774 udp_conf.ipv6_v6only = 1; 775 udp_conf.use_udp6_rx_checksums = ipv6_rx_csum; 776 } else { 777 udp_conf.family = AF_INET; 778 udp_conf.local_ip.s_addr = htonl(INADDR_ANY); 779 } 780 781 udp_conf.local_udp_port = port; 782 783 /* Open UDP socket */ 784 err = udp_sock_create(net, &udp_conf, &sock); 785 if (err < 0) 786 return ERR_PTR(err); 787 788 udp_allow_gso(sock->sk); 789 return sock->sk; 790 } 791 792 static bool geneve_hdr_match(struct sk_buff *skb, 793 const struct genevehdr *gh, 794 const struct genevehdr *gh2, 795 unsigned int hint_off) 796 { 797 const struct geneve_opt_gro_hint *gro_hint; 798 void *nested, *nested2, *nh, *nh2; 799 struct udphdr *udp, *udp2; 800 unsigned int gh_len; 801 802 /* Match the geneve hdr and options */ 803 if (gh->opt_len != gh2->opt_len) 804 return false; 805 806 gh_len = geneve_hlen(gh); 807 if (memcmp(gh, gh2, gh_len)) 808 return false; 809 810 if (!hint_off) 811 return true; 812 813 /* 814 * When gro is present consider the nested headers as part 815 * of the geneve options 816 */ 817 nested = (void *)gh + gh_len; 818 nested2 = (void *)gh2 + gh_len; 819 gro_hint = geneve_opt_gro_hint(gh, hint_off); 820 if (!memcmp(nested, nested2, gro_hint->nested_hdr_len)) 821 return true; 822 823 /* 824 * The nested headers differ; the packets can still belong to 825 * the same flow when IPs/proto/ports match; if so flushing is 826 * required. 827 */ 828 nh = nested + gro_hint->nested_nh_offset; 829 nh2 = nested2 + gro_hint->nested_nh_offset; 830 if (gro_hint->nested_is_v6) { 831 struct ipv6hdr *iph = nh, *iph2 = nh2; 832 unsigned int nested_nlen; 833 __be32 first_word; 834 835 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 836 if ((first_word & htonl(0xF00FFFFF)) || 837 !ipv6_addr_equal(&iph->saddr, &iph2->saddr) || 838 !ipv6_addr_equal(&iph->daddr, &iph2->daddr) || 839 iph->nexthdr != iph2->nexthdr) 840 return false; 841 842 nested_nlen = gro_hint->nested_tp_offset - 843 gro_hint->nested_nh_offset; 844 if (nested_nlen > sizeof(struct ipv6hdr) && 845 (memcmp(iph + 1, iph2 + 1, 846 nested_nlen - sizeof(struct ipv6hdr)))) 847 return false; 848 } else { 849 struct iphdr *iph = nh, *iph2 = nh2; 850 851 if ((iph->protocol ^ iph2->protocol) | 852 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | 853 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) 854 return false; 855 } 856 857 udp = nested + gro_hint->nested_tp_offset; 858 udp2 = nested2 + gro_hint->nested_tp_offset; 859 if (udp->source != udp2->source || udp->dest != udp2->dest || 860 udp->check != udp2->check) 861 return false; 862 863 NAPI_GRO_CB(skb)->flush = 1; 864 return true; 865 } 866 867 static struct sk_buff *geneve_gro_receive(struct sock *sk, 868 struct list_head *head, 869 struct sk_buff *skb) 870 { 871 unsigned int hlen, gh_len, off_gnv, hint_off; 872 const struct geneve_opt_gro_hint *gro_hint; 873 const struct packet_offload *ptype; 874 struct genevehdr *gh, *gh2; 875 struct sk_buff *pp = NULL; 876 struct sk_buff *p; 877 int flush = 1; 878 __be16 type; 879 880 off_gnv = skb_gro_offset(skb); 881 hlen = off_gnv + sizeof(*gh); 882 gh = skb_gro_header(skb, hlen, off_gnv); 883 if (unlikely(!gh)) 884 goto out; 885 886 if (gh->ver != GENEVE_VER || gh->oam) 887 goto out; 888 gh_len = geneve_hlen(gh); 889 type = gh->proto_type; 890 891 hlen = off_gnv + gh_len; 892 if (!skb_gro_may_pull(skb, hlen)) { 893 gh = skb_gro_header_slow(skb, hlen, off_gnv); 894 if (unlikely(!gh)) 895 goto out; 896 } 897 898 /* The GRO hint/nested hdr could use a different ethernet type. */ 899 hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len); 900 if (hint_off) { 901 902 /* 903 * If the hint is present, and nested hdr validation fails, do 904 * not attempt plain GRO: it will ignore inner hdrs and cause 905 * OoO. 906 */ 907 gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv); 908 if (unlikely(!gh)) 909 goto out; 910 911 gro_hint = geneve_opt_gro_hint(gh, hint_off); 912 if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint)) 913 goto out; 914 } 915 916 list_for_each_entry(p, head, list) { 917 if (!NAPI_GRO_CB(p)->same_flow) 918 continue; 919 920 gh2 = (struct genevehdr *)(p->data + off_gnv); 921 if (!geneve_hdr_match(skb, gh, gh2, hint_off)) { 922 NAPI_GRO_CB(p)->same_flow = 0; 923 continue; 924 } 925 } 926 927 skb_gro_pull(skb, gh_len); 928 skb_gro_postpull_rcsum(skb, gh, gh_len); 929 if (likely(type == htons(ETH_P_TEB))) 930 return call_gro_receive(eth_gro_receive, head, skb); 931 932 ptype = gro_find_receive_by_type(type); 933 if (!ptype) 934 goto out; 935 936 pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); 937 flush = 0; 938 939 out: 940 skb_gro_flush_final(skb, pp, flush); 941 942 return pp; 943 } 944 945 static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, 946 int nhoff) 947 { 948 struct genevehdr *gh; 949 struct packet_offload *ptype; 950 __be16 type; 951 int gh_len; 952 int err = -ENOSYS; 953 954 gh = (struct genevehdr *)(skb->data + nhoff); 955 gh_len = geneve_hlen(gh); 956 type = gh->proto_type; 957 geneve_opt_gro_hint_off(gh, &type, &gh_len); 958 959 /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ 960 if (likely(type == htons(ETH_P_TEB))) 961 return eth_gro_complete(skb, nhoff + gh_len); 962 963 ptype = gro_find_complete_by_type(type); 964 if (ptype) 965 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 966 967 skb_set_inner_mac_header(skb, nhoff + gh_len); 968 969 return err; 970 } 971 972 /* Create new listen socket if needed */ 973 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, 974 bool ipv6, bool ipv6_rx_csum) 975 { 976 struct geneve_net *gn = net_generic(net, geneve_net_id); 977 struct udp_tunnel_sock_cfg tunnel_cfg; 978 struct geneve_sock *gs; 979 struct sock *sk; 980 int h; 981 982 gs = kzalloc_obj(*gs); 983 if (!gs) 984 return ERR_PTR(-ENOMEM); 985 986 sk = geneve_create_sock(net, ipv6, port, ipv6_rx_csum); 987 if (IS_ERR(sk)) { 988 kfree(gs); 989 return ERR_CAST(sk); 990 } 991 992 gs->sk = sk; 993 gs->refcnt = 1; 994 for (h = 0; h < VNI_HASH_SIZE; ++h) 995 INIT_HLIST_HEAD(&gs->vni_list[h]); 996 997 /* Initialize the geneve udp offloads structure */ 998 udp_tunnel_notify_add_rx_port(sk, UDP_TUNNEL_TYPE_GENEVE); 999 1000 /* Mark socket as an encapsulation socket */ 1001 memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 1002 tunnel_cfg.sk_user_data = gs; 1003 tunnel_cfg.encap_type = 1; 1004 tunnel_cfg.gro_receive = geneve_gro_receive; 1005 tunnel_cfg.gro_complete = geneve_gro_complete; 1006 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 1007 tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; 1008 tunnel_cfg.encap_destroy = NULL; 1009 setup_udp_tunnel_sock(net, sk, &tunnel_cfg); 1010 list_add(&gs->list, &gn->sock_list); 1011 return gs; 1012 } 1013 1014 static void __geneve_sock_release(struct geneve_sock *gs) 1015 { 1016 if (!gs || --gs->refcnt) 1017 return; 1018 1019 list_del(&gs->list); 1020 udp_tunnel_notify_del_rx_port(gs->sk, UDP_TUNNEL_TYPE_GENEVE); 1021 udp_tunnel_sock_release(gs->sk); 1022 kfree_rcu(gs, rcu); 1023 } 1024 1025 static void geneve_sock_release(struct geneve_dev *geneve) 1026 { 1027 struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); 1028 #if IS_ENABLED(CONFIG_IPV6) 1029 struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); 1030 1031 rcu_assign_pointer(geneve->sock6, NULL); 1032 #endif 1033 1034 rcu_assign_pointer(geneve->sock4, NULL); 1035 1036 __geneve_sock_release(gs4); 1037 #if IS_ENABLED(CONFIG_IPV6) 1038 __geneve_sock_release(gs6); 1039 #endif 1040 } 1041 1042 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, 1043 sa_family_t family, 1044 __be16 dst_port, 1045 bool gro_hint) 1046 { 1047 struct geneve_sock *gs; 1048 1049 list_for_each_entry(gs, &gn->sock_list, list) { 1050 if (inet_sk(gs->sk)->inet_sport == dst_port && 1051 geneve_get_sk_family(gs) == family && 1052 gs->gro_hint == gro_hint) { 1053 return gs; 1054 } 1055 } 1056 return NULL; 1057 } 1058 1059 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 1060 { 1061 struct net *net = geneve->net; 1062 struct geneve_net *gn = net_generic(net, geneve_net_id); 1063 bool gro_hint = geneve->cfg.gro_hint; 1064 struct geneve_dev_node *node; 1065 struct geneve_sock *gs; 1066 __u8 vni[3]; 1067 __u32 hash; 1068 1069 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, 1070 geneve->cfg.info.key.tp_dst, gro_hint); 1071 if (gs) { 1072 gs->refcnt++; 1073 goto out; 1074 } 1075 1076 gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6, 1077 geneve->cfg.use_udp6_rx_checksums); 1078 if (IS_ERR(gs)) 1079 return PTR_ERR(gs); 1080 1081 out: 1082 gs->collect_md = geneve->cfg.collect_md; 1083 gs->gro_hint = gro_hint; 1084 #if IS_ENABLED(CONFIG_IPV6) 1085 if (ipv6) { 1086 rcu_assign_pointer(geneve->sock6, gs); 1087 node = &geneve->hlist6; 1088 } else 1089 #endif 1090 { 1091 rcu_assign_pointer(geneve->sock4, gs); 1092 node = &geneve->hlist4; 1093 } 1094 node->geneve = geneve; 1095 1096 tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); 1097 hash = geneve_net_vni_hash(vni); 1098 hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); 1099 return 0; 1100 } 1101 1102 static int geneve_open(struct net_device *dev) 1103 { 1104 struct geneve_dev *geneve = netdev_priv(dev); 1105 bool metadata = geneve->cfg.collect_md; 1106 bool ipv4, ipv6; 1107 int ret = 0; 1108 1109 ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata; 1110 ipv4 = !ipv6 || metadata; 1111 #if IS_ENABLED(CONFIG_IPV6) 1112 if (ipv6) { 1113 ret = geneve_sock_add(geneve, true); 1114 if (ret < 0 && ret != -EAFNOSUPPORT) 1115 ipv4 = false; 1116 } 1117 #endif 1118 if (ipv4) 1119 ret = geneve_sock_add(geneve, false); 1120 if (ret < 0) 1121 geneve_sock_release(geneve); 1122 1123 return ret; 1124 } 1125 1126 static int geneve_stop(struct net_device *dev) 1127 { 1128 struct geneve_dev *geneve = netdev_priv(dev); 1129 1130 hlist_del_init_rcu(&geneve->hlist4.hlist); 1131 #if IS_ENABLED(CONFIG_IPV6) 1132 hlist_del_init_rcu(&geneve->hlist6.hlist); 1133 #endif 1134 geneve_sock_release(geneve); 1135 return 0; 1136 } 1137 1138 static void geneve_build_header(struct genevehdr *geneveh, 1139 const struct ip_tunnel_info *info, 1140 __be16 inner_proto) 1141 { 1142 geneveh->ver = GENEVE_VER; 1143 geneveh->opt_len = info->options_len / 4; 1144 geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); 1145 geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, 1146 info->key.tun_flags); 1147 geneveh->rsvd1 = 0; 1148 tunnel_id_to_vni(info->key.tun_id, geneveh->vni); 1149 geneveh->proto_type = inner_proto; 1150 geneveh->rsvd2 = 0; 1151 1152 if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) 1153 ip_tunnel_info_opts_get(geneveh->options, info); 1154 } 1155 1156 static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve, 1157 struct sk_buff *skb) 1158 { 1159 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1160 struct geneve_opt_gro_hint *hint; 1161 unsigned int nhlen; 1162 bool nested_is_v6; 1163 int id; 1164 1165 BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb)); 1166 cb->gro_hint_len = 0; 1167 1168 /* Try to add the GRO hint only in case of double encap. */ 1169 if (!geneve->cfg.gro_hint || !skb->encapsulation) 1170 return 0; 1171 1172 /* 1173 * The nested headers must fit the geneve opt len fields and the 1174 * nested encap must carry a nested transport (UDP) header. 1175 */ 1176 nhlen = skb_inner_mac_header(skb) - skb->data; 1177 if (nhlen > 255 || !skb_transport_header_was_set(skb) || 1178 skb->inner_protocol_type != ENCAP_TYPE_ETHER || 1179 (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen)) 1180 return 0; 1181 1182 id = proto_to_id(skb->inner_protocol); 1183 if (id < 0) 1184 return 0; 1185 1186 nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1187 if (nested_is_v6) { 1188 int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); 1189 u8 proto = ipv6_hdr(skb)->nexthdr; 1190 __be16 foff; 1191 1192 if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 || 1193 proto != IPPROTO_UDP) 1194 return 0; 1195 } else { 1196 if (ip_hdr(skb)->protocol != IPPROTO_UDP) 1197 return 0; 1198 } 1199 1200 hint = &cb->gro_hint; 1201 memset(hint, 0, sizeof(*hint)); 1202 hint->inner_proto_id = id; 1203 hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6); 1204 hint->nested_nh_offset = skb_network_offset(skb); 1205 hint->nested_tp_offset = skb_transport_offset(skb); 1206 hint->nested_hdr_len = nhlen; 1207 cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE; 1208 return GENEVE_OPT_GRO_HINT_SIZE; 1209 } 1210 1211 static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size, 1212 const struct geneve_opt_gro_hint *hint) 1213 { 1214 struct geneve_opt *gro_opt; 1215 1216 /* geneve_build_header() did not took in account the GRO hint. */ 1217 gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2; 1218 1219 gro_opt = (void *)(gnvh + 1) + opt_size; 1220 memset(gro_opt, 0, sizeof(*gro_opt)); 1221 1222 gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS); 1223 gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE; 1224 gro_opt->length = GENEVE_OPT_GRO_HINT_LEN; 1225 memcpy(gro_opt + 1, hint, sizeof(*hint)); 1226 } 1227 1228 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, 1229 const struct ip_tunnel_info *info, 1230 const struct geneve_dev *geneve, int ip_hdr_len) 1231 { 1232 bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 1233 bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; 1234 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1235 struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb); 1236 struct genevehdr *gnvh; 1237 __be16 inner_proto; 1238 bool double_encap; 1239 int min_headroom; 1240 int opt_size; 1241 int err; 1242 1243 skb_reset_mac_header(skb); 1244 skb_scrub_packet(skb, xnet); 1245 1246 opt_size = info->options_len + cb->gro_hint_len; 1247 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 1248 GENEVE_BASE_HLEN + opt_size + ip_hdr_len; 1249 err = skb_cow_head(skb, min_headroom); 1250 if (unlikely(err)) 1251 goto free_dst; 1252 1253 double_encap = udp_tunnel_handle_partial(skb); 1254 err = udp_tunnel_handle_offloads(skb, udp_sum); 1255 if (err) 1256 goto free_dst; 1257 1258 gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size); 1259 inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB); 1260 geneve_build_header(gnvh, info, inner_proto); 1261 1262 if (cb->gro_hint_len) 1263 geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint); 1264 1265 udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto); 1266 return 0; 1267 1268 free_dst: 1269 dst_release(dst); 1270 return err; 1271 } 1272 1273 static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, 1274 const struct ip_tunnel_info *info, 1275 bool *use_cache) 1276 { 1277 struct geneve_dev *geneve = netdev_priv(dev); 1278 u8 dsfield; 1279 1280 dsfield = info->key.tos; 1281 if (dsfield == 1 && !geneve->cfg.collect_md) { 1282 dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); 1283 *use_cache = false; 1284 } 1285 1286 return dsfield; 1287 } 1288 1289 static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1290 struct geneve_dev *geneve, 1291 const struct ip_tunnel_info *info) 1292 { 1293 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1294 const struct ip_tunnel_key *key = &info->key; 1295 struct rtable *rt; 1296 bool use_cache; 1297 __u8 tos, ttl; 1298 __be16 df = 0; 1299 __be32 saddr; 1300 __be16 sport; 1301 int err; 1302 1303 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1304 return -EINVAL; 1305 1306 if (!gs4) 1307 return -EIO; 1308 1309 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1310 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1311 sport = udp_flow_src_port(geneve->net, skb, 1312 geneve->cfg.port_min, 1313 geneve->cfg.port_max, true); 1314 1315 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1316 &info->key, 1317 sport, geneve->cfg.info.key.tp_dst, tos, 1318 use_cache ? 1319 (struct dst_cache *)&info->dst_cache : NULL); 1320 if (IS_ERR(rt)) 1321 return PTR_ERR(rt); 1322 1323 err = skb_tunnel_check_pmtu(skb, &rt->dst, 1324 GENEVE_IPV4_HLEN + info->options_len + 1325 geneve_build_gro_hint_opt(geneve, skb), 1326 netif_is_any_bridge_port(dev)); 1327 if (err < 0) { 1328 dst_release(&rt->dst); 1329 return err; 1330 } else if (err) { 1331 struct ip_tunnel_info *info; 1332 1333 info = skb_tunnel_info(skb); 1334 if (info) { 1335 struct ip_tunnel_info *unclone; 1336 1337 unclone = skb_tunnel_info_unclone(skb); 1338 if (unlikely(!unclone)) { 1339 dst_release(&rt->dst); 1340 return -ENOMEM; 1341 } 1342 1343 unclone->key.u.ipv4.dst = saddr; 1344 unclone->key.u.ipv4.src = info->key.u.ipv4.dst; 1345 } 1346 1347 if (!pskb_may_pull(skb, ETH_HLEN)) { 1348 dst_release(&rt->dst); 1349 return -EINVAL; 1350 } 1351 1352 skb->protocol = eth_type_trans(skb, geneve->dev); 1353 __netif_rx(skb); 1354 dst_release(&rt->dst); 1355 return -EMSGSIZE; 1356 } 1357 1358 tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); 1359 if (geneve->cfg.collect_md) { 1360 ttl = key->ttl; 1361 1362 df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? 1363 htons(IP_DF) : 0; 1364 } else { 1365 if (geneve->cfg.ttl_inherit) 1366 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1367 else 1368 ttl = key->ttl; 1369 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 1370 1371 if (geneve->cfg.df == GENEVE_DF_SET) { 1372 df = htons(IP_DF); 1373 } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { 1374 struct ethhdr *eth = skb_eth_hdr(skb); 1375 1376 if (ntohs(eth->h_proto) == ETH_P_IPV6) { 1377 df = htons(IP_DF); 1378 } else if (ntohs(eth->h_proto) == ETH_P_IP) { 1379 struct iphdr *iph = ip_hdr(skb); 1380 1381 if (iph->frag_off & htons(IP_DF)) 1382 df = htons(IP_DF); 1383 } 1384 } 1385 } 1386 1387 err = geneve_build_skb(&rt->dst, skb, info, geneve, 1388 sizeof(struct iphdr)); 1389 if (unlikely(err)) 1390 return err; 1391 1392 udp_tunnel_xmit_skb(rt, gs4->sk, skb, saddr, info->key.u.ipv4.dst, 1393 tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, 1394 !net_eq(geneve->net, dev_net(geneve->dev)), 1395 !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags), 1396 0); 1397 return 0; 1398 } 1399 1400 #if IS_ENABLED(CONFIG_IPV6) 1401 static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 1402 struct geneve_dev *geneve, 1403 const struct ip_tunnel_info *info) 1404 { 1405 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1406 const struct ip_tunnel_key *key = &info->key; 1407 struct dst_entry *dst = NULL; 1408 struct in6_addr saddr; 1409 bool use_cache; 1410 __u8 prio, ttl; 1411 __be16 sport; 1412 int err; 1413 1414 if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit)) 1415 return -EINVAL; 1416 1417 if (!gs6) 1418 return -EIO; 1419 1420 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1421 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1422 sport = udp_flow_src_port(geneve->net, skb, 1423 geneve->cfg.port_min, 1424 geneve->cfg.port_max, true); 1425 1426 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1427 &saddr, key, sport, 1428 geneve->cfg.info.key.tp_dst, prio, 1429 use_cache ? 1430 (struct dst_cache *)&info->dst_cache : NULL); 1431 if (IS_ERR(dst)) 1432 return PTR_ERR(dst); 1433 1434 err = skb_tunnel_check_pmtu(skb, dst, 1435 GENEVE_IPV6_HLEN + info->options_len + 1436 geneve_build_gro_hint_opt(geneve, skb), 1437 netif_is_any_bridge_port(dev)); 1438 if (err < 0) { 1439 dst_release(dst); 1440 return err; 1441 } else if (err) { 1442 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1443 1444 if (info) { 1445 struct ip_tunnel_info *unclone; 1446 1447 unclone = skb_tunnel_info_unclone(skb); 1448 if (unlikely(!unclone)) { 1449 dst_release(dst); 1450 return -ENOMEM; 1451 } 1452 1453 unclone->key.u.ipv6.dst = saddr; 1454 unclone->key.u.ipv6.src = info->key.u.ipv6.dst; 1455 } 1456 1457 if (!pskb_may_pull(skb, ETH_HLEN)) { 1458 dst_release(dst); 1459 return -EINVAL; 1460 } 1461 1462 skb->protocol = eth_type_trans(skb, geneve->dev); 1463 __netif_rx(skb); 1464 dst_release(dst); 1465 return -EMSGSIZE; 1466 } 1467 1468 prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); 1469 if (geneve->cfg.collect_md) { 1470 ttl = key->ttl; 1471 } else { 1472 if (geneve->cfg.ttl_inherit) 1473 ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); 1474 else 1475 ttl = key->ttl; 1476 ttl = ttl ? : ip6_dst_hoplimit(dst); 1477 } 1478 err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr)); 1479 if (unlikely(err)) 1480 return err; 1481 1482 udp_tunnel6_xmit_skb(dst, gs6->sk, skb, dev, 1483 &saddr, &key->u.ipv6.dst, prio, ttl, 1484 info->key.label, sport, geneve->cfg.info.key.tp_dst, 1485 !test_bit(IP_TUNNEL_CSUM_BIT, 1486 info->key.tun_flags), 1487 0); 1488 return 0; 1489 } 1490 #endif 1491 1492 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1493 { 1494 struct geneve_dev *geneve = netdev_priv(dev); 1495 struct ip_tunnel_info *info = NULL; 1496 int err; 1497 1498 if (geneve->cfg.collect_md) { 1499 info = skb_tunnel_info(skb); 1500 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1501 netdev_dbg(dev, "no tunnel metadata\n"); 1502 dev_kfree_skb(skb); 1503 dev_dstats_tx_dropped(dev); 1504 return NETDEV_TX_OK; 1505 } 1506 } else { 1507 info = &geneve->cfg.info; 1508 } 1509 1510 rcu_read_lock(); 1511 #if IS_ENABLED(CONFIG_IPV6) 1512 if (info->mode & IP_TUNNEL_INFO_IPV6) 1513 err = geneve6_xmit_skb(skb, dev, geneve, info); 1514 else 1515 #endif 1516 err = geneve_xmit_skb(skb, dev, geneve, info); 1517 rcu_read_unlock(); 1518 1519 if (likely(!err)) 1520 return NETDEV_TX_OK; 1521 1522 if (err != -EMSGSIZE) 1523 dev_kfree_skb(skb); 1524 1525 if (err == -ELOOP) 1526 DEV_STATS_INC(dev, collisions); 1527 else if (err == -ENETUNREACH) 1528 DEV_STATS_INC(dev, tx_carrier_errors); 1529 1530 DEV_STATS_INC(dev, tx_errors); 1531 return NETDEV_TX_OK; 1532 } 1533 1534 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1535 { 1536 if (new_mtu > dev->max_mtu) 1537 new_mtu = dev->max_mtu; 1538 else if (new_mtu < dev->min_mtu) 1539 new_mtu = dev->min_mtu; 1540 1541 WRITE_ONCE(dev->mtu, new_mtu); 1542 return 0; 1543 } 1544 1545 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1546 { 1547 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1548 struct geneve_dev *geneve = netdev_priv(dev); 1549 __be16 sport; 1550 1551 if (ip_tunnel_info_af(info) == AF_INET) { 1552 struct rtable *rt; 1553 struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); 1554 bool use_cache; 1555 __be32 saddr; 1556 u8 tos; 1557 1558 if (!gs4) 1559 return -EIO; 1560 1561 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1562 tos = geneve_get_dsfield(skb, dev, info, &use_cache); 1563 sport = udp_flow_src_port(geneve->net, skb, 1564 geneve->cfg.port_min, 1565 geneve->cfg.port_max, true); 1566 1567 rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, 1568 &info->key, 1569 sport, geneve->cfg.info.key.tp_dst, 1570 tos, 1571 use_cache ? &info->dst_cache : NULL); 1572 if (IS_ERR(rt)) 1573 return PTR_ERR(rt); 1574 1575 ip_rt_put(rt); 1576 info->key.u.ipv4.src = saddr; 1577 #if IS_ENABLED(CONFIG_IPV6) 1578 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1579 struct dst_entry *dst; 1580 struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); 1581 struct in6_addr saddr; 1582 bool use_cache; 1583 u8 prio; 1584 1585 if (!gs6) 1586 return -EIO; 1587 1588 use_cache = ip_tunnel_dst_cache_usable(skb, info); 1589 prio = geneve_get_dsfield(skb, dev, info, &use_cache); 1590 sport = udp_flow_src_port(geneve->net, skb, 1591 geneve->cfg.port_min, 1592 geneve->cfg.port_max, true); 1593 1594 dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sk, 0, 1595 &saddr, &info->key, sport, 1596 geneve->cfg.info.key.tp_dst, prio, 1597 use_cache ? &info->dst_cache : NULL); 1598 if (IS_ERR(dst)) 1599 return PTR_ERR(dst); 1600 1601 dst_release(dst); 1602 info->key.u.ipv6.src = saddr; 1603 #endif 1604 } else { 1605 return -EINVAL; 1606 } 1607 1608 info->key.tp_src = sport; 1609 info->key.tp_dst = geneve->cfg.info.key.tp_dst; 1610 return 0; 1611 } 1612 1613 static const struct net_device_ops geneve_netdev_ops = { 1614 .ndo_init = geneve_init, 1615 .ndo_uninit = geneve_uninit, 1616 .ndo_open = geneve_open, 1617 .ndo_stop = geneve_stop, 1618 .ndo_start_xmit = geneve_xmit, 1619 .ndo_change_mtu = geneve_change_mtu, 1620 .ndo_validate_addr = eth_validate_addr, 1621 .ndo_set_mac_address = eth_mac_addr, 1622 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1623 }; 1624 1625 static void geneve_get_drvinfo(struct net_device *dev, 1626 struct ethtool_drvinfo *drvinfo) 1627 { 1628 strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1629 strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1630 } 1631 1632 static const struct ethtool_ops geneve_ethtool_ops = { 1633 .get_drvinfo = geneve_get_drvinfo, 1634 .get_link = ethtool_op_get_link, 1635 }; 1636 1637 /* Info for udev, that this is a virtual tunnel endpoint */ 1638 static const struct device_type geneve_type = { 1639 .name = "geneve", 1640 }; 1641 1642 /* Calls the ndo_udp_tunnel_add of the caller in order to 1643 * supply the listening GENEVE udp ports. Callers are expected 1644 * to implement the ndo_udp_tunnel_add. 1645 */ 1646 static void geneve_offload_rx_ports(struct net_device *dev, bool push) 1647 { 1648 struct net *net = dev_net(dev); 1649 struct geneve_net *gn = net_generic(net, geneve_net_id); 1650 struct geneve_sock *gs; 1651 1652 ASSERT_RTNL(); 1653 1654 list_for_each_entry(gs, &gn->sock_list, list) { 1655 if (push) { 1656 udp_tunnel_push_rx_port(dev, gs->sk, 1657 UDP_TUNNEL_TYPE_GENEVE); 1658 } else { 1659 udp_tunnel_drop_rx_port(dev, gs->sk, 1660 UDP_TUNNEL_TYPE_GENEVE); 1661 } 1662 } 1663 } 1664 1665 /* Initialize the device structure. */ 1666 static void geneve_setup(struct net_device *dev) 1667 { 1668 ether_setup(dev); 1669 1670 dev->netdev_ops = &geneve_netdev_ops; 1671 dev->ethtool_ops = &geneve_ethtool_ops; 1672 dev->needs_free_netdev = true; 1673 1674 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1675 1676 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1677 dev->features |= NETIF_F_RXCSUM; 1678 dev->features |= NETIF_F_GSO_SOFTWARE; 1679 1680 /* Partial features are disabled by default. */ 1681 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; 1682 dev->hw_features |= NETIF_F_RXCSUM; 1683 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1684 dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES; 1685 dev->hw_features |= NETIF_F_GSO_PARTIAL; 1686 1687 dev->hw_enc_features = dev->hw_features; 1688 dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES; 1689 dev->mangleid_features = NETIF_F_GSO_PARTIAL; 1690 1691 dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; 1692 /* MTU range: 68 - (something less than 65535) */ 1693 dev->min_mtu = ETH_MIN_MTU; 1694 /* The max_mtu calculation does not take account of GENEVE 1695 * options, to avoid excluding potentially valid 1696 * configurations. This will be further reduced by IPvX hdr size. 1697 */ 1698 dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; 1699 1700 netif_keep_dst(dev); 1701 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1702 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1703 dev->lltx = true; 1704 eth_hw_addr_random(dev); 1705 } 1706 1707 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1708 [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, 1709 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1710 [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, 1711 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1712 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1713 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1714 [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, 1715 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1716 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1717 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1718 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1719 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1720 [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, 1721 [IFLA_GENEVE_DF] = { .type = NLA_U8 }, 1722 [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, 1723 [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), 1724 [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG }, 1725 }; 1726 1727 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], 1728 struct netlink_ext_ack *extack) 1729 { 1730 if (tb[IFLA_ADDRESS]) { 1731 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { 1732 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1733 "Provided link layer address is not Ethernet"); 1734 return -EINVAL; 1735 } 1736 1737 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { 1738 NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], 1739 "Provided Ethernet address is not unicast"); 1740 return -EADDRNOTAVAIL; 1741 } 1742 } 1743 1744 if (!data) { 1745 NL_SET_ERR_MSG(extack, 1746 "Not enough attributes provided to perform the operation"); 1747 return -EINVAL; 1748 } 1749 1750 if (data[IFLA_GENEVE_ID]) { 1751 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1752 1753 if (vni >= GENEVE_N_VID) { 1754 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], 1755 "Geneve ID must be lower than 16777216"); 1756 return -ERANGE; 1757 } 1758 } 1759 1760 if (data[IFLA_GENEVE_DF]) { 1761 enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); 1762 1763 if (df < 0 || df > GENEVE_DF_MAX) { 1764 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], 1765 "Invalid DF attribute"); 1766 return -EINVAL; 1767 } 1768 } 1769 1770 if (data[IFLA_GENEVE_PORT_RANGE]) { 1771 const struct ifla_geneve_port_range *p; 1772 1773 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 1774 if (ntohs(p->high) < ntohs(p->low)) { 1775 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], 1776 "Invalid source port range"); 1777 return -EINVAL; 1778 } 1779 } 1780 1781 return 0; 1782 } 1783 1784 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1785 const struct ip_tunnel_info *info, 1786 bool *tun_on_same_port, 1787 bool *tun_collect_md) 1788 { 1789 struct geneve_dev *geneve, *t = NULL; 1790 1791 *tun_on_same_port = false; 1792 *tun_collect_md = false; 1793 list_for_each_entry(geneve, &gn->geneve_list, next) { 1794 if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) { 1795 *tun_collect_md = geneve->cfg.collect_md; 1796 *tun_on_same_port = true; 1797 } 1798 if (info->key.tun_id == geneve->cfg.info.key.tun_id && 1799 info->key.tp_dst == geneve->cfg.info.key.tp_dst && 1800 !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) 1801 t = geneve; 1802 } 1803 return t; 1804 } 1805 1806 static bool is_tnl_info_zero(const struct ip_tunnel_info *info) 1807 { 1808 return !(info->key.tun_id || info->key.tos || 1809 !ip_tunnel_flags_empty(info->key.tun_flags) || 1810 info->key.ttl || info->key.label || info->key.tp_src || 1811 memchr_inv(&info->key.u, 0, sizeof(info->key.u))); 1812 } 1813 1814 static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, 1815 struct ip_tunnel_info *b) 1816 { 1817 if (ip_tunnel_info_af(a) == AF_INET) 1818 return a->key.u.ipv4.dst == b->key.u.ipv4.dst; 1819 else 1820 return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst); 1821 } 1822 1823 static int geneve_configure(struct net *net, struct net_device *dev, 1824 struct netlink_ext_ack *extack, 1825 const struct geneve_config *cfg) 1826 { 1827 struct geneve_net *gn = net_generic(net, geneve_net_id); 1828 struct geneve_dev *t, *geneve = netdev_priv(dev); 1829 const struct ip_tunnel_info *info = &cfg->info; 1830 bool tun_collect_md, tun_on_same_port; 1831 int err, encap_len; 1832 1833 if (cfg->collect_md && !is_tnl_info_zero(info)) { 1834 NL_SET_ERR_MSG(extack, 1835 "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); 1836 return -EINVAL; 1837 } 1838 1839 geneve->net = net; 1840 geneve->dev = dev; 1841 1842 t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md); 1843 if (t) 1844 return -EBUSY; 1845 1846 /* make enough headroom for basic scenario */ 1847 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1848 if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { 1849 encap_len += sizeof(struct iphdr); 1850 dev->max_mtu -= sizeof(struct iphdr); 1851 } else { 1852 encap_len += sizeof(struct ipv6hdr); 1853 dev->max_mtu -= sizeof(struct ipv6hdr); 1854 } 1855 dev->needed_headroom = encap_len + ETH_HLEN; 1856 1857 if (cfg->collect_md) { 1858 if (tun_on_same_port) { 1859 NL_SET_ERR_MSG(extack, 1860 "There can be only one externally controlled device on a destination port"); 1861 return -EPERM; 1862 } 1863 } else { 1864 if (tun_collect_md) { 1865 NL_SET_ERR_MSG(extack, 1866 "There already exists an externally controlled device on this destination port"); 1867 return -EPERM; 1868 } 1869 } 1870 1871 dst_cache_reset(&geneve->cfg.info.dst_cache); 1872 memcpy(&geneve->cfg, cfg, sizeof(*cfg)); 1873 1874 if (geneve->cfg.inner_proto_inherit) { 1875 dev->header_ops = NULL; 1876 dev->type = ARPHRD_NONE; 1877 dev->hard_header_len = 0; 1878 dev->addr_len = 0; 1879 dev->flags = IFF_POINTOPOINT | IFF_NOARP; 1880 } 1881 1882 err = register_netdevice(dev); 1883 if (err) 1884 return err; 1885 1886 list_add(&geneve->next, &gn->geneve_list); 1887 return 0; 1888 } 1889 1890 static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) 1891 { 1892 memset(info, 0, sizeof(*info)); 1893 info->key.tp_dst = htons(dst_port); 1894 } 1895 1896 static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], 1897 struct netlink_ext_ack *extack, 1898 struct geneve_config *cfg, bool changelink) 1899 { 1900 struct ip_tunnel_info *info = &cfg->info; 1901 int attrtype; 1902 1903 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) { 1904 NL_SET_ERR_MSG(extack, 1905 "Cannot specify both IPv4 and IPv6 Remote addresses"); 1906 return -EINVAL; 1907 } 1908 1909 if (data[IFLA_GENEVE_REMOTE]) { 1910 if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { 1911 attrtype = IFLA_GENEVE_REMOTE; 1912 goto change_notsup; 1913 } 1914 1915 info->key.u.ipv4.dst = 1916 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1917 1918 if (ipv4_is_multicast(info->key.u.ipv4.dst)) { 1919 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], 1920 "Remote IPv4 address cannot be Multicast"); 1921 return -EINVAL; 1922 } 1923 } 1924 1925 if (data[IFLA_GENEVE_REMOTE6]) { 1926 #if IS_ENABLED(CONFIG_IPV6) 1927 if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { 1928 attrtype = IFLA_GENEVE_REMOTE6; 1929 goto change_notsup; 1930 } 1931 1932 info->mode = IP_TUNNEL_INFO_IPV6; 1933 info->key.u.ipv6.dst = 1934 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1935 1936 if (ipv6_addr_type(&info->key.u.ipv6.dst) & 1937 IPV6_ADDR_LINKLOCAL) { 1938 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1939 "Remote IPv6 address cannot be link-local"); 1940 return -EINVAL; 1941 } 1942 if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { 1943 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1944 "Remote IPv6 address cannot be Multicast"); 1945 return -EINVAL; 1946 } 1947 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 1948 cfg->use_udp6_rx_checksums = true; 1949 #else 1950 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], 1951 "IPv6 support not enabled in the kernel"); 1952 return -EPFNOSUPPORT; 1953 #endif 1954 } 1955 1956 if (data[IFLA_GENEVE_ID]) { 1957 __u32 vni; 1958 __u8 tvni[3]; 1959 __be64 tunid; 1960 1961 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1962 tvni[0] = (vni & 0x00ff0000) >> 16; 1963 tvni[1] = (vni & 0x0000ff00) >> 8; 1964 tvni[2] = vni & 0x000000ff; 1965 1966 tunid = vni_to_tunnel_id(tvni); 1967 if (changelink && (tunid != info->key.tun_id)) { 1968 attrtype = IFLA_GENEVE_ID; 1969 goto change_notsup; 1970 } 1971 info->key.tun_id = tunid; 1972 } 1973 1974 if (data[IFLA_GENEVE_TTL_INHERIT]) { 1975 if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) 1976 cfg->ttl_inherit = true; 1977 else 1978 cfg->ttl_inherit = false; 1979 } else if (data[IFLA_GENEVE_TTL]) { 1980 info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1981 cfg->ttl_inherit = false; 1982 } 1983 1984 if (data[IFLA_GENEVE_TOS]) 1985 info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1986 1987 if (data[IFLA_GENEVE_DF]) 1988 cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); 1989 1990 if (data[IFLA_GENEVE_LABEL]) { 1991 info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & 1992 IPV6_FLOWLABEL_MASK; 1993 if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { 1994 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], 1995 "Label attribute only applies for IPv6 Geneve devices"); 1996 return -EINVAL; 1997 } 1998 } 1999 2000 if (data[IFLA_GENEVE_PORT]) { 2001 if (changelink) { 2002 attrtype = IFLA_GENEVE_PORT; 2003 goto change_notsup; 2004 } 2005 info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); 2006 } 2007 2008 if (data[IFLA_GENEVE_PORT_RANGE]) { 2009 const struct ifla_geneve_port_range *p; 2010 2011 if (changelink) { 2012 attrtype = IFLA_GENEVE_PORT_RANGE; 2013 goto change_notsup; 2014 } 2015 p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); 2016 cfg->port_min = ntohs(p->low); 2017 cfg->port_max = ntohs(p->high); 2018 } 2019 2020 if (data[IFLA_GENEVE_COLLECT_METADATA]) { 2021 if (changelink) { 2022 attrtype = IFLA_GENEVE_COLLECT_METADATA; 2023 goto change_notsup; 2024 } 2025 cfg->collect_md = true; 2026 } 2027 2028 if (data[IFLA_GENEVE_UDP_CSUM]) { 2029 if (changelink) { 2030 attrtype = IFLA_GENEVE_UDP_CSUM; 2031 goto change_notsup; 2032 } 2033 if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 2034 __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2035 } 2036 2037 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { 2038 #if IS_ENABLED(CONFIG_IPV6) 2039 if (changelink) { 2040 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; 2041 goto change_notsup; 2042 } 2043 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 2044 __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); 2045 #else 2046 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], 2047 "IPv6 support not enabled in the kernel"); 2048 return -EPFNOSUPPORT; 2049 #endif 2050 } 2051 2052 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { 2053 #if IS_ENABLED(CONFIG_IPV6) 2054 if (changelink) { 2055 attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; 2056 goto change_notsup; 2057 } 2058 if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 2059 cfg->use_udp6_rx_checksums = false; 2060 #else 2061 NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], 2062 "IPv6 support not enabled in the kernel"); 2063 return -EPFNOSUPPORT; 2064 #endif 2065 } 2066 2067 if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { 2068 if (changelink) { 2069 attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; 2070 goto change_notsup; 2071 } 2072 cfg->inner_proto_inherit = true; 2073 } 2074 2075 if (data[IFLA_GENEVE_GRO_HINT]) { 2076 if (changelink) { 2077 attrtype = IFLA_GENEVE_GRO_HINT; 2078 goto change_notsup; 2079 } 2080 cfg->gro_hint = true; 2081 } 2082 2083 return 0; 2084 change_notsup: 2085 NL_SET_ERR_MSG_ATTR(extack, data[attrtype], 2086 "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported"); 2087 return -EOPNOTSUPP; 2088 } 2089 2090 static void geneve_link_config(struct net_device *dev, 2091 struct ip_tunnel_info *info, struct nlattr *tb[]) 2092 { 2093 struct geneve_dev *geneve = netdev_priv(dev); 2094 int ldev_mtu = 0; 2095 2096 if (tb[IFLA_MTU]) { 2097 geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); 2098 return; 2099 } 2100 2101 switch (ip_tunnel_info_af(info)) { 2102 case AF_INET: { 2103 struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; 2104 struct rtable *rt = ip_route_output_key(geneve->net, &fl4); 2105 2106 if (!IS_ERR(rt) && rt->dst.dev) { 2107 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; 2108 ip_rt_put(rt); 2109 } 2110 break; 2111 } 2112 #if IS_ENABLED(CONFIG_IPV6) 2113 case AF_INET6: { 2114 struct rt6_info *rt; 2115 2116 if (!__in6_dev_get(dev)) 2117 break; 2118 2119 rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, 2120 NULL, 0); 2121 2122 if (rt && rt->dst.dev) 2123 ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; 2124 ip6_rt_put(rt); 2125 break; 2126 } 2127 #endif 2128 } 2129 2130 if (ldev_mtu <= 0) 2131 return; 2132 2133 geneve_change_mtu(dev, ldev_mtu - info->options_len); 2134 } 2135 2136 static int geneve_newlink(struct net_device *dev, 2137 struct rtnl_newlink_params *params, 2138 struct netlink_ext_ack *extack) 2139 { 2140 struct net *link_net = rtnl_newlink_link_net(params); 2141 struct nlattr **data = params->data; 2142 struct nlattr **tb = params->tb; 2143 struct geneve_config cfg = { 2144 .df = GENEVE_DF_UNSET, 2145 .use_udp6_rx_checksums = false, 2146 .ttl_inherit = false, 2147 .collect_md = false, 2148 .port_min = 1, 2149 .port_max = USHRT_MAX, 2150 }; 2151 int err; 2152 2153 init_tnl_info(&cfg.info, GENEVE_UDP_PORT); 2154 err = geneve_nl2info(tb, data, extack, &cfg, false); 2155 if (err) 2156 return err; 2157 2158 err = geneve_configure(link_net, dev, extack, &cfg); 2159 if (err) 2160 return err; 2161 2162 geneve_link_config(dev, &cfg.info, tb); 2163 2164 return 0; 2165 } 2166 2167 /* Quiesces the geneve device data path for both TX and RX. 2168 * 2169 * On transmit geneve checks for non-NULL geneve_sock before it proceeds. 2170 * So, if we set that socket to NULL under RCU and wait for synchronize_net() 2171 * to complete for the existing set of in-flight packets to be transmitted, 2172 * then we would have quiesced the transmit data path. All the future packets 2173 * will get dropped until we unquiesce the data path. 2174 * 2175 * On receive geneve dereference the geneve_sock stashed in the socket. So, 2176 * if we set that to NULL under RCU and wait for synchronize_net() to 2177 * complete, then we would have quiesced the receive data path. 2178 */ 2179 static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, 2180 struct geneve_sock **gs6) 2181 { 2182 *gs4 = rtnl_dereference(geneve->sock4); 2183 rcu_assign_pointer(geneve->sock4, NULL); 2184 if (*gs4) 2185 rcu_assign_sk_user_data((*gs4)->sk, NULL); 2186 #if IS_ENABLED(CONFIG_IPV6) 2187 *gs6 = rtnl_dereference(geneve->sock6); 2188 rcu_assign_pointer(geneve->sock6, NULL); 2189 if (*gs6) 2190 rcu_assign_sk_user_data((*gs6)->sk, NULL); 2191 #else 2192 *gs6 = NULL; 2193 #endif 2194 synchronize_net(); 2195 } 2196 2197 /* Resumes the geneve device data path for both TX and RX. */ 2198 static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, 2199 struct geneve_sock __maybe_unused *gs6) 2200 { 2201 rcu_assign_pointer(geneve->sock4, gs4); 2202 if (gs4) 2203 rcu_assign_sk_user_data(gs4->sk, gs4); 2204 #if IS_ENABLED(CONFIG_IPV6) 2205 rcu_assign_pointer(geneve->sock6, gs6); 2206 if (gs6) 2207 rcu_assign_sk_user_data(gs6->sk, gs6); 2208 #endif 2209 } 2210 2211 static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], 2212 struct nlattr *data[], 2213 struct netlink_ext_ack *extack) 2214 { 2215 struct geneve_dev *geneve = netdev_priv(dev); 2216 struct geneve_sock *gs4, *gs6; 2217 struct geneve_config cfg; 2218 int err; 2219 2220 /* If the geneve device is configured for metadata (or externally 2221 * controlled, for example, OVS), then nothing can be changed. 2222 */ 2223 if (geneve->cfg.collect_md) 2224 return -EOPNOTSUPP; 2225 2226 /* Start with the existing info. */ 2227 memcpy(&cfg, &geneve->cfg, sizeof(cfg)); 2228 err = geneve_nl2info(tb, data, extack, &cfg, true); 2229 if (err) 2230 return err; 2231 2232 if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { 2233 dst_cache_reset(&cfg.info.dst_cache); 2234 geneve_link_config(dev, &cfg.info, tb); 2235 } 2236 2237 geneve_quiesce(geneve, &gs4, &gs6); 2238 memcpy(&geneve->cfg, &cfg, sizeof(cfg)); 2239 geneve_unquiesce(geneve, gs4, gs6); 2240 2241 return 0; 2242 } 2243 2244 static void geneve_dellink(struct net_device *dev, struct list_head *head) 2245 { 2246 struct geneve_dev *geneve = netdev_priv(dev); 2247 2248 list_del(&geneve->next); 2249 unregister_netdevice_queue(dev, head); 2250 } 2251 2252 static size_t geneve_get_size(const struct net_device *dev) 2253 { 2254 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 2255 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 2256 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 2257 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 2258 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ 2259 nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ 2260 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 2261 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 2262 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 2263 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 2264 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 2265 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ 2266 nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ 2267 nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 2268 nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */ 2269 0; 2270 } 2271 2272 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 2273 { 2274 struct geneve_dev *geneve = netdev_priv(dev); 2275 struct ip_tunnel_info *info = &geneve->cfg.info; 2276 bool ttl_inherit = geneve->cfg.ttl_inherit; 2277 bool metadata = geneve->cfg.collect_md; 2278 struct ifla_geneve_port_range ports = { 2279 .low = htons(geneve->cfg.port_min), 2280 .high = htons(geneve->cfg.port_max), 2281 }; 2282 __u8 tmp_vni[3]; 2283 __u32 vni; 2284 2285 tunnel_id_to_vni(info->key.tun_id, tmp_vni); 2286 vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; 2287 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 2288 goto nla_put_failure; 2289 2290 if (!metadata && ip_tunnel_info_af(info) == AF_INET) { 2291 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 2292 info->key.u.ipv4.dst)) 2293 goto nla_put_failure; 2294 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 2295 test_bit(IP_TUNNEL_CSUM_BIT, 2296 info->key.tun_flags))) 2297 goto nla_put_failure; 2298 2299 #if IS_ENABLED(CONFIG_IPV6) 2300 } else if (!metadata) { 2301 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 2302 &info->key.u.ipv6.dst)) 2303 goto nla_put_failure; 2304 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 2305 !test_bit(IP_TUNNEL_CSUM_BIT, 2306 info->key.tun_flags))) 2307 goto nla_put_failure; 2308 #endif 2309 } 2310 2311 if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || 2312 nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || 2313 nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) 2314 goto nla_put_failure; 2315 2316 if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) 2317 goto nla_put_failure; 2318 2319 if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) 2320 goto nla_put_failure; 2321 2322 if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 2323 goto nla_put_failure; 2324 2325 #if IS_ENABLED(CONFIG_IPV6) 2326 if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 2327 !geneve->cfg.use_udp6_rx_checksums)) 2328 goto nla_put_failure; 2329 #endif 2330 2331 if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) 2332 goto nla_put_failure; 2333 2334 if (geneve->cfg.inner_proto_inherit && 2335 nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) 2336 goto nla_put_failure; 2337 2338 if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) 2339 goto nla_put_failure; 2340 2341 if (geneve->cfg.gro_hint && 2342 nla_put_flag(skb, IFLA_GENEVE_GRO_HINT)) 2343 goto nla_put_failure; 2344 2345 return 0; 2346 2347 nla_put_failure: 2348 return -EMSGSIZE; 2349 } 2350 2351 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 2352 .kind = "geneve", 2353 .maxtype = IFLA_GENEVE_MAX, 2354 .policy = geneve_policy, 2355 .priv_size = sizeof(struct geneve_dev), 2356 .setup = geneve_setup, 2357 .validate = geneve_validate, 2358 .newlink = geneve_newlink, 2359 .changelink = geneve_changelink, 2360 .dellink = geneve_dellink, 2361 .get_size = geneve_get_size, 2362 .fill_info = geneve_fill_info, 2363 }; 2364 2365 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 2366 u8 name_assign_type, u16 dst_port) 2367 { 2368 struct nlattr *tb[IFLA_MAX + 1]; 2369 struct net_device *dev; 2370 LIST_HEAD(list_kill); 2371 int err; 2372 struct geneve_config cfg = { 2373 .df = GENEVE_DF_UNSET, 2374 .use_udp6_rx_checksums = true, 2375 .ttl_inherit = false, 2376 .collect_md = true, 2377 .port_min = 1, 2378 .port_max = USHRT_MAX, 2379 }; 2380 2381 memset(tb, 0, sizeof(tb)); 2382 dev = rtnl_create_link(net, name, name_assign_type, 2383 &geneve_link_ops, tb, NULL); 2384 if (IS_ERR(dev)) 2385 return dev; 2386 2387 init_tnl_info(&cfg.info, dst_port); 2388 err = geneve_configure(net, dev, NULL, &cfg); 2389 if (err) { 2390 free_netdev(dev); 2391 return ERR_PTR(err); 2392 } 2393 2394 /* openvswitch users expect packet sizes to be unrestricted, 2395 * so set the largest MTU we can. 2396 */ 2397 err = geneve_change_mtu(dev, IP_MAX_MTU); 2398 if (err) 2399 goto err; 2400 2401 err = rtnl_configure_link(dev, NULL, 0, NULL); 2402 if (err < 0) 2403 goto err; 2404 2405 return dev; 2406 err: 2407 geneve_dellink(dev, &list_kill); 2408 unregister_netdevice_many(&list_kill); 2409 return ERR_PTR(err); 2410 } 2411 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 2412 2413 static int geneve_netdevice_event(struct notifier_block *unused, 2414 unsigned long event, void *ptr) 2415 { 2416 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2417 2418 if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) 2419 geneve_offload_rx_ports(dev, true); 2420 else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) 2421 geneve_offload_rx_ports(dev, false); 2422 2423 return NOTIFY_DONE; 2424 } 2425 2426 static struct notifier_block geneve_notifier_block __read_mostly = { 2427 .notifier_call = geneve_netdevice_event, 2428 }; 2429 2430 static __net_init int geneve_init_net(struct net *net) 2431 { 2432 struct geneve_net *gn = net_generic(net, geneve_net_id); 2433 2434 INIT_LIST_HEAD(&gn->geneve_list); 2435 INIT_LIST_HEAD(&gn->sock_list); 2436 return 0; 2437 } 2438 2439 static void __net_exit geneve_exit_rtnl_net(struct net *net, 2440 struct list_head *dev_to_kill) 2441 { 2442 struct geneve_net *gn = net_generic(net, geneve_net_id); 2443 struct geneve_dev *geneve, *next; 2444 2445 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) 2446 geneve_dellink(geneve->dev, dev_to_kill); 2447 } 2448 2449 static void __net_exit geneve_exit_net(struct net *net) 2450 { 2451 const struct geneve_net *gn = net_generic(net, geneve_net_id); 2452 2453 WARN_ON_ONCE(!list_empty(&gn->sock_list)); 2454 } 2455 2456 static struct pernet_operations geneve_net_ops = { 2457 .init = geneve_init_net, 2458 .exit_rtnl = geneve_exit_rtnl_net, 2459 .exit = geneve_exit_net, 2460 .id = &geneve_net_id, 2461 .size = sizeof(struct geneve_net), 2462 }; 2463 2464 static int __init geneve_init_module(void) 2465 { 2466 int rc; 2467 2468 rc = register_pernet_subsys(&geneve_net_ops); 2469 if (rc) 2470 goto out1; 2471 2472 rc = register_netdevice_notifier(&geneve_notifier_block); 2473 if (rc) 2474 goto out2; 2475 2476 rc = rtnl_link_register(&geneve_link_ops); 2477 if (rc) 2478 goto out3; 2479 2480 return 0; 2481 out3: 2482 unregister_netdevice_notifier(&geneve_notifier_block); 2483 out2: 2484 unregister_pernet_subsys(&geneve_net_ops); 2485 out1: 2486 return rc; 2487 } 2488 late_initcall(geneve_init_module); 2489 2490 static void __exit geneve_cleanup_module(void) 2491 { 2492 rtnl_link_unregister(&geneve_link_ops); 2493 unregister_netdevice_notifier(&geneve_notifier_block); 2494 unregister_pernet_subsys(&geneve_net_ops); 2495 } 2496 module_exit(geneve_cleanup_module); 2497 2498 MODULE_LICENSE("GPL"); 2499 MODULE_VERSION(GENEVE_NETDEV_VER); 2500 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 2501 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 2502 MODULE_ALIAS_RTNL_LINK("geneve"); 2503