// SPDX-License-Identifier: GPL-2.0-only
/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
#include <net/ipv6_stubs.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/gro.h>
#include <net/protocol.h>

#define GENEVE_NETDEV_VER	"0.6"

#define GENEVE_N_VID		(1u << 24)
#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)

#define VNI_HASH_BITS		10
#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

#define GENEVE_VER 0
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)

/* per-network namespace private data for this module */
struct geneve_net {
	struct list_head	geneve_list;
	struct list_head	sock_list;
};

static unsigned int geneve_net_id;

struct geneve_dev_node {
	struct hlist_node hlist;
	struct geneve_dev *geneve;
};

struct geneve_config {
	struct ip_tunnel_info	info;
	bool			collect_md;
	bool			use_udp6_rx_checksums;
	bool			ttl_inherit;
	enum ifla_geneve_df	df;
	bool			inner_proto_inherit;
	u16			port_min;
	u16			port_max;
};

/* Pseudo network device */
struct geneve_dev {
	struct geneve_dev_node hlist4;	/* vni hash table for IPv4 socket */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_dev_node hlist6;	/* vni hash table for IPv6 socket */
#endif
	struct net	   *net;	/* netns for packet i/o */
	struct net_device  *dev;	/* netdev for geneve tunnel */
	struct geneve_sock __rcu *sock4;	/* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock __rcu *sock6;	/* IPv6 socket used for geneve tunnel */
#endif
	struct list_head   next;	/* geneve's per namespace list */
	struct gro_cells   gro_cells;
	struct geneve_config cfg;
};

struct geneve_sock {
	bool			collect_md;
	struct list_head	list;
	struct socket		*sock;
	struct rcu_head		rcu;
	int			refcnt;
	struct hlist_head	vni_list[VNI_HASH_SIZE];
};

static inline __u32 geneve_net_vni_hash(u8 vni[3])
{
	__u32 vnid;

	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
	return hash_32(vnid, VNI_HASH_BITS);
}

static __be64 vni_to_tunnel_id(const __u8 *vni)
{
#ifdef __BIG_ENDIAN
	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
#else
	return (__force __be64)(((__force u64)vni[0] << 40) |
				((__force u64)vni[1] << 48) |
				((__force u64)vni[2] << 56));
#endif
}

/* Convert 64 bit tunnel ID to 24 bit VNI. */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
#ifdef __BIG_ENDIAN
	vni[0] = (__force __u8)(tun_id >> 16);
	vni[1] = (__force __u8)(tun_id >> 8);
	vni[2] = (__force __u8)tun_id;
#else
	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
#endif
}

static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
{
	return !memcmp(vni, &tun_id[5], 3);
}

static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
{
	return gs->sock->sk->sk_family;
}

static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
					__be32 addr, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    addr == node->geneve->cfg.info.key.u.ipv4.dst)
			return node->geneve;
	}
	return NULL;
}

#if IS_ENABLED(CONFIG_IPV6)
static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
					 struct in6_addr addr6, u8 vni[])
{
	struct hlist_head *vni_list_head;
	struct geneve_dev_node *node;
	__u32 hash;

	/* Find the device for this VNI */
	hash = geneve_net_vni_hash(vni);
	vni_list_head = &gs->vni_list[hash];
	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
		    ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
			return node->geneve;
	}
	return NULL;
}
#endif

static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}

static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
					    struct sk_buff *skb)
{
	static u8 zero_vni[3];
	u8 *vni;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph;
		__be32 addr;

		iph = ip_hdr(skb); /* outer IP header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr = 0;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr = iph->saddr;
		}

		return geneve_lookup(gs, addr, vni);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (geneve_get_sk_family(gs) == AF_INET6) {
		static struct in6_addr zero_addr6;
		struct ipv6hdr *ip6h;
		struct in6_addr addr6;

		ip6h = ipv6_hdr(skb); /* outer IPv6 header... */

		if (gs->collect_md) {
			vni = zero_vni;
			addr6 = zero_addr6;
		} else {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->saddr;
		}

		return geneve6_lookup(gs, addr6, vni);
#endif
	}
	return NULL;
}

/* geneve receive/decap routine */
static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
		      struct sk_buff *skb)
{
	struct genevehdr *gnvh = geneve_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	unsigned int len;
	int nh, err = 0;
	void *oiph;

	if (ip_tunnel_collect_metadata() || gs->collect_md) {
		IP_TUNNEL_DECLARE_FLAGS(flags) = { };

		__set_bit(IP_TUNNEL_KEY_BIT, flags);
		__assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
		__assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);

		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
					 vni_to_tunnel_id(gnvh->vni),
					 gnvh->opt_len * 4);
		if (!tun_dst) {
			dev_dstats_rx_dropped(geneve->dev);
			goto drop;
		}
		/* Update tunnel dst according to Geneve options. */
		ip_tunnel_flags_zero(flags);
		__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					gnvh->options, gnvh->opt_len * 4,
					flags);
	} else {
		/* Drop packets w/ critical options,
		 * since we don't support any...
		 */
		if (gnvh->critical) {
			DEV_STATS_INC(geneve->dev, rx_frame_errors);
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	}

	if (tun_dst)
		skb_dst_set(skb, &tun_dst->dst);

	if (gnvh->proto_type == htons(ETH_P_TEB)) {
		skb_reset_mac_header(skb);
		skb->protocol = eth_type_trans(skb, geneve->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

		/* Ignore packet loops (and multicast echo) */
		if (ether_addr_equal(eth_hdr(skb)->h_source,
				     geneve->dev->dev_addr)) {
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	} else {
		skb_reset_mac_header(skb);
		skb->dev = geneve->dev;
		skb->pkt_type = PACKET_HOST;
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(geneve->dev, rx_length_errors);
		DEV_STATS_INC(geneve->dev, rx_errors);
		goto drop;
	}

	/* Get the outer header. */
	oiph = skb->head + nh;

	if (geneve_get_sk_family(gs) == AF_INET)
		err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
	else
		err = IP6_ECN_decapsulate(oiph, skb);
#endif

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (geneve_get_sk_family(gs) == AF_INET)
				net_info_ratelimited("non-ECT from %pI4 "
						     "with TOS=%#x\n",
						     &((struct iphdr *)oiph)->saddr,
						     ((struct iphdr *)oiph)->tos);
#if IS_ENABLED(CONFIG_IPV6)
			else
				net_info_ratelimited("non-ECT from %pI6\n",
						     &((struct ipv6hdr *)oiph)->saddr);
#endif
		}
		if (err > 1) {
			DEV_STATS_INC(geneve->dev, rx_frame_errors);
			DEV_STATS_INC(geneve->dev, rx_errors);
			goto drop;
		}
	}

	len = skb->len;
	err = gro_cells_receive(&geneve->gro_cells, skb);
	if (likely(err == NET_RX_SUCCESS))
		dev_dstats_rx_add(geneve->dev, len);

	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
}

/* Setup stats when device is created */
static int geneve_init(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int err;

	err = gro_cells_init(&geneve->gro_cells, dev);
	if (err)
		return err;

	err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
	if (err) {
		gro_cells_destroy(&geneve->gro_cells);
		return err;
	}
	netdev_lockdep_set_classes(dev);
	return 0;
}

static void geneve_uninit(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	dst_cache_destroy(&geneve->cfg.info.dst_cache);
	gro_cells_destroy(&geneve->gro_cells);
}

/* Callback from net/ipv4/udp.c to receive packets */
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_dev *geneve;
	struct geneve_sock *gs;
	__be16 inner_proto;
	int opts_len;

	/* Need UDP and Geneve header to be present */
	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
		goto drop;

	/* Return packets with reserved bits set */
	geneveh = geneve_hdr(skb);
	if (unlikely(geneveh->ver != GENEVE_VER))
		goto drop;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		goto drop;

	geneve = geneve_lookup_skb(gs, skb);
	if (!geneve)
		goto drop;

	inner_proto = geneveh->proto_type;

	if (unlikely((!geneve->cfg.inner_proto_inherit &&
		      inner_proto != htons(ETH_P_TEB)))) {
		dev_dstats_rx_dropped(geneve->dev);
		goto drop;
	}

	opts_len = geneveh->opt_len * 4;
	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
				 !net_eq(geneve->net, dev_net(geneve->dev)))) {
		dev_dstats_rx_dropped(geneve->dev);
		goto drop;
	}

	geneve_rx(geneve, gs, skb);
	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;
}

/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_sock *gs;
	u8 zero_vni[3] = { 0 };
	u8 *vni = zero_vni;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
		return -EINVAL;

	geneveh = geneve_hdr(skb);
	if (geneveh->ver != GENEVE_VER)
		return -EINVAL;

	if (geneveh->proto_type != htons(ETH_P_TEB))
		return -EINVAL;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		return -ENOENT;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph = ip_hdr(skb);
		__be32 addr4 = 0;

		if (!gs->collect_md) {
			vni = geneve_hdr(skb)->vni;
			addr4 = iph->daddr;
		}

		return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (geneve_get_sk_family(gs) == AF_INET6) {
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		struct in6_addr addr6;

		memset(&addr6, 0, sizeof(struct in6_addr));

		if (!gs->collect_md) {
			vni = geneve_hdr(skb)->vni;
			addr6 = ip6h->daddr;
		}

		return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
	}
#endif

	return -EPFNOSUPPORT;
}

static struct socket *geneve_create_sock(struct net *net, bool ipv6,
					 __be16 port, bool ipv6_rx_csum)
{
	struct socket *sock;
	struct udp_port_cfg udp_conf;
	int err;

	memset(&udp_conf, 0, sizeof(udp_conf));

	if (ipv6) {
		udp_conf.family = AF_INET6;
		udp_conf.ipv6_v6only = 1;
		udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
	} else {
		udp_conf.family = AF_INET;
		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
	}

	udp_conf.local_udp_port = port;

	/* Open UDP socket */
	err = udp_sock_create(net, &udp_conf, &sock);
	if (err < 0)
		return ERR_PTR(err);

	udp_allow_gso(sock->sk);
	return sock;
}

static int geneve_hlen(struct genevehdr *gh)
{
	return sizeof(*gh) + gh->opt_len * 4;
}

static struct sk_buff *geneve_gro_receive(struct sock *sk,
					  struct list_head *head,
					  struct sk_buff *skb)
{
	struct sk_buff *pp = NULL;
	struct sk_buff *p;
	struct genevehdr *gh, *gh2;
	unsigned int hlen, gh_len, off_gnv;
	const struct packet_offload *ptype;
	__be16 type;
	int flush = 1;

	off_gnv = skb_gro_offset(skb);
	hlen = off_gnv + sizeof(*gh);
	gh = skb_gro_header(skb, hlen, off_gnv);
	if (unlikely(!gh))
		goto out;

	if (gh->ver != GENEVE_VER || gh->oam)
		goto out;
	gh_len = geneve_hlen(gh);

	hlen = off_gnv + gh_len;
	if (!skb_gro_may_pull(skb, hlen)) {
		gh = skb_gro_header_slow(skb, hlen, off_gnv);
		if (unlikely(!gh))
			goto out;
	}

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		gh2 = (struct genevehdr *)(p->data + off_gnv);
		if (gh->opt_len != gh2->opt_len ||
		    memcmp(gh, gh2, gh_len)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	skb_gro_pull(skb, gh_len);
	skb_gro_postpull_rcsum(skb, gh, gh_len);
	type = gh->proto_type;
	if (likely(type == htons(ETH_P_TEB)))
		return call_gro_receive(eth_gro_receive, head, skb);

	ptype = gro_find_receive_by_type(type);
	if (!ptype)
		goto out;

	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
	flush = 0;

out:
	skb_gro_flush_final(skb, pp, flush);

	return pp;
}

static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
			       int nhoff)
{
	struct genevehdr *gh;
	struct packet_offload *ptype;
	__be16 type;
	int gh_len;
	int err = -ENOSYS;

	gh = (struct genevehdr *)(skb->data + nhoff);
	gh_len = geneve_hlen(gh);
	type = gh->proto_type;

	/* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
	if (likely(type == htons(ETH_P_TEB)))
		return eth_gro_complete(skb, nhoff + gh_len);

	ptype = gro_find_complete_by_type(type);
	if (ptype)
		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);

	skb_set_inner_mac_header(skb, nhoff + gh_len);

	return err;
}

/* Create new listen socket if needed */
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
						bool ipv6, bool ipv6_rx_csum)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;
	struct socket *sock;
	struct udp_tunnel_sock_cfg tunnel_cfg;
	int h;

	gs = kzalloc(sizeof(*gs), GFP_KERNEL);
	if (!gs)
		return ERR_PTR(-ENOMEM);

	sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
	if (IS_ERR(sock)) {
		kfree(gs);
		return ERR_CAST(sock);
	}

	gs->sock = sock;
	gs->refcnt = 1;
	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&gs->vni_list[h]);

	/* Initialize the geneve udp offloads structure */
	udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);

	/* Mark socket as an encapsulation socket */
	memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
	tunnel_cfg.sk_user_data = gs;
	tunnel_cfg.encap_type = 1;
	tunnel_cfg.gro_receive = geneve_gro_receive;
	tunnel_cfg.gro_complete = geneve_gro_complete;
	tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
	tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
	tunnel_cfg.encap_destroy = NULL;
	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
	list_add(&gs->list, &gn->sock_list);
	return gs;
}

static void __geneve_sock_release(struct geneve_sock *gs)
{
	if (!gs || --gs->refcnt)
		return;

	list_del(&gs->list);
	udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
	udp_tunnel_sock_release(gs->sock);
	kfree_rcu(gs, rcu);
}

static void geneve_sock_release(struct geneve_dev *geneve)
{
	struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
#if IS_ENABLED(CONFIG_IPV6)
	struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);

	rcu_assign_pointer(geneve->sock6, NULL);
#endif

	rcu_assign_pointer(geneve->sock4, NULL);
	synchronize_net();

	__geneve_sock_release(gs4);
#if IS_ENABLED(CONFIG_IPV6)
	__geneve_sock_release(gs6);
#endif
}

static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
					    sa_family_t family,
					    __be16 dst_port)
{
	struct geneve_sock *gs;

	list_for_each_entry(gs, &gn->sock_list, list) {
		if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
		    geneve_get_sk_family(gs) == family) {
			return gs;
		}
	}
	return NULL;
}

static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
{
	struct net *net = geneve->net;
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev_node *node;
	struct geneve_sock *gs;
	__u8 vni[3];
	__u32 hash;

	gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET,
			      geneve->cfg.info.key.tp_dst);
	if (gs) {
		gs->refcnt++;
		goto out;
	}

	gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
				  geneve->cfg.use_udp6_rx_checksums);
	if (IS_ERR(gs))
		return PTR_ERR(gs);

out:
	gs->collect_md = geneve->cfg.collect_md;
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6) {
		rcu_assign_pointer(geneve->sock6, gs);
		node = &geneve->hlist6;
	} else
#endif
	{
		rcu_assign_pointer(geneve->sock4, gs);
		node = &geneve->hlist4;
	}
	node->geneve = geneve;

	tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
	hash = geneve_net_vni_hash(vni);
	hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
	return 0;
}

static int geneve_open(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	bool metadata = geneve->cfg.collect_md;
	bool ipv4, ipv6;
	int ret = 0;

	ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
	ipv4 = !ipv6 || metadata;
#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6) {
		ret = geneve_sock_add(geneve, true);
		if (ret < 0 && ret != -EAFNOSUPPORT)
			ipv4 = false;
	}
#endif
	if (ipv4)
		ret = geneve_sock_add(geneve, false);
	if (ret < 0)
		geneve_sock_release(geneve);

	return ret;
}

static int geneve_stop(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	hlist_del_init_rcu(&geneve->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
	hlist_del_init_rcu(&geneve->hlist6.hlist);
#endif
	geneve_sock_release(geneve);
	return 0;
}

static void geneve_build_header(struct genevehdr *geneveh,
				const struct ip_tunnel_info *info,
				__be16 inner_proto)
{
	geneveh->ver = GENEVE_VER;
	geneveh->opt_len = info->options_len / 4;
	geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags);
	geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT,
				     info->key.tun_flags);
	geneveh->rsvd1 = 0;
	tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
	geneveh->proto_type = inner_proto;
	geneveh->rsvd2 = 0;

	if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags))
		ip_tunnel_info_opts_get(geneveh->options, info);
}

static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
			    const struct ip_tunnel_info *info,
			    bool xnet, int ip_hdr_len,
			    bool inner_proto_inherit)
{
	bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
	struct genevehdr *gnvh;
	__be16 inner_proto;
	int min_headroom;
	int err;

	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, xnet);

	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
		       GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		goto free_dst;

	err = udp_tunnel_handle_offloads(skb, udp_sum);
	if (err)
		goto free_dst;

	gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
	inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
	geneve_build_header(gnvh, info, inner_proto);
	skb_set_inner_protocol(skb, inner_proto);
	return 0;

free_dst:
	dst_release(dst);
	return err;
}

static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
			     const struct ip_tunnel_info *info,
			     bool *use_cache)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	u8 dsfield;

	dsfield = info->key.tos;
	if (dsfield == 1 && !geneve->cfg.collect_md) {
		dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
		*use_cache = false;
	}

	return dsfield;
}

static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
			   struct geneve_dev *geneve,
			   const struct ip_tunnel_info *info)
{
	bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
	struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
	const struct ip_tunnel_key *key = &info->key;
	struct rtable *rt;
	bool use_cache;
	__u8 tos, ttl;
	__be16 df = 0;
	__be32 saddr;
	__be16 sport;
	int err;

	if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	if (!gs4)
		return -EIO;

	use_cache = ip_tunnel_dst_cache_usable(skb, info);
	tos = geneve_get_dsfield(skb, dev, info, &use_cache);
	sport = udp_flow_src_port(geneve->net, skb,
				  geneve->cfg.port_min,
				  geneve->cfg.port_max, true);

	rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
				   &info->key,
				   sport, geneve->cfg.info.key.tp_dst, tos,
				   use_cache ?
				   (struct dst_cache *)&info->dst_cache : NULL);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	err = skb_tunnel_check_pmtu(skb, &rt->dst,
				    GENEVE_IPV4_HLEN + info->options_len,
				    netif_is_any_bridge_port(dev));
	if (err < 0) {
		dst_release(&rt->dst);
		return err;
	} else if (err) {
		struct ip_tunnel_info *info;

		info = skb_tunnel_info(skb);
		if (info) {
			struct ip_tunnel_info *unclone;

			unclone = skb_tunnel_info_unclone(skb);
			if (unlikely(!unclone)) {
				dst_release(&rt->dst);
				return -ENOMEM;
			}

			unclone->key.u.ipv4.dst = saddr;
			unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
		}

		if (!pskb_may_pull(skb, ETH_HLEN)) {
			dst_release(&rt->dst);
			return -EINVAL;
		}

		skb->protocol = eth_type_trans(skb, geneve->dev);
		__netif_rx(skb);
		dst_release(&rt->dst);
		return -EMSGSIZE;
	}

	tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
	if (geneve->cfg.collect_md) {
		ttl = key->ttl;

		df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
		     htons(IP_DF) : 0;
	} else {
		if (geneve->cfg.ttl_inherit)
			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
		else
			ttl = key->ttl;
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);

		if (geneve->cfg.df == GENEVE_DF_SET) {
			df = htons(IP_DF);
		} else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
			struct ethhdr *eth = skb_eth_hdr(skb);

			if (ntohs(eth->h_proto) == ETH_P_IPV6) {
				df = htons(IP_DF);
			} else if (ntohs(eth->h_proto) == ETH_P_IP) {
				struct iphdr *iph = ip_hdr(skb);

				if (iph->frag_off & htons(IP_DF))
					df = htons(IP_DF);
			}
		}
	}

	err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
			       inner_proto_inherit);
	if (unlikely(err))
		return err;

	udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
			    tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
			    !net_eq(geneve->net, dev_net(geneve->dev)),
			    !test_bit(IP_TUNNEL_CSUM_BIT,
				      info->key.tun_flags));
	return 0;
}

#if IS_ENABLED(CONFIG_IPV6)
static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
			    struct geneve_dev *geneve,
			    const struct ip_tunnel_info *info)
{
	bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
	struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
	const struct ip_tunnel_key *key = &info->key;
	struct dst_entry *dst = NULL;
	struct in6_addr saddr;
	bool use_cache;
	__u8 prio, ttl;
	__be16 sport;
	int err;

	if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	if (!gs6)
		return -EIO;

	use_cache = ip_tunnel_dst_cache_usable(skb, info);
	prio = geneve_get_dsfield(skb, dev, info, &use_cache);
	sport = udp_flow_src_port(geneve->net, skb,
				  geneve->cfg.port_min,
				  geneve->cfg.port_max, true);

	dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
				     &saddr, key, sport,
				     geneve->cfg.info.key.tp_dst, prio,
				     use_cache ?
				     (struct dst_cache *)&info->dst_cache : NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	err = skb_tunnel_check_pmtu(skb, dst,
				    GENEVE_IPV6_HLEN + info->options_len,
				    netif_is_any_bridge_port(dev));
	if (err < 0) {
		dst_release(dst);
		return err;
	} else if (err) {
		struct ip_tunnel_info *info = skb_tunnel_info(skb);

		if (info) {
			struct ip_tunnel_info *unclone;

			unclone = skb_tunnel_info_unclone(skb);
			if (unlikely(!unclone)) {
				dst_release(dst);
				return -ENOMEM;
			}

			unclone->key.u.ipv6.dst = saddr;
			unclone->key.u.ipv6.src = info->key.u.ipv6.dst;
		}

		if (!pskb_may_pull(skb, ETH_HLEN)) {
			dst_release(dst);
			return -EINVAL;
		}

		skb->protocol = eth_type_trans(skb, geneve->dev);
		__netif_rx(skb);
		dst_release(dst);
		return -EMSGSIZE;
	}

	prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb);
	if (geneve->cfg.collect_md) {
		ttl = key->ttl;
	} else {
		if (geneve->cfg.ttl_inherit)
			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
		else
			ttl = key->ttl;
		ttl = ttl ? : ip6_dst_hoplimit(dst);
	}
	err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
			       inner_proto_inherit);
	if (unlikely(err))
		return err;

	udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
			     &saddr, &key->u.ipv6.dst, prio, ttl,
			     info->key.label, sport, geneve->cfg.info.key.tp_dst,
			     !test_bit(IP_TUNNEL_CSUM_BIT,
				       info->key.tun_flags));
	return 0;
}
#endif

static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct ip_tunnel_info *info = NULL;
	int err;

	if (geneve->cfg.collect_md) {
		info = skb_tunnel_info(skb);
		if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
			netdev_dbg(dev, "no tunnel metadata\n");
			dev_kfree_skb(skb);
			dev_dstats_tx_dropped(dev);
			return NETDEV_TX_OK;
		}
	} else {
		info = &geneve->cfg.info;
	}

	rcu_read_lock();
#if IS_ENABLED(CONFIG_IPV6)
	if (info->mode & IP_TUNNEL_INFO_IPV6)
		err = geneve6_xmit_skb(skb, dev, geneve, info);
	else
#endif
		err = geneve_xmit_skb(skb, dev, geneve, info);
	rcu_read_unlock();

	if (likely(!err))
		return NETDEV_TX_OK;

	if (err != -EMSGSIZE)
		dev_kfree_skb(skb);

	if (err == -ELOOP)
		DEV_STATS_INC(dev, collisions);
	else if (err == -ENETUNREACH)
		DEV_STATS_INC(dev, tx_carrier_errors);

	DEV_STATS_INC(dev, tx_errors);
	return NETDEV_TX_OK;
}

static int geneve_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu > dev->max_mtu)
		new_mtu = dev->max_mtu;
	else if (new_mtu < dev->min_mtu)
		new_mtu = dev->min_mtu;

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}

static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct geneve_dev *geneve = netdev_priv(dev);
	__be16 sport;

	if (ip_tunnel_info_af(info) == AF_INET) {
		struct rtable *rt;
		struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
		bool use_cache;
		__be32 saddr;
		u8 tos;

		if (!gs4)
			return -EIO;

		use_cache = ip_tunnel_dst_cache_usable(skb, info);
		tos = geneve_get_dsfield(skb, dev, info, &use_cache);
		sport = udp_flow_src_port(geneve->net, skb,
					  geneve->cfg.port_min,
					  geneve->cfg.port_max, true);

		rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
					   &info->key,
					   sport, geneve->cfg.info.key.tp_dst,
					   tos,
					   use_cache ? &info->dst_cache : NULL);
		if (IS_ERR(rt))
			return PTR_ERR(rt);

		ip_rt_put(rt);
		info->key.u.ipv4.src = saddr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (ip_tunnel_info_af(info) == AF_INET6) {
		struct dst_entry *dst;
		struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
		struct in6_addr saddr;
		bool use_cache;
		u8 prio;

		if (!gs6)
			return -EIO;

		use_cache = ip_tunnel_dst_cache_usable(skb, info);
		prio = geneve_get_dsfield(skb, dev, info, &use_cache);
		sport = udp_flow_src_port(geneve->net, skb,
					  geneve->cfg.port_min,
					  geneve->cfg.port_max, true);

		dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
					     &saddr, &info->key, sport,
					     geneve->cfg.info.key.tp_dst, prio,
					     use_cache ? &info->dst_cache : NULL);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		dst_release(dst);
		info->key.u.ipv6.src = saddr;
#endif
	} else {
		return -EINVAL;
	}

	info->key.tp_src = sport;
	info->key.tp_dst = geneve->cfg.info.key.tp_dst;
	return 0;
}

static const struct net_device_ops geneve_netdev_ops = {
	.ndo_init		= geneve_init,
	.ndo_uninit		= geneve_uninit,
	.ndo_open		= geneve_open,
	.ndo_stop		= geneve_stop,
	.ndo_start_xmit		= geneve_xmit,
	.ndo_change_mtu		= geneve_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
};

static void geneve_get_drvinfo(struct net_device *dev,
			       struct ethtool_drvinfo *drvinfo)
{
	strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
	strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
}

static const struct ethtool_ops geneve_ethtool_ops = {
	.get_drvinfo	= geneve_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

/* Info for udev, that this is a virtual tunnel endpoint */
static const struct device_type geneve_type = {
	.name = "geneve",
};

/* Calls the ndo_udp_tunnel_add of the caller in order to
 * supply the listening GENEVE udp ports. Callers are expected
 * to implement the ndo_udp_tunnel_add.
 */
static void geneve_offload_rx_ports(struct net_device *dev, bool push)
{
	struct net *net = dev_net(dev);
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_sock *gs;

	rcu_read_lock();
	list_for_each_entry_rcu(gs, &gn->sock_list, list) {
		if (push) {
			udp_tunnel_push_rx_port(dev, gs->sock,
						UDP_TUNNEL_TYPE_GENEVE);
		} else {
			udp_tunnel_drop_rx_port(dev, gs->sock,
						UDP_TUNNEL_TYPE_GENEVE);
		}
	}
	rcu_read_unlock();
}

/* Initialize the device structure. */
static void geneve_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops = &geneve_netdev_ops;
	dev->ethtool_ops = &geneve_ethtool_ops;
	dev->needs_free_netdev = true;

	SET_NETDEV_DEVTYPE(dev, &geneve_type);

	dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->features    |= NETIF_F_RXCSUM;
	dev->features    |= NETIF_F_GSO_SOFTWARE;

	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
	/* MTU range: 68 - (something less than 65535) */
	dev->min_mtu = ETH_MIN_MTU;
	/* The max_mtu calculation does not take account of GENEVE
	 * options, to avoid excluding potentially valid
	 * configurations. This will be further reduced by IPvX hdr size.
	 */
	dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;

	netif_keep_dst(dev);
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
	dev->lltx = true;
	eth_hw_addr_random(dev);
}

static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
	[IFLA_GENEVE_UNSPEC]		= { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
	[IFLA_GENEVE_REMOTE]		= { .len = sizeof_field(struct iphdr, daddr) },
	[IFLA_GENEVE_REMOTE6]		= { .len = sizeof(struct in6_addr) },
	[IFLA_GENEVE_TTL]		= { .type = NLA_U8 },
	[IFLA_GENEVE_TOS]		= { .type = NLA_U8 },
	[IFLA_GENEVE_LABEL]		= { .type = NLA_U32 },
	[IFLA_GENEVE_PORT]		= { .type = NLA_U16 },
	[IFLA_GENEVE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GENEVE_UDP_CSUM]		= { .type = NLA_U8 },
	[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
	[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
	[IFLA_GENEVE_TTL_INHERIT]	= { .type = NLA_U8 },
	[IFLA_GENEVE_DF]		= { .type = NLA_U8 },
	[IFLA_GENEVE_INNER_PROTO_INHERIT]	= { .type = NLA_FLAG },
	[IFLA_GENEVE_PORT_RANGE]	= NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
};

static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
					    "Provided link layer address is not Ethernet");
			return -EINVAL;
		}

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
					    "Provided Ethernet address is not unicast");
			return -EADDRNOTAVAIL;
		}
	}

	if (!data) {
		NL_SET_ERR_MSG(extack,
			       "Not enough attributes provided to perform the operation");
		return -EINVAL;
	}

	if (data[IFLA_GENEVE_ID]) {
		__u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);

		if (vni >= GENEVE_N_VID) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
					    "Geneve ID must be lower than 16777216");
			return -ERANGE;
		}
	}

	if (data[IFLA_GENEVE_DF]) {
		enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);

		if (df < 0 || df > GENEVE_DF_MAX) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
					    "Invalid DF attribute");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_PORT_RANGE]) {
		const struct ifla_geneve_port_range *p;

		p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
		if (ntohs(p->high) < ntohs(p->low)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE],
					    "Invalid source port range");
			return -EINVAL;
		}
	}

	return 0;
}

static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
					  const struct ip_tunnel_info *info,
					  bool *tun_on_same_port,
					  bool *tun_collect_md)
{
	struct geneve_dev *geneve, *t = NULL;

	*tun_on_same_port = false;
	*tun_collect_md = false;
	list_for_each_entry(geneve, &gn->geneve_list, next) {
		if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
			*tun_collect_md = geneve->cfg.collect_md;
			*tun_on_same_port = true;
		}
		if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
		    info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
		    !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
			t = geneve;
	}
	return t;
}

static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
{
	return !(info->key.tun_id || info->key.tos ||
		 !ip_tunnel_flags_empty(info->key.tun_flags) ||
		 info->key.ttl || info->key.label || info->key.tp_src ||
		 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
}

static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
				  struct ip_tunnel_info *b)
{
	if (ip_tunnel_info_af(a) == AF_INET)
		return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
	else
		return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
}

static int geneve_configure(struct net *net, struct net_device *dev,
			    struct netlink_ext_ack *extack,
			    const struct geneve_config *cfg)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev *t, *geneve = netdev_priv(dev);
	const struct ip_tunnel_info *info = &cfg->info;
	bool tun_collect_md, tun_on_same_port;
	int err, encap_len;

	if (cfg->collect_md && !is_tnl_info_zero(info)) {
		NL_SET_ERR_MSG(extack,
			       "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
		return -EINVAL;
	}

	geneve->net = net;
	geneve->dev = dev;

	t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
	if (t)
		return -EBUSY;

	/* make enough headroom for basic scenario */
	encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
	if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
		encap_len += sizeof(struct iphdr);
		dev->max_mtu -= sizeof(struct iphdr);
	} else {
		encap_len += sizeof(struct ipv6hdr);
		dev->max_mtu -= sizeof(struct ipv6hdr);
	}
	dev->needed_headroom = encap_len + ETH_HLEN;

	if (cfg->collect_md) {
		if (tun_on_same_port) {
			NL_SET_ERR_MSG(extack,
				       "There can be only one externally controlled device on a destination port");
			return -EPERM;
		}
	} else {
		if (tun_collect_md) {
			NL_SET_ERR_MSG(extack,
				       "There already exists an externally controlled device on this destination port");
			return -EPERM;
		}
	}

	dst_cache_reset(&geneve->cfg.info.dst_cache);
	memcpy(&geneve->cfg, cfg, sizeof(*cfg));

	if (geneve->cfg.inner_proto_inherit) {
		dev->header_ops = NULL;
		dev->type = ARPHRD_NONE;
		dev->hard_header_len = 0;
		dev->addr_len = 0;
		dev->flags = IFF_POINTOPOINT | IFF_NOARP;
	}

	err = register_netdevice(dev);
	if (err)
		return err;

	list_add(&geneve->next, &gn->geneve_list);
	return 0;
}

static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
{
	memset(info, 0, sizeof(*info));
	info->key.tp_dst = htons(dst_port);
}

static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
			  struct netlink_ext_ack *extack,
			  struct geneve_config *cfg, bool changelink)
{
	struct ip_tunnel_info *info = &cfg->info;
	int attrtype;

	if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
		NL_SET_ERR_MSG(extack,
			       "Cannot specify both IPv4 and IPv6 Remote addresses");
		return -EINVAL;
	}

	if (data[IFLA_GENEVE_REMOTE]) {
		if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
			attrtype = IFLA_GENEVE_REMOTE;
			goto change_notsup;
		}

		info->key.u.ipv4.dst =
			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);

		if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
					    "Remote IPv4 address cannot be Multicast");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_REMOTE6]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
			attrtype = IFLA_GENEVE_REMOTE6;
			goto change_notsup;
		}

		info->mode = IP_TUNNEL_INFO_IPV6;
		info->key.u.ipv6.dst =
			nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);

		if (ipv6_addr_type(&info->key.u.ipv6.dst) &
		    IPV6_ADDR_LINKLOCAL) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be link-local");
			return -EINVAL;
		}
		if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
					    "Remote IPv6 address cannot be Multicast");
			return -EINVAL;
		}
		__set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
		cfg->use_udp6_rx_checksums = true;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_ID]) {
		__u32 vni;
		__u8 tvni[3];
		__be64 tunid;

		vni = nla_get_u32(data[IFLA_GENEVE_ID]);
		tvni[0] = (vni & 0x00ff0000) >> 16;
		tvni[1] = (vni & 0x0000ff00) >> 8;
		tvni[2] =  vni & 0x000000ff;

		tunid = vni_to_tunnel_id(tvni);
		if (changelink && (tunid != info->key.tun_id)) {
			attrtype = IFLA_GENEVE_ID;
			goto change_notsup;
		}
		info->key.tun_id = tunid;
	}

	if (data[IFLA_GENEVE_TTL_INHERIT]) {
		if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
			cfg->ttl_inherit = true;
		else
			cfg->ttl_inherit = false;
	} else if (data[IFLA_GENEVE_TTL]) {
		info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
		cfg->ttl_inherit = false;
	}

	if (data[IFLA_GENEVE_TOS])
		info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);

	if (data[IFLA_GENEVE_DF])
		cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);

	if (data[IFLA_GENEVE_LABEL]) {
		info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
				  IPV6_FLOWLABEL_MASK;
		if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
					    "Label attribute only applies for IPv6 Geneve devices");
			return -EINVAL;
		}
	}

	if (data[IFLA_GENEVE_PORT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_PORT;
			goto change_notsup;
		}
		info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
	}

	if (data[IFLA_GENEVE_PORT_RANGE]) {
		const struct ifla_geneve_port_range *p;

		if (changelink) {
			attrtype = IFLA_GENEVE_PORT_RANGE;
			goto change_notsup;
		}
		p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
		cfg->port_min = ntohs(p->low);
		cfg->port_max = ntohs(p->high);
	}

	if (data[IFLA_GENEVE_COLLECT_METADATA]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_COLLECT_METADATA;
			goto change_notsup;
		}
		cfg->collect_md = true;
	}

	if (data[IFLA_GENEVE_UDP_CSUM]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_CSUM;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
			__set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
			__clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
#if IS_ENABLED(CONFIG_IPV6)
		if (changelink) {
			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
			goto change_notsup;
		}
		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
			cfg->use_udp6_rx_checksums = false;
#else
		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
				    "IPv6 support not enabled in the kernel");
		return -EPFNOSUPPORT;
#endif
	}

	if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
		if (changelink) {
			attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
			goto change_notsup;
		}
		cfg->inner_proto_inherit = true;
	}

	return 0;
change_notsup:
	NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
			    "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
	return -EOPNOTSUPP;
}

static void geneve_link_config(struct net_device *dev,
			       struct ip_tunnel_info *info, struct nlattr *tb[])
{
	struct geneve_dev *geneve = netdev_priv(dev);
	int ldev_mtu = 0;

	if (tb[IFLA_MTU]) {
		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
		return;
	}

	switch (ip_tunnel_info_af(info)) {
	case AF_INET: {
		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);

		if (!IS_ERR(rt) && rt->dst.dev) {
			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
			ip_rt_put(rt);
		}
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6: {
		struct rt6_info *rt;

		if (!__in6_dev_get(dev))
			break;

		rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
				NULL, 0);

		if (rt && rt->dst.dev)
			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
		ip6_rt_put(rt);
		break;
	}
#endif
	}

	if (ldev_mtu <= 0)
		return;

	geneve_change_mtu(dev, ldev_mtu - info->options_len);
}

static int geneve_newlink(struct net_device *dev,
			  struct rtnl_newlink_params *params,
			  struct netlink_ext_ack *extack)
{
	struct net *link_net = rtnl_newlink_link_net(params);
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct geneve_config cfg = {
		.df = GENEVE_DF_UNSET,
		.use_udp6_rx_checksums = false,
		.ttl_inherit = false,
		.collect_md = false,
		.port_min = 1,
		.port_max = USHRT_MAX,
	};
	int err;

	init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
	err = geneve_nl2info(tb, data, extack, &cfg, false);
	if (err)
		return err;

	err = geneve_configure(link_net, dev, extack, &cfg);
	if (err)
		return err;

	geneve_link_config(dev, &cfg.info, tb);

	return 0;
}

/* Quiesces the geneve device data path for both TX and RX.
 *
 * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
 * So, if we set that socket to NULL under RCU and wait for synchronize_net()
 * to complete for the existing set of in-flight packets to be transmitted,
 * then we would have quiesced the transmit data path. All the future packets
 * will get dropped until we unquiesce the data path.
 *
 * On receive geneve dereference the geneve_sock stashed in the socket. So,
 * if we set that to NULL under RCU and wait for synchronize_net() to
 * complete, then we would have quiesced the receive data path.
 */
static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
			   struct geneve_sock **gs6)
{
	*gs4 = rtnl_dereference(geneve->sock4);
	rcu_assign_pointer(geneve->sock4, NULL);
	if (*gs4)
		rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
#if IS_ENABLED(CONFIG_IPV6)
	*gs6 = rtnl_dereference(geneve->sock6);
	rcu_assign_pointer(geneve->sock6, NULL);
	if (*gs6)
		rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
#else
	*gs6 = NULL;
#endif
	synchronize_net();
}

/* Resumes the geneve device data path for both TX and RX. */
static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
			     struct geneve_sock __maybe_unused *gs6)
{
	rcu_assign_pointer(geneve->sock4, gs4);
	if (gs4)
		rcu_assign_sk_user_data(gs4->sock->sk, gs4);
#if IS_ENABLED(CONFIG_IPV6)
	rcu_assign_pointer(geneve->sock6, gs6);
	if (gs6)
		rcu_assign_sk_user_data(gs6->sock->sk, gs6);
#endif
	synchronize_net();
}

static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct geneve_sock *gs4, *gs6;
	struct geneve_config cfg;
	int err;

	/* If the geneve device is configured for metadata (or externally
	 * controlled, for example, OVS), then nothing can be changed.
	 */
	if (geneve->cfg.collect_md)
		return -EOPNOTSUPP;

	/* Start with the existing info. */
	memcpy(&cfg, &geneve->cfg, sizeof(cfg));
	err = geneve_nl2info(tb, data, extack, &cfg, true);
	if (err)
		return err;

	if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
		dst_cache_reset(&cfg.info.dst_cache);
		geneve_link_config(dev, &cfg.info, tb);
	}

	geneve_quiesce(geneve, &gs4, &gs6);
	memcpy(&geneve->cfg, &cfg, sizeof(cfg));
	geneve_unquiesce(geneve, gs4, gs6);

	return 0;
}

static void geneve_dellink(struct net_device *dev, struct list_head *head)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	list_del(&geneve->next);
	unregister_netdevice_queue(dev, head);
}

static size_t geneve_get_size(const struct net_device *dev)
{
	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_TTL */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_TOS */
		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_DF */
		nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
		nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
		nla_total_size(0) +	/* IFLA_GENEVE_COLLECT_METADATA */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
		nla_total_size(0) +	/* IFLA_GENEVE_INNER_PROTO_INHERIT */
		nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
		0;
}

static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct ip_tunnel_info *info = &geneve->cfg.info;
	bool ttl_inherit = geneve->cfg.ttl_inherit;
	bool metadata = geneve->cfg.collect_md;
	struct ifla_geneve_port_range ports = {
		.low  = htons(geneve->cfg.port_min),
		.high = htons(geneve->cfg.port_max),
	};
	__u8 tmp_vni[3];
	__u32 vni;

	tunnel_id_to_vni(info->key.tun_id, tmp_vni);
	vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
		goto nla_put_failure;

	if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
				    info->key.u.ipv4.dst))
			goto nla_put_failure;
		if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
			       test_bit(IP_TUNNEL_CSUM_BIT,
					info->key.tun_flags)))
			goto nla_put_failure;

#if IS_ENABLED(CONFIG_IPV6)
	} else if (!metadata) {
		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
				     &info->key.u.ipv6.dst))
			goto nla_put_failure;
		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
			       !test_bit(IP_TUNNEL_CSUM_BIT,
					 info->key.tun_flags)))
			goto nla_put_failure;
#endif
	}

	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
	    nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
		goto nla_put_failure;

	if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
		goto nla_put_failure;

	if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
		goto nla_put_failure;

#if IS_ENABLED(CONFIG_IPV6)
	if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
		       !geneve->cfg.use_udp6_rx_checksums))
		goto nla_put_failure;
#endif

	if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
		goto nla_put_failure;

	if (geneve->cfg.inner_proto_inherit &&
	    nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
		goto nla_put_failure;

	if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports))
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static struct rtnl_link_ops geneve_link_ops __read_mostly = {
	.kind		= "geneve",
	.maxtype	= IFLA_GENEVE_MAX,
	.policy		= geneve_policy,
	.priv_size	= sizeof(struct geneve_dev),
	.setup		= geneve_setup,
	.validate	= geneve_validate,
	.newlink	= geneve_newlink,
	.changelink	= geneve_changelink,
	.dellink	= geneve_dellink,
	.get_size	= geneve_get_size,
	.fill_info	= geneve_fill_info,
};

struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
					u8 name_assign_type, u16 dst_port)
{
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	int err;
	struct geneve_config cfg = {
		.df = GENEVE_DF_UNSET,
		.use_udp6_rx_checksums = true,
		.ttl_inherit = false,
		.collect_md = true,
		.port_min = 1,
		.port_max = USHRT_MAX,
	};

	memset(tb, 0, sizeof(tb));
	dev = rtnl_create_link(net, name, name_assign_type,
			       &geneve_link_ops, tb, NULL);
	if (IS_ERR(dev))
		return dev;

	init_tnl_info(&cfg.info, dst_port);
	err = geneve_configure(net, dev, NULL, &cfg);
	if (err) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = geneve_change_mtu(dev, IP_MAX_MTU);
	if (err)
		goto err;

	err = rtnl_configure_link(dev, NULL, 0, NULL);
	if (err < 0)
		goto err;

	return dev;
err:
	geneve_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(geneve_dev_create_fb);

static int geneve_netdevice_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
		geneve_offload_rx_ports(dev, true);
	else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
		geneve_offload_rx_ports(dev, false);

	return NOTIFY_DONE;
}

static struct notifier_block geneve_notifier_block __read_mostly = {
	.notifier_call = geneve_netdevice_event,
};

static __net_init int geneve_init_net(struct net *net)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);

	INIT_LIST_HEAD(&gn->geneve_list);
	INIT_LIST_HEAD(&gn->sock_list);
	return 0;
}

static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
{
	struct geneve_net *gn = net_generic(net, geneve_net_id);
	struct geneve_dev *geneve, *next;

	list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
		geneve_dellink(geneve->dev, head);
}

static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
					      struct list_head *dev_to_kill)
{
	struct net *net;

	list_for_each_entry(net, net_list, exit_list)
		geneve_destroy_tunnels(net, dev_to_kill);
}

static void __net_exit geneve_exit_net(struct net *net)
{
	const struct geneve_net *gn = net_generic(net, geneve_net_id);

	WARN_ON_ONCE(!list_empty(&gn->sock_list));
}

static struct pernet_operations geneve_net_ops = {
	.init = geneve_init_net,
	.exit_batch_rtnl = geneve_exit_batch_rtnl,
	.exit = geneve_exit_net,
	.id   = &geneve_net_id,
	.size = sizeof(struct geneve_net),
};

static int __init geneve_init_module(void)
{
	int rc;

	rc = register_pernet_subsys(&geneve_net_ops);
	if (rc)
		goto out1;

	rc = register_netdevice_notifier(&geneve_notifier_block);
	if (rc)
		goto out2;

	rc = rtnl_link_register(&geneve_link_ops);
	if (rc)
		goto out3;

	return 0;
out3:
	unregister_netdevice_notifier(&geneve_notifier_block);
out2:
	unregister_pernet_subsys(&geneve_net_ops);
out1:
	return rc;
}
late_initcall(geneve_init_module);

static void __exit geneve_cleanup_module(void)
{
	rtnl_link_unregister(&geneve_link_ops);
	unregister_netdevice_notifier(&geneve_notifier_block);
	unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_VERSION(GENEVE_NETDEV_VER);
MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("geneve");