1 /* 2 * GENEVE: Generic Network Virtualization Encapsulation 3 * 4 * Copyright (c) 2015 Red Hat, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12 13 #include <linux/kernel.h> 14 #include <linux/module.h> 15 #include <linux/netdevice.h> 16 #include <linux/etherdevice.h> 17 #include <linux/hash.h> 18 #include <net/dst_metadata.h> 19 #include <net/gro_cells.h> 20 #include <net/rtnetlink.h> 21 #include <net/geneve.h> 22 #include <net/protocol.h> 23 24 #define GENEVE_NETDEV_VER "0.6" 25 26 #define GENEVE_UDP_PORT 6081 27 28 #define GENEVE_N_VID (1u << 24) 29 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 30 31 #define VNI_HASH_BITS 10 32 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 33 34 static bool log_ecn_error = true; 35 module_param(log_ecn_error, bool, 0644); 36 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 37 38 #define GENEVE_VER 0 39 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 40 41 /* per-network namespace private data for this module */ 42 struct geneve_net { 43 struct list_head geneve_list; 44 struct list_head sock_list; 45 }; 46 47 static int geneve_net_id; 48 49 union geneve_addr { 50 struct sockaddr_in sin; 51 struct sockaddr_in6 sin6; 52 struct sockaddr sa; 53 }; 54 55 static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, }; 56 57 /* Pseudo network device */ 58 struct geneve_dev { 59 struct hlist_node hlist; /* vni hash table */ 60 struct net *net; /* netns for packet i/o */ 61 struct net_device *dev; /* netdev for geneve tunnel */ 62 struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */ 63 #if IS_ENABLED(CONFIG_IPV6) 64 struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */ 65 #endif 66 u8 vni[3]; /* virtual network ID for tunnel */ 67 u8 ttl; /* TTL override */ 68 u8 tos; /* TOS override */ 69 union geneve_addr remote; /* IP address for link partner */ 70 struct list_head next; /* geneve's per namespace list */ 71 __be16 dst_port; 72 bool collect_md; 73 struct gro_cells gro_cells; 74 u32 flags; 75 struct dst_cache dst_cache; 76 }; 77 78 /* Geneve device flags */ 79 #define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0) 80 #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) 81 #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2) 82 83 struct geneve_sock { 84 bool collect_md; 85 struct list_head list; 86 struct socket *sock; 87 struct rcu_head rcu; 88 int refcnt; 89 struct udp_offload udp_offloads; 90 struct hlist_head vni_list[VNI_HASH_SIZE]; 91 u32 flags; 92 }; 93 94 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 95 { 96 __u32 vnid; 97 98 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 99 return hash_32(vnid, VNI_HASH_BITS); 100 } 101 102 static __be64 vni_to_tunnel_id(const __u8 *vni) 103 { 104 #ifdef __BIG_ENDIAN 105 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 106 #else 107 return (__force __be64)(((__force u64)vni[0] << 40) | 108 ((__force u64)vni[1] << 48) | 109 ((__force u64)vni[2] << 56)); 110 #endif 111 } 112 113 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) 114 { 115 return gs->sock->sk->sk_family; 116 } 117 118 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 119 __be32 addr, u8 vni[]) 120 { 121 struct hlist_head *vni_list_head; 122 struct geneve_dev *geneve; 123 __u32 hash; 124 125 /* Find the device for this VNI */ 126 hash = geneve_net_vni_hash(vni); 127 vni_list_head = &gs->vni_list[hash]; 128 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 129 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 130 addr == geneve->remote.sin.sin_addr.s_addr) 131 return geneve; 132 } 133 return NULL; 134 } 135 136 #if IS_ENABLED(CONFIG_IPV6) 137 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 138 struct in6_addr addr6, u8 vni[]) 139 { 140 struct hlist_head *vni_list_head; 141 struct geneve_dev *geneve; 142 __u32 hash; 143 144 /* Find the device for this VNI */ 145 hash = geneve_net_vni_hash(vni); 146 vni_list_head = &gs->vni_list[hash]; 147 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 148 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 149 ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr)) 150 return geneve; 151 } 152 return NULL; 153 } 154 #endif 155 156 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 157 { 158 return (struct genevehdr *)(udp_hdr(skb) + 1); 159 } 160 161 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, 162 struct sk_buff *skb) 163 { 164 u8 *vni; 165 __be32 addr; 166 static u8 zero_vni[3]; 167 #if IS_ENABLED(CONFIG_IPV6) 168 static struct in6_addr zero_addr6; 169 #endif 170 171 if (geneve_get_sk_family(gs) == AF_INET) { 172 struct iphdr *iph; 173 174 iph = ip_hdr(skb); /* outer IP header... */ 175 176 if (gs->collect_md) { 177 vni = zero_vni; 178 addr = 0; 179 } else { 180 vni = geneve_hdr(skb)->vni; 181 addr = iph->saddr; 182 } 183 184 return geneve_lookup(gs, addr, vni); 185 #if IS_ENABLED(CONFIG_IPV6) 186 } else if (geneve_get_sk_family(gs) == AF_INET6) { 187 struct ipv6hdr *ip6h; 188 struct in6_addr addr6; 189 190 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 191 192 if (gs->collect_md) { 193 vni = zero_vni; 194 addr6 = zero_addr6; 195 } else { 196 vni = geneve_hdr(skb)->vni; 197 addr6 = ip6h->saddr; 198 } 199 200 return geneve6_lookup(gs, addr6, vni); 201 #endif 202 } 203 return NULL; 204 } 205 206 /* geneve receive/decap routine */ 207 static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, 208 struct sk_buff *skb) 209 { 210 struct genevehdr *gnvh = geneve_hdr(skb); 211 struct metadata_dst *tun_dst = NULL; 212 struct pcpu_sw_netstats *stats; 213 int err = 0; 214 void *oiph; 215 216 if (ip_tunnel_collect_metadata() || gs->collect_md) { 217 __be16 flags; 218 219 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | 220 (gnvh->oam ? TUNNEL_OAM : 0) | 221 (gnvh->critical ? TUNNEL_CRIT_OPT : 0); 222 223 tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, 224 vni_to_tunnel_id(gnvh->vni), 225 gnvh->opt_len * 4); 226 if (!tun_dst) 227 goto drop; 228 /* Update tunnel dst according to Geneve options. */ 229 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 230 gnvh->options, gnvh->opt_len * 4); 231 } else { 232 /* Drop packets w/ critical options, 233 * since we don't support any... 234 */ 235 if (gnvh->critical) 236 goto drop; 237 } 238 239 skb_reset_mac_header(skb); 240 skb->protocol = eth_type_trans(skb, geneve->dev); 241 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 242 243 if (tun_dst) 244 skb_dst_set(skb, &tun_dst->dst); 245 246 /* Ignore packet loops (and multicast echo) */ 247 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) 248 goto drop; 249 250 oiph = skb_network_header(skb); 251 skb_reset_network_header(skb); 252 253 if (geneve_get_sk_family(gs) == AF_INET) 254 err = IP_ECN_decapsulate(oiph, skb); 255 #if IS_ENABLED(CONFIG_IPV6) 256 else 257 err = IP6_ECN_decapsulate(oiph, skb); 258 #endif 259 260 if (unlikely(err)) { 261 if (log_ecn_error) { 262 if (geneve_get_sk_family(gs) == AF_INET) 263 net_info_ratelimited("non-ECT from %pI4 " 264 "with TOS=%#x\n", 265 &((struct iphdr *)oiph)->saddr, 266 ((struct iphdr *)oiph)->tos); 267 #if IS_ENABLED(CONFIG_IPV6) 268 else 269 net_info_ratelimited("non-ECT from %pI6\n", 270 &((struct ipv6hdr *)oiph)->saddr); 271 #endif 272 } 273 if (err > 1) { 274 ++geneve->dev->stats.rx_frame_errors; 275 ++geneve->dev->stats.rx_errors; 276 goto drop; 277 } 278 } 279 280 stats = this_cpu_ptr(geneve->dev->tstats); 281 u64_stats_update_begin(&stats->syncp); 282 stats->rx_packets++; 283 stats->rx_bytes += skb->len; 284 u64_stats_update_end(&stats->syncp); 285 286 gro_cells_receive(&geneve->gro_cells, skb); 287 return; 288 drop: 289 /* Consume bad packet */ 290 kfree_skb(skb); 291 } 292 293 /* Setup stats when device is created */ 294 static int geneve_init(struct net_device *dev) 295 { 296 struct geneve_dev *geneve = netdev_priv(dev); 297 int err; 298 299 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 300 if (!dev->tstats) 301 return -ENOMEM; 302 303 err = gro_cells_init(&geneve->gro_cells, dev); 304 if (err) { 305 free_percpu(dev->tstats); 306 return err; 307 } 308 309 err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL); 310 if (err) { 311 free_percpu(dev->tstats); 312 gro_cells_destroy(&geneve->gro_cells); 313 return err; 314 } 315 316 return 0; 317 } 318 319 static void geneve_uninit(struct net_device *dev) 320 { 321 struct geneve_dev *geneve = netdev_priv(dev); 322 323 dst_cache_destroy(&geneve->dst_cache); 324 gro_cells_destroy(&geneve->gro_cells); 325 free_percpu(dev->tstats); 326 } 327 328 /* Callback from net/ipv4/udp.c to receive packets */ 329 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 330 { 331 struct genevehdr *geneveh; 332 struct geneve_dev *geneve; 333 struct geneve_sock *gs; 334 int opts_len; 335 336 /* Need Geneve and inner Ethernet header to be present */ 337 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 338 goto error; 339 340 /* Return packets with reserved bits set */ 341 geneveh = geneve_hdr(skb); 342 if (unlikely(geneveh->ver != GENEVE_VER)) 343 goto error; 344 345 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) 346 goto error; 347 348 gs = rcu_dereference_sk_user_data(sk); 349 if (!gs) 350 goto drop; 351 352 geneve = geneve_lookup_skb(gs, skb); 353 if (!geneve) 354 goto drop; 355 356 opts_len = geneveh->opt_len * 4; 357 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, 358 htons(ETH_P_TEB), 359 !net_eq(geneve->net, dev_net(geneve->dev)))) 360 goto drop; 361 362 geneve_rx(geneve, gs, skb); 363 return 0; 364 365 drop: 366 /* Consume bad packet */ 367 kfree_skb(skb); 368 return 0; 369 370 error: 371 /* Let the UDP layer deal with the skb */ 372 return 1; 373 } 374 375 static struct socket *geneve_create_sock(struct net *net, bool ipv6, 376 __be16 port, u32 flags) 377 { 378 struct socket *sock; 379 struct udp_port_cfg udp_conf; 380 int err; 381 382 memset(&udp_conf, 0, sizeof(udp_conf)); 383 384 if (ipv6) { 385 udp_conf.family = AF_INET6; 386 udp_conf.ipv6_v6only = 1; 387 udp_conf.use_udp6_rx_checksums = 388 !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX); 389 } else { 390 udp_conf.family = AF_INET; 391 udp_conf.local_ip.s_addr = htonl(INADDR_ANY); 392 } 393 394 udp_conf.local_udp_port = port; 395 396 /* Open UDP socket */ 397 err = udp_sock_create(net, &udp_conf, &sock); 398 if (err < 0) 399 return ERR_PTR(err); 400 401 return sock; 402 } 403 404 static void geneve_notify_add_rx_port(struct geneve_sock *gs) 405 { 406 struct net_device *dev; 407 struct sock *sk = gs->sock->sk; 408 struct net *net = sock_net(sk); 409 sa_family_t sa_family = geneve_get_sk_family(gs); 410 __be16 port = inet_sk(sk)->inet_sport; 411 int err; 412 413 if (sa_family == AF_INET) { 414 err = udp_add_offload(sock_net(sk), &gs->udp_offloads); 415 if (err) 416 pr_warn("geneve: udp_add_offload failed with status %d\n", 417 err); 418 } 419 420 rcu_read_lock(); 421 for_each_netdev_rcu(net, dev) { 422 if (dev->netdev_ops->ndo_add_geneve_port) 423 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, 424 port); 425 } 426 rcu_read_unlock(); 427 } 428 429 static int geneve_hlen(struct genevehdr *gh) 430 { 431 return sizeof(*gh) + gh->opt_len * 4; 432 } 433 434 static struct sk_buff **geneve_gro_receive(struct sk_buff **head, 435 struct sk_buff *skb, 436 struct udp_offload *uoff) 437 { 438 struct sk_buff *p, **pp = NULL; 439 struct genevehdr *gh, *gh2; 440 unsigned int hlen, gh_len, off_gnv; 441 const struct packet_offload *ptype; 442 __be16 type; 443 int flush = 1; 444 445 off_gnv = skb_gro_offset(skb); 446 hlen = off_gnv + sizeof(*gh); 447 gh = skb_gro_header_fast(skb, off_gnv); 448 if (skb_gro_header_hard(skb, hlen)) { 449 gh = skb_gro_header_slow(skb, hlen, off_gnv); 450 if (unlikely(!gh)) 451 goto out; 452 } 453 454 if (gh->ver != GENEVE_VER || gh->oam) 455 goto out; 456 gh_len = geneve_hlen(gh); 457 458 hlen = off_gnv + gh_len; 459 if (skb_gro_header_hard(skb, hlen)) { 460 gh = skb_gro_header_slow(skb, hlen, off_gnv); 461 if (unlikely(!gh)) 462 goto out; 463 } 464 465 flush = 0; 466 467 for (p = *head; p; p = p->next) { 468 if (!NAPI_GRO_CB(p)->same_flow) 469 continue; 470 471 gh2 = (struct genevehdr *)(p->data + off_gnv); 472 if (gh->opt_len != gh2->opt_len || 473 memcmp(gh, gh2, gh_len)) { 474 NAPI_GRO_CB(p)->same_flow = 0; 475 continue; 476 } 477 } 478 479 type = gh->proto_type; 480 481 rcu_read_lock(); 482 ptype = gro_find_receive_by_type(type); 483 if (!ptype) { 484 flush = 1; 485 goto out_unlock; 486 } 487 488 skb_gro_pull(skb, gh_len); 489 skb_gro_postpull_rcsum(skb, gh, gh_len); 490 pp = ptype->callbacks.gro_receive(head, skb); 491 492 out_unlock: 493 rcu_read_unlock(); 494 out: 495 NAPI_GRO_CB(skb)->flush |= flush; 496 497 return pp; 498 } 499 500 static int geneve_gro_complete(struct sk_buff *skb, int nhoff, 501 struct udp_offload *uoff) 502 { 503 struct genevehdr *gh; 504 struct packet_offload *ptype; 505 __be16 type; 506 int gh_len; 507 int err = -ENOSYS; 508 509 udp_tunnel_gro_complete(skb, nhoff); 510 511 gh = (struct genevehdr *)(skb->data + nhoff); 512 gh_len = geneve_hlen(gh); 513 type = gh->proto_type; 514 515 rcu_read_lock(); 516 ptype = gro_find_complete_by_type(type); 517 if (ptype) 518 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 519 520 rcu_read_unlock(); 521 return err; 522 } 523 524 /* Create new listen socket if needed */ 525 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, 526 bool ipv6, u32 flags) 527 { 528 struct geneve_net *gn = net_generic(net, geneve_net_id); 529 struct geneve_sock *gs; 530 struct socket *sock; 531 struct udp_tunnel_sock_cfg tunnel_cfg; 532 int h; 533 534 gs = kzalloc(sizeof(*gs), GFP_KERNEL); 535 if (!gs) 536 return ERR_PTR(-ENOMEM); 537 538 sock = geneve_create_sock(net, ipv6, port, flags); 539 if (IS_ERR(sock)) { 540 kfree(gs); 541 return ERR_CAST(sock); 542 } 543 544 gs->sock = sock; 545 gs->refcnt = 1; 546 for (h = 0; h < VNI_HASH_SIZE; ++h) 547 INIT_HLIST_HEAD(&gs->vni_list[h]); 548 549 /* Initialize the geneve udp offloads structure */ 550 gs->udp_offloads.port = port; 551 gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; 552 gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; 553 geneve_notify_add_rx_port(gs); 554 555 /* Mark socket as an encapsulation socket */ 556 tunnel_cfg.sk_user_data = gs; 557 tunnel_cfg.encap_type = 1; 558 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 559 tunnel_cfg.encap_destroy = NULL; 560 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 561 list_add(&gs->list, &gn->sock_list); 562 return gs; 563 } 564 565 static void geneve_notify_del_rx_port(struct geneve_sock *gs) 566 { 567 struct net_device *dev; 568 struct sock *sk = gs->sock->sk; 569 struct net *net = sock_net(sk); 570 sa_family_t sa_family = geneve_get_sk_family(gs); 571 __be16 port = inet_sk(sk)->inet_sport; 572 573 rcu_read_lock(); 574 for_each_netdev_rcu(net, dev) { 575 if (dev->netdev_ops->ndo_del_geneve_port) 576 dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, 577 port); 578 } 579 580 rcu_read_unlock(); 581 582 if (sa_family == AF_INET) 583 udp_del_offload(&gs->udp_offloads); 584 } 585 586 static void __geneve_sock_release(struct geneve_sock *gs) 587 { 588 if (!gs || --gs->refcnt) 589 return; 590 591 list_del(&gs->list); 592 geneve_notify_del_rx_port(gs); 593 udp_tunnel_sock_release(gs->sock); 594 kfree_rcu(gs, rcu); 595 } 596 597 static void geneve_sock_release(struct geneve_dev *geneve) 598 { 599 __geneve_sock_release(geneve->sock4); 600 #if IS_ENABLED(CONFIG_IPV6) 601 __geneve_sock_release(geneve->sock6); 602 #endif 603 } 604 605 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, 606 sa_family_t family, 607 __be16 dst_port) 608 { 609 struct geneve_sock *gs; 610 611 list_for_each_entry(gs, &gn->sock_list, list) { 612 if (inet_sk(gs->sock->sk)->inet_sport == dst_port && 613 geneve_get_sk_family(gs) == family) { 614 return gs; 615 } 616 } 617 return NULL; 618 } 619 620 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 621 { 622 struct net *net = geneve->net; 623 struct geneve_net *gn = net_generic(net, geneve_net_id); 624 struct geneve_sock *gs; 625 __u32 hash; 626 627 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->dst_port); 628 if (gs) { 629 gs->refcnt++; 630 goto out; 631 } 632 633 gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags); 634 if (IS_ERR(gs)) 635 return PTR_ERR(gs); 636 637 out: 638 gs->collect_md = geneve->collect_md; 639 gs->flags = geneve->flags; 640 #if IS_ENABLED(CONFIG_IPV6) 641 if (ipv6) 642 geneve->sock6 = gs; 643 else 644 #endif 645 geneve->sock4 = gs; 646 647 hash = geneve_net_vni_hash(geneve->vni); 648 hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); 649 return 0; 650 } 651 652 static int geneve_open(struct net_device *dev) 653 { 654 struct geneve_dev *geneve = netdev_priv(dev); 655 bool ipv6 = geneve->remote.sa.sa_family == AF_INET6; 656 bool metadata = geneve->collect_md; 657 int ret = 0; 658 659 geneve->sock4 = NULL; 660 #if IS_ENABLED(CONFIG_IPV6) 661 geneve->sock6 = NULL; 662 if (ipv6 || metadata) 663 ret = geneve_sock_add(geneve, true); 664 #endif 665 if (!ret && (!ipv6 || metadata)) 666 ret = geneve_sock_add(geneve, false); 667 if (ret < 0) 668 geneve_sock_release(geneve); 669 670 return ret; 671 } 672 673 static int geneve_stop(struct net_device *dev) 674 { 675 struct geneve_dev *geneve = netdev_priv(dev); 676 677 if (!hlist_unhashed(&geneve->hlist)) 678 hlist_del_rcu(&geneve->hlist); 679 geneve_sock_release(geneve); 680 return 0; 681 } 682 683 static void geneve_build_header(struct genevehdr *geneveh, 684 __be16 tun_flags, u8 vni[3], 685 u8 options_len, u8 *options) 686 { 687 geneveh->ver = GENEVE_VER; 688 geneveh->opt_len = options_len / 4; 689 geneveh->oam = !!(tun_flags & TUNNEL_OAM); 690 geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); 691 geneveh->rsvd1 = 0; 692 memcpy(geneveh->vni, vni, 3); 693 geneveh->proto_type = htons(ETH_P_TEB); 694 geneveh->rsvd2 = 0; 695 696 memcpy(geneveh->options, options, options_len); 697 } 698 699 static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, 700 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 701 u32 flags, bool xnet) 702 { 703 struct genevehdr *gnvh; 704 int min_headroom; 705 int err; 706 bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX); 707 708 skb_scrub_packet(skb, xnet); 709 710 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 711 + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr); 712 err = skb_cow_head(skb, min_headroom); 713 if (unlikely(err)) { 714 kfree_skb(skb); 715 goto free_rt; 716 } 717 718 skb = udp_tunnel_handle_offloads(skb, udp_sum); 719 if (IS_ERR(skb)) { 720 err = PTR_ERR(skb); 721 goto free_rt; 722 } 723 724 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 725 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 726 727 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 728 return 0; 729 730 free_rt: 731 ip_rt_put(rt); 732 return err; 733 } 734 735 #if IS_ENABLED(CONFIG_IPV6) 736 static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb, 737 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 738 u32 flags, bool xnet) 739 { 740 struct genevehdr *gnvh; 741 int min_headroom; 742 int err; 743 bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX); 744 745 skb_scrub_packet(skb, xnet); 746 747 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len 748 + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr); 749 err = skb_cow_head(skb, min_headroom); 750 if (unlikely(err)) { 751 kfree_skb(skb); 752 goto free_dst; 753 } 754 755 skb = udp_tunnel_handle_offloads(skb, udp_sum); 756 if (IS_ERR(skb)) { 757 err = PTR_ERR(skb); 758 goto free_dst; 759 } 760 761 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 762 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 763 764 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 765 return 0; 766 767 free_dst: 768 dst_release(dst); 769 return err; 770 } 771 #endif 772 773 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, 774 struct net_device *dev, 775 struct flowi4 *fl4, 776 struct ip_tunnel_info *info) 777 { 778 struct geneve_dev *geneve = netdev_priv(dev); 779 struct dst_cache *dst_cache; 780 struct rtable *rt = NULL; 781 bool use_cache = true; 782 __u8 tos; 783 784 memset(fl4, 0, sizeof(*fl4)); 785 fl4->flowi4_mark = skb->mark; 786 fl4->flowi4_proto = IPPROTO_UDP; 787 788 if (info) { 789 fl4->daddr = info->key.u.ipv4.dst; 790 fl4->saddr = info->key.u.ipv4.src; 791 fl4->flowi4_tos = RT_TOS(info->key.tos); 792 dst_cache = &info->dst_cache; 793 } else { 794 tos = geneve->tos; 795 if (tos == 1) { 796 const struct iphdr *iip = ip_hdr(skb); 797 798 tos = ip_tunnel_get_dsfield(iip, skb); 799 use_cache = false; 800 } 801 802 fl4->flowi4_tos = RT_TOS(tos); 803 fl4->daddr = geneve->remote.sin.sin_addr.s_addr; 804 dst_cache = &geneve->dst_cache; 805 } 806 807 use_cache = use_cache && !skb->mark; 808 if (use_cache) { 809 rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); 810 if (rt) 811 return rt; 812 } 813 814 rt = ip_route_output_key(geneve->net, fl4); 815 if (IS_ERR(rt)) { 816 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); 817 return ERR_PTR(-ENETUNREACH); 818 } 819 if (rt->dst.dev == dev) { /* is this necessary? */ 820 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); 821 ip_rt_put(rt); 822 return ERR_PTR(-ELOOP); 823 } 824 if (use_cache) 825 dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); 826 return rt; 827 } 828 829 #if IS_ENABLED(CONFIG_IPV6) 830 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, 831 struct net_device *dev, 832 struct flowi6 *fl6, 833 struct ip_tunnel_info *info) 834 { 835 struct geneve_dev *geneve = netdev_priv(dev); 836 struct geneve_sock *gs6 = geneve->sock6; 837 struct dst_entry *dst = NULL; 838 struct dst_cache *dst_cache; 839 bool use_cache = true; 840 __u8 prio; 841 842 memset(fl6, 0, sizeof(*fl6)); 843 fl6->flowi6_mark = skb->mark; 844 fl6->flowi6_proto = IPPROTO_UDP; 845 846 if (info) { 847 fl6->daddr = info->key.u.ipv6.dst; 848 fl6->saddr = info->key.u.ipv6.src; 849 fl6->flowi6_tos = RT_TOS(info->key.tos); 850 dst_cache = &info->dst_cache; 851 } else { 852 prio = geneve->tos; 853 if (prio == 1) { 854 const struct iphdr *iip = ip_hdr(skb); 855 856 prio = ip_tunnel_get_dsfield(iip, skb); 857 use_cache = false; 858 } 859 860 fl6->flowi6_tos = RT_TOS(prio); 861 fl6->daddr = geneve->remote.sin6.sin6_addr; 862 dst_cache = &geneve->dst_cache; 863 } 864 865 use_cache = use_cache && !skb->mark; 866 if (use_cache) { 867 dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); 868 if (dst) 869 return dst; 870 } 871 872 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { 873 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); 874 return ERR_PTR(-ENETUNREACH); 875 } 876 if (dst->dev == dev) { /* is this necessary? */ 877 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr); 878 dst_release(dst); 879 return ERR_PTR(-ELOOP); 880 } 881 882 if (use_cache) 883 dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); 884 return dst; 885 } 886 #endif 887 888 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 889 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 890 { 891 #ifdef __BIG_ENDIAN 892 vni[0] = (__force __u8)(tun_id >> 16); 893 vni[1] = (__force __u8)(tun_id >> 8); 894 vni[2] = (__force __u8)tun_id; 895 #else 896 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 897 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 898 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 899 #endif 900 } 901 902 static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 903 struct ip_tunnel_info *info) 904 { 905 struct geneve_dev *geneve = netdev_priv(dev); 906 struct geneve_sock *gs4 = geneve->sock4; 907 struct rtable *rt = NULL; 908 const struct iphdr *iip; /* interior IP header */ 909 int err = -EINVAL; 910 struct flowi4 fl4; 911 __u8 tos, ttl; 912 __be16 sport; 913 __be16 df; 914 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 915 u32 flags = geneve->flags; 916 917 if (geneve->collect_md) { 918 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 919 netdev_dbg(dev, "no tunnel metadata\n"); 920 goto tx_error; 921 } 922 if (info && ip_tunnel_info_af(info) != AF_INET) 923 goto tx_error; 924 } 925 926 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 927 if (IS_ERR(rt)) { 928 err = PTR_ERR(rt); 929 goto tx_error; 930 } 931 932 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 933 skb_reset_mac_header(skb); 934 935 iip = ip_hdr(skb); 936 937 if (info) { 938 const struct ip_tunnel_key *key = &info->key; 939 u8 *opts = NULL; 940 u8 vni[3]; 941 942 tunnel_id_to_vni(key->tun_id, vni); 943 if (key->tun_flags & TUNNEL_GENEVE_OPT) 944 opts = ip_tunnel_info_opts(info); 945 946 if (key->tun_flags & TUNNEL_CSUM) 947 flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX; 948 else 949 flags |= GENEVE_F_UDP_ZERO_CSUM_TX; 950 951 err = geneve_build_skb(rt, skb, key->tun_flags, vni, 952 info->options_len, opts, flags, xnet); 953 if (unlikely(err)) 954 goto err; 955 956 tos = ip_tunnel_ecn_encap(key->tos, iip, skb); 957 ttl = key->ttl; 958 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 959 } else { 960 err = geneve_build_skb(rt, skb, 0, geneve->vni, 961 0, NULL, flags, xnet); 962 if (unlikely(err)) 963 goto err; 964 965 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); 966 ttl = geneve->ttl; 967 if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) 968 ttl = 1; 969 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 970 df = 0; 971 } 972 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, 973 tos, ttl, df, sport, geneve->dst_port, 974 !net_eq(geneve->net, dev_net(geneve->dev)), 975 !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX)); 976 977 return NETDEV_TX_OK; 978 979 tx_error: 980 dev_kfree_skb(skb); 981 err: 982 if (err == -ELOOP) 983 dev->stats.collisions++; 984 else if (err == -ENETUNREACH) 985 dev->stats.tx_carrier_errors++; 986 else 987 dev->stats.tx_errors++; 988 return NETDEV_TX_OK; 989 } 990 991 #if IS_ENABLED(CONFIG_IPV6) 992 static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 993 struct ip_tunnel_info *info) 994 { 995 struct geneve_dev *geneve = netdev_priv(dev); 996 struct geneve_sock *gs6 = geneve->sock6; 997 struct dst_entry *dst = NULL; 998 const struct iphdr *iip; /* interior IP header */ 999 int err = -EINVAL; 1000 struct flowi6 fl6; 1001 __u8 prio, ttl; 1002 __be16 sport; 1003 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 1004 u32 flags = geneve->flags; 1005 1006 if (geneve->collect_md) { 1007 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 1008 netdev_dbg(dev, "no tunnel metadata\n"); 1009 goto tx_error; 1010 } 1011 } 1012 1013 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 1014 if (IS_ERR(dst)) { 1015 err = PTR_ERR(dst); 1016 goto tx_error; 1017 } 1018 1019 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 1020 skb_reset_mac_header(skb); 1021 1022 iip = ip_hdr(skb); 1023 1024 if (info) { 1025 const struct ip_tunnel_key *key = &info->key; 1026 u8 *opts = NULL; 1027 u8 vni[3]; 1028 1029 tunnel_id_to_vni(key->tun_id, vni); 1030 if (key->tun_flags & TUNNEL_GENEVE_OPT) 1031 opts = ip_tunnel_info_opts(info); 1032 1033 if (key->tun_flags & TUNNEL_CSUM) 1034 flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX; 1035 else 1036 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1037 1038 err = geneve6_build_skb(dst, skb, key->tun_flags, vni, 1039 info->options_len, opts, 1040 flags, xnet); 1041 if (unlikely(err)) 1042 goto err; 1043 1044 prio = ip_tunnel_ecn_encap(key->tos, iip, skb); 1045 ttl = key->ttl; 1046 } else { 1047 err = geneve6_build_skb(dst, skb, 0, geneve->vni, 1048 0, NULL, flags, xnet); 1049 if (unlikely(err)) 1050 goto err; 1051 1052 prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); 1053 ttl = geneve->ttl; 1054 if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) 1055 ttl = 1; 1056 ttl = ttl ? : ip6_dst_hoplimit(dst); 1057 } 1058 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, 1059 &fl6.saddr, &fl6.daddr, prio, ttl, 1060 sport, geneve->dst_port, 1061 !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); 1062 return NETDEV_TX_OK; 1063 1064 tx_error: 1065 dev_kfree_skb(skb); 1066 err: 1067 if (err == -ELOOP) 1068 dev->stats.collisions++; 1069 else if (err == -ENETUNREACH) 1070 dev->stats.tx_carrier_errors++; 1071 else 1072 dev->stats.tx_errors++; 1073 return NETDEV_TX_OK; 1074 } 1075 #endif 1076 1077 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1078 { 1079 struct geneve_dev *geneve = netdev_priv(dev); 1080 struct ip_tunnel_info *info = NULL; 1081 1082 if (geneve->collect_md) 1083 info = skb_tunnel_info(skb); 1084 1085 #if IS_ENABLED(CONFIG_IPV6) 1086 if ((info && ip_tunnel_info_af(info) == AF_INET6) || 1087 (!info && geneve->remote.sa.sa_family == AF_INET6)) 1088 return geneve6_xmit_skb(skb, dev, info); 1089 #endif 1090 return geneve_xmit_skb(skb, dev, info); 1091 } 1092 1093 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) 1094 { 1095 /* The max_mtu calculation does not take account of GENEVE 1096 * options, to avoid excluding potentially valid 1097 * configurations. 1098 */ 1099 int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) 1100 - dev->hard_header_len; 1101 1102 if (new_mtu < 68) 1103 return -EINVAL; 1104 1105 if (new_mtu > max_mtu) { 1106 if (strict) 1107 return -EINVAL; 1108 1109 new_mtu = max_mtu; 1110 } 1111 1112 dev->mtu = new_mtu; 1113 return 0; 1114 } 1115 1116 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1117 { 1118 return __geneve_change_mtu(dev, new_mtu, true); 1119 } 1120 1121 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1122 { 1123 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1124 struct geneve_dev *geneve = netdev_priv(dev); 1125 struct rtable *rt; 1126 struct flowi4 fl4; 1127 #if IS_ENABLED(CONFIG_IPV6) 1128 struct dst_entry *dst; 1129 struct flowi6 fl6; 1130 #endif 1131 1132 if (ip_tunnel_info_af(info) == AF_INET) { 1133 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 1134 if (IS_ERR(rt)) 1135 return PTR_ERR(rt); 1136 1137 ip_rt_put(rt); 1138 info->key.u.ipv4.src = fl4.saddr; 1139 #if IS_ENABLED(CONFIG_IPV6) 1140 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1141 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 1142 if (IS_ERR(dst)) 1143 return PTR_ERR(dst); 1144 1145 dst_release(dst); 1146 info->key.u.ipv6.src = fl6.saddr; 1147 #endif 1148 } else { 1149 return -EINVAL; 1150 } 1151 1152 info->key.tp_src = udp_flow_src_port(geneve->net, skb, 1153 1, USHRT_MAX, true); 1154 info->key.tp_dst = geneve->dst_port; 1155 return 0; 1156 } 1157 1158 static const struct net_device_ops geneve_netdev_ops = { 1159 .ndo_init = geneve_init, 1160 .ndo_uninit = geneve_uninit, 1161 .ndo_open = geneve_open, 1162 .ndo_stop = geneve_stop, 1163 .ndo_start_xmit = geneve_xmit, 1164 .ndo_get_stats64 = ip_tunnel_get_stats64, 1165 .ndo_change_mtu = geneve_change_mtu, 1166 .ndo_validate_addr = eth_validate_addr, 1167 .ndo_set_mac_address = eth_mac_addr, 1168 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1169 }; 1170 1171 static void geneve_get_drvinfo(struct net_device *dev, 1172 struct ethtool_drvinfo *drvinfo) 1173 { 1174 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1175 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1176 } 1177 1178 static const struct ethtool_ops geneve_ethtool_ops = { 1179 .get_drvinfo = geneve_get_drvinfo, 1180 .get_link = ethtool_op_get_link, 1181 }; 1182 1183 /* Info for udev, that this is a virtual tunnel endpoint */ 1184 static struct device_type geneve_type = { 1185 .name = "geneve", 1186 }; 1187 1188 /* Calls the ndo_add_geneve_port of the caller in order to 1189 * supply the listening GENEVE udp ports. Callers are expected 1190 * to implement the ndo_add_geneve_port. 1191 */ 1192 void geneve_get_rx_port(struct net_device *dev) 1193 { 1194 struct net *net = dev_net(dev); 1195 struct geneve_net *gn = net_generic(net, geneve_net_id); 1196 struct geneve_sock *gs; 1197 sa_family_t sa_family; 1198 struct sock *sk; 1199 __be16 port; 1200 1201 rcu_read_lock(); 1202 list_for_each_entry_rcu(gs, &gn->sock_list, list) { 1203 sk = gs->sock->sk; 1204 sa_family = sk->sk_family; 1205 port = inet_sk(sk)->inet_sport; 1206 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); 1207 } 1208 rcu_read_unlock(); 1209 } 1210 EXPORT_SYMBOL_GPL(geneve_get_rx_port); 1211 1212 /* Initialize the device structure. */ 1213 static void geneve_setup(struct net_device *dev) 1214 { 1215 ether_setup(dev); 1216 1217 dev->netdev_ops = &geneve_netdev_ops; 1218 dev->ethtool_ops = &geneve_ethtool_ops; 1219 dev->destructor = free_netdev; 1220 1221 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1222 1223 dev->features |= NETIF_F_LLTX; 1224 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 1225 dev->features |= NETIF_F_RXCSUM; 1226 dev->features |= NETIF_F_GSO_SOFTWARE; 1227 1228 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; 1229 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1230 1231 netif_keep_dst(dev); 1232 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1233 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1234 eth_hw_addr_random(dev); 1235 } 1236 1237 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1238 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1239 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1240 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1241 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1242 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1243 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1244 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1245 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1246 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1247 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1248 }; 1249 1250 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) 1251 { 1252 if (tb[IFLA_ADDRESS]) { 1253 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1254 return -EINVAL; 1255 1256 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1257 return -EADDRNOTAVAIL; 1258 } 1259 1260 if (!data) 1261 return -EINVAL; 1262 1263 if (data[IFLA_GENEVE_ID]) { 1264 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1265 1266 if (vni >= GENEVE_VID_MASK) 1267 return -ERANGE; 1268 } 1269 1270 return 0; 1271 } 1272 1273 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1274 __be16 dst_port, 1275 union geneve_addr *remote, 1276 u8 vni[], 1277 bool *tun_on_same_port, 1278 bool *tun_collect_md) 1279 { 1280 struct geneve_dev *geneve, *t; 1281 1282 *tun_on_same_port = false; 1283 *tun_collect_md = false; 1284 t = NULL; 1285 list_for_each_entry(geneve, &gn->geneve_list, next) { 1286 if (geneve->dst_port == dst_port) { 1287 *tun_collect_md = geneve->collect_md; 1288 *tun_on_same_port = true; 1289 } 1290 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 1291 !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) && 1292 dst_port == geneve->dst_port) 1293 t = geneve; 1294 } 1295 return t; 1296 } 1297 1298 static int geneve_configure(struct net *net, struct net_device *dev, 1299 union geneve_addr *remote, 1300 __u32 vni, __u8 ttl, __u8 tos, __be16 dst_port, 1301 bool metadata, u32 flags) 1302 { 1303 struct geneve_net *gn = net_generic(net, geneve_net_id); 1304 struct geneve_dev *t, *geneve = netdev_priv(dev); 1305 bool tun_collect_md, tun_on_same_port; 1306 int err, encap_len; 1307 1308 if (!remote) 1309 return -EINVAL; 1310 if (metadata && 1311 (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl)) 1312 return -EINVAL; 1313 1314 geneve->net = net; 1315 geneve->dev = dev; 1316 1317 geneve->vni[0] = (vni & 0x00ff0000) >> 16; 1318 geneve->vni[1] = (vni & 0x0000ff00) >> 8; 1319 geneve->vni[2] = vni & 0x000000ff; 1320 1321 if ((remote->sa.sa_family == AF_INET && 1322 IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) || 1323 (remote->sa.sa_family == AF_INET6 && 1324 ipv6_addr_is_multicast(&remote->sin6.sin6_addr))) 1325 return -EINVAL; 1326 geneve->remote = *remote; 1327 1328 geneve->ttl = ttl; 1329 geneve->tos = tos; 1330 geneve->dst_port = dst_port; 1331 geneve->collect_md = metadata; 1332 geneve->flags = flags; 1333 1334 t = geneve_find_dev(gn, dst_port, remote, geneve->vni, 1335 &tun_on_same_port, &tun_collect_md); 1336 if (t) 1337 return -EBUSY; 1338 1339 /* make enough headroom for basic scenario */ 1340 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1341 if (remote->sa.sa_family == AF_INET) 1342 encap_len += sizeof(struct iphdr); 1343 else 1344 encap_len += sizeof(struct ipv6hdr); 1345 dev->needed_headroom = encap_len + ETH_HLEN; 1346 1347 if (metadata) { 1348 if (tun_on_same_port) 1349 return -EPERM; 1350 } else { 1351 if (tun_collect_md) 1352 return -EPERM; 1353 } 1354 1355 dst_cache_reset(&geneve->dst_cache); 1356 1357 err = register_netdevice(dev); 1358 if (err) 1359 return err; 1360 1361 list_add(&geneve->next, &gn->geneve_list); 1362 return 0; 1363 } 1364 1365 static int geneve_newlink(struct net *net, struct net_device *dev, 1366 struct nlattr *tb[], struct nlattr *data[]) 1367 { 1368 __be16 dst_port = htons(GENEVE_UDP_PORT); 1369 __u8 ttl = 0, tos = 0; 1370 bool metadata = false; 1371 union geneve_addr remote = geneve_remote_unspec; 1372 __u32 vni = 0; 1373 u32 flags = 0; 1374 1375 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) 1376 return -EINVAL; 1377 1378 if (data[IFLA_GENEVE_REMOTE]) { 1379 remote.sa.sa_family = AF_INET; 1380 remote.sin.sin_addr.s_addr = 1381 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1382 } 1383 1384 if (data[IFLA_GENEVE_REMOTE6]) { 1385 if (!IS_ENABLED(CONFIG_IPV6)) 1386 return -EPFNOSUPPORT; 1387 1388 remote.sa.sa_family = AF_INET6; 1389 remote.sin6.sin6_addr = 1390 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1391 1392 if (ipv6_addr_type(&remote.sin6.sin6_addr) & 1393 IPV6_ADDR_LINKLOCAL) { 1394 netdev_dbg(dev, "link-local remote is unsupported\n"); 1395 return -EINVAL; 1396 } 1397 } 1398 1399 if (data[IFLA_GENEVE_ID]) 1400 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1401 1402 if (data[IFLA_GENEVE_TTL]) 1403 ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1404 1405 if (data[IFLA_GENEVE_TOS]) 1406 tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1407 1408 if (data[IFLA_GENEVE_PORT]) 1409 dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); 1410 1411 if (data[IFLA_GENEVE_COLLECT_METADATA]) 1412 metadata = true; 1413 1414 if (data[IFLA_GENEVE_UDP_CSUM] && 1415 !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 1416 flags |= GENEVE_F_UDP_ZERO_CSUM_TX; 1417 1418 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && 1419 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 1420 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1421 1422 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] && 1423 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 1424 flags |= GENEVE_F_UDP_ZERO_CSUM6_RX; 1425 1426 return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port, 1427 metadata, flags); 1428 } 1429 1430 static void geneve_dellink(struct net_device *dev, struct list_head *head) 1431 { 1432 struct geneve_dev *geneve = netdev_priv(dev); 1433 1434 list_del(&geneve->next); 1435 unregister_netdevice_queue(dev, head); 1436 } 1437 1438 static size_t geneve_get_size(const struct net_device *dev) 1439 { 1440 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 1441 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 1442 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 1443 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 1444 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 1445 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 1446 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 1447 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 1448 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 1449 0; 1450 } 1451 1452 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 1453 { 1454 struct geneve_dev *geneve = netdev_priv(dev); 1455 __u32 vni; 1456 1457 vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; 1458 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 1459 goto nla_put_failure; 1460 1461 if (geneve->remote.sa.sa_family == AF_INET) { 1462 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 1463 geneve->remote.sin.sin_addr.s_addr)) 1464 goto nla_put_failure; 1465 #if IS_ENABLED(CONFIG_IPV6) 1466 } else { 1467 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 1468 &geneve->remote.sin6.sin6_addr)) 1469 goto nla_put_failure; 1470 #endif 1471 } 1472 1473 if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || 1474 nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) 1475 goto nla_put_failure; 1476 1477 if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) 1478 goto nla_put_failure; 1479 1480 if (geneve->collect_md) { 1481 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 1482 goto nla_put_failure; 1483 } 1484 1485 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 1486 !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) || 1487 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 1488 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || 1489 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1490 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX))) 1491 goto nla_put_failure; 1492 1493 return 0; 1494 1495 nla_put_failure: 1496 return -EMSGSIZE; 1497 } 1498 1499 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 1500 .kind = "geneve", 1501 .maxtype = IFLA_GENEVE_MAX, 1502 .policy = geneve_policy, 1503 .priv_size = sizeof(struct geneve_dev), 1504 .setup = geneve_setup, 1505 .validate = geneve_validate, 1506 .newlink = geneve_newlink, 1507 .dellink = geneve_dellink, 1508 .get_size = geneve_get_size, 1509 .fill_info = geneve_fill_info, 1510 }; 1511 1512 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 1513 u8 name_assign_type, u16 dst_port) 1514 { 1515 struct nlattr *tb[IFLA_MAX + 1]; 1516 struct net_device *dev; 1517 int err; 1518 1519 memset(tb, 0, sizeof(tb)); 1520 dev = rtnl_create_link(net, name, name_assign_type, 1521 &geneve_link_ops, tb); 1522 if (IS_ERR(dev)) 1523 return dev; 1524 1525 err = geneve_configure(net, dev, &geneve_remote_unspec, 1526 0, 0, 0, htons(dst_port), true, 1527 GENEVE_F_UDP_ZERO_CSUM6_RX); 1528 if (err) 1529 goto err; 1530 1531 /* openvswitch users expect packet sizes to be unrestricted, 1532 * so set the largest MTU we can. 1533 */ 1534 err = __geneve_change_mtu(dev, IP_MAX_MTU, false); 1535 if (err) 1536 goto err; 1537 1538 return dev; 1539 1540 err: 1541 free_netdev(dev); 1542 return ERR_PTR(err); 1543 } 1544 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 1545 1546 static __net_init int geneve_init_net(struct net *net) 1547 { 1548 struct geneve_net *gn = net_generic(net, geneve_net_id); 1549 1550 INIT_LIST_HEAD(&gn->geneve_list); 1551 INIT_LIST_HEAD(&gn->sock_list); 1552 return 0; 1553 } 1554 1555 static void __net_exit geneve_exit_net(struct net *net) 1556 { 1557 struct geneve_net *gn = net_generic(net, geneve_net_id); 1558 struct geneve_dev *geneve, *next; 1559 struct net_device *dev, *aux; 1560 LIST_HEAD(list); 1561 1562 rtnl_lock(); 1563 1564 /* gather any geneve devices that were moved into this ns */ 1565 for_each_netdev_safe(net, dev, aux) 1566 if (dev->rtnl_link_ops == &geneve_link_ops) 1567 unregister_netdevice_queue(dev, &list); 1568 1569 /* now gather any other geneve devices that were created in this ns */ 1570 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { 1571 /* If geneve->dev is in the same netns, it was already added 1572 * to the list by the previous loop. 1573 */ 1574 if (!net_eq(dev_net(geneve->dev), net)) 1575 unregister_netdevice_queue(geneve->dev, &list); 1576 } 1577 1578 /* unregister the devices gathered above */ 1579 unregister_netdevice_many(&list); 1580 rtnl_unlock(); 1581 } 1582 1583 static struct pernet_operations geneve_net_ops = { 1584 .init = geneve_init_net, 1585 .exit = geneve_exit_net, 1586 .id = &geneve_net_id, 1587 .size = sizeof(struct geneve_net), 1588 }; 1589 1590 static int __init geneve_init_module(void) 1591 { 1592 int rc; 1593 1594 rc = register_pernet_subsys(&geneve_net_ops); 1595 if (rc) 1596 goto out1; 1597 1598 rc = rtnl_link_register(&geneve_link_ops); 1599 if (rc) 1600 goto out2; 1601 1602 return 0; 1603 out2: 1604 unregister_pernet_subsys(&geneve_net_ops); 1605 out1: 1606 return rc; 1607 } 1608 late_initcall(geneve_init_module); 1609 1610 static void __exit geneve_cleanup_module(void) 1611 { 1612 rtnl_link_unregister(&geneve_link_ops); 1613 unregister_pernet_subsys(&geneve_net_ops); 1614 } 1615 module_exit(geneve_cleanup_module); 1616 1617 MODULE_LICENSE("GPL"); 1618 MODULE_VERSION(GENEVE_NETDEV_VER); 1619 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 1620 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 1621 MODULE_ALIAS_RTNL_LINK("geneve"); 1622