1 /* 2 * GENEVE: Generic Network Virtualization Encapsulation 3 * 4 * Copyright (c) 2015 Red Hat, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12 13 #include <linux/kernel.h> 14 #include <linux/module.h> 15 #include <linux/netdevice.h> 16 #include <linux/etherdevice.h> 17 #include <linux/hash.h> 18 #include <net/dst_metadata.h> 19 #include <net/gro_cells.h> 20 #include <net/rtnetlink.h> 21 #include <net/geneve.h> 22 #include <net/protocol.h> 23 24 #define GENEVE_NETDEV_VER "0.6" 25 26 #define GENEVE_UDP_PORT 6081 27 28 #define GENEVE_N_VID (1u << 24) 29 #define GENEVE_VID_MASK (GENEVE_N_VID - 1) 30 31 #define VNI_HASH_BITS 10 32 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 33 34 static bool log_ecn_error = true; 35 module_param(log_ecn_error, bool, 0644); 36 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 37 38 #define GENEVE_VER 0 39 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) 40 41 /* per-network namespace private data for this module */ 42 struct geneve_net { 43 struct list_head geneve_list; 44 struct list_head sock_list; 45 }; 46 47 static int geneve_net_id; 48 49 union geneve_addr { 50 struct sockaddr_in sin; 51 struct sockaddr_in6 sin6; 52 struct sockaddr sa; 53 }; 54 55 static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, }; 56 57 /* Pseudo network device */ 58 struct geneve_dev { 59 struct hlist_node hlist; /* vni hash table */ 60 struct net *net; /* netns for packet i/o */ 61 struct net_device *dev; /* netdev for geneve tunnel */ 62 struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */ 63 #if IS_ENABLED(CONFIG_IPV6) 64 struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */ 65 #endif 66 u8 vni[3]; /* virtual network ID for tunnel */ 67 u8 ttl; /* TTL override */ 68 u8 tos; /* TOS override */ 69 union geneve_addr remote; /* IP address for link partner */ 70 struct list_head next; /* geneve's per namespace list */ 71 __be16 dst_port; 72 bool collect_md; 73 struct gro_cells gro_cells; 74 u32 flags; 75 }; 76 77 /* Geneve device flags */ 78 #define GENEVE_F_UDP_CSUM BIT(0) 79 #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) 80 #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2) 81 82 struct geneve_sock { 83 bool collect_md; 84 struct list_head list; 85 struct socket *sock; 86 struct rcu_head rcu; 87 int refcnt; 88 struct udp_offload udp_offloads; 89 struct hlist_head vni_list[VNI_HASH_SIZE]; 90 u32 flags; 91 }; 92 93 static inline __u32 geneve_net_vni_hash(u8 vni[3]) 94 { 95 __u32 vnid; 96 97 vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; 98 return hash_32(vnid, VNI_HASH_BITS); 99 } 100 101 static __be64 vni_to_tunnel_id(const __u8 *vni) 102 { 103 #ifdef __BIG_ENDIAN 104 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 105 #else 106 return (__force __be64)(((__force u64)vni[0] << 40) | 107 ((__force u64)vni[1] << 48) | 108 ((__force u64)vni[2] << 56)); 109 #endif 110 } 111 112 static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, 113 __be32 addr, u8 vni[]) 114 { 115 struct hlist_head *vni_list_head; 116 struct geneve_dev *geneve; 117 __u32 hash; 118 119 /* Find the device for this VNI */ 120 hash = geneve_net_vni_hash(vni); 121 vni_list_head = &gs->vni_list[hash]; 122 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 123 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 124 addr == geneve->remote.sin.sin_addr.s_addr) 125 return geneve; 126 } 127 return NULL; 128 } 129 130 #if IS_ENABLED(CONFIG_IPV6) 131 static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, 132 struct in6_addr addr6, u8 vni[]) 133 { 134 struct hlist_head *vni_list_head; 135 struct geneve_dev *geneve; 136 __u32 hash; 137 138 /* Find the device for this VNI */ 139 hash = geneve_net_vni_hash(vni); 140 vni_list_head = &gs->vni_list[hash]; 141 hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { 142 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 143 ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr)) 144 return geneve; 145 } 146 return NULL; 147 } 148 #endif 149 150 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) 151 { 152 return (struct genevehdr *)(udp_hdr(skb) + 1); 153 } 154 155 /* geneve receive/decap routine */ 156 static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) 157 { 158 struct genevehdr *gnvh = geneve_hdr(skb); 159 struct metadata_dst *tun_dst = NULL; 160 struct geneve_dev *geneve = NULL; 161 struct pcpu_sw_netstats *stats; 162 struct iphdr *iph = NULL; 163 __be32 addr; 164 static u8 zero_vni[3]; 165 u8 *vni; 166 int err = 0; 167 sa_family_t sa_family; 168 #if IS_ENABLED(CONFIG_IPV6) 169 struct ipv6hdr *ip6h = NULL; 170 struct in6_addr addr6; 171 static struct in6_addr zero_addr6; 172 #endif 173 174 sa_family = gs->sock->sk->sk_family; 175 176 if (sa_family == AF_INET) { 177 iph = ip_hdr(skb); /* outer IP header... */ 178 179 if (gs->collect_md) { 180 vni = zero_vni; 181 addr = 0; 182 } else { 183 vni = gnvh->vni; 184 185 addr = iph->saddr; 186 } 187 188 geneve = geneve_lookup(gs, addr, vni); 189 #if IS_ENABLED(CONFIG_IPV6) 190 } else if (sa_family == AF_INET6) { 191 ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ 192 193 if (gs->collect_md) { 194 vni = zero_vni; 195 addr6 = zero_addr6; 196 } else { 197 vni = gnvh->vni; 198 199 addr6 = ip6h->saddr; 200 } 201 202 geneve = geneve6_lookup(gs, addr6, vni); 203 #endif 204 } 205 if (!geneve) 206 goto drop; 207 208 if (ip_tunnel_collect_metadata() || gs->collect_md) { 209 __be16 flags; 210 211 flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | 212 (gnvh->oam ? TUNNEL_OAM : 0) | 213 (gnvh->critical ? TUNNEL_CRIT_OPT : 0); 214 215 tun_dst = udp_tun_rx_dst(skb, sa_family, flags, 216 vni_to_tunnel_id(gnvh->vni), 217 gnvh->opt_len * 4); 218 if (!tun_dst) 219 goto drop; 220 /* Update tunnel dst according to Geneve options. */ 221 ip_tunnel_info_opts_set(&tun_dst->u.tun_info, 222 gnvh->options, gnvh->opt_len * 4); 223 } else { 224 /* Drop packets w/ critical options, 225 * since we don't support any... 226 */ 227 if (gnvh->critical) 228 goto drop; 229 } 230 231 skb_reset_mac_header(skb); 232 skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); 233 skb->protocol = eth_type_trans(skb, geneve->dev); 234 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 235 236 if (tun_dst) 237 skb_dst_set(skb, &tun_dst->dst); 238 239 /* Ignore packet loops (and multicast echo) */ 240 if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) 241 goto drop; 242 243 skb_reset_network_header(skb); 244 245 if (iph) 246 err = IP_ECN_decapsulate(iph, skb); 247 #if IS_ENABLED(CONFIG_IPV6) 248 if (ip6h) 249 err = IP6_ECN_decapsulate(ip6h, skb); 250 #endif 251 252 if (unlikely(err)) { 253 if (log_ecn_error) { 254 if (iph) 255 net_info_ratelimited("non-ECT from %pI4 " 256 "with TOS=%#x\n", 257 &iph->saddr, iph->tos); 258 #if IS_ENABLED(CONFIG_IPV6) 259 if (ip6h) 260 net_info_ratelimited("non-ECT from %pI6\n", 261 &ip6h->saddr); 262 #endif 263 } 264 if (err > 1) { 265 ++geneve->dev->stats.rx_frame_errors; 266 ++geneve->dev->stats.rx_errors; 267 goto drop; 268 } 269 } 270 271 stats = this_cpu_ptr(geneve->dev->tstats); 272 u64_stats_update_begin(&stats->syncp); 273 stats->rx_packets++; 274 stats->rx_bytes += skb->len; 275 u64_stats_update_end(&stats->syncp); 276 277 gro_cells_receive(&geneve->gro_cells, skb); 278 return; 279 drop: 280 /* Consume bad packet */ 281 kfree_skb(skb); 282 } 283 284 /* Setup stats when device is created */ 285 static int geneve_init(struct net_device *dev) 286 { 287 struct geneve_dev *geneve = netdev_priv(dev); 288 int err; 289 290 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 291 if (!dev->tstats) 292 return -ENOMEM; 293 294 err = gro_cells_init(&geneve->gro_cells, dev); 295 if (err) { 296 free_percpu(dev->tstats); 297 return err; 298 } 299 300 return 0; 301 } 302 303 static void geneve_uninit(struct net_device *dev) 304 { 305 struct geneve_dev *geneve = netdev_priv(dev); 306 307 gro_cells_destroy(&geneve->gro_cells); 308 free_percpu(dev->tstats); 309 } 310 311 /* Callback from net/ipv4/udp.c to receive packets */ 312 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 313 { 314 struct genevehdr *geneveh; 315 struct geneve_sock *gs; 316 int opts_len; 317 318 /* Need Geneve and inner Ethernet header to be present */ 319 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) 320 goto error; 321 322 /* Return packets with reserved bits set */ 323 geneveh = geneve_hdr(skb); 324 if (unlikely(geneveh->ver != GENEVE_VER)) 325 goto error; 326 327 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) 328 goto error; 329 330 opts_len = geneveh->opt_len * 4; 331 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, 332 htons(ETH_P_TEB))) 333 goto drop; 334 335 gs = rcu_dereference_sk_user_data(sk); 336 if (!gs) 337 goto drop; 338 339 geneve_rx(gs, skb); 340 return 0; 341 342 drop: 343 /* Consume bad packet */ 344 kfree_skb(skb); 345 return 0; 346 347 error: 348 /* Let the UDP layer deal with the skb */ 349 return 1; 350 } 351 352 static struct socket *geneve_create_sock(struct net *net, bool ipv6, 353 __be16 port, u32 flags) 354 { 355 struct socket *sock; 356 struct udp_port_cfg udp_conf; 357 int err; 358 359 memset(&udp_conf, 0, sizeof(udp_conf)); 360 361 if (ipv6) { 362 udp_conf.family = AF_INET6; 363 udp_conf.ipv6_v6only = 1; 364 udp_conf.use_udp6_rx_checksums = 365 !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX); 366 } else { 367 udp_conf.family = AF_INET; 368 udp_conf.local_ip.s_addr = htonl(INADDR_ANY); 369 } 370 371 udp_conf.local_udp_port = port; 372 373 /* Open UDP socket */ 374 err = udp_sock_create(net, &udp_conf, &sock); 375 if (err < 0) 376 return ERR_PTR(err); 377 378 return sock; 379 } 380 381 static void geneve_notify_add_rx_port(struct geneve_sock *gs) 382 { 383 struct net_device *dev; 384 struct sock *sk = gs->sock->sk; 385 struct net *net = sock_net(sk); 386 sa_family_t sa_family = sk->sk_family; 387 __be16 port = inet_sk(sk)->inet_sport; 388 int err; 389 390 if (sa_family == AF_INET) { 391 err = udp_add_offload(sock_net(sk), &gs->udp_offloads); 392 if (err) 393 pr_warn("geneve: udp_add_offload failed with status %d\n", 394 err); 395 } 396 397 rcu_read_lock(); 398 for_each_netdev_rcu(net, dev) { 399 if (dev->netdev_ops->ndo_add_geneve_port) 400 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, 401 port); 402 } 403 rcu_read_unlock(); 404 } 405 406 static int geneve_hlen(struct genevehdr *gh) 407 { 408 return sizeof(*gh) + gh->opt_len * 4; 409 } 410 411 static struct sk_buff **geneve_gro_receive(struct sk_buff **head, 412 struct sk_buff *skb, 413 struct udp_offload *uoff) 414 { 415 struct sk_buff *p, **pp = NULL; 416 struct genevehdr *gh, *gh2; 417 unsigned int hlen, gh_len, off_gnv; 418 const struct packet_offload *ptype; 419 __be16 type; 420 int flush = 1; 421 422 off_gnv = skb_gro_offset(skb); 423 hlen = off_gnv + sizeof(*gh); 424 gh = skb_gro_header_fast(skb, off_gnv); 425 if (skb_gro_header_hard(skb, hlen)) { 426 gh = skb_gro_header_slow(skb, hlen, off_gnv); 427 if (unlikely(!gh)) 428 goto out; 429 } 430 431 if (gh->ver != GENEVE_VER || gh->oam) 432 goto out; 433 gh_len = geneve_hlen(gh); 434 435 hlen = off_gnv + gh_len; 436 if (skb_gro_header_hard(skb, hlen)) { 437 gh = skb_gro_header_slow(skb, hlen, off_gnv); 438 if (unlikely(!gh)) 439 goto out; 440 } 441 442 flush = 0; 443 444 for (p = *head; p; p = p->next) { 445 if (!NAPI_GRO_CB(p)->same_flow) 446 continue; 447 448 gh2 = (struct genevehdr *)(p->data + off_gnv); 449 if (gh->opt_len != gh2->opt_len || 450 memcmp(gh, gh2, gh_len)) { 451 NAPI_GRO_CB(p)->same_flow = 0; 452 continue; 453 } 454 } 455 456 type = gh->proto_type; 457 458 rcu_read_lock(); 459 ptype = gro_find_receive_by_type(type); 460 if (!ptype) { 461 flush = 1; 462 goto out_unlock; 463 } 464 465 skb_gro_pull(skb, gh_len); 466 skb_gro_postpull_rcsum(skb, gh, gh_len); 467 pp = ptype->callbacks.gro_receive(head, skb); 468 469 out_unlock: 470 rcu_read_unlock(); 471 out: 472 NAPI_GRO_CB(skb)->flush |= flush; 473 474 return pp; 475 } 476 477 static int geneve_gro_complete(struct sk_buff *skb, int nhoff, 478 struct udp_offload *uoff) 479 { 480 struct genevehdr *gh; 481 struct packet_offload *ptype; 482 __be16 type; 483 int gh_len; 484 int err = -ENOSYS; 485 486 udp_tunnel_gro_complete(skb, nhoff); 487 488 gh = (struct genevehdr *)(skb->data + nhoff); 489 gh_len = geneve_hlen(gh); 490 type = gh->proto_type; 491 492 rcu_read_lock(); 493 ptype = gro_find_complete_by_type(type); 494 if (ptype) 495 err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); 496 497 rcu_read_unlock(); 498 return err; 499 } 500 501 /* Create new listen socket if needed */ 502 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, 503 bool ipv6, u32 flags) 504 { 505 struct geneve_net *gn = net_generic(net, geneve_net_id); 506 struct geneve_sock *gs; 507 struct socket *sock; 508 struct udp_tunnel_sock_cfg tunnel_cfg; 509 int h; 510 511 gs = kzalloc(sizeof(*gs), GFP_KERNEL); 512 if (!gs) 513 return ERR_PTR(-ENOMEM); 514 515 sock = geneve_create_sock(net, ipv6, port, flags); 516 if (IS_ERR(sock)) { 517 kfree(gs); 518 return ERR_CAST(sock); 519 } 520 521 gs->sock = sock; 522 gs->refcnt = 1; 523 for (h = 0; h < VNI_HASH_SIZE; ++h) 524 INIT_HLIST_HEAD(&gs->vni_list[h]); 525 526 /* Initialize the geneve udp offloads structure */ 527 gs->udp_offloads.port = port; 528 gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; 529 gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; 530 geneve_notify_add_rx_port(gs); 531 532 /* Mark socket as an encapsulation socket */ 533 tunnel_cfg.sk_user_data = gs; 534 tunnel_cfg.encap_type = 1; 535 tunnel_cfg.encap_rcv = geneve_udp_encap_recv; 536 tunnel_cfg.encap_destroy = NULL; 537 setup_udp_tunnel_sock(net, sock, &tunnel_cfg); 538 list_add(&gs->list, &gn->sock_list); 539 return gs; 540 } 541 542 static void geneve_notify_del_rx_port(struct geneve_sock *gs) 543 { 544 struct net_device *dev; 545 struct sock *sk = gs->sock->sk; 546 struct net *net = sock_net(sk); 547 sa_family_t sa_family = sk->sk_family; 548 __be16 port = inet_sk(sk)->inet_sport; 549 550 rcu_read_lock(); 551 for_each_netdev_rcu(net, dev) { 552 if (dev->netdev_ops->ndo_del_geneve_port) 553 dev->netdev_ops->ndo_del_geneve_port(dev, sa_family, 554 port); 555 } 556 557 rcu_read_unlock(); 558 559 if (sa_family == AF_INET) 560 udp_del_offload(&gs->udp_offloads); 561 } 562 563 static void __geneve_sock_release(struct geneve_sock *gs) 564 { 565 if (!gs || --gs->refcnt) 566 return; 567 568 list_del(&gs->list); 569 geneve_notify_del_rx_port(gs); 570 udp_tunnel_sock_release(gs->sock); 571 kfree_rcu(gs, rcu); 572 } 573 574 static void geneve_sock_release(struct geneve_dev *geneve) 575 { 576 __geneve_sock_release(geneve->sock4); 577 #if IS_ENABLED(CONFIG_IPV6) 578 __geneve_sock_release(geneve->sock6); 579 #endif 580 } 581 582 static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, 583 sa_family_t family, 584 __be16 dst_port) 585 { 586 struct geneve_sock *gs; 587 588 list_for_each_entry(gs, &gn->sock_list, list) { 589 if (inet_sk(gs->sock->sk)->inet_sport == dst_port && 590 inet_sk(gs->sock->sk)->sk.sk_family == family) { 591 return gs; 592 } 593 } 594 return NULL; 595 } 596 597 static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) 598 { 599 struct net *net = geneve->net; 600 struct geneve_net *gn = net_generic(net, geneve_net_id); 601 struct geneve_sock *gs; 602 __u32 hash; 603 604 gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->dst_port); 605 if (gs) { 606 gs->refcnt++; 607 goto out; 608 } 609 610 gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags); 611 if (IS_ERR(gs)) 612 return PTR_ERR(gs); 613 614 out: 615 gs->collect_md = geneve->collect_md; 616 gs->flags = geneve->flags; 617 #if IS_ENABLED(CONFIG_IPV6) 618 if (ipv6) 619 geneve->sock6 = gs; 620 else 621 #endif 622 geneve->sock4 = gs; 623 624 hash = geneve_net_vni_hash(geneve->vni); 625 hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); 626 return 0; 627 } 628 629 static int geneve_open(struct net_device *dev) 630 { 631 struct geneve_dev *geneve = netdev_priv(dev); 632 bool ipv6 = geneve->remote.sa.sa_family == AF_INET6; 633 bool metadata = geneve->collect_md; 634 int ret = 0; 635 636 geneve->sock4 = NULL; 637 #if IS_ENABLED(CONFIG_IPV6) 638 geneve->sock6 = NULL; 639 if (ipv6 || metadata) 640 ret = geneve_sock_add(geneve, true); 641 #endif 642 if (!ret && (!ipv6 || metadata)) 643 ret = geneve_sock_add(geneve, false); 644 if (ret < 0) 645 geneve_sock_release(geneve); 646 647 return ret; 648 } 649 650 static int geneve_stop(struct net_device *dev) 651 { 652 struct geneve_dev *geneve = netdev_priv(dev); 653 654 if (!hlist_unhashed(&geneve->hlist)) 655 hlist_del_rcu(&geneve->hlist); 656 geneve_sock_release(geneve); 657 return 0; 658 } 659 660 static void geneve_build_header(struct genevehdr *geneveh, 661 __be16 tun_flags, u8 vni[3], 662 u8 options_len, u8 *options) 663 { 664 geneveh->ver = GENEVE_VER; 665 geneveh->opt_len = options_len / 4; 666 geneveh->oam = !!(tun_flags & TUNNEL_OAM); 667 geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); 668 geneveh->rsvd1 = 0; 669 memcpy(geneveh->vni, vni, 3); 670 geneveh->proto_type = htons(ETH_P_TEB); 671 geneveh->rsvd2 = 0; 672 673 memcpy(geneveh->options, options, options_len); 674 } 675 676 static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb, 677 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 678 u32 flags, bool xnet) 679 { 680 struct genevehdr *gnvh; 681 int min_headroom; 682 int err; 683 bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM); 684 685 skb_scrub_packet(skb, xnet); 686 687 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 688 + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr); 689 err = skb_cow_head(skb, min_headroom); 690 if (unlikely(err)) { 691 kfree_skb(skb); 692 goto free_rt; 693 } 694 695 skb = udp_tunnel_handle_offloads(skb, udp_sum); 696 if (IS_ERR(skb)) { 697 err = PTR_ERR(skb); 698 goto free_rt; 699 } 700 701 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 702 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 703 704 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 705 return 0; 706 707 free_rt: 708 ip_rt_put(rt); 709 return err; 710 } 711 712 #if IS_ENABLED(CONFIG_IPV6) 713 static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb, 714 __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, 715 u32 flags, bool xnet) 716 { 717 struct genevehdr *gnvh; 718 int min_headroom; 719 int err; 720 bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX); 721 722 skb_scrub_packet(skb, xnet); 723 724 min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len 725 + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr); 726 err = skb_cow_head(skb, min_headroom); 727 if (unlikely(err)) { 728 kfree_skb(skb); 729 goto free_dst; 730 } 731 732 skb = udp_tunnel_handle_offloads(skb, udp_sum); 733 if (IS_ERR(skb)) { 734 err = PTR_ERR(skb); 735 goto free_dst; 736 } 737 738 gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); 739 geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); 740 741 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 742 return 0; 743 744 free_dst: 745 dst_release(dst); 746 return err; 747 } 748 #endif 749 750 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, 751 struct net_device *dev, 752 struct flowi4 *fl4, 753 struct ip_tunnel_info *info) 754 { 755 struct geneve_dev *geneve = netdev_priv(dev); 756 struct rtable *rt = NULL; 757 __u8 tos; 758 759 memset(fl4, 0, sizeof(*fl4)); 760 fl4->flowi4_mark = skb->mark; 761 fl4->flowi4_proto = IPPROTO_UDP; 762 763 if (info) { 764 fl4->daddr = info->key.u.ipv4.dst; 765 fl4->saddr = info->key.u.ipv4.src; 766 fl4->flowi4_tos = RT_TOS(info->key.tos); 767 } else { 768 tos = geneve->tos; 769 if (tos == 1) { 770 const struct iphdr *iip = ip_hdr(skb); 771 772 tos = ip_tunnel_get_dsfield(iip, skb); 773 } 774 775 fl4->flowi4_tos = RT_TOS(tos); 776 fl4->daddr = geneve->remote.sin.sin_addr.s_addr; 777 } 778 779 rt = ip_route_output_key(geneve->net, fl4); 780 if (IS_ERR(rt)) { 781 netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr); 782 return ERR_PTR(-ENETUNREACH); 783 } 784 if (rt->dst.dev == dev) { /* is this necessary? */ 785 netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr); 786 ip_rt_put(rt); 787 return ERR_PTR(-ELOOP); 788 } 789 return rt; 790 } 791 792 #if IS_ENABLED(CONFIG_IPV6) 793 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, 794 struct net_device *dev, 795 struct flowi6 *fl6, 796 struct ip_tunnel_info *info) 797 { 798 struct geneve_dev *geneve = netdev_priv(dev); 799 struct geneve_sock *gs6 = geneve->sock6; 800 struct dst_entry *dst = NULL; 801 __u8 prio; 802 803 memset(fl6, 0, sizeof(*fl6)); 804 fl6->flowi6_mark = skb->mark; 805 fl6->flowi6_proto = IPPROTO_UDP; 806 807 if (info) { 808 fl6->daddr = info->key.u.ipv6.dst; 809 fl6->saddr = info->key.u.ipv6.src; 810 fl6->flowi6_tos = RT_TOS(info->key.tos); 811 } else { 812 prio = geneve->tos; 813 if (prio == 1) { 814 const struct iphdr *iip = ip_hdr(skb); 815 816 prio = ip_tunnel_get_dsfield(iip, skb); 817 } 818 819 fl6->flowi6_tos = RT_TOS(prio); 820 fl6->daddr = geneve->remote.sin6.sin6_addr; 821 } 822 823 if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { 824 netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); 825 return ERR_PTR(-ENETUNREACH); 826 } 827 if (dst->dev == dev) { /* is this necessary? */ 828 netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr); 829 dst_release(dst); 830 return ERR_PTR(-ELOOP); 831 } 832 833 return dst; 834 } 835 #endif 836 837 /* Convert 64 bit tunnel ID to 24 bit VNI. */ 838 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) 839 { 840 #ifdef __BIG_ENDIAN 841 vni[0] = (__force __u8)(tun_id >> 16); 842 vni[1] = (__force __u8)(tun_id >> 8); 843 vni[2] = (__force __u8)tun_id; 844 #else 845 vni[0] = (__force __u8)((__force u64)tun_id >> 40); 846 vni[1] = (__force __u8)((__force u64)tun_id >> 48); 847 vni[2] = (__force __u8)((__force u64)tun_id >> 56); 848 #endif 849 } 850 851 static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, 852 struct ip_tunnel_info *info) 853 { 854 struct geneve_dev *geneve = netdev_priv(dev); 855 struct geneve_sock *gs4 = geneve->sock4; 856 struct rtable *rt = NULL; 857 const struct iphdr *iip; /* interior IP header */ 858 int err = -EINVAL; 859 struct flowi4 fl4; 860 __u8 tos, ttl; 861 __be16 sport; 862 __be16 df; 863 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 864 u32 flags = geneve->flags; 865 866 if (geneve->collect_md) { 867 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 868 netdev_dbg(dev, "no tunnel metadata\n"); 869 goto tx_error; 870 } 871 if (info && ip_tunnel_info_af(info) != AF_INET) 872 goto tx_error; 873 } 874 875 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 876 if (IS_ERR(rt)) { 877 err = PTR_ERR(rt); 878 goto tx_error; 879 } 880 881 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 882 skb_reset_mac_header(skb); 883 884 iip = ip_hdr(skb); 885 886 if (info) { 887 const struct ip_tunnel_key *key = &info->key; 888 u8 *opts = NULL; 889 u8 vni[3]; 890 891 tunnel_id_to_vni(key->tun_id, vni); 892 if (key->tun_flags & TUNNEL_GENEVE_OPT) 893 opts = ip_tunnel_info_opts(info); 894 895 if (key->tun_flags & TUNNEL_CSUM) 896 flags |= GENEVE_F_UDP_CSUM; 897 else 898 flags &= ~GENEVE_F_UDP_CSUM; 899 900 err = geneve_build_skb(rt, skb, key->tun_flags, vni, 901 info->options_len, opts, flags, xnet); 902 if (unlikely(err)) 903 goto err; 904 905 tos = ip_tunnel_ecn_encap(key->tos, iip, skb); 906 ttl = key->ttl; 907 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 908 } else { 909 err = geneve_build_skb(rt, skb, 0, geneve->vni, 910 0, NULL, flags, xnet); 911 if (unlikely(err)) 912 goto err; 913 914 tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb); 915 ttl = geneve->ttl; 916 if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) 917 ttl = 1; 918 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 919 df = 0; 920 } 921 udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, 922 tos, ttl, df, sport, geneve->dst_port, 923 !net_eq(geneve->net, dev_net(geneve->dev)), 924 !(flags & GENEVE_F_UDP_CSUM)); 925 926 return NETDEV_TX_OK; 927 928 tx_error: 929 dev_kfree_skb(skb); 930 err: 931 if (err == -ELOOP) 932 dev->stats.collisions++; 933 else if (err == -ENETUNREACH) 934 dev->stats.tx_carrier_errors++; 935 else 936 dev->stats.tx_errors++; 937 return NETDEV_TX_OK; 938 } 939 940 #if IS_ENABLED(CONFIG_IPV6) 941 static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 942 struct ip_tunnel_info *info) 943 { 944 struct geneve_dev *geneve = netdev_priv(dev); 945 struct geneve_sock *gs6 = geneve->sock6; 946 struct dst_entry *dst = NULL; 947 const struct iphdr *iip; /* interior IP header */ 948 int err = -EINVAL; 949 struct flowi6 fl6; 950 __u8 prio, ttl; 951 __be16 sport; 952 bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); 953 u32 flags = geneve->flags; 954 955 if (geneve->collect_md) { 956 if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 957 netdev_dbg(dev, "no tunnel metadata\n"); 958 goto tx_error; 959 } 960 } 961 962 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 963 if (IS_ERR(dst)) { 964 err = PTR_ERR(dst); 965 goto tx_error; 966 } 967 968 sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); 969 skb_reset_mac_header(skb); 970 971 iip = ip_hdr(skb); 972 973 if (info) { 974 const struct ip_tunnel_key *key = &info->key; 975 u8 *opts = NULL; 976 u8 vni[3]; 977 978 tunnel_id_to_vni(key->tun_id, vni); 979 if (key->tun_flags & TUNNEL_GENEVE_OPT) 980 opts = ip_tunnel_info_opts(info); 981 982 if (key->tun_flags & TUNNEL_CSUM) 983 flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX; 984 else 985 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 986 987 err = geneve6_build_skb(dst, skb, key->tun_flags, vni, 988 info->options_len, opts, 989 flags, xnet); 990 if (unlikely(err)) 991 goto err; 992 993 prio = ip_tunnel_ecn_encap(key->tos, iip, skb); 994 ttl = key->ttl; 995 } else { 996 err = geneve6_build_skb(dst, skb, 0, geneve->vni, 997 0, NULL, flags, xnet); 998 if (unlikely(err)) 999 goto err; 1000 1001 prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); 1002 ttl = geneve->ttl; 1003 if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) 1004 ttl = 1; 1005 ttl = ttl ? : ip6_dst_hoplimit(dst); 1006 } 1007 udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, 1008 &fl6.saddr, &fl6.daddr, prio, ttl, 1009 sport, geneve->dst_port, 1010 !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); 1011 return NETDEV_TX_OK; 1012 1013 tx_error: 1014 dev_kfree_skb(skb); 1015 err: 1016 if (err == -ELOOP) 1017 dev->stats.collisions++; 1018 else if (err == -ENETUNREACH) 1019 dev->stats.tx_carrier_errors++; 1020 else 1021 dev->stats.tx_errors++; 1022 return NETDEV_TX_OK; 1023 } 1024 #endif 1025 1026 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) 1027 { 1028 struct geneve_dev *geneve = netdev_priv(dev); 1029 struct ip_tunnel_info *info = NULL; 1030 1031 if (geneve->collect_md) 1032 info = skb_tunnel_info(skb); 1033 1034 #if IS_ENABLED(CONFIG_IPV6) 1035 if ((info && ip_tunnel_info_af(info) == AF_INET6) || 1036 (!info && geneve->remote.sa.sa_family == AF_INET6)) 1037 return geneve6_xmit_skb(skb, dev, info); 1038 #endif 1039 return geneve_xmit_skb(skb, dev, info); 1040 } 1041 1042 static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) 1043 { 1044 /* The max_mtu calculation does not take account of GENEVE 1045 * options, to avoid excluding potentially valid 1046 * configurations. 1047 */ 1048 int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) 1049 - dev->hard_header_len; 1050 1051 if (new_mtu < 68) 1052 return -EINVAL; 1053 1054 if (new_mtu > max_mtu) { 1055 if (strict) 1056 return -EINVAL; 1057 1058 new_mtu = max_mtu; 1059 } 1060 1061 dev->mtu = new_mtu; 1062 return 0; 1063 } 1064 1065 static int geneve_change_mtu(struct net_device *dev, int new_mtu) 1066 { 1067 return __geneve_change_mtu(dev, new_mtu, true); 1068 } 1069 1070 static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 1071 { 1072 struct ip_tunnel_info *info = skb_tunnel_info(skb); 1073 struct geneve_dev *geneve = netdev_priv(dev); 1074 struct rtable *rt; 1075 struct flowi4 fl4; 1076 #if IS_ENABLED(CONFIG_IPV6) 1077 struct dst_entry *dst; 1078 struct flowi6 fl6; 1079 #endif 1080 1081 if (ip_tunnel_info_af(info) == AF_INET) { 1082 rt = geneve_get_v4_rt(skb, dev, &fl4, info); 1083 if (IS_ERR(rt)) 1084 return PTR_ERR(rt); 1085 1086 ip_rt_put(rt); 1087 info->key.u.ipv4.src = fl4.saddr; 1088 #if IS_ENABLED(CONFIG_IPV6) 1089 } else if (ip_tunnel_info_af(info) == AF_INET6) { 1090 dst = geneve_get_v6_dst(skb, dev, &fl6, info); 1091 if (IS_ERR(dst)) 1092 return PTR_ERR(dst); 1093 1094 dst_release(dst); 1095 info->key.u.ipv6.src = fl6.saddr; 1096 #endif 1097 } else { 1098 return -EINVAL; 1099 } 1100 1101 info->key.tp_src = udp_flow_src_port(geneve->net, skb, 1102 1, USHRT_MAX, true); 1103 info->key.tp_dst = geneve->dst_port; 1104 return 0; 1105 } 1106 1107 static const struct net_device_ops geneve_netdev_ops = { 1108 .ndo_init = geneve_init, 1109 .ndo_uninit = geneve_uninit, 1110 .ndo_open = geneve_open, 1111 .ndo_stop = geneve_stop, 1112 .ndo_start_xmit = geneve_xmit, 1113 .ndo_get_stats64 = ip_tunnel_get_stats64, 1114 .ndo_change_mtu = geneve_change_mtu, 1115 .ndo_validate_addr = eth_validate_addr, 1116 .ndo_set_mac_address = eth_mac_addr, 1117 .ndo_fill_metadata_dst = geneve_fill_metadata_dst, 1118 }; 1119 1120 static void geneve_get_drvinfo(struct net_device *dev, 1121 struct ethtool_drvinfo *drvinfo) 1122 { 1123 strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); 1124 strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); 1125 } 1126 1127 static const struct ethtool_ops geneve_ethtool_ops = { 1128 .get_drvinfo = geneve_get_drvinfo, 1129 .get_link = ethtool_op_get_link, 1130 }; 1131 1132 /* Info for udev, that this is a virtual tunnel endpoint */ 1133 static struct device_type geneve_type = { 1134 .name = "geneve", 1135 }; 1136 1137 /* Calls the ndo_add_geneve_port of the caller in order to 1138 * supply the listening GENEVE udp ports. Callers are expected 1139 * to implement the ndo_add_geneve_port. 1140 */ 1141 void geneve_get_rx_port(struct net_device *dev) 1142 { 1143 struct net *net = dev_net(dev); 1144 struct geneve_net *gn = net_generic(net, geneve_net_id); 1145 struct geneve_sock *gs; 1146 sa_family_t sa_family; 1147 struct sock *sk; 1148 __be16 port; 1149 1150 rcu_read_lock(); 1151 list_for_each_entry_rcu(gs, &gn->sock_list, list) { 1152 sk = gs->sock->sk; 1153 sa_family = sk->sk_family; 1154 port = inet_sk(sk)->inet_sport; 1155 dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); 1156 } 1157 rcu_read_unlock(); 1158 } 1159 EXPORT_SYMBOL_GPL(geneve_get_rx_port); 1160 1161 /* Initialize the device structure. */ 1162 static void geneve_setup(struct net_device *dev) 1163 { 1164 ether_setup(dev); 1165 1166 dev->netdev_ops = &geneve_netdev_ops; 1167 dev->ethtool_ops = &geneve_ethtool_ops; 1168 dev->destructor = free_netdev; 1169 1170 SET_NETDEV_DEVTYPE(dev, &geneve_type); 1171 1172 dev->features |= NETIF_F_LLTX; 1173 dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 1174 dev->features |= NETIF_F_RXCSUM; 1175 dev->features |= NETIF_F_GSO_SOFTWARE; 1176 1177 dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; 1178 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 1179 1180 netif_keep_dst(dev); 1181 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1182 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; 1183 eth_hw_addr_random(dev); 1184 } 1185 1186 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { 1187 [IFLA_GENEVE_ID] = { .type = NLA_U32 }, 1188 [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1189 [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, 1190 [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, 1191 [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, 1192 [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, 1193 [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1194 [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, 1195 [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, 1196 [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 1197 }; 1198 1199 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) 1200 { 1201 if (tb[IFLA_ADDRESS]) { 1202 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1203 return -EINVAL; 1204 1205 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1206 return -EADDRNOTAVAIL; 1207 } 1208 1209 if (!data) 1210 return -EINVAL; 1211 1212 if (data[IFLA_GENEVE_ID]) { 1213 __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1214 1215 if (vni >= GENEVE_VID_MASK) 1216 return -ERANGE; 1217 } 1218 1219 return 0; 1220 } 1221 1222 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, 1223 __be16 dst_port, 1224 union geneve_addr *remote, 1225 u8 vni[], 1226 bool *tun_on_same_port, 1227 bool *tun_collect_md) 1228 { 1229 struct geneve_dev *geneve, *t; 1230 1231 *tun_on_same_port = false; 1232 *tun_collect_md = false; 1233 t = NULL; 1234 list_for_each_entry(geneve, &gn->geneve_list, next) { 1235 if (geneve->dst_port == dst_port) { 1236 *tun_collect_md = geneve->collect_md; 1237 *tun_on_same_port = true; 1238 } 1239 if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && 1240 !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) && 1241 dst_port == geneve->dst_port) 1242 t = geneve; 1243 } 1244 return t; 1245 } 1246 1247 static int geneve_configure(struct net *net, struct net_device *dev, 1248 union geneve_addr *remote, 1249 __u32 vni, __u8 ttl, __u8 tos, __be16 dst_port, 1250 bool metadata, u32 flags) 1251 { 1252 struct geneve_net *gn = net_generic(net, geneve_net_id); 1253 struct geneve_dev *t, *geneve = netdev_priv(dev); 1254 bool tun_collect_md, tun_on_same_port; 1255 int err, encap_len; 1256 1257 if (!remote) 1258 return -EINVAL; 1259 if (metadata && 1260 (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl)) 1261 return -EINVAL; 1262 1263 geneve->net = net; 1264 geneve->dev = dev; 1265 1266 geneve->vni[0] = (vni & 0x00ff0000) >> 16; 1267 geneve->vni[1] = (vni & 0x0000ff00) >> 8; 1268 geneve->vni[2] = vni & 0x000000ff; 1269 1270 if ((remote->sa.sa_family == AF_INET && 1271 IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) || 1272 (remote->sa.sa_family == AF_INET6 && 1273 ipv6_addr_is_multicast(&remote->sin6.sin6_addr))) 1274 return -EINVAL; 1275 geneve->remote = *remote; 1276 1277 geneve->ttl = ttl; 1278 geneve->tos = tos; 1279 geneve->dst_port = dst_port; 1280 geneve->collect_md = metadata; 1281 geneve->flags = flags; 1282 1283 t = geneve_find_dev(gn, dst_port, remote, geneve->vni, 1284 &tun_on_same_port, &tun_collect_md); 1285 if (t) 1286 return -EBUSY; 1287 1288 /* make enough headroom for basic scenario */ 1289 encap_len = GENEVE_BASE_HLEN + ETH_HLEN; 1290 if (remote->sa.sa_family == AF_INET) 1291 encap_len += sizeof(struct iphdr); 1292 else 1293 encap_len += sizeof(struct ipv6hdr); 1294 dev->needed_headroom = encap_len + ETH_HLEN; 1295 1296 if (metadata) { 1297 if (tun_on_same_port) 1298 return -EPERM; 1299 } else { 1300 if (tun_collect_md) 1301 return -EPERM; 1302 } 1303 1304 err = register_netdevice(dev); 1305 if (err) 1306 return err; 1307 1308 list_add(&geneve->next, &gn->geneve_list); 1309 return 0; 1310 } 1311 1312 static int geneve_newlink(struct net *net, struct net_device *dev, 1313 struct nlattr *tb[], struct nlattr *data[]) 1314 { 1315 __be16 dst_port = htons(GENEVE_UDP_PORT); 1316 __u8 ttl = 0, tos = 0; 1317 bool metadata = false; 1318 union geneve_addr remote = geneve_remote_unspec; 1319 __u32 vni = 0; 1320 u32 flags = 0; 1321 1322 if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) 1323 return -EINVAL; 1324 1325 if (data[IFLA_GENEVE_REMOTE]) { 1326 remote.sa.sa_family = AF_INET; 1327 remote.sin.sin_addr.s_addr = 1328 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); 1329 } 1330 1331 if (data[IFLA_GENEVE_REMOTE6]) { 1332 if (!IS_ENABLED(CONFIG_IPV6)) 1333 return -EPFNOSUPPORT; 1334 1335 remote.sa.sa_family = AF_INET6; 1336 remote.sin6.sin6_addr = 1337 nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); 1338 1339 if (ipv6_addr_type(&remote.sin6.sin6_addr) & 1340 IPV6_ADDR_LINKLOCAL) { 1341 netdev_dbg(dev, "link-local remote is unsupported\n"); 1342 return -EINVAL; 1343 } 1344 } 1345 1346 if (data[IFLA_GENEVE_ID]) 1347 vni = nla_get_u32(data[IFLA_GENEVE_ID]); 1348 1349 if (data[IFLA_GENEVE_TTL]) 1350 ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); 1351 1352 if (data[IFLA_GENEVE_TOS]) 1353 tos = nla_get_u8(data[IFLA_GENEVE_TOS]); 1354 1355 if (data[IFLA_GENEVE_PORT]) 1356 dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); 1357 1358 if (data[IFLA_GENEVE_COLLECT_METADATA]) 1359 metadata = true; 1360 1361 if (data[IFLA_GENEVE_UDP_CSUM] && 1362 nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) 1363 flags |= GENEVE_F_UDP_CSUM; 1364 1365 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && 1366 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) 1367 flags |= GENEVE_F_UDP_ZERO_CSUM6_TX; 1368 1369 if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] && 1370 nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) 1371 flags |= GENEVE_F_UDP_ZERO_CSUM6_RX; 1372 1373 return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port, 1374 metadata, flags); 1375 } 1376 1377 static void geneve_dellink(struct net_device *dev, struct list_head *head) 1378 { 1379 struct geneve_dev *geneve = netdev_priv(dev); 1380 1381 list_del(&geneve->next); 1382 unregister_netdevice_queue(dev, head); 1383 } 1384 1385 static size_t geneve_get_size(const struct net_device *dev) 1386 { 1387 return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ 1388 nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ 1389 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 1390 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 1391 nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ 1392 nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ 1393 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ 1394 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ 1395 nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ 1396 0; 1397 } 1398 1399 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) 1400 { 1401 struct geneve_dev *geneve = netdev_priv(dev); 1402 __u32 vni; 1403 1404 vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; 1405 if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) 1406 goto nla_put_failure; 1407 1408 if (geneve->remote.sa.sa_family == AF_INET) { 1409 if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, 1410 geneve->remote.sin.sin_addr.s_addr)) 1411 goto nla_put_failure; 1412 #if IS_ENABLED(CONFIG_IPV6) 1413 } else { 1414 if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, 1415 &geneve->remote.sin6.sin6_addr)) 1416 goto nla_put_failure; 1417 #endif 1418 } 1419 1420 if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || 1421 nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) 1422 goto nla_put_failure; 1423 1424 if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) 1425 goto nla_put_failure; 1426 1427 if (geneve->collect_md) { 1428 if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) 1429 goto nla_put_failure; 1430 } 1431 1432 if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, 1433 !!(geneve->flags & GENEVE_F_UDP_CSUM)) || 1434 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, 1435 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || 1436 nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1437 !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX))) 1438 goto nla_put_failure; 1439 1440 return 0; 1441 1442 nla_put_failure: 1443 return -EMSGSIZE; 1444 } 1445 1446 static struct rtnl_link_ops geneve_link_ops __read_mostly = { 1447 .kind = "geneve", 1448 .maxtype = IFLA_GENEVE_MAX, 1449 .policy = geneve_policy, 1450 .priv_size = sizeof(struct geneve_dev), 1451 .setup = geneve_setup, 1452 .validate = geneve_validate, 1453 .newlink = geneve_newlink, 1454 .dellink = geneve_dellink, 1455 .get_size = geneve_get_size, 1456 .fill_info = geneve_fill_info, 1457 }; 1458 1459 struct net_device *geneve_dev_create_fb(struct net *net, const char *name, 1460 u8 name_assign_type, u16 dst_port) 1461 { 1462 struct nlattr *tb[IFLA_MAX + 1]; 1463 struct net_device *dev; 1464 int err; 1465 1466 memset(tb, 0, sizeof(tb)); 1467 dev = rtnl_create_link(net, name, name_assign_type, 1468 &geneve_link_ops, tb); 1469 if (IS_ERR(dev)) 1470 return dev; 1471 1472 err = geneve_configure(net, dev, &geneve_remote_unspec, 1473 0, 0, 0, htons(dst_port), true, 1474 GENEVE_F_UDP_ZERO_CSUM6_RX); 1475 if (err) 1476 goto err; 1477 1478 /* openvswitch users expect packet sizes to be unrestricted, 1479 * so set the largest MTU we can. 1480 */ 1481 err = __geneve_change_mtu(dev, IP_MAX_MTU, false); 1482 if (err) 1483 goto err; 1484 1485 return dev; 1486 1487 err: 1488 free_netdev(dev); 1489 return ERR_PTR(err); 1490 } 1491 EXPORT_SYMBOL_GPL(geneve_dev_create_fb); 1492 1493 static __net_init int geneve_init_net(struct net *net) 1494 { 1495 struct geneve_net *gn = net_generic(net, geneve_net_id); 1496 1497 INIT_LIST_HEAD(&gn->geneve_list); 1498 INIT_LIST_HEAD(&gn->sock_list); 1499 return 0; 1500 } 1501 1502 static void __net_exit geneve_exit_net(struct net *net) 1503 { 1504 struct geneve_net *gn = net_generic(net, geneve_net_id); 1505 struct geneve_dev *geneve, *next; 1506 struct net_device *dev, *aux; 1507 LIST_HEAD(list); 1508 1509 rtnl_lock(); 1510 1511 /* gather any geneve devices that were moved into this ns */ 1512 for_each_netdev_safe(net, dev, aux) 1513 if (dev->rtnl_link_ops == &geneve_link_ops) 1514 unregister_netdevice_queue(dev, &list); 1515 1516 /* now gather any other geneve devices that were created in this ns */ 1517 list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { 1518 /* If geneve->dev is in the same netns, it was already added 1519 * to the list by the previous loop. 1520 */ 1521 if (!net_eq(dev_net(geneve->dev), net)) 1522 unregister_netdevice_queue(geneve->dev, &list); 1523 } 1524 1525 /* unregister the devices gathered above */ 1526 unregister_netdevice_many(&list); 1527 rtnl_unlock(); 1528 } 1529 1530 static struct pernet_operations geneve_net_ops = { 1531 .init = geneve_init_net, 1532 .exit = geneve_exit_net, 1533 .id = &geneve_net_id, 1534 .size = sizeof(struct geneve_net), 1535 }; 1536 1537 static int __init geneve_init_module(void) 1538 { 1539 int rc; 1540 1541 rc = register_pernet_subsys(&geneve_net_ops); 1542 if (rc) 1543 goto out1; 1544 1545 rc = rtnl_link_register(&geneve_link_ops); 1546 if (rc) 1547 goto out2; 1548 1549 return 0; 1550 out2: 1551 unregister_pernet_subsys(&geneve_net_ops); 1552 out1: 1553 return rc; 1554 } 1555 late_initcall(geneve_init_module); 1556 1557 static void __exit geneve_cleanup_module(void) 1558 { 1559 rtnl_link_unregister(&geneve_link_ops); 1560 unregister_pernet_subsys(&geneve_net_ops); 1561 } 1562 module_exit(geneve_cleanup_module); 1563 1564 MODULE_LICENSE("GPL"); 1565 MODULE_VERSION(GENEVE_NETDEV_VER); 1566 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); 1567 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); 1568 MODULE_ALIAS_RTNL_LINK("geneve"); 1569