// SPDX-License-Identifier: GPL-2.0-only
/*
 * Vxlan vni filter for collect metadata mode
 *
 * Authors: Roopa Prabhu <roopa@nvidia.com>
 *
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/etherdevice.h>
#include <linux/rhashtable.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/vxlan.h>

#include "vxlan_private.h"

static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct vxlan_vni_node *vnode = ptr;
	__be32 vni = *(__be32 *)arg->key;

	return vnode->vni != vni;
}

const struct rhashtable_params vxlan_vni_rht_params = {
	.head_offset = offsetof(struct vxlan_vni_node, vnode),
	.key_offset = offsetof(struct vxlan_vni_node, vni),
	.key_len = sizeof(__be32),
	.nelem_hint = 3,
	.max_size = VXLAN_N_VID,
	.obj_cmpfn = vxlan_vni_cmp,
	.automatic_shrinking = true,
};

static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
				     struct vxlan_vni_node *v,
				     bool del)
{
	struct vxlan_dev_node *node;
	struct vxlan_sock *vs;

	ASSERT_RTNL();

	if (del) {
		if (!hlist_unhashed(&v->hlist4.hlist))
			hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		if (!hlist_unhashed(&v->hlist6.hlist))
			hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		return;
	}

#if IS_ENABLED(CONFIG_IPV6)
	vs = rtnl_dereference(vxlan->vn6_sock);
	if (vs && v) {
		node = &v->hlist6;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
#endif
	vs = rtnl_dereference(vxlan->vn4_sock);
	if (vs && v) {
		node = &v->hlist4;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
}

void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
			 struct vxlan_sock *vs,
			 bool ipv6)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_dev_node *node;

	ASSERT_RTNL();

	if (!vg)
		return;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
#if IS_ENABLED(CONFIG_IPV6)
		if (ipv6)
			node = &v->hlist6;
		else
#endif
			node = &v->hlist4;
		node->vxlan = vxlan;
		hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
	}
}

void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
	struct vxlan_vni_node *v, *tmp;

	ASSERT_RTNL();

	if (!vg)
		return;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
	}
}

static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
				      struct vxlan_vni_stats *dest)
{
	int i;

	memset(dest, 0, sizeof(*dest));
	for_each_possible_cpu(i) {
		struct vxlan_vni_stats_pcpu *pstats;
		struct vxlan_vni_stats temp;
		unsigned int start;

		pstats = per_cpu_ptr(vninode->stats, i);
		do {
			start = u64_stats_fetch_begin(&pstats->syncp);
			memcpy(&temp, &pstats->stats, sizeof(temp));
		} while (u64_stats_fetch_retry(&pstats->syncp, start));

		dest->rx_packets += temp.rx_packets;
		dest->rx_bytes += temp.rx_bytes;
		dest->rx_drops += temp.rx_drops;
		dest->rx_errors += temp.rx_errors;
		dest->tx_packets += temp.tx_packets;
		dest->tx_bytes += temp.tx_bytes;
		dest->tx_drops += temp.tx_drops;
		dest->tx_errors += temp.tx_errors;
	}
}

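/* Bump one per-CPU counter for @vninode; callers run in the datapath, so
 * the update is guarded only by the u64_stats write section.
 */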
static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
				      int type, unsigned int len)
{
	struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);

	u64_stats_update_begin(&pstats->syncp);
	switch (type) {
	case VXLAN_VNI_STATS_RX:
		pstats->stats.rx_bytes += len;
		pstats->stats.rx_packets++;
		break;
	case VXLAN_VNI_STATS_RX_DROPS:
		pstats->stats.rx_drops++;
		break;
	case VXLAN_VNI_STATS_RX_ERRORS:
		pstats->stats.rx_errors++;
		break;
	case VXLAN_VNI_STATS_TX:
		pstats->stats.tx_bytes += len;
		pstats->stats.tx_packets++;
		break;
	case VXLAN_VNI_STATS_TX_DROPS:
		pstats->stats.tx_drops++;
		break;
	case VXLAN_VNI_STATS_TX_ERRORS:
		pstats->stats.tx_errors++;
		break;
	}
	u64_stats_update_end(&pstats->syncp);
}

void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
			   struct vxlan_vni_node *vninode,
			   int type, unsigned int len)
{
	struct vxlan_vni_node *vnode;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return;

	if (vninode) {
		vnode = vninode;
	} else {
		vnode = vxlan_vnifilter_lookup(vxlan, vni);
		if (!vnode)
			return;
	}

	vxlan_vnifilter_stats_add(vnode, type, len);
}

static u32 vnirange(struct vxlan_vni_node *vbegin,
		    struct vxlan_vni_node *vend)
{
	return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
}

static size_t vxlan_vnifilter_entry_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct tunnel_msg))
		+ nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
		+ nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
		+ nla_total_size(sizeof(struct in6_addr)); /* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
}

static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
					const struct vxlan_vni_node *vbegin)
{
	struct vxlan_vni_stats vstats;
	struct nlattr *vstats_attr;

	vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
	if (!vstats_attr)
		goto out_stats_err;

	vxlan_vnifilter_stats_get(vbegin, &vstats);
	if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
			      vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
			      vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
			      vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
			      vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
			      vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
			      vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
			      vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
	    nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
			      vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
		goto out_stats_err;

	nla_nest_end(skb, vstats_attr);

	return 0;

out_stats_err:
	nla_nest_cancel(skb, vstats_attr);
	return -EMSGSIZE;
}

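/* Emit one VXLAN_VNIFILTER_ENTRY nest describing the [vbegin, vend] vni
 * range; VXLAN_VNIFILTER_ENTRY_END is omitted when the range is a single vni.
 */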
static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
					struct vxlan_vni_node *vbegin,
					struct vxlan_vni_node *vend,
					bool fill_stats)
{
	struct nlattr *ventry;
	u32 vs = be32_to_cpu(vbegin->vni);
	u32 ve = 0;

	if (vbegin != vend)
		ve = be32_to_cpu(vend->vni);

	ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
	if (!ventry)
		return false;

	if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
		goto out_err;

	if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
		goto out_err;

	if (!vxlan_addr_any(&vbegin->remote_ip)) {
		if (vbegin->remote_ip.sa.sa_family == AF_INET) {
			if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
					    vbegin->remote_ip.sin.sin_addr.s_addr))
				goto out_err;
#if IS_ENABLED(CONFIG_IPV6)
		} else {
			if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
					     &vbegin->remote_ip.sin6.sin6_addr))
				goto out_err;
#endif
		}
	}

	if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
		goto out_err;

	nla_nest_end(skb, ventry);

	return true;

out_err:
	nla_nest_cancel(skb, ventry);

	return false;
}

static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode, int cmd)
{
	struct tunnel_msg *tmsg;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct net *net = dev_net(vxlan->dev);
	int err = -ENOBUFS;

	skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto out_err;

	err = -EMSGSIZE;
	nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
	if (!nlh)
		goto out_err;
	tmsg = nlmsg_data(nlh);
	memset(tmsg, 0, sizeof(*tmsg));
	tmsg->family = AF_BRIDGE;
	tmsg->ifindex = vxlan->dev->ifindex;

	if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
		goto out_err;

	nlmsg_end(skb, nlh);
	rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);

	return;

out_err:
	rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);

	kfree_skb(skb);
}

static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
				    struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct tunnel_msg *new_tmsg, *tmsg;
	int idx = 0, s_idx = cb->args[1];
	struct vxlan_vni_group *vg;
	struct nlmsghdr *nlh;
	bool dump_stats;
	int err = 0;

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EINVAL;

	/* RCU needed because of the vni locking rules (rcu || rtnl) */
	vg = rcu_dereference(vxlan->vnigrp);
	if (!vg || !vg->num_vnis)
		return 0;

	tmsg = nlmsg_data(cb->nlh);
	dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;
	new_tmsg = nlmsg_data(nlh);
	memset(new_tmsg, 0, sizeof(*new_tmsg));
	new_tmsg->family = PF_BRIDGE;
	new_tmsg->ifindex = dev->ifindex;

	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		if (idx < s_idx) {
			idx++;
			continue;
		}
		if (!vbegin) {
			vbegin = v;
			vend = v;
			continue;
		}
		if (!dump_stats && vnirange(vend, v) == 1 &&
		    vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
			goto update_end;
		} else {
			if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
							 dump_stats)) {
				err = -EMSGSIZE;
				break;
			}
			idx += vnirange(vbegin, vend) + 1;
			vbegin = v;
		}
update_end:
		vend = v;
	}

	if (!err && vbegin) {
		if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
			err = -EMSGSIZE;
	}

	cb->args[1] = err ? idx : 0;

	nlmsg_end(skb, nlh);

	return err;
}

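/* RTM_GETTUNNEL dump handler: dump a single device when an ifindex is
 * given, otherwise walk every vxlan device in the namespace under RCU.
 */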
static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx = 0, err = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct net_device *dev;

	tmsg = nlmsg_payload(cb->nlh, sizeof(*tmsg));
	if (!tmsg) {
		NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
		return -EINVAL;
	}

	if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
		return -EINVAL;
	}

	rcu_read_lock();
	if (tmsg->ifindex) {
		dev = dev_get_by_index_rcu(net, tmsg->ifindex);
		if (!dev) {
			err = -ENODEV;
			goto out_err;
		}
		if (!netif_is_vxlan(dev)) {
			NL_SET_ERR_MSG(cb->extack,
				       "The device is not a vxlan device");
			err = -EINVAL;
			goto out_err;
		}
		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
		/* if the dump completed without an error we return 0 here */
		if (err != -EMSGSIZE)
			goto out_err;
	} else {
		for_each_netdev_rcu(net, dev) {
			if (!netif_is_vxlan(dev))
				continue;
			if (idx < s_idx)
				goto skip;
			err = vxlan_vnifilter_dump_dev(dev, skb, cb);
			if (err == -EMSGSIZE)
				break;
skip:
			idx++;
		}
	}
	cb->args[0] = idx;
	rcu_read_unlock();

	return skb->len;

out_err:
	rcu_read_unlock();

	return err;
}

static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
	[VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
					  .len = sizeof_field(struct iphdr, daddr) },
	[VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
					   .len = sizeof(struct in6_addr) },
};

static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
	[VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
};

static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
					   union vxlan_addr *old_remote_ip,
					   union vxlan_addr *remote_ip,
					   struct netlink_ext_ack *extack)
{
	struct vxlan_rdst *dst = &vxlan->default_dst;
	int err = 0;

	spin_lock_bh(&vxlan->hash_lock);
	if (remote_ip && !vxlan_addr_any(remote_ip)) {
		err = vxlan_fdb_update(vxlan, all_zeros_mac,
				       remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_APPEND | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vni,
				       vni,
				       dst->remote_ifindex,
				       NTF_SELF, 0, true, extack);
		if (err) {
			spin_unlock_bh(&vxlan->hash_lock);
			return err;
		}
	}

	if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   *old_remote_ip,
				   vxlan->cfg.dst_port,
				   vni, vni,
				   dst->remote_ifindex,
				   true);
	}
	spin_unlock_bh(&vxlan->hash_lock);

	return err;
}

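/* Repoint the all-zeros-mac default fdb entry of @vninode at the new
 * remote/group and, if the device is up, adjust multicast group membership.
 */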
static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
				  struct vxlan_vni_node *vninode,
				  union vxlan_addr *group,
				  bool create, bool *changed,
				  struct netlink_ext_ack *extack)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	union vxlan_addr *newrip = NULL, *oldrip = NULL;
	union vxlan_addr old_remote_ip;
	int ret = 0;

	memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));

	/* if per vni remote ip is not present use vxlan dev
	 * default dst remote ip for fdb entry
	 */
	if (group && !vxlan_addr_any(group)) {
		newrip = group;
	} else {
		if (!vxlan_addr_any(&dst->remote_ip))
			newrip = &dst->remote_ip;
	}

	/* if old rip exists, and no newrip,
	 * explicitly delete old rip
	 */
	if (!newrip && !vxlan_addr_any(&old_remote_ip))
		oldrip = &old_remote_ip;

	if (!newrip && !oldrip)
		return 0;

	if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
		return 0;

	ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
					     oldrip, newrip,
					     extack);
	if (ret)
		goto out;

	if (group)
		memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&old_remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &old_remote_ip,
				      vxlan->default_dst.remote_ifindex)) {
			ret = vxlan_igmp_leave(vxlan, &old_remote_ip, 0);
			if (ret)
				goto out;
		}

		if (vxlan_addr_multicast(&vninode->remote_ip)) {
			ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
			if (ret == -EADDRINUSE)
				ret = 0;
			if (ret)
				goto out;
		}
	}

	*changed = true;

	return 0;
out:
	return ret;
}

int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
			       union vxlan_addr *old_remote_ip,
			       union vxlan_addr *new_remote_ip,
			       struct netlink_ext_ack *extack)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_group *vg;
	struct vxlan_vni_node *vent;
	int ret;

	vg = rtnl_dereference(vxlan->vnigrp);

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (vxlan_addr_any(&vent->remote_ip)) {
			ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
							     old_remote_ip,
							     new_remote_ip,
							     extack);
			if (ret)
				return ret;
		}
	}

	return 0;
}

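/* Undo the default fdb entry and multicast membership set up for a vni
 * that is being removed from the filter list.
 */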
static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
				   struct vxlan_vni_node *vninode)
{
	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
	struct vxlan_rdst *dst = &vxlan->default_dst;

	/* if per vni remote_ip not present, delete the
	 * default dst remote_ip previously added for this vni
	 */
	if (!vxlan_addr_any(&vninode->remote_ip) ||
	    !vxlan_addr_any(&dst->remote_ip)) {
		spin_lock_bh(&vxlan->hash_lock);
		__vxlan_fdb_delete(vxlan, all_zeros_mac,
				   (vxlan_addr_any(&vninode->remote_ip) ?
				    dst->remote_ip : vninode->remote_ip),
				   vxlan->cfg.dst_port,
				   vninode->vni, vninode->vni,
				   dst->remote_ifindex,
				   true);
		spin_unlock_bh(&vxlan->hash_lock);
	}

	if (vxlan->dev->flags & IFF_UP) {
		if (vxlan_addr_multicast(&vninode->remote_ip) &&
		    !vxlan_group_used(vn, vxlan, vninode->vni,
				      &vninode->remote_ip,
				      dst->remote_ifindex)) {
			vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
		}
	}
}

static int vxlan_vni_update(struct vxlan_dev *vxlan,
			    struct vxlan_vni_group *vg,
			    __be32 vni, union vxlan_addr *group,
			    bool *changed,
			    struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	int ret;

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
					 vxlan_vni_rht_params);
	if (!vninode)
		return 0;

	ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
				     extack);
	if (ret)
		return ret;

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return 0;
}

static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	struct list_head *headp, *hpos;
	struct vxlan_vni_node *vent;

	headp = &vg->vni_list;
	list_for_each_prev(hpos, headp) {
		vent = list_entry(hpos, struct vxlan_vni_node, vlist);
		if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
			continue;
		else
			break;
	}
	list_add_rcu(&v->vlist, hpos);
	vg->num_vnis++;
}

static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
				 struct vxlan_vni_node *v)
{
	list_del_rcu(&v->vlist);
	vg->num_vnis--;
}

static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
					      __be32 vni)
{
	struct vxlan_vni_node *vninode;

	vninode = kzalloc(sizeof(*vninode), GFP_KERNEL);
	if (!vninode)
		return NULL;
	vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
	if (!vninode->stats) {
		kfree(vninode);
		return NULL;
	}
	vninode->vni = vni;
	vninode->hlist4.vxlan = vxlan;
#if IS_ENABLED(CONFIG_IPV6)
	vninode->hlist6.vxlan = vxlan;
#endif

	return vninode;
}

static void vxlan_vni_free(struct vxlan_vni_node *vninode)
{
	free_percpu(vninode->stats);
	kfree(vninode);
}

static int vxlan_vni_add(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, union vxlan_addr *group,
			 struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	bool changed = false;
	int err = 0;

	if (vxlan_vnifilter_lookup(vxlan, v))
		return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);

	err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
	if (err) {
		NL_SET_ERR_MSG(extack, "VNI in use");
		return err;
	}

	vninode = vxlan_vni_alloc(vxlan, v);
	if (!vninode)
		return -ENOMEM;

	err = rhashtable_lookup_insert_fast(&vg->vni_hash,
					    &vninode->vnode,
					    vxlan_vni_rht_params);
	if (err) {
		vxlan_vni_free(vninode);
		return err;
	}

	__vxlan_vni_add_list(vg, vninode);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, false);

	err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
				     extack);

	if (changed)
		vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);

	return err;
}

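/* Free a vni node once all RCU readers are done with it. */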
static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
{
	struct vxlan_vni_node *v;

	v = container_of(rcu, struct vxlan_vni_node, rcu);
	vxlan_vni_free(v);
}

static int vxlan_vni_del(struct vxlan_dev *vxlan,
			 struct vxlan_vni_group *vg,
			 u32 vni, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_node *vninode;
	__be32 v = cpu_to_be32(vni);
	int err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
					 vxlan_vni_rht_params);
	if (!vninode) {
		err = -ENOENT;
		goto out;
	}

	vxlan_vni_delete_group(vxlan, vninode);

	err = rhashtable_remove_fast(&vg->vni_hash,
				     &vninode->vnode,
				     vxlan_vni_rht_params);
	if (err)
		goto out;

	__vxlan_vni_del_list(vg, vninode);

	vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);

	if (vxlan->dev->flags & IFF_UP)
		vxlan_vs_add_del_vninode(vxlan, vninode, true);

	call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);

	return 0;
out:
	return err;
}

static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
			     __u32 end_vni, union vxlan_addr *group,
			     int cmd, struct netlink_ext_ack *extack)
{
	struct vxlan_vni_group *vg;
	int v, err = 0;

	vg = rtnl_dereference(vxlan->vnigrp);

	for (v = start_vni; v <= end_vni; v++) {
		switch (cmd) {
		case RTM_NEWTUNNEL:
			err = vxlan_vni_add(vxlan, vg, v, group, extack);
			break;
		case RTM_DELTUNNEL:
			err = vxlan_vni_del(vxlan, vg, v, extack);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			goto out;
	}

	return 0;
out:
	return err;
}

static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
				    struct nlattr *nlvnifilter,
				    int cmd, struct netlink_ext_ack *extack)
{
	struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
	u32 vni_start = 0, vni_end = 0;
	union vxlan_addr group;
	int err;

	err = nla_parse_nested(vattrs,
			       VXLAN_VNIFILTER_ENTRY_MAX,
			       nlvnifilter, vni_filter_entry_policy,
			       extack);
	if (err)
		return err;

	if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
		vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
		vni_end = vni_start;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
		vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);

	if (!vni_start && !vni_end) {
		NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
				    "vni start nor end found in vni entry");
		return -EINVAL;
	}

	if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
		group.sin.sin_addr.s_addr =
			nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
		group.sa.sa_family = AF_INET;
	} else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
		group.sin6.sin6_addr =
			nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
		group.sa.sa_family = AF_INET6;
	} else {
		memset(&group, 0, sizeof(group));
	}

	if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
		NL_SET_ERR_MSG(extack,
			       "Local interface required for multicast remote group");

		return -EINVAL;
	}

	err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
				extack);
	if (err)
		return err;

	return 0;
}

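/* Called under rtnl on device uninit: unlink every vni node, notify
 * userspace with RTM_DELTUNNEL and free the group.
 */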
void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_node *v, *tmp;
	struct vxlan_vni_group *vg;

	vg = rtnl_dereference(vxlan->vnigrp);
	list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
		rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
				       vxlan_vni_rht_params);
		hlist_del_init_rcu(&v->hlist4.hlist);
#if IS_ENABLED(CONFIG_IPV6)
		hlist_del_init_rcu(&v->hlist6.hlist);
#endif
		__vxlan_vni_del_list(vg, v);
		vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
		call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
	}
	rhashtable_destroy(&vg->vni_hash);
	kfree(vg);
}

int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
{
	struct vxlan_vni_group *vg;
	int ret;

	vg = kzalloc(sizeof(*vg), GFP_KERNEL);
	if (!vg)
		return -ENOMEM;
	ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
	if (ret) {
		kfree(vg);
		return ret;
	}
	INIT_LIST_HEAD(&vg->vni_list);
	rcu_assign_pointer(vxlan->vnigrp, vg);

	return 0;
}

static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
				   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tunnel_msg *tmsg;
	struct vxlan_dev *vxlan;
	struct net_device *dev;
	struct nlattr *attr;
	int err, vnis = 0;
	int rem;

	/* this should validate the header and check for remaining bytes */
	err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
			  vni_filter_policy, extack);
	if (err < 0)
		return err;

	tmsg = nlmsg_data(nlh);
	dev = __dev_get_by_index(net, tmsg->ifindex);
	if (!dev)
		return -ENODEV;

	if (!netif_is_vxlan(dev)) {
		NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
		return -EINVAL;
	}

	vxlan = netdev_priv(dev);

	if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
		return -EOPNOTSUPP;

	nlmsg_for_each_attr_type(attr, VXLAN_VNIFILTER_ENTRY, nlh,
				 sizeof(*tmsg), rem) {
		err = vxlan_process_vni_filter(vxlan, attr, nlh->nlmsg_type,
					       extack);
		vnis++;
		if (err)
			break;
	}

	if (!vnis) {
		NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
		err = -EINVAL;
	}

	return err;
}

static const struct rtnl_msg_handler vxlan_vnifilter_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL, vxlan_vnifilter_dump, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL, vxlan_vnifilter_process, NULL, 0},
	{THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL, vxlan_vnifilter_process, NULL, 0},
};

int vxlan_vnifilter_init(void)
{
	return rtnl_register_many(vxlan_vnifilter_rtnl_msg_handlers);
}

void vxlan_vnifilter_uninit(void)
{
	rtnl_unregister_many(vxlan_vnifilter_rtnl_msg_handlers);
}