1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * SR-IPv6 implementation 4 * 5 * Authors: 6 * David Lebrun <david.lebrun@uclouvain.be> 7 * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com> 8 */ 9 10 #include <linux/filter.h> 11 #include <linux/types.h> 12 #include <linux/skbuff.h> 13 #include <linux/net.h> 14 #include <linux/module.h> 15 #include <net/ip.h> 16 #include <net/lwtunnel.h> 17 #include <net/netevent.h> 18 #include <net/netns/generic.h> 19 #include <net/ip6_fib.h> 20 #include <net/route.h> 21 #include <net/seg6.h> 22 #include <linux/seg6.h> 23 #include <linux/seg6_local.h> 24 #include <net/addrconf.h> 25 #include <net/ip6_route.h> 26 #include <net/dst_cache.h> 27 #include <net/ip_tunnels.h> 28 #ifdef CONFIG_IPV6_SEG6_HMAC 29 #include <net/seg6_hmac.h> 30 #endif 31 #include <net/seg6_local.h> 32 #include <linux/etherdevice.h> 33 #include <linux/bpf.h> 34 #include <linux/netfilter.h> 35 36 #define SEG6_F_ATTR(i) BIT(i) 37 38 struct seg6_local_lwt; 39 40 /* callbacks used for customizing the creation and destruction of a behavior */ 41 struct seg6_local_lwtunnel_ops { 42 int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, 43 struct netlink_ext_ack *extack); 44 void (*destroy_state)(struct seg6_local_lwt *slwt); 45 }; 46 47 struct seg6_action_desc { 48 int action; 49 unsigned long attrs; 50 51 /* The optattrs field is used for specifying all the optional 52 * attributes supported by a specific behavior. 53 * It means that if one of these attributes is not provided in the 54 * netlink message during the behavior creation, no errors will be 55 * returned to the userspace. 56 * 57 * Each attribute can be only of two types (mutually exclusive): 58 * 1) required or 2) optional. 59 * Every user MUST obey to this rule! If you set an attribute as 60 * required the same attribute CANNOT be set as optional and vice 61 * versa. 62 */ 63 unsigned long optattrs; 64 65 int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); 66 int static_headroom; 67 68 struct seg6_local_lwtunnel_ops slwt_ops; 69 }; 70 71 struct bpf_lwt_prog { 72 struct bpf_prog *prog; 73 char *name; 74 }; 75 76 enum seg6_end_dt_mode { 77 DT_INVALID_MODE = -EINVAL, 78 DT_LEGACY_MODE = 0, 79 DT_VRF_MODE = 1, 80 }; 81 82 struct seg6_end_dt_info { 83 enum seg6_end_dt_mode mode; 84 85 struct net *net; 86 /* VRF device associated to the routing table used by the SRv6 87 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. 88 */ 89 int vrf_ifindex; 90 int vrf_table; 91 92 /* tunneled packet family (IPv4 or IPv6). 93 * Protocol and header length are inferred from family. 94 */ 95 u16 family; 96 }; 97 98 struct pcpu_seg6_local_counters { 99 u64_stats_t packets; 100 u64_stats_t bytes; 101 u64_stats_t errors; 102 103 struct u64_stats_sync syncp; 104 }; 105 106 /* This struct groups all the SRv6 Behavior counters supported so far. 107 * 108 * put_nla_counters() makes use of this data structure to collect all counter 109 * values after the per-CPU counter evaluation has been performed. 110 * Finally, each counter value (in seg6_local_counters) is stored in the 111 * corresponding netlink attribute and sent to user space. 112 * 113 * NB: we don't want to expose this structure to user space! 114 */ 115 struct seg6_local_counters { 116 __u64 packets; 117 __u64 bytes; 118 __u64 errors; 119 }; 120 121 #define seg6_local_alloc_pcpu_counters(__gfp) \ 122 __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \ 123 ((__gfp) | __GFP_ZERO)) 124 125 #define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS) 126 127 struct seg6_local_lwt { 128 int action; 129 struct ipv6_sr_hdr *srh; 130 int table; 131 struct in_addr nh4; 132 struct in6_addr nh6; 133 int iif; 134 int oif; 135 struct bpf_lwt_prog bpf; 136 #ifdef CONFIG_NET_L3_MASTER_DEV 137 struct seg6_end_dt_info dt_info; 138 #endif 139 struct pcpu_seg6_local_counters __percpu *pcpu_counters; 140 141 int headroom; 142 struct seg6_action_desc *desc; 143 /* unlike the required attrs, we have to track the optional attributes 144 * that have been effectively parsed. 145 */ 146 unsigned long parsed_optattrs; 147 }; 148 149 static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) 150 { 151 return (struct seg6_local_lwt *)lwt->data; 152 } 153 154 static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags) 155 { 156 struct ipv6_sr_hdr *srh; 157 int len, srhoff = 0; 158 159 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0) 160 return NULL; 161 162 if (!pskb_may_pull(skb, srhoff + sizeof(*srh))) 163 return NULL; 164 165 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 166 167 len = (srh->hdrlen + 1) << 3; 168 169 if (!pskb_may_pull(skb, srhoff + len)) 170 return NULL; 171 172 /* note that pskb_may_pull may change pointers in header; 173 * for this reason it is necessary to reload them when needed. 174 */ 175 srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); 176 177 if (!seg6_validate_srh(srh, len, true)) 178 return NULL; 179 180 return srh; 181 } 182 183 static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) 184 { 185 struct ipv6_sr_hdr *srh; 186 187 srh = get_srh(skb, IP6_FH_F_SKIP_RH); 188 if (!srh) 189 return NULL; 190 191 #ifdef CONFIG_IPV6_SEG6_HMAC 192 if (!seg6_hmac_validate_skb(skb)) 193 return NULL; 194 #endif 195 196 return srh; 197 } 198 199 static bool decap_and_validate(struct sk_buff *skb, int proto) 200 { 201 struct ipv6_sr_hdr *srh; 202 unsigned int off = 0; 203 204 srh = get_srh(skb, 0); 205 if (srh && srh->segments_left > 0) 206 return false; 207 208 #ifdef CONFIG_IPV6_SEG6_HMAC 209 if (srh && !seg6_hmac_validate_skb(skb)) 210 return false; 211 #endif 212 213 if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0) 214 return false; 215 216 if (!pskb_pull(skb, off)) 217 return false; 218 219 skb_postpull_rcsum(skb, skb_network_header(skb), off); 220 221 skb_reset_network_header(skb); 222 skb_reset_transport_header(skb); 223 if (iptunnel_pull_offloads(skb)) 224 return false; 225 226 return true; 227 } 228 229 static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) 230 { 231 struct in6_addr *addr; 232 233 srh->segments_left--; 234 addr = srh->segments + srh->segments_left; 235 *daddr = *addr; 236 } 237 238 static int 239 seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, 240 u32 tbl_id, bool local_delivery) 241 { 242 struct net *net = dev_net(skb->dev); 243 struct ipv6hdr *hdr = ipv6_hdr(skb); 244 int flags = RT6_LOOKUP_F_HAS_SADDR; 245 struct dst_entry *dst = NULL; 246 struct rt6_info *rt; 247 struct flowi6 fl6; 248 int dev_flags = 0; 249 250 fl6.flowi6_iif = skb->dev->ifindex; 251 fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; 252 fl6.saddr = hdr->saddr; 253 fl6.flowlabel = ip6_flowinfo(hdr); 254 fl6.flowi6_mark = skb->mark; 255 fl6.flowi6_proto = hdr->nexthdr; 256 257 if (nhaddr) 258 fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; 259 260 if (!tbl_id) { 261 dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); 262 } else { 263 struct fib6_table *table; 264 265 table = fib6_get_table(net, tbl_id); 266 if (!table) 267 goto out; 268 269 rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); 270 dst = &rt->dst; 271 } 272 273 /* we want to discard traffic destined for local packet processing, 274 * if @local_delivery is set to false. 275 */ 276 if (!local_delivery) 277 dev_flags |= IFF_LOOPBACK; 278 279 if (dst && (dst->dev->flags & dev_flags) && !dst->error) { 280 dst_release(dst); 281 dst = NULL; 282 } 283 284 out: 285 if (!dst) { 286 rt = net->ipv6.ip6_blk_hole_entry; 287 dst = &rt->dst; 288 dst_hold(dst); 289 } 290 291 skb_dst_drop(skb); 292 skb_dst_set(skb, dst); 293 return dst->error; 294 } 295 296 int seg6_lookup_nexthop(struct sk_buff *skb, 297 struct in6_addr *nhaddr, u32 tbl_id) 298 { 299 return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); 300 } 301 302 /* regular endpoint function */ 303 static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) 304 { 305 struct ipv6_sr_hdr *srh; 306 307 srh = get_and_validate_srh(skb); 308 if (!srh) 309 goto drop; 310 311 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 312 313 seg6_lookup_nexthop(skb, NULL, 0); 314 315 return dst_input(skb); 316 317 drop: 318 kfree_skb(skb); 319 return -EINVAL; 320 } 321 322 /* regular endpoint, and forward to specified nexthop */ 323 static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) 324 { 325 struct ipv6_sr_hdr *srh; 326 327 srh = get_and_validate_srh(skb); 328 if (!srh) 329 goto drop; 330 331 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 332 333 seg6_lookup_nexthop(skb, &slwt->nh6, 0); 334 335 return dst_input(skb); 336 337 drop: 338 kfree_skb(skb); 339 return -EINVAL; 340 } 341 342 static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) 343 { 344 struct ipv6_sr_hdr *srh; 345 346 srh = get_and_validate_srh(skb); 347 if (!srh) 348 goto drop; 349 350 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 351 352 seg6_lookup_nexthop(skb, NULL, slwt->table); 353 354 return dst_input(skb); 355 356 drop: 357 kfree_skb(skb); 358 return -EINVAL; 359 } 360 361 /* decapsulate and forward inner L2 frame on specified interface */ 362 static int input_action_end_dx2(struct sk_buff *skb, 363 struct seg6_local_lwt *slwt) 364 { 365 struct net *net = dev_net(skb->dev); 366 struct net_device *odev; 367 struct ethhdr *eth; 368 369 if (!decap_and_validate(skb, IPPROTO_ETHERNET)) 370 goto drop; 371 372 if (!pskb_may_pull(skb, ETH_HLEN)) 373 goto drop; 374 375 skb_reset_mac_header(skb); 376 eth = (struct ethhdr *)skb->data; 377 378 /* To determine the frame's protocol, we assume it is 802.3. This avoids 379 * a call to eth_type_trans(), which is not really relevant for our 380 * use case. 381 */ 382 if (!eth_proto_is_802_3(eth->h_proto)) 383 goto drop; 384 385 odev = dev_get_by_index_rcu(net, slwt->oif); 386 if (!odev) 387 goto drop; 388 389 /* As we accept Ethernet frames, make sure the egress device is of 390 * the correct type. 391 */ 392 if (odev->type != ARPHRD_ETHER) 393 goto drop; 394 395 if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev)) 396 goto drop; 397 398 skb_orphan(skb); 399 400 if (skb_warn_if_lro(skb)) 401 goto drop; 402 403 skb_forward_csum(skb); 404 405 if (skb->len - ETH_HLEN > odev->mtu) 406 goto drop; 407 408 skb->dev = odev; 409 skb->protocol = eth->h_proto; 410 411 return dev_queue_xmit(skb); 412 413 drop: 414 kfree_skb(skb); 415 return -EINVAL; 416 } 417 418 static int input_action_end_dx6_finish(struct net *net, struct sock *sk, 419 struct sk_buff *skb) 420 { 421 struct dst_entry *orig_dst = skb_dst(skb); 422 struct in6_addr *nhaddr = NULL; 423 struct seg6_local_lwt *slwt; 424 425 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 426 427 /* The inner packet is not associated to any local interface, 428 * so we do not call netif_rx(). 429 * 430 * If slwt->nh6 is set to ::, then lookup the nexthop for the 431 * inner packet's DA. Otherwise, use the specified nexthop. 432 */ 433 if (!ipv6_addr_any(&slwt->nh6)) 434 nhaddr = &slwt->nh6; 435 436 seg6_lookup_nexthop(skb, nhaddr, 0); 437 438 return dst_input(skb); 439 } 440 441 /* decapsulate and forward to specified nexthop */ 442 static int input_action_end_dx6(struct sk_buff *skb, 443 struct seg6_local_lwt *slwt) 444 { 445 /* this function accepts IPv6 encapsulated packets, with either 446 * an SRH with SL=0, or no SRH. 447 */ 448 449 if (!decap_and_validate(skb, IPPROTO_IPV6)) 450 goto drop; 451 452 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 453 goto drop; 454 455 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 456 nf_reset_ct(skb); 457 458 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 459 return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 460 dev_net(skb->dev), NULL, skb, NULL, 461 skb_dst(skb)->dev, input_action_end_dx6_finish); 462 463 return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); 464 drop: 465 kfree_skb(skb); 466 return -EINVAL; 467 } 468 469 static int input_action_end_dx4_finish(struct net *net, struct sock *sk, 470 struct sk_buff *skb) 471 { 472 struct dst_entry *orig_dst = skb_dst(skb); 473 struct seg6_local_lwt *slwt; 474 struct iphdr *iph; 475 __be32 nhaddr; 476 int err; 477 478 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 479 480 iph = ip_hdr(skb); 481 482 nhaddr = slwt->nh4.s_addr ?: iph->daddr; 483 484 skb_dst_drop(skb); 485 486 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); 487 if (err) { 488 kfree_skb(skb); 489 return -EINVAL; 490 } 491 492 return dst_input(skb); 493 } 494 495 static int input_action_end_dx4(struct sk_buff *skb, 496 struct seg6_local_lwt *slwt) 497 { 498 if (!decap_and_validate(skb, IPPROTO_IPIP)) 499 goto drop; 500 501 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 502 goto drop; 503 504 skb->protocol = htons(ETH_P_IP); 505 skb_set_transport_header(skb, sizeof(struct iphdr)); 506 nf_reset_ct(skb); 507 508 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 509 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 510 dev_net(skb->dev), NULL, skb, NULL, 511 skb_dst(skb)->dev, input_action_end_dx4_finish); 512 513 return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); 514 drop: 515 kfree_skb(skb); 516 return -EINVAL; 517 } 518 519 #ifdef CONFIG_NET_L3_MASTER_DEV 520 static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) 521 { 522 const struct nl_info *nli = &fib6_cfg->fc_nlinfo; 523 524 return nli->nl_net; 525 } 526 527 static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, 528 u16 family, struct netlink_ext_ack *extack) 529 { 530 struct seg6_end_dt_info *info = &slwt->dt_info; 531 int vrf_ifindex; 532 struct net *net; 533 534 net = fib6_config_get_net(cfg); 535 536 /* note that vrf_table was already set by parse_nla_vrftable() */ 537 vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, 538 info->vrf_table); 539 if (vrf_ifindex < 0) { 540 if (vrf_ifindex == -EPERM) { 541 NL_SET_ERR_MSG(extack, 542 "Strict mode for VRF is disabled"); 543 } else if (vrf_ifindex == -ENODEV) { 544 NL_SET_ERR_MSG(extack, 545 "Table has no associated VRF device"); 546 } else { 547 pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", 548 vrf_ifindex); 549 } 550 551 return vrf_ifindex; 552 } 553 554 info->net = net; 555 info->vrf_ifindex = vrf_ifindex; 556 557 info->family = family; 558 info->mode = DT_VRF_MODE; 559 560 return 0; 561 } 562 563 /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and 564 * routes the IPv4/IPv6 packet by looking at the configured routing table. 565 * 566 * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment 567 * Routing Header packets) from several interfaces and the outer IPv6 568 * destination address (DA) is used for retrieving the specific instance of the 569 * End.DT4/DT6 behavior that should process the packets. 570 * 571 * However, the inner IPv4/IPv6 packet is not really bound to any receiving 572 * interface and thus the End.DT4/DT6 sets the VRF (associated with the 573 * corresponding routing table) as the *receiving* interface. 574 * In other words, the End.DT4/DT6 processes a packet as if it has been received 575 * directly by the VRF (and not by one of its slave devices, if any). 576 * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in 577 * according to the routing table configured by the End.DT4/DT6 instance. 578 * 579 * This design allows you to get some interesting features like: 580 * 1) the statistics on rx packets; 581 * 2) the possibility to install a packet sniffer on the receiving interface 582 * (the VRF one) for looking at the incoming packets; 583 * 3) the possibility to leverage the netfilter prerouting hook for the inner 584 * IPv4 packet. 585 * 586 * This function returns: 587 * - the sk_buff* when the VRF rcv handler has processed the packet correctly; 588 * - NULL when the skb is consumed by the VRF rcv handler; 589 * - a pointer which encodes a negative error number in case of error. 590 * Note that in this case, the function takes care of freeing the skb. 591 */ 592 static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family, 593 struct net_device *dev) 594 { 595 /* based on l3mdev_ip_rcv; we are only interested in the master */ 596 if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev))) 597 goto drop; 598 599 if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv)) 600 goto drop; 601 602 /* the decap packet IPv4/IPv6 does not come with any mac header info. 603 * We must unset the mac header to allow the VRF device to rebuild it, 604 * just in case there is a sniffer attached on the device. 605 */ 606 skb_unset_mac_header(skb); 607 608 skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family); 609 if (!skb) 610 /* the skb buffer was consumed by the handler */ 611 return NULL; 612 613 /* when a packet is received by a VRF or by one of its slaves, the 614 * master device reference is set into the skb. 615 */ 616 if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex)) 617 goto drop; 618 619 return skb; 620 621 drop: 622 kfree_skb(skb); 623 return ERR_PTR(-EINVAL); 624 } 625 626 static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb, 627 struct seg6_end_dt_info *info) 628 { 629 int vrf_ifindex = info->vrf_ifindex; 630 struct net *net = info->net; 631 632 if (unlikely(vrf_ifindex < 0)) 633 goto error; 634 635 if (unlikely(!net_eq(dev_net(skb->dev), net))) 636 goto error; 637 638 return dev_get_by_index_rcu(net, vrf_ifindex); 639 640 error: 641 return NULL; 642 } 643 644 static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, 645 struct seg6_local_lwt *slwt, u16 family) 646 { 647 struct seg6_end_dt_info *info = &slwt->dt_info; 648 struct net_device *vrf; 649 __be16 protocol; 650 int hdrlen; 651 652 vrf = end_dt_get_vrf_rcu(skb, info); 653 if (unlikely(!vrf)) 654 goto drop; 655 656 switch (family) { 657 case AF_INET: 658 protocol = htons(ETH_P_IP); 659 hdrlen = sizeof(struct iphdr); 660 break; 661 case AF_INET6: 662 protocol = htons(ETH_P_IPV6); 663 hdrlen = sizeof(struct ipv6hdr); 664 break; 665 case AF_UNSPEC: 666 fallthrough; 667 default: 668 goto drop; 669 } 670 671 if (unlikely(info->family != AF_UNSPEC && info->family != family)) { 672 pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); 673 goto drop; 674 } 675 676 skb->protocol = protocol; 677 678 skb_dst_drop(skb); 679 680 skb_set_transport_header(skb, hdrlen); 681 nf_reset_ct(skb); 682 683 return end_dt_vrf_rcv(skb, family, vrf); 684 685 drop: 686 kfree_skb(skb); 687 return ERR_PTR(-EINVAL); 688 } 689 690 static int input_action_end_dt4(struct sk_buff *skb, 691 struct seg6_local_lwt *slwt) 692 { 693 struct iphdr *iph; 694 int err; 695 696 if (!decap_and_validate(skb, IPPROTO_IPIP)) 697 goto drop; 698 699 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 700 goto drop; 701 702 skb = end_dt_vrf_core(skb, slwt, AF_INET); 703 if (!skb) 704 /* packet has been processed and consumed by the VRF */ 705 return 0; 706 707 if (IS_ERR(skb)) 708 return PTR_ERR(skb); 709 710 iph = ip_hdr(skb); 711 712 err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev); 713 if (unlikely(err)) 714 goto drop; 715 716 return dst_input(skb); 717 718 drop: 719 kfree_skb(skb); 720 return -EINVAL; 721 } 722 723 static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg, 724 struct netlink_ext_ack *extack) 725 { 726 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack); 727 } 728 729 static enum 730 seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) 731 { 732 unsigned long parsed_optattrs = slwt->parsed_optattrs; 733 bool legacy, vrfmode; 734 735 legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)); 736 vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)); 737 738 if (!(legacy ^ vrfmode)) 739 /* both are absent or present: invalid DT6 mode */ 740 return DT_INVALID_MODE; 741 742 return legacy ? DT_LEGACY_MODE : DT_VRF_MODE; 743 } 744 745 static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt) 746 { 747 struct seg6_end_dt_info *info = &slwt->dt_info; 748 749 return info->mode; 750 } 751 752 static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg, 753 struct netlink_ext_ack *extack) 754 { 755 enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt); 756 struct seg6_end_dt_info *info = &slwt->dt_info; 757 758 switch (mode) { 759 case DT_LEGACY_MODE: 760 info->mode = DT_LEGACY_MODE; 761 return 0; 762 case DT_VRF_MODE: 763 return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack); 764 default: 765 NL_SET_ERR_MSG(extack, "table or vrftable must be specified"); 766 return -EINVAL; 767 } 768 } 769 #endif 770 771 static int input_action_end_dt6(struct sk_buff *skb, 772 struct seg6_local_lwt *slwt) 773 { 774 if (!decap_and_validate(skb, IPPROTO_IPV6)) 775 goto drop; 776 777 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 778 goto drop; 779 780 #ifdef CONFIG_NET_L3_MASTER_DEV 781 if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE) 782 goto legacy_mode; 783 784 /* DT6_VRF_MODE */ 785 skb = end_dt_vrf_core(skb, slwt, AF_INET6); 786 if (!skb) 787 /* packet has been processed and consumed by the VRF */ 788 return 0; 789 790 if (IS_ERR(skb)) 791 return PTR_ERR(skb); 792 793 /* note: this time we do not need to specify the table because the VRF 794 * takes care of selecting the correct table. 795 */ 796 seg6_lookup_any_nexthop(skb, NULL, 0, true); 797 798 return dst_input(skb); 799 800 legacy_mode: 801 #endif 802 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 803 804 seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); 805 806 return dst_input(skb); 807 808 drop: 809 kfree_skb(skb); 810 return -EINVAL; 811 } 812 813 #ifdef CONFIG_NET_L3_MASTER_DEV 814 static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, 815 struct netlink_ext_ack *extack) 816 { 817 return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); 818 } 819 820 static int input_action_end_dt46(struct sk_buff *skb, 821 struct seg6_local_lwt *slwt) 822 { 823 unsigned int off = 0; 824 int nexthdr; 825 826 nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); 827 if (unlikely(nexthdr < 0)) 828 goto drop; 829 830 switch (nexthdr) { 831 case IPPROTO_IPIP: 832 return input_action_end_dt4(skb, slwt); 833 case IPPROTO_IPV6: 834 return input_action_end_dt6(skb, slwt); 835 } 836 837 drop: 838 kfree_skb(skb); 839 return -EINVAL; 840 } 841 #endif 842 843 /* push an SRH on top of the current one */ 844 static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) 845 { 846 struct ipv6_sr_hdr *srh; 847 int err = -EINVAL; 848 849 srh = get_and_validate_srh(skb); 850 if (!srh) 851 goto drop; 852 853 err = seg6_do_srh_inline(skb, slwt->srh); 854 if (err) 855 goto drop; 856 857 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 858 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 859 860 seg6_lookup_nexthop(skb, NULL, 0); 861 862 return dst_input(skb); 863 864 drop: 865 kfree_skb(skb); 866 return err; 867 } 868 869 /* encapsulate within an outer IPv6 header and a specified SRH */ 870 static int input_action_end_b6_encap(struct sk_buff *skb, 871 struct seg6_local_lwt *slwt) 872 { 873 struct ipv6_sr_hdr *srh; 874 int err = -EINVAL; 875 876 srh = get_and_validate_srh(skb); 877 if (!srh) 878 goto drop; 879 880 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 881 882 skb_reset_inner_headers(skb); 883 skb->encapsulation = 1; 884 885 err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6); 886 if (err) 887 goto drop; 888 889 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 890 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 891 892 seg6_lookup_nexthop(skb, NULL, 0); 893 894 return dst_input(skb); 895 896 drop: 897 kfree_skb(skb); 898 return err; 899 } 900 901 DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); 902 903 bool seg6_bpf_has_valid_srh(struct sk_buff *skb) 904 { 905 struct seg6_bpf_srh_state *srh_state = 906 this_cpu_ptr(&seg6_bpf_srh_states); 907 struct ipv6_sr_hdr *srh = srh_state->srh; 908 909 if (unlikely(srh == NULL)) 910 return false; 911 912 if (unlikely(!srh_state->valid)) { 913 if ((srh_state->hdrlen & 7) != 0) 914 return false; 915 916 srh->hdrlen = (u8)(srh_state->hdrlen >> 3); 917 if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true)) 918 return false; 919 920 srh_state->valid = true; 921 } 922 923 return true; 924 } 925 926 static int input_action_end_bpf(struct sk_buff *skb, 927 struct seg6_local_lwt *slwt) 928 { 929 struct seg6_bpf_srh_state *srh_state = 930 this_cpu_ptr(&seg6_bpf_srh_states); 931 struct ipv6_sr_hdr *srh; 932 int ret; 933 934 srh = get_and_validate_srh(skb); 935 if (!srh) { 936 kfree_skb(skb); 937 return -EINVAL; 938 } 939 advance_nextseg(srh, &ipv6_hdr(skb)->daddr); 940 941 /* preempt_disable is needed to protect the per-CPU buffer srh_state, 942 * which is also accessed by the bpf_lwt_seg6_* helpers 943 */ 944 preempt_disable(); 945 srh_state->srh = srh; 946 srh_state->hdrlen = srh->hdrlen << 3; 947 srh_state->valid = true; 948 949 rcu_read_lock(); 950 bpf_compute_data_pointers(skb); 951 ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb); 952 rcu_read_unlock(); 953 954 switch (ret) { 955 case BPF_OK: 956 case BPF_REDIRECT: 957 break; 958 case BPF_DROP: 959 goto drop; 960 default: 961 pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret); 962 goto drop; 963 } 964 965 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) 966 goto drop; 967 968 preempt_enable(); 969 if (ret != BPF_REDIRECT) 970 seg6_lookup_nexthop(skb, NULL, 0); 971 972 return dst_input(skb); 973 974 drop: 975 preempt_enable(); 976 kfree_skb(skb); 977 return -EINVAL; 978 } 979 980 static struct seg6_action_desc seg6_action_table[] = { 981 { 982 .action = SEG6_LOCAL_ACTION_END, 983 .attrs = 0, 984 .optattrs = SEG6_F_LOCAL_COUNTERS, 985 .input = input_action_end, 986 }, 987 { 988 .action = SEG6_LOCAL_ACTION_END_X, 989 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), 990 .optattrs = SEG6_F_LOCAL_COUNTERS, 991 .input = input_action_end_x, 992 }, 993 { 994 .action = SEG6_LOCAL_ACTION_END_T, 995 .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), 996 .optattrs = SEG6_F_LOCAL_COUNTERS, 997 .input = input_action_end_t, 998 }, 999 { 1000 .action = SEG6_LOCAL_ACTION_END_DX2, 1001 .attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF), 1002 .optattrs = SEG6_F_LOCAL_COUNTERS, 1003 .input = input_action_end_dx2, 1004 }, 1005 { 1006 .action = SEG6_LOCAL_ACTION_END_DX6, 1007 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), 1008 .optattrs = SEG6_F_LOCAL_COUNTERS, 1009 .input = input_action_end_dx6, 1010 }, 1011 { 1012 .action = SEG6_LOCAL_ACTION_END_DX4, 1013 .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4), 1014 .optattrs = SEG6_F_LOCAL_COUNTERS, 1015 .input = input_action_end_dx4, 1016 }, 1017 { 1018 .action = SEG6_LOCAL_ACTION_END_DT4, 1019 .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 1020 .optattrs = SEG6_F_LOCAL_COUNTERS, 1021 #ifdef CONFIG_NET_L3_MASTER_DEV 1022 .input = input_action_end_dt4, 1023 .slwt_ops = { 1024 .build_state = seg6_end_dt4_build, 1025 }, 1026 #endif 1027 }, 1028 { 1029 .action = SEG6_LOCAL_ACTION_END_DT6, 1030 #ifdef CONFIG_NET_L3_MASTER_DEV 1031 .attrs = 0, 1032 .optattrs = SEG6_F_LOCAL_COUNTERS | 1033 SEG6_F_ATTR(SEG6_LOCAL_TABLE) | 1034 SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 1035 .slwt_ops = { 1036 .build_state = seg6_end_dt6_build, 1037 }, 1038 #else 1039 .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), 1040 .optattrs = SEG6_F_LOCAL_COUNTERS, 1041 #endif 1042 .input = input_action_end_dt6, 1043 }, 1044 { 1045 .action = SEG6_LOCAL_ACTION_END_DT46, 1046 .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), 1047 .optattrs = SEG6_F_LOCAL_COUNTERS, 1048 #ifdef CONFIG_NET_L3_MASTER_DEV 1049 .input = input_action_end_dt46, 1050 .slwt_ops = { 1051 .build_state = seg6_end_dt46_build, 1052 }, 1053 #endif 1054 }, 1055 { 1056 .action = SEG6_LOCAL_ACTION_END_B6, 1057 .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), 1058 .optattrs = SEG6_F_LOCAL_COUNTERS, 1059 .input = input_action_end_b6, 1060 }, 1061 { 1062 .action = SEG6_LOCAL_ACTION_END_B6_ENCAP, 1063 .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), 1064 .optattrs = SEG6_F_LOCAL_COUNTERS, 1065 .input = input_action_end_b6_encap, 1066 .static_headroom = sizeof(struct ipv6hdr), 1067 }, 1068 { 1069 .action = SEG6_LOCAL_ACTION_END_BPF, 1070 .attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF), 1071 .optattrs = SEG6_F_LOCAL_COUNTERS, 1072 .input = input_action_end_bpf, 1073 }, 1074 1075 }; 1076 1077 static struct seg6_action_desc *__get_action_desc(int action) 1078 { 1079 struct seg6_action_desc *desc; 1080 int i, count; 1081 1082 count = ARRAY_SIZE(seg6_action_table); 1083 for (i = 0; i < count; i++) { 1084 desc = &seg6_action_table[i]; 1085 if (desc->action == action) 1086 return desc; 1087 } 1088 1089 return NULL; 1090 } 1091 1092 static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt) 1093 { 1094 return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS; 1095 } 1096 1097 static void seg6_local_update_counters(struct seg6_local_lwt *slwt, 1098 unsigned int len, int err) 1099 { 1100 struct pcpu_seg6_local_counters *pcounters; 1101 1102 pcounters = this_cpu_ptr(slwt->pcpu_counters); 1103 u64_stats_update_begin(&pcounters->syncp); 1104 1105 if (likely(!err)) { 1106 u64_stats_inc(&pcounters->packets); 1107 u64_stats_add(&pcounters->bytes, len); 1108 } else { 1109 u64_stats_inc(&pcounters->errors); 1110 } 1111 1112 u64_stats_update_end(&pcounters->syncp); 1113 } 1114 1115 static int seg6_local_input_core(struct net *net, struct sock *sk, 1116 struct sk_buff *skb) 1117 { 1118 struct dst_entry *orig_dst = skb_dst(skb); 1119 struct seg6_action_desc *desc; 1120 struct seg6_local_lwt *slwt; 1121 unsigned int len = skb->len; 1122 int rc; 1123 1124 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 1125 desc = slwt->desc; 1126 1127 rc = desc->input(skb, slwt); 1128 1129 if (!seg6_lwtunnel_counters_enabled(slwt)) 1130 return rc; 1131 1132 seg6_local_update_counters(slwt, len, rc); 1133 1134 return rc; 1135 } 1136 1137 static int seg6_local_input(struct sk_buff *skb) 1138 { 1139 if (skb->protocol != htons(ETH_P_IPV6)) { 1140 kfree_skb(skb); 1141 return -EINVAL; 1142 } 1143 1144 if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 1145 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, 1146 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 1147 seg6_local_input_core); 1148 1149 return seg6_local_input_core(dev_net(skb->dev), NULL, skb); 1150 } 1151 1152 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { 1153 [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, 1154 [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, 1155 [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, 1156 [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, 1157 [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, 1158 .len = sizeof(struct in_addr) }, 1159 [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, 1160 .len = sizeof(struct in6_addr) }, 1161 [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, 1162 [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, 1163 [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, 1164 [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED }, 1165 }; 1166 1167 static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1168 { 1169 struct ipv6_sr_hdr *srh; 1170 int len; 1171 1172 srh = nla_data(attrs[SEG6_LOCAL_SRH]); 1173 len = nla_len(attrs[SEG6_LOCAL_SRH]); 1174 1175 /* SRH must contain at least one segment */ 1176 if (len < sizeof(*srh) + sizeof(struct in6_addr)) 1177 return -EINVAL; 1178 1179 if (!seg6_validate_srh(srh, len, false)) 1180 return -EINVAL; 1181 1182 slwt->srh = kmemdup(srh, len, GFP_KERNEL); 1183 if (!slwt->srh) 1184 return -ENOMEM; 1185 1186 slwt->headroom += len; 1187 1188 return 0; 1189 } 1190 1191 static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1192 { 1193 struct ipv6_sr_hdr *srh; 1194 struct nlattr *nla; 1195 int len; 1196 1197 srh = slwt->srh; 1198 len = (srh->hdrlen + 1) << 3; 1199 1200 nla = nla_reserve(skb, SEG6_LOCAL_SRH, len); 1201 if (!nla) 1202 return -EMSGSIZE; 1203 1204 memcpy(nla_data(nla), srh, len); 1205 1206 return 0; 1207 } 1208 1209 static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1210 { 1211 int len = (a->srh->hdrlen + 1) << 3; 1212 1213 if (len != ((b->srh->hdrlen + 1) << 3)) 1214 return 1; 1215 1216 return memcmp(a->srh, b->srh, len); 1217 } 1218 1219 static void destroy_attr_srh(struct seg6_local_lwt *slwt) 1220 { 1221 kfree(slwt->srh); 1222 } 1223 1224 static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1225 { 1226 slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); 1227 1228 return 0; 1229 } 1230 1231 static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1232 { 1233 if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table)) 1234 return -EMSGSIZE; 1235 1236 return 0; 1237 } 1238 1239 static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1240 { 1241 if (a->table != b->table) 1242 return 1; 1243 1244 return 0; 1245 } 1246 1247 static struct 1248 seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) 1249 { 1250 #ifdef CONFIG_NET_L3_MASTER_DEV 1251 return &slwt->dt_info; 1252 #else 1253 return ERR_PTR(-EOPNOTSUPP); 1254 #endif 1255 } 1256 1257 static int parse_nla_vrftable(struct nlattr **attrs, 1258 struct seg6_local_lwt *slwt) 1259 { 1260 struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); 1261 1262 if (IS_ERR(info)) 1263 return PTR_ERR(info); 1264 1265 info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]); 1266 1267 return 0; 1268 } 1269 1270 static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1271 { 1272 struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); 1273 1274 if (IS_ERR(info)) 1275 return PTR_ERR(info); 1276 1277 if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table)) 1278 return -EMSGSIZE; 1279 1280 return 0; 1281 } 1282 1283 static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1284 { 1285 struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a); 1286 struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b); 1287 1288 if (info_a->vrf_table != info_b->vrf_table) 1289 return 1; 1290 1291 return 0; 1292 } 1293 1294 static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1295 { 1296 memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), 1297 sizeof(struct in_addr)); 1298 1299 return 0; 1300 } 1301 1302 static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1303 { 1304 struct nlattr *nla; 1305 1306 nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr)); 1307 if (!nla) 1308 return -EMSGSIZE; 1309 1310 memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr)); 1311 1312 return 0; 1313 } 1314 1315 static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1316 { 1317 return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); 1318 } 1319 1320 static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1321 { 1322 memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), 1323 sizeof(struct in6_addr)); 1324 1325 return 0; 1326 } 1327 1328 static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1329 { 1330 struct nlattr *nla; 1331 1332 nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr)); 1333 if (!nla) 1334 return -EMSGSIZE; 1335 1336 memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr)); 1337 1338 return 0; 1339 } 1340 1341 static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1342 { 1343 return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); 1344 } 1345 1346 static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1347 { 1348 slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]); 1349 1350 return 0; 1351 } 1352 1353 static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1354 { 1355 if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif)) 1356 return -EMSGSIZE; 1357 1358 return 0; 1359 } 1360 1361 static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1362 { 1363 if (a->iif != b->iif) 1364 return 1; 1365 1366 return 0; 1367 } 1368 1369 static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1370 { 1371 slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]); 1372 1373 return 0; 1374 } 1375 1376 static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1377 { 1378 if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif)) 1379 return -EMSGSIZE; 1380 1381 return 0; 1382 } 1383 1384 static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1385 { 1386 if (a->oif != b->oif) 1387 return 1; 1388 1389 return 0; 1390 } 1391 1392 #define MAX_PROG_NAME 256 1393 static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = { 1394 [SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, }, 1395 [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING, 1396 .len = MAX_PROG_NAME }, 1397 }; 1398 1399 static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1400 { 1401 struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1]; 1402 struct bpf_prog *p; 1403 int ret; 1404 u32 fd; 1405 1406 ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX, 1407 attrs[SEG6_LOCAL_BPF], 1408 bpf_prog_policy, NULL); 1409 if (ret < 0) 1410 return ret; 1411 1412 if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME]) 1413 return -EINVAL; 1414 1415 slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL); 1416 if (!slwt->bpf.name) 1417 return -ENOMEM; 1418 1419 fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]); 1420 p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL); 1421 if (IS_ERR(p)) { 1422 kfree(slwt->bpf.name); 1423 return PTR_ERR(p); 1424 } 1425 1426 slwt->bpf.prog = p; 1427 return 0; 1428 } 1429 1430 static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1431 { 1432 struct nlattr *nest; 1433 1434 if (!slwt->bpf.prog) 1435 return 0; 1436 1437 nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF); 1438 if (!nest) 1439 return -EMSGSIZE; 1440 1441 if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id)) 1442 return -EMSGSIZE; 1443 1444 if (slwt->bpf.name && 1445 nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name)) 1446 return -EMSGSIZE; 1447 1448 return nla_nest_end(skb, nest); 1449 } 1450 1451 static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1452 { 1453 if (!a->bpf.name && !b->bpf.name) 1454 return 0; 1455 1456 if (!a->bpf.name || !b->bpf.name) 1457 return 1; 1458 1459 return strcmp(a->bpf.name, b->bpf.name); 1460 } 1461 1462 static void destroy_attr_bpf(struct seg6_local_lwt *slwt) 1463 { 1464 kfree(slwt->bpf.name); 1465 if (slwt->bpf.prog) 1466 bpf_prog_put(slwt->bpf.prog); 1467 } 1468 1469 static const struct 1470 nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = { 1471 [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 }, 1472 [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 }, 1473 [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 }, 1474 }; 1475 1476 static int parse_nla_counters(struct nlattr **attrs, 1477 struct seg6_local_lwt *slwt) 1478 { 1479 struct pcpu_seg6_local_counters __percpu *pcounters; 1480 struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1]; 1481 int ret; 1482 1483 ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX, 1484 attrs[SEG6_LOCAL_COUNTERS], 1485 seg6_local_counters_policy, NULL); 1486 if (ret < 0) 1487 return ret; 1488 1489 /* basic support for SRv6 Behavior counters requires at least: 1490 * packets, bytes and errors. 1491 */ 1492 if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] || 1493 !tb[SEG6_LOCAL_CNT_ERRORS]) 1494 return -EINVAL; 1495 1496 /* counters are always zero initialized */ 1497 pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL); 1498 if (!pcounters) 1499 return -ENOMEM; 1500 1501 slwt->pcpu_counters = pcounters; 1502 1503 return 0; 1504 } 1505 1506 static int seg6_local_fill_nla_counters(struct sk_buff *skb, 1507 struct seg6_local_counters *counters) 1508 { 1509 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets, 1510 SEG6_LOCAL_CNT_PAD)) 1511 return -EMSGSIZE; 1512 1513 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes, 1514 SEG6_LOCAL_CNT_PAD)) 1515 return -EMSGSIZE; 1516 1517 if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors, 1518 SEG6_LOCAL_CNT_PAD)) 1519 return -EMSGSIZE; 1520 1521 return 0; 1522 } 1523 1524 static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt) 1525 { 1526 struct seg6_local_counters counters = { 0, 0, 0 }; 1527 struct nlattr *nest; 1528 int rc, i; 1529 1530 nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS); 1531 if (!nest) 1532 return -EMSGSIZE; 1533 1534 for_each_possible_cpu(i) { 1535 struct pcpu_seg6_local_counters *pcounters; 1536 u64 packets, bytes, errors; 1537 unsigned int start; 1538 1539 pcounters = per_cpu_ptr(slwt->pcpu_counters, i); 1540 do { 1541 start = u64_stats_fetch_begin_irq(&pcounters->syncp); 1542 1543 packets = u64_stats_read(&pcounters->packets); 1544 bytes = u64_stats_read(&pcounters->bytes); 1545 errors = u64_stats_read(&pcounters->errors); 1546 1547 } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); 1548 1549 counters.packets += packets; 1550 counters.bytes += bytes; 1551 counters.errors += errors; 1552 } 1553 1554 rc = seg6_local_fill_nla_counters(skb, &counters); 1555 if (rc < 0) { 1556 nla_nest_cancel(skb, nest); 1557 return rc; 1558 } 1559 1560 return nla_nest_end(skb, nest); 1561 } 1562 1563 static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b) 1564 { 1565 /* a and b are equal if both have pcpu_counters set or not */ 1566 return (!!((unsigned long)a->pcpu_counters)) ^ 1567 (!!((unsigned long)b->pcpu_counters)); 1568 } 1569 1570 static void destroy_attr_counters(struct seg6_local_lwt *slwt) 1571 { 1572 free_percpu(slwt->pcpu_counters); 1573 } 1574 1575 struct seg6_action_param { 1576 int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); 1577 int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); 1578 int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); 1579 1580 /* optional destroy() callback useful for releasing resources which 1581 * have been previously acquired in the corresponding parse() 1582 * function. 1583 */ 1584 void (*destroy)(struct seg6_local_lwt *slwt); 1585 }; 1586 1587 static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { 1588 [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, 1589 .put = put_nla_srh, 1590 .cmp = cmp_nla_srh, 1591 .destroy = destroy_attr_srh }, 1592 1593 [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, 1594 .put = put_nla_table, 1595 .cmp = cmp_nla_table }, 1596 1597 [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4, 1598 .put = put_nla_nh4, 1599 .cmp = cmp_nla_nh4 }, 1600 1601 [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6, 1602 .put = put_nla_nh6, 1603 .cmp = cmp_nla_nh6 }, 1604 1605 [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif, 1606 .put = put_nla_iif, 1607 .cmp = cmp_nla_iif }, 1608 1609 [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, 1610 .put = put_nla_oif, 1611 .cmp = cmp_nla_oif }, 1612 1613 [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf, 1614 .put = put_nla_bpf, 1615 .cmp = cmp_nla_bpf, 1616 .destroy = destroy_attr_bpf }, 1617 1618 [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable, 1619 .put = put_nla_vrftable, 1620 .cmp = cmp_nla_vrftable }, 1621 1622 [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters, 1623 .put = put_nla_counters, 1624 .cmp = cmp_nla_counters, 1625 .destroy = destroy_attr_counters }, 1626 }; 1627 1628 /* call the destroy() callback (if available) for each set attribute in 1629 * @parsed_attrs, starting from the first attribute up to the @max_parsed 1630 * (excluded) attribute. 1631 */ 1632 static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, 1633 struct seg6_local_lwt *slwt) 1634 { 1635 struct seg6_action_param *param; 1636 int i; 1637 1638 /* Every required seg6local attribute is identified by an ID which is 1639 * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask; 1640 * 1641 * We scan the 'parsed_attrs' bitmask, starting from the first attribute 1642 * up to the @max_parsed (excluded) attribute. 1643 * For each set attribute, we retrieve the corresponding destroy() 1644 * callback. If the callback is not available, then we skip to the next 1645 * attribute; otherwise, we call the destroy() callback. 1646 */ 1647 for (i = 0; i < max_parsed; ++i) { 1648 if (!(parsed_attrs & SEG6_F_ATTR(i))) 1649 continue; 1650 1651 param = &seg6_action_params[i]; 1652 1653 if (param->destroy) 1654 param->destroy(slwt); 1655 } 1656 } 1657 1658 /* release all the resources that may have been acquired during parsing 1659 * operations. 1660 */ 1661 static void destroy_attrs(struct seg6_local_lwt *slwt) 1662 { 1663 unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1664 1665 __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt); 1666 } 1667 1668 static int parse_nla_optional_attrs(struct nlattr **attrs, 1669 struct seg6_local_lwt *slwt) 1670 { 1671 struct seg6_action_desc *desc = slwt->desc; 1672 unsigned long parsed_optattrs = 0; 1673 struct seg6_action_param *param; 1674 int err, i; 1675 1676 for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { 1677 if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i]) 1678 continue; 1679 1680 /* once here, the i-th attribute is provided by the 1681 * userspace AND it is identified optional as well. 1682 */ 1683 param = &seg6_action_params[i]; 1684 1685 err = param->parse(attrs, slwt); 1686 if (err < 0) 1687 goto parse_optattrs_err; 1688 1689 /* current attribute has been correctly parsed */ 1690 parsed_optattrs |= SEG6_F_ATTR(i); 1691 } 1692 1693 /* store in the tunnel state all the optional attributed successfully 1694 * parsed. 1695 */ 1696 slwt->parsed_optattrs = parsed_optattrs; 1697 1698 return 0; 1699 1700 parse_optattrs_err: 1701 __destroy_attrs(parsed_optattrs, i, slwt); 1702 1703 return err; 1704 } 1705 1706 /* call the custom constructor of the behavior during its initialization phase 1707 * and after that all its attributes have been parsed successfully. 1708 */ 1709 static int 1710 seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg, 1711 struct netlink_ext_ack *extack) 1712 { 1713 struct seg6_action_desc *desc = slwt->desc; 1714 struct seg6_local_lwtunnel_ops *ops; 1715 1716 ops = &desc->slwt_ops; 1717 if (!ops->build_state) 1718 return 0; 1719 1720 return ops->build_state(slwt, cfg, extack); 1721 } 1722 1723 /* call the custom destructor of the behavior which is invoked before the 1724 * tunnel is going to be destroyed. 1725 */ 1726 static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) 1727 { 1728 struct seg6_action_desc *desc = slwt->desc; 1729 struct seg6_local_lwtunnel_ops *ops; 1730 1731 ops = &desc->slwt_ops; 1732 if (!ops->destroy_state) 1733 return; 1734 1735 ops->destroy_state(slwt); 1736 } 1737 1738 static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) 1739 { 1740 struct seg6_action_param *param; 1741 struct seg6_action_desc *desc; 1742 unsigned long invalid_attrs; 1743 int i, err; 1744 1745 desc = __get_action_desc(slwt->action); 1746 if (!desc) 1747 return -EINVAL; 1748 1749 if (!desc->input) 1750 return -EOPNOTSUPP; 1751 1752 slwt->desc = desc; 1753 slwt->headroom += desc->static_headroom; 1754 1755 /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be 1756 * disjoined, this allow us to release acquired resources by optional 1757 * attributes and by required attributes independently from each other 1758 * without any interference. 1759 * In other terms, we are sure that we do not release some the acquired 1760 * resources twice. 1761 * 1762 * Note that if an attribute is configured both as required and as 1763 * optional, it means that the user has messed something up in the 1764 * seg6_action_table. Therefore, this check is required for SRv6 1765 * behaviors to work properly. 1766 */ 1767 invalid_attrs = desc->attrs & desc->optattrs; 1768 if (invalid_attrs) { 1769 WARN_ONCE(1, 1770 "An attribute cannot be both required AND optional"); 1771 return -EINVAL; 1772 } 1773 1774 /* parse the required attributes */ 1775 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1776 if (desc->attrs & SEG6_F_ATTR(i)) { 1777 if (!attrs[i]) 1778 return -EINVAL; 1779 1780 param = &seg6_action_params[i]; 1781 1782 err = param->parse(attrs, slwt); 1783 if (err < 0) 1784 goto parse_attrs_err; 1785 } 1786 } 1787 1788 /* parse the optional attributes, if any */ 1789 err = parse_nla_optional_attrs(attrs, slwt); 1790 if (err < 0) 1791 goto parse_attrs_err; 1792 1793 return 0; 1794 1795 parse_attrs_err: 1796 /* release any resource that may have been acquired during the i-1 1797 * parse() operations. 1798 */ 1799 __destroy_attrs(desc->attrs, i, slwt); 1800 1801 return err; 1802 } 1803 1804 static int seg6_local_build_state(struct net *net, struct nlattr *nla, 1805 unsigned int family, const void *cfg, 1806 struct lwtunnel_state **ts, 1807 struct netlink_ext_ack *extack) 1808 { 1809 struct nlattr *tb[SEG6_LOCAL_MAX + 1]; 1810 struct lwtunnel_state *newts; 1811 struct seg6_local_lwt *slwt; 1812 int err; 1813 1814 if (family != AF_INET6) 1815 return -EINVAL; 1816 1817 err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla, 1818 seg6_local_policy, extack); 1819 1820 if (err < 0) 1821 return err; 1822 1823 if (!tb[SEG6_LOCAL_ACTION]) 1824 return -EINVAL; 1825 1826 newts = lwtunnel_state_alloc(sizeof(*slwt)); 1827 if (!newts) 1828 return -ENOMEM; 1829 1830 slwt = seg6_local_lwtunnel(newts); 1831 slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); 1832 1833 err = parse_nla_action(tb, slwt); 1834 if (err < 0) 1835 goto out_free; 1836 1837 err = seg6_local_lwtunnel_build_state(slwt, cfg, extack); 1838 if (err < 0) 1839 goto out_destroy_attrs; 1840 1841 newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; 1842 newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; 1843 newts->headroom = slwt->headroom; 1844 1845 *ts = newts; 1846 1847 return 0; 1848 1849 out_destroy_attrs: 1850 destroy_attrs(slwt); 1851 out_free: 1852 kfree(newts); 1853 return err; 1854 } 1855 1856 static void seg6_local_destroy_state(struct lwtunnel_state *lwt) 1857 { 1858 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1859 1860 seg6_local_lwtunnel_destroy_state(slwt); 1861 1862 destroy_attrs(slwt); 1863 1864 return; 1865 } 1866 1867 static int seg6_local_fill_encap(struct sk_buff *skb, 1868 struct lwtunnel_state *lwt) 1869 { 1870 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1871 struct seg6_action_param *param; 1872 unsigned long attrs; 1873 int i, err; 1874 1875 if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) 1876 return -EMSGSIZE; 1877 1878 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1879 1880 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1881 if (attrs & SEG6_F_ATTR(i)) { 1882 param = &seg6_action_params[i]; 1883 err = param->put(skb, slwt); 1884 if (err < 0) 1885 return err; 1886 } 1887 } 1888 1889 return 0; 1890 } 1891 1892 static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) 1893 { 1894 struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); 1895 unsigned long attrs; 1896 int nlsize; 1897 1898 nlsize = nla_total_size(4); /* action */ 1899 1900 attrs = slwt->desc->attrs | slwt->parsed_optattrs; 1901 1902 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH)) 1903 nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); 1904 1905 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)) 1906 nlsize += nla_total_size(4); 1907 1908 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4)) 1909 nlsize += nla_total_size(4); 1910 1911 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6)) 1912 nlsize += nla_total_size(16); 1913 1914 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF)) 1915 nlsize += nla_total_size(4); 1916 1917 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF)) 1918 nlsize += nla_total_size(4); 1919 1920 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF)) 1921 nlsize += nla_total_size(sizeof(struct nlattr)) + 1922 nla_total_size(MAX_PROG_NAME) + 1923 nla_total_size(4); 1924 1925 if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)) 1926 nlsize += nla_total_size(4); 1927 1928 if (attrs & SEG6_F_LOCAL_COUNTERS) 1929 nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */ 1930 /* SEG6_LOCAL_CNT_PACKETS */ 1931 nla_total_size_64bit(sizeof(__u64)) + 1932 /* SEG6_LOCAL_CNT_BYTES */ 1933 nla_total_size_64bit(sizeof(__u64)) + 1934 /* SEG6_LOCAL_CNT_ERRORS */ 1935 nla_total_size_64bit(sizeof(__u64)); 1936 1937 return nlsize; 1938 } 1939 1940 static int seg6_local_cmp_encap(struct lwtunnel_state *a, 1941 struct lwtunnel_state *b) 1942 { 1943 struct seg6_local_lwt *slwt_a, *slwt_b; 1944 struct seg6_action_param *param; 1945 unsigned long attrs_a, attrs_b; 1946 int i; 1947 1948 slwt_a = seg6_local_lwtunnel(a); 1949 slwt_b = seg6_local_lwtunnel(b); 1950 1951 if (slwt_a->action != slwt_b->action) 1952 return 1; 1953 1954 attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs; 1955 attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs; 1956 1957 if (attrs_a != attrs_b) 1958 return 1; 1959 1960 for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { 1961 if (attrs_a & SEG6_F_ATTR(i)) { 1962 param = &seg6_action_params[i]; 1963 if (param->cmp(slwt_a, slwt_b)) 1964 return 1; 1965 } 1966 } 1967 1968 return 0; 1969 } 1970 1971 static const struct lwtunnel_encap_ops seg6_local_ops = { 1972 .build_state = seg6_local_build_state, 1973 .destroy_state = seg6_local_destroy_state, 1974 .input = seg6_local_input, 1975 .fill_encap = seg6_local_fill_encap, 1976 .get_encap_size = seg6_local_get_encap_size, 1977 .cmp_encap = seg6_local_cmp_encap, 1978 .owner = THIS_MODULE, 1979 }; 1980 1981 int __init seg6_local_init(void) 1982 { 1983 /* If the max total number of defined attributes is reached, then your 1984 * kernel build stops here. 1985 * 1986 * This check is required to avoid arithmetic overflows when processing 1987 * behavior attributes and the maximum number of defined attributes 1988 * exceeds the allowed value. 1989 */ 1990 BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long)); 1991 1992 return lwtunnel_encap_add_ops(&seg6_local_ops, 1993 LWTUNNEL_ENCAP_SEG6_LOCAL); 1994 } 1995 1996 void seg6_local_exit(void) 1997 { 1998 lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); 1999 } 2000