// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/sysctl.h>
#include <linux/net.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/netconf.h>
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/percpu.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
#include <net/ip_tunnels.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#endif
#include <net/ipv6_stubs.h>
#include <net/rtnh.h>
#include "internal.h"

/* max memory we will use for mpls_route */
#define MAX_MPLS_ROUTE_MEM	4096

/* Maximum number of labels to look ahead at when selecting a path of
 * a multipath route
 */
#define MAX_MP_SELECT_LABELS 4

/* Sentinel neighbour table id meaning "no via was configured; transmit
 * using the output device's own link-layer address".
 */
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)

/* Bounds used by the sysctl handlers: labels are 20 bits, TTL is 8 bits */
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;

#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
/* A GRE tunnel with MPLS encapsulation only adds one shim header */
static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct mpls_shim_hdr);
}

static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
	.encap_hlen	= ipgre_mpls_encap_hlen,
};

static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}

static void ipgre_tunnel_encap_del_mpls_ops(void)
{
	ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}
#else
/* Stubs when IP tunnel support is not built */
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return 0;
}

static void ipgre_tunnel_encap_del_mpls_ops(void)
{
}
#endif

static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags);

/* Look up the route installed at @index in the per-netns platform label
 * table.  Uses mpls_dereference(), so this is the configuration-side
 * (writer-protected) lookup — NOTE(review): confirm the exact lock
 * asserted by mpls_dereference() in internal.h.
 */
static struct mpls_route *mpls_route_input(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	return mpls_dereference(net, platform_label[index]);
}

/* Read a consistent snapshot of the platform label table pointer and its
 * size.  The table can be resized concurrently, so pointer and length are
 * read together under the platform_label_seq seqcount retry loop.
 */
static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels)
{
	struct mpls_route __rcu **platform_label;
	unsigned int sequence;

	do {
		sequence = read_seqcount_begin(&net->mpls.platform_label_seq);
		platform_label = rcu_dereference(net->mpls.platform_label);
		*platform_labels = net->mpls.platform_labels;
	} while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence));

	return platform_label;
}

/* RCU-reader-side route lookup used on the packet forwarding path.
 * Returns NULL when @index is beyond the configured table size or no
 * route is installed there.
 */
static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;

	platform_label = mpls_platform_label_rcu(net, &platform_labels);

	if (index >= platform_labels)
		return NULL;

	return rcu_dereference(platform_label[index]);
}

/* A device can transmit only if it exists, is administratively up and
 * has link carrier.
 */
bool mpls_output_possible(const struct net_device *dev)
{
	return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
}
EXPORT_SYMBOL_GPL(mpls_output_possible);

/* The via address is stored at a route-specific offset past the nexthop */
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
	return (u8 *)nh + rt->rt_via_offset;
}

/* const-qualified wrapper around __mpls_nh_via() */
static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}

static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}

unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);

/* True when @skb will not fit in @mtu; GSO packets are acceptable as
 * long as each resulting segment fits.
 */
bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(mpls_pkt_too_big);

/* Account one transmitted unicast packet against the protocol-appropriate
 * counters: MPLS per-device stats, or the IPv4/IPv6 MIBs after the label
 * stack has been popped.
 */
void mpls_stats_inc_outucastpkts(struct net *net,
				 struct net_device *dev,
				 const struct sk_buff *skb)
{
	struct mpls_dev *mdev;

	if (skb->protocol == htons(ETH_P_MPLS_UC)) {
		mdev = mpls_dev_rcu(dev);
		if (mdev)
			MPLS_INC_STATS_LEN(mdev, skb->len,
					   tx_packets,
					   tx_bytes);
	} else if (skb->protocol == htons(ETH_P_IP)) {
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct inet6_dev *in6dev = in6_dev_rcu(dev);

		if (in6dev)
			IP6_UPD_PO_STATS(net, in6dev,
					 IPSTATS_MIB_OUT, skb->len);
#endif
	}
}
EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts);

/* Compute the multipath flow hash: fold in up to MAX_MP_SELECT_LABELS
 * non-reserved labels and, at the bottom of the stack, the IPv4/IPv6
 * addresses and next protocol when they are present in the skb.
 */
static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
	struct mpls_entry_decoded dec;
	unsigned int mpls_hdr_len = 0;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	u32 hash = 0;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
	     label_index++) {
		mpls_hdr_len += sizeof(*hdr);
		if (!pskb_may_pull(skb, mpls_hdr_len))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		if (!dec.bos)
			continue;

		/* found bottom label; does skb have room for a header? */
		if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(hdr + 1);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, mpls_hdr_len +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(hdr + 1);
				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}

		break;
	}

	return hash;
}

/* Nexthops are laid out as a flat array of rt_nh_size byte slots after
 * the route; return the slot at @index.
 */
static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
{
	return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
}

/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
 * a next hop (nh->nh_flags) are modified by netdev event handlers.
 * Since those fields can change at any moment, use READ_ONCE to
 * access both.
 */
static const struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
						   struct sk_buff *skb)
{
	u32 hash = 0;
	int nh_index = 0;
	int n = 0;
	u8 alive;

	/* No need to look further into packet if there's only
	 * one path
	 */
	if (rt->rt_nhn == 1)
		return rt->rt_nh;

	alive = READ_ONCE(rt->rt_nhn_alive);
	if (alive == 0)
		return NULL;

	hash = mpls_multipath_hash(rt, skb);
	nh_index = hash % alive;
	/* If every nexthop is alive the hash indexes the array directly;
	 * otherwise walk the nexthops counting only live ones.
	 */
	if (alive == rt->rt_nhn)
		goto out;
	for_nexthops(rt) {
		unsigned int nh_flags = READ_ONCE(nh->nh_flags);

		if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			continue;
		if (n == nh_index)
			return nh;
		n++;
	} endfor_nexthops(rt);

out:
	return mpls_get_nexthop(rt, nh_index);
}

/* Hand the packet to IPv4/IPv6 at the bottom of the label stack:
 * set skb->protocol and write the (possibly propagated) TTL into the
 * inner header.  Returns false if the payload protocol cannot be
 * determined.
 */
static bool mpls_egress(struct net *net, struct mpls_route *rt,
			struct sk_buff *skb, struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present.  The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes
	 * long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	/* When the route does not pin a payload type, infer it from the
	 * IP version nibble (4 and 6 match MPT_IPV4/MPT_IPV6).
	 */
	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		u8 new_ttl;
		skb->protocol = htons(ETH_P_IP);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * TTL, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			new_ttl = dec.ttl;
		else
			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;

		/* Incrementally patch the header checksum for the TTL edit */
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(new_ttl << 8));
		hdr4->ttl = new_ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * hop limit, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			hdr6->hop_limit = dec.ttl;
		else if (hdr6->hop_limit)
			hdr6->hop_limit = hdr6->hop_limit - 1;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		/* Should have decided which protocol it is by now */
		break;
	}

	return success;
}

/* Receive handler for ETH_P_MPLS_UC: look up the top label, pop it,
 * either swap/push the nexthop's labels or (for penultimate hop popping)
 * hand the payload to IP, then transmit via the neighbour layer.
 */
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net_rcu(dev);
	struct mpls_shim_hdr *hdr;
	const struct mpls_nh *nh;
	struct mpls_route *rt;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *out_mdev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		goto drop;

	MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets,
			   rx_bytes);

	if (!mdev->input_enabled) {
		MPLS_INC_STATS(mdev, rx_dropped);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto err;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto err;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto err;

	skb_dst_drop(skb);

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt) {
		MPLS_INC_STATS(mdev, rx_noroute);
		goto drop;
	}

	nh = mpls_select_multipath(rt, skb);
	if (!nh)
		goto err;

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto err;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto err;

	/* Find the output device */
	out_dev = nh->nh_dev;
	if (!mpls_output_possible(out_dev))
		goto tx_err;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto tx_err;

	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto tx_err;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	dec.ttl -= 1;
	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(net, rt, skb, dec))
			goto err;
	} else {
		bool bos;
		int i;
		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);
		/* Push the new labels */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	mpls_stats_inc_outucastpkts(net, out_dev, skb);

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

tx_err:
	out_mdev = out_dev ? mpls_dev_rcu(out_dev) : NULL;
	if (out_mdev)
		MPLS_INC_STATS(out_mdev, tx_errors);
	goto drop;
err:
	MPLS_INC_STATS(mdev, rx_errors);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};

static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
};

/* Parsed parameters of a route add/del request, consumed by
 * mpls_route_add()/mpls_route_del() and the nexthop builders.
 */
struct mpls_route_config {
	u32			rc_protocol;
	u32			rc_ifindex;
	u8			rc_via_table;
	u8			rc_via_alen;
	u8			rc_via[MAX_VIA_ALEN];
	u32			rc_label;
	u8			rc_ttl_propagate;
	u8			rc_output_labels;
	u32			rc_output_label[MAX_NEW_LABELS];
	u32			rc_nlflags;
	enum mpls_payload_type	rc_payload_type;
	struct nl_info		rc_nlinfo;
	struct rtnexthop	*rc_mp;
	int			rc_mp_len;
};

/* all nexthops within a route have the same size based on max
 * number of labels and max via length for a hop
 */
static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
	u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
	struct mpls_route *rt;
	size_t size;

	size = sizeof(*rt) + num_nh * nh_size;
	if (size > MAX_MPLS_ROUTE_MEM)
		return ERR_PTR(-EINVAL);

	rt = kzalloc(size, GFP_KERNEL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	rt->rt_nhn = num_nh;
	rt->rt_nhn_alive = num_nh;
	rt->rt_nh_size = nh_size;
	rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);

	return rt;
}

/* RCU callback: drop each nexthop's device reference, then free the route */
static void mpls_rt_free_rcu(struct rcu_head *head)
{
	struct mpls_route
*rt;

	rt = container_of(head, struct mpls_route, rt_rcu);

	change_nexthops(rt) {
		netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	} endfor_nexthops(rt);

	kfree(rt);
}

/* Free @rt after an RCU grace period so forwarding-path readers finish */
static void mpls_rt_free(struct mpls_route *rt)
{
	if (rt)
		call_rcu(&rt->rt_rcu, mpls_rt_free_rcu);
}

/* Emit the RTM_NEWROUTE/RTM_DELROUTE notification for a table update */
static void mpls_notify_route(struct net *net, unsigned index,
			      struct mpls_route *old, struct mpls_route *new,
			      const struct nl_info *info)
{
	struct nlmsghdr *nlh = info ? info->nlh : NULL;
	unsigned portid = info ? info->portid : 0;
	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
	struct mpls_route *rt = new ? new : old;
	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
	/* Ignore reserved labels for now */
	if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}

/* Install @new at @index (NULL deletes), notify listeners and free the
 * route it replaced.
 */
static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	rt = mpls_dereference(net, platform_label[index]);
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}

/* First unused non-reserved label, or LABEL_NOT_SPECIFIED if full */
static unsigned int find_free_label(struct net *net)
{
	unsigned int index;

	for (index = MPLS_LABEL_FIRST_UNRESERVED;
	     index < net->mpls.platform_labels;
	     index++) {
		if (!mpls_route_input(net, index))
			return index;
	}

	return LABEL_NOT_SPECIFIED;
}

#if IS_ENABLED(CONFIG_INET)
/* Resolve the output device for an IPv4 via address through the IPv4
 * FIB; on success a tracked device reference is taken for the nexthop.
 */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	dev = rt->dst.dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	ip_rt_put(rt);

	return dev;
}
#else
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

#if IS_ENABLED(CONFIG_IPV6)
/* IPv6 counterpart of inet_fib_lookup_dev(), via the ipv6 stub ops */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
	if (IS_ERR(dst))
		return ERR_CAST(dst);

	dev = dst->dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	dst_release(dst);

	return dev;
}
#else
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
#endif

/* Resolve the nexthop's output device: by explicit @oif when given,
 * otherwise by a FIB lookup keyed on the via address family.
 */
static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			break;
		}
	} else {
		dev = netdev_get_by_index(net, oif,
					  &nh->nh_dev_tracker, GFP_KERNEL);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (IS_ERR(dev))
		return dev;

	nh->nh_dev = dev;

	return dev;
}

/* Bind a device to the nexthop, validate it for MPLS use and record the
 * initial DEAD/LINKDOWN flags.  On failure the device reference taken by
 * find_outdev() is released.
 */
static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(net, dev))
		goto errout_put;

	/* A link-layer via must match the device's address length */
	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout_put;

	if (!(dev->flags & IFF_UP)) {
		nh->nh_flags |= RTNH_F_DEAD;
	} else {
		unsigned int flags;

		flags = netif_get_flags(dev);
		if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
			nh->nh_flags |= RTNH_F_LINKDOWN;
	}

	return 0;

errout_put:
	netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	nh->nh_dev = NULL;
errout:
	return err;
}

/* Parse an RTA_VIA attribute into (table, alen, addr); validates the
 * address family and the family-specific address length.
 */
static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
		       u8 via_addr[], struct netlink_ext_ack *extack)
{
	struct rtvia *via = nla_data(nla);
	int err = -EINVAL;
	int alen;

	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid attribute length for RTA_VIA");
		goto errout;
	}
	alen = nla_len(nla) -
			offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid address length for RTA_VIA");
		goto errout;
	}

	/* Validate the address family */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			goto errout;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			goto errout;
		break;
	default:
		/* Unsupported address family */
		goto errout;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;
	err = 0;

errout:
	return err;
}

/* Build the single nexthop of a non-multipath route from @cfg */
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
		rt->rt_nhn_alive--;

	return 0;

errout:
	return err;
}

/* Build one nexthop from RTA_VIA/RTA_NEWDST attributes and bind its
 * output device.
 */
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif, struct nlattr *via,
			 struct nlattr *newdst, u8 max_labels,
			 struct netlink_ext_ack *extack)
{
	int err = -ENOMEM;

	if (!nh)
		goto errout;

	if (newdst) {
		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
				     nh->nh_label, extack);
		if (err)
			goto errout;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh), extack);
		if (err)
			goto errout;
	} else {
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	err = mpls_nh_assign_dev(net, rt, nh, oif);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}

/* Walk an RTA_MULTIPATH blob counting nexthops and recording the maximum
 * via length and label count, which size the route allocation.  Returns 0
 * on any malformed input.
 */
static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			      u8 cfg_via_alen, u8 *max_via_alen,
			      u8 *max_labels)
{
	int remaining = len;
	u8 nhs = 0;

	*max_via_alen = 0;
	*max_labels = 0;

	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;
		u8 n_labels = 0;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nla = nla_find(attrs, attrlen, RTA_NEWDST);
		if (nla &&
		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
				   NULL, NULL) != 0)
			return 0;

		*max_labels = max_t(u8, *max_labels, n_labels);

		/* number of nexthops is tracked by a u8.
		 * Check for overflow.
		 */
		if (nhs == 255)
			return 0;
		nhs++;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}

/* Populate every nexthop of a multipath route from the RTA_MULTIPATH
 * blob; rt_nhn counts the nexthops successfully built.
 */
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt, u8 max_labels,
			       struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int err = 0;

	rt->rt_nhn = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
				    max_labels, extack);
		if (err)
			goto errout;

		if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			rt->rt_nhn_alive--;

		rtnh = rtnh_next(rtnh, &remaining);
		rt->rt_nhn++;
	} endfor_nexthops(rt);

	return 0;

errout:
	return err;
}

/* Validate a user-supplied label index and sanitize it against
 * speculative out-of-bounds use (array_index_nospec).
 */
static bool mpls_label_ok(struct net *net, unsigned int *index,
			  struct netlink_ext_ack *extack)
{
	/* Reserved labels may not be set */
	if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
		NL_SET_ERR_MSG(extack,
			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
		return false;
	}

	/* The full 20 bit range may not be supported. */
	if (*index >= net->mpls.platform_labels) {
		NL_SET_ERR_MSG(extack,
			       "Label >= configured maximum in platform_labels");
		return false;
	}

	*index = array_index_nospec(*index, net->mpls.platform_labels);

	return true;
}

/* Handle an RTM_NEWROUTE request: validate the label and NLM flags,
 * size and allocate the route, build its nexthop(s) and install it.
 */
static int mpls_route_add(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	u8 max_labels;
	u8 nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND) {
		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
		goto errout;
	}

	err = -EEXIST;
	old = mpls_route_input(net, index);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	if (cfg->rc_mp) {
		nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
					  cfg->rc_via_alen, &max_via_alen,
					  &max_labels);
	} else {
		max_via_alen = cfg->rc_via_alen;
		max_labels = cfg->rc_output_labels;
		nhs = 1;
	}

	if (nhs ==
0) {
		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
		goto errout;
	}

	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto errout;
	}

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;
	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}

/* Handle an RTM_DELROUTE request: installing NULL removes the route */
static int mpls_route_del(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	unsigned index;
	int err = -EINVAL;

	index = cfg->rc_label;

	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);

	err = 0;
errout:
	return err;
}

/* Sum the per-CPU counters into @stats, re-reading any CPU's snapshot
 * that was torn by a concurrent writer (u64_stats retry loop).
 */
static void mpls_get_stats(struct mpls_dev *mdev,
			   struct mpls_link_stats *stats)
{
	struct mpls_pcpu_stats *p;
	int i;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		struct mpls_link_stats local;
		unsigned int start;

		p = per_cpu_ptr(mdev->stats, i);
		do {
			start = u64_stats_fetch_begin(&p->syncp);
			local = p->stats;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets	+= local.rx_packets;
		stats->rx_bytes		+= local.rx_bytes;
		stats->tx_packets	+= local.tx_packets;
		stats->tx_bytes		+= local.tx_bytes;
		stats->rx_errors	+= local.rx_errors;
		stats->tx_errors	+= local.tx_errors;
		stats->rx_dropped	+= local.rx_dropped;
		stats->tx_dropped	+= local.tx_dropped;
		stats->rx_noroute	+= local.rx_noroute;
	}
}

/* Fill the MPLS_STATS_LINK attribute for an RTM_GETSTATS-style dump */
static int mpls_fill_stats_af(struct sk_buff *skb,
			      const struct net_device *dev)
{
	struct mpls_link_stats *stats;
	struct mpls_dev *mdev;
	struct nlattr *nla;

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		return -ENODATA;

	nla = nla_reserve_64bit(skb, MPLS_STATS_LINK,
				sizeof(struct mpls_link_stats),
				MPLS_STATS_UNSPEC);
	if (!nla)
		return -EMSGSIZE;

	stats = nla_data(nla);
	mpls_get_stats(mdev, stats);

	return 0;
}

/* Message size needed by mpls_fill_stats_af(); 0 if MPLS is not enabled
 * on the device.
 */
static size_t mpls_get_stats_af_size(const struct net_device *dev)
{
	struct mpls_dev *mdev;

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		return 0;

	return nla_total_size_64bit(sizeof(struct mpls_link_stats));
}

/* Build one RTM_NEWNETCONF message for @mdev; @type selects a single
 * attribute or NETCONFA_ALL.
 */
static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
				     u32 portid, u32 seq, int event,
				     unsigned int flags, int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_MPLS;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0)
		goto nla_put_failure;

	if ((all || type == NETCONFA_INPUT) &&
	    nla_put_s32(skb, NETCONFA_INPUT,
			READ_ONCE(mdev->input_enabled)) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Worst-case message size for the attributes selected by @type; must be
 * kept in sync with mpls_netconf_fill_devconf().
 */
static int mpls_netconf_msgsize_devconf(int type)
{
	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
			+ nla_total_size(4); /* NETCONFA_IFINDEX */
	bool all = false;

	if (type == NETCONFA_ALL)
		all = true;

	if (all || type == NETCONFA_INPUT)
		size += nla_total_size(4);

	return size;
}

/* Broadcast a netconf change on RTNLGRP_MPLS_NETCONF */
static void mpls_netconf_notify_devconf(struct net *net, int event,
					int type, struct mpls_dev *mdev)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}

static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
};

/* Validate an RTM_GETNETCONF request; strict mode rejects any attribute
 * other than NETCONFA_IFINDEX.
 */
static int mpls_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_mpls_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_mpls_policy, extack);
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}

/* RTM_GETNETCONF doit handler: reply with the full netconf state of the
 * device named by NETCONFA_IFINDEX.
 */
static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX + 1];
	struct net_device *dev;
	struct mpls_dev *mdev;
	struct sk_buff *skb;
	int ifindex;
	int err;

	err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	if (!tb[NETCONFA_IFINDEX]) {
		err = -EINVAL;
		goto errout;
	}

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	rcu_read_lock();

	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	mdev = mpls_dev_rcu(dev);
	if (!mdev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	err = mpls_netconf_fill_devconf(skb, mdev,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		goto errout_unlock;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

	rcu_read_unlock();
errout:
	return err;

errout_unlock:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout;
}

/* RTM_GETNETCONF dump handler; resumes from ctx->ifindex across calls */
static int mpls_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct {
		unsigned long ifindex;
	} *ctx = (void *)cb->ctx;
	struct net_device *dev;
	struct mpls_dev *mdev;
	int err = 0;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
			return -EINVAL;
		}

		if
(nlmsg_attrlen(nlh, sizeof(*ncm))) { 1370 NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request"); 1371 return -EINVAL; 1372 } 1373 } 1374 1375 rcu_read_lock(); 1376 for_each_netdev_dump(net, dev, ctx->ifindex) { 1377 mdev = mpls_dev_rcu(dev); 1378 if (!mdev) 1379 continue; 1380 err = mpls_netconf_fill_devconf(skb, mdev, 1381 NETLINK_CB(cb->skb).portid, 1382 nlh->nlmsg_seq, 1383 RTM_NEWNETCONF, 1384 NLM_F_MULTI, 1385 NETCONFA_ALL); 1386 if (err < 0) 1387 break; 1388 } 1389 rcu_read_unlock(); 1390 1391 return err; 1392 } 1393 1394 #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ 1395 (&((struct mpls_dev *)0)->field) 1396 1397 static int mpls_conf_proc(const struct ctl_table *ctl, int write, 1398 void *buffer, size_t *lenp, loff_t *ppos) 1399 { 1400 int oval = *(int *)ctl->data; 1401 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 1402 1403 if (write) { 1404 struct mpls_dev *mdev = ctl->extra1; 1405 int i = (int *)ctl->data - (int *)mdev; 1406 struct net *net = ctl->extra2; 1407 int val = *(int *)ctl->data; 1408 1409 if (i == offsetof(struct mpls_dev, input_enabled) && 1410 val != oval) { 1411 mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, 1412 NETCONFA_INPUT, mdev); 1413 } 1414 } 1415 1416 return ret; 1417 } 1418 1419 static const struct ctl_table mpls_dev_table[] = { 1420 { 1421 .procname = "input", 1422 .maxlen = sizeof(int), 1423 .mode = 0644, 1424 .proc_handler = mpls_conf_proc, 1425 .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), 1426 }, 1427 }; 1428 1429 static int mpls_dev_sysctl_register(struct net_device *dev, 1430 struct mpls_dev *mdev) 1431 { 1432 char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; 1433 size_t table_size = ARRAY_SIZE(mpls_dev_table); 1434 struct net *net = dev_net(dev); 1435 struct ctl_table *table; 1436 int i; 1437 1438 table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); 1439 if (!table) 1440 goto out; 1441 1442 /* Table data contains only offsets relative to the base of 1443 * the mdev at 
this point, so make them absolute. 1444 */ 1445 for (i = 0; i < table_size; i++) { 1446 table[i].data = (char *)mdev + (uintptr_t)table[i].data; 1447 table[i].extra1 = mdev; 1448 table[i].extra2 = net; 1449 } 1450 1451 snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); 1452 1453 mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size); 1454 if (!mdev->sysctl) 1455 goto free; 1456 1457 mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev); 1458 return 0; 1459 1460 free: 1461 kfree(table); 1462 out: 1463 mdev->sysctl = NULL; 1464 return -ENOBUFS; 1465 } 1466 1467 static void mpls_dev_sysctl_unregister(struct net_device *dev, 1468 struct mpls_dev *mdev) 1469 { 1470 struct net *net = dev_net(dev); 1471 const struct ctl_table *table; 1472 1473 if (!mdev->sysctl) 1474 return; 1475 1476 table = mdev->sysctl->ctl_table_arg; 1477 unregister_net_sysctl_table(mdev->sysctl); 1478 kfree(table); 1479 1480 mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev); 1481 } 1482 1483 static struct mpls_dev *mpls_add_dev(struct net_device *dev) 1484 { 1485 struct mpls_dev *mdev; 1486 int err = -ENOMEM; 1487 int i; 1488 1489 mdev = kzalloc_obj(*mdev); 1490 if (!mdev) 1491 return ERR_PTR(err); 1492 1493 mdev->stats = alloc_percpu(struct mpls_pcpu_stats); 1494 if (!mdev->stats) 1495 goto free; 1496 1497 for_each_possible_cpu(i) { 1498 struct mpls_pcpu_stats *mpls_stats; 1499 1500 mpls_stats = per_cpu_ptr(mdev->stats, i); 1501 u64_stats_init(&mpls_stats->syncp); 1502 } 1503 1504 mdev->dev = dev; 1505 1506 err = mpls_dev_sysctl_register(dev, mdev); 1507 if (err) 1508 goto free; 1509 1510 rcu_assign_pointer(dev->mpls_ptr, mdev); 1511 1512 return mdev; 1513 1514 free: 1515 free_percpu(mdev->stats); 1516 kfree(mdev); 1517 return ERR_PTR(err); 1518 } 1519 1520 static void mpls_dev_destroy_rcu(struct rcu_head *head) 1521 { 1522 struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu); 1523 1524 free_percpu(mdev->stats); 1525 kfree(mdev); 1526 } 1527 
1528 static int mpls_ifdown(struct net_device *dev, int event) 1529 { 1530 struct net *net = dev_net(dev); 1531 unsigned int index; 1532 1533 for (index = 0; index < net->mpls.platform_labels; index++) { 1534 struct mpls_route *rt; 1535 bool nh_del = false; 1536 u8 alive = 0; 1537 1538 rt = mpls_route_input(net, index); 1539 if (!rt) 1540 continue; 1541 1542 if (event == NETDEV_UNREGISTER) { 1543 u8 deleted = 0; 1544 1545 for_nexthops(rt) { 1546 if (!nh->nh_dev || nh->nh_dev == dev) 1547 deleted++; 1548 if (nh->nh_dev == dev) 1549 nh_del = true; 1550 } endfor_nexthops(rt); 1551 1552 /* if there are no more nexthops, delete the route */ 1553 if (deleted == rt->rt_nhn) { 1554 mpls_route_update(net, index, NULL, NULL); 1555 continue; 1556 } 1557 1558 if (nh_del) { 1559 size_t size = sizeof(*rt) + rt->rt_nhn * 1560 rt->rt_nh_size; 1561 struct mpls_route *orig = rt; 1562 1563 rt = kmemdup(orig, size, GFP_KERNEL); 1564 if (!rt) 1565 return -ENOMEM; 1566 } 1567 } 1568 1569 change_nexthops(rt) { 1570 unsigned int nh_flags = nh->nh_flags; 1571 1572 if (nh->nh_dev != dev) { 1573 if (nh_del) 1574 netdev_hold(nh->nh_dev, &nh->nh_dev_tracker, 1575 GFP_KERNEL); 1576 goto next; 1577 } 1578 1579 switch (event) { 1580 case NETDEV_DOWN: 1581 case NETDEV_UNREGISTER: 1582 nh_flags |= RTNH_F_DEAD; 1583 fallthrough; 1584 case NETDEV_CHANGE: 1585 nh_flags |= RTNH_F_LINKDOWN; 1586 break; 1587 } 1588 if (event == NETDEV_UNREGISTER) 1589 nh->nh_dev = NULL; 1590 1591 if (nh->nh_flags != nh_flags) 1592 WRITE_ONCE(nh->nh_flags, nh_flags); 1593 next: 1594 if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) 1595 alive++; 1596 } endfor_nexthops(rt); 1597 1598 WRITE_ONCE(rt->rt_nhn_alive, alive); 1599 1600 if (nh_del) 1601 mpls_route_update(net, index, rt, NULL); 1602 } 1603 1604 return 0; 1605 } 1606 1607 static void mpls_ifup(struct net_device *dev, unsigned int flags) 1608 { 1609 struct net *net = dev_net(dev); 1610 unsigned int index; 1611 u8 alive; 1612 1613 for (index = 0; index < 
net->mpls.platform_labels; index++) { 1614 struct mpls_route *rt; 1615 1616 rt = mpls_route_input(net, index); 1617 if (!rt) 1618 continue; 1619 1620 alive = 0; 1621 change_nexthops(rt) { 1622 unsigned int nh_flags = nh->nh_flags; 1623 1624 if (!(nh_flags & flags)) { 1625 alive++; 1626 continue; 1627 } 1628 if (nh->nh_dev != dev) 1629 continue; 1630 alive++; 1631 nh_flags &= ~flags; 1632 WRITE_ONCE(nh->nh_flags, nh_flags); 1633 } endfor_nexthops(rt); 1634 1635 WRITE_ONCE(rt->rt_nhn_alive, alive); 1636 } 1637 } 1638 1639 static int mpls_dev_notify(struct notifier_block *this, unsigned long event, 1640 void *ptr) 1641 { 1642 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1643 struct net *net = dev_net(dev); 1644 struct mpls_dev *mdev; 1645 unsigned int flags; 1646 int err; 1647 1648 mutex_lock(&net->mpls.platform_mutex); 1649 1650 if (event == NETDEV_REGISTER) { 1651 mdev = mpls_add_dev(dev); 1652 if (IS_ERR(mdev)) { 1653 err = PTR_ERR(mdev); 1654 goto err; 1655 } 1656 1657 goto out; 1658 } 1659 1660 mdev = mpls_dev_get(net, dev); 1661 if (!mdev) 1662 goto out; 1663 1664 switch (event) { 1665 1666 case NETDEV_DOWN: 1667 err = mpls_ifdown(dev, event); 1668 if (err) 1669 goto err; 1670 break; 1671 case NETDEV_UP: 1672 flags = netif_get_flags(dev); 1673 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1674 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); 1675 else 1676 mpls_ifup(dev, RTNH_F_DEAD); 1677 break; 1678 case NETDEV_CHANGE: 1679 flags = netif_get_flags(dev); 1680 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { 1681 mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); 1682 } else { 1683 err = mpls_ifdown(dev, event); 1684 if (err) 1685 goto err; 1686 } 1687 break; 1688 case NETDEV_UNREGISTER: 1689 err = mpls_ifdown(dev, event); 1690 if (err) 1691 goto err; 1692 1693 mdev = mpls_dev_get(net, dev); 1694 if (mdev) { 1695 mpls_dev_sysctl_unregister(dev, mdev); 1696 RCU_INIT_POINTER(dev->mpls_ptr, NULL); 1697 call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); 1698 } 1699 break; 
1700 case NETDEV_CHANGENAME: 1701 mdev = mpls_dev_get(net, dev); 1702 if (mdev) { 1703 mpls_dev_sysctl_unregister(dev, mdev); 1704 err = mpls_dev_sysctl_register(dev, mdev); 1705 if (err) 1706 goto err; 1707 } 1708 break; 1709 } 1710 1711 out: 1712 mutex_unlock(&net->mpls.platform_mutex); 1713 return NOTIFY_OK; 1714 1715 err: 1716 mutex_unlock(&net->mpls.platform_mutex); 1717 return notifier_from_errno(err); 1718 } 1719 1720 static struct notifier_block mpls_dev_notifier = { 1721 .notifier_call = mpls_dev_notify, 1722 }; 1723 1724 static int nla_put_via(struct sk_buff *skb, 1725 u8 table, const void *addr, int alen) 1726 { 1727 static const int table_to_family[NEIGH_NR_TABLES + 1] = { 1728 AF_INET, AF_INET6, AF_PACKET, 1729 }; 1730 struct nlattr *nla; 1731 struct rtvia *via; 1732 int family = AF_UNSPEC; 1733 1734 nla = nla_reserve(skb, RTA_VIA, alen + 2); 1735 if (!nla) 1736 return -EMSGSIZE; 1737 1738 if (table <= NEIGH_NR_TABLES) 1739 family = table_to_family[table]; 1740 1741 via = nla_data(nla); 1742 via->rtvia_family = family; 1743 memcpy(via->rtvia_addr, addr, alen); 1744 return 0; 1745 } 1746 1747 int nla_put_labels(struct sk_buff *skb, int attrtype, 1748 u8 labels, const u32 label[]) 1749 { 1750 struct nlattr *nla; 1751 struct mpls_shim_hdr *nla_label; 1752 bool bos; 1753 int i; 1754 nla = nla_reserve(skb, attrtype, labels*4); 1755 if (!nla) 1756 return -EMSGSIZE; 1757 1758 nla_label = nla_data(nla); 1759 bos = true; 1760 for (i = labels - 1; i >= 0; i--) { 1761 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos); 1762 bos = false; 1763 } 1764 1765 return 0; 1766 } 1767 EXPORT_SYMBOL_GPL(nla_put_labels); 1768 1769 int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, 1770 u32 label[], struct netlink_ext_ack *extack) 1771 { 1772 unsigned len = nla_len(nla); 1773 struct mpls_shim_hdr *nla_label; 1774 u8 nla_labels; 1775 bool bos; 1776 int i; 1777 1778 /* len needs to be an even multiple of 4 (the label size). 
Number 1779 * of labels is a u8 so check for overflow. 1780 */ 1781 if (len & 3 || len / 4 > 255) { 1782 NL_SET_ERR_MSG_ATTR(extack, nla, 1783 "Invalid length for labels attribute"); 1784 return -EINVAL; 1785 } 1786 1787 /* Limit the number of new labels allowed */ 1788 nla_labels = len/4; 1789 if (nla_labels > max_labels) { 1790 NL_SET_ERR_MSG(extack, "Too many labels"); 1791 return -EINVAL; 1792 } 1793 1794 /* when label == NULL, caller wants number of labels */ 1795 if (!label) 1796 goto out; 1797 1798 nla_label = nla_data(nla); 1799 bos = true; 1800 for (i = nla_labels - 1; i >= 0; i--, bos = false) { 1801 struct mpls_entry_decoded dec; 1802 dec = mpls_entry_decode(nla_label + i); 1803 1804 /* Ensure the bottom of stack flag is properly set 1805 * and ttl and tc are both clear. 1806 */ 1807 if (dec.ttl) { 1808 NL_SET_ERR_MSG_ATTR(extack, nla, 1809 "TTL in label must be 0"); 1810 return -EINVAL; 1811 } 1812 1813 if (dec.tc) { 1814 NL_SET_ERR_MSG_ATTR(extack, nla, 1815 "Traffic class in label must be 0"); 1816 return -EINVAL; 1817 } 1818 1819 if (dec.bos != bos) { 1820 NL_SET_BAD_ATTR(extack, nla); 1821 if (bos) { 1822 NL_SET_ERR_MSG(extack, 1823 "BOS bit must be set in first label"); 1824 } else { 1825 NL_SET_ERR_MSG(extack, 1826 "BOS bit can only be set in first label"); 1827 } 1828 return -EINVAL; 1829 } 1830 1831 switch (dec.label) { 1832 case MPLS_LABEL_IMPLNULL: 1833 /* RFC3032: This is a label that an LSR may 1834 * assign and distribute, but which never 1835 * actually appears in the encapsulation. 
1836 */ 1837 NL_SET_ERR_MSG_ATTR(extack, nla, 1838 "Implicit NULL Label (3) can not be used in encapsulation"); 1839 return -EINVAL; 1840 } 1841 1842 label[i] = dec.label; 1843 } 1844 out: 1845 *labels = nla_labels; 1846 return 0; 1847 } 1848 EXPORT_SYMBOL_GPL(nla_get_labels); 1849 1850 static int rtm_to_route_config(struct sk_buff *skb, 1851 struct nlmsghdr *nlh, 1852 struct mpls_route_config *cfg, 1853 struct netlink_ext_ack *extack) 1854 { 1855 struct rtmsg *rtm; 1856 struct nlattr *tb[RTA_MAX+1]; 1857 int index; 1858 int err; 1859 1860 err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 1861 rtm_mpls_policy, extack); 1862 if (err < 0) 1863 goto errout; 1864 1865 err = -EINVAL; 1866 rtm = nlmsg_data(nlh); 1867 1868 if (rtm->rtm_family != AF_MPLS) { 1869 NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg"); 1870 goto errout; 1871 } 1872 if (rtm->rtm_dst_len != 20) { 1873 NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS"); 1874 goto errout; 1875 } 1876 if (rtm->rtm_src_len != 0) { 1877 NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS"); 1878 goto errout; 1879 } 1880 if (rtm->rtm_tos != 0) { 1881 NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS"); 1882 goto errout; 1883 } 1884 if (rtm->rtm_table != RT_TABLE_MAIN) { 1885 NL_SET_ERR_MSG(extack, 1886 "MPLS only supports the main route table"); 1887 goto errout; 1888 } 1889 /* Any value is acceptable for rtm_protocol */ 1890 1891 /* As mpls uses destination specific addresses 1892 * (or source specific address in the case of multicast) 1893 * all addresses have universal scope. 
1894 */ 1895 if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) { 1896 NL_SET_ERR_MSG(extack, 1897 "Invalid route scope - MPLS only supports UNIVERSE"); 1898 goto errout; 1899 } 1900 if (rtm->rtm_type != RTN_UNICAST) { 1901 NL_SET_ERR_MSG(extack, 1902 "Invalid route type - MPLS only supports UNICAST"); 1903 goto errout; 1904 } 1905 if (rtm->rtm_flags != 0) { 1906 NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS"); 1907 goto errout; 1908 } 1909 1910 cfg->rc_label = LABEL_NOT_SPECIFIED; 1911 cfg->rc_protocol = rtm->rtm_protocol; 1912 cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC; 1913 cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT; 1914 cfg->rc_nlflags = nlh->nlmsg_flags; 1915 cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid; 1916 cfg->rc_nlinfo.nlh = nlh; 1917 cfg->rc_nlinfo.nl_net = sock_net(skb->sk); 1918 1919 for (index = 0; index <= RTA_MAX; index++) { 1920 struct nlattr *nla = tb[index]; 1921 if (!nla) 1922 continue; 1923 1924 switch (index) { 1925 case RTA_OIF: 1926 cfg->rc_ifindex = nla_get_u32(nla); 1927 break; 1928 case RTA_NEWDST: 1929 if (nla_get_labels(nla, MAX_NEW_LABELS, 1930 &cfg->rc_output_labels, 1931 cfg->rc_output_label, extack)) 1932 goto errout; 1933 break; 1934 case RTA_DST: 1935 { 1936 u8 label_count; 1937 if (nla_get_labels(nla, 1, &label_count, 1938 &cfg->rc_label, extack)) 1939 goto errout; 1940 1941 if (!mpls_label_ok(cfg->rc_nlinfo.nl_net, 1942 &cfg->rc_label, extack)) 1943 goto errout; 1944 break; 1945 } 1946 case RTA_GATEWAY: 1947 NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute"); 1948 goto errout; 1949 case RTA_VIA: 1950 { 1951 if (nla_get_via(nla, &cfg->rc_via_alen, 1952 &cfg->rc_via_table, cfg->rc_via, 1953 extack)) 1954 goto errout; 1955 break; 1956 } 1957 case RTA_MULTIPATH: 1958 { 1959 cfg->rc_mp = nla_data(nla); 1960 cfg->rc_mp_len = nla_len(nla); 1961 break; 1962 } 1963 case RTA_TTL_PROPAGATE: 1964 { 1965 u8 ttl_propagate = nla_get_u8(nla); 1966 1967 if (ttl_propagate > 1) { 1968 NL_SET_ERR_MSG_ATTR(extack, nla, 1969 
"RTA_TTL_PROPAGATE can only be 0 or 1"); 1970 goto errout; 1971 } 1972 cfg->rc_ttl_propagate = ttl_propagate ? 1973 MPLS_TTL_PROP_ENABLED : 1974 MPLS_TTL_PROP_DISABLED; 1975 break; 1976 } 1977 default: 1978 NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute"); 1979 /* Unsupported attribute */ 1980 goto errout; 1981 } 1982 } 1983 1984 err = 0; 1985 errout: 1986 return err; 1987 } 1988 1989 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 1990 struct netlink_ext_ack *extack) 1991 { 1992 struct net *net = sock_net(skb->sk); 1993 struct mpls_route_config *cfg; 1994 int err; 1995 1996 cfg = kzalloc_obj(*cfg); 1997 if (!cfg) 1998 return -ENOMEM; 1999 2000 err = rtm_to_route_config(skb, nlh, cfg, extack); 2001 if (err < 0) 2002 goto out; 2003 2004 mutex_lock(&net->mpls.platform_mutex); 2005 err = mpls_route_del(cfg, extack); 2006 mutex_unlock(&net->mpls.platform_mutex); 2007 out: 2008 kfree(cfg); 2009 2010 return err; 2011 } 2012 2013 2014 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 2015 struct netlink_ext_ack *extack) 2016 { 2017 struct net *net = sock_net(skb->sk); 2018 struct mpls_route_config *cfg; 2019 int err; 2020 2021 cfg = kzalloc_obj(*cfg); 2022 if (!cfg) 2023 return -ENOMEM; 2024 2025 err = rtm_to_route_config(skb, nlh, cfg, extack); 2026 if (err < 0) 2027 goto out; 2028 2029 mutex_lock(&net->mpls.platform_mutex); 2030 err = mpls_route_add(cfg, extack); 2031 mutex_unlock(&net->mpls.platform_mutex); 2032 out: 2033 kfree(cfg); 2034 2035 return err; 2036 } 2037 2038 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, 2039 u32 label, struct mpls_route *rt, int flags) 2040 { 2041 struct net_device *dev; 2042 struct nlmsghdr *nlh; 2043 struct rtmsg *rtm; 2044 2045 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 2046 if (nlh == NULL) 2047 return -EMSGSIZE; 2048 2049 rtm = nlmsg_data(nlh); 2050 rtm->rtm_family = AF_MPLS; 2051 rtm->rtm_dst_len = 20; 2052 rtm->rtm_src_len = 0; 
2053 rtm->rtm_tos = 0; 2054 rtm->rtm_table = RT_TABLE_MAIN; 2055 rtm->rtm_protocol = rt->rt_protocol; 2056 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2057 rtm->rtm_type = RTN_UNICAST; 2058 rtm->rtm_flags = 0; 2059 2060 if (nla_put_labels(skb, RTA_DST, 1, &label)) 2061 goto nla_put_failure; 2062 2063 if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) { 2064 bool ttl_propagate = 2065 rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED; 2066 2067 if (nla_put_u8(skb, RTA_TTL_PROPAGATE, 2068 ttl_propagate)) 2069 goto nla_put_failure; 2070 } 2071 if (rt->rt_nhn == 1) { 2072 const struct mpls_nh *nh = rt->rt_nh; 2073 2074 if (nh->nh_labels && 2075 nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, 2076 nh->nh_label)) 2077 goto nla_put_failure; 2078 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && 2079 nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh), 2080 nh->nh_via_alen)) 2081 goto nla_put_failure; 2082 dev = nh->nh_dev; 2083 if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) 2084 goto nla_put_failure; 2085 if (nh->nh_flags & RTNH_F_LINKDOWN) 2086 rtm->rtm_flags |= RTNH_F_LINKDOWN; 2087 if (nh->nh_flags & RTNH_F_DEAD) 2088 rtm->rtm_flags |= RTNH_F_DEAD; 2089 } else { 2090 struct rtnexthop *rtnh; 2091 struct nlattr *mp; 2092 u8 linkdown = 0; 2093 u8 dead = 0; 2094 2095 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 2096 if (!mp) 2097 goto nla_put_failure; 2098 2099 for_nexthops(rt) { 2100 dev = nh->nh_dev; 2101 if (!dev) 2102 continue; 2103 2104 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 2105 if (!rtnh) 2106 goto nla_put_failure; 2107 2108 rtnh->rtnh_ifindex = dev->ifindex; 2109 if (nh->nh_flags & RTNH_F_LINKDOWN) { 2110 rtnh->rtnh_flags |= RTNH_F_LINKDOWN; 2111 linkdown++; 2112 } 2113 if (nh->nh_flags & RTNH_F_DEAD) { 2114 rtnh->rtnh_flags |= RTNH_F_DEAD; 2115 dead++; 2116 } 2117 2118 if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, 2119 nh->nh_labels, 2120 nh->nh_label)) 2121 goto nla_put_failure; 2122 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC && 2123 
nla_put_via(skb, nh->nh_via_table, 2124 mpls_nh_via(rt, nh), 2125 nh->nh_via_alen)) 2126 goto nla_put_failure; 2127 2128 /* length of rtnetlink header + attributes */ 2129 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 2130 } endfor_nexthops(rt); 2131 2132 if (linkdown == rt->rt_nhn) 2133 rtm->rtm_flags |= RTNH_F_LINKDOWN; 2134 if (dead == rt->rt_nhn) 2135 rtm->rtm_flags |= RTNH_F_DEAD; 2136 2137 nla_nest_end(skb, mp); 2138 } 2139 2140 nlmsg_end(skb, nlh); 2141 return 0; 2142 2143 nla_put_failure: 2144 nlmsg_cancel(skb, nlh); 2145 return -EMSGSIZE; 2146 } 2147 2148 #if IS_ENABLED(CONFIG_INET) 2149 static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 2150 struct fib_dump_filter *filter, 2151 struct netlink_callback *cb) 2152 { 2153 return ip_valid_fib_dump_req(net, nlh, filter, cb); 2154 } 2155 #else 2156 static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, 2157 struct fib_dump_filter *filter, 2158 struct netlink_callback *cb) 2159 { 2160 struct netlink_ext_ack *extack = cb->extack; 2161 struct nlattr *tb[RTA_MAX + 1]; 2162 struct rtmsg *rtm; 2163 int err, i; 2164 2165 rtm = nlmsg_payload(nlh, sizeof(*rtm)); 2166 if (!rtm) { 2167 NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request"); 2168 return -EINVAL; 2169 } 2170 2171 if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || 2172 rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type || 2173 rtm->rtm_flags) { 2174 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request"); 2175 return -EINVAL; 2176 } 2177 2178 if (rtm->rtm_protocol) { 2179 filter->protocol = rtm->rtm_protocol; 2180 filter->filter_set = 1; 2181 cb->answer_flags = NLM_F_DUMP_FILTERED; 2182 } 2183 2184 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2185 rtm_mpls_policy, extack); 2186 if (err < 0) 2187 return err; 2188 2189 for (i = 0; i <= RTA_MAX; ++i) { 2190 int ifindex; 2191 2192 if (i == RTA_OIF) { 2193 ifindex = nla_get_u32(tb[i]); 2194 
filter->dev = dev_get_by_index_rcu(net, ifindex); 2195 if (!filter->dev) 2196 return -ENODEV; 2197 filter->filter_set = 1; 2198 } else if (tb[i]) { 2199 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request"); 2200 return -EINVAL; 2201 } 2202 } 2203 2204 return 0; 2205 } 2206 #endif 2207 2208 static bool mpls_rt_uses_dev(struct mpls_route *rt, 2209 const struct net_device *dev) 2210 { 2211 if (rt->rt_nhn == 1) { 2212 struct mpls_nh *nh = rt->rt_nh; 2213 2214 if (nh->nh_dev == dev) 2215 return true; 2216 } else { 2217 for_nexthops(rt) { 2218 if (nh->nh_dev == dev) 2219 return true; 2220 } endfor_nexthops(rt); 2221 } 2222 2223 return false; 2224 } 2225 2226 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) 2227 { 2228 const struct nlmsghdr *nlh = cb->nlh; 2229 struct net *net = sock_net(skb->sk); 2230 struct mpls_route __rcu **platform_label; 2231 struct fib_dump_filter filter = { 2232 .rtnl_held = false, 2233 }; 2234 unsigned int flags = NLM_F_MULTI; 2235 size_t platform_labels; 2236 unsigned int index; 2237 int err; 2238 2239 rcu_read_lock(); 2240 2241 if (cb->strict_check) { 2242 err = mpls_valid_fib_dump_req(net, nlh, &filter, cb); 2243 if (err < 0) 2244 goto err; 2245 2246 /* for MPLS, there is only 1 table with fixed type and flags. 2247 * If either are set in the filter then return nothing. 
2248 */ 2249 if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) || 2250 (filter.rt_type && filter.rt_type != RTN_UNICAST) || 2251 filter.flags) 2252 goto unlock; 2253 } 2254 2255 index = cb->args[0]; 2256 if (index < MPLS_LABEL_FIRST_UNRESERVED) 2257 index = MPLS_LABEL_FIRST_UNRESERVED; 2258 2259 platform_label = mpls_platform_label_rcu(net, &platform_labels); 2260 2261 if (filter.filter_set) 2262 flags |= NLM_F_DUMP_FILTERED; 2263 2264 for (; index < platform_labels; index++) { 2265 struct mpls_route *rt; 2266 2267 rt = rcu_dereference(platform_label[index]); 2268 if (!rt) 2269 continue; 2270 2271 if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) || 2272 (filter.protocol && rt->rt_protocol != filter.protocol)) 2273 continue; 2274 2275 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, 2276 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2277 index, rt, flags) < 0) 2278 break; 2279 } 2280 cb->args[0] = index; 2281 2282 unlock: 2283 rcu_read_unlock(); 2284 return skb->len; 2285 2286 err: 2287 rcu_read_unlock(); 2288 return err; 2289 } 2290 2291 static inline size_t lfib_nlmsg_size(struct mpls_route *rt) 2292 { 2293 size_t payload = 2294 NLMSG_ALIGN(sizeof(struct rtmsg)) 2295 + nla_total_size(4) /* RTA_DST */ 2296 + nla_total_size(1); /* RTA_TTL_PROPAGATE */ 2297 2298 if (rt->rt_nhn == 1) { 2299 struct mpls_nh *nh = rt->rt_nh; 2300 2301 if (nh->nh_dev) 2302 payload += nla_total_size(4); /* RTA_OIF */ 2303 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */ 2304 payload += nla_total_size(2 + nh->nh_via_alen); 2305 if (nh->nh_labels) /* RTA_NEWDST */ 2306 payload += nla_total_size(nh->nh_labels * 4); 2307 } else { 2308 /* each nexthop is packed in an attribute */ 2309 size_t nhsize = 0; 2310 2311 for_nexthops(rt) { 2312 if (!nh->nh_dev) 2313 continue; 2314 nhsize += nla_total_size(sizeof(struct rtnexthop)); 2315 /* RTA_VIA */ 2316 if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) 2317 nhsize += nla_total_size(2 + nh->nh_via_alen); 2318 if (nh->nh_labels) 2319 
nhsize += nla_total_size(nh->nh_labels * 4); 2320 } endfor_nexthops(rt); 2321 /* nested attribute */ 2322 payload += nla_total_size(nhsize); 2323 } 2324 2325 return payload; 2326 } 2327 2328 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 2329 struct nlmsghdr *nlh, struct net *net, u32 portid, 2330 unsigned int nlm_flags) 2331 { 2332 struct sk_buff *skb; 2333 u32 seq = nlh ? nlh->nlmsg_seq : 0; 2334 int err = -ENOBUFS; 2335 2336 skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); 2337 if (skb == NULL) 2338 goto errout; 2339 2340 err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags); 2341 if (err < 0) { 2342 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ 2343 WARN_ON(err == -EMSGSIZE); 2344 kfree_skb(skb); 2345 goto errout; 2346 } 2347 rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL); 2348 2349 return; 2350 errout: 2351 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); 2352 } 2353 2354 static int mpls_valid_getroute_req(struct sk_buff *skb, 2355 const struct nlmsghdr *nlh, 2356 struct nlattr **tb, 2357 struct netlink_ext_ack *extack) 2358 { 2359 struct rtmsg *rtm; 2360 int i, err; 2361 2362 rtm = nlmsg_payload(nlh, sizeof(*rtm)); 2363 if (!rtm) { 2364 NL_SET_ERR_MSG_MOD(extack, 2365 "Invalid header for get route request"); 2366 return -EINVAL; 2367 } 2368 2369 if (!netlink_strict_get_check(skb)) 2370 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, 2371 rtm_mpls_policy, extack); 2372 2373 if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || 2374 rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || 2375 rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { 2376 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); 2377 return -EINVAL; 2378 } 2379 if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { 2380 NL_SET_ERR_MSG_MOD(extack, 2381 "Invalid flags for get route request"); 2382 return -EINVAL; 2383 } 2384 2385 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 2386 
rtm_mpls_policy, extack); 2387 if (err) 2388 return err; 2389 2390 if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) { 2391 NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS"); 2392 return -EINVAL; 2393 } 2394 2395 for (i = 0; i <= RTA_MAX; i++) { 2396 if (!tb[i]) 2397 continue; 2398 2399 switch (i) { 2400 case RTA_DST: 2401 case RTA_NEWDST: 2402 break; 2403 default: 2404 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); 2405 return -EINVAL; 2406 } 2407 } 2408 2409 return 0; 2410 } 2411 2412 static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, 2413 struct netlink_ext_ack *extack) 2414 { 2415 struct net *net = sock_net(in_skb->sk); 2416 u32 portid = NETLINK_CB(in_skb).portid; 2417 u32 in_label = LABEL_NOT_SPECIFIED; 2418 struct nlattr *tb[RTA_MAX + 1]; 2419 struct mpls_route *rt = NULL; 2420 u32 labels[MAX_NEW_LABELS]; 2421 struct mpls_shim_hdr *hdr; 2422 unsigned int hdr_size = 0; 2423 const struct mpls_nh *nh; 2424 struct net_device *dev; 2425 struct rtmsg *rtm, *r; 2426 struct nlmsghdr *nlh; 2427 struct sk_buff *skb; 2428 u8 n_labels; 2429 int err; 2430 2431 mutex_lock(&net->mpls.platform_mutex); 2432 2433 err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack); 2434 if (err < 0) 2435 goto errout; 2436 2437 rtm = nlmsg_data(in_nlh); 2438 2439 if (tb[RTA_DST]) { 2440 u8 label_count; 2441 2442 if (nla_get_labels(tb[RTA_DST], 1, &label_count, 2443 &in_label, extack)) { 2444 err = -EINVAL; 2445 goto errout; 2446 } 2447 2448 if (!mpls_label_ok(net, &in_label, extack)) { 2449 err = -EINVAL; 2450 goto errout; 2451 } 2452 } 2453 2454 if (in_label < net->mpls.platform_labels) 2455 rt = mpls_route_input(net, in_label); 2456 if (!rt) { 2457 err = -ENETUNREACH; 2458 goto errout; 2459 } 2460 2461 if (rtm->rtm_flags & RTM_F_FIB_MATCH) { 2462 skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL); 2463 if (!skb) { 2464 err = -ENOBUFS; 2465 goto errout; 2466 } 2467 2468 err = mpls_dump_route(skb, portid, 
in_nlh->nlmsg_seq, 2469 RTM_NEWROUTE, in_label, rt, 0); 2470 if (err < 0) { 2471 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */ 2472 WARN_ON(err == -EMSGSIZE); 2473 goto errout_free; 2474 } 2475 2476 err = rtnl_unicast(skb, net, portid); 2477 goto errout; 2478 } 2479 2480 if (tb[RTA_NEWDST]) { 2481 if (nla_get_labels(tb[RTA_NEWDST], MAX_NEW_LABELS, &n_labels, 2482 labels, extack) != 0) { 2483 err = -EINVAL; 2484 goto errout; 2485 } 2486 2487 hdr_size = n_labels * sizeof(struct mpls_shim_hdr); 2488 } 2489 2490 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2491 if (!skb) { 2492 err = -ENOBUFS; 2493 goto errout; 2494 } 2495 2496 skb->protocol = htons(ETH_P_MPLS_UC); 2497 2498 if (hdr_size) { 2499 bool bos; 2500 int i; 2501 2502 if (skb_cow(skb, hdr_size)) { 2503 err = -ENOBUFS; 2504 goto errout_free; 2505 } 2506 2507 skb_reserve(skb, hdr_size); 2508 skb_push(skb, hdr_size); 2509 skb_reset_network_header(skb); 2510 2511 /* Push new labels */ 2512 hdr = mpls_hdr(skb); 2513 bos = true; 2514 for (i = n_labels - 1; i >= 0; i--) { 2515 hdr[i] = mpls_entry_encode(labels[i], 2516 1, 0, bos); 2517 bos = false; 2518 } 2519 } 2520 2521 nh = mpls_select_multipath(rt, skb); 2522 if (!nh) { 2523 err = -ENETUNREACH; 2524 goto errout_free; 2525 } 2526 2527 if (hdr_size) { 2528 skb_pull(skb, hdr_size); 2529 skb_reset_network_header(skb); 2530 } 2531 2532 nlh = nlmsg_put(skb, portid, in_nlh->nlmsg_seq, 2533 RTM_NEWROUTE, sizeof(*r), 0); 2534 if (!nlh) { 2535 err = -EMSGSIZE; 2536 goto errout_free; 2537 } 2538 2539 r = nlmsg_data(nlh); 2540 r->rtm_family = AF_MPLS; 2541 r->rtm_dst_len = 20; 2542 r->rtm_src_len = 0; 2543 r->rtm_table = RT_TABLE_MAIN; 2544 r->rtm_type = RTN_UNICAST; 2545 r->rtm_scope = RT_SCOPE_UNIVERSE; 2546 r->rtm_protocol = rt->rt_protocol; 2547 r->rtm_flags = 0; 2548 2549 if (nla_put_labels(skb, RTA_DST, 1, &in_label)) 2550 goto nla_put_failure; 2551 2552 if (nh->nh_labels && 2553 nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, 2554 nh->nh_label)) 2555 goto 
nla_put_failure;

	if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
	    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
			nh->nh_via_alen))
		goto nla_put_failure;
	dev = nh->nh_dev;
	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	err = rtnl_unicast(skb, net, portid);
errout:
	mutex_unlock(&net->mpls.platform_mutex);
	return err;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	err = -EMSGSIZE;
errout_free:
	mutex_unlock(&net->mpls.platform_mutex);
	kfree_skb(skb);
	return err;
}

/* Resize the per-netns platform label table to 'limit' entries.  The
 * new array and any predefined NULL-label routes are allocated outside
 * the lock; the switch-over happens under the platform mutex inside a
 * seqcount write section so lockless readers (mpls_platform_label_rcu())
 * observe a consistent pointer/size pair.  Entries above the new limit
 * are freed first, and the IPv4/IPv6 explicit-NULL routes are installed
 * when the table first grows past their reserved indices.  Returns 0 or
 * -ENOMEM.
 */
static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned index;

	if (size) {
		labels = kvzalloc(size, GFP_KERNEL);
		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		struct net_device *lo = net->loopback_dev;

		/* IPv4 explicit-NULL route, pointed at loopback */
		rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt0))
			goto nort0;

		rt0->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt0->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		struct net_device *lo = net->loopback_dev;

		/* IPv6 explicit-NULL route, pointed at loopback */
		rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt2))
			goto nort2;

		rt2->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt2->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	mutex_lock(&net->mpls.platform_mutex);

	/* Remember the original table */
	old = mpls_dereference(net, net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;	/* ownership moved into the table */
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;	/* ownership moved into the table */
	}

	/* Update the global pointers.  The seqcount write section makes
	 * the pointer/size pair appear atomic to lockless readers; BHs
	 * are disabled around it — presumably to keep a softirq-context
	 * reader from spinning on a write section it interrupted (TODO:
	 * confirm readers can run in BH context).
	 */
	local_bh_disable();
	write_seqcount_begin(&net->mpls.platform_label_seq);
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);
	write_seqcount_end(&net->mpls.platform_label_seq);
	local_bh_enable();

	mutex_unlock(&net->mpls.platform_mutex);

	/* Free any pre-built NULL routes that were not installed */
	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	if (old) {
		/* Let lockless readers of the old array drain first */
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}

/* sysctl handler for net.mpls.platform_labels: reads report the current
 * table size; a successful write resizes the platform label table.  A
 * temporary ctl_table clamps the value to [0, label_limit] while the
 * real table's ->data carries the netns pointer.
 */
static int mpls_platform_labels(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net
= table->data;
	int platform_labels = net->mpls.platform_labels;
	int ret;
	/* Range-check against a local copy so the table is only resized
	 * once proc_dointvec_minmax() has accepted the value.
	 */
	struct ctl_table tmp = {
		.procname = table->procname,
		.data = &platform_labels,
		.maxlen = sizeof(int),
		.mode = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = &label_limit,
	};

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = resize_platform_label_table(net, platform_labels);

	return ret;
}

/* Encode a 'struct net' field offset as a fake pointer; mpls_net_init()
 * turns it back into an absolute address in the per-netns table copy.
 */
#define MPLS_NS_SYSCTL_OFFSET(field) \
	(&((struct net *)0)->field)

static const struct ctl_table mpls_table[] = {
	{
		.procname = "platform_labels",
		.data = NULL,	/* offset 0: resolves to the netns itself */
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = mpls_platform_labels,
	},
	{
		.procname = "ip_ttl_propagate",
		.data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		.procname = "default_ttl",
		.data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &ttl_max,
	},
};

/* Per-netns init: initialize the platform-table lock/seqcount, set MPLS
 * defaults (empty label table, TTL propagation on, default TTL 255) and
 * register the net/mpls sysctl table.
 */
static __net_init int mpls_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(mpls_table);
	struct ctl_table *table;
	int i;

	mutex_init(&net->mpls.platform_mutex);
	seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex);

	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;
	net->mpls.ip_ttl_propagate = 1;
	net->mpls.default_ttl = 255;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Table data contains only offsets relative to the base of the
	 * struct net at this point, so make them absolute.
 */
	for (i = 0; i < table_size; i++)
		table[i].data = (char *)net + (uintptr_t)table[i].data;

	net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
					       table_size);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}

/* Per-netns exit: drop the sysctl table, then notify about and free
 * every remaining route before releasing the platform label array.
 */
static __net_exit void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	const struct ctl_table *table;
	unsigned int index;

	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since there was a device in
	 * the network namespace (and thus the last in flight packet)
	 * left this network namespace. This is because
	 * unregister_netdevice_many and netdev_run_todo has completed
	 * for each network device that was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
 */
	mutex_lock(&net->mpls.platform_mutex);

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_dereference(net, platform_label[index]);
		mpls_notify_route(net, index, rt, NULL, NULL);
		mpls_rt_free(rt);
	}

	mutex_unlock(&net->mpls.platform_mutex);

	kvfree(platform_label);
}

static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};

static struct rtnl_af_ops mpls_af_ops __read_mostly = {
	.family = AF_MPLS,
	.fill_stats_af = mpls_fill_stats_af,
	.get_stats_af_size = mpls_get_stats_af_size,
};

/* All handlers are flagged to run without taking the RTNL lock
 * (RTNL_FLAG_*_UNLOCKED); mpls_getroute() above serializes on the
 * per-netns platform mutex instead.
 */
static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = {
	{THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
	 mpls_netconf_get_devconf, mpls_netconf_dump_devconf,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};

/* Module init: register the pernet subsystem, netdevice notifier,
 * packet type, AF ops, rtnetlink handlers and GRE encap ops, unwinding
 * in reverse order on failure.
 */
static int __init mpls_init(void)
{
	int err;

	BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);

	err = register_pernet_subsys(&mpls_net_ops);
	if (err)
		goto out;

	err = register_netdevice_notifier(&mpls_dev_notifier);
	if (err)
		goto out_unregister_pernet;

	dev_add_pack(&mpls_packet_type);

	err = rtnl_af_register(&mpls_af_ops);
	if (err)
		goto out_unregister_dev_type;

	err = rtnl_register_many(mpls_rtnl_msg_handlers);
	if (err)
		goto out_unregister_rtnl_af;

	err =
	      ipgre_tunnel_encap_add_mpls_ops();
	if (err) {
		pr_err("Can't add mpls over gre tunnel ops\n");
		goto out_unregister_rtnl;
	}

	err = 0;
out:
	return err;

	/* Error unwinding: each label undoes the step above it */
out_unregister_rtnl:
	rtnl_unregister_many(mpls_rtnl_msg_handlers);
out_unregister_rtnl_af:
	rtnl_af_unregister(&mpls_af_ops);
out_unregister_dev_type:
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
out_unregister_pernet:
	unregister_pernet_subsys(&mpls_net_ops);
	goto out;
}
module_init(mpls_init);

/* Module exit: unregister everything set up by mpls_init() */
static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	rtnl_af_unregister(&mpls_af_ops);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
	ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);

MODULE_DESCRIPTION("MultiProtocol Label Switching");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_NETPROTO(PF_MPLS);